There are two different fixes, which are independent of each other:
1) fix the throttler's "mad sleeping" and the number of packets sent at once
2) enlarge the TCP window
note 1: the second fix is optional and could be improved by enlarging the window less on low bandwidth*delay connections.
note 2: this is a port of a fix I first made on the aMule code (which is identical)
note 3: there are some debug messages left in the patch, so don't mind them
note 4: I haven't found a better way to post the patch; suggestions are welcome
note 5: I have no more notes
EDIT: I forgot an important note: the performance improvements are noticeable on connections with high upload speeds (e.g. mine is a symmetric 10 Mbit line)
diff -urd ../srchybrid-orig/AsyncSocketEx.cpp ./AsyncSocketEx.cpp --- ../srchybrid-orig/AsyncSocketEx.cpp 2006-01-20 02:58:18.000000000 +0100 +++ ./AsyncSocketEx.cpp 2006-01-20 03:02:21.000000000 +0100 @@ -513,6 +513,12 @@ SOCKET hSocket=socket(AF_INET, nSocketType, 0); if (hSocket==INVALID_SOCKET) return FALSE; + + int window_size = 256 * 1024; + + setsockopt(hSocket, SOL_SOCKET, SO_SNDBUF, (char *) &window_size, sizeof(window_size) ); + setsockopt(hSocket, SOL_SOCKET, SO_RCVBUF, (char *) &window_size, sizeof(window_size) ); + m_SocketData.hSocket=hSocket; AttachHandle(hSocket); if (!AsyncSelect(lEvent)) diff -urd ../srchybrid-orig/AsyncSocketExLayer.cpp ./AsyncSocketExLayer.cpp --- ../srchybrid-orig/AsyncSocketExLayer.cpp 2006-01-20 02:58:18.000000000 +0100 +++ ./AsyncSocketExLayer.cpp 2006-01-20 03:02:20.000000000 +0100 @@ -476,6 +476,12 @@ SOCKET hSocket=socket(AF_INET, nSocketType, 0); if (hSocket==INVALID_SOCKET) res=FALSE; + + int window_size = 256 * 1024; + + setsockopt(hSocket, SOL_SOCKET, SO_SNDBUF, (char *) &window_size, sizeof(window_size) ); + setsockopt(hSocket, SOL_SOCKET, SO_RCVBUF, (char *) &window_size, sizeof(window_size) ); + m_pOwnerSocket->m_SocketData.hSocket=hSocket; m_pOwnerSocket->AttachHandle(hSocket); if (!m_pOwnerSocket->AsyncSelect(lEvent)) @@ -600,4 +606,4 @@ } else return m_pNextLayer->ShutDownNext(nHow); -} \ No newline at end of file +} diff -urd ../srchybrid-orig/UploadBandwidthThrottler.cpp ./UploadBandwidthThrottler.cpp --- ../srchybrid-orig/UploadBandwidthThrottler.cpp 2006-01-20 02:58:20.000000000 +0100 +++ ./UploadBandwidthThrottler.cpp 2006-01-20 03:00:16.000000000 +0100 @@ -23,6 +23,7 @@ #include "LastCommonRouteFinder.h" #include "OtherFunctions.h" #include "emuledlg.h" +#include "Log.h" #ifdef _DEBUG #define new DEBUG_NEW @@ -368,6 +369,20 @@ uint32 rememberedSlotCounter = 0; DWORD lastTickReachedBandwidth = ::GetTickCount(); + uint32 extraSleepTime = 0; + uint32 nextPrint = 0; // needed for debugging + DWORD lastSpent = 
lastLoopTick; + uint64 spentBytes = 0; + uint64 spentOverhead = 0; + sint64 estSendBytesT = 0; + uint32 timeSinceLastSpent = 0; + uint32 nextEstSBT = 0; + + + const uint32 maxScale = 1200 * 1024, minScale = 6 * 1024, minFragments = 2, maxFragments = 128; + const float factor = (float)(maxFragments - minFragments) / 2 / (maxScale - minScale); + const uint16 avgPeriod = 10; + while(doRun) { pauseEvent->Lock(); @@ -377,7 +392,12 @@ allowedDataRate = theApp.lastCommonRouteFinder->GetUpload(); uint32 minFragSize = 1300; - uint32 doubleSendSize = minFragSize*2; // send two packages at a time so they can share an ACK + + // Linearly scaling fragments number + uint16 nFragments = (uint16)( factor * ( ( allowedDataRate < maxScale ? allowedDataRate : maxScale ) - minScale ) + minFragments/2 + .5 ) * 2; + + uint32 doubleSendSize = minFragSize*nFragments; + if(allowedDataRate < 6*1024) { minFragSize = 536; doubleSendSize = minFragSize; // don't send two packages at a time at very low speeds to give them a smoother load @@ -386,8 +406,7 @@ #define TIME_BETWEEN_UPLOAD_LOOPS 1 uint32 sleepTime; if(allowedDataRate == 0 || allowedDataRate == _UI32_MAX || realBytesToSpend >= 1000) { - // we could send at once, but sleep a while to not suck up all cpu - sleepTime = TIME_BETWEEN_UPLOAD_LOOPS; + sleepTime = extraSleepTime; } else { // sleep for just as long as we need to get back to having one byte to send sleepTime = max((uint32)ceil((double)(-realBytesToSpend + 1000)/allowedDataRate), TIME_BETWEEN_UPLOAD_LOOPS); @@ -419,7 +438,34 @@ realBytesToSpend += allowedDataRate*timeSinceLastLoop; + // keep in mind that we don't count the IP overhead (~ +3% [~ 1500/1460]) + // and we're prone to understimate using an EWMA (~ +2% [simulation]) + if ((uint32)(estSendBytesT/avgPeriod*1.05f) > allowedDataRate) + realBytesToSpend = 0; + bytesToSpend = realBytesToSpend/1000; + + // debug message + + if (nextPrint < time(NULL)) { + AddDebugLogLine(DLP_VERYLOW, false, + _T("dLL: %u dLS: %u TS: %d B 
realTS: %d B SB: %d B DR: %u KB/s MaxUpload: %u KB/s dSS: %u B eSBT: %d B eSR: %d KB/s nFrag: %u ST: %u"), + timeSinceLastLoop, + timeSinceLastSpent, + (int)bytesToSpend, + (int)realBytesToSpend, + (int)spentBytes, + allowedDataRate / 1024, + theApp.lastCommonRouteFinder->GetUpload() / 1024, + doubleSendSize, + (int)estSendBytesT, + (int)(estSendBytesT / 10240), + nFragments, + sleepTime); + + nextPrint=time(NULL)+2; + } + } else { realBytesToSpend = _I64_MAX; bytesToSpend = _I32_MAX; @@ -431,9 +477,9 @@ lastLoopTick = thisLoopTick; + spentBytes = 0; + spentOverhead = 0; if(bytesToSpend >= 1) { - uint64 spentBytes = 0; - uint64 spentOverhead = 0; sendLocker.Lock(); @@ -471,7 +517,7 @@ // Check if any sockets haven't gotten data for a long time. Then trickle them a package. for(uint32 slotCounter = 0; slotCounter < (uint32)m_StandardOrder_list.GetSize(); slotCounter++) { - ThrottledFileSocket* socket = m_StandardOrder_list.GetAt(slotCounter); + ThrottledFileSocket* socket = m_StandardOrder_list.GetAt(( slotCounter + rememberedSlotCounter ) % m_StandardOrder_list.GetSize()); if(socket != NULL) { if(thisLoopTick-socket->GetLastCalledSend() > SEC2MS(1)) { @@ -572,6 +618,36 @@ m_SentBytesSinceLastCallOverhead += spentOverhead; sendLocker.Unlock(); + + // This one is another important change + // if you send the thread to sleep you will never get good performances + // the cpu is already freed by the system calls (like read and write on the sockets) + // When implemented this you will also get even less cpu usage by the throttling thread + // This is happening because switching between sleep and run a lot of times + // could have high impact on the scheds and paging optimizations of the so. 
+ extraSleepTime = 0; + + } else { + if (extraSleepTime == 0) + extraSleepTime=1; + + extraSleepTime = min(extraSleepTime * 5, 1000); // 1s at most + } + + estSendBytesT += spentBytes + spentOverhead; + + if (nextEstSBT < thisLoopTick) { + + timeSinceLastSpent = thisLoopTick - lastSpent; + lastSpent = thisLoopTick; + + + sint64 estSendBytesTOld = estSendBytesT; + estSendBytesT -= (sint64)(estSendBytesT*(float)timeSinceLastSpent/(1000*avgPeriod)); + + AddDebugLogLine(DLP_VERYLOW, false, _T("eSBT: %d eSBTO: %d tSLL: %u Rate: %.2f\n"), (int)estSendBytesT, (int)estSendBytesTOld, timeSinceLastSpent, (float)estSendBytesT/(1024*avgPeriod)); + + nextEstSBT = thisLoopTick + 250; } } @@ -589,4 +665,4 @@ sendLocker.Unlock(); return 0; -} \ No newline at end of file +}
This post has been edited by lupzz: 20 January 2006 - 02:26 AM