.h file was made neat(inlines are removed from declarations, as they are marked as inline in definitions).
SetValueBE is slightly tweaked by removing SetValue(0).
CUInt128::CUInt128(const CUInt128 &uValue, UINT uNumBits) is slightly tweaked by using rand() & 1 instead of rand() % 2. (& 1 and % 2 are different for int because a negative int may have a remainder of -1, not 1).
using #pragma to disable unreferenced formal parameter warnings on Assembly codes.
******************
Hi all,
I have nearly re-written the whole UInt 128 class while following its original 'logic'(well.... I have to say it's really messed up with different conventions on BE/LE).
There are mainly 4 kinds of improvements:
- Faster routines for string conversions.
- Faster routines for long-int maths using assembly. (The use of __int64 on 32-bit machine is very inefficient)
- Short functions without condition jumps(if/while/for) are marked as inline to reduce overhead. (I don't trust compilers' decision on this so I declare 'inline's explicitly)
- Unroll several loops.
So here is the code. And I'll give you quick reading guides.
UInt128.h:
Quick reading guide:
First there are 7 private non-member(static) functions' declarations. They are implemented using assembly in UInt128.c.
Then short functions are marked as inline. (to omit function call overheads)
Several loops are unrolled.
*** No Interface Changed ***
/* Modified by yumeyao on 2013/1/9: Optimize routines to improve speed. */ /* Copyright (C)2003 Barry Dunne (http://www.emule-project.net) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ // Note To Mods // /* Please do not change anything here and release it.. There is going to be a new forum created just for the Kademlia side of the client.. If you feel there is an error or a way to improve something, please post it in the forum first and let us look at it.. If it is a real improvement, it will be added to the offical client.. Changing something without knowing what all it does can cause great harm to the network if released in mass form.. Any mod that changes anything within the Kademlia side will not be allowed to advertise there client on the eMule forum.. */ #pragma once namespace Kademlia { class CUInt128 { public: CUInt128::CUInt128() { SetValue((ULONG)0); } CUInt128(bool bFill) { if( bFill ) { m_uData[0] = (ULONG)-1; m_uData[1] = (ULONG)-1; m_uData[2] = (ULONG)-1; m_uData[3] = (ULONG)-1; } else SetValue((ULONG)0); } CUInt128(ULONG uValue) { SetValue(uValue); } CUInt128(const byte *pbyValueBE) { SetValueBE(pbyValueBE); } //Generates a new number, copying the most significant 'numBits' bits from 'value'. //The remaining bits are randomly generated. CUInt128(const CUInt128 &uValue, UINT uNumBits = 128); const byte* CUInt128::GetData() const; byte* CUInt128::GetDataPtr() const; /** Bit at level 0 being most significant. */ UINT GetBitNumber(UINT uBit) const; int CompareTo(const CUInt128 &uOther) const; int CompareTo(ULONG uValue) const; void ToHexString(CString *pstr) const; CString ToHexString() const; void ToBinaryString(CString *pstr, bool bTrim = false) const; void ToByteArray(byte *pby) const; ULONG CUInt128::Get32BitChunk(int iVal) const; CUInt128& CUInt128::SetValue(const CUInt128 &uValue); CUInt128& SetValue(ULONG uValue); CUInt128& CUInt128::SetValueBE(const byte *pbyValueBE); CUInt128& SetValueRandom(); CUInt128& SetValueGUID(); CUInt128& SetBitNumber(UINT uBit, UINT uValue); CUInt128& CUInt128::ShiftLeft(UINT uBits); CUInt128& CUInt128::Add(const CUInt128 &uValue); CUInt128& CUInt128::Add(ULONG uValue); //CUInt128& Div(ULONG uValue); CUInt128& CUInt128::Subtract(const CUInt128 &uValue); CUInt128& CUInt128::Subtract(ULONG uValue); CUInt128& CUInt128::Xor(const CUInt128 &uValue); CUInt128& CUInt128::XorBE(const byte *pbyValueBE); void CUInt128::operator+ (const CUInt128 &uValue) {Add(uValue);} void CUInt128::operator- (const CUInt128 &uValue) {Subtract(uValue);} void CUInt128::operator= (const CUInt128 &uValue) {SetValue(uValue);} bool CUInt128::operator< (const CUInt128 &uValue) const {return (CompareTo(uValue) < 0);} bool CUInt128::operator> (const CUInt128 &uValue) const {return (CompareTo(uValue) > 0);} bool CUInt128::operator<= (const CUInt128 &uValue) const {return (CompareTo(uValue) <= 0);} bool CUInt128::operator>= (const CUInt128 &uValue) const {return (CompareTo(uValue) >= 0);} bool CUInt128::operator== (const CUInt128 &uValue) const {return (CompareTo(uValue) == 0);} bool CUInt128::operator!= (const CUInt128 &uValue) const {return (CompareTo(uValue) != 0);} void CUInt128::operator+ (ULONG uValue) {Add(uValue);} void CUInt128::operator- (ULONG uValue) {Subtract(uValue);} void CUInt128::operator= (ULONG uValue) {SetValue(uValue);} bool CUInt128::operator< (ULONG uValue) const {return (CompareTo(uValue) < 0);} bool CUInt128::operator> (ULONG uValue) const {return (CompareTo(uValue) > 0);} bool CUInt128::operator<= (ULONG uValue) const {return (CompareTo(uValue) <= 0);} bool CUInt128::operator>= (ULONG uValue) const {return (CompareTo(uValue) >= 0);} bool CUInt128::operator== (ULONG uValue) const {return (CompareTo(uValue) == 0);} bool CUInt128::operator!= (ULONG uValue) const {return (CompareTo(uValue) != 0);} private: static void __fastcall _ByteSwapPer32Bit(void *dst, const void *src); static void __fastcall _Add(ULONG *m_uData, const ULONG *m_uAddend); static void __fastcall _AddUlong(ULONG *m_uData, ULONG uAddend); static void __fastcall _Subtract(ULONG *m_uData, const ULONG *m_uSubtractor); static void __fastcall _SubtractUlong(ULONG *m_uData, ULONG uSubtractor); static void __fastcall _ShiftLeft(ULONG *m_uData, UINT uBits); static void __fastcall _ShiftLeftOneBit(ULONG *m_uData); ULONG m_uData[4]; }; inline const byte* CUInt128::GetData() const { return (byte*)m_uData; } inline byte* CUInt128::GetDataPtr() const { return (byte*)m_uData; } inline CString CUInt128::ToHexString() const { CString pstr; ToHexString(&pstr); return pstr; } inline void CUInt128::ToByteArray(byte *pby) const { _ByteSwapPer32Bit(pby, (void *)m_uData); } inline ULONG CUInt128::Get32BitChunk(int iVal) const { return m_uData[iVal]; } inline CUInt128& CUInt128::SetValue(const CUInt128 &uValue) { m_uData[0] = uValue.m_uData[0]; m_uData[1] = uValue.m_uData[1]; m_uData[2] = uValue.m_uData[2]; m_uData[3] = uValue.m_uData[3]; return *this; } inline CUInt128& CUInt128::SetValue(ULONG uValue) { m_uData[0] = 0; m_uData[1] = 0; m_uData[2] = 0; m_uData[3] = uValue; return *this; } inline CUInt128& CUInt128::SetValueBE(const byte *pbyValueBE) { _ByteSwapPer32Bit(m_uData, pbyValueBE); return *this; } //The check routine is inlined. //compiler could directly call the easy way if uBits is a constant 1. inline CUInt128& CUInt128::ShiftLeft(UINT uBits) { if (uBits == 1) _ShiftLeftOneBit(m_uData); else _ShiftLeft(m_uData, uBits); return *this; } inline CUInt128& CUInt128::Add(const CUInt128 &uValue) { _Add(m_uData, uValue.m_uData); return *this; } inline CUInt128& CUInt128::Add(ULONG uValue) { if (uValue != 0) _AddUlong(m_uData, uValue); return *this; } inline CUInt128& CUInt128::Subtract(const CUInt128 &uValue) { _Subtract(m_uData, uValue.m_uData); return *this; } inline CUInt128& CUInt128::Subtract(ULONG uValue) { if (uValue != 0) _SubtractUlong(m_uData, uValue); return *this; } inline CUInt128& CUInt128::Xor(const CUInt128 &uValue) { m_uData[0] ^= uValue.m_uData[0]; m_uData[1] ^= uValue.m_uData[1]; m_uData[2] ^= uValue.m_uData[2]; m_uData[3] ^= uValue.m_uData[3]; return *this; } inline CUInt128& CUInt128::XorBE(const byte *pbyValueBE) { ULONG m_uTemp[4]={0}; _ByteSwapPer32Bit(m_uTemp, pbyValueBE); m_uData[0] ^= m_uTemp[0]; m_uData[1] ^= m_uTemp[1]; m_uData[2] ^= m_uTemp[2]; m_uData[3] ^= m_uTemp[3]; return *this; } }
UInt128.cpp
Quick reading guide:
- 7 assembly functions' implements
- inlined functions moved to UInt128.h
- Unrolled several loops
- CUInt128::CUInt128(const CUInt128 &uValue, UINT uNumBits)
The values are copied directly because SetBitNumber will overwrite them anyway. - ToHexString & ToBinaryString use faster routines from "Helper/FastString.h".
/* Modified by yumeyao on 2013/1/9: Optimize routines to improve speed. */ /* Copyright (C)2003 Barry Dunne (http://www.emule-project.net) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ // Note To Mods // /* Please do not change anything here and release it.. There is going to be a new forum created just for the Kademlia side of the client.. If you feel there is an error or a way to improve something, please post it in the forum first and let us look at it.. If it is a real improvement, it will be added to the offical client.. Changing something without knowing what all it does can cause great harm to the network if released in mass form.. Any mod that changes anything within the Kademlia side will not be allowed to advertise there client on the eMule forum.. */ #include "stdafx.h" #pragma warning(push) #pragma warning(disable:4516) // access-declarations are deprecated; member using-declarations provide a better alternative #pragma warning(disable:4242) // conversion from 'type1' to 'type2', possible loss of data #pragma warning(disable:4244) // conversion from 'type1' to 'type2', possible loss of data #pragma warning(disable:4100) // unreferenced formal parameter #pragma warning(disable:4189) // unreferenced local variable #pragma warning(disable:4702) // unreachable code #include <cryptopp/osrng.h> #pragma warning(pop) #include "./UInt128.h" #include "Helper/FastString.h" #include "Log.h" #ifdef _DEBUG #define new DEBUG_NEW #undef THIS_FILE static char THIS_FILE[] = __FILE__; #endif using namespace Kademlia; using namespace CryptoPP; #pragma warning(push) #pragma warning(disable:4100) __declspec(naked) void __fastcall CUInt128::_ByteSwapPer32Bit(void *dst, const void *src) { __asm { mov eax,[edx+0] bswap eax mov [ecx+0],eax mov eax,[edx+4] bswap eax mov [ecx+4],eax mov eax,[edx+8] bswap eax mov [ecx+8],eax mov eax,[edx+12] bswap eax mov [ecx+12],eax ret } } __declspec(naked) void __fastcall CUInt128::_Add(ULONG *m_uData, const ULONG *m_uAddend) { __asm { mov eax, [ecx+12] add eax, [edx+12] mov [ecx+12], eax mov eax, [ecx+8] adc eax, [edx+8] mov [ecx+8], eax mov eax, [ecx+4] adc eax, [edx+4] mov [ecx+4], eax mov eax, [ecx+0] adc eax, [edx+0] mov [ecx+0], eax ret } } __declspec(naked) void __fastcall CUInt128::_AddUlong(ULONG *m_uData, ULONG uAddend) { __asm { mov eax, [ecx+12] add eax, edx mov [ecx+12], eax mov eax, [ecx+8] adc eax, 0 mov [ecx+8], eax mov eax, [ecx+4] adc eax, 0 mov [ecx+4], eax mov eax, [ecx+0] adc eax, 0 mov [ecx+0], eax ret } } __declspec(naked) void __fastcall CUInt128::_Subtract(ULONG *m_uData, const ULONG *m_uSubtractor) { __asm { mov eax, [ecx+12] sub eax, [edx+12] mov [ecx+12], eax mov eax, [ecx+8] sbb eax, [edx+8] mov [ecx+8], eax mov eax, [ecx+4] sbb eax, [edx+4] mov [ecx+4], eax mov eax, [ecx+0] sbb eax, [edx+0] mov [ecx+0], eax ret } } __declspec(naked) void __fastcall CUInt128::_SubtractUlong(ULONG *m_uData, ULONG uSubtractor) { __asm { mov eax, [ecx+12] sub eax, edx mov [ecx+12], eax mov eax, [ecx+8] sbb eax, 0 mov [ecx+8], eax mov eax, [ecx+4] sbb eax, 0 mov [ecx+4], eax mov eax, [ecx+0] sbb eax, 0 mov [ecx+0], eax ret } } /* We can use carry flag to shift 1 bit */ __declspec(naked) void __fastcall CUInt128::_ShiftLeftOneBit(ULONG *m_uData) { __asm { clc rcl dword ptr [ecx+12], 1 rcl dword ptr [ecx+8], 1 rcl dword ptr [ecx+4], 1 rcl dword ptr [ecx+0], 1 ret } } __declspec(naked) void __fastcall CUInt128::_ShiftLeft(ULONG *m_uData, UINT uBits) { __asm { test edx, edx jz DoNothing mov eax, edx shr edx, 5 and eax, 31 cmp edx, 4 jge SetZero push esi _emit 0x91 ; xchg eax, ecx ; /* cl is used for bit shift. ; Use _emit because ecx will be restored later. ; Only if _emit is used, the compiler treats ecx as unchanged */ push edi mov esi, [eax+edx*4+0] push edx cmp edx, 3 je LastStage mov edi, [eax+edx*4+4] shld esi, edi, cl add edx, 1 mov [eax+0], esi mov esi, [eax+edx*4+0] cmp edx, 3 je LastStage mov edi, [eax+edx*4+4] shld esi, edi, cl add edx, 1 mov [eax+4], esi mov esi, [eax+edx*4+0] cmp edx, 3 je LastStage mov edi, [eax+edx*4+4] shld esi, edi, cl mov [eax+8], esi mov esi, [eax+12] LastStage: pop edx shl esi, cl lea edi, [eax+12] xor ecx, ecx _emit 0x91 ; xchg eax, ecx ; /* restore ecx */ sub edx, 1 js Done mov [edi], eax sub edi, 4 sub edx, 1 js Done mov [edi], eax sub edi, 4 sub edx, 1 js Done mov [edi], eax sub edi, 4 Done: mov [edi], esi pop edi pop esi DoNothing: ret SetZero: xor eax, eax mov [ecx], eax mov [ecx+4], eax mov [ecx+8], eax mov [ecx+12], eax ret } } #pragma warning(pop) CUInt128::CUInt128(const CUInt128 &uValue, UINT uNumBits) { // Copy the whole ULONGs SetValue(uValue); // Pad with random bytes (Not seeding based on time to allow multiple different ones to be created in quick succession) for (UINT iIndex=uNumBits; iIndex<128; iIndex++) SetBitNumber(iIndex, (rand() & 1)); } CUInt128& CUInt128::SetValueRandom() { AutoSeededRandomPool rng; byte byRandomBytes[16]; rng.GenerateBlock(byRandomBytes, 16); SetValueBE( byRandomBytes ); return *this; } CUInt128& CUInt128::SetValueGUID() { SetValue((ULONG)0); GUID guid; if (CoCreateGuid(&guid) != S_OK) return *this; m_uData[0] = guid.Data1; m_uData[1] = ((ULONG)guid.Data2) << 16 | guid.Data3; m_uData[2] = ((ULONG)guid.Data4[0]) << 24 | ((ULONG)guid.Data4[1]) << 16 | ((ULONG)guid.Data4[2]) << 8 | ((ULONG)guid.Data4[3]); m_uData[3] = ((ULONG)guid.Data4[4]) << 24 | ((ULONG)guid.Data4[5]) << 16 | ((ULONG)guid.Data4[6]) << 8 | ((ULONG)guid.Data4[7]); return *this; } UINT CUInt128::GetBitNumber(UINT uBit) const { if (uBit > 127) return 0; int iLongNum = uBit / 32; int iShift = 31 - (uBit % 32); return ((m_uData[iLongNum] >> iShift) & 1); } CUInt128& CUInt128::SetBitNumber(UINT uBit, UINT uValue) { int iLongNum = uBit / 32; int iShift = 31 - (uBit % 32); ULONG uData = m_uData[iLongNum]; ULONG uMask = 1 << iShift; uData |= uMask; if (uValue == 0) uData ^= uMask; m_uData[iLongNum] = uData; return *this; } void CUInt128::ToHexString(CString *pstr) const { wchar_t sElement[34]; for (int iIndex=0; iIndex<4; iIndex++) { ULongToHexString(&(sElement[iIndex*8]), m_uData[iIndex]); } (*(long *)&sElement[32]) = 0; pstr->SetString(sElement); } void CUInt128::ToBinaryString(CString *pstr, bool bTrim) const { wchar_t sElement[130]; ULongToBinaryStringReversed(&(sElement[0*32]), m_uData[0]); ULongToBinaryStringReversed(&(sElement[1*32]), m_uData[1]); ULongToBinaryStringReversed(&(sElement[2*32]), m_uData[2]); ULongToBinaryStringReversed(&(sElement[3*32]), m_uData[3]); (*(long *)&sElement[128]) = 0; wchar_t *sStart = sElement; if (bTrim) { while (*sStart == '0') { sStart++; } if (*sStart == '\0') sStart--; } pstr->SetString(sStart); } int CUInt128::CompareTo(const CUInt128 &uOther) const { for (int iIndex=0; iIndex<4; iIndex++) { if (m_uData[iIndex] < uOther.m_uData[iIndex]) return -1; if (m_uData[iIndex] > uOther.m_uData[iIndex]) return 1; } return 0; } int CUInt128::CompareTo(ULONG uValue) const { if ((m_uData[0] > 0) || (m_uData[1] > 0) || (m_uData[2] > 0) || (m_uData[3] > uValue)) return 1; if (m_uData[3] < uValue) return -1; return 0; } /* Untested CUInt128& CUInt128::Div(ULONG uValue) { ULONG uBit, uRemain = 0; for (i = 0; i < 128; i++) { uBit = GetBitNumber(0); uRemain <<= 1; if (uBit) uRemain |= 1; ShiftLeft(1); if (uRemain >= uValue) { uRemain -= uValue; SetBitNumber(127, 1); } } return *this; } */
"Helper/FastString.h" & "Helper/FastString.c"
Quick reading guide:
ToHexString uses a look table so that one byte is converted directly to 2 wchars(4 bytes, a 32-bit int).
ToBinaryString doesn't use GetBitNumber each time, instead it uses a mask and shift this mask to get each bit, then write it back as string.
FastString.h:
/* Copyright (C)2013 yumeyao (yumeyao#gmail.com) These routines are designed for emule where Basic String could over-perform CString a lot. */ /* only w string used */ /* for MSVC++ use */ #pragma once #define FASTSTRING_CALL __fastcall #define FASTSTRING_INLINE __forceinline #ifdef __cplusplus extern "C" { #endif extern const long ByteToHexLookupTable[256]; FASTSTRING_INLINE void ULongToHexString(void *pstrOut, unsigned long uValue) { unsigned long uTemp = uValue; unsigned int uByte; uByte = (unsigned char)uTemp; *((long *)(pstrOut)+3) = ByteToHexLookupTable[uByte]; uTemp >>= 8; uByte = (unsigned char)uTemp; *((long *)(pstrOut)+2) = ByteToHexLookupTable[uByte]; uTemp >>= 8; uByte = (unsigned char)uTemp; *((long *)(pstrOut)+1) = ByteToHexLookupTable[uByte]; uTemp >>= 8; /* uByte = (unsigned char)uTemp;*/ uByte = uTemp; /* now last one, already with high 24 bit zero */ *((long *)(pstrOut)+0) = ByteToHexLookupTable[uByte]; } void FASTSTRING_CALL ULongToBinaryStringReversed(void *pstrOut, unsigned long uValue); #ifdef __cplusplus } #endif
FastString.c
/* Copyright (C)2013 yumeyao (yumeyao#gmail.com) These routines are designed for emule where Basic String could over-perform CString a lot. */ #include "FastString.h" /* assumes cpu is little-endian */ #define w(h,l) (h+(l<<16)) #define line(h) w(h,'0'),w(h,'1'),w(h,'2'),w(h,'3'),w(h,'4'),w(h,'5'),w(h,'6'),w(h,'7'), \ w(h,'8'),w(h,'9'),w(h,'A'),w(h,'B'),w(h,'C'),w(h,'D'),w(h,'E'),w(h,'F') const long ByteToHexLookupTable[256] = { line('0'), line('1'), line('2'), line('3'), line('4'), line('5'), line('6'), line('7'), line('8'), line('9'), line('A'), line('B'), line('C'), line('D'), line('E'), line('F') }; void FASTSTRING_CALL ULongToBinaryStringReversed(void *pstrOut, unsigned long uValue) { unsigned long uMask = 0x80000000; do { unsigned long uMask0 = uMask; unsigned long uResult; uMask >>= 1; uResult = ((uValue & uMask) != 0) ? '1' : '0'; uResult <<= 16; uResult |= ((uValue & uMask0) != 0) ? '1' : '0'; *(unsigned long *)pstrOut = uResult; pstrOut = ((unsigned long *)pstrOut) + 1; uMask >>= 1; } while (uMask); }
This post has been edited by YumeYao: 10 January 2013 - 09:01 AM