I inspected the generated code for a time-critical loop.
I compiled with /Ox /Og /Ot /Oy /Oa.
None of the variables involved are declared volatile.
I do not understand why pSrc is loaded twice into edx.
Is there any way to avoid this, or does it not affect
performance?
; 136 : {
; 137 : BYTE ucData = *pData;
; 138 : *pData = *pSrc++;
mov edx, DWORD PTR _pSrc$[esp+104]
mov al, BYTE PTR [edx]
mov cl, BYTE PTR [ebx]
mov BYTE PTR [ebx], al
mov edx, DWORD PTR _pSrc$[esp+104] <-- why?
; 139 : if (!pbMask || *pbMask++)
mov eax, DWORD PTR _pbMask$[esp+104]
inc edx
test eax, eax
mov BYTE PTR _ucData$182555[esp+104], cl
mov DWORD PTR _pSrc$[esp+104], edx