- Implement special, even more optimized versions of SRCCOPY on equal surfaces for 8, 16, 24 and 32bpp.
- Add the right-to-left versions to the function tables
- Add _CALCSHIFT macro(s) to calculate the shift in the dib functions, instead of precalculating it in EngBitBlt. This costs us a few bytes per function (yes, with so many functions, every byte counts!) but since it's only for 1 and 4 bpp and improves the code by keeping DIB specific code out of the Eng function, it's reasonable to do so.
- Add optional optimization pragmas

svn path=/trunk/; revision=56212
This commit is contained in:
Timo Kreuzer 2012-03-22 20:12:50 +00:00
parent b8d5c066d9
commit 141eee9683
6 changed files with 97 additions and 16 deletions

View File

@ -1,6 +1,56 @@
#include "DibLib.h"
/*
 * Dib_BitBlt_SRCCOPY_EqSurf
 *
 * SRCCOPY without any per-pixel work: every scanline of the blit rectangle
 * is copied as one run of raw bytes.
 *
 * pBltData - blit description. Reads ulWidth, ulHeight and jDstBpp, plus
 *            pjBase and lDelta of both siDst and siSrc.
 *
 * NOTE(review): the byte count per line is ulWidth * jDstBpp, which is only
 * correct if jDstBpp is a per-pixel size in BYTES - confirm against the code
 * that fills in BLTDATA.
 */
VOID
FASTCALL
Dib_BitBlt_SRCCOPY_EqSurf(PBLTDATA pBltData)
{
    ULONG cLines, cjWidth = pBltData->ulWidth * pBltData->jDstBpp;
    PBYTE pjDestBase = pBltData->siDst.pjBase;
    PBYTE pjSrcBase = pBltData->siSrc.pjBase;

    /* Loop all lines */
    cLines = pBltData->ulHeight;
    while (cLines--)
    {
        /* The "EqSurf" variants presumably run with source and destination
           on the same surface, so the two byte ranges of a line may overlap.
           memcpy is undefined for overlapping ranges; memmove is not. */
        memmove(pjDestBase, pjSrcBase, cjWidth);

        /* Advance both pointers by their own scanline stride */
        pjDestBase += pBltData->siDst.lDelta;
        pjSrcBase += pBltData->siSrc.lDelta;
    }
}
/* The 8, 16 and 24 bpp equal-surface function names are aliases of the
   generic bytewise line copy Dib_BitBlt_SRCCOPY_EqSurf */
#define Dib_BitBlt_SRCCOPY_S8_D8_EqSurf Dib_BitBlt_SRCCOPY_EqSurf
#define Dib_BitBlt_SRCCOPY_S16_D16_EqSurf Dib_BitBlt_SRCCOPY_EqSurf
#define Dib_BitBlt_SRCCOPY_S24_D24_EqSurf Dib_BitBlt_SRCCOPY_EqSurf
/* 32 bpp equal-surface SRCCOPY: on x86 / x64 each line is moved as a run of
   DWORDs with the __movsd intrinsic; on every other target the name is an
   alias of the generic bytewise copy */
#if defined(_M_IX86) || defined(_M_AMD64)
VOID
FASTCALL
Dib_BitBlt_SRCCOPY_S32_D32_EqSurf(PBLTDATA pBltData)
{
    PBYTE pjDstLine = pBltData->siDst.pjBase;
    PBYTE pjSrcLine = pBltData->siSrc.pjBase;
    ULONG cDwordsPerLine = pBltData->ulWidth; /* one DWORD per 32 bpp pixel */
    ULONG cLinesLeft;

    /* Copy ulHeight lines, stepping each pointer by its own stride */
    for (cLinesLeft = pBltData->ulHeight; cLinesLeft != 0; cLinesLeft--)
    {
        __movsd((PULONG)pjDstLine, (PULONG)pjSrcLine, cDwordsPerLine);
        pjDstLine += pBltData->siDst.lDelta;
        pjSrcLine += pBltData->siSrc.lDelta;
    }
}
#else
/* No movsd available: fall back to the generic line copy */
#define Dib_BitBlt_SRCCOPY_S32_D32_EqSurf Dib_BitBlt_SRCCOPY_EqSurf
#endif
/* Mark the four equal-surface functions as hand-written by defining
   <function name>_manual to 1.
   NOTE(review): presumably consumed by the function template, which tests
   "<function name>_manual != 1" before emitting its generic body - confirm
   the paste still yields these names given the alias defines for 8/16/24 bpp. */
#define Dib_BitBlt_SRCCOPY_S8_D8_EqSurf_manual 1
#define Dib_BitBlt_SRCCOPY_S16_D16_EqSurf_manual 1
#define Dib_BitBlt_SRCCOPY_S24_D24_EqSurf_manual 1
#define Dib_BitBlt_SRCCOPY_S32_D32_EqSurf_manual 1
/* Operand flags for SRCCOPY: the source is used, pattern and destination
   are not (presumably read by the template via #if __USES_xxx) */
#define __USES_SOURCE 1
#define __USES_PATTERN 0
#define __USES_DEST 0

View File

@ -1,6 +1,8 @@
#include "DibLib.h"
/* Bit shift of a 4 bpp pixel inside its byte, indexed by (x & 1) as done in
   _CALCSHIFT_4: even x gives 4, odd x gives 0 */
BYTE ajShift4[2] = {4, 0};
enum
{
INDEX_BitBlt_NOOP,

View File

@ -9,6 +9,21 @@
#include <windef.h>
#include <wingdi.h>
#include <winddi.h>
/* Optional: when _OPTIMIZE_DIBLIB is defined, request optimization for the
   code that follows through a compiler-specific pragma */
#ifdef _OPTIMIZE_DIBLIB
#ifdef _MSC_VER
/* Microsoft compiler */
#pragma optimize("g", on)
#else
/* Everything else is treated as GCC-compatible */
#pragma GCC optimize("O3")
#endif
#endif
/* Color translation callback: receives an XLATEOBJ pointer and a color value
   and returns a ULONG (called through the _DibXlate macro) */
typedef
ULONG
(NTAPI *PFN_XLATE)(XLATEOBJ* pxlo, ULONG ulColor);
/* 4 bpp shift lookup table used by _CALCSHIFT_4; defined in another source file */
extern BYTE ajShift4[2];
#include "DibLib_interface.h"
/* Translate ulColor by calling the blit's pfnXlate callback with its pxlo.
   Both arguments are parenthesized so that any expression (for example
   &bltdata) can be passed safely. */
#define _DibXlate(pBltData, ulColor) ((pBltData)->pfnXlate((pBltData)->pxlo, (ulColor)))
@ -16,51 +31,54 @@
/* Two-step token pasting: __PASTE lets the preprocessor expand both
   arguments first, then __PASTE_ glues the results together with ## */
#define __PASTE_(lhs,rhs) lhs##rhs
#define __PASTE(lhs,rhs) __PASTE_(lhs,rhs)
/* Builders for the per-format function names. Each "...2" macro does the
   actual ## pasting; the wrapper in front of it exists so that macro
   arguments are expanded before they are pasted. */
/* Dib_<name>_S<src>_D<dst>_EqSurf */
#define __DIB_FUNCTION_NAME_SRCDSTEQ2(name, src_bpp, dst_bpp) Dib_ ## name ## _S ## src_bpp ## _D ## dst_bpp ## _EqSurf
#define __DIB_FUNCTION_NAME_SRCDSTEQ(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDSTEQ2(name, src_bpp, dst_bpp)
/* Dib_<name>_S<src>_D<dst>_EqSurfR2L */
#define __DIB_FUNCTION_NAME_SRCDSTEQR2L2(name, src_bpp, dst_bpp) Dib_ ## name ## _S ## src_bpp ## _D ## dst_bpp ## _EqSurfR2L
#define __DIB_FUNCTION_NAME_SRCDSTEQR2L(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDSTEQR2L2(name, src_bpp, dst_bpp)
/* Dib_<name>_S<src>_D<dst> */
#define __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) Dib_ ## name ## _S ## src_bpp ## _D ## dst_bpp
#define __DIB_FUNCTION_NAME_SRCDST(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp)
/* Dib_<name>_D<dst>; the wrapper accepts src_bpp but does not use it */
#define __DIB_FUNCTION_NAME_DST2(name, dst_bpp) Dib_ ## name ## _D ## dst_bpp
#define __DIB_FUNCTION_NAME_DST(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_DST2(name, dst_bpp)
/* NOTE(review): the three definitions below apply ## to the ')' that closes
   the __DIB_FUNCTION_NAME_SRCDST2(...) call instead of to an identifier, and
   two of them redefine names already defined above (_SRCDSTEQ, _SRCDSTEQR2L).
   They look like leftovers of the previous revision - confirm and remove. */
#define __DIB_FUNCTION_NAME_SRCDSTEQ(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) ## _EqSurf
#define __DIB_FUNCTION_NAME_SRCDSTEQL2R(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) ## _EqSurfL2R
#define __DIB_FUNCTION_NAME_SRCDSTEQR2L(name, src_bpp, dst_bpp) __DIB_FUNCTION_NAME_SRCDST2(name, src_bpp, dst_bpp) ## _EqSurfR2L
/*
 * 1 bpp pixel access. The shift is the bit position of the pixel inside its
 * byte; _CALCSHIFT_1 maps x to 7 - (x & 7), so the leftmost pixel of a byte
 * lives in bit 7 and the rightmost in bit 0.
 */
#define _ReadPixel_1(pjSource, jShift) (((*(pjSource)) >> (jShift)) & 1)
#define _WritePixel_1(pjDest, jShift, ulColor) (void)(*(pjDest) = (UCHAR)((*(pjDest) & ~(1<<(jShift))) | ((ulColor)<<(jShift))))
/* Step right: decrement the shift modulo 8. If it wrapped around to 7 the
   next byte was entered, so advance the byte pointer. (The former ">> 5"
   test was always 0 for a shift in 0..7, so the pointer never moved.) */
#define _NextPixel_1(ppj, pjShift) (void)((*(pjShift))--, *(pjShift) &= 7, (*(ppj) += (*(pjShift) == 7)))
/* Step left: a shift of 7 is the first pixel of a byte, so go back one byte
   before incrementing the shift modulo 8. */
#define _NextPixelR2L_1(ppj, pjShift) (void)((*(ppj) -= (*(pjShift) == 7)), (*(pjShift))++, *(pjShift) &= 7)
/* Shift handling is needed at this depth: _SHIFT_1 keeps its argument */
#define _SHIFT_1(x) x
#define _CALCSHIFT_1(pShift, x) (void)(*(pShift) = (7 - ((x) & 7)))
/*
 * 4 bpp pixel access. The shift selects the nibble: _CALCSHIFT_4 looks it up
 * in ajShift4 by (x & 1), i.e. 4 (high nibble) for even x and 0 (low nibble)
 * for odd x.
 */
#define _ReadPixel_4(pjSource, jShift) (((*(pjSource)) >> (jShift)) & 15)
#define _WritePixel_4(pjDest, jShift, ulColor) (void)(*(pjDest) = (UCHAR)((*(pjDest) & ~(15<<(jShift))) | ((ulColor)<<(jShift))))
/* Step right: leaving the low nibble (shift 0) means entering the next byte;
   then toggle the shift between 4 and 0. (The former "& 1" test was always 0
   for a shift of 0 or 4, so the pointer never moved.) */
#define _NextPixel_4(ppj, pjShift) (void)((*(ppj) += (*(pjShift) == 0)), (*(pjShift)) -= 4, *(pjShift) &= 7)
/* Step left: toggle the shift first; arriving at the low nibble (shift 0)
   means the previous byte was entered. */
#define _NextPixelR2L_4(ppj, pjShift) (void)((*(pjShift)) -= 4, *(pjShift) &= 7, (*(ppj) -= (*(pjShift) == 0)))
/* Shift handling is needed at this depth: _SHIFT_4 keeps its argument */
#define _SHIFT_4(x) x
#define _CALCSHIFT_4(pShift, x) (void)(*(pShift) = ajShift4[(x) & 1])
/* 8 bpp pixel access: one byte per pixel, the shift arguments are ignored */
#define _ReadPixel_8(pjPixel, jUnused) (*(UCHAR*)(pjPixel))
#define _WritePixel_8(pjPixel, jUnused, ulValue) (void)(*(UCHAR*)(pjPixel) = (UCHAR)(ulValue))
/* Step one byte to the right / to the left */
#define _NextPixel_8(ppjPixel, pjUnused) (void)(*(ppjPixel) += 1)
#define _NextPixelR2L_8(ppjPixel, pjUnused) (void)(*(ppjPixel) -= 1)
/* No sub-byte addressing at this depth: both shift helpers expand to nothing */
#define _SHIFT_8(stmt)
#define _CALCSHIFT_8(pjUnused, xUnused)
/* 16 bpp pixel access: one USHORT per pixel, the shift arguments are ignored */
#define _ReadPixel_16(pjSource, x) (*(USHORT*)(pjSource))
#define _WritePixel_16(pjDest, x, ulColor) (void)(*(USHORT*)(pjDest) = (USHORT)(ulColor))
/* Step right moves forward by one pixel (2 bytes), step left (R2L) moves
   back by 2 bytes - the same direction convention as the 8 and 32 bpp
   macros. The signs were previously swapped. */
#define _NextPixel_16(ppj, pjShift) (void)(*(ppj) += 2)
#define _NextPixelR2L_16(ppj, pjShift) (void)(*(ppj) -= 2)
/* No sub-byte addressing at this depth: both shift helpers expand to nothing */
#define _SHIFT_16(x)
#define _CALCSHIFT_16(pShift, x)
/* 24 bpp pixel access: three bytes per pixel, combined with the lowest byte
   first; the shift arguments are ignored */
#define _ReadPixel_24(pjSource, x) ((pjSource)[0] | ((pjSource)[1] << 8) | ((pjSource)[2] << 16))
#define _WritePixel_24(pjDest, x, ulColor) (void)(((pjDest)[0] = ((ulColor)&0xFF)),((pjDest)[1] = (((ulColor)>>8)&0xFF)),((pjDest)[2] = (((ulColor)>>16)&0xFF)))
/* Step right moves forward by one pixel (3 bytes), step left (R2L) moves
   back by 3 bytes - the same direction convention as the 8 and 32 bpp
   macros. The signs were previously swapped. */
#define _NextPixel_24(ppj, pjShift) (void)(*(ppj) += 3)
#define _NextPixelR2L_24(ppj, pjShift) (void)(*(ppj) -= 3)
/* No sub-byte addressing at this depth: both shift helpers expand to nothing */
#define _SHIFT_24(x)
#define _CALCSHIFT_24(pShift, x)
/* 32 bpp pixel access: one ULONG per pixel, the shift arguments are ignored */
#define _ReadPixel_32(pjPixel, jUnused) (*(ULONG*)(pjPixel))
#define _WritePixel_32(pjPixel, jUnused, ulValue) (void)(*(ULONG*)(pjPixel) = (ulValue))
/* Step four bytes to the right / to the left */
#define _NextPixel_32(ppjPixel, pjUnused) (void)(*(ppjPixel) += 4)
#define _NextPixelR2L_32(ppjPixel, pjUnused) (void)(*(ppjPixel) -= 4)
/* No sub-byte addressing at this depth: both shift helpers expand to nothing */
#define _SHIFT_32(stmt)
#define _CALCSHIFT_32(pjUnused, xUnused)

View File

@ -44,7 +44,15 @@
PFN_DIBFUNCTION
__PASTE(gapfn, __FUNCTIONNAME)[7][7] =
{
{0, 0, 0, 0, 0, 0},
{
0,
__DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 1, 1),
__DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 4, 4),
__DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 8, 8),
__DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 16, 16),
__DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 24, 24),
__DIB_FUNCTION_NAME_SRCDSTEQR2L(__FUNCTIONNAME, 32, 32),
},
{
__DIB_FUNCTION_NAME_SRCDSTEQ(__FUNCTIONNAME, 1, 1),
__DIB_FUNCTION_NAME_SRCDST(__FUNCTIONNAME, 1, 1),

View File

@ -9,6 +9,9 @@
/* Dispatchers that paste a bit depth onto the macro name to pick the
   depth-specific implementation */
/* Write: always uses the destination depth _DEST_BPP */
#define _WritePixel(pjDst, jDstShift, ulValue) __PASTE(_WritePixel_, _DEST_BPP)(pjDst, jDstShift, ulValue)
/* Advance one pixel at the given depth */
#define _NextPixel(depth, ppjPixel, pjPixelShift) __PASTE(_NextPixel_, depth)(ppjPixel, pjPixelShift)
/* Forward the argument to the depth's _SHIFT_<depth> macro */
#define _SHIFT(depth, stmt) __PASTE(_SHIFT_, depth)(stmt)
/* Compute the initial shift for coordinate xPos at the given depth */
#define _CALCSHIFT(depth, pjOutShift, xPos) __PASTE(_CALCSHIFT_, depth)(pjOutShift, xPos)
#if (__PASTE(_DibFunction, _manual) != 1)
VOID
FASTCALL
@ -42,7 +45,7 @@ _DibFunction(PBLTDATA pBltData)
pjPatBase = pBltData->siPat.pjBase;
pjPatBase += pBltData->siPat.ptOrig.y * pBltData->siPat.lDelta;
pjPattern = pjPatBase + pBltData->siPat.ptOrig.x * _DEST_BPP / 8;
_SHIFT(_DEST_BPP, jPatShift = pBltData->siPat.jShift0;)
_CALCSHIFT(_DEST_BPP, &jPatShift, pBltData->siPat.ptOrig.x);
cPatLines = pBltData->ulPatHeight - pBltData->siPat.ptOrig.y;
cPatRows = pBltData->ulPatWidth - pBltData->siPat.ptOrig.x;
#endif
@ -57,14 +60,14 @@ _DibFunction(PBLTDATA pBltData)
{
/* Set current bit pointers and shifts */
pjDest = pjDestBase;
_SHIFT(_DEST_BPP, jDstShift = pBltData->siDst.jShift0;)
_CALCSHIFT(_DEST_BPP, &jDstShift, pBltData->siDst.ptOrig.x);
#if __USES_SOURCE
pjSource = pjSrcBase;
_SHIFT(_SOURCE_BPP, jSrcShift = pBltData->siSrc.jShift0;)
_CALCSHIFT(_SOURCE_BPP, &jSrcShift, pBltData->siSrc.ptOrig.x);
#endif
#if __USES_MASK
pjMask = pjMaskBase;
jMskShift = pBltData->siMsk.jShift0;
_CALCSHIFT_1(&jMskShift, pBltData->siMsk.ptOrig.x);
#endif
/* Loop all rows */
@ -126,5 +129,7 @@ _DibFunction(PBLTDATA pBltData)
}
}
#endif // manual
#undef _DibFunction
#undef __FUNCTIONNAME2

View File

@ -1,17 +1,14 @@
#include "RopFunctions.h"
/* Color translation callback: receives an XLATEOBJ pointer and a color value
   and returns a ULONG */
typedef
ULONG
(NTAPI *PFN_XLATE)(XLATEOBJ* pxlo, ULONG ulColor);
/* Per-surface data for the DIB functions; the blit code reaches it through
   pBltData->siDst, siSrc, siPat and siMsk */
typedef struct
{
/* NOTE(review): presumably the surface's bitmap format id - confirm */
ULONG iFormat;
/* NOTE(review): not referenced by the visible code */
PBYTE pvScan0;
/* Byte pointer the blit functions start from */
PBYTE pjBase;
/* Byte offset added to a line pointer to reach the next scanline */
LONG lDelta;
/* Origin on the surface; x feeds the _CALCSHIFT macros, x and y position the pattern */
POINTL ptOrig;
/* Initial bit shift of the first pixel, read by the _SHIFT-based code path */
BYTE jShift0;
/* NOTE(review): presumably the pixel size of this surface; units not visible here - confirm */
BYTE jBpp;
} SURFINFO;
typedef struct
@ -30,6 +27,7 @@ typedef struct
ULONG rop4;
PFN_DOROP apfnDoRop[2];
ULONG ulSolidColor;
BYTE jDstBpp;
} BLTDATA, *PBLTDATA;
typedef