Implement alpha blend as macro and replace inaccurate blitters

This commit is contained in:
Isaac Aronson 2023-09-13 17:35:50 -05:00 committed by Sam Lantinga
parent 0f351cd6af
commit e5bbe32641
4 changed files with 38 additions and 137 deletions

View File

@ -493,21 +493,30 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
} \
}
/*
 * Blend a single color channel or alpha value.
 *
 * Updates dC in place to approximately dC + (sC - dC) * sA / 255 without
 * performing a division:
 *   - the accumulator holds (sC - dC) * sA + dC * 255 (dC * 255 is written
 *     as (dC << 8) - dC), truncated to 16 bits;
 *   - adding 1 and then folding in the high byte (acc += acc >> 8) before
 *     the final >> 8 stands in for the division by 255. Adding 0x80 instead
 *     of 0x1 would round to nearest instead of rounding down.
 *
 * sC - source channel value (0..255)
 * dC - destination channel; must be a modifiable lvalue, receives the result
 * sA - source alpha (0..255)
 *
 * Every argument is parenthesized in the expansion, so expressions such as
 * `a + b` may be passed safely. dC is evaluated more than once, so it must
 * not have side effects. The temporary uses a macro-specific name so that it
 * cannot capture a caller variable.
 */
#define ALPHA_BLEND_CHANNEL(sC, dC, sA)                                    \
    do {                                                                   \
        Uint16 alpha_blend_acc_;                                           \
        alpha_blend_acc_ = (Uint16)((((sC) - (dC)) * (sA)) +               \
                                    (((dC) << 8) - (dC)));                 \
        alpha_blend_acc_ += 0x1U;                                          \
        alpha_blend_acc_ += alpha_blend_acc_ >> 8;                         \
        (dC) = alpha_blend_acc_ >> 8;                                      \
    } while (0)
/*
 * Blend the RGB values of two pixels with an alpha value.
 *
 * sR/sG/sB - source channel values
 * A        - alpha applied to all three channels
 * dR/dG/dB - destination channels; read and then overwritten with the result
 *
 * NOTE(review): as written, the body updates every destination channel twice:
 * first with the "(s - d) * A / 255 + d" expression, then again through
 * ALPHA_BLEND_CHANNEL. This looks like the old and the new lines of a change
 * shown together; presumably only the ALPHA_BLEND_CHANNEL calls are meant to
 * remain - confirm before using this text as code.
 */
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB) \
do { \
dR = (Uint8)((((int)(sR - dR) * (int)A) / 255) + dR); \
dG = (Uint8)((((int)(sG - dG) * (int)A) / 255) + dG); \
dB = (Uint8)((((int)(sB - dB) * (int)A) / 255) + dB); \
ALPHA_BLEND_CHANNEL(sR, dR, A); \
ALPHA_BLEND_CHANNEL(sG, dG, A); \
ALPHA_BLEND_CHANNEL(sB, dB, A); \
} while (0)
/*
 * Blend the RGBA values of two pixels.
 *
 * sR/sG/sB/sA - source channel values and source alpha
 * dR/dG/dB/dA - destination channels; read and then overwritten with the result
 *
 * Two variants of the body are visible below:
 *   - the first computes each color channel as (s - d) * sA / 255 + d and the
 *     alpha as sA + dA - sA * dA / 255;
 *   - the second passes each color channel through ALPHA_BLEND_CHANNEL and
 *     blends the alpha towards the constant 255 with the same helper.
 *
 * NOTE(review): the second "#define" line sits inside the line-continued body
 * of the first one, and the first "do {" has no matching "} while (0)". This
 * looks like the old and the new version of a change shown together;
 * presumably only the ALPHA_BLEND_CHANNEL variant is meant to remain -
 * confirm before using this text as code.
 */
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
do { \
dR = (Uint8)((((int)(sR - dR) * (int)sA) / 255) + dR); \
dG = (Uint8)((((int)(sG - dG) * (int)sA) / 255) + dG); \
dB = (Uint8)((((int)(sB - dB) * (int)sA) / 255) + dB); \
dA = (Uint8)((int)sA + dA - ((int)sA * dA) / 255); \
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
do { \
ALPHA_BLEND_CHANNEL(sR, dR, sA); \
ALPHA_BLEND_CHANNEL(sG, dG, sA); \
ALPHA_BLEND_CHANNEL(sB, dB, sA); \
ALPHA_BLEND_CHANNEL(255, dA, sA); \
} while (0)
/* This is a very useful loop for optimizing blitters */

View File

@ -460,22 +460,24 @@ static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
int dstskip = info->dst_skip >> 2;
Uint32 s;
Uint32 d;
Uint32 s1;
Uint32 d1;
while (height--) {
/* *INDENT-OFF* */ /* clang-format off */
DUFFS_LOOP4({
s = *srcp;
d = *dstp;
s1 = s & 0xff00ff;
d1 = d & 0xff00ff;
d1 = (d1 + ((s1 - d1) * alpha >> 8))
& 0xff00ff;
s &= 0xff00;
d &= 0xff00;
d = (d + ((s - d) * alpha >> 8)) & 0xff00;
*dstp = d1 | d | 0xff000000;
Uint8 sR = (s >> 16) & 0xFF;
Uint8 sG = (s >> 8) & 0xFF;
Uint8 sB = s & 0xFF;
Uint8 dR = (d >> 16) & 0xFF;
Uint8 dG = (d >> 8) & 0xFF;
Uint8 dB = d & 0xFF;
ALPHA_BLEND_CHANNEL(sR, dR, alpha);
ALPHA_BLEND_CHANNEL(sG, dG, alpha);
ALPHA_BLEND_CHANNEL(sB, dB, alpha);
*dstp = (dR << 16) | (dG << 8) | dB | 0xFF000000;
++srcp;
++dstp;
}, width);
@ -950,97 +952,6 @@ static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
}
}
/*
 * Fast ARGB8888->RGB565 blit with per-pixel alpha.
 *
 * Reads 32-bit source pixels from info->src and blends them into the 16-bit
 * destination pixels at info->dst, info->dst_w pixels per row for
 * info->dst_h rows. src_skip / dst_skip are byte counts and are converted to
 * pixel units here.
 *
 * The source alpha is reduced to its top 5 bits:
 *   - 0          : destination pixel is left untouched;
 *   - 31 (opaque): the source is packed straight into 5-6-5, because the
 *                  shift-based blend below (>> 5) would not reproduce the
 *                  source exactly at full alpha;
 *   - otherwise  : both pixels are spread into one 32-bit word with the
 *                  6-bit field in bits 21..26 and the two 5-bit fields in
 *                  bits 11..15 and 0..4 (mask 0x07e0f81f), so all three
 *                  fields are blended by a single multiply.
 *
 * FIXME (kept from the original): whether special-casing alpha == 0 is a
 * speed win has not been benchmarked.
 */
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
{
    const int width = info->dst_w;
    int rows_left = info->dst_h;
    Uint32 *src_px = (Uint32 *)info->src;
    Uint16 *dst_px = (Uint16 *)info->dst;
    const int src_gap = info->src_skip >> 2; /* bytes -> 32-bit pixels */
    const int dst_gap = info->dst_skip >> 1; /* bytes -> 16-bit pixels */

    while (rows_left--) {
        /* *INDENT-OFF* */ /* clang-format off */
        DUFFS_LOOP4({
            Uint32 pixel = *src_px;
            const unsigned a5 = pixel >> 27; /* top 5 bits of alpha */
            if (a5 == (SDL_ALPHA_OPAQUE >> 3)) {
                /* fully opaque: plain format conversion */
                *dst_px = (Uint16)(((pixel >> 8) & 0xf800) + ((pixel >> 5) & 0x7e0) + ((pixel >> 3) & 0x1f));
            } else if (a5 != 0) {
                Uint32 spread_dst = *dst_px;
                Uint32 spread_src = ((pixel & 0xfc00) << 11) + ((pixel >> 8) & 0xf800) + ((pixel >> 3) & 0x1f);
                spread_dst = (spread_dst | (spread_dst << 16)) & 0x07e0f81f;
                spread_dst += ((spread_src - spread_dst) * a5) >> 5;
                spread_dst &= 0x07e0f81f;
                /* fold the high field back down into a 16-bit pixel */
                *dst_px = (Uint16)(spread_dst | (spread_dst >> 16));
            }
            ++src_px;
            ++dst_px;
        }, width);
        /* *INDENT-ON* */ /* clang-format on */
        src_px += src_gap;
        dst_px += dst_gap;
    }
}
/*
 * Fast ARGB8888->RGB555 blit with per-pixel alpha.
 *
 * Reads 32-bit source pixels from info->src and blends them into the 16-bit
 * destination pixels at info->dst, info->dst_w pixels per row for
 * info->dst_h rows. src_skip / dst_skip are byte counts and are converted to
 * pixel units here.
 *
 * The source alpha is reduced to its top 5 bits:
 *   - 0          : destination pixel is left untouched;
 *   - 31 (opaque): the source is packed straight into 5-5-5, because the
 *                  shift-based blend below (>> 5) would not reproduce the
 *                  source exactly at full alpha;
 *   - otherwise  : both pixels are spread into one 32-bit word with the
 *                  middle 5-bit field in bits 21..25 and the other two in
 *                  bits 10..14 and 0..4 (mask 0x03e07c1f), so all three
 *                  fields are blended by a single multiply.
 *
 * FIXME (kept from the original): whether special-casing alpha == 0 is a
 * speed win has not been benchmarked.
 */
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
{
    const int width = info->dst_w;
    int rows_left = info->dst_h;
    Uint32 *src_px = (Uint32 *)info->src;
    Uint16 *dst_px = (Uint16 *)info->dst;
    const int src_gap = info->src_skip >> 2; /* bytes -> 32-bit pixels */
    const int dst_gap = info->dst_skip >> 1; /* bytes -> 16-bit pixels */

    while (rows_left--) {
        /* *INDENT-OFF* */ /* clang-format off */
        DUFFS_LOOP4({
            Uint32 pixel = *src_px;
            const unsigned a5 = pixel >> 27; /* top 5 bits of alpha */
            if (a5 == (SDL_ALPHA_OPAQUE >> 3)) {
                /* fully opaque: plain format conversion */
                *dst_px = (Uint16)(((pixel >> 9) & 0x7c00) + ((pixel >> 6) & 0x3e0) + ((pixel >> 3) & 0x1f));
            } else if (a5 != 0) {
                Uint32 spread_dst = *dst_px;
                Uint32 spread_src = ((pixel & 0xf800) << 10) + ((pixel >> 9) & 0x7c00) + ((pixel >> 3) & 0x1f);
                spread_dst = (spread_dst | (spread_dst << 16)) & 0x03e07c1f;
                spread_dst += ((spread_src - spread_dst) * a5) >> 5;
                spread_dst &= 0x03e07c1f;
                /* fold the high field back down into a 16-bit pixel */
                *dst_px = (Uint16)(spread_dst | (spread_dst >> 16));
            }
            ++src_px;
            ++dst_px;
        }, width);
        /* *INDENT-ON* */ /* clang-format on */
        src_px += src_gap;
        dst_px += dst_gap;
    }
}
/* General (slow) N->N blending with per-surface alpha */
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
{
@ -1119,15 +1030,6 @@ static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
}
}
/*
 * Accurate alpha blending of one channel with no division.
 *
 * sC - source channel value
 * dC - destination channel value
 * sA - source alpha
 *
 * Returns the blended channel. The accumulator starts as
 * (sC - dC) * sA + dC * 255 truncated to 16 bits; adding 1 and folding in the
 * high byte before the final >> 8 replaces the division by 255. Adding 0x80
 * instead of 0x1 would round to nearest instead of rounding down.
 */
static Uint8 AlphaBlendChannel(Uint8 sC, Uint8 dC, Uint8 sA)
{
    const int weighted_delta = (sC - dC) * sA;
    const int scaled_dst = (dC << 8) - dC; /* dC * 255 */
    Uint16 acc = (Uint16)(weighted_delta + scaled_dst);

    acc = (Uint16)(acc + 0x1U);
    acc = (Uint16)(acc + (acc >> 8));
    return (Uint8)(acc >> 8);
}
/* General (slow) N->N blending with pixel alpha */
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
{
@ -1141,7 +1043,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
SDL_PixelFormat *dstfmt = info->dst_fmt;
int srcbpp;
int dstbpp;
int freeFormat;
SDL_bool freeFormat;
Uint32 Pixel;
unsigned sR, sG, sB, sA;
unsigned dR, dG, dB, dA;
@ -1149,7 +1051,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
/* Set up some basic variables */
srcbpp = srcfmt->bytes_per_pixel;
dstbpp = dstfmt->bytes_per_pixel;
freeFormat = 0;
freeFormat = SDL_FALSE;
#ifdef SDL_AVX2_INTRINSICS
if (srcbpp == 4 && dstbpp == 4 && width >= 4 && SDL_HasAVX2()) {
@ -1167,7 +1069,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
/* Handle case where bad input sent */
if (dstfmt->Ashift == 0 && dstfmt->Ashift == dstfmt->Bshift) {
dstfmt = SDL_CreatePixelFormat(SDL_PIXELFORMAT_ARGB8888);
freeFormat = 1;
freeFormat = SDL_TRUE;
}
while (height--) {
@ -1177,10 +1079,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
if (sA) {
DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
dR = AlphaBlendChannel(sR, dR, sA);
dG = AlphaBlendChannel(sG, dG, sA);
dB = AlphaBlendChannel(sB, dB, sA);
dA = AlphaBlendChannel(255, dA, sA);
ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
}
src += srcbpp;
@ -1214,13 +1113,6 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
}
case 2:
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
if (df->Gmask == 0x7e0) {
return BlitARGBto565PixelAlpha;
} else if (df->Gmask == 0x3e0) {
return BlitARGBto555PixelAlpha;
}
}
return BlitNtoNPixelAlpha;
case 4:

View File

@ -113,11 +113,11 @@ void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info)
SDL_PixelFormat *dstfmt = info->dst_fmt;
int chunks = width / 8;
int free_format = 0;
SDL_bool free_format = SDL_FALSE;
/* Handle case when passed invalid format, assume ARGB destination */
if (dstfmt->Ashift == 0 && dstfmt->Ashift == dstfmt->Bshift) {
dstfmt = SDL_CreatePixelFormat(SDL_PIXELFORMAT_ARGB8888);
free_format = 1;
free_format = SDL_TRUE;
}
const __m256i shift_mask = GetSDL_PixelFormatShuffleMask_AVX2(srcfmt, dstfmt);
const __m256i splat_mask = GetSDL_PixelFormatAlphaSplatMask_AVX2(dstfmt);

View File

@ -126,11 +126,11 @@ void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
SDL_PixelFormat *dstfmt = info->dst_fmt;
const int chunks = width / 4;
int free_format = 0;
SDL_bool free_format = SDL_FALSE;
/* Handle case when passed invalid format, assume ARGB destination */
if (dstfmt->Ashift == 0 && dstfmt->Ashift == dstfmt->Bshift) {
dstfmt = SDL_CreatePixelFormat(SDL_PIXELFORMAT_ARGB8888);
free_format = 1;
free_format = SDL_TRUE;
}
const __m128i shift_mask = GetSDL_PixelFormatShuffleMask_SSE4_1(srcfmt, dstfmt);
const __m128i splat_mask = GetSDL_PixelFormatAlphaSplatMask_SSE4_1(dstfmt);