/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2024 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
| #include "SDL_internal.h" |
| |
| #include "SDL_blit.h" |
| #include "SDL_blit_slow.h" |
| #include "SDL_pixels_c.h" |
| |
/* How a single pixel of a given format is decoded and encoded by the slow blitters. */
typedef enum
{
    SlowBlitPixelAccess_RGB = 0, /* at most 4 bytes per pixel, no alpha mask */
    SlowBlitPixelAccess_RGBA,    /* at most 4 bytes per pixel, with an alpha mask */
    SlowBlitPixelAccess_10Bit,   /* one of the 2101010 packed formats */
    SlowBlitPixelAccess_Large    /* more than 4 bytes per pixel */
} SlowBlitPixelAccess;
| |
| static SlowBlitPixelAccess GetPixelAccessMethod(SDL_PixelFormat *pf) |
| { |
| if (pf->bytes_per_pixel > 4) { |
| return SlowBlitPixelAccess_Large; |
| } else if (SDL_ISPIXELFORMAT_10BIT(pf->format)) { |
| return SlowBlitPixelAccess_10Bit; |
| } else if (pf->Amask) { |
| return SlowBlitPixelAccess_RGBA; |
| } else { |
| return SlowBlitPixelAccess_RGB; |
| } |
| } |
| |
| /* The ONE TRUE BLITTER |
| * This puppy has to handle all the unoptimized cases - yes, it's slow. |
| */ |
| void SDL_Blit_Slow(SDL_BlitInfo *info) |
| { |
| const int flags = info->flags; |
| const Uint32 modulateR = info->r; |
| const Uint32 modulateG = info->g; |
| const Uint32 modulateB = info->b; |
| const Uint32 modulateA = info->a; |
| Uint32 srcpixel = 0; |
| Uint32 srcR = 0, srcG = 0, srcB = 0, srcA = 0; |
| Uint32 dstpixel = 0; |
| Uint32 dstR = 0, dstG = 0, dstB = 0, dstA = 0; |
| Uint64 srcy, srcx; |
| Uint64 posy, posx; |
| Uint64 incy, incx; |
| SDL_PixelFormat *src_fmt = info->src_fmt; |
| SDL_PixelFormat *dst_fmt = info->dst_fmt; |
| int srcbpp = src_fmt->bytes_per_pixel; |
| int dstbpp = dst_fmt->bytes_per_pixel; |
| SlowBlitPixelAccess src_access; |
| SlowBlitPixelAccess dst_access; |
| Uint32 rgbmask = ~src_fmt->Amask; |
| Uint32 ckey = info->colorkey & rgbmask; |
| |
| src_access = GetPixelAccessMethod(src_fmt); |
| dst_access = GetPixelAccessMethod(dst_fmt); |
| |
| incy = ((Uint64)info->src_h << 16) / info->dst_h; |
| incx = ((Uint64)info->src_w << 16) / info->dst_w; |
| posy = incy / 2; /* start at the middle of pixel */ |
| |
| while (info->dst_h--) { |
| Uint8 *src = 0; |
| Uint8 *dst = info->dst; |
| int n = info->dst_w; |
| posx = incx / 2; /* start at the middle of pixel */ |
| srcy = posy >> 16; |
| while (n--) { |
| srcx = posx >> 16; |
| src = (info->src + (srcy * info->src_pitch) + (srcx * srcbpp)); |
| |
| switch (src_access) { |
| case SlowBlitPixelAccess_RGB: |
| DISEMBLE_RGB(src, srcbpp, src_fmt, srcpixel, srcR, srcG, srcB); |
| srcA = 0xFF; |
| break; |
| case SlowBlitPixelAccess_RGBA: |
| DISEMBLE_RGBA(src, srcbpp, src_fmt, srcpixel, srcR, srcG, srcB, srcA); |
| break; |
| case SlowBlitPixelAccess_10Bit: |
| srcpixel = *((Uint32 *)(src)); |
| switch (src_fmt->format) { |
| case SDL_PIXELFORMAT_XRGB2101010: |
| RGBA_FROM_ARGB2101010(srcpixel, srcR, srcG, srcB, srcA); |
| srcA = 0xFF; |
| break; |
| case SDL_PIXELFORMAT_XBGR2101010: |
| RGBA_FROM_ABGR2101010(srcpixel, srcR, srcG, srcB, srcA); |
| srcA = 0xFF; |
| break; |
| case SDL_PIXELFORMAT_ARGB2101010: |
| RGBA_FROM_ARGB2101010(srcpixel, srcR, srcG, srcB, srcA); |
| break; |
| case SDL_PIXELFORMAT_ABGR2101010: |
| RGBA_FROM_ABGR2101010(srcpixel, srcR, srcG, srcB, srcA); |
| break; |
| default: |
| break; |
| } |
| break; |
| case SlowBlitPixelAccess_Large: |
| /* Handled in SDL_Blit_Slow_Float() */ |
| break; |
| } |
| |
| if (flags & SDL_COPY_COLORKEY) { |
| /* srcpixel isn't set for 24 bpp */ |
| if (srcbpp == 3) { |
| srcpixel = (srcR << src_fmt->Rshift) | |
| (srcG << src_fmt->Gshift) | (srcB << src_fmt->Bshift); |
| } |
| if ((srcpixel & rgbmask) == ckey) { |
| posx += incx; |
| dst += dstbpp; |
| continue; |
| } |
| } |
| if ((flags & (SDL_COPY_BLEND | SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL))) { |
| switch (dst_access) { |
| case SlowBlitPixelAccess_RGB: |
| DISEMBLE_RGB(dst, dstbpp, dst_fmt, dstpixel, dstR, dstG, dstB); |
| dstA = 0xFF; |
| break; |
| case SlowBlitPixelAccess_RGBA: |
| DISEMBLE_RGBA(dst, dstbpp, dst_fmt, dstpixel, dstR, dstG, dstB, dstA); |
| break; |
| case SlowBlitPixelAccess_10Bit: |
| dstpixel = *((Uint32 *)(dst)); |
| switch (dst_fmt->format) { |
| case SDL_PIXELFORMAT_XRGB2101010: |
| RGBA_FROM_ARGB2101010(dstpixel, dstR, dstG, dstB, dstA); |
| dstA = 0xFF; |
| break; |
| case SDL_PIXELFORMAT_XBGR2101010: |
| RGBA_FROM_ABGR2101010(dstpixel, dstR, dstG, dstB, dstA); |
| dstA = 0xFF; |
| break; |
| case SDL_PIXELFORMAT_ARGB2101010: |
| RGBA_FROM_ARGB2101010(dstpixel, dstR, dstG, dstB, dstA); |
| break; |
| case SDL_PIXELFORMAT_ABGR2101010: |
| RGBA_FROM_ABGR2101010(dstpixel, dstR, dstG, dstB, dstA); |
| break; |
| default: |
| break; |
| } |
| break; |
| case SlowBlitPixelAccess_Large: |
| /* Handled in SDL_Blit_Slow_Float() */ |
| break; |
| } |
| } else { |
| /* don't care */ |
| } |
| |
| if (flags & SDL_COPY_MODULATE_COLOR) { |
| srcR = (srcR * modulateR) / 255; |
| srcG = (srcG * modulateG) / 255; |
| srcB = (srcB * modulateB) / 255; |
| } |
| if (flags & SDL_COPY_MODULATE_ALPHA) { |
| srcA = (srcA * modulateA) / 255; |
| } |
| if (flags & (SDL_COPY_BLEND | SDL_COPY_ADD)) { |
| /* This goes away if we ever use premultiplied alpha */ |
| if (srcA < 255) { |
| srcR = (srcR * srcA) / 255; |
| srcG = (srcG * srcA) / 255; |
| srcB = (srcB * srcA) / 255; |
| } |
| } |
| switch (flags & (SDL_COPY_BLEND | SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL)) { |
| case 0: |
| dstR = srcR; |
| dstG = srcG; |
| dstB = srcB; |
| dstA = srcA; |
| break; |
| case SDL_COPY_BLEND: |
| dstR = srcR + ((255 - srcA) * dstR) / 255; |
| dstG = srcG + ((255 - srcA) * dstG) / 255; |
| dstB = srcB + ((255 - srcA) * dstB) / 255; |
| dstA = srcA + ((255 - srcA) * dstA) / 255; |
| break; |
| case SDL_COPY_ADD: |
| dstR = srcR + dstR; |
| if (dstR > 255) { |
| dstR = 255; |
| } |
| dstG = srcG + dstG; |
| if (dstG > 255) { |
| dstG = 255; |
| } |
| dstB = srcB + dstB; |
| if (dstB > 255) { |
| dstB = 255; |
| } |
| break; |
| case SDL_COPY_MOD: |
| dstR = (srcR * dstR) / 255; |
| dstG = (srcG * dstG) / 255; |
| dstB = (srcB * dstB) / 255; |
| break; |
| case SDL_COPY_MUL: |
| dstR = ((srcR * dstR) + (dstR * (255 - srcA))) / 255; |
| if (dstR > 255) { |
| dstR = 255; |
| } |
| dstG = ((srcG * dstG) + (dstG * (255 - srcA))) / 255; |
| if (dstG > 255) { |
| dstG = 255; |
| } |
| dstB = ((srcB * dstB) + (dstB * (255 - srcA))) / 255; |
| if (dstB > 255) { |
| dstB = 255; |
| } |
| break; |
| } |
| |
| switch (dst_access) { |
| case SlowBlitPixelAccess_RGB: |
| ASSEMBLE_RGB(dst, dstbpp, dst_fmt, dstR, dstG, dstB); |
| break; |
| case SlowBlitPixelAccess_RGBA: |
| ASSEMBLE_RGBA(dst, dstbpp, dst_fmt, dstR, dstG, dstB, dstA); |
| break; |
| case SlowBlitPixelAccess_10Bit: |
| { |
| Uint32 pixel; |
| switch (dst_fmt->format) { |
| case SDL_PIXELFORMAT_XRGB2101010: |
| dstA = 0xFF; |
| SDL_FALLTHROUGH; |
| case SDL_PIXELFORMAT_ARGB2101010: |
| ARGB2101010_FROM_RGBA(pixel, dstR, dstG, dstB, dstA); |
| break; |
| case SDL_PIXELFORMAT_XBGR2101010: |
| dstA = 0xFF; |
| SDL_FALLTHROUGH; |
| case SDL_PIXELFORMAT_ABGR2101010: |
| ABGR2101010_FROM_RGBA(pixel, dstR, dstG, dstB, dstA); |
| break; |
| default: |
| pixel = 0; |
| break; |
| } |
| *(Uint32 *)dst = pixel; |
| break; |
| } |
| case SlowBlitPixelAccess_Large: |
| /* Handled in SDL_Blit_Slow_Float() */ |
| break; |
| } |
| |
| posx += incx; |
| dst += dstbpp; |
| } |
| posy += incy; |
| info->dst += info->dst_pitch; |
| } |
| } |
| |
| /* Convert from F16 to float |
| * Public domain implementation from https://gist.github.com/rygorous/2144712 |
| */ |
| typedef union |
| { |
| Uint32 u; |
| float f; |
| struct |
| { |
| Uint32 Mantissa : 23; |
| Uint32 Exponent : 8; |
| Uint32 Sign : 1; |
| } x; |
| } FP32; |
| |
| #ifdef _MSC_VER |
| #pragma warning(push) |
| #pragma warning(disable:4214) |
| #endif |
| |
| typedef union |
| { |
| Uint16 u; |
| struct |
| { |
| Uint16 Mantissa : 10; |
| Uint16 Exponent : 5; |
| Uint16 Sign : 1; |
| } x; |
| } FP16; |
| |
| #ifdef _MSC_VER |
| #pragma warning(pop) |
| #endif |
| |
| static float half_to_float(Uint16 unValue) |
| { |
| static const FP32 magic = { (254 - 15) << 23 }; |
| static const FP32 was_infnan = { (127 + 16) << 23 }; |
| FP16 h; |
| FP32 o; |
| |
| h.u = unValue; |
| o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits |
| o.f *= magic.f; // exponent adjust |
| if (o.f >= was_infnan.f) // make sure Inf/NaN survive |
| o.u |= 255 << 23; |
| o.u |= (h.u & 0x8000) << 16; // sign bit |
| return o.f; |
| } |
| |
| /* Convert from float to F16 |
| * Public domain implementation from https://stackoverflow.com/questions/76799117/how-to-convert-a-float-to-a-half-type-and-the-other-way-around-in-c |
| */ |
| static Uint16 float_to_half(float a) |
| { |
| Uint32 ia; |
| Uint16 ir; |
| |
| SDL_memcpy(&ia, &a, sizeof(ia)); |
| |
| ir = (ia >> 16) & 0x8000; |
| if ((ia & 0x7f800000) == 0x7f800000) { |
| if ((ia & 0x7fffffff) == 0x7f800000) { |
| ir |= 0x7c00; /* infinity */ |
| } else { |
| ir |= 0x7e00 | ((ia >> (24 - 11)) & 0x1ff); /* NaN, quietened */ |
| } |
| } else if ((ia & 0x7f800000) >= 0x33000000) { |
| int shift = (int)((ia >> 23) & 0xff) - 127; |
| if (shift > 15) { |
| ir |= 0x7c00; /* infinity */ |
| } else { |
| ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */ |
| if (shift < -14) { /* denormal */ |
| ir |= ia >> (-1 - shift); |
| ia = ia << (32 - (-1 - shift)); |
| } else { /* normal */ |
| ir |= ia >> (24 - 11); |
| ia = ia << (32 - (24 - 11)); |
| ir = ir + ((14 + shift) << 10); |
| } |
| /* IEEE-754 round to nearest of even */ |
| if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) { |
| ir++; |
| } |
| } |
| } |
| return ir; |
| } |
| |
| static void ReadFloatPixel(Uint8 *pixels, SlowBlitPixelAccess access, SDL_PixelFormat *fmt, SDL_Colorspace colorspace, float SDR_white_point, |
| float *outR, float *outG, float *outB, float *outA) |
| { |
| Uint32 pixel; |
| Uint32 R, G, B, A; |
| float fR = 0.0f, fG = 0.0f, fB = 0.0f, fA = 0.0f; |
| float v[4]; |
| |
| switch (access) { |
| case SlowBlitPixelAccess_RGB: |
| DISEMBLE_RGB(pixels, fmt->bytes_per_pixel, fmt, pixel, R, G, B); |
| fR = (float)R / 255.0f; |
| fG = (float)G / 255.0f; |
| fB = (float)B / 255.0f; |
| fA = 1.0f; |
| break; |
| case SlowBlitPixelAccess_RGBA: |
| DISEMBLE_RGBA(pixels, fmt->bytes_per_pixel, fmt, pixel, R, G, B, A); |
| fR = (float)R / 255.0f; |
| fG = (float)G / 255.0f; |
| fB = (float)B / 255.0f; |
| fA = (float)A / 255.0f; |
| break; |
| case SlowBlitPixelAccess_10Bit: |
| pixel = *((Uint32 *)pixels); |
| switch (fmt->format) { |
| case SDL_PIXELFORMAT_XRGB2101010: |
| RGBAFLOAT_FROM_ARGB2101010(pixel, fR, fG, fB, fA); |
| fA = 1.0f; |
| break; |
| case SDL_PIXELFORMAT_XBGR2101010: |
| RGBAFLOAT_FROM_ABGR2101010(pixel, fR, fG, fB, fA); |
| fA = 1.0f; |
| break; |
| case SDL_PIXELFORMAT_ARGB2101010: |
| RGBAFLOAT_FROM_ARGB2101010(pixel, fR, fG, fB, fA); |
| break; |
| case SDL_PIXELFORMAT_ABGR2101010: |
| RGBAFLOAT_FROM_ABGR2101010(pixel, fR, fG, fB, fA); |
| break; |
| default: |
| fR = fG = fB = fA = 0.0f; |
| break; |
| } |
| break; |
| case SlowBlitPixelAccess_Large: |
| switch (SDL_PIXELTYPE(fmt->format)) { |
| case SDL_PIXELTYPE_ARRAYU16: |
| v[0] = (float)(((Uint16 *)pixels)[0]) / SDL_MAX_UINT16; |
| v[1] = (float)(((Uint16 *)pixels)[1]) / SDL_MAX_UINT16; |
| v[2] = (float)(((Uint16 *)pixels)[2]) / SDL_MAX_UINT16; |
| if (fmt->bytes_per_pixel == 8) { |
| v[3] = (float)(((Uint16 *)pixels)[3]) / SDL_MAX_UINT16; |
| } else { |
| v[3] = 1.0f; |
| } |
| break; |
| case SDL_PIXELTYPE_ARRAYF16: |
| v[0] = half_to_float(((Uint16 *)pixels)[0]); |
| v[1] = half_to_float(((Uint16 *)pixels)[1]); |
| v[2] = half_to_float(((Uint16 *)pixels)[2]); |
| if (fmt->bytes_per_pixel == 8) { |
| v[3] = half_to_float(((Uint16 *)pixels)[3]); |
| } else { |
| v[3] = 1.0f; |
| } |
| break; |
| case SDL_PIXELTYPE_ARRAYF32: |
| v[0] = ((float *)pixels)[0]; |
| v[1] = ((float *)pixels)[1]; |
| v[2] = ((float *)pixels)[2]; |
| if (fmt->bytes_per_pixel == 16) { |
| v[3] = ((float *)pixels)[3]; |
| } else { |
| v[3] = 1.0f; |
| } |
| break; |
| default: |
| /* Unknown array type */ |
| v[0] = v[1] = v[2] = v[3] = 0.0f; |
| break; |
| } |
| switch (SDL_PIXELORDER(fmt->format)) { |
| case SDL_ARRAYORDER_RGB: |
| fR = v[0]; |
| fG = v[1]; |
| fB = v[2]; |
| fA = 1.0f; |
| break; |
| case SDL_ARRAYORDER_RGBA: |
| fR = v[0]; |
| fG = v[1]; |
| fB = v[2]; |
| fA = v[3]; |
| break; |
| case SDL_ARRAYORDER_ARGB: |
| fA = v[0]; |
| fR = v[1]; |
| fG = v[2]; |
| fB = v[3]; |
| break; |
| case SDL_ARRAYORDER_BGR: |
| fB = v[0]; |
| fG = v[1]; |
| fR = v[2]; |
| fA = 1.0f; |
| break; |
| case SDL_ARRAYORDER_BGRA: |
| fB = v[0]; |
| fG = v[1]; |
| fR = v[2]; |
| fA = v[3]; |
| break; |
| case SDL_ARRAYORDER_ABGR: |
| fA = v[0]; |
| fB = v[1]; |
| fG = v[2]; |
| fR = v[3]; |
| break; |
| default: |
| /* Unknown array order */ |
| fA = fR = fG = fB = 0.0f; |
| break; |
| } |
| break; |
| } |
| |
| /* Convert to nits so src and dst are guaranteed to be linear and in the same units */ |
| switch (SDL_COLORSPACETRANSFER(colorspace)) { |
| case SDL_TRANSFER_CHARACTERISTICS_SRGB: |
| fR = SDL_sRGBtoLinear(fR); |
| fG = SDL_sRGBtoLinear(fG); |
| fB = SDL_sRGBtoLinear(fB); |
| break; |
| case SDL_TRANSFER_CHARACTERISTICS_PQ: |
| fR = SDL_PQtoNits(fR) / SDR_white_point; |
| fG = SDL_PQtoNits(fG) / SDR_white_point; |
| fB = SDL_PQtoNits(fB) / SDR_white_point; |
| break; |
| case SDL_TRANSFER_CHARACTERISTICS_LINEAR: |
| fR /= SDR_white_point; |
| fG /= SDR_white_point; |
| fB /= SDR_white_point; |
| break; |
| default: |
| /* Unknown, leave it alone */ |
| break; |
| } |
| |
| *outR = fR; |
| *outG = fG; |
| *outB = fB; |
| *outA = fA; |
| } |
| |
| static void WriteFloatPixel(Uint8 *pixels, SlowBlitPixelAccess access, SDL_PixelFormat *fmt, SDL_Colorspace colorspace, float SDR_white_point, |
| float fR, float fG, float fB, float fA) |
| { |
| Uint32 R, G, B, A; |
| float v[4]; |
| |
| /* We converted to nits so src and dst are guaranteed to be linear and in the same units */ |
| switch (SDL_COLORSPACETRANSFER(colorspace)) { |
| case SDL_TRANSFER_CHARACTERISTICS_SRGB: |
| fR = SDL_sRGBfromLinear(fR); |
| fG = SDL_sRGBfromLinear(fG); |
| fB = SDL_sRGBfromLinear(fB); |
| break; |
| case SDL_TRANSFER_CHARACTERISTICS_PQ: |
| fR = SDL_PQfromNits(fR * SDR_white_point); |
| fG = SDL_PQfromNits(fG * SDR_white_point); |
| fB = SDL_PQfromNits(fB * SDR_white_point); |
| break; |
| case SDL_TRANSFER_CHARACTERISTICS_LINEAR: |
| fR *= SDR_white_point; |
| fG *= SDR_white_point; |
| fB *= SDR_white_point; |
| break; |
| default: |
| /* Unknown, leave it alone */ |
| break; |
| } |
| |
| switch (access) { |
| case SlowBlitPixelAccess_RGB: |
| R = (Uint8)SDL_roundf(SDL_clamp(fR, 0.0f, 1.0f) * 255.0f); |
| G = (Uint8)SDL_roundf(SDL_clamp(fG, 0.0f, 1.0f) * 255.0f); |
| B = (Uint8)SDL_roundf(SDL_clamp(fB, 0.0f, 1.0f) * 255.0f); |
| ASSEMBLE_RGB(pixels, fmt->bytes_per_pixel, fmt, R, G, B); |
| break; |
| case SlowBlitPixelAccess_RGBA: |
| R = (Uint8)SDL_roundf(SDL_clamp(fR, 0.0f, 1.0f) * 255.0f); |
| G = (Uint8)SDL_roundf(SDL_clamp(fG, 0.0f, 1.0f) * 255.0f); |
| B = (Uint8)SDL_roundf(SDL_clamp(fB, 0.0f, 1.0f) * 255.0f); |
| A = (Uint8)SDL_roundf(SDL_clamp(fA, 0.0f, 1.0f) * 255.0f); |
| ASSEMBLE_RGBA(pixels, fmt->bytes_per_pixel, fmt, R, G, B, A); |
| break; |
| case SlowBlitPixelAccess_10Bit: |
| { |
| Uint32 pixel; |
| switch (fmt->format) { |
| case SDL_PIXELFORMAT_XRGB2101010: |
| fA = 1.0f; |
| SDL_FALLTHROUGH; |
| case SDL_PIXELFORMAT_ARGB2101010: |
| ARGB2101010_FROM_RGBAFLOAT(pixel, fR, fG, fB, fA); |
| break; |
| case SDL_PIXELFORMAT_XBGR2101010: |
| fA = 1.0f; |
| SDL_FALLTHROUGH; |
| case SDL_PIXELFORMAT_ABGR2101010: |
| ABGR2101010_FROM_RGBAFLOAT(pixel, fR, fG, fB, fA); |
| break; |
| default: |
| pixel = 0; |
| break; |
| } |
| *(Uint32 *)pixels = pixel; |
| break; |
| } |
| case SlowBlitPixelAccess_Large: |
| switch (SDL_PIXELORDER(fmt->format)) { |
| case SDL_ARRAYORDER_RGB: |
| v[0] = fR; |
| v[1] = fG; |
| v[2] = fB; |
| v[3] = 1.0f; |
| break; |
| case SDL_ARRAYORDER_RGBA: |
| v[0] = fR; |
| v[1] = fG; |
| v[2] = fB; |
| v[3] = fA; |
| break; |
| case SDL_ARRAYORDER_ARGB: |
| v[0] = fA; |
| v[1] = fR; |
| v[2] = fG; |
| v[3] = fB; |
| break; |
| case SDL_ARRAYORDER_BGR: |
| v[0] = fB; |
| v[1] = fG; |
| v[2] = fR; |
| v[3] = 1.0f; |
| break; |
| case SDL_ARRAYORDER_BGRA: |
| v[0] = fB; |
| v[1] = fG; |
| v[2] = fR; |
| v[3] = fA; |
| break; |
| case SDL_ARRAYORDER_ABGR: |
| v[0] = fA; |
| v[1] = fB; |
| v[2] = fG; |
| v[3] = fR; |
| break; |
| default: |
| /* Unknown array order */ |
| v[0] = v[1] = v[2] = v[3] = 0.0f; |
| break; |
| } |
| switch (SDL_PIXELTYPE(fmt->format)) { |
| case SDL_PIXELTYPE_ARRAYU16: |
| ((Uint16 *)pixels)[0] = (Uint16)SDL_roundf(SDL_clamp(v[0], 0.0f, 1.0f) * SDL_MAX_UINT16); |
| ((Uint16 *)pixels)[1] = (Uint16)SDL_roundf(SDL_clamp(v[1], 0.0f, 1.0f) * SDL_MAX_UINT16); |
| ((Uint16 *)pixels)[2] = (Uint16)SDL_roundf(SDL_clamp(v[2], 0.0f, 1.0f) * SDL_MAX_UINT16); |
| if (fmt->bytes_per_pixel == 8) { |
| ((Uint16 *)pixels)[3] = (Uint16)SDL_roundf(SDL_clamp(v[3], 0.0f, 1.0f) * SDL_MAX_UINT16); |
| } |
| break; |
| case SDL_PIXELTYPE_ARRAYF16: |
| ((Uint16 *)pixels)[0] = float_to_half(v[0]); |
| ((Uint16 *)pixels)[1] = float_to_half(v[1]); |
| ((Uint16 *)pixels)[2] = float_to_half(v[2]); |
| if (fmt->bytes_per_pixel == 8) { |
| ((Uint16 *)pixels)[3] = float_to_half(v[3]); |
| } |
| break; |
| case SDL_PIXELTYPE_ARRAYF32: |
| ((float *)pixels)[0] = v[0]; |
| ((float *)pixels)[1] = v[1]; |
| ((float *)pixels)[2] = v[2]; |
| if (fmt->bytes_per_pixel == 16) { |
| ((float *)pixels)[3] = v[3]; |
| } |
| break; |
| default: |
| /* Unknown array type */ |
| break; |
| } |
| break; |
| } |
| } |
| |
/* Tone mapping operators the float blitter can apply to source colours.
 * SDL_TONEMAP_NONE is zero, so a zeroed context means "no tone mapping".
 */
typedef enum
{
    SDL_TONEMAP_NONE = 0, /* leave colours untouched */
    SDL_TONEMAP_LINEAR,   /* multiply by a fixed scale */
    SDL_TONEMAP_CHROME    /* headroom-based curve, see TonemapChrome() */
} SDL_TonemapOperator;
| |
| typedef struct |
| { |
| SDL_TonemapOperator op; |
| |
| union { |
| struct { |
| float scale; |
| } linear; |
| |
| struct { |
| float a; |
| float b; |
| const float *color_primaries_matrix; |
| } chrome; |
| |
| } data; |
| |
| } SDL_TonemapContext; |
| |
/* Scale the three colour channels in place by the same factor. */
static void TonemapLinear(float *r, float *g, float *b, float scale)
{
    *r = *r * scale;
    *g = *g * scale;
    *b = *b * scale;
}
| |
/* This uses the same tonemapping algorithm developed by Google for Chrome:
 * https://colab.research.google.com/drive/1hI10nq6L6ru_UFvz7-f7xQaQp0qarz_K
 *
 * Essentially, you use the source headroom and the destination headroom
 * to calculate scaling factors:
 *  tonemap_a = (dst_headroom / (src_headroom * src_headroom));
 *  tonemap_b = (1.0f / dst_headroom);
 *
 * Then you normalize your source color by the HDR whitepoint,
 * and calculate a final scaling factor in BT.2020 colorspace.
 *
 * The factor is computed from the largest of the three channels and applied
 * to all of them; colours whose largest channel is not above zero are left
 * unchanged.
 */
static void TonemapChrome(float *r, float *g, float *b, float tonemap_a, float tonemap_b)
{
    const float red = *r;
    const float green = *g;
    const float blue = *b;
    const float peak = SDL_max(red, SDL_max(green, blue));
    float factor;

    if (!(peak > 0.0f)) {
        return;
    }

    factor = (1.0f + tonemap_a * peak) / (1.0f + tonemap_b * peak);
    TonemapLinear(r, g, b, factor);
}
| |
| static void ApplyTonemap(SDL_TonemapContext *ctx, float *r, float *g, float *b) |
| { |
| switch (ctx->op) { |
| case SDL_TONEMAP_LINEAR: |
| TonemapLinear(r, g, b, ctx->data.linear.scale); |
| break; |
| case SDL_TONEMAP_CHROME: |
| if (ctx->data.chrome.color_primaries_matrix) { |
| SDL_ConvertColorPrimaries(r, g, b, ctx->data.chrome.color_primaries_matrix); |
| } |
| TonemapChrome(r, g, b, ctx->data.chrome.a, ctx->data.chrome.b); |
| break; |
| default: |
| break; |
| } |
| } |
| |
| /* The SECOND TRUE BLITTER |
| * This one is even slower than the first, but also handles large pixel formats and colorspace conversion |
| */ |
| void SDL_Blit_Slow_Float(SDL_BlitInfo *info) |
| { |
| const int flags = info->flags; |
| const Uint32 modulateR = info->r; |
| const Uint32 modulateG = info->g; |
| const Uint32 modulateB = info->b; |
| const Uint32 modulateA = info->a; |
| float srcR, srcG, srcB, srcA; |
| float dstR, dstG, dstB, dstA; |
| Uint64 srcy, srcx; |
| Uint64 posy, posx; |
| Uint64 incy, incx; |
| SDL_PixelFormat *src_fmt = info->src_fmt; |
| SDL_PixelFormat *dst_fmt = info->dst_fmt; |
| int srcbpp = src_fmt->bytes_per_pixel; |
| int dstbpp = dst_fmt->bytes_per_pixel; |
| SlowBlitPixelAccess src_access; |
| SlowBlitPixelAccess dst_access; |
| SDL_Colorspace src_colorspace; |
| SDL_Colorspace dst_colorspace; |
| SDL_ColorPrimaries src_primaries; |
| SDL_ColorPrimaries dst_primaries; |
| const float *color_primaries_matrix = NULL; |
| float src_white_point; |
| float dst_white_point; |
| float dst_headroom; |
| float src_headroom; |
| SDL_TonemapContext tonemap; |
| |
| if (SDL_GetSurfaceColorspace(info->src_surface, &src_colorspace) < 0 || |
| SDL_GetSurfaceColorspace(info->dst_surface, &dst_colorspace) < 0) { |
| return; |
| } |
| src_primaries = SDL_COLORSPACEPRIMARIES(src_colorspace); |
| dst_primaries = SDL_COLORSPACEPRIMARIES(dst_colorspace); |
| |
| src_white_point = SDL_GetSurfaceSDRWhitePoint(info->src_surface, src_colorspace); |
| dst_white_point = SDL_GetSurfaceSDRWhitePoint(info->dst_surface, dst_colorspace); |
| src_headroom = SDL_GetSurfaceHDRHeadroom(info->src_surface, src_colorspace); |
| dst_headroom = SDL_GetSurfaceHDRHeadroom(info->dst_surface, dst_colorspace); |
| if (dst_headroom == 0.0f) { |
| /* The destination will have the same headroom as the source */ |
| dst_headroom = src_headroom; |
| SDL_SetFloatProperty(SDL_GetSurfaceProperties(info->dst_surface), SDL_PROP_SURFACE_HDR_HEADROOM_FLOAT, dst_headroom); |
| } |
| |
| SDL_zero(tonemap); |
| |
| if (src_headroom > dst_headroom) { |
| const char *tonemap_operator = SDL_GetStringProperty(SDL_GetSurfaceProperties(info->src_surface), SDL_PROP_SURFACE_TONEMAP_OPERATOR_STRING, NULL); |
| if (tonemap_operator) { |
| if (SDL_strncmp(tonemap_operator, "*=", 2) == 0) { |
| tonemap.op = SDL_TONEMAP_LINEAR; |
| tonemap.data.linear.scale = SDL_atof(tonemap_operator + 2); |
| } else if (SDL_strcasecmp(tonemap_operator, "chrome") == 0) { |
| tonemap.op = SDL_TONEMAP_CHROME; |
| } else if (SDL_strcasecmp(tonemap_operator, "none") == 0) { |
| tonemap.op = SDL_TONEMAP_NONE; |
| } |
| } else { |
| tonemap.op = SDL_TONEMAP_CHROME; |
| } |
| if (tonemap.op == SDL_TONEMAP_CHROME) { |
| tonemap.data.chrome.a = (dst_headroom / (src_headroom * src_headroom)); |
| tonemap.data.chrome.b = (1.0f / dst_headroom); |
| |
| /* We'll convert to BT.2020 primaries for the tonemap operation */ |
| tonemap.data.chrome.color_primaries_matrix = SDL_GetColorPrimariesConversionMatrix(src_primaries, SDL_COLOR_PRIMARIES_BT2020); |
| if (tonemap.data.chrome.color_primaries_matrix) { |
| src_primaries = SDL_COLOR_PRIMARIES_BT2020; |
| } |
| } |
| } |
| |
| if (src_primaries != dst_primaries) { |
| color_primaries_matrix = SDL_GetColorPrimariesConversionMatrix(src_primaries, dst_primaries); |
| } |
| |
| src_access = GetPixelAccessMethod(src_fmt); |
| dst_access = GetPixelAccessMethod(dst_fmt); |
| |
| incy = ((Uint64)info->src_h << 16) / info->dst_h; |
| incx = ((Uint64)info->src_w << 16) / info->dst_w; |
| posy = incy / 2; /* start at the middle of pixel */ |
| |
| while (info->dst_h--) { |
| Uint8 *src = 0; |
| Uint8 *dst = info->dst; |
| int n = info->dst_w; |
| posx = incx / 2; /* start at the middle of pixel */ |
| srcy = posy >> 16; |
| while (n--) { |
| srcx = posx >> 16; |
| src = (info->src + (srcy * info->src_pitch) + (srcx * srcbpp)); |
| |
| ReadFloatPixel(src, src_access, src_fmt, src_colorspace, src_white_point, &srcR, &srcG, &srcB, &srcA); |
| |
| if (tonemap.op) { |
| ApplyTonemap(&tonemap, &srcR, &srcG, &srcB); |
| } |
| |
| if (color_primaries_matrix) { |
| SDL_ConvertColorPrimaries(&srcR, &srcG, &srcB, color_primaries_matrix); |
| } |
| |
| if (flags & SDL_COPY_COLORKEY) { |
| /* colorkey isn't supported */ |
| } |
| if ((flags & (SDL_COPY_BLEND | SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL))) { |
| ReadFloatPixel(dst, dst_access, dst_fmt, dst_colorspace, dst_white_point, &dstR, &dstG, &dstB, &dstA); |
| } else { |
| /* don't care */ |
| dstR = dstG = dstB = dstA = 0.0f; |
| } |
| |
| if (flags & SDL_COPY_MODULATE_COLOR) { |
| srcR = (srcR * modulateR) / 255; |
| srcG = (srcG * modulateG) / 255; |
| srcB = (srcB * modulateB) / 255; |
| } |
| if (flags & SDL_COPY_MODULATE_ALPHA) { |
| srcA = (srcA * modulateA) / 255; |
| } |
| if (flags & (SDL_COPY_BLEND | SDL_COPY_ADD)) { |
| /* This goes away if we ever use premultiplied alpha */ |
| if (srcA < 1.0f) { |
| srcR = (srcR * srcA); |
| srcG = (srcG * srcA); |
| srcB = (srcB * srcA); |
| } |
| } |
| switch (flags & (SDL_COPY_BLEND | SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL)) { |
| case 0: |
| dstR = srcR; |
| dstG = srcG; |
| dstB = srcB; |
| dstA = srcA; |
| break; |
| case SDL_COPY_BLEND: |
| dstR = srcR + ((1.0f - srcA) * dstR); |
| dstG = srcG + ((1.0f - srcA) * dstG); |
| dstB = srcB + ((1.0f - srcA) * dstB); |
| dstA = srcA + ((1.0f - srcA) * dstA); |
| break; |
| case SDL_COPY_ADD: |
| dstR = srcR + dstR; |
| dstG = srcG + dstG; |
| dstB = srcB + dstB; |
| break; |
| case SDL_COPY_MOD: |
| dstR = (srcR * dstR); |
| dstG = (srcG * dstG); |
| dstB = (srcB * dstB); |
| break; |
| case SDL_COPY_MUL: |
| dstR = ((srcR * dstR) + (dstR * (1.0f - srcA))); |
| dstG = ((srcG * dstG) + (dstG * (1.0f - srcA))); |
| dstB = ((srcB * dstB) + (dstB * (1.0f - srcA))); |
| break; |
| } |
| |
| WriteFloatPixel(dst, dst_access, dst_fmt, dst_colorspace, dst_white_point, dstR, dstG, dstB, dstA); |
| |
| posx += incx; |
| dst += dstbpp; |
| } |
| posy += incy; |
| info->dst += info->dst_pitch; |
| } |
| } |
| |