mirror of
https://github.com/Detanup01/gbe_fork.git
synced 2025-09-10 12:03:06 +02:00
Update deps in libs folder
This commit is contained in:
parent
2d19ede535
commit
8687787e6f
8 changed files with 2187 additions and 1266 deletions
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
https://github.com/microsoft/Detours
|
https://github.com/microsoft/Detours
|
||||||
|
|
||||||
VERSION: https://github.com/microsoft/Detours/tree/4b8c659f549b0ab21cf649377c7a84eb708f5e68
|
VERSION: https://github.com/microsoft/Detours/tree/9764cebcb1a75940e68fa83d6730ffaf0f669401
|
||||||
|
|
||||||
#### LICENSE
|
#### LICENSE
|
||||||
|
|
||||||
|
|
|
@ -156,6 +156,8 @@ inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
|
||||||
|
|
||||||
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
||||||
{
|
{
|
||||||
|
PBYTE pbCodeOriginal;
|
||||||
|
|
||||||
if (pbCode == NULL) {
|
if (pbCode == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -179,6 +181,7 @@ inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
||||||
PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];
|
PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];
|
||||||
DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
|
DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
|
||||||
pbCode = pbNew;
|
pbCode = pbNew;
|
||||||
|
pbCodeOriginal = pbCode;
|
||||||
|
|
||||||
// First, skip over the import vector if there is one.
|
// First, skip over the import vector if there is one.
|
||||||
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [imm32]
|
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [imm32]
|
||||||
|
@ -195,6 +198,23 @@ inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
||||||
pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];
|
pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];
|
||||||
DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
|
DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
|
||||||
pbCode = pbNew;
|
pbCode = pbNew;
|
||||||
|
|
||||||
|
// Patches applied by the OS will jump through an HPAT page to get
|
||||||
|
// the target function in the patch image. The jump is always performed
|
||||||
|
// to the target function found at the current instruction pointer +
|
||||||
|
// PAGE_SIZE - 6 (size of jump).
|
||||||
|
// If this is an OS patch, we want to detour at the point of the target function
|
||||||
|
// padding in the base image. Ideally, we would detour at the target function, but
|
||||||
|
// since it's patched it begins with a short jump (to padding) which isn't long
|
||||||
|
// enough to hold the detour code bytes.
|
||||||
|
if (pbCode[0] == 0xff &&
|
||||||
|
pbCode[1] == 0x25 &&
|
||||||
|
*(UNALIGNED INT32 *)&pbCode[2] == (UNALIGNED INT32)(pbCode + 0x1000)) { // jmp [eip+PAGE_SIZE-6]
|
||||||
|
|
||||||
|
DETOUR_TRACE(("%p->%p: OS patch encountered, reset back to long jump 5 bytes prior to target function.\n", pbCode, pbCodeOriginal));
|
||||||
|
pbCode = pbCodeOriginal;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return pbCode;
|
return pbCode;
|
||||||
|
@ -369,6 +389,8 @@ inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
|
||||||
|
|
||||||
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
||||||
{
|
{
|
||||||
|
PBYTE pbCodeOriginal;
|
||||||
|
|
||||||
if (pbCode == NULL) {
|
if (pbCode == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -392,6 +414,7 @@ inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
||||||
PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];
|
PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];
|
||||||
DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
|
DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
|
||||||
pbCode = pbNew;
|
pbCode = pbNew;
|
||||||
|
pbCodeOriginal = pbCode;
|
||||||
|
|
||||||
// First, skip over the import vector if there is one.
|
// First, skip over the import vector if there is one.
|
||||||
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32]
|
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32]
|
||||||
|
@ -408,6 +431,21 @@ inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
|
||||||
pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];
|
pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];
|
||||||
DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
|
DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
|
||||||
pbCode = pbNew;
|
pbCode = pbNew;
|
||||||
|
|
||||||
|
// Patches applied by the OS will jump through an HPAT page to get
|
||||||
|
// the target function in the patch image. The jump is always performed
|
||||||
|
// to the target function found at the current instruction pointer +
|
||||||
|
// PAGE_SIZE - 6 (size of jump).
|
||||||
|
// If this is an OS patch, we want to detour at the point of the target function
|
||||||
|
// in the base image. Since we need 5 bytes to perform the jump, detour at the
|
||||||
|
// point of the long jump instead of the short jump at the start of the target.
|
||||||
|
if (pbCode[0] == 0xff &&
|
||||||
|
pbCode[1] == 0x25 &&
|
||||||
|
*(UNALIGNED INT32 *)&pbCode[2] == 0xFFA) { // jmp [rip+PAGE_SIZE-6]
|
||||||
|
|
||||||
|
DETOUR_TRACE(("%p->%p: OS patch encountered, reset back to long jump 5 bytes prior to target function.\n", pbCode, pbCodeOriginal));
|
||||||
|
pbCode = pbCodeOriginal;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return pbCode;
|
return pbCode;
|
||||||
|
@ -1151,10 +1189,46 @@ inline void detour_find_jmp_bounds(PBYTE pbCode,
|
||||||
*ppUpper = (PDETOUR_TRAMPOLINE)hi;
|
*ppUpper = (PDETOUR_TRAMPOLINE)hi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline BOOL detour_is_code_os_patched(PBYTE pbCode)
|
||||||
|
{
|
||||||
|
// Identify whether the provided code pointer is an OS patch jump.
|
||||||
|
// We can do this by checking if a branch (b <imm26>) is present, and if so,
|
||||||
|
// it must be jumping to an HPAT page containing ldr x16, [PC+PAGE_SIZE] followed by br x16.
|
||||||
|
ULONG Opcode = fetch_opcode(pbCode);
|
||||||
|
|
||||||
|
if ((Opcode & 0xfc000000) != 0x14000000) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
// The branch must be jumping forward if it's going into the HPAT.
|
||||||
|
// Check that the sign bit is cleared.
|
||||||
|
if ((Opcode & 0x2000000) != 0) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
ULONG Delta = (ULONG)((Opcode & 0x1FFFFFF) * 4);
|
||||||
|
PBYTE BranchTarget = pbCode + Delta;
|
||||||
|
|
||||||
|
// Now inspect the opcodes of the code we jumped to in order to determine if it's HPAT.
|
||||||
|
ULONG HpatOpcode1 = fetch_opcode(BranchTarget);
|
||||||
|
ULONG HpatOpcode2 = fetch_opcode(BranchTarget + 4);
|
||||||
|
|
||||||
|
if (HpatOpcode1 != 0x58008010) { // ldr <reg> [PC+PAGE_SIZE]
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
if (HpatOpcode2 != 0xd61f0200) { // br <reg>
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
inline BOOL detour_does_code_end_function(PBYTE pbCode)
|
inline BOOL detour_does_code_end_function(PBYTE pbCode)
|
||||||
{
|
{
|
||||||
ULONG Opcode = fetch_opcode(pbCode);
|
ULONG Opcode = fetch_opcode(pbCode);
|
||||||
if ((Opcode & 0xfffffc1f) == 0xd65f0000 || // br <reg>
|
// When the OS has patched a function entry point, it will incorrectly
|
||||||
|
// appear as though the function is just a single branch instruction.
|
||||||
|
if (detour_is_code_os_patched(pbCode)) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
if ((Opcode & 0xffbffc1f) == 0xd61f0000 || // ret/br <reg>
|
||||||
(Opcode & 0xfc000000) == 0x14000000) { // b <imm26>
|
(Opcode & 0xfc000000) == 0x14000000) { // b <imm26>
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
@ -1837,41 +1911,46 @@ LONG WINAPI DetourTransactionCommitEx(_Out_opt_ PVOID **pppFailedPointer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update any suspended threads.
|
|
||||||
for (t = s_pPendingThreads; t != NULL; t = t->pNext) {
|
|
||||||
CONTEXT cxt;
|
|
||||||
cxt.ContextFlags = CONTEXT_CONTROL;
|
|
||||||
|
|
||||||
#undef DETOURS_EIP
|
#undef DETOURS_EIP
|
||||||
|
#undef DETOURS_CONTEXT_FLAGS
|
||||||
|
|
||||||
#ifdef DETOURS_X86
|
#ifdef DETOURS_X86
|
||||||
#define DETOURS_EIP Eip
|
#define DETOURS_EIP Eip
|
||||||
|
#define DETOURS_CONTEXT_FLAGS CONTEXT_CONTROL
|
||||||
#endif // DETOURS_X86
|
#endif // DETOURS_X86
|
||||||
|
|
||||||
#ifdef DETOURS_X64
|
#ifdef DETOURS_X64
|
||||||
#define DETOURS_EIP Rip
|
#define DETOURS_EIP Rip
|
||||||
|
#define DETOURS_CONTEXT_FLAGS (CONTEXT_CONTROL | CONTEXT_INTEGER)
|
||||||
#endif // DETOURS_X64
|
#endif // DETOURS_X64
|
||||||
|
|
||||||
#ifdef DETOURS_IA64
|
#ifdef DETOURS_IA64
|
||||||
#define DETOURS_EIP StIIP
|
#define DETOURS_EIP StIIP
|
||||||
|
#define DETOURS_CONTEXT_FLAGS CONTEXT_CONTROL
|
||||||
#endif // DETOURS_IA64
|
#endif // DETOURS_IA64
|
||||||
|
|
||||||
#ifdef DETOURS_ARM
|
#ifdef DETOURS_ARM
|
||||||
#define DETOURS_EIP Pc
|
#define DETOURS_EIP Pc
|
||||||
|
#define DETOURS_CONTEXT_FLAGS CONTEXT_CONTROL
|
||||||
#endif // DETOURS_ARM
|
#endif // DETOURS_ARM
|
||||||
|
|
||||||
#ifdef DETOURS_ARM64
|
#ifdef DETOURS_ARM64
|
||||||
#define DETOURS_EIP Pc
|
#define DETOURS_EIP Pc
|
||||||
|
#define DETOURS_CONTEXT_FLAGS (CONTEXT_CONTROL | CONTEXT_INTEGER)
|
||||||
#endif // DETOURS_ARM64
|
#endif // DETOURS_ARM64
|
||||||
|
|
||||||
typedef ULONG_PTR DETOURS_EIP_TYPE;
|
typedef ULONG_PTR DETOURS_EIP_TYPE;
|
||||||
|
|
||||||
|
// Update any suspended threads.
|
||||||
|
for (t = s_pPendingThreads; t != NULL; t = t->pNext) {
|
||||||
|
CONTEXT cxt;
|
||||||
|
cxt.ContextFlags = DETOURS_CONTEXT_FLAGS;
|
||||||
if (GetThreadContext(t->hThread, &cxt)) {
|
if (GetThreadContext(t->hThread, &cxt)) {
|
||||||
for (o = s_pPendingOperations; o != NULL; o = o->pNext) {
|
for (o = s_pPendingOperations; o != NULL; o = o->pNext) {
|
||||||
if (o->fIsRemove) {
|
if (o->fIsRemove) {
|
||||||
if (cxt.DETOURS_EIP >= (DETOURS_EIP_TYPE)(ULONG_PTR)o->pTrampoline &&
|
if (cxt.DETOURS_EIP >= (DETOURS_EIP_TYPE)(ULONG_PTR)o->pTrampoline &&
|
||||||
cxt.DETOURS_EIP < (DETOURS_EIP_TYPE)((ULONG_PTR)o->pTrampoline
|
cxt.DETOURS_EIP < (DETOURS_EIP_TYPE)((ULONG_PTR)o->pTrampoline
|
||||||
+ sizeof(o->pTrampoline))
|
+ sizeof(*o->pTrampoline))
|
||||||
) {
|
) {
|
||||||
|
|
||||||
cxt.DETOURS_EIP = (DETOURS_EIP_TYPE)
|
cxt.DETOURS_EIP = (DETOURS_EIP_TYPE)
|
||||||
|
@ -2064,6 +2143,15 @@ LONG WINAPI DetourAttachEx(_Inout_ PVOID *ppPointer,
|
||||||
DETOUR_TRACE((" ppldTarget=%p, code=%p [gp=%p]\n",
|
DETOUR_TRACE((" ppldTarget=%p, code=%p [gp=%p]\n",
|
||||||
ppldTarget, pbTarget, pTargetGlobals));
|
ppldTarget, pbTarget, pTargetGlobals));
|
||||||
#else // DETOURS_IA64
|
#else // DETOURS_IA64
|
||||||
|
#if defined(_M_ARM64EC)
|
||||||
|
if (RtlIsEcCode(reinterpret_cast<DWORD64>(*ppPointer))) {
|
||||||
|
DETOUR_TRACE(("*ppPointer is an Arm64EC address (ppPointer=%p). "
|
||||||
|
"An Arm64EC address cannot be legitimately detoured with an x64 jmp. "
|
||||||
|
"Mark the target function with __declspec(hybrid_patchable) to make it detour-able. "
|
||||||
|
"We still allow an Arm64EC function to be detoured with an x64 jmp to make it easy (crash) to debug.\n", ppPointer));
|
||||||
|
DETOUR_BREAK();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
pbTarget = (PBYTE)DetourCodeFromPointer(pbTarget, NULL);
|
pbTarget = (PBYTE)DetourCodeFromPointer(pbTarget, NULL);
|
||||||
pDetour = DetourCodeFromPointer(pDetour, NULL);
|
pDetour = DetourCodeFromPointer(pDetour, NULL);
|
||||||
#endif // !DETOURS_IA64
|
#endif // !DETOURS_IA64
|
||||||
|
|
|
@ -83,11 +83,15 @@
|
||||||
#undef DETOURS_32BIT
|
#undef DETOURS_32BIT
|
||||||
#undef DETOURS_64BIT
|
#undef DETOURS_64BIT
|
||||||
|
|
||||||
|
#ifndef DECLSPEC_HYBRID_PATCHABLE
|
||||||
|
#define DECLSPEC_HYBRID_PATCHABLE DECLSPEC_CHPE_PATCHABLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(_X86_)
|
#if defined(_X86_)
|
||||||
#define DETOURS_X86
|
#define DETOURS_X86
|
||||||
#define DETOURS_OPTION_BITS 64
|
#define DETOURS_OPTION_BITS 64
|
||||||
|
|
||||||
#elif defined(_AMD64_)
|
#elif defined(_AMD64_) || defined(_ARM64EC_)
|
||||||
#define DETOURS_X64
|
#define DETOURS_X64
|
||||||
#define DETOURS_OPTION_BITS 32
|
#define DETOURS_OPTION_BITS 32
|
||||||
|
|
||||||
|
@ -102,7 +106,7 @@
|
||||||
#define DETOURS_ARM64
|
#define DETOURS_ARM64
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Unknown architecture (x86, amd64, ia64, arm, arm64)
|
#error Unknown architecture (x86, amd64, ia64, arm, arm64, arm64ec)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
|
|
|
@ -645,6 +645,7 @@ BOOL WINAPI DetourEnumerateImportsEx(_In_opt_ HMODULE hModule,
|
||||||
struct _DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT
|
struct _DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT
|
||||||
{
|
{
|
||||||
PVOID pContext;
|
PVOID pContext;
|
||||||
|
PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile;
|
||||||
PF_DETOUR_IMPORT_FUNC_CALLBACK pfImportFunc;
|
PF_DETOUR_IMPORT_FUNC_CALLBACK pfImportFunc;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -664,6 +665,19 @@ DetourEnumerateImportsThunk(_In_ PVOID VoidContext,
|
||||||
return pContext->pfImportFunc(pContext->pContext, nOrdinal, pszFunc, ppvFunc ? *ppvFunc : NULL);
|
return pContext->pfImportFunc(pContext->pContext, nOrdinal, pszFunc, ppvFunc ? *ppvFunc : NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
BOOL
|
||||||
|
CALLBACK
|
||||||
|
DetourEnumerateImportsFile(_In_ PVOID VoidContext,
|
||||||
|
_In_opt_ HMODULE hModule,
|
||||||
|
_In_opt_ LPCSTR pszFile)
|
||||||
|
{
|
||||||
|
_DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT const * const
|
||||||
|
pContext = (_DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT*)VoidContext;
|
||||||
|
return pContext->pfImportFile(pContext->pContext, hModule, pszFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
BOOL WINAPI DetourEnumerateImports(_In_opt_ HMODULE hModule,
|
BOOL WINAPI DetourEnumerateImports(_In_opt_ HMODULE hModule,
|
||||||
_In_opt_ PVOID pContext,
|
_In_opt_ PVOID pContext,
|
||||||
_In_opt_ PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile,
|
_In_opt_ PF_DETOUR_IMPORT_FILE_CALLBACK pfImportFile,
|
||||||
|
@ -674,11 +688,10 @@ BOOL WINAPI DetourEnumerateImports(_In_opt_ HMODULE hModule,
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
_DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT const context = { pContext, pfImportFunc };
|
_DETOUR_ENUMERATE_IMPORTS_THUNK_CONTEXT const context = { pContext, pfImportFile, pfImportFunc };
|
||||||
|
|
||||||
return DetourEnumerateImportsEx(hModule,
|
return DetourEnumerateImportsEx(hModule,
|
||||||
(PVOID)&context,
|
(PVOID)&context,
|
||||||
pfImportFile,
|
&DetourEnumerateImportsFile,
|
||||||
&DetourEnumerateImportsThunk);
|
&DetourEnumerateImportsThunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
https://github.com/nlohmann/json
|
https://github.com/nlohmann/json
|
||||||
|
|
||||||
VERSION: https://github.com/nlohmann/json/releases/tag/v3.11.3
|
VERSION: https://github.com/nlohmann/json/releases/tag/v3.12.0
|
||||||
|
|
||||||
#### LICENSE
|
#### LICENSE
|
||||||
|
|
||||||
|
|
2839
libs/json/json.hpp
2839
libs/json/json.hpp
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
https://github.com/nothings/stb
|
https://github.com/nothings/stb
|
||||||
|
|
||||||
VERSION: https://github.com/nothings/stb/tree/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31
|
VERSION: https://github.com/nothings/stb/tree/f58f558c120e9b32c217290b80bad1a0729fbb2c
|
||||||
|
|
||||||
#### LICENSE
|
#### LICENSE
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* stb_image_resize2 - v2.10 - public domain image resizing
|
/* stb_image_resize2 - v2.14 - public domain image resizing
|
||||||
|
|
||||||
by Jeff Roberts (v2) and Jorge L Rodriguez
|
by Jeff Roberts (v2) and Jorge L Rodriguez
|
||||||
http://github.com/nothings/stb
|
http://github.com/nothings/stb
|
||||||
|
@ -11,35 +11,6 @@
|
||||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
||||||
before the #include. That will create the implementation in that file.
|
before the #include. That will create the implementation in that file.
|
||||||
|
|
||||||
PORTING FROM VERSION 1
|
|
||||||
|
|
||||||
The API has changed. You can continue to use the old version of stb_image_resize.h,
|
|
||||||
which is available in the "deprecated/" directory.
|
|
||||||
|
|
||||||
If you're using the old simple-to-use API, porting is straightforward.
|
|
||||||
(For more advanced APIs, read the documentation.)
|
|
||||||
|
|
||||||
stbir_resize_uint8():
|
|
||||||
- call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout`
|
|
||||||
|
|
||||||
stbir_resize_float():
|
|
||||||
- call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout`
|
|
||||||
|
|
||||||
stbir_resize_uint8_srgb():
|
|
||||||
- function name is unchanged
|
|
||||||
- cast channel count to `stbir_pixel_layout`
|
|
||||||
- above is sufficient unless your image has alpha and it's not RGBA/BGRA
|
|
||||||
- in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode
|
|
||||||
|
|
||||||
stbir_resize_uint8_srgb_edgemode()
|
|
||||||
- switch to the "medium complexity" API
|
|
||||||
- stbir_resize(), very similar API but a few more parameters:
|
|
||||||
- pixel_layout: cast channel count to `stbir_pixel_layout`
|
|
||||||
- data_type: STBIR_TYPE_UINT8_SRGB
|
|
||||||
- edge: unchanged (STBIR_EDGE_WRAP, etc.)
|
|
||||||
- filter: STBIR_FILTER_DEFAULT
|
|
||||||
- which channel is alpha is specified in stbir_pixel_layout, see enum for details
|
|
||||||
|
|
||||||
EASY API CALLS:
|
EASY API CALLS:
|
||||||
Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge.
|
Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge.
|
||||||
|
|
||||||
|
@ -283,7 +254,7 @@
|
||||||
using the stbir_set_filter_callbacks function.
|
using the stbir_set_filter_callbacks function.
|
||||||
|
|
||||||
PROGRESS
|
PROGRESS
|
||||||
For interactive use with slow resize operations, you can use the the
|
For interactive use with slow resize operations, you can use the
|
||||||
scanline callbacks in the extended API. It would have to be a *very* large
|
scanline callbacks in the extended API. It would have to be a *very* large
|
||||||
image resample to need progress though - we're very fast.
|
image resample to need progress though - we're very fast.
|
||||||
|
|
||||||
|
@ -296,6 +267,34 @@
|
||||||
ASSERT
|
ASSERT
|
||||||
Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
|
Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
|
||||||
|
|
||||||
|
PORTING FROM VERSION 1
|
||||||
|
The API has changed. You can continue to use the old version of stb_image_resize.h,
|
||||||
|
which is available in the "deprecated/" directory.
|
||||||
|
|
||||||
|
If you're using the old simple-to-use API, porting is straightforward.
|
||||||
|
(For more advanced APIs, read the documentation.)
|
||||||
|
|
||||||
|
stbir_resize_uint8():
|
||||||
|
- call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout`
|
||||||
|
|
||||||
|
stbir_resize_float():
|
||||||
|
- call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout`
|
||||||
|
|
||||||
|
stbir_resize_uint8_srgb():
|
||||||
|
- function name is unchanged
|
||||||
|
- cast channel count to `stbir_pixel_layout`
|
||||||
|
- above is sufficient unless your image has alpha and it's not RGBA/BGRA
|
||||||
|
- in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode
|
||||||
|
|
||||||
|
stbir_resize_uint8_srgb_edgemode()
|
||||||
|
- switch to the "medium complexity" API
|
||||||
|
- stbir_resize(), very similar API but a few more parameters:
|
||||||
|
- pixel_layout: cast channel count to `stbir_pixel_layout`
|
||||||
|
- data_type: STBIR_TYPE_UINT8_SRGB
|
||||||
|
- edge: unchanged (STBIR_EDGE_WRAP, etc.)
|
||||||
|
- filter: STBIR_FILTER_DEFAULT
|
||||||
|
- which channel is alpha is specified in stbir_pixel_layout, see enum for details
|
||||||
|
|
||||||
FUTURE TODOS
|
FUTURE TODOS
|
||||||
* For polyphase integral filters, we just memcpy the coeffs to dupe
|
* For polyphase integral filters, we just memcpy the coeffs to dupe
|
||||||
them, but we should indirect and use the same coeff memory.
|
them, but we should indirect and use the same coeff memory.
|
||||||
|
@ -308,6 +307,8 @@
|
||||||
some pixel reconversion, but probably dwarfed by things falling out
|
some pixel reconversion, but probably dwarfed by things falling out
|
||||||
of cache. Probably also something possible with alternating between
|
of cache. Probably also something possible with alternating between
|
||||||
scattering and gathering at high resize scales?
|
scattering and gathering at high resize scales?
|
||||||
|
* Should we have a function that generates multiple MIPs at the same time (could keep
|
||||||
|
more memory in cache during multiple resizes)?
|
||||||
* Rewrite the coefficient generator to do many at once.
|
* Rewrite the coefficient generator to do many at once.
|
||||||
* AVX-512 vertical kernels - worried about downclocking here.
|
* AVX-512 vertical kernels - worried about downclocking here.
|
||||||
* Convert the reincludes to macros when we know they aren't changing.
|
* Convert the reincludes to macros when we know they aren't changing.
|
||||||
|
@ -328,6 +329,16 @@
|
||||||
Nathan Reed: warning fixes for 1.0
|
Nathan Reed: warning fixes for 1.0
|
||||||
|
|
||||||
REVISIONS
|
REVISIONS
|
||||||
|
2.14 (2025-05-09) fixed a bug using downsampling gather horizontal first, and
|
||||||
|
scatter with vertical first.
|
||||||
|
2.13 (2025-02-27) fixed a bug when using input callbacks, turned off simd for
|
||||||
|
tiny-c, fixed some variables that should have been static,
|
||||||
|
fixes a bug when calculating temp memory with resizes that
|
||||||
|
exceed 2GB of temp memory (very large resizes).
|
||||||
|
2.12 (2024-10-18) fix incorrect use of user_data with STBIR_FREE
|
||||||
|
2.11 (2024-09-08) fix harmless asan warnings in 2-channel and 3-channel mode
|
||||||
|
with AVX-2, fix some weird scaling edge conditions with
|
||||||
|
point sample mode.
|
||||||
2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control,
|
2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control,
|
||||||
fix MSVC 32-bit arm half float routines.
|
fix MSVC 32-bit arm half float routines.
|
||||||
2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting
|
2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting
|
||||||
|
@ -335,11 +346,11 @@
|
||||||
2.08 (2024-06-10) fix for RGB->BGR three channel flips and add SIMD (thanks
|
2.08 (2024-06-10) fix for RGB->BGR three channel flips and add SIMD (thanks
|
||||||
to Ryan Salsbury), fix for sub-rect resizes, use the
|
to Ryan Salsbury), fix for sub-rect resizes, use the
|
||||||
pragmas to control unrolling when they are available.
|
pragmas to control unrolling when they are available.
|
||||||
2.07 (2024-05-24) fix for slow final split during threaded conversions of very
|
2.07 (2024-05-24) fix for slow final split during threaded conversions of very
|
||||||
wide scanlines when downsampling (caused by extra input
|
wide scanlines when downsampling (caused by extra input
|
||||||
converting), fix for wide scanline resamples with many
|
converting), fix for wide scanline resamples with many
|
||||||
splits (int overflow), fix GCC warning.
|
splits (int overflow), fix GCC warning.
|
||||||
2.06 (2024-02-10) fix for identical width/height 3x or more down-scaling
|
2.06 (2024-02-10) fix for identical width/height 3x or more down-scaling
|
||||||
undersampling a single row on rare resize ratios (about 1%).
|
undersampling a single row on rare resize ratios (about 1%).
|
||||||
2.05 (2024-02-07) fix for 2 pixel to 1 pixel resizes with wrap (thanks Aras),
|
2.05 (2024-02-07) fix for 2 pixel to 1 pixel resizes with wrap (thanks Aras),
|
||||||
fix for output callback (thanks Julien Koenen).
|
fix for output callback (thanks Julien Koenen).
|
||||||
|
@ -379,62 +390,6 @@ typedef uint32_t stbir_uint32;
|
||||||
typedef uint64_t stbir_uint64;
|
typedef uint64_t stbir_uint64;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _M_IX86_FP
|
|
||||||
#if ( _M_IX86_FP >= 1 )
|
|
||||||
#ifndef STBIR_SSE
|
|
||||||
#define STBIR_SSE
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || defined(STBIR_SSE) || defined(STBIR_SSE2)
|
|
||||||
#ifndef STBIR_SSE2
|
|
||||||
#define STBIR_SSE2
|
|
||||||
#endif
|
|
||||||
#if defined(__AVX__) || defined(STBIR_AVX2)
|
|
||||||
#ifndef STBIR_AVX
|
|
||||||
#ifndef STBIR_NO_AVX
|
|
||||||
#define STBIR_AVX
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#if defined(__AVX2__) || defined(STBIR_AVX2)
|
|
||||||
#ifndef STBIR_NO_AVX2
|
|
||||||
#ifndef STBIR_AVX2
|
|
||||||
#define STBIR_AVX2
|
|
||||||
#endif
|
|
||||||
#if defined( _MSC_VER ) && !defined(__clang__)
|
|
||||||
#ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can autoselect it here on microsoft - clang needs -m16c
|
|
||||||
#define STBIR_FP16C
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#ifdef __F16C__
|
|
||||||
#ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for clang and gcc)
|
|
||||||
#define STBIR_FP16C
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__)
|
|
||||||
#ifndef STBIR_NEON
|
|
||||||
#define STBIR_NEON
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(_M_ARM) || defined(__arm__)
|
|
||||||
#ifdef STBIR_USE_FMA
|
|
||||||
#undef STBIR_USE_FMA // no FMA for 32-bit arm on MSVC
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
#ifndef STBIR_WASM
|
|
||||||
#define STBIR_WASM
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef STBIRDEF
|
#ifndef STBIRDEF
|
||||||
#ifdef STB_IMAGE_RESIZE_STATIC
|
#ifdef STB_IMAGE_RESIZE_STATIC
|
||||||
#define STBIRDEF static
|
#define STBIRDEF static
|
||||||
|
@ -1033,7 +988,7 @@ typedef struct
|
||||||
char no_cache_straddle[64];
|
char no_cache_straddle[64];
|
||||||
} stbir__per_split_info;
|
} stbir__per_split_info;
|
||||||
|
|
||||||
typedef void stbir__decode_pixels_func( float * decode, int width_times_channels, void const * input );
|
typedef float * stbir__decode_pixels_func( float * decode, int width_times_channels, void const * input );
|
||||||
typedef void stbir__alpha_weight_func( float * decode_buffer, int width_times_channels );
|
typedef void stbir__alpha_weight_func( float * decode_buffer, int width_times_channels );
|
||||||
typedef void stbir__horizontal_gather_channels_func( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer,
|
typedef void stbir__horizontal_gather_channels_func( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer,
|
||||||
stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width );
|
stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width );
|
||||||
|
@ -1096,8 +1051,8 @@ struct stbir__info
|
||||||
|
|
||||||
#define stbir__max_uint8_as_float 255.0f
|
#define stbir__max_uint8_as_float 255.0f
|
||||||
#define stbir__max_uint16_as_float 65535.0f
|
#define stbir__max_uint16_as_float 65535.0f
|
||||||
#define stbir__max_uint8_as_float_inverted (1.0f/255.0f)
|
#define stbir__max_uint8_as_float_inverted 3.9215689e-03f // (1.0f/255.0f)
|
||||||
#define stbir__max_uint16_as_float_inverted (1.0f/65535.0f)
|
#define stbir__max_uint16_as_float_inverted 1.5259022e-05f // (1.0f/65535.0f)
|
||||||
#define stbir__small_float ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
|
#define stbir__small_float ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
|
||||||
|
|
||||||
// min/max friendly
|
// min/max friendly
|
||||||
|
@ -1202,23 +1157,86 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
|
||||||
#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split?
|
#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split?
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define STBIR_INPUT_CALLBACK_PADDING 3
|
||||||
|
|
||||||
|
#ifdef _M_IX86_FP
|
||||||
|
#if ( _M_IX86_FP >= 1 )
|
||||||
|
#ifndef STBIR_SSE
|
||||||
|
#define STBIR_SSE
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __TINYC__
|
||||||
|
// tiny c has no intrinsics yet - this can become a version check if they add them
|
||||||
|
#define STBIR_NO_SIMD
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || defined(STBIR_SSE) || defined(STBIR_SSE2)
|
||||||
|
#ifndef STBIR_SSE2
|
||||||
|
#define STBIR_SSE2
|
||||||
|
#endif
|
||||||
|
#if defined(__AVX__) || defined(STBIR_AVX2)
|
||||||
|
#ifndef STBIR_AVX
|
||||||
|
#ifndef STBIR_NO_AVX
|
||||||
|
#define STBIR_AVX
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#if defined(__AVX2__) || defined(STBIR_AVX2)
|
||||||
|
#ifndef STBIR_NO_AVX2
|
||||||
|
#ifndef STBIR_AVX2
|
||||||
|
#define STBIR_AVX2
|
||||||
|
#endif
|
||||||
|
#if defined( _MSC_VER ) && !defined(__clang__)
|
||||||
|
#ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can autoselect it here on microsoft - clang needs -mf16c
|
||||||
|
#define STBIR_FP16C
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#ifdef __F16C__
|
||||||
|
#ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for clang and gcc)
|
||||||
|
#define STBIR_FP16C
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__)
|
||||||
|
#ifndef STBIR_NEON
|
||||||
|
#define STBIR_NEON
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_M_ARM) || defined(__arm__)
|
||||||
|
#ifdef STBIR_USE_FMA
|
||||||
|
#undef STBIR_USE_FMA // no FMA for 32-bit arm on MSVC
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
|
#ifndef STBIR_WASM
|
||||||
|
#define STBIR_WASM
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// restrict pointers for the output pointers, other loop and unroll control
|
// restrict pointers for the output pointers, other loop and unroll control
|
||||||
#if defined( _MSC_VER ) && !defined(__clang__)
|
#if defined( _MSC_VER ) && !defined(__clang__)
|
||||||
#define STBIR_STREAMOUT_PTR( star ) star __restrict
|
#define STBIR_STREAMOUT_PTR( star ) star __restrict
|
||||||
#define STBIR_NO_UNROLL( ptr ) __assume(ptr) // this oddly keeps msvc from unrolling a loop
|
#define STBIR_NO_UNROLL( ptr ) __assume(ptr) // this oddly keeps msvc from unrolling a loop
|
||||||
#if _MSC_VER >= 1900
|
#if _MSC_VER >= 1900
|
||||||
#define STBIR_NO_UNROLL_LOOP_START __pragma(loop( no_vector ))
|
#define STBIR_NO_UNROLL_LOOP_START __pragma(loop( no_vector ))
|
||||||
#else
|
#else
|
||||||
#define STBIR_NO_UNROLL_LOOP_START
|
#define STBIR_NO_UNROLL_LOOP_START
|
||||||
#endif
|
#endif
|
||||||
#elif defined( __clang__ )
|
#elif defined( __clang__ )
|
||||||
#define STBIR_STREAMOUT_PTR( star ) star __restrict__
|
#define STBIR_STREAMOUT_PTR( star ) star __restrict__
|
||||||
#define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr))
|
#define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr))
|
||||||
#if ( __clang_major__ >= 4 ) || ( ( __clang_major__ >= 3 ) && ( __clang_minor__ >= 5 ) )
|
#if ( __clang_major__ >= 4 ) || ( ( __clang_major__ >= 3 ) && ( __clang_minor__ >= 5 ) )
|
||||||
#define STBIR_NO_UNROLL_LOOP_START _Pragma("clang loop unroll(disable)") _Pragma("clang loop vectorize(disable)")
|
#define STBIR_NO_UNROLL_LOOP_START _Pragma("clang loop unroll(disable)") _Pragma("clang loop vectorize(disable)")
|
||||||
#else
|
#else
|
||||||
#define STBIR_NO_UNROLL_LOOP_START
|
#define STBIR_NO_UNROLL_LOOP_START
|
||||||
#endif
|
#endif
|
||||||
#elif defined( __GNUC__ )
|
#elif defined( __GNUC__ )
|
||||||
#define STBIR_STREAMOUT_PTR( star ) star __restrict__
|
#define STBIR_STREAMOUT_PTR( star ) star __restrict__
|
||||||
#define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr))
|
#define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr))
|
||||||
|
@ -1448,8 +1466,8 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#define stbir__simdf_pack_to_8words(out,reg0,reg1) out = _mm_packus_epi32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())), _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())))
|
#define stbir__simdf_pack_to_8words(out,reg0,reg1) out = _mm_packus_epi32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())), _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())))
|
||||||
#else
|
#else
|
||||||
STBIR__SIMDI_CONST(stbir__s32_32768, 32768);
|
static STBIR__SIMDI_CONST(stbir__s32_32768, 32768);
|
||||||
STBIR__SIMDI_CONST(stbir__s16_32768, ((32768<<16)|32768));
|
static STBIR__SIMDI_CONST(stbir__s16_32768, ((32768<<16)|32768));
|
||||||
|
|
||||||
#define stbir__simdf_pack_to_8words(out,reg0,reg1) \
|
#define stbir__simdf_pack_to_8words(out,reg0,reg1) \
|
||||||
{ \
|
{ \
|
||||||
|
@ -3214,10 +3232,9 @@ static void stbir__get_extents( stbir__sampler * samp, stbir__extents * scanline
|
||||||
newspan->n0 = -left_margin;
|
newspan->n0 = -left_margin;
|
||||||
newspan->n1 = ( max_left - min_left ) - left_margin;
|
newspan->n1 = ( max_left - min_left ) - left_margin;
|
||||||
scanline_extents->edge_sizes[0] = 0; // don't need to copy the left margin, since we are directly decoding into the margin
|
scanline_extents->edge_sizes[0] = 0; // don't need to copy the left margin, since we are directly decoding into the margin
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we can't merge the min_left range, add it as a second range
|
// if we can't merge the min_left range, add it as a second range
|
||||||
|
else
|
||||||
if ( ( right_margin ) && ( min_right != 0x7fffffff ) )
|
if ( ( right_margin ) && ( min_right != 0x7fffffff ) )
|
||||||
{
|
{
|
||||||
stbir__span * newspan = scanline_extents->spans + 1;
|
stbir__span * newspan = scanline_extents->spans + 1;
|
||||||
|
@ -3232,7 +3249,14 @@ static void stbir__get_extents( stbir__sampler * samp, stbir__extents * scanline
|
||||||
newspan->n0 = scanline_extents->spans[1].n1 + 1;
|
newspan->n0 = scanline_extents->spans[1].n1 + 1;
|
||||||
newspan->n1 = scanline_extents->spans[1].n1 + 1 + ( max_right - min_right );
|
newspan->n1 = scanline_extents->spans[1].n1 + 1 + ( max_right - min_right );
|
||||||
scanline_extents->edge_sizes[1] = 0; // don't need to copy the right margin, since we are directly decoding into the margin
|
scanline_extents->edge_sizes[1] = 0; // don't need to copy the right margin, since we are directly decoding into the margin
|
||||||
return;
|
}
|
||||||
|
|
||||||
|
// sort the spans into write output order
|
||||||
|
if ( ( scanline_extents->spans[1].n1 > scanline_extents->spans[1].n0 ) && ( scanline_extents->spans[0].n0 > scanline_extents->spans[1].n0 ) )
|
||||||
|
{
|
||||||
|
stbir__span tspan = scanline_extents->spans[0];
|
||||||
|
scanline_extents->spans[0] = scanline_extents->spans[1];
|
||||||
|
scanline_extents->spans[1] = tspan;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3247,6 +3271,7 @@ static void stbir__calculate_in_pixel_range( int * first_pixel, int * last_pixel
|
||||||
|
|
||||||
first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f));
|
first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f));
|
||||||
last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f));
|
last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f));
|
||||||
|
if ( last < first ) last = first; // point sample mode can span a value *right* at 0.5, and cause these to cross
|
||||||
|
|
||||||
if ( edge == STBIR_EDGE_WRAP )
|
if ( edge == STBIR_EDGE_WRAP )
|
||||||
{
|
{
|
||||||
|
@ -3282,6 +3307,11 @@ static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_
|
||||||
|
|
||||||
stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge );
|
stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge );
|
||||||
|
|
||||||
|
// make sure we never generate a range larger than our precalculated coeff width
|
||||||
|
// this only happens in point sample mode, but it's a good safe thing to do anyway
|
||||||
|
if ( ( in_last_pixel - in_first_pixel + 1 ) > coefficient_width )
|
||||||
|
in_last_pixel = in_first_pixel + coefficient_width - 1;
|
||||||
|
|
||||||
last_non_zero = -1;
|
last_non_zero = -1;
|
||||||
for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
|
for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
|
||||||
{
|
{
|
||||||
|
@ -3317,19 +3347,22 @@ static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff )
|
static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff, int max_width )
|
||||||
{
|
{
|
||||||
if ( new_pixel <= contribs->n1 ) // before the end
|
if ( new_pixel <= contribs->n1 ) // before the end
|
||||||
{
|
{
|
||||||
if ( new_pixel < contribs->n0 ) // before the front?
|
if ( new_pixel < contribs->n0 ) // before the front?
|
||||||
{
|
{
|
||||||
int j, o = contribs->n0 - new_pixel;
|
if ( ( contribs->n1 - new_pixel + 1 ) <= max_width )
|
||||||
for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- )
|
{
|
||||||
coeffs[ j + o ] = coeffs[ j ];
|
int j, o = contribs->n0 - new_pixel;
|
||||||
for ( j = 1 ; j < o ; j-- )
|
for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- )
|
||||||
coeffs[ j ] = coeffs[ 0 ];
|
coeffs[ j + o ] = coeffs[ j ];
|
||||||
coeffs[ 0 ] = new_coeff;
|
for ( j = 1 ; j < o ; j-- )
|
||||||
contribs->n0 = new_pixel;
|
coeffs[ j ] = coeffs[ 0 ];
|
||||||
|
coeffs[ 0 ] = new_coeff;
|
||||||
|
contribs->n0 = new_pixel;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3338,12 +3371,15 @@ static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int j, e = new_pixel - contribs->n0;
|
if ( ( new_pixel - contribs->n0 + 1 ) <= max_width )
|
||||||
for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any
|
{
|
||||||
coeffs[j] = 0;
|
int j, e = new_pixel - contribs->n0;
|
||||||
|
for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any
|
||||||
|
coeffs[j] = 0;
|
||||||
|
|
||||||
coeffs[ e ] = new_coeff;
|
coeffs[ e ] = new_coeff;
|
||||||
contribs->n1 = new_pixel;
|
contribs->n1 = new_pixel;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3522,6 +3558,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
|
||||||
|
|
||||||
coeffs = coefficient_group;
|
coeffs = coefficient_group;
|
||||||
contribs = contributors;
|
contribs = contributors;
|
||||||
|
|
||||||
for (n = 0; n < num_contributors; n++)
|
for (n = 0; n < num_contributors; n++)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -3561,7 +3598,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
|
||||||
int endi = contribs->n1;
|
int endi = contribs->n1;
|
||||||
contribs->n1 = input_last_n1;
|
contribs->n1 = input_last_n1;
|
||||||
for( i = input_size; i <= endi; i++ )
|
for( i = input_size; i <= endi; i++ )
|
||||||
stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start] );
|
stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start], coefficient_width );
|
||||||
}
|
}
|
||||||
|
|
||||||
// now check left hand edge
|
// now check left hand edge
|
||||||
|
@ -3573,7 +3610,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
|
||||||
|
|
||||||
// reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist)
|
// reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist)
|
||||||
for( i = -1 ; i > contribs->n0 ; i-- )
|
for( i = -1 ; i > contribs->n0 ; i-- )
|
||||||
stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c-- );
|
stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c--, coefficient_width );
|
||||||
save_n0 = contribs->n0;
|
save_n0 = contribs->n0;
|
||||||
save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)!
|
save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)!
|
||||||
|
|
||||||
|
@ -3583,7 +3620,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
|
||||||
coeffs[i] = coeffs[i-save_n0];
|
coeffs[i] = coeffs[i-save_n0];
|
||||||
|
|
||||||
// now that we have shrunk down the contribs, we insert the first one safely
|
// now that we have shrunk down the contribs, we insert the first one safely
|
||||||
stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff );
|
stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff, coefficient_width );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3592,6 +3629,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
|
||||||
int diff = contribs->n1 - contribs->n0 + 1;
|
int diff = contribs->n1 - contribs->n0 + 1;
|
||||||
while ( diff && ( coeffs[ diff-1 ] == 0.0f ) )
|
while ( diff && ( coeffs[ diff-1 ] == 0.0f ) )
|
||||||
--diff;
|
--diff;
|
||||||
|
|
||||||
contribs->n1 = contribs->n0 + diff - 1;
|
contribs->n1 = contribs->n0 + diff - 1;
|
||||||
|
|
||||||
if ( contribs->n0 <= contribs->n1 )
|
if ( contribs->n0 <= contribs->n1 )
|
||||||
|
@ -3617,9 +3655,9 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
|
||||||
filter_info->widest = widest;
|
filter_info->widest = widest;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef STBIR_RENORM_TYPE
|
#undef STBIR_RENORM_TYPE
|
||||||
|
|
||||||
static int stbir__pack_coefficients( int num_contributors, stbir__contributors* contributors, float * coefficents, int coefficient_width, int widest, int row0, int row1 )
|
static int stbir__pack_coefficients( int num_contributors, stbir__contributors* contributors, float * coefficents, int coefficient_width, int widest, int row0, int row1 )
|
||||||
{
|
{
|
||||||
#define STBIR_MOVE_1( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint32*)(dest))[0] = ((stbir_uint32*)(src))[0]; }
|
#define STBIR_MOVE_1( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint32*)(dest))[0] = ((stbir_uint32*)(src))[0]; }
|
||||||
#define STBIR_MOVE_2( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint64*)(dest))[0] = ((stbir_uint64*)(src))[0]; }
|
#define STBIR_MOVE_2( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint64*)(dest))[0] = ((stbir_uint64*)(src))[0]; }
|
||||||
|
@ -3940,7 +3978,7 @@ static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * ot
|
||||||
for (k = gn0 ; k <= gn1 ; k++ )
|
for (k = gn0 ; k <= gn1 ; k++ )
|
||||||
{
|
{
|
||||||
float gc = *g_coeffs++;
|
float gc = *g_coeffs++;
|
||||||
|
|
||||||
// skip zero and denormals - must skip zeros to avoid adding coeffs beyond scatter_coefficient_width
|
// skip zero and denormals - must skip zeros to avoid adding coeffs beyond scatter_coefficient_width
|
||||||
// (which happens when pivoting from horizontal, which might have dummy zeros)
|
// (which happens when pivoting from horizontal, which might have dummy zeros)
|
||||||
if ( ( ( gc >= stbir__small_float ) || ( gc <= -stbir__small_float ) ) )
|
if ( ( ( gc >= stbir__small_float ) || ( gc <= -stbir__small_float ) ) )
|
||||||
|
@ -3964,7 +4002,7 @@ static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * ot
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc );
|
stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc, scatter_coefficient_width );
|
||||||
}
|
}
|
||||||
STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width );
|
STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width );
|
||||||
}
|
}
|
||||||
|
@ -4441,7 +4479,7 @@ static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_chann
|
||||||
|
|
||||||
#ifdef STBIR_SIMD
|
#ifdef STBIR_SIMD
|
||||||
#ifdef stbir__simdf_swiz2 // do we have two argument swizzles?
|
#ifdef stbir__simdf_swiz2 // do we have two argument swizzles?
|
||||||
end_decode -= 12;
|
end_decode -= 12;
|
||||||
STBIR_NO_UNROLL_LOOP_START
|
STBIR_NO_UNROLL_LOOP_START
|
||||||
while( decode <= end_decode )
|
while( decode <= end_decode )
|
||||||
{
|
{
|
||||||
|
@ -4452,13 +4490,13 @@ static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_chann
|
||||||
stbir__simdf_load( b, decode+4 );
|
stbir__simdf_load( b, decode+4 );
|
||||||
stbir__simdf_load( c, decode+8 );
|
stbir__simdf_load( c, decode+8 );
|
||||||
|
|
||||||
na = stbir__simdf_swiz2( a, b, 2, 1, 0, 5 );
|
na = stbir__simdf_swiz2( a, b, 2, 1, 0, 5 );
|
||||||
b = stbir__simdf_swiz2( a, b, 4, 3, 6, 7 );
|
b = stbir__simdf_swiz2( a, b, 4, 3, 6, 7 );
|
||||||
nb = stbir__simdf_swiz2( b, c, 0, 1, 4, 3 );
|
nb = stbir__simdf_swiz2( b, c, 0, 1, 4, 3 );
|
||||||
c = stbir__simdf_swiz2( b, c, 2, 7, 6, 5 );
|
c = stbir__simdf_swiz2( b, c, 2, 7, 6, 5 );
|
||||||
|
|
||||||
stbir__simdf_store( decode, na );
|
stbir__simdf_store( decode, na );
|
||||||
stbir__simdf_store( decode+4, nb );
|
stbir__simdf_store( decode+4, nb );
|
||||||
stbir__simdf_store( decode+8, c );
|
stbir__simdf_store( decode+8, c );
|
||||||
decode += 12;
|
decode += 12;
|
||||||
}
|
}
|
||||||
|
@ -4480,18 +4518,18 @@ static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_chann
|
||||||
stbir__simdf_load( f, decode+15 );
|
stbir__simdf_load( f, decode+15 );
|
||||||
stbir__simdf_load( g, decode+18 );
|
stbir__simdf_load( g, decode+18 );
|
||||||
|
|
||||||
a = stbir__simdf_swiz( a, 2, 1, 0, 3 );
|
a = stbir__simdf_swiz( a, 2, 1, 0, 3 );
|
||||||
b = stbir__simdf_swiz( b, 2, 1, 0, 3 );
|
b = stbir__simdf_swiz( b, 2, 1, 0, 3 );
|
||||||
c = stbir__simdf_swiz( c, 2, 1, 0, 3 );
|
c = stbir__simdf_swiz( c, 2, 1, 0, 3 );
|
||||||
d = stbir__simdf_swiz( d, 2, 1, 0, 3 );
|
d = stbir__simdf_swiz( d, 2, 1, 0, 3 );
|
||||||
e = stbir__simdf_swiz( e, 2, 1, 0, 3 );
|
e = stbir__simdf_swiz( e, 2, 1, 0, 3 );
|
||||||
f = stbir__simdf_swiz( f, 2, 1, 0, 3 );
|
f = stbir__simdf_swiz( f, 2, 1, 0, 3 );
|
||||||
g = stbir__simdf_swiz( g, 2, 1, 0, 3 );
|
g = stbir__simdf_swiz( g, 2, 1, 0, 3 );
|
||||||
|
|
||||||
// stores overlap, need to be in order,
|
// stores overlap, need to be in order,
|
||||||
stbir__simdf_store( decode, a );
|
stbir__simdf_store( decode, a );
|
||||||
i21 = decode[21];
|
i21 = decode[21];
|
||||||
stbir__simdf_store( decode+3, b );
|
stbir__simdf_store( decode+3, b );
|
||||||
i23 = decode[23];
|
i23 = decode[23];
|
||||||
stbir__simdf_store( decode+6, c );
|
stbir__simdf_store( decode+6, c );
|
||||||
stbir__simdf_store( decode+9, d );
|
stbir__simdf_store( decode+9, d );
|
||||||
|
@ -4543,7 +4581,8 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
|
||||||
int row = stbir__edge_wrap(edge_vertical, n, stbir_info->vertical.scale_info.input_full_size);
|
int row = stbir__edge_wrap(edge_vertical, n, stbir_info->vertical.scale_info.input_full_size);
|
||||||
const void* input_plane_data = ( (char *) stbir_info->input_data ) + (size_t)row * (size_t) stbir_info->input_stride_bytes;
|
const void* input_plane_data = ( (char *) stbir_info->input_data ) + (size_t)row * (size_t) stbir_info->input_stride_bytes;
|
||||||
stbir__span const * spans = stbir_info->scanline_extents.spans;
|
stbir__span const * spans = stbir_info->scanline_extents.spans;
|
||||||
float* full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels;
|
float * full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels;
|
||||||
|
float * last_decoded = 0;
|
||||||
|
|
||||||
// if we are on edge_zero, and we get in here with an out of bounds n, then the calculate filters has failed
|
// if we are on edge_zero, and we get in here with an out of bounds n, then the calculate filters has failed
|
||||||
STBIR_ASSERT( !(edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->vertical.scale_info.input_full_size)) );
|
STBIR_ASSERT( !(edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->vertical.scale_info.input_full_size)) );
|
||||||
|
@ -4571,12 +4610,12 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
|
||||||
if ( stbir_info->in_pixels_cb )
|
if ( stbir_info->in_pixels_cb )
|
||||||
{
|
{
|
||||||
// call the callback with a temp buffer (that they can choose to use or not). the temp is just right aligned memory in the decode_buffer itself
|
// call the callback with a temp buffer (that they can choose to use or not). the temp is just right aligned memory in the decode_buffer itself
|
||||||
input_data = stbir_info->in_pixels_cb( ( (char*) end_decode ) - ( width * input_sample_in_bytes ), input_plane_data, width, spans->pixel_offset_for_input, row, stbir_info->user_data );
|
input_data = stbir_info->in_pixels_cb( ( (char*) end_decode ) - ( width * input_sample_in_bytes ) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING, input_plane_data, width, spans->pixel_offset_for_input, row, stbir_info->user_data );
|
||||||
}
|
}
|
||||||
|
|
||||||
STBIR_PROFILE_START( decode );
|
STBIR_PROFILE_START( decode );
|
||||||
// convert the pixels info the float decode_buffer, (we index from end_decode, so that when channels<effective_channels, we are right justified in the buffer)
|
// convert the pixels info the float decode_buffer, (we index from end_decode, so that when channels<effective_channels, we are right justified in the buffer)
|
||||||
stbir_info->decode_pixels( (float*)end_decode - width_times_channels, width_times_channels, input_data );
|
last_decoded = stbir_info->decode_pixels( (float*)end_decode - width_times_channels, width_times_channels, input_data );
|
||||||
STBIR_PROFILE_END( decode );
|
STBIR_PROFILE_END( decode );
|
||||||
|
|
||||||
if (stbir_info->alpha_weight)
|
if (stbir_info->alpha_weight)
|
||||||
|
@ -4611,9 +4650,19 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
|
||||||
float * marg = full_decode_buffer + x * effective_channels;
|
float * marg = full_decode_buffer + x * effective_channels;
|
||||||
float const * src = full_decode_buffer + stbir__edge_wrap(edge_horizontal, x, input_full_size) * effective_channels;
|
float const * src = full_decode_buffer + stbir__edge_wrap(edge_horizontal, x, input_full_size) * effective_channels;
|
||||||
STBIR_MEMCPY( marg, src, margin * effective_channels * sizeof(float) );
|
STBIR_MEMCPY( marg, src, margin * effective_channels * sizeof(float) );
|
||||||
|
if ( e == 1 ) last_decoded = marg + margin * effective_channels;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// some of the horizontal gathers read one float off the edge (which is masked out), but we force a zero here to make sure no NaNs leak in
|
||||||
|
// (we can't pre-zero it, because the input callback can use that area as padding)
|
||||||
|
last_decoded[0] = 0.0f;
|
||||||
|
|
||||||
|
// we clear this extra float, because the final output pixel filter kernel might have used one less coeff than the max filter width
|
||||||
|
// when this happens, we do read that pixel from the input, so it too could be Nan, so just zero an extra one.
|
||||||
|
// this fits because each scanline is padded by three floats (STBIR_INPUT_CALLBACK_PADDING)
|
||||||
|
last_decoded[1] = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4810,12 +4859,13 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
|
||||||
stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 );
|
stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 );
|
||||||
|
|
||||||
#define stbir__1_coeff_remnant( ofs ) \
|
#define stbir__1_coeff_remnant( ofs ) \
|
||||||
{ stbir__simdf t; \
|
{ stbir__simdf t,d; \
|
||||||
stbir__simdf_load1z( t, hc + (ofs) ); \
|
stbir__simdf_load1z( t, hc + (ofs) ); \
|
||||||
|
stbir__simdf_load2( d, decode + (ofs) * 2 ); \
|
||||||
stbir__simdf_0123to0011( t, t ); \
|
stbir__simdf_0123to0011( t, t ); \
|
||||||
stbir__simdf_mult_mem( t, t, decode+(ofs)*2 ); \
|
stbir__simdf_mult( t, t, d ); \
|
||||||
stbir__simdf8_add4( tot0, tot0, t ); }
|
stbir__simdf8_add4( tot0, tot0, t ); }
|
||||||
|
|
||||||
#define stbir__2_coeff_remnant( ofs ) \
|
#define stbir__2_coeff_remnant( ofs ) \
|
||||||
{ stbir__simdf t; \
|
{ stbir__simdf t; \
|
||||||
stbir__simdf_load2( t, hc + (ofs) ); \
|
stbir__simdf_load2( t, hc + (ofs) ); \
|
||||||
|
@ -6191,6 +6241,8 @@ static void stbir__resample_vertical_gather(stbir__info const * stbir_info, stbi
|
||||||
if ( vertical_first )
|
if ( vertical_first )
|
||||||
{
|
{
|
||||||
// Now resample the gathered vertical data in the horizontal axis into the encode buffer
|
// Now resample the gathered vertical data in the horizontal axis into the encode buffer
|
||||||
|
decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3
|
||||||
|
decode_buffer[ width_times_channels+1 ] = 0.0f;
|
||||||
stbir__resample_horizontal_gather(stbir_info, encode_buffer, decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
|
stbir__resample_horizontal_gather(stbir_info, encode_buffer, decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6362,6 +6414,8 @@ static void stbir__vertical_scatter_loop( stbir__info const * stbir_info, stbir_
|
||||||
void * scanline_scatter_buffer;
|
void * scanline_scatter_buffer;
|
||||||
void * scanline_scatter_buffer_end;
|
void * scanline_scatter_buffer_end;
|
||||||
int on_first_input_y, last_input_y;
|
int on_first_input_y, last_input_y;
|
||||||
|
int width = (stbir_info->vertical_first) ? ( stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1 ) : stbir_info->horizontal.scale_info.output_sub_size;
|
||||||
|
int width_times_channels = stbir_info->effective_channels * width;
|
||||||
|
|
||||||
STBIR_ASSERT( !stbir_info->vertical.is_gather );
|
STBIR_ASSERT( !stbir_info->vertical.is_gather );
|
||||||
|
|
||||||
|
@ -6396,7 +6450,12 @@ static void stbir__vertical_scatter_loop( stbir__info const * stbir_info, stbir_
|
||||||
|
|
||||||
// mark all the buffers as empty to start
|
// mark all the buffers as empty to start
|
||||||
for( y = 0 ; y < stbir_info->ring_buffer_num_entries ; y++ )
|
for( y = 0 ; y < stbir_info->ring_buffer_num_entries ; y++ )
|
||||||
stbir__get_ring_buffer_entry( stbir_info, split_info, y )[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter
|
{
|
||||||
|
float * decode_buffer = stbir__get_ring_buffer_entry( stbir_info, split_info, y );
|
||||||
|
decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3
|
||||||
|
decode_buffer[ width_times_channels+1 ] = 0.0f;
|
||||||
|
decode_buffer[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter
|
||||||
|
}
|
||||||
|
|
||||||
// do the loop in input space
|
// do the loop in input space
|
||||||
on_first_input_y = 1; last_input_y = start_input_y;
|
on_first_input_y = 1; last_input_y = start_input_y;
|
||||||
|
@ -6519,11 +6578,11 @@ static void stbir__set_sampler(stbir__sampler * samp, stbir_filter filter, stbir
|
||||||
samp->coefficient_width = stbir__get_coefficient_width(samp, samp->is_gather, user_data);
|
samp->coefficient_width = stbir__get_coefficient_width(samp, samp->is_gather, user_data);
|
||||||
|
|
||||||
// filter_pixel_width is the conservative size in pixels of input that affect an output pixel.
|
// filter_pixel_width is the conservative size in pixels of input that affect an output pixel.
|
||||||
// In rare cases (only with 2 pix to 1 pix with the default filters), it's possible that the
|
// In rare cases (only with 2 pix to 1 pix with the default filters), it's possible that the
|
||||||
// filter will extend before or after the scanline beyond just one extra entire copy of the
|
// filter will extend before or after the scanline beyond just one extra entire copy of the
|
||||||
// scanline (we would hit the edge twice). We don't let you do that, so we clamp the total
|
// scanline (we would hit the edge twice). We don't let you do that, so we clamp the total
|
||||||
// width to 3x the total of input pixel (once for the scanline, once for the left side
|
// width to 3x the total of input pixel (once for the scanline, once for the left side
|
||||||
// overhang, and once for the right side). We only do this for edge mode, since the other
|
// overhang, and once for the right side). We only do this for edge mode, since the other
|
||||||
// modes can just re-edge clamp back in again.
|
// modes can just re-edge clamp back in again.
|
||||||
if ( edge == STBIR_EDGE_WRAP )
|
if ( edge == STBIR_EDGE_WRAP )
|
||||||
if ( samp->filter_pixel_width > ( scale_info->input_full_size * 3 ) )
|
if ( samp->filter_pixel_width > ( scale_info->input_full_size * 3 ) )
|
||||||
|
@ -6532,11 +6591,11 @@ static void stbir__set_sampler(stbir__sampler * samp, stbir_filter filter, stbir
|
||||||
// This is how much to expand buffers to account for filters seeking outside
|
// This is how much to expand buffers to account for filters seeking outside
|
||||||
// the image boundaries.
|
// the image boundaries.
|
||||||
samp->filter_pixel_margin = samp->filter_pixel_width / 2;
|
samp->filter_pixel_margin = samp->filter_pixel_width / 2;
|
||||||
|
|
||||||
// filter_pixel_margin is the amount that this filter can overhang on just one side of either
|
// filter_pixel_margin is the amount that this filter can overhang on just one side of either
|
||||||
// end of the scanline (left or the right). Since we only allow you to overhang 1 scanline's
|
// end of the scanline (left or the right). Since we only allow you to overhang 1 scanline's
|
||||||
// worth of pixels, we clamp this one side of overhang to the input scanline size. Again,
|
// worth of pixels, we clamp this one side of overhang to the input scanline size. Again,
|
||||||
// this clamping only happens in rare cases with the default filters (2 pix to 1 pix).
|
// this clamping only happens in rare cases with the default filters (2 pix to 1 pix).
|
||||||
if ( edge == STBIR_EDGE_WRAP )
|
if ( edge == STBIR_EDGE_WRAP )
|
||||||
if ( samp->filter_pixel_margin > scale_info->input_full_size )
|
if ( samp->filter_pixel_margin > scale_info->input_full_size )
|
||||||
samp->filter_pixel_margin = scale_info->input_full_size;
|
samp->filter_pixel_margin = scale_info->input_full_size;
|
||||||
|
@ -6544,7 +6603,7 @@ static void stbir__set_sampler(stbir__sampler * samp, stbir_filter filter, stbir
|
||||||
samp->num_contributors = stbir__get_contributors(samp, samp->is_gather);
|
samp->num_contributors = stbir__get_contributors(samp, samp->is_gather);
|
||||||
|
|
||||||
samp->contributors_size = samp->num_contributors * sizeof(stbir__contributors);
|
samp->contributors_size = samp->num_contributors * sizeof(stbir__contributors);
|
||||||
samp->coefficients_size = samp->num_contributors * samp->coefficient_width * sizeof(float) + sizeof(float); // extra sizeof(float) is padding
|
samp->coefficients_size = samp->num_contributors * samp->coefficient_width * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra sizeof(float) is padding
|
||||||
|
|
||||||
samp->gather_prescatter_contributors = 0;
|
samp->gather_prescatter_contributors = 0;
|
||||||
samp->gather_prescatter_coefficients = 0;
|
samp->gather_prescatter_coefficients = 0;
|
||||||
|
@ -6714,7 +6773,7 @@ static void stbir__free_internal_mem( stbir__info *info )
|
||||||
STBIR__FREE_AND_CLEAR( info->horizontal.coefficients );
|
STBIR__FREE_AND_CLEAR( info->horizontal.coefficients );
|
||||||
STBIR__FREE_AND_CLEAR( info->horizontal.contributors );
|
STBIR__FREE_AND_CLEAR( info->horizontal.contributors );
|
||||||
STBIR__FREE_AND_CLEAR( info->alloced_mem );
|
STBIR__FREE_AND_CLEAR( info->alloced_mem );
|
||||||
STBIR__FREE_AND_CLEAR( info );
|
STBIR_FREE( info, info->user_data );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6909,7 +6968,8 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
|
||||||
void * alloced = 0;
|
void * alloced = 0;
|
||||||
size_t alloced_total = 0;
|
size_t alloced_total = 0;
|
||||||
int vertical_first;
|
int vertical_first;
|
||||||
int decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size, alloc_ring_buffer_num_entries;
|
size_t decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size;
|
||||||
|
int alloc_ring_buffer_num_entries;
|
||||||
|
|
||||||
int alpha_weighting_type = 0; // 0=none, 1=simple, 2=fancy
|
int alpha_weighting_type = 0; // 0=none, 1=simple, 2=fancy
|
||||||
int conservative_split_output_size = stbir__get_max_split( splits, vertical->scale_info.output_sub_size );
|
int conservative_split_output_size = stbir__get_max_split( splits, vertical->scale_info.output_sub_size );
|
||||||
|
@ -6954,14 +7014,16 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
|
||||||
vertical_first = stbir__should_do_vertical_first( stbir__compute_weights[ (int)stbir_channel_count_index[ effective_channels ] ], horizontal->filter_pixel_width, horizontal->scale_info.scale, horizontal->scale_info.output_sub_size, vertical->filter_pixel_width, vertical->scale_info.scale, vertical->scale_info.output_sub_size, vertical->is_gather, STBIR__V_FIRST_INFO_POINTER );
|
vertical_first = stbir__should_do_vertical_first( stbir__compute_weights[ (int)stbir_channel_count_index[ effective_channels ] ], horizontal->filter_pixel_width, horizontal->scale_info.scale, horizontal->scale_info.output_sub_size, vertical->filter_pixel_width, vertical->scale_info.scale, vertical->scale_info.output_sub_size, vertical->is_gather, STBIR__V_FIRST_INFO_POINTER );
|
||||||
|
|
||||||
// sometimes read one float off in some of the unrolled loops (with a weight of zero coeff, so it doesn't have an effect)
|
// sometimes read one float off in some of the unrolled loops (with a weight of zero coeff, so it doesn't have an effect)
|
||||||
decode_buffer_size = ( conservative->n1 - conservative->n0 + 1 ) * effective_channels * sizeof(float) + sizeof(float); // extra float for padding
|
// we use a few extra floats instead of just 1, so that input callback buffer can overlap with the decode buffer without
|
||||||
|
// the conversion routines overwriting the callback input data.
|
||||||
|
decode_buffer_size = ( conservative->n1 - conservative->n0 + 1 ) * effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for input callback stagger
|
||||||
|
|
||||||
#if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8)
|
#if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8)
|
||||||
if ( effective_channels == 3 )
|
if ( effective_channels == 3 )
|
||||||
decode_buffer_size += sizeof(float); // avx in 3 channel mode needs one float at the start of the buffer (only with separate allocations)
|
decode_buffer_size += sizeof(float); // avx in 3 channel mode needs one float at the start of the buffer (only with separate allocations)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ring_buffer_length_bytes = horizontal->scale_info.output_sub_size * effective_channels * sizeof(float) + sizeof(float); // extra float for padding
|
ring_buffer_length_bytes = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for padding
|
||||||
|
|
||||||
// if we do vertical first, the ring buffer holds a whole decoded line
|
// if we do vertical first, the ring buffer holds a whole decoded line
|
||||||
if ( vertical_first )
|
if ( vertical_first )
|
||||||
|
@ -6976,13 +7038,13 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
|
||||||
if ( ( !vertical->is_gather ) && ( alloc_ring_buffer_num_entries > conservative_split_output_size ) )
|
if ( ( !vertical->is_gather ) && ( alloc_ring_buffer_num_entries > conservative_split_output_size ) )
|
||||||
alloc_ring_buffer_num_entries = conservative_split_output_size;
|
alloc_ring_buffer_num_entries = conservative_split_output_size;
|
||||||
|
|
||||||
ring_buffer_size = alloc_ring_buffer_num_entries * ring_buffer_length_bytes;
|
ring_buffer_size = (size_t)alloc_ring_buffer_num_entries * (size_t)ring_buffer_length_bytes;
|
||||||
|
|
||||||
// The vertical buffer is used differently, depending on whether we are scattering
|
// The vertical buffer is used differently, depending on whether we are scattering
|
||||||
// the vertical scanlines, or gathering them.
|
// the vertical scanlines, or gathering them.
|
||||||
// If scattering, it's used at the temp buffer to accumulate each output.
|
// If scattering, it's used at the temp buffer to accumulate each output.
|
||||||
// If gathering, it's just the output buffer.
|
// If gathering, it's just the output buffer.
|
||||||
vertical_buffer_size = horizontal->scale_info.output_sub_size * effective_channels * sizeof(float) + sizeof(float); // extra float for padding
|
vertical_buffer_size = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float); // extra float for padding
|
||||||
|
|
||||||
// we make two passes through this loop, 1st to add everything up, 2nd to allocate and init
|
// we make two passes through this loop, 1st to add everything up, 2nd to allocate and init
|
||||||
for(;;)
|
for(;;)
|
||||||
|
@ -7018,9 +7080,9 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
|
||||||
|
|
||||||
info->offset_x = new_x;
|
info->offset_x = new_x;
|
||||||
info->offset_y = new_y;
|
info->offset_y = new_y;
|
||||||
info->alloc_ring_buffer_num_entries = alloc_ring_buffer_num_entries;
|
info->alloc_ring_buffer_num_entries = (int)alloc_ring_buffer_num_entries;
|
||||||
info->ring_buffer_num_entries = 0;
|
info->ring_buffer_num_entries = 0;
|
||||||
info->ring_buffer_length_bytes = ring_buffer_length_bytes;
|
info->ring_buffer_length_bytes = (int)ring_buffer_length_bytes;
|
||||||
info->splits = splits;
|
info->splits = splits;
|
||||||
info->vertical_first = vertical_first;
|
info->vertical_first = vertical_first;
|
||||||
|
|
||||||
|
@ -7101,19 +7163,24 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
|
||||||
// alloc memory for to-be-pivoted coeffs (if necessary)
|
// alloc memory for to-be-pivoted coeffs (if necessary)
|
||||||
if ( vertical->is_gather == 0 )
|
if ( vertical->is_gather == 0 )
|
||||||
{
|
{
|
||||||
int both;
|
size_t both;
|
||||||
int temp_mem_amt;
|
size_t temp_mem_amt;
|
||||||
|
|
||||||
// when in vertical scatter mode, we first build the coefficients in gather mode, and then pivot after,
|
// when in vertical scatter mode, we first build the coefficients in gather mode, and then pivot after,
|
||||||
// that means we need two buffers, so we try to use the decode buffer and ring buffer for this. if that
|
// that means we need two buffers, so we try to use the decode buffer and ring buffer for this. if that
|
||||||
// is too small, we just allocate extra memory to use as this temp.
|
// is too small, we just allocate extra memory to use as this temp.
|
||||||
|
|
||||||
both = vertical->gather_prescatter_contributors_size + vertical->gather_prescatter_coefficients_size;
|
both = (size_t)vertical->gather_prescatter_contributors_size + (size_t)vertical->gather_prescatter_coefficients_size;
|
||||||
|
|
||||||
#ifdef STBIR__SEPARATE_ALLOCATIONS
|
#ifdef STBIR__SEPARATE_ALLOCATIONS
|
||||||
temp_mem_amt = decode_buffer_size;
|
temp_mem_amt = decode_buffer_size;
|
||||||
|
|
||||||
|
#ifdef STBIR_SIMD8
|
||||||
|
if ( effective_channels == 3 )
|
||||||
|
--temp_mem_amt; // avx in 3 channel mode needs one float at the start of the buffer
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
temp_mem_amt = ( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * splits;
|
temp_mem_amt = (size_t)( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * (size_t)splits;
|
||||||
#endif
|
#endif
|
||||||
if ( temp_mem_amt >= both )
|
if ( temp_mem_amt >= both )
|
||||||
{
|
{
|
||||||
|
@ -7208,33 +7275,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
|
||||||
if ( ( !info->vertical.is_gather ) && ( info->ring_buffer_num_entries > conservative_split_output_size ) )
|
if ( ( !info->vertical.is_gather ) && ( info->ring_buffer_num_entries > conservative_split_output_size ) )
|
||||||
info->ring_buffer_num_entries = conservative_split_output_size;
|
info->ring_buffer_num_entries = conservative_split_output_size;
|
||||||
STBIR_ASSERT( info->ring_buffer_num_entries <= info->alloc_ring_buffer_num_entries );
|
STBIR_ASSERT( info->ring_buffer_num_entries <= info->alloc_ring_buffer_num_entries );
|
||||||
|
|
||||||
// a few of the horizontal gather functions read past the end of the decode (but mask it out),
|
|
||||||
// so put in normal values so no snans or denormals accidentally sneak in (also, in the ring
|
|
||||||
// buffer for vertical first)
|
|
||||||
for( i = 0 ; i < splits ; i++ )
|
|
||||||
{
|
|
||||||
int t, ofs, start;
|
|
||||||
|
|
||||||
ofs = decode_buffer_size / 4;
|
|
||||||
start = ofs - 4;
|
|
||||||
if ( start < 0 ) start = 0;
|
|
||||||
|
|
||||||
for( t = start ; t < ofs; t++ )
|
|
||||||
info->split_info[i].decode_buffer[ t ] = 9999.0f;
|
|
||||||
|
|
||||||
if ( vertical_first )
|
|
||||||
{
|
|
||||||
int j;
|
|
||||||
for( j = 0; j < info->ring_buffer_num_entries ; j++ )
|
|
||||||
{
|
|
||||||
for( t = start ; t < ofs; t++ )
|
|
||||||
stbir__get_ring_buffer_entry( info, info->split_info + i, j )[ t ] = 9999.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef STBIR__NEXT_PTR
|
#undef STBIR__NEXT_PTR
|
||||||
|
|
||||||
|
|
||||||
|
@ -8197,7 +8238,7 @@ STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * info, STB
|
||||||
#define stbir__encode_simdfX_unflip stbir__encode_simdf4_unflip
|
#define stbir__encode_simdfX_unflip stbir__encode_simdf4_unflip
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
|
@ -8257,7 +8298,7 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco
|
||||||
decode = decode_end; // backup and do last couple
|
decode = decode_end; // backup and do last couple
|
||||||
input = end_input_m16;
|
input = end_input_m16;
|
||||||
}
|
}
|
||||||
return;
|
return decode_end + 16;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -8295,6 +8336,8 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outputp, int width_times_channels, float const * encode )
|
static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outputp, int width_times_channels, float const * encode )
|
||||||
|
@ -8414,7 +8457,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outpu
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
|
@ -8468,7 +8511,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int
|
||||||
decode = decode_end; // backup and do last couple
|
decode = decode_end; // backup and do last couple
|
||||||
input = end_input_m16;
|
input = end_input_m16;
|
||||||
}
|
}
|
||||||
return;
|
return decode_end + 16;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -8506,6 +8549,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int width_times_channels, float const * encode )
|
static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int width_times_channels, float const * encode )
|
||||||
|
@ -8607,10 +8651,10 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float const * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
unsigned char const * input = (unsigned char const *)inputp;
|
unsigned char const * input = (unsigned char const *)inputp;
|
||||||
|
|
||||||
// try to do blocks of 4 when you can
|
// try to do blocks of 4 when you can
|
||||||
|
@ -8645,6 +8689,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int wi
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define stbir__min_max_shift20( i, f ) \
|
#define stbir__min_max_shift20( i, f ) \
|
||||||
|
@ -8797,11 +8842,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int w
|
||||||
|
|
||||||
#if ( stbir__coder_min_num == 4 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
|
#if ( stbir__coder_min_num == 4 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float const * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
unsigned char const * input = (unsigned char const *)inputp;
|
unsigned char const * input = (unsigned char const *)inputp;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
decode[0] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ];
|
decode[0] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ];
|
||||||
decode[1] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order1] ];
|
decode[1] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order1] ];
|
||||||
|
@ -8810,6 +8856,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * de
|
||||||
input += 4;
|
input += 4;
|
||||||
decode += 4;
|
decode += 4;
|
||||||
} while( decode < decode_end );
|
} while( decode < decode_end );
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -8882,11 +8929,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * o
|
||||||
|
|
||||||
#if ( stbir__coder_min_num == 2 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
|
#if ( stbir__coder_min_num == 2 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float const * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
unsigned char const * input = (unsigned char const *)inputp;
|
unsigned char const * input = (unsigned char const *)inputp;
|
||||||
|
|
||||||
decode += 4;
|
decode += 4;
|
||||||
while( decode <= decode_end )
|
while( decode <= decode_end )
|
||||||
{
|
{
|
||||||
|
@ -8903,6 +8951,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * de
|
||||||
decode[0] = stbir__srgb_uchar_to_linear_float[ stbir__decode_order0 ];
|
decode[0] = stbir__srgb_uchar_to_linear_float[ stbir__decode_order0 ];
|
||||||
decode[1] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted;
|
decode[1] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted;
|
||||||
}
|
}
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * outputp, int width_times_channels, float const * encode )
|
static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * outputp, int width_times_channels, float const * encode )
|
||||||
|
@ -8968,7 +9017,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * o
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
|
@ -9016,7 +9065,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod
|
||||||
decode = decode_end; // backup and do last couple
|
decode = decode_end; // backup and do last couple
|
||||||
input = end_input_m8;
|
input = end_input_m8;
|
||||||
}
|
}
|
||||||
return;
|
return decode_end + 8;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -9054,6 +9103,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -9173,7 +9223,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
|
@ -9218,7 +9268,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int
|
||||||
decode = decode_end; // backup and do last couple
|
decode = decode_end; // backup and do last couple
|
||||||
input = end_input_m8;
|
input = end_input_m8;
|
||||||
}
|
}
|
||||||
return;
|
return decode_end + 8;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -9256,6 +9306,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int width_times_channels, float const * encode )
|
static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int width_times_channels, float const * encode )
|
||||||
|
@ -9356,7 +9407,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
float * decode_end = (float*) decode + width_times_channels;
|
float * decode_end = (float*) decode + width_times_channels;
|
||||||
|
@ -9402,7 +9453,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep,
|
||||||
decode = decode_end; // backup and do last couple
|
decode = decode_end; // backup and do last couple
|
||||||
input = end_input_m8;
|
input = end_input_m8;
|
||||||
}
|
}
|
||||||
return;
|
return decode_end + 8;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -9440,6 +9491,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep,
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
return decode_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp, int width_times_channels, float const * encode )
|
static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp, int width_times_channels, float const * encode )
|
||||||
|
@ -9526,7 +9578,7 @@ static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int width_times_channels, void const * inputp )
|
static float * STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int width_times_channels, void const * inputp )
|
||||||
{
|
{
|
||||||
#ifdef stbir__decode_swizzle
|
#ifdef stbir__decode_swizzle
|
||||||
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
float STBIR_STREAMOUT_PTR( * ) decode = decodep;
|
||||||
|
@ -9580,7 +9632,7 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int
|
||||||
decode = decode_end; // backup and do last couple
|
decode = decode_end; // backup and do last couple
|
||||||
input = end_input_m16;
|
input = end_input_m16;
|
||||||
}
|
}
|
||||||
return;
|
return decode_end + 16;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -9618,12 +9670,15 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int
|
||||||
input += stbir__coder_min_num;
|
input += stbir__coder_min_num;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
return decode_end;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
if ( (void*)decodep != inputp )
|
if ( (void*)decodep != inputp )
|
||||||
STBIR_MEMCPY( decodep, inputp, width_times_channels * sizeof( float ) );
|
STBIR_MEMCPY( decodep, inputp, width_times_channels * sizeof( float ) );
|
||||||
|
|
||||||
|
return decodep + width_times_channels;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue