| 1 | // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause | 
|---|
| 2 | /* ****************************************************************** | 
|---|
| 3 | * FSE : Finite State Entropy decoder | 
|---|
| 4 | * Copyright (c) Meta Platforms, Inc. and affiliates. | 
|---|
| 5 | * | 
|---|
| 6 | *  You can contact the author at : | 
|---|
| 7 | *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | 
|---|
| 8 | *  - Public forum : https://groups.google.com/forum/#!forum/lz4c | 
|---|
| 9 | * | 
|---|
| 10 | * This source code is licensed under both the BSD-style license (found in the | 
|---|
| 11 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | 
|---|
| 12 | * in the COPYING file in the root directory of this source tree). | 
|---|
| 13 | * You may select, at your option, one of the above-listed licenses. | 
|---|
| 14 | ****************************************************************** */ | 
|---|
| 15 |  | 
|---|
| 16 |  | 
|---|
| 17 | /* ************************************************************** | 
|---|
| 18 | *  Includes | 
|---|
| 19 | ****************************************************************/ | 
|---|
| 20 | #include "debug.h"      /* assert */ | 
|---|
| 21 | #include "bitstream.h" | 
|---|
| 22 | #include "compiler.h" | 
|---|
| 23 | #define FSE_STATIC_LINKING_ONLY | 
|---|
| 24 | #include "fse.h" | 
|---|
| 25 | #include "error_private.h" | 
|---|
| 26 | #include "zstd_deps.h"  /* ZSTD_memcpy */ | 
|---|
| 27 | #include "bits.h"       /* ZSTD_highbit32 */ | 
|---|
| 28 |  | 
|---|
| 29 |  | 
|---|
| 30 | /* ************************************************************** | 
|---|
| 31 | *  Error Management | 
|---|
| 32 | ****************************************************************/ | 
|---|
| 33 | #define FSE_isError ERR_isError | 
|---|
| 34 | #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */ | 
|---|
| 35 |  | 
|---|
| 36 |  | 
|---|
| 37 | /* ************************************************************** | 
|---|
| 38 | *  Templates | 
|---|
| 39 | ****************************************************************/ | 
|---|
| 40 | /* | 
|---|
| 41 | designed to be included | 
|---|
| 42 | for type-specific functions (template emulation in C) | 
|---|
| 43 | Objective is to write these functions only once, for improved maintenance | 
|---|
| 44 | */ | 
|---|
| 45 |  | 
|---|
| 46 | /* safety checks */ | 
|---|
| 47 | #ifndef FSE_FUNCTION_EXTENSION | 
|---|
| 48 | #  error "FSE_FUNCTION_EXTENSION must be defined" | 
|---|
| 49 | #endif | 
|---|
| 50 | #ifndef FSE_FUNCTION_TYPE | 
|---|
| 51 | #  error "FSE_FUNCTION_TYPE must be defined" | 
|---|
| 52 | #endif | 
|---|
| 53 |  | 
|---|
| 54 | /* Function names */ | 
|---|
| 55 | #define FSE_CAT(X,Y) X##Y | 
|---|
| 56 | #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) | 
|---|
| 57 | #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) | 
|---|
| 58 |  | 
|---|
| 59 | static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | 
|---|
| 60 | { | 
|---|
| 61 | void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */ | 
|---|
| 62 | FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); | 
|---|
| 63 | U16* symbolNext = (U16*)workSpace; | 
|---|
| 64 | BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); | 
|---|
| 65 |  | 
|---|
| 66 | U32 const maxSV1 = maxSymbolValue + 1; | 
|---|
| 67 | U32 const tableSize = 1 << tableLog; | 
|---|
| 68 | U32 highThreshold = tableSize-1; | 
|---|
| 69 |  | 
|---|
| 70 | /* Sanity Checks */ | 
|---|
| 71 | if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); | 
|---|
| 72 | if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); | 
|---|
| 73 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | 
|---|
| 74 |  | 
|---|
| 75 | /* Init, lay down lowprob symbols */ | 
|---|
| 76 | {   FSE_DTableHeader DTableH; | 
|---|
| 77 | DTableH.tableLog = (U16)tableLog; | 
|---|
| 78 | DTableH.fastMode = 1; | 
|---|
| 79 | {   S16 const largeLimit= (S16)(1 << (tableLog-1)); | 
|---|
| 80 | U32 s; | 
|---|
| 81 | for (s=0; s<maxSV1; s++) { | 
|---|
| 82 | if (normalizedCounter[s]==-1) { | 
|---|
| 83 | tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; | 
|---|
| 84 | symbolNext[s] = 1; | 
|---|
| 85 | } else { | 
|---|
| 86 | if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; | 
|---|
| 87 | symbolNext[s] = (U16)normalizedCounter[s]; | 
|---|
| 88 | }   }   } | 
|---|
| 89 | ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | /* Spread symbols */ | 
|---|
| 93 | if (highThreshold == tableSize - 1) { | 
|---|
| 94 | size_t const tableMask = tableSize-1; | 
|---|
| 95 | size_t const step = FSE_TABLESTEP(tableSize); | 
|---|
| 96 | /* First lay down the symbols in order. | 
|---|
| 97 | * We use a uint64_t to lay down 8 bytes at a time. This reduces branch | 
|---|
| 98 | * misses since small blocks generally have small table logs, so nearly | 
|---|
| 99 | * all symbols have counts <= 8. We ensure we have 8 bytes at the end of | 
|---|
| 100 | * our buffer to handle the over-write. | 
|---|
| 101 | */ | 
|---|
| 102 | {   U64 const add = 0x0101010101010101ull; | 
|---|
| 103 | size_t pos = 0; | 
|---|
| 104 | U64 sv = 0; | 
|---|
| 105 | U32 s; | 
|---|
| 106 | for (s=0; s<maxSV1; ++s, sv += add) { | 
|---|
| 107 | int i; | 
|---|
| 108 | int const n = normalizedCounter[s]; | 
|---|
| 109 | MEM_write64(memPtr: spread + pos, value: sv); | 
|---|
| 110 | for (i = 8; i < n; i += 8) { | 
|---|
| 111 | MEM_write64(memPtr: spread + pos + i, value: sv); | 
|---|
| 112 | } | 
|---|
| 113 | pos += (size_t)n; | 
|---|
| 114 | }   } | 
|---|
| 115 | /* Now we spread those positions across the table. | 
|---|
| 116 | * The benefit of doing it in two stages is that we avoid the | 
|---|
| 117 | * variable size inner loop, which caused lots of branch misses. | 
|---|
| 118 | * Now we can run through all the positions without any branch misses. | 
|---|
| 119 | * We unroll the loop twice, since that is what empirically worked best. | 
|---|
| 120 | */ | 
|---|
| 121 | { | 
|---|
| 122 | size_t position = 0; | 
|---|
| 123 | size_t s; | 
|---|
| 124 | size_t const unroll = 2; | 
|---|
| 125 | assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ | 
|---|
| 126 | for (s = 0; s < (size_t)tableSize; s += unroll) { | 
|---|
| 127 | size_t u; | 
|---|
| 128 | for (u = 0; u < unroll; ++u) { | 
|---|
| 129 | size_t const uPosition = (position + (u * step)) & tableMask; | 
|---|
| 130 | tableDecode[uPosition].symbol = spread[s + u]; | 
|---|
| 131 | } | 
|---|
| 132 | position = (position + (unroll * step)) & tableMask; | 
|---|
| 133 | } | 
|---|
| 134 | assert(position == 0); | 
|---|
| 135 | } | 
|---|
| 136 | } else { | 
|---|
| 137 | U32 const tableMask = tableSize-1; | 
|---|
| 138 | U32 const step = FSE_TABLESTEP(tableSize); | 
|---|
| 139 | U32 s, position = 0; | 
|---|
| 140 | for (s=0; s<maxSV1; s++) { | 
|---|
| 141 | int i; | 
|---|
| 142 | for (i=0; i<normalizedCounter[s]; i++) { | 
|---|
| 143 | tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; | 
|---|
| 144 | position = (position + step) & tableMask; | 
|---|
| 145 | while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */ | 
|---|
| 146 | }   } | 
|---|
| 147 | if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */ | 
|---|
| 148 | } | 
|---|
| 149 |  | 
|---|
| 150 | /* Build Decoding table */ | 
|---|
| 151 | {   U32 u; | 
|---|
| 152 | for (u=0; u<tableSize; u++) { | 
|---|
| 153 | FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); | 
|---|
| 154 | U32 const nextState = symbolNext[symbol]++; | 
|---|
| 155 | tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(val: nextState) ); | 
|---|
| 156 | tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); | 
|---|
| 157 | }   } | 
|---|
| 158 |  | 
|---|
| 159 | return 0; | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | 
|---|
| 163 | { | 
|---|
| 164 | return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize); | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 |  | 
|---|
| 168 | #ifndef FSE_COMMONDEFS_ONLY | 
|---|
| 169 |  | 
|---|
| 170 | /*-******************************************************* | 
|---|
| 171 | *  Decompression (Byte symbols) | 
|---|
| 172 | *********************************************************/ | 
|---|
| 173 |  | 
|---|
| 174 | FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( | 
|---|
| 175 | void* dst, size_t maxDstSize, | 
|---|
| 176 | const void* cSrc, size_t cSrcSize, | 
|---|
| 177 | const FSE_DTable* dt, const unsigned fast) | 
|---|
| 178 | { | 
|---|
| 179 | BYTE* const ostart = (BYTE*) dst; | 
|---|
| 180 | BYTE* op = ostart; | 
|---|
| 181 | BYTE* const omax = op + maxDstSize; | 
|---|
| 182 | BYTE* const olimit = omax-3; | 
|---|
| 183 |  | 
|---|
| 184 | BIT_DStream_t bitD; | 
|---|
| 185 | FSE_DState_t state1; | 
|---|
| 186 | FSE_DState_t state2; | 
|---|
| 187 |  | 
|---|
| 188 | /* Init */ | 
|---|
| 189 | CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); | 
|---|
| 190 |  | 
|---|
| 191 | FSE_initDState(DStatePtr: &state1, bitD: &bitD, dt); | 
|---|
| 192 | FSE_initDState(DStatePtr: &state2, bitD: &bitD, dt); | 
|---|
| 193 |  | 
|---|
| 194 | RETURN_ERROR_IF(BIT_reloadDStream(&bitD)==BIT_DStream_overflow, corruption_detected, ""); | 
|---|
| 195 |  | 
|---|
| 196 | #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) | 
|---|
| 197 |  | 
|---|
| 198 | /* 4 symbols per loop */ | 
|---|
| 199 | for ( ; (BIT_reloadDStream(bitD: &bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) { | 
|---|
| 200 | op[0] = FSE_GETSYMBOL(&state1); | 
|---|
| 201 |  | 
|---|
| 202 | if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */ | 
|---|
| 203 | BIT_reloadDStream(bitD: &bitD); | 
|---|
| 204 |  | 
|---|
| 205 | op[1] = FSE_GETSYMBOL(&state2); | 
|---|
| 206 |  | 
|---|
| 207 | if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */ | 
|---|
| 208 | { if (BIT_reloadDStream(bitD: &bitD) > BIT_DStream_unfinished) { op+=2; break; } } | 
|---|
| 209 |  | 
|---|
| 210 | op[2] = FSE_GETSYMBOL(&state1); | 
|---|
| 211 |  | 
|---|
| 212 | if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */ | 
|---|
| 213 | BIT_reloadDStream(bitD: &bitD); | 
|---|
| 214 |  | 
|---|
| 215 | op[3] = FSE_GETSYMBOL(&state2); | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | /* tail */ | 
|---|
| 219 | /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ | 
|---|
| 220 | while (1) { | 
|---|
| 221 | if (op>(omax-2)) return ERROR(dstSize_tooSmall); | 
|---|
| 222 | *op++ = FSE_GETSYMBOL(&state1); | 
|---|
| 223 | if (BIT_reloadDStream(bitD: &bitD)==BIT_DStream_overflow) { | 
|---|
| 224 | *op++ = FSE_GETSYMBOL(&state2); | 
|---|
| 225 | break; | 
|---|
| 226 | } | 
|---|
| 227 |  | 
|---|
| 228 | if (op>(omax-2)) return ERROR(dstSize_tooSmall); | 
|---|
| 229 | *op++ = FSE_GETSYMBOL(&state2); | 
|---|
| 230 | if (BIT_reloadDStream(bitD: &bitD)==BIT_DStream_overflow) { | 
|---|
| 231 | *op++ = FSE_GETSYMBOL(&state1); | 
|---|
| 232 | break; | 
|---|
| 233 | }   } | 
|---|
| 234 |  | 
|---|
| 235 | assert(op >= ostart); | 
|---|
| 236 | return (size_t)(op-ostart); | 
|---|
| 237 | } | 
|---|
| 238 |  | 
|---|
| 239 | typedef struct { | 
|---|
| 240 | short ncount[FSE_MAX_SYMBOL_VALUE + 1]; | 
|---|
| 241 | } FSE_DecompressWksp; | 
|---|
| 242 |  | 
|---|
| 243 |  | 
|---|
| 244 | FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( | 
|---|
| 245 | void* dst, size_t dstCapacity, | 
|---|
| 246 | const void* cSrc, size_t cSrcSize, | 
|---|
| 247 | unsigned maxLog, void* workSpace, size_t wkspSize, | 
|---|
| 248 | int bmi2) | 
|---|
| 249 | { | 
|---|
| 250 | const BYTE* const istart = (const BYTE*)cSrc; | 
|---|
| 251 | const BYTE* ip = istart; | 
|---|
| 252 | unsigned tableLog; | 
|---|
| 253 | unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; | 
|---|
| 254 | FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; | 
|---|
| 255 | size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable); | 
|---|
| 256 | FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos; | 
|---|
| 257 |  | 
|---|
| 258 | FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); | 
|---|
| 259 | if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); | 
|---|
| 260 |  | 
|---|
| 261 | /* correct offset to dtable depends on this property */ | 
|---|
| 262 | FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0); | 
|---|
| 263 |  | 
|---|
| 264 | /* normal FSE decoding mode */ | 
|---|
| 265 | {   size_t const NCountLength = | 
|---|
| 266 | FSE_readNCount_bmi2(normalizedCounter: wksp->ncount, maxSymbolValuePtr: &maxSymbolValue, tableLogPtr: &tableLog, rBuffer: istart, rBuffSize: cSrcSize, bmi2); | 
|---|
| 267 | if (FSE_isError(code: NCountLength)) return NCountLength; | 
|---|
| 268 | if (tableLog > maxLog) return ERROR(tableLog_tooLarge); | 
|---|
| 269 | assert(NCountLength <= cSrcSize); | 
|---|
| 270 | ip += NCountLength; | 
|---|
| 271 | cSrcSize -= NCountLength; | 
|---|
| 272 | } | 
|---|
| 273 |  | 
|---|
| 274 | if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); | 
|---|
| 275 | assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); | 
|---|
| 276 | workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); | 
|---|
| 277 | wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); | 
|---|
| 278 |  | 
|---|
| 279 | CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); | 
|---|
| 280 |  | 
|---|
| 281 | { | 
|---|
| 282 | const void* ptr = dtable; | 
|---|
| 283 | const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; | 
|---|
| 284 | const U32 fastMode = DTableH->fastMode; | 
|---|
| 285 |  | 
|---|
| 286 | /* select fast mode (static) */ | 
|---|
| 287 | if (fastMode) return FSE_decompress_usingDTable_generic(dst, maxDstSize: dstCapacity, cSrc: ip, cSrcSize, dt: dtable, fast: 1); | 
|---|
| 288 | return FSE_decompress_usingDTable_generic(dst, maxDstSize: dstCapacity, cSrc: ip, cSrcSize, dt: dtable, fast: 0); | 
|---|
| 289 | } | 
|---|
| 290 | } | 
|---|
| 291 |  | 
|---|
| 292 | /* Avoids the FORCE_INLINE of the _body() function. */ | 
|---|
| 293 | static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) | 
|---|
| 294 | { | 
|---|
| 295 | return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, bmi2: 0); | 
|---|
| 296 | } | 
|---|
| 297 |  | 
|---|
| 298 | #if DYNAMIC_BMI2 | 
|---|
| 299 | BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) | 
|---|
| 300 | { | 
|---|
| 301 | return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, bmi2: 1); | 
|---|
| 302 | } | 
|---|
| 303 | #endif | 
|---|
| 304 |  | 
|---|
| 305 | size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) | 
|---|
| 306 | { | 
|---|
| 307 | #if DYNAMIC_BMI2 | 
|---|
| 308 | if (bmi2) { | 
|---|
| 309 | return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); | 
|---|
| 310 | } | 
|---|
| 311 | #endif | 
|---|
| 312 | (void)bmi2; | 
|---|
| 313 | return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|
| 316 | #endif   /* FSE_COMMONDEFS_ONLY */ | 
|---|
| 317 |  | 
|---|