// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* zstd_decompress_block :
 * this module takes care of decompressing _compressed_ blocks */

/*-*******************************************************
*  Dependencies
*********************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/compiler.h"    /* prefetch */
#include "../common/cpu.h"         /* bmi2 */
#include "../common/mem.h"         /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#include "../common/huf.h"
#include "../common/zstd_internal.h"
#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h"
#include "../common/bits.h"  /* ZSTD_highbit32 */

/*_*******************************************************
*  Macros
**********************************************************/

/* These two optional macros force the use of one or the other of the two
 * ZSTD_decompressSequences implementations. You can't force both directions
 * at the same time.
 */
#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
#endif


/*_*******************************************************
*  Memory operations
**********************************************************/
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }


/*-*************************************************************
 *   Block decoding
 ***************************************************************/

static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
{
    size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
    assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
    return blockSizeMax;
}

/*! ZSTD_getcBlockSize() :
 *  Provides the size of the compressed block from the block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr)
{
    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");

    {   U32 const cBlockHeader = MEM_readLE24(src);
        U32 const cSize = cBlockHeader >> 3;
        bpPtr->lastBlock = cBlockHeader & 1;
        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
        bpPtr->origSize = cSize;   /* only useful for RLE */
        if (bpPtr->blockType == bt_rle) return 1;
        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
        return cSize;
    }
}
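
/* Editorial worked example (not part of upstream zstd) : the block header is a 3-byte
 * little-endian field. For instance, bytes { 0x21, 0x00, 0x00 } give cBlockHeader = 0x000021 :
 *   - bit 0      : lastBlock = 1          (last block of the frame)
 *   - bits 1-2   : blockType = 0          (bt_raw)
 *   - bits 3-23  : cSize     = 0x21 >> 3  = 4 bytes
 * so ZSTD_getcBlockSize() fills bpPtr accordingly and returns 4.
 */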

/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
    const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
{
    size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
    assert(litSize <= blockSizeMax);
    assert(dctx->isFrameDecompression || streaming == not_streaming);
    assert(expectedWriteSize <= blockSizeMax);
    if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
        /* If we aren't streaming, we can just put the literals after the output
         * of the current block. We don't need to worry about overwriting the
         * extDict of our window, because it doesn't exist.
         * So if we have space after the end of the block, just put it there.
         */
        dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
        dctx->litBufferEnd = dctx->litBuffer + litSize;
        dctx->litBufferLocation = ZSTD_in_dst;
    } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
        /* Literals fit entirely within the extra buffer, put them there to avoid
         * having to split the literals.
         */
        dctx->litBuffer = dctx->litExtraBuffer;
        dctx->litBufferEnd = dctx->litBuffer + litSize;
        dctx->litBufferLocation = ZSTD_not_in_dst;
    } else {
        assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
        /* Literals must be split between the output block and the extra lit
         * buffer. We fill the extra lit buffer with the tail of the literals,
         * and put the rest of the literals at the end of the block, with
         * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
         * This MUST not write more than our maxBlockSize beyond dst, because in
         * streaming mode, that could overwrite part of our extDict window.
         */
        if (splitImmediately) {
            /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
            dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
        } else {
            /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
            dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
        }
        dctx->litBufferLocation = ZSTD_split;
        assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
    }
}
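
/* Summary of the three placements chosen above (editorial note, not upstream) :
 *   ZSTD_in_dst     : literals live entirely in dst, past the current block's output area.
 *   ZSTD_not_in_dst : literals live entirely in dctx->litExtraBuffer.
 *   ZSTD_split      : the head of the literals sits at the end of the current block's
 *                     output space, while the last ZSTD_LITBUFFEREXTRASIZE bytes end up in
 *                     dctx->litExtraBuffer (either immediately, or after Huffman decoding).
 */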

/*! ZSTD_decodeLiteralsBlock() :
 * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
 * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
 * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
 * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
 *
 * @return : nb of bytes read from src (< srcSize )
 *  note : symbol not declared but exposed for fullbench */
static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
                          void* dst, size_t dstCapacity, const streaming_operation streaming)
{
    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");

    {   const BYTE* const istart = (const BYTE*) src;
        SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[0] & 3);
        size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);

        switch(litEncType)
        {
        case set_repeat:
            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
            ZSTD_FALLTHROUGH;

        case set_compressed:
            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;
                U32 const lhc = MEM_readLE32(istart);
                size_t hufSuccess;
                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
                int const flags = 0
                    | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
                    | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
                switch(lhlCode)
                {
                case 0: case 1: default:   /* note : default is impossible, since lhlCode is in [0..3] */
                    /* 2 - 2 - 10 - 10 */
                    singleStream = !lhlCode;
                    lhSize = 3;
                    litSize  = (lhc >> 4) & 0x3FF;
                    litCSize = (lhc >> 14) & 0x3FF;
                    break;
                case 2:
                    /* 2 - 2 - 14 - 14 */
                    lhSize = 4;
                    litSize  = (lhc >> 4) & 0x3FFF;
                    litCSize = lhc >> 18;
                    break;
                case 3:
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
                if (!singleStream)
                    RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
                        "Not enough literals (%zu) for the 4-streams mode (min %u)",
                        litSize, MIN_LITERALS_FOR_4_STREAMS);
                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);

                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
                }

                if (litEncType==set_repeat) {
                    if (singleStream) {
                        hufSuccess = HUF_decompress1X_usingDTable(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, flags);
                    } else {
                        assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
                        hufSuccess = HUF_decompress4X_usingDTable(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, flags);
                    }
                } else {
                    if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                        hufSuccess = HUF_decompress1X_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), flags);
#else
                        hufSuccess = HUF_decompress1X1_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), flags);
#endif
                    } else {
                        hufSuccess = HUF_decompress4X_hufOnly_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), flags);
                    }
                }
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
                    assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
                }

                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");

                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                dctx->litEntropy = 1;
                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
                return litCSize + lhSize;
            }

        case set_basic:
            {   size_t litSize, lhSize;
                U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode is in [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }

                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
                    if (dctx->litBufferLocation == ZSTD_split)
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    }
                    else
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
                    }
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
                    return lhSize+litSize;
                }
                /* direct reference into compressed stream */
                dctx->litPtr = istart+lhSize;
                dctx->litSize = litSize;
                dctx->litBufferEnd = dctx->litPtr + litSize;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                return lhSize+litSize;
            }

        case set_rle:
            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t litSize, lhSize;
                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode is in [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
                }
                else
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
                }
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+1;
            }
        default:
            RETURN_ERROR(corruption_detected, "impossible");
        }
    }
}

/* Hidden declaration for fullbench */
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,
                          void* dst, size_t dstCapacity);
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,
                          void* dst, size_t dstCapacity)
{
    dctx->isFrameDecompression = 0;
    return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
}

/* Default FSE distribution tables.
 * These are pre-calculated FSE decoding tables using default distributions as defined in the specification :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
 * They were generated programmatically with the following method :
 * - start from the default distributions, present in /lib/common/zstd_internal.h
 * - generate the tables normally, using ZSTD_buildFSETable()
 * - print out the content of the tables
 * - prettify the output, paste it below, and test with the fuzzer to ensure it's correct */
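
/* Editorial sketch (not part of upstream) : regenerating one of these tables roughly
 * amounts to the call below, assuming the default normalized counters (e.g. LL_defaultNorm)
 * from zstd_internal.h and a workspace of at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes;
 * kept as a comment since the identifiers live outside this file :
 *
 *   ZSTD_seqSymbol dt[(1<<LL_DEFAULTNORMLOG)+1];
 *   U32 wksp[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
 *   ZSTD_buildFSETable(dt, LL_defaultNorm, MaxLL,
 *                      LL_base, LL_bits, LL_DEFAULTNORMLOG,
 *                      wksp, sizeof(wksp), 0);   // bmi2 disabled
 *   // then print dt[] and paste the result below
 */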

/* Default FSE distribution table for Literal Lengths */
static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
     /* nextState, nbAddBits, nbBits, baseVal */
     { 0, 0, 4, 0},     { 16, 0, 4, 0},
     { 32, 0, 5, 1},    { 0, 0, 5, 3},
     { 0, 0, 5, 4},     { 0, 0, 5, 6},
     { 0, 0, 5, 7},     { 0, 0, 5, 9},
     { 0, 0, 5, 10},    { 0, 0, 5, 12},
     { 0, 0, 6, 14},    { 0, 1, 5, 16},
     { 0, 1, 5, 20},    { 0, 1, 5, 22},
     { 0, 2, 5, 28},    { 0, 3, 5, 32},
     { 0, 4, 5, 48},    { 32, 6, 5, 64},
     { 0, 7, 5, 128},   { 0, 8, 6, 256},
     { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
     { 32, 0, 4, 0},    { 0, 0, 4, 1},
     { 0, 0, 5, 2},     { 32, 0, 5, 4},
     { 0, 0, 5, 5},     { 32, 0, 5, 7},
     { 0, 0, 5, 8},     { 32, 0, 5, 10},
     { 0, 0, 5, 11},    { 0, 0, 6, 13},
     { 32, 1, 5, 16},   { 0, 1, 5, 18},
     { 32, 1, 5, 22},   { 0, 2, 5, 24},
     { 32, 3, 5, 32},   { 0, 3, 5, 40},
     { 0, 6, 4, 64},    { 16, 6, 4, 64},
     { 32, 7, 5, 128},  { 0, 9, 6, 512},
     { 0, 11, 6, 2048}, { 48, 0, 4, 0},
     { 16, 0, 4, 1},    { 32, 0, 5, 2},
     { 32, 0, 5, 3},    { 32, 0, 5, 5},
     { 32, 0, 5, 6},    { 32, 0, 5, 8},
     { 32, 0, 5, 9},    { 32, 0, 5, 11},
     { 32, 0, 5, 12},   { 0, 0, 6, 15},
     { 32, 1, 5, 18},   { 32, 1, 5, 20},
     { 32, 2, 5, 24},   { 32, 2, 5, 28},
     { 32, 3, 5, 40},   { 32, 4, 5, 48},
     { 0, 16, 6, 65536}, { 0, 15, 6, 32768},
     { 0, 14, 6, 16384}, { 0, 13, 6, 8192},
};   /* LL_defaultDTable */

/* Default FSE distribution table for Offset Codes */
static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    { 0, 0, 5, 0},          { 0, 6, 4, 61},
    { 0, 9, 5, 509},        { 0, 15, 5, 32765},
    { 0, 21, 5, 2097149},   { 0, 3, 5, 5},
    { 0, 7, 4, 125},        { 0, 12, 5, 4093},
    { 0, 18, 5, 262141},    { 0, 23, 5, 8388605},
    { 0, 5, 5, 29},         { 0, 8, 4, 253},
    { 0, 14, 5, 16381},     { 0, 20, 5, 1048573},
    { 0, 2, 5, 1},          { 16, 7, 4, 125},
    { 0, 11, 5, 2045},      { 0, 17, 5, 131069},
    { 0, 22, 5, 4194301},   { 0, 4, 5, 13},
    { 16, 8, 4, 253},       { 0, 13, 5, 8189},
    { 0, 19, 5, 524285},    { 0, 1, 5, 1},
    { 16, 6, 4, 61},        { 0, 10, 5, 1021},
    { 0, 16, 5, 65533},     { 0, 28, 5, 268435453},
    { 0, 27, 5, 134217725}, { 0, 26, 5, 67108861},
    { 0, 25, 5, 33554429},  { 0, 24, 5, 16777213},
};   /* OF_defaultDTable */


/* Default FSE distribution table for Match Lengths */
static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    { 0, 0, 6, 3},       { 0, 0, 4, 4},
    { 32, 0, 5, 5},      { 0, 0, 5, 6},
    { 0, 0, 5, 8},       { 0, 0, 5, 9},
    { 0, 0, 5, 11},      { 0, 0, 6, 13},
    { 0, 0, 6, 16},      { 0, 0, 6, 19},
    { 0, 0, 6, 22},      { 0, 0, 6, 25},
    { 0, 0, 6, 28},      { 0, 0, 6, 31},
    { 0, 0, 6, 34},      { 0, 1, 6, 37},
    { 0, 1, 6, 41},      { 0, 2, 6, 47},
    { 0, 3, 6, 59},      { 0, 4, 6, 83},
    { 0, 7, 6, 131},     { 0, 9, 6, 515},
    { 16, 0, 4, 4},      { 0, 0, 4, 5},
    { 32, 0, 5, 6},      { 0, 0, 5, 7},
    { 32, 0, 5, 9},      { 0, 0, 5, 10},
    { 0, 0, 6, 12},      { 0, 0, 6, 15},
    { 0, 0, 6, 18},      { 0, 0, 6, 21},
    { 0, 0, 6, 24},      { 0, 0, 6, 27},
    { 0, 0, 6, 30},      { 0, 0, 6, 33},
    { 0, 1, 6, 35},      { 0, 1, 6, 39},
    { 0, 2, 6, 43},      { 0, 3, 6, 51},
    { 0, 4, 6, 67},      { 0, 5, 6, 99},
    { 0, 8, 6, 259},     { 32, 0, 4, 4},
    { 48, 0, 4, 4},      { 16, 0, 4, 5},
    { 32, 0, 5, 7},      { 32, 0, 5, 8},
    { 32, 0, 5, 10},     { 32, 0, 5, 11},
    { 0, 0, 6, 14},      { 0, 0, 6, 17},
    { 0, 0, 6, 20},      { 0, 0, 6, 23},
    { 0, 0, 6, 26},      { 0, 0, 6, 29},
    { 0, 0, 6, 32},      { 0, 16, 6, 65539},
    { 0, 15, 6, 32771},  { 0, 14, 6, 16387},
    { 0, 13, 6, 8195},   { 0, 12, 6, 4099},
    { 0, 11, 6, 2051},   { 0, 10, 6, 1027},
};   /* ML_defaultDTable */


static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
{
    void* ptr = dt;
    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
    ZSTD_seqSymbol* const cell = dt + 1;

    DTableH->tableLog = 0;
    DTableH->fastMode = 0;

    cell->nbBits = 0;
    cell->nextState = 0;
    assert(nbAddBits < 255);
    cell->nbAdditionalBits = nbAddBits;
    cell->baseValue = baseValue;
}


/* ZSTD_buildFSETable() :
 * generate FSE decoding table for one symbol (ll, ml or off)
 * cannot fail if input is valid =>
 * all inputs are presumed validated at this stage */
FORCE_INLINE_TEMPLATE
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_seqSymbol* const tableDecode = dt+1;
    U32 const maxSV1 = maxSymbolValue + 1;
    U32 const tableSize = 1 << tableLog;

    U16* symbolNext = (U16*)wksp;
    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
    U32 highThreshold = tableSize - 1;


    /* Sanity Checks */
    assert(maxSymbolValue <= MaxSeq);
    assert(tableLog <= MaxFSELog);
    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
    (void)wkspSize;
    /* Init, lay down lowprob symbols */
    {   ZSTD_seqSymbol_header DTableH;
        DTableH.tableLog = tableLog;
        DTableH.fastMode = 1;
        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
            U32 s;
            for (s=0; s<maxSV1; s++) {
                if (normalizedCounter[s]==-1) {
                    tableDecode[highThreshold--].baseValue = s;
                    symbolNext[s] = 1;
                } else {
                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
                    assert(normalizedCounter[s]>=0);
                    symbolNext[s] = (U16)normalizedCounter[s];
        }   }   }
        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
    }

    /* Spread symbols */
    assert(tableSize <= 512);
    /* Specialized symbol spreading for the case when there are
     * no low-probability (-1 count) symbols. When compressing
     * small blocks, the compressor avoids low-probability symbols
     * so that decoding hits this case, since header decoding speed
     * matters more there.
     */
    if (highThreshold == tableSize - 1) {
        size_t const tableMask = tableSize-1;
        size_t const step = FSE_TABLESTEP(tableSize);
        /* First lay down the symbols in order.
         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
         * misses since small blocks generally have small table logs, so nearly
         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
         * our buffer to handle the over-write.
         */
        {
            U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                assert(n>=0);
                pos += (size_t)n;
            }
        }
        /* Now we spread those positions across the table.
         * The benefit of doing it in two stages is that we avoid the
         * variable size inner loop, which caused lots of branch misses.
         * Now we can run through all the positions without any branch misses.
         * We unroll the loop twice, since that is what empirically worked best.
         */
        {
            size_t position = 0;
            size_t s;
            size_t const unroll = 2;
            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableDecode[uPosition].baseValue = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);
        }
    } else {
        U32 const tableMask = tableSize-1;
        U32 const step = FSE_TABLESTEP(tableSize);
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
            int i;
            int const n = normalizedCounter[s];
            for (i=0; i<n; i++) {
                tableDecode[position].baseValue = s;
                position = (position + step) & tableMask;
                while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask;   /* lowprob area */
        }   }
        assert(position == 0);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
    }

    /* Build Decoding table */
    {
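        /* Editorial worked example (not upstream) : with tableLog == 6 (tableSize == 64),
         * a symbol whose normalized count is 4 occupies cells with nextState values
         * 4, 5, 6, 7. For nextState == 4 : nbBits = 6 - highbit32(4) = 4 and
         * newState = (4 << 4) - 64 = 0 ; for nextState == 5 : newState = 16, and so on.
         * Reading nbBits fresh bits then lands the decoder on one of that symbol's
         * cells again, which is the FSE state-transition invariant this loop encodes.
         */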
        U32 u;
        for (u=0; u<tableSize; u++) {
            U32 const symbol = tableDecode[u].baseValue;
            U32 const nextState = symbolNext[symbol]++;
            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
            assert(nbAdditionalBits[symbol] < 255);
            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
            tableDecode[u].baseValue = baseValue[symbol];
        }
    }
}

/* Avoids the FORCE_INLINE of the _body() function. */
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
#endif

void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
        return;
    }
#endif
    (void)bmi2;
    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}


/*! ZSTD_buildSeqTable() :
 * @return : nb bytes read from src,
 *           or an error code if it fails */
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                                 SymbolEncodingType_e type, unsigned max, U32 maxLog,
                                 const void* src, size_t srcSize,
                                 const U32* baseValue, const U8* nbAdditionalBits,
                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
                                 int bmi2)
{
    switch(type)
    {
    case set_rle :
        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
        {   U32 const symbol = *(const BYTE*)src;
            U32 const baseline = baseValue[symbol];
            U8 const nbBits = nbAdditionalBits[symbol];
            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
        }
        *DTablePtr = DTableSpace;
        return 1;
    case set_basic :
        *DTablePtr = defaultTable;
        return 0;
    case set_repeat:
        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
        /* prefetch FSE table if used */
        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
            const void* const pStart = *DTablePtr;
            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
            PREFETCH_AREA(pStart, pSize);
        }
        return 0;
    case set_compressed :
        {   unsigned tableLog;
            S16 norm[MaxSeq+1];
            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
            *DTablePtr = DTableSpace;
            return headerSize;
        }
    default :
        assert(0);
        RETURN_ERROR(GENERIC, "impossible");
    }
}

size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead */
    nbSeq = *ip++;
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
            ip+=2;
        } else {
            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
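    /* Editorial examples (not upstream) of the Number_of_Sequences encoding decoded above :
     *   first byte 0x24                -> nbSeq = 0x24 = 36
     *   first byte 0x85, next 0x10     -> nbSeq = ((0x85-0x80)<<8) + 0x10 = 1296
     *   first byte 0xFF, next LE16 N   -> nbSeq = N + LONGNBSEQ  (LONGNBSEQ == 0x7F00)
     */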
    *nbSeqPtr = nbSeq;

    if (nbSeq == 0) {
        /* No sequence : section ends immediately */
        RETURN_ERROR_IF(ip != iend, corruption_detected,
            "extraneous data present in the Sequences section");
        return (size_t)(ip - istart);
    }

    /* FSE table descriptors */
    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, "");   /* minimum possible size: 1 byte for symbol encoding types */
    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
    {   SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> 6);
        SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> 4) & 3);
        SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

        /* Build DTables */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }

        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }

        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return ip-istart;
}


typedef struct {
    size_t litLength;
    size_t matchLength;
    size_t offset;
} seq_t;

typedef struct {
    size_t state;
    const ZSTD_seqSymbol* table;
} ZSTD_fseState;

typedef struct {
    BIT_DStream_t DStream;
    ZSTD_fseState stateLL;
    ZSTD_fseState stateOffb;
    ZSTD_fseState stateML;
    size_t prevOffset[ZSTD_REP_NUM];
} seqState_t;

/*! ZSTD_overlapCopy8() :
 *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
 *  If the offset is < 8 then the offset is spread to at least 8 bytes.
 *
 *  Precondition: *ip <= *op
 *  Postcondition: *op - *ip >= 8
 */
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
    assert(*ip <= *op);
    if (offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[offset];
        (*op)[0] = (*ip)[0];
        (*op)[1] = (*ip)[1];
        (*op)[2] = (*ip)[2];
        (*op)[3] = (*ip)[3];
        *ip += dec32table[offset];
        ZSTD_copy4(*op+4, *ip);
        *ip -= sub2;
    } else {
        ZSTD_copy8(*op, *ip);
    }
    *ip += 8;
    *op += 8;
    assert(*op - *ip >= 8);
}
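
/* Editorial worked example (not upstream) : with offset == 2 the match is the 2-byte
 * pattern just before op. The four single-byte copies replicate that pattern into
 * op[0..3]; *ip then advances by dec32table[2] == 2 so that ZSTD_copy4() duplicates
 * op[0..3] into op[4..7]; finally *ip is pulled back by dec64table[2] == 8, leaving
 * *op - *ip == 8 after the common "+= 8" updates, so the caller can continue with
 * plain 8-byte copies.
 */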

/*! ZSTD_safecopy() :
 *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
 *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
 *  This function is only called in the uncommon case where the sequence is near the end of the block. It
 *  should be fast for a single long sequence, but can be slow for several short sequences.
 *
 *  @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
 *           The src buffer must be before the dst buffer.
 */
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

    if (length < 8) {
        /* Handle short lengths. */
        while (op < oend) *op++ = *ip++;
        return;
    }
    if (ovtype == ZSTD_overlap_src_before_dst) {
        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
        assert(length >= 8);
        ZSTD_overlapCopy8(&op, &ip, diff);
        length -= 8;
        assert(op - ip >= 8);
        assert(op <= oend);
    }

    if (oend <= oend_w) {
        /* No risk of overwrite. */
        ZSTD_wildcopy(op, ip, length, ovtype);
        return;
    }
    if (op <= oend_w) {
        /* Wildcopy until we get close to the end. */
        assert(oend > oend_w);
        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
        ip += oend_w - op;
        op += oend_w - op;
    }
    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}

/* ZSTD_safecopyDstBeforeSrc():
 * This version allows overlap with dst before src, and also handles the non-overlap case with dst after src.
 * Kept separate from the more common ZSTD_safecopy case to avoid impacting its performance. */
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    if (length < 8 || diff > -8) {
        /* Handle short lengths, close overlaps, and dst not before src. */
        while (op < oend) *op++ = *ip++;
        return;
    }

    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
        ip += oend - WILDCOPY_OVERLENGTH - op;
        op += oend - WILDCOPY_OVERLENGTH - op;
    }

    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}

/* ZSTD_execSequenceEnd():
 * This version handles cases that are near the end of the output buffer. It requires
 * more careful checks to make sure there is no overflow. By separating out these hard
 * and unlikely cases, we can speed up the common cases.
 *
 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
 */
FORCE_NOINLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceEnd(BYTE* op,
                            BYTE* const oend, seq_t sequence,
                            const BYTE** litPtr, const BYTE* const litLimit,
                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}

/* ZSTD_execSequenceEndSplitLitBuffer():
 * This version is intended for use in cases where the literals buffer is still split. It is kept separate to avoid a performance impact on the good case.
 */
FORCE_NOINLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
                            BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
                            const BYTE** litPtr, const BYTE* const litLimit,
                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;


    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}

HINT_INLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequence(BYTE* op,
                         BYTE* const oend, seq_t sequence,
                         const BYTE** litPtr, const BYTE* const litLimit,
                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;     /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);

#if defined(__aarch64__)
    /* prefetch sequence starting from match that will be used for copy later */
    PREFETCH_L1(match);
#endif
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLENGTH of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
        iLitEnd > litLimit ||
        oMatchEnd > oend_w ||
        (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}

HINT_INLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
                                       BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
                                       const BYTE** litPtr, const BYTE* const litLimit,
                                       const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLENGTH of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
        iLitEnd > litLimit ||
        oMatchEnd > oend_w ||
        (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEndSplitLitBuffer()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}


static void
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
{
    const void* ptr = dt;
    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
                (U32)DStatePtr->state, DTableH->tableLog);
    BIT_reloadDStream(bitD);
    DStatePtr->table = dt + 1;
}

FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
{
    size_t const lowBits = BIT_readBits(bitD, nbBits);
    DStatePtr->state = nextState + lowBits;
}

/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
 * bits before reloading. This value is the maximum number of extra bits we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)
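
/* Editorial note (not upstream) : with the usual limits ZSTD_WINDOWLOG_MAX_32 == 30 and
 * STREAM_ACCUMULATOR_MIN_32 == 25, LONG_OFFSETS_MAX_EXTRA_BITS_32 evaluates to 5, which is
 * what the ZSTD_STATIC_ASSERT() in ZSTD_decodeSequence() below checks.
 */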

typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;

/*
 * ZSTD_decodeSequence():
 * @p longOffsets : tells the decoder to reload more bits while decoding large offsets
 *                  (only used in 32-bit mode)
 * @return : Sequence (litL + matchL + offset)
 */
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
{
    seq_t seq;
    /*
     * ZSTD_seqSymbol is a 64-bit wide structure.
     * It can be loaded in one operation
     * and its fields extracted by simply shifting or bit-extracting on aarch64.
     * GCC doesn't recognize this and generates unnecessary ldr/ldrb/ldrh
     * operations that cause a performance drop. This can be avoided by using this
     * ZSTD_memcpy hack.
     */
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
    ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
    ZSTD_seqSymbol* const llDInfo = &llDInfoS;
    ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
    ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
    ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
    ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
    ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
#else
    const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
    const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
    const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
#endif
    seq.matchLength = mlDInfo->baseValue;
    seq.litLength = llDInfo->baseValue;
    {   U32 const ofBase = ofDInfo->baseValue;
        BYTE const llBits = llDInfo->nbAdditionalBits;
        BYTE const mlBits = mlDInfo->nbAdditionalBits;
        BYTE const ofBits = ofDInfo->nbAdditionalBits;
        BYTE const totalBits = llBits+mlBits+ofBits;

        U16 const llNext = llDInfo->nextState;
        U16 const mlNext = mlDInfo->nextState;
        U16 const ofNext = ofDInfo->nextState;
        U32 const llnbBits = llDInfo->nbBits;
        U32 const mlnbBits = mlDInfo->nbBits;
        U32 const ofnbBits = ofDInfo->nbBits;

        assert(llBits <= MaxLLBits);
        assert(mlBits <= MaxMLBits);
        assert(ofBits <= MaxOff);
        /*
         * gcc has better branch and block analyzers, so sometimes it is only
         * valuable to mark likeliness for clang; doing so gives around 3-4%
         * of performance.
         */

        /* sequence */
        {   size_t offset;
            if (ofBits > 1) {
                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                    /* Always read extra bits, this keeps the logic simple,
                     * avoids branches, and avoids accidentally reading 0 bits.
                     */
                    U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                    BIT_reloadDStream(&seqState->DStream);
                    offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                } else {
                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
                }
                seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset;
            } else {
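                /* Editorial note (not upstream) : small offset values (1, 2 or 3, after the
                 * ll0 adjustment) select one of the three previous offsets. The shift applied
                 * when litLength == 0 follows the Zstandard format specification's rule that
                 * repeat-offset meanings rotate for sequences with no literals.
                 */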
1301 U32 const ll0 = (llDInfo->baseValue == 0);
1302 if (LIKELY((ofBits == 0))) {
1303 offset = seqState->prevOffset[ll0];
1304 seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1305 seqState->prevOffset[0] = offset;
1306 } else {
1307 offset = ofBase + ll0 + BIT_readBitsFast(bitD: &seqState->DStream, nbBits: 1);
1308 { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1309 temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
1310 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1311 seqState->prevOffset[1] = seqState->prevOffset[0];
1312 seqState->prevOffset[0] = offset = temp;
1313 } } }
1314 seq.offset = offset;
1315 }
1316
1317 if (mlBits > 0)
1318 seq.matchLength += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: mlBits/*>0*/);
1319
1320 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1321 BIT_reloadDStream(bitD: &seqState->DStream);
1322 if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1323 BIT_reloadDStream(bitD: &seqState->DStream);
1324 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1325 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1326
1327 if (llBits > 0)
1328 seq.litLength += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: llBits/*>0*/);
1329
1330 if (MEM_32bits())
1331 BIT_reloadDStream(bitD: &seqState->DStream);
1332
1333 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1334 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1335
1336 if (!isLastSeq) {
1337 /* don't update FSE state for last Sequence */
1338 ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateLL, bitD: &seqState->DStream, nextState: llNext, nbBits: llnbBits); /* <= 9 bits */
1339 ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateML, bitD: &seqState->DStream, nextState: mlNext, nbBits: mlnbBits); /* <= 9 bits */
1340 if (MEM_32bits()) BIT_reloadDStream(bitD: &seqState->DStream); /* <= 18 bits */
1341 ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateOffb, bitD: &seqState->DStream, nextState: ofNext, nbBits: ofnbBits); /* <= 8 bits */
1342 BIT_reloadDStream(bitD: &seqState->DStream);
1343 }
1344 }
1345
1346 return seq;
1347}
1348
1349#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1350#if DEBUGLEVEL >= 1
1351static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1352{
1353 size_t const windowSize = dctx->fParams.windowSize;
1354 /* No dictionary used. */
1355 if (dctx->dictContentEndForFuzzing == NULL) return 0;
1356 /* Dictionary is our prefix. */
1357 if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1358 /* Dictionary is not our ext-dict. */
1359 if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1360 /* Dictionary is not within our window size. */
1361 if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1362 /* Dictionary is active. */
1363 return 1;
1364}
1365#endif
1366
1367static void ZSTD_assertValidSequence(
1368 ZSTD_DCtx const* dctx,
1369 BYTE const* op, BYTE const* oend,
1370 seq_t const seq,
1371 BYTE const* prefixStart, BYTE const* virtualStart)
1372{
1373#if DEBUGLEVEL >= 1
1374 if (dctx->isFrameDecompression) {
1375 size_t const windowSize = dctx->fParams.windowSize;
1376 size_t const sequenceSize = seq.litLength + seq.matchLength;
1377 BYTE const* const oLitEnd = op + seq.litLength;
1378 DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1379 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1380 assert(op <= oend);
1381 assert((size_t)(oend - op) >= sequenceSize);
1382 assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
1383 if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1384 size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1385 /* Offset must be within the dictionary. */
1386 assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1387 assert(seq.offset <= windowSize + dictSize);
1388 } else {
1389 /* Offset must be within our window. */
1390 assert(seq.offset <= windowSize);
1391 }
1392 }
1393#else
1394 (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1395#endif
1396}
1397#endif
1398
1399#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1400
1401
1402FORCE_INLINE_TEMPLATE size_t
1403DONT_VECTORIZE
1404ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1405 void* dst, size_t maxDstSize,
1406 const void* seqStart, size_t seqSize, int nbSeq,
1407 const ZSTD_longOffset_e isLongOffset)
1408{
1409 const BYTE* ip = (const BYTE*)seqStart;
1410 const BYTE* const iend = ip + seqSize;
1411 BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1413 BYTE* op = ostart;
1414 const BYTE* litPtr = dctx->litPtr;
1415 const BYTE* litBufferEnd = dctx->litBufferEnd;
1416 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1417 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1418 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1419 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
1420
1421 /* Literals are split between internal buffer & output buffer */
1422 if (nbSeq) {
1423 seqState_t seqState;
1424 dctx->fseEntropy = 1;
1425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1426 RETURN_ERROR_IF(
1427 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1428 corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1432 assert(dst != NULL);
1433
1434 ZSTD_STATIC_ASSERT(
1435 BIT_DStream_unfinished < BIT_DStream_completed &&
1436 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1437 BIT_DStream_completed < BIT_DStream_overflow);
1438
1439 /* decompress without overrunning litPtr begins */
        { seq_t sequence = {0,0,0};  /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for(;;) loop has at least one iteration) */
1441 /* Align the decompression loop to 32 + 16 bytes.
1442 *
1443 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1444 * speed swings based on the alignment of the decompression loop. This
1445 * performance swing is caused by parts of the decompression loop falling
1446 * out of the DSB. The entire decompression loop should fit in the DSB,
1447 * when it can't we get much worse performance. You can measure if you've
1448 * hit the good case or the bad case with this perf command for some
1449 * compressed file test.zst:
1450 *
1451 * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1452 * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1453 *
1454 * If you see most cycles served out of the MITE you've hit the bad case.
1455 * If you see most cycles served out of the DSB you've hit the good case.
1456 * If it is pretty even then you may be in an okay case.
1457 *
1458 * This issue has been reproduced on the following CPUs:
1459 * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1460 * Use Instruments->Counters to get DSB/MITE cycles.
1461 * I never got performance swings, but I was able to
1462 * go from the good case of mostly DSB to half of the
1463 * cycles served from MITE.
1464 * - Coffeelake: Intel i9-9900k
1465 * - Coffeelake: Intel i7-9700k
1466 *
1467 * I haven't been able to reproduce the instability or DSB misses on any
1468 * of the following CPUS:
1469 * - Haswell
         * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
1471 * - Skylake
1472 *
1473 * Alignment is done for each of the three major decompression loops:
1474 * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1475 * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1476 * - ZSTD_decompressSequences_body
1477 * Alignment choices are made to minimize large swings on bad cases and influence on performance
1478 * from changes external to this code, rather than to overoptimize on the current commit.
1479 *
         * If you are seeing performance instability, this script can help test.
1481 * It tests on 4 commits in zstd where I saw performance change.
1482 *
1483 * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1484 */
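        /* Illustrative reading of the perf counters above (numbers are made up):
         * if idq.all_dsb_cycles_any_uops reports roughly 90% of the cycles count and
         * idq.all_mite_cycles_any_uops only ~10%, the loop is being served from the
         * DSB (good case); the opposite ratio indicates the MITE (bad) case. */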
1485#if defined(__x86_64__)
1486 __asm__(".p2align 6");
1487# if __GNUC__ >= 7
1488 /* good for gcc-7, gcc-9, and gcc-11 */
1489 __asm__("nop");
1490 __asm__(".p2align 5");
1491 __asm__("nop");
1492 __asm__(".p2align 4");
1493# if __GNUC__ == 8 || __GNUC__ == 10
1494 /* good for gcc-8 and gcc-10 */
1495 __asm__("nop");
1496 __asm__(".p2align 3");
1497# endif
1498# endif
1499#endif
1500
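            /* Layout sketch for the split case handled below (illustrative only):
             *  - the first part of the decoded literals lives inside dst,
             *    between litPtr and dctx->litBufferEnd;
             *  - the last ZSTD_LITBUFFEREXTRASIZE bytes live in dctx->litExtraBuffer.
             * Sequences are executed against the dst-resident part until one would read
             * past dctx->litBufferEnd; the code below then copies any leftover literals
             * and switches litPtr/litBufferEnd over to litExtraBuffer. */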
1501 /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1502 for ( ; nbSeq; nbSeq--) {
            sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
            if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
            { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1506#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1507 assert(!ZSTD_isError(oneSeqSize));
1508 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1509#endif
1510 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1511 return oneSeqSize;
1512 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1513 op += oneSeqSize;
1514 } }
1515 DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
1516
1517 /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
1518 if (nbSeq > 0) {
1519 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1520 DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
1521 if (leftoverLit) {
1522 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1524 sequence.litLength -= leftoverLit;
1525 op += leftoverLit;
1526 }
1527 litPtr = dctx->litExtraBuffer;
1528 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1529 dctx->litBufferLocation = ZSTD_not_in_dst;
            { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1531#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1532 assert(!ZSTD_isError(oneSeqSize));
1533 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1534#endif
1535 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1536 return oneSeqSize;
1537 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1538 op += oneSeqSize;
1539 }
1540 nbSeq--;
1541 }
1542 }
1543
1544 if (nbSeq > 0) {
1545 /* there is remaining lit from extra buffer */
1546
1547#if defined(__x86_64__)
1548 __asm__(".p2align 6");
1549 __asm__("nop");
1550# if __GNUC__ != 7
            /* worse for gcc-7; better for gcc-8, gcc-9, gcc-10, and clang */
1552 __asm__(".p2align 4");
1553 __asm__("nop");
1554 __asm__(".p2align 3");
1555# elif __GNUC__ >= 11
1556 __asm__(".p2align 3");
1557# else
1558 __asm__(".p2align 5");
1559 __asm__("nop");
1560 __asm__(".p2align 3");
1561# endif
1562#endif
1563
1564 for ( ; nbSeq ; nbSeq--) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1567#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1568 assert(!ZSTD_isError(oneSeqSize));
1569 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1570#endif
1571 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1572 return oneSeqSize;
1573 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1574 op += oneSeqSize;
1575 }
1576 }
1577
1578 /* check if reached exact end */
1579 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1580 RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1581 DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
1582 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1583 /* save reps for next block */
1584 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1585 }
1586
1587 /* last literal segment */
1588 if (dctx->litBufferLocation == ZSTD_split) {
1589 /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1590 size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1591 DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
1592 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1593 if (op != NULL) {
1594 ZSTD_memmove(op, litPtr, lastLLSize);
1595 op += lastLLSize;
1596 }
1597 litPtr = dctx->litExtraBuffer;
1598 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1599 dctx->litBufferLocation = ZSTD_not_in_dst;
1600 }
1601 /* copy last literals from internal buffer */
1602 { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1603 DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
1604 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1605 if (op != NULL) {
1606 ZSTD_memcpy(op, litPtr, lastLLSize);
1607 op += lastLLSize;
1608 } }
1609
1610 DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1611 return (size_t)(op - ostart);
1612}
1613
1614FORCE_INLINE_TEMPLATE size_t
1615DONT_VECTORIZE
1616ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1617 void* dst, size_t maxDstSize,
1618 const void* seqStart, size_t seqSize, int nbSeq,
1619 const ZSTD_longOffset_e isLongOffset)
1620{
1621 const BYTE* ip = (const BYTE*)seqStart;
1622 const BYTE* const iend = ip + seqSize;
1623 BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
1625 BYTE* op = ostart;
1626 const BYTE* litPtr = dctx->litPtr;
1627 const BYTE* const litEnd = litPtr + dctx->litSize;
1628 const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1629 const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1630 const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1631 DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1632
1633 /* Regen sequences */
1634 if (nbSeq) {
1635 seqState_t seqState;
1636 dctx->fseEntropy = 1;
1637 { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1638 RETURN_ERROR_IF(
1639 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1640 corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1644 assert(dst != NULL);
1645
1646#if defined(__x86_64__)
1647 __asm__(".p2align 6");
1648 __asm__("nop");
1649# if __GNUC__ >= 7
1650 __asm__(".p2align 5");
1651 __asm__("nop");
1652 __asm__(".p2align 3");
1653# else
1654 __asm__(".p2align 4");
1655 __asm__("nop");
1656 __asm__(".p2align 3");
1657# endif
1658#endif
1659
1660 for ( ; nbSeq ; nbSeq--) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1663#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1664 assert(!ZSTD_isError(oneSeqSize));
1665 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1666#endif
1667 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1668 return oneSeqSize;
1669 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1670 op += oneSeqSize;
1671 }
1672
1673 /* check if reached exact end */
1674 assert(nbSeq == 0);
1675 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1676 /* save reps for next block */
1677 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1678 }
1679
1680 /* last literal segment */
1681 { size_t const lastLLSize = (size_t)(litEnd - litPtr);
1682 DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
1683 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1684 if (op != NULL) {
1685 ZSTD_memcpy(op, litPtr, lastLLSize);
1686 op += lastLLSize;
1687 } }
1688
1689 DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1690 return (size_t)(op - ostart);
1691}
1692
1693static size_t
1694ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1695 void* dst, size_t maxDstSize,
1696 const void* seqStart, size_t seqSize, int nbSeq,
1697 const ZSTD_longOffset_e isLongOffset)
1698{
1699 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1700}
1701
1702static size_t
1703ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1704 void* dst, size_t maxDstSize,
1705 const void* seqStart, size_t seqSize, int nbSeq,
1706 const ZSTD_longOffset_e isLongOffset)
1707{
1708 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1709}
1710#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1711
1712#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1713
FORCE_INLINE_TEMPLATE
size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1717 const BYTE* const prefixStart, const BYTE* const dictEnd)
1718{
1719 prefetchPos += sequence.litLength;
1720 { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1721 /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1722 * No consequence though : memory address is only used for prefetching, not for dereferencing */
        const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
1724 PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1725 }
1726 return prefetchPos + sequence.matchLength;
1727}
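/* Example of the address computed above (hypothetical values):
 * with prefetchPos = 1000 and sequence.offset = 300, the match lies within the prefix,
 * so matchBase = prefixStart and the prefetched address is prefixStart + 700.
 * With sequence.offset = 4000 (> prefetchPos), the match lies in the ext-dict,
 * so matchBase = dictEnd and the address is dictEnd - 3000.
 * In both cases the address is only prefetched, never dereferenced. */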
1728
/* This decoding function employs prefetching
 * to reduce the latency impact of cache misses.
 * It's generally employed when a block contains a significant portion of long-distance matches
 * or when coupled with a "cold" dictionary */
1733FORCE_INLINE_TEMPLATE size_t
1734ZSTD_decompressSequencesLong_body(
1735 ZSTD_DCtx* dctx,
1736 void* dst, size_t maxDstSize,
1737 const void* seqStart, size_t seqSize, int nbSeq,
1738 const ZSTD_longOffset_e isLongOffset)
1739{
1740 const BYTE* ip = (const BYTE*)seqStart;
1741 const BYTE* const iend = ip + seqSize;
1742 BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1744 BYTE* op = ostart;
1745 const BYTE* litPtr = dctx->litPtr;
1746 const BYTE* litBufferEnd = dctx->litBufferEnd;
1747 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1748 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1749 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1750
1751 /* Regen sequences */
1752 if (nbSeq) {
1753#define STORED_SEQS 8
1754#define STORED_SEQS_MASK (STORED_SEQS-1)
1755#define ADVANCED_SEQS STORED_SEQS
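        /* The loop below keeps a small ring of in-flight sequences:
         * the sequence decoded at iteration seqNb is stored into sequences[seqNb & STORED_SEQS_MASK],
         * while the one executed at that iteration was decoded ADVANCED_SEQS iterations earlier,
         * i.e. sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].
         * For example, with STORED_SEQS == 8, at seqNb == 10 the executed sequence comes from
         * slot (10-8) & 7 == 2, and the freshly decoded one overwrites that same slot (10 & 7 == 2)
         * only after it has been consumed. */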
1756 seq_t sequences[STORED_SEQS];
1757 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1758 seqState_t seqState;
1759 int seqNb;
1760 size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1761
1762 dctx->fseEntropy = 1;
1763 { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1764 assert(dst != NULL);
1765 assert(iend >= ip);
1766 RETURN_ERROR_IF(
1767 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1768 corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1772
1773 /* prepare in advance */
1774 for (seqNb=0; seqNb<seqAdvance; seqNb++) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1776 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1777 sequences[seqNb] = sequence;
1778 }
1779
1780 /* decompress without stomping litBuffer */
1781 for (; seqNb < nbSeq; seqNb++) {
            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1783
1784 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
1785 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1786 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1787 if (leftoverLit)
1788 {
1789 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1791 sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1792 op += leftoverLit;
1793 }
1794 litPtr = dctx->litExtraBuffer;
1795 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1796 dctx->litBufferLocation = ZSTD_not_in_dst;
                { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1798#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1799 assert(!ZSTD_isError(oneSeqSize));
1800 ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1801#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1803
1804 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1805 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1806 op += oneSeqSize;
1807 } }
1808 else
1809 {
                /* lit buffer is either wholly contained in the first or second split, or not split at all */
                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1814#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1815 assert(!ZSTD_isError(oneSeqSize));
1816 ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1817#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1819
1820 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1821 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1822 op += oneSeqSize;
1823 }
1824 }
1825 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1826
1827 /* finish queue */
1828 seqNb -= seqAdvance;
1829 for ( ; seqNb<nbSeq ; seqNb++) {
1830 seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1831 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
1832 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1833 if (leftoverLit) {
1834 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1836 sequence->litLength -= leftoverLit;
1837 op += leftoverLit;
1838 }
1839 litPtr = dctx->litExtraBuffer;
1840 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1841 dctx->litBufferLocation = ZSTD_not_in_dst;
                { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1843#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1844 assert(!ZSTD_isError(oneSeqSize));
1845 ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1846#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1848 op += oneSeqSize;
1849 }
1850 }
1851 else
1852 {
                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1856#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1857 assert(!ZSTD_isError(oneSeqSize));
1858 ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1859#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1861 op += oneSeqSize;
1862 }
1863 }
1864
1865 /* save reps for next block */
1866 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1867 }
1868
1869 /* last literal segment */
1870 if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
1871 size_t const lastLLSize = litBufferEnd - litPtr;
1872 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1873 if (op != NULL) {
1874 ZSTD_memmove(op, litPtr, lastLLSize);
1875 op += lastLLSize;
1876 }
1877 litPtr = dctx->litExtraBuffer;
1878 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1879 }
1880 { size_t const lastLLSize = litBufferEnd - litPtr;
1881 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1882 if (op != NULL) {
1883 ZSTD_memmove(op, litPtr, lastLLSize);
1884 op += lastLLSize;
1885 }
1886 }
1887
1888 return (size_t)(op - ostart);
1889}
1890
1891static size_t
1892ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1893 void* dst, size_t maxDstSize,
1894 const void* seqStart, size_t seqSize, int nbSeq,
1895 const ZSTD_longOffset_e isLongOffset)
1896{
1897 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1898}
1899#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1900
1901
1902
1903#if DYNAMIC_BMI2
1904
1905#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1906static BMI2_TARGET_ATTRIBUTE size_t
1907DONT_VECTORIZE
1908ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1909 void* dst, size_t maxDstSize,
1910 const void* seqStart, size_t seqSize, int nbSeq,
1911 const ZSTD_longOffset_e isLongOffset)
1912{
1913 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1914}
1915static BMI2_TARGET_ATTRIBUTE size_t
1916DONT_VECTORIZE
1917ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1918 void* dst, size_t maxDstSize,
1919 const void* seqStart, size_t seqSize, int nbSeq,
1920 const ZSTD_longOffset_e isLongOffset)
1921{
1922 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1923}
1924#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1925
1926#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1927static BMI2_TARGET_ATTRIBUTE size_t
1928ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1929 void* dst, size_t maxDstSize,
1930 const void* seqStart, size_t seqSize, int nbSeq,
1931 const ZSTD_longOffset_e isLongOffset)
1932{
1933 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1934}
1935#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1936
1937#endif /* DYNAMIC_BMI2 */
1938
1939#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1940static size_t
1941ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1942 const void* seqStart, size_t seqSize, int nbSeq,
1943 const ZSTD_longOffset_e isLongOffset)
1944{
1945 DEBUGLOG(5, "ZSTD_decompressSequences");
1946#if DYNAMIC_BMI2
1947 if (ZSTD_DCtx_get_bmi2(dctx)) {
1948 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1949 }
1950#endif
1951 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1952}
1953static size_t
1954ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1955 const void* seqStart, size_t seqSize, int nbSeq,
1956 const ZSTD_longOffset_e isLongOffset)
1957{
1958 DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1959#if DYNAMIC_BMI2
1960 if (ZSTD_DCtx_get_bmi2(dctx)) {
1961 return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1962 }
1963#endif
1964 return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1965}
1966#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1967
1968
1969#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1970/* ZSTD_decompressSequencesLong() :
1971 * decompression function triggered when a minimum share of offsets is considered "long",
1972 * aka out of cache.
1973 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1974 * This function will try to mitigate main memory latency through the use of prefetching */
1975static size_t
1976ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1977 void* dst, size_t maxDstSize,
1978 const void* seqStart, size_t seqSize, int nbSeq,
1979 const ZSTD_longOffset_e isLongOffset)
1980{
1981 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1982#if DYNAMIC_BMI2
1983 if (ZSTD_DCtx_get_bmi2(dctx)) {
1984 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1985 }
1986#endif
1987 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1988}
1989#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1990
1991
1992/*
1993 * @returns The total size of the history referenceable by zstd, including
1994 * both the prefix and the extDict. At @p op any offset larger than this
1995 * is invalid.
1996 */
1997static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1998{
1999 return (size_t)(op - virtualStart);
2000}
2001
2002typedef struct {
2003 unsigned longOffsetShare;
2004 unsigned maxNbAdditionalBits;
2005} ZSTD_OffsetInfo;
2006
2007/* ZSTD_getOffsetInfo() :
2008 * condition : offTable must be valid
2009 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
2010 * compared to maximum possible of (1<<OffFSELog),
 *           as well as the maximum number of additional bits required.
2012 */
2013static ZSTD_OffsetInfo
2014ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
2015{
2016 ZSTD_OffsetInfo info = {0, 0};
2017 /* If nbSeq == 0, then the offTable is uninitialized, but we have
2018 * no sequences, so both values should be 0.
2019 */
2020 if (nbSeq != 0) {
2021 const void* ptr = offTable;
2022 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2023 const ZSTD_seqSymbol* table = offTable + 1;
2024 U32 const max = 1 << tableLog;
2025 U32 u;
        DEBUGLOG(5, "ZSTD_getOffsetInfo: (tableLog=%u)", tableLog);
2027
2028 assert(max <= (1 << OffFSELog)); /* max not too large */
2029 for (u=0; u<max; u++) {
2030 info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2031 if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2032 }
2033
2034 assert(tableLog <= OffFSELog);
2035 info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
2036 }
2037
2038 return info;
2039}
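/* Worked example (hypothetical table):
 * with tableLog = 8 and 16 of the 256 cells having nbAdditionalBits > 22,
 * longOffsetShare = 16, i.e. 16/256 = 6.25% of the (uniformly distributed) FSE states
 * decode to a "long" offset. With a smaller table, e.g. tableLog = 6,
 * the count is scaled by 1 << (OffFSELog - tableLog) so shares stay comparable. */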
2040
2041/*
2042 * @returns The maximum offset we can decode in one read of our bitstream, without
2043 * reloading more bits in the middle of the offset bits read. Any offsets larger
2044 * than this must use the long offset decoder.
2045 */
2046static size_t ZSTD_maxShortOffset(void)
2047{
2048 if (MEM_64bits()) {
2049 /* We can decode any offset without reloading bits.
2050 * This might change if the max window size grows.
2051 */
2052 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2053 return (size_t)-1;
2054 } else {
2055 /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2056 * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2057 * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2058 */
2059 size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2060 size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2061 assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2062 return maxOffset;
2063 }
2064}
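/* Worked out for the 32-bit case, with the current STREAM_ACCUMULATOR_MIN_32 of 25:
 * maxOffbase = (1 << 26) - 1 = 67108863, and after subtracting ZSTD_REP_NUM (3)
 * the largest offset decodable without an extra reload is 67108860.
 * Any valid offset beyond that requires the long-offset decoding path. */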
2065
2066size_t
2067ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2068 void* dst, size_t dstCapacity,
2069 const void* src, size_t srcSize, const streaming_operation streaming)
2070{ /* blockType == blockCompressed */
2071 const BYTE* ip = (const BYTE*)src;
2072 DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
2073
    /* Note : the wording of the specification
     * allows a compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
     * This generally does not happen, as it makes little sense,
     * since an uncompressed block would feature the same size and have no decompression cost.
     * Also, note that decoders from reference libzstd prior to v1.5.4
     * would consider this edge case an error.
     * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
     * for broader compatibility with the deployed ecosystem of zstd decoders */
2082 RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
2083
2084 /* Decode literals section */
2085 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2086 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
        if (ZSTD_isError(litCSize)) return litCSize;
2088 ip += litCSize;
2089 srcSize -= litCSize;
2090 }
2091
2092 /* Build Decoding Tables */
2093 {
2094 /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2095 * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2096 */
2097 size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
        size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
2099 /* isLongOffset must be true if there are long offsets.
2100 * Offsets are long if they are larger than ZSTD_maxShortOffset().
2101 * We don't expect that to be the case in 64-bit mode.
2102 *
2103 * We check here to see if our history is large enough to allow long offsets.
         * If it isn't, then we can't possibly have (valid) long offsets. If the offset
         * is invalid, then it is okay to read it incorrectly.
         *
         * If isLongOffset is true, then we will later check our decoding table to see
2108 * if it is even possible to generate long offsets.
2109 */
2110 ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2111 /* These macros control at build-time which decompressor implementation
2112 * we use. If neither is defined, we do some inspection and dispatch at
2113 * runtime.
2114 */
2115#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2116 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2117 int usePrefetchDecoder = dctx->ddictIsCold;
2118#else
2119 /* Set to 1 to avoid computing offset info if we don't need to.
2120 * Otherwise this value is ignored.
2121 */
2122 int usePrefetchDecoder = 1;
2123#endif
2124 int nbSeq;
        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
        if (ZSTD_isError(seqHSize)) return seqHSize;
2127 ip += seqHSize;
2128 srcSize -= seqHSize;
2129
2130 RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2131 RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2132 "invalid dst");
2133
2134 /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2135 * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2136 * NOTE: could probably use a larger nbSeq limit
2137 */
2138 if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
            ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2140 if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2141 /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2142 * enough, then we know it is impossible to have too long an offset in this block, so we can
2143 * use the regular offset decoder.
2144 */
2145 isLongOffset = ZSTD_lo_isRegularOffset;
2146 }
2147 if (!usePrefetchDecoder) {
2148 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2149 usePrefetchDecoder = (info.longOffsetShare >= minShare);
2150 }
2151 }
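        /* Arithmetic behind minShare above: info.longOffsetShare is scaled to a
         * denominator of (1 << OffFSELog) = 256, so the thresholds 7 and 20
         * correspond to 7/256 ~= 2.73% and 20/256 ~= 7.81%, as noted in the comment. */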
2152
2153 dctx->ddictIsCold = 0;
2154
2155#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2156 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2157 if (usePrefetchDecoder) {
2158#else
2159 (void)usePrefetchDecoder;
2160 {
2161#endif
2162#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2164#endif
2165 }
2166
2167#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2168 /* else */
    if (dctx->litBufferLocation == ZSTD_split)
        return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
    else
        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2173#endif
2174 }
2175}
2176
2177
2178ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
2179void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2180{
2181 if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
2182 dctx->dictEnd = dctx->previousDstEnd;
2183 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2184 dctx->prefixStart = dst;
2185 dctx->previousDstEnd = dst;
2186 }
2187}
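/* Example of the non-contiguous case above (illustrative addresses):
 * if the previous block ended at previousDstEnd = A+100 with prefixStart = A,
 * and the next call targets an unrelated buffer B, then after this function
 * dictEnd = A+100 (the previous output becomes the ext-dict), prefixStart = B,
 * and virtualStart = B-100, so offsets of up to 100 bytes at the start of B
 * still resolve into the previous buffer through the ext-dict path. */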
2188
2189
2190size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2191 void* dst, size_t dstCapacity,
2192 const void* src, size_t srcSize)
2193{
2194 size_t dSize;
2195 dctx->isFrameDecompression = 0;
    ZSTD_checkContinuity(dctx, dst, dstCapacity);
    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
2198 FORWARD_IF_ERROR(dSize, "");
2199 dctx->previousDstEnd = (char*)dst + dSize;
2200 return dSize;
2201}
2202
2203
2204/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2205size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2206 void* dst, size_t dstCapacity,
2207 const void* src, size_t srcSize)
2208{
2209 return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2210}
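/* Minimal caller-side sketch (illustrative only; cBlock, cBlockSize, dst and dstCapacity
 * are assumed to be provided by the caller, and the block must have been produced by
 * ZSTD_compressBlock() following the block-level API rules):
 *
 *     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *     size_t const initErr = ZSTD_decompressBegin(dctx);   // reset history before the first block
 *     if (!ZSTD_isError(initErr)) {
 *         size_t const dSize = ZSTD_decompressBlock(dctx, dst, dstCapacity, cBlock, cBlockSize);
 *         if (ZSTD_isError(dSize)) { ... }                  // inspect with ZSTD_getErrorName(dSize)
 *     }
 *     ZSTD_freeDCtx(dctx);
 */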
2211