zstd_decompress_block.c source code [Linux/lib/zstd/decompress/zstd_decompress_block.c]

1	// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2	/*
3	* Copyright (c) Meta Platforms, Inc. and affiliates.
4	* All rights reserved.
5	*
6	* This source code is licensed under both the BSD-style license (found in the
7	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
8	* in the COPYING file in the root directory of this source tree).
9	* You may select, at your option, one of the above-listed licenses.
10	*/
11
12	/ zstd_decompress_block :*
13	* this module takes care of decompressing _compressed_ block */
14
15	/-******************************************************
16	* Dependencies
17	*********************************************************/
18	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
19	#include "../common/compiler.h" /* prefetch */
20	#include "../common/cpu.h" /* bmi2 */
21	#include "../common/mem.h" /* low level memory routines */
22	#define FSE_STATIC_LINKING_ONLY
23	#include "../common/fse.h"
24	#include "../common/huf.h"
25	#include "../common/zstd_internal.h"
26	#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27	#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28	#include "zstd_decompress_block.h"
29	#include "../common/bits.h" /* ZSTD_highbit32 */
30
31	/_******************************************************
32	* Macros
33	**********************************************************/
34
35	/ These two optional macros force the use one way or another of the two*
36	* ZSTD_decompressSequences implementations. You can't force in both directions
37	* at the same time.
38	*/
39	#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
40	defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
41	#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
42	#endif
43
44
45	/_******************************************************
46	* Memory operations
47	**********************************************************/
48	static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, `4`); }
49
50
51	/-************************************************************
52	* Block decoding
53	***************************************************************/
54
55	static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
56	{
57	size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
58	assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
59	return blockSizeMax;
60	}
61
62	/! ZSTD_getcBlockSize() :*
63	* Provides the size of compressed block from block header `src` */
64	size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
65	blockProperties_t* bpPtr)
66	{
67	RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
68
69	{ U32 const cBlockHeader = MEM_readLE24(memPtr: src);
70	U32 const cSize = cBlockHeader >> `3`;
71	bpPtr->lastBlock = cBlockHeader & `1`;
72	bpPtr->blockType = (blockType_e)((cBlockHeader >> `1`) & `3`);
73	bpPtr->origSize = cSize; / only useful for RLE /
74	if (bpPtr->blockType == bt_rle) return `1`;
75	RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
76	return cSize;
77	}
78	}
79
80	/ Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer /
81	static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
82	const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
83	{
84	size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
85	assert(litSize <= blockSizeMax);
86	assert(dctx->isFrameDecompression \|\| streaming == not_streaming);
87	assert(expectedWriteSize <= blockSizeMax);
88	if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
89	/ If we aren't streaming, we can just put the literals after the output*
90	* of the current block. We don't need to worry about overwriting the
91	* extDict of our window, because it doesn't exist.
92	* So if we have space after the end of the block, just put it there.
93	*/
94	dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
95	dctx->litBufferEnd = dctx->litBuffer + litSize;
96	dctx->litBufferLocation = ZSTD_in_dst;
97	} else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
98	/ Literals fit entirely within the extra buffer, put them there to avoid*
99	* having to split the literals.
100	*/
101	dctx->litBuffer = dctx->litExtraBuffer;
102	dctx->litBufferEnd = dctx->litBuffer + litSize;
103	dctx->litBufferLocation = ZSTD_not_in_dst;
104	} else {
105	assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
106	/ Literals must be split between the output block and the extra lit*
107	* buffer. We fill the extra lit buffer with the tail of the literals,
108	* and put the rest of the literals at the end of the block, with
109	* WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
110	* This MUST not write more than our maxBlockSize beyond dst, because in
111	* streaming mode, that could overwrite part of our extDict window.
112	*/
113	if (splitImmediately) {
114	/ won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer /
115	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
116	dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
117	} else {
118	/ initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after /
119	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
120	dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
121	}
122	dctx->litBufferLocation = ZSTD_split;
123	assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
124	}
125	}
126
127	/! ZSTD_decodeLiteralsBlock() :*
128	* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
129	* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
130	* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
131	* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
132	*
133	* @return : nb of bytes read from src (< srcSize )
134	* note : symbol not declared but exposed for fullbench */
135	static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
136	const void* src, size_t srcSize, / note : srcSize < BLOCKSIZE /
137	void* dst, size_t dstCapacity, const streaming_operation streaming)
138	{
139	DEBUGLOG(`5`, "ZSTD_decodeLiteralsBlock");
140	RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
141
142	{ const BYTE* const istart = (const BYTE*) src;
143	SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[`0`] & `3`);
144	size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
145
146	switch(litEncType)
147	{
148	case set_repeat:
149	DEBUGLOG(`5`, "set_repeat flag : re-using stats from previous compressed literals block");
150	RETURN_ERROR_IF(dctx->litEntropy==`0`, dictionary_corrupted, "");
151	ZSTD_FALLTHROUGH;
152
153	case set_compressed:
154	RETURN_ERROR_IF(srcSize < `5`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
155	{ size_t lhSize, litSize, litCSize;
156	U32 singleStream=`0`;
157	U32 const lhlCode = (istart[`0`] >> `2`) & `3`;
158	U32 const lhc = MEM_readLE32(memPtr: istart);
159	size_t hufSuccess;
160	size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
161	int const flags = `0`
162	\| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : `0`)
163	\| (dctx->disableHufAsm ? HUF_flags_disableAsm : `0`);
164	switch(lhlCode)
165	{
166	case `0`: case `1`: default: / note : default is impossible, since lhlCode into [0..3] /
167	/ 2 - 2 - 10 - 10 /
168	singleStream = !lhlCode;
169	lhSize = `3`;
170	litSize = (lhc >> `4`) & `0x3FF`;
171	litCSize = (lhc >> `14`) & `0x3FF`;
172	break;
173	case `2`:
174	/ 2 - 2 - 14 - 14 /
175	lhSize = `4`;
176	litSize = (lhc >> `4`) & `0x3FFF`;
177	litCSize = lhc >> `18`;
178	break;
179	case `3`:
180	/ 2 - 2 - 18 - 18 /
181	lhSize = `5`;
182	litSize = (lhc >> `4`) & `0x3FFFF`;
183	litCSize = (lhc >> `22`) + ((size_t)istart[`4`] << `10`);
184	break;
185	}
186	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
187	RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
188	if (!singleStream)
189	RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
190	"Not enough literals (%zu) for the 4-streams mode (min %u)",
191	litSize, MIN_LITERALS_FOR_4_STREAMS);
192	RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
193	RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
194	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, splitImmediately: `0`);
195
196	/ prefetch huffman table if cold /
197	if (dctx->ddictIsCold && (litSize > `768` / heuristic /)) {
198	PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
199	}
200
201	if (litEncType==set_repeat) {
202	if (singleStream) {
203	hufSuccess = HUF_decompress1X_usingDTable(
204	dst: dctx->litBuffer, maxDstSize: litSize, cSrc: istart+lhSize, cSrcSize: litCSize,
205	DTable: dctx->HUFptr, flags);
206	} else {
207	assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
208	hufSuccess = HUF_decompress4X_usingDTable(
209	dst: dctx->litBuffer, maxDstSize: litSize, cSrc: istart+lhSize, cSrcSize: litCSize,
210	DTable: dctx->HUFptr, flags);
211	}
212	} else {
213	if (singleStream) {
214	#if defined(HUF_FORCE_DECOMPRESS_X2)
215	hufSuccess = HUF_decompress1X_DCtx_wksp(
216	dctx->entropy.hufTable, dctx->litBuffer, litSize,
217	istart+lhSize, litCSize, dctx->workspace,
218	sizeof(dctx->workspace), flags);
219	#else
220	hufSuccess = HUF_decompress1X1_DCtx_wksp(
221	dctx: dctx->entropy.hufTable, dst: dctx->litBuffer, dstSize: litSize,
222	cSrc: istart+lhSize, cSrcSize: litCSize, workSpace: dctx->workspace,
223	wkspSize: sizeof(dctx->workspace), flags);
224	#endif
225	} else {
226	hufSuccess = HUF_decompress4X_hufOnly_wksp(
227	dctx: dctx->entropy.hufTable, dst: dctx->litBuffer, dstSize: litSize,
228	cSrc: istart+lhSize, cSrcSize: litCSize, workSpace: dctx->workspace,
229	wkspSize: sizeof(dctx->workspace), flags);
230	}
231	}
232	if (dctx->litBufferLocation == ZSTD_split)
233	{
234	assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
235	ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
236	ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
237	dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
238	dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
239	assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
240	}
241
242	RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
243
244	dctx->litPtr = dctx->litBuffer;
245	dctx->litSize = litSize;
246	dctx->litEntropy = `1`;
247	if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
248	return litCSize + lhSize;
249	}
250
251	case set_basic:
252	{ size_t litSize, lhSize;
253	U32 const lhlCode = ((istart[`0`]) >> `2`) & `3`;
254	size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
255	switch(lhlCode)
256	{
257	case `0`: case `2`: default: / note : default is impossible, since lhlCode into [0..3] /
258	lhSize = `1`;
259	litSize = istart[`0`] >> `3`;
260	break;
261	case `1`:
262	lhSize = `2`;
263	litSize = MEM_readLE16(memPtr: istart) >> `4`;
264	break;
265	case `3`:
266	lhSize = `3`;
267	RETURN_ERROR_IF(srcSize<`3`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
268	litSize = MEM_readLE24(memPtr: istart) >> `4`;
269	break;
270	}
271
272	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
273	RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
274	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
275	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, splitImmediately: `1`);
276	if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { / risk reading beyond src buffer with wildcopy /
277	RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
278	if (dctx->litBufferLocation == ZSTD_split)
279	{
280	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
281	ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
282	}
283	else
284	{
285	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
286	}
287	dctx->litPtr = dctx->litBuffer;
288	dctx->litSize = litSize;
289	return lhSize+litSize;
290	}
291	/ direct reference into compressed stream /
292	dctx->litPtr = istart+lhSize;
293	dctx->litSize = litSize;
294	dctx->litBufferEnd = dctx->litPtr + litSize;
295	dctx->litBufferLocation = ZSTD_not_in_dst;
296	return lhSize+litSize;
297	}
298
299	case set_rle:
300	{ U32 const lhlCode = ((istart[`0`]) >> `2`) & `3`;
301	size_t litSize, lhSize;
302	size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
303	switch(lhlCode)
304	{
305	case `0`: case `2`: default: / note : default is impossible, since lhlCode into [0..3] /
306	lhSize = `1`;
307	litSize = istart[`0`] >> `3`;
308	break;
309	case `1`:
310	lhSize = `2`;
311	RETURN_ERROR_IF(srcSize<`3`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
312	litSize = MEM_readLE16(memPtr: istart) >> `4`;
313	break;
314	case `3`:
315	lhSize = `3`;
316	RETURN_ERROR_IF(srcSize<`4`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
317	litSize = MEM_readLE24(memPtr: istart) >> `4`;
318	break;
319	}
320	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
321	RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
322	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
323	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, splitImmediately: `1`);
324	if (dctx->litBufferLocation == ZSTD_split)
325	{
326	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
327	ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
328	}
329	else
330	{
331	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
332	}
333	dctx->litPtr = dctx->litBuffer;
334	dctx->litSize = litSize;
335	return lhSize+`1`;
336	}
337	default:
338	RETURN_ERROR(corruption_detected, "impossible");
339	}
340	}
341	}
342
343	/ Hidden declaration for fullbench /
344	size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
345	const void* src, size_t srcSize,
346	void* dst, size_t dstCapacity);
347	size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
348	const void* src, size_t srcSize,
349	void* dst, size_t dstCapacity)
350	{
351	dctx->isFrameDecompression = `0`;
352	return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming: not_streaming);
353	}
354
355	/ Default FSE distribution tables.*
356	* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
357	* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
358	* They were generated programmatically with following method :
359	* - start from default distributions, present in /lib/common/zstd_internal.h
360	* - generate tables normally, using ZSTD_buildFSETable()
361	* - printout the content of tables
362	* - prettify output, report below, test with fuzzer to ensure it's correct */
363
364	/ Default FSE distribution table for Literal Lengths /
365	static const ZSTD_seqSymbol LL_defaultDTable[(`1`<<LL_DEFAULTNORMLOG)+`1`] = {
366	{ `1`, `1`, `1`, LL_DEFAULTNORMLOG}, / header : fastMode, tableLog /
367	/ nextState, nbAddBits, nbBits, baseVal /
368	{ `0`, `0`, `4`, `0`}, { `16`, `0`, `4`, `0`},
369	{ `32`, `0`, `5`, `1`}, { `0`, `0`, `5`, `3`},
370	{ `0`, `0`, `5`, `4`}, { `0`, `0`, `5`, `6`},
371	{ `0`, `0`, `5`, `7`}, { `0`, `0`, `5`, `9`},
372	{ `0`, `0`, `5`, `10`}, { `0`, `0`, `5`, `12`},
373	{ `0`, `0`, `6`, `14`}, { `0`, `1`, `5`, `16`},
374	{ `0`, `1`, `5`, `20`}, { `0`, `1`, `5`, `22`},
375	{ `0`, `2`, `5`, `28`}, { `0`, `3`, `5`, `32`},
376	{ `0`, `4`, `5`, `48`}, { `32`, `6`, `5`, `64`},
377	{ `0`, `7`, `5`, `128`}, { `0`, `8`, `6`, `256`},
378	{ `0`, `10`, `6`, `1024`}, { `0`, `12`, `6`, `4096`},
379	{ `32`, `0`, `4`, `0`}, { `0`, `0`, `4`, `1`},
380	{ `0`, `0`, `5`, `2`}, { `32`, `0`, `5`, `4`},
381	{ `0`, `0`, `5`, `5`}, { `32`, `0`, `5`, `7`},
382	{ `0`, `0`, `5`, `8`}, { `32`, `0`, `5`, `10`},
383	{ `0`, `0`, `5`, `11`}, { `0`, `0`, `6`, `13`},
384	{ `32`, `1`, `5`, `16`}, { `0`, `1`, `5`, `18`},
385	{ `32`, `1`, `5`, `22`}, { `0`, `2`, `5`, `24`},
386	{ `32`, `3`, `5`, `32`}, { `0`, `3`, `5`, `40`},
387	{ `0`, `6`, `4`, `64`}, { `16`, `6`, `4`, `64`},
388	{ `32`, `7`, `5`, `128`}, { `0`, `9`, `6`, `512`},
389	{ `0`, `11`, `6`, `2048`}, { `48`, `0`, `4`, `0`},
390	{ `16`, `0`, `4`, `1`}, { `32`, `0`, `5`, `2`},
391	{ `32`, `0`, `5`, `3`}, { `32`, `0`, `5`, `5`},
392	{ `32`, `0`, `5`, `6`}, { `32`, `0`, `5`, `8`},
393	{ `32`, `0`, `5`, `9`}, { `32`, `0`, `5`, `11`},
394	{ `32`, `0`, `5`, `12`}, { `0`, `0`, `6`, `15`},
395	{ `32`, `1`, `5`, `18`}, { `32`, `1`, `5`, `20`},
396	{ `32`, `2`, `5`, `24`}, { `32`, `2`, `5`, `28`},
397	{ `32`, `3`, `5`, `40`}, { `32`, `4`, `5`, `48`},
398	{ `0`, `16`, `6`,`65536`}, { `0`, `15`, `6`,`32768`},
399	{ `0`, `14`, `6`,`16384`}, { `0`, `13`, `6`, `8192`},
400	}; / LL_defaultDTable /
401
402	/ Default FSE distribution table for Offset Codes /
403	static const ZSTD_seqSymbol OF_defaultDTable[(`1`<<OF_DEFAULTNORMLOG)+`1`] = {
404	{ `1`, `1`, `1`, OF_DEFAULTNORMLOG}, / header : fastMode, tableLog /
405	/ nextState, nbAddBits, nbBits, baseVal /
406	{ `0`, `0`, `5`, `0`}, { `0`, `6`, `4`, `61`},
407	{ `0`, `9`, `5`, `509`}, { `0`, `15`, `5`,`32765`},
408	{ `0`, `21`, `5`,`2097149`}, { `0`, `3`, `5`, `5`},
409	{ `0`, `7`, `4`, `125`}, { `0`, `12`, `5`, `4093`},
410	{ `0`, `18`, `5`,`262141`}, { `0`, `23`, `5`,`8388605`},
411	{ `0`, `5`, `5`, `29`}, { `0`, `8`, `4`, `253`},
412	{ `0`, `14`, `5`,`16381`}, { `0`, `20`, `5`,`1048573`},
413	{ `0`, `2`, `5`, `1`}, { `16`, `7`, `4`, `125`},
414	{ `0`, `11`, `5`, `2045`}, { `0`, `17`, `5`,`131069`},
415	{ `0`, `22`, `5`,`4194301`}, { `0`, `4`, `5`, `13`},
416	{ `16`, `8`, `4`, `253`}, { `0`, `13`, `5`, `8189`},
417	{ `0`, `19`, `5`,`524285`}, { `0`, `1`, `5`, `1`},
418	{ `16`, `6`, `4`, `61`}, { `0`, `10`, `5`, `1021`},
419	{ `0`, `16`, `5`,`65533`}, { `0`, `28`, `5`,`268435453`},
420	{ `0`, `27`, `5`,`134217725`}, { `0`, `26`, `5`,`67108861`},
421	{ `0`, `25`, `5`,`33554429`}, { `0`, `24`, `5`,`16777213`},
422	}; / OF_defaultDTable /
423
424
425	/ Default FSE distribution table for Match Lengths /
426	static const ZSTD_seqSymbol ML_defaultDTable[(`1`<<ML_DEFAULTNORMLOG)+`1`] = {
427	{ `1`, `1`, `1`, ML_DEFAULTNORMLOG}, / header : fastMode, tableLog /
428	/ nextState, nbAddBits, nbBits, baseVal /
429	{ `0`, `0`, `6`, `3`}, { `0`, `0`, `4`, `4`},
430	{ `32`, `0`, `5`, `5`}, { `0`, `0`, `5`, `6`},
431	{ `0`, `0`, `5`, `8`}, { `0`, `0`, `5`, `9`},
432	{ `0`, `0`, `5`, `11`}, { `0`, `0`, `6`, `13`},
433	{ `0`, `0`, `6`, `16`}, { `0`, `0`, `6`, `19`},
434	{ `0`, `0`, `6`, `22`}, { `0`, `0`, `6`, `25`},
435	{ `0`, `0`, `6`, `28`}, { `0`, `0`, `6`, `31`},
436	{ `0`, `0`, `6`, `34`}, { `0`, `1`, `6`, `37`},
437	{ `0`, `1`, `6`, `41`}, { `0`, `2`, `6`, `47`},
438	{ `0`, `3`, `6`, `59`}, { `0`, `4`, `6`, `83`},
439	{ `0`, `7`, `6`, `131`}, { `0`, `9`, `6`, `515`},
440	{ `16`, `0`, `4`, `4`}, { `0`, `0`, `4`, `5`},
441	{ `32`, `0`, `5`, `6`}, { `0`, `0`, `5`, `7`},
442	{ `32`, `0`, `5`, `9`}, { `0`, `0`, `5`, `10`},
443	{ `0`, `0`, `6`, `12`}, { `0`, `0`, `6`, `15`},
444	{ `0`, `0`, `6`, `18`}, { `0`, `0`, `6`, `21`},
445	{ `0`, `0`, `6`, `24`}, { `0`, `0`, `6`, `27`},
446	{ `0`, `0`, `6`, `30`}, { `0`, `0`, `6`, `33`},
447	{ `0`, `1`, `6`, `35`}, { `0`, `1`, `6`, `39`},
448	{ `0`, `2`, `6`, `43`}, { `0`, `3`, `6`, `51`},
449	{ `0`, `4`, `6`, `67`}, { `0`, `5`, `6`, `99`},
450	{ `0`, `8`, `6`, `259`}, { `32`, `0`, `4`, `4`},
451	{ `48`, `0`, `4`, `4`}, { `16`, `0`, `4`, `5`},
452	{ `32`, `0`, `5`, `7`}, { `32`, `0`, `5`, `8`},
453	{ `32`, `0`, `5`, `10`}, { `32`, `0`, `5`, `11`},
454	{ `0`, `0`, `6`, `14`}, { `0`, `0`, `6`, `17`},
455	{ `0`, `0`, `6`, `20`}, { `0`, `0`, `6`, `23`},
456	{ `0`, `0`, `6`, `26`}, { `0`, `0`, `6`, `29`},
457	{ `0`, `0`, `6`, `32`}, { `0`, `16`, `6`,`65539`},
458	{ `0`, `15`, `6`,`32771`}, { `0`, `14`, `6`,`16387`},
459	{ `0`, `13`, `6`, `8195`}, { `0`, `12`, `6`, `4099`},
460	{ `0`, `11`, `6`, `2051`}, { `0`, `10`, `6`, `1027`},
461	}; / ML_defaultDTable /
462
463
464	static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
465	{
466	void* ptr = dt;
467	ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
468	ZSTD_seqSymbol* const cell = dt + `1`;
469
470	DTableH->tableLog = `0`;
471	DTableH->fastMode = `0`;
472
473	cell->nbBits = `0`;
474	cell->nextState = `0`;
475	assert(nbAddBits < `255`);
476	cell->nbAdditionalBits = nbAddBits;
477	cell->baseValue = baseValue;
478	}
479
480
481	/ ZSTD_buildFSETable() :*
482	* generate FSE decoding table for one symbol (ll, ml or off)
483	* cannot fail if input is valid =>
484	* all inputs are presumed validated at this stage */
485	FORCE_INLINE_TEMPLATE
486	void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
487	const short* normalizedCounter, unsigned maxSymbolValue,
488	const U32* baseValue, const U8* nbAdditionalBits,
489	unsigned tableLog, void* wksp, size_t wkspSize)
490	{
491	ZSTD_seqSymbol* const tableDecode = dt+`1`;
492	U32 const maxSV1 = maxSymbolValue + `1`;
493	U32 const tableSize = `1` << tableLog;
494
495	U16* symbolNext = (U16*)wksp;
496	BYTE* spread = (BYTE*)(symbolNext + MaxSeq + `1`);
497	U32 highThreshold = tableSize - `1`;
498
499
500	/ Sanity Checks /
501	assert(maxSymbolValue <= MaxSeq);
502	assert(tableLog <= MaxFSELog);
503	assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
504	(void)wkspSize;
505	/ Init, lay down lowprob symbols /
506	{ ZSTD_seqSymbol_header DTableH;
507	DTableH.tableLog = tableLog;
508	DTableH.fastMode = `1`;
509	{ S16 const largeLimit= (S16)(`1` << (tableLog-`1`));
510	U32 s;
511	for (s=`0`; s<maxSV1; s++) {
512	if (normalizedCounter[s]==-`1`) {
513	tableDecode[highThreshold--].baseValue = s;
514	symbolNext[s] = `1`;
515	} else {
516	if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=`0`;
517	assert(normalizedCounter[s]>=`0`);
518	symbolNext[s] = (U16)normalizedCounter[s];
519	} } }
520	ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
521	}
522
523	/ Spread symbols /
524	assert(tableSize <= `512`);
525	/ Specialized symbol spreading for the case when there are*
526	* no low probability (-1 count) symbols. When compressing
527	* small blocks we avoid low probability symbols to hit this
528	* case, since header decoding speed matters more.
529	*/
530	if (highThreshold == tableSize - `1`) {
531	size_t const tableMask = tableSize-`1`;
532	size_t const step = FSE_TABLESTEP(tableSize);
533	/ First lay down the symbols in order.*
534	* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
535	* misses since small blocks generally have small table logs, so nearly
536	* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
537	* our buffer to handle the over-write.
538	*/
539	{
540	U64 const add = `0x0101010101010101ull`;
541	size_t pos = `0`;
542	U64 sv = `0`;
543	U32 s;
544	for (s=`0`; s<maxSV1; ++s, sv += add) {
545	int i;
546	int const n = normalizedCounter[s];
547	MEM_write64(memPtr: spread + pos, value: sv);
548	for (i = `8`; i < n; i += `8`) {
549	MEM_write64(memPtr: spread + pos + i, value: sv);
550	}
551	assert(n>=`0`);
552	pos += (size_t)n;
553	}
554	}
555	/ Now we spread those positions across the table.*
556	* The benefit of doing it in two stages is that we avoid the
557	* variable size inner loop, which caused lots of branch misses.
558	* Now we can run through all the positions without any branch misses.
559	* We unroll the loop twice, since that is what empirically worked best.
560	*/
561	{
562	size_t position = `0`;
563	size_t s;
564	size_t const unroll = `2`;
565	assert(tableSize % unroll == `0`); / FSE_MIN_TABLELOG is 5 /
566	for (s = `0`; s < (size_t)tableSize; s += unroll) {
567	size_t u;
568	for (u = `0`; u < unroll; ++u) {
569	size_t const uPosition = (position + (u * step)) & tableMask;
570	tableDecode[uPosition].baseValue = spread[s + u];
571	}
572	position = (position + (unroll * step)) & tableMask;
573	}
574	assert(position == `0`);
575	}
576	} else {
577	U32 const tableMask = tableSize-`1`;
578	U32 const step = FSE_TABLESTEP(tableSize);
579	U32 s, position = `0`;
580	for (s=`0`; s<maxSV1; s++) {
581	int i;
582	int const n = normalizedCounter[s];
583	for (i=`0`; i<n; i++) {
584	tableDecode[position].baseValue = s;
585	position = (position + step) & tableMask;
586	while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; / lowprob area /
587	} }
588	assert(position == `0`); / position must reach all cells once, otherwise normalizedCounter is incorrect /
589	}
590
591	/ Build Decoding table /
592	{
593	U32 u;
594	for (u=`0`; u<tableSize; u++) {
595	U32 const symbol = tableDecode[u].baseValue;
596	U32 const nextState = symbolNext[symbol]++;
597	tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(val: nextState) );
598	tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
599	assert(nbAdditionalBits[symbol] < `255`);
600	tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
601	tableDecode[u].baseValue = baseValue[symbol];
602	}
603	}
604	}
605
606	/ Avoids the FORCE_INLINE of the _body() function. /
607	static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
608	const short* normalizedCounter, unsigned maxSymbolValue,
609	const U32* baseValue, const U8* nbAdditionalBits,
610	unsigned tableLog, void* wksp, size_t wkspSize)
611	{
612	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
613	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
614	}
615
616	#if DYNAMIC_BMI2
617	BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
618	const short* normalizedCounter, unsigned maxSymbolValue,
619	const U32* baseValue, const U8* nbAdditionalBits,
620	unsigned tableLog, void* wksp, size_t wkspSize)
621	{
622	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
623	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
624	}
625	#endif
626
627	void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
628	const short* normalizedCounter, unsigned maxSymbolValue,
629	const U32* baseValue, const U8* nbAdditionalBits,
630	unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
631	{
632	#if DYNAMIC_BMI2
633	if (bmi2) {
634	ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
635	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
636	return;
637	}
638	#endif
639	(void)bmi2;
640	ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
641	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
642	}
643
644
645	/! ZSTD_buildSeqTable() :*
646	* @return : nb bytes read from src,
647	* or an error code if it fails */
648	static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
649	SymbolEncodingType_e type, unsigned max, U32 maxLog,
650	const void* src, size_t srcSize,
651	const U32* baseValue, const U8* nbAdditionalBits,
652	const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
653	int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
654	int bmi2)
655	{
656	switch(type)
657	{
658	case set_rle :
659	RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
660	RETURN_ERROR_IF(((const* BYTE*)src) > max, corruption_detected, "");
661	{ U32 const symbol = (const* BYTE*)src;
662	U32 const baseline = baseValue[symbol];
663	U8 const nbBits = nbAdditionalBits[symbol];
664	ZSTD_buildSeqTable_rle(dt: DTableSpace, baseValue: baseline, nbAddBits: nbBits);
665	}
666	*DTablePtr = DTableSpace;
667	return `1`;
668	case set_basic :
669	*DTablePtr = defaultTable;
670	return `0`;
671	case set_repeat:
672	RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
673	/ prefetch FSE table if used /
674	if (ddictIsCold && (nbSeq > `24` / heuristic /)) {
675	const void* const pStart = *DTablePtr;
676	size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
677	PREFETCH_AREA(pStart, pSize);
678	}
679	return `0`;
680	case set_compressed :
681	{ unsigned tableLog;
682	S16 norm[MaxSeq+`1`];
683	size_t const headerSize = FSE_readNCount(normalizedCounter: norm, maxSymbolValuePtr: &max, tableLogPtr: &tableLog, rBuffer: src, rBuffSize: srcSize);
684	RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
685	RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
686	ZSTD_buildFSETable(dt: DTableSpace, normalizedCounter: norm, maxSymbolValue: max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
687	*DTablePtr = DTableSpace;
688	return headerSize;
689	}
690	default :
691	assert(`0`);
692	RETURN_ERROR(GENERIC, "impossible");
693	}
694	}
695
696	size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
697	const void* src, size_t srcSize)
698	{
699	const BYTE* const istart = (const BYTE*)src;
700	const BYTE* const iend = istart + srcSize;
701	const BYTE* ip = istart;
702	int nbSeq;
703	DEBUGLOG(`5`, "ZSTD_decodeSeqHeaders");
704
705	/ check /
706	RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
707
708	/ SeqHead /
709	nbSeq = *ip++;
710	if (nbSeq > `0x7F`) {
711	if (nbSeq == `0xFF`) {
712	RETURN_ERROR_IF(ip+`2` > iend, srcSize_wrong, "");
713	nbSeq = MEM_readLE16(memPtr: ip) + LONGNBSEQ;
714	ip+=`2`;
715	} else {
716	RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
717	nbSeq = ((nbSeq-`0x80`)<<`8`) + *ip++;
718	}
719	}
720	*nbSeqPtr = nbSeq;
721
722	if (nbSeq == `0`) {
723	/ No sequence : section ends immediately /
724	RETURN_ERROR_IF(ip != iend, corruption_detected,
725	"extraneous data present in the Sequences section");
726	return (size_t)(ip - istart);
727	}
728
729	/ FSE table descriptors /
730	RETURN_ERROR_IF(ip+`1` > iend, srcSize_wrong, ""); / minimum possible size: 1 byte for symbol encoding types /
731	RETURN_ERROR_IF(ip & `3`, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. /
732	{ SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> `6`);
733	SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> `4`) & `3`);
734	SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> `2`) & `3`);
735	ip++;
736
737	/ Build DTables /
738	{ size_t const llhSize = ZSTD_buildSeqTable(DTableSpace: dctx->entropy.LLTable, DTablePtr: &dctx->LLTptr,
739	type: LLtype, MaxLL, LLFSELog,
740	src: ip, srcSize: iend-ip,
741	baseValue: LL_base, nbAdditionalBits: LL_bits,
742	defaultTable: LL_defaultDTable, flagRepeatTable: dctx->fseEntropy,
743	ddictIsCold: dctx->ddictIsCold, nbSeq,
744	wksp: dctx->workspace, wkspSize: sizeof(dctx->workspace),
745	bmi2: ZSTD_DCtx_get_bmi2(dctx));
746	RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
747	ip += llhSize;
748	}
749
750	{ size_t const ofhSize = ZSTD_buildSeqTable(DTableSpace: dctx->entropy.OFTable, DTablePtr: &dctx->OFTptr,
751	type: OFtype, MaxOff, OffFSELog,
752	src: ip, srcSize: iend-ip,
753	baseValue: OF_base, nbAdditionalBits: OF_bits,
754	defaultTable: OF_defaultDTable, flagRepeatTable: dctx->fseEntropy,
755	ddictIsCold: dctx->ddictIsCold, nbSeq,
756	wksp: dctx->workspace, wkspSize: sizeof(dctx->workspace),
757	bmi2: ZSTD_DCtx_get_bmi2(dctx));
758	RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
759	ip += ofhSize;
760	}
761
762	{ size_t const mlhSize = ZSTD_buildSeqTable(DTableSpace: dctx->entropy.MLTable, DTablePtr: &dctx->MLTptr,
763	type: MLtype, MaxML, MLFSELog,
764	src: ip, srcSize: iend-ip,
765	baseValue: ML_base, nbAdditionalBits: ML_bits,
766	defaultTable: ML_defaultDTable, flagRepeatTable: dctx->fseEntropy,
767	ddictIsCold: dctx->ddictIsCold, nbSeq,
768	wksp: dctx->workspace, wkspSize: sizeof(dctx->workspace),
769	bmi2: ZSTD_DCtx_get_bmi2(dctx));
770	RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
771	ip += mlhSize;
772	}
773	}
774
775	return ip-istart;
776	}
777
778
779	typedef struct {
780	size_t litLength;
781	size_t matchLength;
782	size_t offset;
783	} seq_t;
784
785	typedef struct {
786	size_t state;
787	const ZSTD_seqSymbol* table;
788	} ZSTD_fseState;
789
790	typedef struct {
791	BIT_DStream_t DStream;
792	ZSTD_fseState stateLL;
793	ZSTD_fseState stateOffb;
794	ZSTD_fseState stateML;
795	size_t prevOffset[ZSTD_REP_NUM];
796	} seqState_t;
797
798	/! ZSTD_overlapCopy8() :*
799	* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
800	* If the offset is < 8 then the offset is spread to at least 8 bytes.
801	*
802	* Precondition: ip <= op
803	* Postcondition: op - op >= 8
804	*/
805	HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
806	assert(ip <= op);
807	if (offset < `8`) {
808	/ close range match, overlap /
809	static const U32 dec32table[] = { `0`, `1`, `2`, `1`, `4`, `4`, `4`, `4` }; / added /
810	static const int dec64table[] = { `8`, `8`, `8`, `7`, `8`, `9`,`10`,`11` }; / subtracted /
811	int const sub2 = dec64table[offset];
812	(op)[`0`] = (ip)[`0`];
813	(op)[`1`] = (ip)[`1`];
814	(op)[`2`] = (ip)[`2`];
815	(op)[`3`] = (ip)[`3`];
816	*ip += dec32table[offset];
817	ZSTD_copy4(dst: op+`4`, src: ip);
818	*ip -= sub2;
819	} else {
820	ZSTD_copy8(dst: op, src: ip);
821	}
822	*ip += `8`;
823	*op += `8`;
824	assert(op - ip >= `8`);
825	}
826
827	/! ZSTD_safecopy() :*
828	* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
829	* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
830	* This function is only called in the uncommon case where the sequence is near the end of the block. It
831	* should be fast for a single long sequence, but can be slow for several short sequences.
832	*
833	* @param ovtype controls the overlap detection
834	* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
835	* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
836	* The src buffer must be before the dst buffer.
837	*/
838	static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
839	ptrdiff_t const diff = op - ip;
840	BYTE* const oend = op + length;
841
842	assert((ovtype == ZSTD_no_overlap && (diff <= -`8` \|\| diff >= `8` \|\| op >= oend_w)) \|\|
843	(ovtype == ZSTD_overlap_src_before_dst && diff >= `0`));
844
845	if (length < `8`) {
846	/ Handle short lengths. /
847	while (op < oend) op++ = ip++;
848	return;
849	}
850	if (ovtype == ZSTD_overlap_src_before_dst) {
851	/ Copy 8 bytes and ensure the offset >= 8 when there can be overlap. /
852	assert(length >= `8`);
853	ZSTD_overlapCopy8(op: &op, ip: &ip, offset: diff);
854	length -= `8`;
855	assert(op - ip >= `8`);
856	assert(op <= oend);
857	}
858
859	if (oend <= oend_w) {
860	/ No risk of overwrite. /
861	ZSTD_wildcopy(dst: op, src: ip, length, ovtype);
862	return;
863	}
864	if (op <= oend_w) {
865	/ Wildcopy until we get close to the end. /
866	assert(oend > oend_w);
867	ZSTD_wildcopy(dst: op, src: ip, length: oend_w - op, ovtype);
868	ip += oend_w - op;
869	op += oend_w - op;
870	}
871	/ Handle the leftovers. /
872	while (op < oend) op++ = ip++;
873	}
874
875	/ ZSTD_safecopyDstBeforeSrc():*
876	* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
877	* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
878	static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
879	ptrdiff_t const diff = op - ip;
880	BYTE* const oend = op + length;
881
882	if (length < `8` \|\| diff > -`8`) {
883	/ Handle short lengths, close overlaps, and dst not before src. /
884	while (op < oend) op++ = ip++;
885	return;
886	}
887
888	if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
889	ZSTD_wildcopy(dst: op, src: ip, length: oend - WILDCOPY_OVERLENGTH - op, ovtype: ZSTD_no_overlap);
890	ip += oend - WILDCOPY_OVERLENGTH - op;
891	op += oend - WILDCOPY_OVERLENGTH - op;
892	}
893
894	/ Handle the leftovers. /
895	while (op < oend) op++ = ip++;
896	}
897
898	/ ZSTD_execSequenceEnd():*
899	* This version handles cases that are near the end of the output buffer. It requires
900	* more careful checks to make sure there is no overflow. By separating out these hard
901	* and unlikely cases, we can speed up the common cases.
902	*
903	* NOTE: This function needs to be fast for a single long sequence, but doesn't need
904	* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
905	*/
906	FORCE_NOINLINE
907	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
908	size_t ZSTD_execSequenceEnd(BYTE* op,
909	BYTE* const oend, seq_t sequence,
910	const BYTE** litPtr, const BYTE* const litLimit,
911	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
912	{
913	BYTE* const oLitEnd = op + sequence.litLength;
914	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
915	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
916	const BYTE* match = oLitEnd - sequence.offset;
917	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
918
919	/ bounds checks : careful of address space overflow in 32-bit mode /
920	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
921	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
922	assert(op < op + sequenceLength);
923	assert(oLitEnd < op + sequenceLength);
924
925	/ copy literals /
926	ZSTD_safecopy(op, oend_w, ip: *litPtr, length: sequence.litLength, ovtype: ZSTD_no_overlap);
927	op = oLitEnd;
928	*litPtr = iLitEnd;
929
930	/ copy Match /
931	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
932	/ offset beyond prefix /
933	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
934	match = dictEnd - (prefixStart - match);
935	if (match + sequence.matchLength <= dictEnd) {
936	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
937	return sequenceLength;
938	}
939	/ span extDict & currentPrefixSegment /
940	{ size_t const length1 = dictEnd - match;
941	ZSTD_memmove(oLitEnd, match, length1);
942	op = oLitEnd + length1;
943	sequence.matchLength -= length1;
944	match = prefixStart;
945	}
946	}
947	ZSTD_safecopy(op, oend_w, ip: match, length: sequence.matchLength, ovtype: ZSTD_overlap_src_before_dst);
948	return sequenceLength;
949	}
950
951	/ ZSTD_execSequenceEndSplitLitBuffer():*
952	* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
953	*/
954	FORCE_NOINLINE
955	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
956	size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
957	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
958	const BYTE** litPtr, const BYTE* const litLimit,
959	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
960	{
961	BYTE* const oLitEnd = op + sequence.litLength;
962	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
963	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
964	const BYTE* match = oLitEnd - sequence.offset;
965
966
967	/ bounds checks : careful of address space overflow in 32-bit mode /
968	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
969	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
970	assert(op < op + sequenceLength);
971	assert(oLitEnd < op + sequenceLength);
972
973	/ copy literals /
974	RETURN_ERROR_IF(op > litPtr && op < litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
975	ZSTD_safecopyDstBeforeSrc(op, ip: *litPtr, length: sequence.litLength);
976	op = oLitEnd;
977	*litPtr = iLitEnd;
978
979	/ copy Match /
980	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
981	/ offset beyond prefix /
982	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
983	match = dictEnd - (prefixStart - match);
984	if (match + sequence.matchLength <= dictEnd) {
985	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
986	return sequenceLength;
987	}
988	/ span extDict & currentPrefixSegment /
989	{ size_t const length1 = dictEnd - match;
990	ZSTD_memmove(oLitEnd, match, length1);
991	op = oLitEnd + length1;
992	sequence.matchLength -= length1;
993	match = prefixStart;
994	}
995	}
996	ZSTD_safecopy(op, oend_w, ip: match, length: sequence.matchLength, ovtype: ZSTD_overlap_src_before_dst);
997	return sequenceLength;
998	}
999
1000	HINT_INLINE
1001	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1002	size_t ZSTD_execSequence(BYTE* op,
1003	BYTE* const oend, seq_t sequence,
1004	const BYTE** litPtr, const BYTE* const litLimit,
1005	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1006	{
1007	BYTE* const oLitEnd = op + sequence.litLength;
1008	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1009	BYTE* const oMatchEnd = op + sequenceLength; / risk : address space overflow (32-bits) /
1010	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; / risk : address space underflow on oend=NULL /
1011	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1012	const BYTE* match = oLitEnd - sequence.offset;
1013
1014	assert(op != NULL / Precondition /);
1015	assert(oend_w < oend / No underflow /);
1016
1017	#if defined(__aarch64__)
1018	/ prefetch sequence starting from match that will be used for copy later /
1019	PREFETCH_L1(match);
1020	#endif
1021	/ Handle edge cases in a slow path:*
1022	* - Read beyond end of literals
1023	* - Match end is within WILDCOPY_OVERLIMIT of oend
1024	* - 32-bit mode and the match length overflows
1025	*/
1026	if (UNLIKELY(
1027	iLitEnd > litLimit \|\|
1028	oMatchEnd > oend_w \|\|
1029	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1030	return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1031
1032	/ Assumptions (everything else goes into ZSTD_execSequenceEnd()) /
1033	assert(op <= oLitEnd / No overflow /);
1034	assert(oLitEnd < oMatchEnd / Non-zero match & no overflow /);
1035	assert(oMatchEnd <= oend / No underflow /);
1036	assert(iLitEnd <= litLimit / Literal length is in bounds /);
1037	assert(oLitEnd <= oend_w / Can wildcopy literals /);
1038	assert(oMatchEnd <= oend_w / Can wildcopy matches /);
1039
1040	/ Copy Literals:*
1041	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1042	* We likely don't need the full 32-byte wildcopy.
1043	*/
1044	assert(WILDCOPY_OVERLENGTH >= `16`);
1045	ZSTD_copy16(dst: op, src: (*litPtr));
1046	if (UNLIKELY(sequence.litLength > `16`)) {
1047	ZSTD_wildcopy(dst: op + `16`, src: (*litPtr) + `16`, length: sequence.litLength - `16`, ovtype: ZSTD_no_overlap);
1048	}
1049	op = oLitEnd;
1050	litPtr = iLitEnd; /* update for next sequence /
1051
1052	/ Copy Match /
1053	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1054	/ offset beyond prefix -> go into extDict /
1055	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1056	match = dictEnd + (match - prefixStart);
1057	if (match + sequence.matchLength <= dictEnd) {
1058	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1059	return sequenceLength;
1060	}
1061	/ span extDict & currentPrefixSegment /
1062	{ size_t const length1 = dictEnd - match;
1063	ZSTD_memmove(oLitEnd, match, length1);
1064	op = oLitEnd + length1;
1065	sequence.matchLength -= length1;
1066	match = prefixStart;
1067	}
1068	}
1069	/ Match within prefix of 1 or more bytes /
1070	assert(op <= oMatchEnd);
1071	assert(oMatchEnd <= oend_w);
1072	assert(match >= prefixStart);
1073	assert(sequence.matchLength >= `1`);
1074
1075	/ Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy*
1076	* without overlap checking.
1077	*/
1078	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1079	/ We bet on a full wildcopy for matches, since we expect matches to be*
1080	* longer than literals (in general). In silesia, ~10% of matches are longer
1081	* than 16 bytes.
1082	*/
1083	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength, ovtype: ZSTD_no_overlap);
1084	return sequenceLength;
1085	}
1086	assert(sequence.offset < WILDCOPY_VECLEN);
1087
1088	/ Copy 8 bytes and spread the offset to be >= 8. /
1089	ZSTD_overlapCopy8(op: &op, ip: &match, offset: sequence.offset);
1090
1091	/ If the match length is > 8 bytes, then continue with the wildcopy. /
1092	if (sequence.matchLength > `8`) {
1093	assert(op < oMatchEnd);
1094	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength - `8`, ovtype: ZSTD_overlap_src_before_dst);
1095	}
1096	return sequenceLength;
1097	}
1098
1099	HINT_INLINE
1100	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1101	size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1102	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1103	const BYTE** litPtr, const BYTE* const litLimit,
1104	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1105	{
1106	BYTE* const oLitEnd = op + sequence.litLength;
1107	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1108	BYTE* const oMatchEnd = op + sequenceLength; / risk : address space overflow (32-bits) /
1109	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1110	const BYTE* match = oLitEnd - sequence.offset;
1111
1112	assert(op != NULL / Precondition /);
1113	assert(oend_w < oend / No underflow /);
1114	/ Handle edge cases in a slow path:*
1115	* - Read beyond end of literals
1116	* - Match end is within WILDCOPY_OVERLIMIT of oend
1117	* - 32-bit mode and the match length overflows
1118	*/
1119	if (UNLIKELY(
1120	iLitEnd > litLimit \|\|
1121	oMatchEnd > oend_w \|\|
1122	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1123	return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1124
1125	/ Assumptions (everything else goes into ZSTD_execSequenceEnd()) /
1126	assert(op <= oLitEnd / No overflow /);
1127	assert(oLitEnd < oMatchEnd / Non-zero match & no overflow /);
1128	assert(oMatchEnd <= oend / No underflow /);
1129	assert(iLitEnd <= litLimit / Literal length is in bounds /);
1130	assert(oLitEnd <= oend_w / Can wildcopy literals /);
1131	assert(oMatchEnd <= oend_w / Can wildcopy matches /);
1132
1133	/ Copy Literals:*
1134	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1135	* We likely don't need the full 32-byte wildcopy.
1136	*/
1137	assert(WILDCOPY_OVERLENGTH >= `16`);
1138	ZSTD_copy16(dst: op, src: (*litPtr));
1139	if (UNLIKELY(sequence.litLength > `16`)) {
1140	ZSTD_wildcopy(dst: op+`16`, src: (*litPtr)+`16`, length: sequence.litLength-`16`, ovtype: ZSTD_no_overlap);
1141	}
1142	op = oLitEnd;
1143	litPtr = iLitEnd; /* update for next sequence /
1144
1145	/ Copy Match /
1146	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1147	/ offset beyond prefix -> go into extDict /
1148	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1149	match = dictEnd + (match - prefixStart);
1150	if (match + sequence.matchLength <= dictEnd) {
1151	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1152	return sequenceLength;
1153	}
1154	/ span extDict & currentPrefixSegment /
1155	{ size_t const length1 = dictEnd - match;
1156	ZSTD_memmove(oLitEnd, match, length1);
1157	op = oLitEnd + length1;
1158	sequence.matchLength -= length1;
1159	match = prefixStart;
1160	} }
1161	/ Match within prefix of 1 or more bytes /
1162	assert(op <= oMatchEnd);
1163	assert(oMatchEnd <= oend_w);
1164	assert(match >= prefixStart);
1165	assert(sequence.matchLength >= `1`);
1166
1167	/ Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy*
1168	* without overlap checking.
1169	*/
1170	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1171	/ We bet on a full wildcopy for matches, since we expect matches to be*
1172	* longer than literals (in general). In silesia, ~10% of matches are longer
1173	* than 16 bytes.
1174	*/
1175	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength, ovtype: ZSTD_no_overlap);
1176	return sequenceLength;
1177	}
1178	assert(sequence.offset < WILDCOPY_VECLEN);
1179
1180	/ Copy 8 bytes and spread the offset to be >= 8. /
1181	ZSTD_overlapCopy8(op: &op, ip: &match, offset: sequence.offset);
1182
1183	/ If the match length is > 8 bytes, then continue with the wildcopy. /
1184	if (sequence.matchLength > `8`) {
1185	assert(op < oMatchEnd);
1186	ZSTD_wildcopy(dst: op, src: match, length: (ptrdiff_t)sequence.matchLength-`8`, ovtype: ZSTD_overlap_src_before_dst);
1187	}
1188	return sequenceLength;
1189	}
1190
1191
1192	static void
1193	ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1194	{
1195	const void* ptr = dt;
1196	const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1197	DStatePtr->state = BIT_readBits(bitD, nbBits: DTableH->tableLog);
1198	DEBUGLOG(`6`, "ZSTD_initFseState : val=%u using %u bits",
1199	(U32)DStatePtr->state, DTableH->tableLog);
1200	BIT_reloadDStream(bitD);
1201	DStatePtr->table = dt + `1`;
1202	}
1203
1204	FORCE_INLINE_TEMPLATE void
1205	ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1206	{
1207	size_t const lowBits = BIT_readBits(bitD, nbBits);
1208	DStatePtr->state = nextState + lowBits;
1209	}
1210
1211	/ We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum*
1212	* offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1213	* bits before reloading. This value is the maximum number of bytes we read
1214	* after reloading when we are decoding long offsets.
1215	*/
1216	#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1217	(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1218	? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1219	: 0)
1220
1221	typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=`1` } ZSTD_longOffset_e;
1222
1223	/*
1224	* ZSTD_decodeSequence():
1225	* @p longOffsets : tells the decoder to reload more bit while decoding large offsets
1226	* only used in 32-bit mode
1227	* @return : Sequence (litL + matchL + offset)
1228	*/
1229	FORCE_INLINE_TEMPLATE seq_t
1230	ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
1231	{
1232	seq_t seq;
1233	/*
1234	* ZSTD_seqSymbol is a 64 bits wide structure.
1235	* It can be loaded in one operation
1236	* and its fields extracted by simply shifting or bit-extracting on aarch64.
1237	* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
1238	* operations that cause performance drop. This can be avoided by using this
1239	* ZSTD_memcpy hack.
1240	*/
1241	#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1242	ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1243	ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1244	ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1245	ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1246	ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1247	ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1248	ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1249	#else
1250	const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1251	const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1252	const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1253	#endif
1254	seq.matchLength = mlDInfo->baseValue;
1255	seq.litLength = llDInfo->baseValue;
1256	{ U32 const ofBase = ofDInfo->baseValue;
1257	BYTE const llBits = llDInfo->nbAdditionalBits;
1258	BYTE const mlBits = mlDInfo->nbAdditionalBits;
1259	BYTE const ofBits = ofDInfo->nbAdditionalBits;
1260	BYTE const totalBits = llBits+mlBits+ofBits;
1261
1262	U16 const llNext = llDInfo->nextState;
1263	U16 const mlNext = mlDInfo->nextState;
1264	U16 const ofNext = ofDInfo->nextState;
1265	U32 const llnbBits = llDInfo->nbBits;
1266	U32 const mlnbBits = mlDInfo->nbBits;
1267	U32 const ofnbBits = ofDInfo->nbBits;
1268
1269	assert(llBits <= MaxLLBits);
1270	assert(mlBits <= MaxMLBits);
1271	assert(ofBits <= MaxOff);
1272	/*
1273	* As gcc has better branch and block analyzers, sometimes it is only
1274	* valuable to mark likeliness for clang, it gives around 3-4% of
1275	* performance.
1276	*/
1277
1278	/ sequence /
1279	{ size_t offset;
1280	if (ofBits > `1`) {
1281	ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == `1`);
1282	ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == `5`);
1283	ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1284	ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1285	if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1286	/ Always read extra bits, this keeps the logic simple,*
1287	* avoids branches, and avoids accidentally reading 0 bits.
1288	*/
1289	U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1290	offset = ofBase + (BIT_readBitsFast(bitD: &seqState->DStream, nbBits: ofBits - extraBits) << extraBits);
1291	BIT_reloadDStream(bitD: &seqState->DStream);
1292	offset += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: extraBits);
1293	} else {
1294	offset = ofBase + BIT_readBitsFast(bitD: &seqState->DStream, nbBits: ofBits/>0/); / <= (ZSTD_WINDOWLOG_MAX-1) bits /
1295	if (MEM_32bits()) BIT_reloadDStream(bitD: &seqState->DStream);
1296	}
1297	seqState->prevOffset[`2`] = seqState->prevOffset[`1`];
1298	seqState->prevOffset[`1`] = seqState->prevOffset[`0`];
1299	seqState->prevOffset[`0`] = offset;
1300	} else {
1301	U32 const ll0 = (llDInfo->baseValue == `0`);
1302	if (LIKELY((ofBits == `0`))) {
1303	offset = seqState->prevOffset[ll0];
1304	seqState->prevOffset[`1`] = seqState->prevOffset[!ll0];
1305	seqState->prevOffset[`0`] = offset;
1306	} else {
1307	offset = ofBase + ll0 + BIT_readBitsFast(bitD: &seqState->DStream, nbBits: `1`);
1308	{ size_t temp = (offset==`3`) ? seqState->prevOffset[`0`] - `1` : seqState->prevOffset[offset];
1309	temp -= !temp; / 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence /
1310	if (offset != `1`) seqState->prevOffset[`2`] = seqState->prevOffset[`1`];
1311	seqState->prevOffset[`1`] = seqState->prevOffset[`0`];
1312	seqState->prevOffset[`0`] = offset = temp;
1313	} } }
1314	seq.offset = offset;
1315	}
1316
1317	if (mlBits > `0`)
1318	seq.matchLength += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: mlBits/>0/);
1319
1320	if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1321	BIT_reloadDStream(bitD: &seqState->DStream);
1322	if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1323	BIT_reloadDStream(bitD: &seqState->DStream);
1324	/ Ensure there are enough bits to read the rest of data in 64-bit mode. /
1325	ZSTD_STATIC_ASSERT(`16`+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1326
1327	if (llBits > `0`)
1328	seq.litLength += BIT_readBitsFast(bitD: &seqState->DStream, nbBits: llBits/>0/);
1329
1330	if (MEM_32bits())
1331	BIT_reloadDStream(bitD: &seqState->DStream);
1332
1333	DEBUGLOG(`6`, "seq: litL=%u, matchL=%u, offset=%u",
1334	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1335
1336	if (!isLastSeq) {
1337	/ don't update FSE state for last Sequence /
1338	ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateLL, bitD: &seqState->DStream, nextState: llNext, nbBits: llnbBits); / <= 9 bits /
1339	ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateML, bitD: &seqState->DStream, nextState: mlNext, nbBits: mlnbBits); / <= 9 bits /
1340	if (MEM_32bits()) BIT_reloadDStream(bitD: &seqState->DStream); / <= 18 bits /
1341	ZSTD_updateFseStateWithDInfo(DStatePtr: &seqState->stateOffb, bitD: &seqState->DStream, nextState: ofNext, nbBits: ofnbBits); / <= 8 bits /
1342	BIT_reloadDStream(bitD: &seqState->DStream);
1343	}
1344	}
1345
1346	return seq;
1347	}
1348
1349	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1350	#if DEBUGLEVEL >= 1
1351	static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1352	{
1353	size_t const windowSize = dctx->fParams.windowSize;
1354	/ No dictionary used. /
1355	if (dctx->dictContentEndForFuzzing == NULL) return `0`;
1356	/ Dictionary is our prefix. /
1357	if (prefixStart == dctx->dictContentBeginForFuzzing) return `1`;
1358	/ Dictionary is not our ext-dict. /
1359	if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return `0`;
1360	/ Dictionary is not within our window size. /
1361	if ((size_t)(oLitEnd - prefixStart) >= windowSize) return `0`;
1362	/ Dictionary is active. /
1363	return `1`;
1364	}
1365	#endif
1366
1367	static void ZSTD_assertValidSequence(
1368	ZSTD_DCtx const* dctx,
1369	BYTE const* op, BYTE const* oend,
1370	seq_t const seq,
1371	BYTE const* prefixStart, BYTE const* virtualStart)
1372	{
1373	#if DEBUGLEVEL >= 1
1374	if (dctx->isFrameDecompression) {
1375	size_t const windowSize = dctx->fParams.windowSize;
1376	size_t const sequenceSize = seq.litLength + seq.matchLength;
1377	BYTE const* const oLitEnd = op + seq.litLength;
1378	DEBUGLOG(`6`, "Checking sequence: litL=%u matchL=%u offset=%u",
1379	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1380	assert(op <= oend);
1381	assert((size_t)(oend - op) >= sequenceSize);
1382	assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
1383	if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1384	size_t const dictSize = (size_t)((char const)dctx->dictContentEndForFuzzing - (char* const*)dctx->dictContentBeginForFuzzing);
1385	/ Offset must be within the dictionary. /
1386	assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1387	assert(seq.offset <= windowSize + dictSize);
1388	} else {
1389	/ Offset must be within our window. /
1390	assert(seq.offset <= windowSize);
1391	}
1392	}
1393	#else
1394	(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1395	#endif
1396	}
1397	#endif
1398
1399	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1400
1401
1402	FORCE_INLINE_TEMPLATE size_t
1403	DONT_VECTORIZE
1404	ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1405	void* dst, size_t maxDstSize,
1406	const void* seqStart, size_t seqSize, int nbSeq,
1407	const ZSTD_longOffset_e isLongOffset)
1408	{
1409	const BYTE* ip = (const BYTE*)seqStart;
1410	const BYTE* const iend = ip + seqSize;
1411	BYTE* const ostart = (BYTE*)dst;
1412	BYTE* const oend = ZSTD_maybeNullPtrAdd(ptr: ostart, add: maxDstSize);
1413	BYTE* op = ostart;
1414	const BYTE* litPtr = dctx->litPtr;
1415	const BYTE* litBufferEnd = dctx->litBufferEnd;
1416	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1417	const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1418	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1419	DEBUGLOG(`5`, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
1420
1421	/ Literals are split between internal buffer & output buffer /
1422	if (nbSeq) {
1423	seqState_t seqState;
1424	dctx->fseEntropy = `1`;
1425	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1426	RETURN_ERROR_IF(
1427	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1428	corruption_detected, "");
1429	ZSTD_initFseState(DStatePtr: &seqState.stateLL, bitD: &seqState.DStream, dt: dctx->LLTptr);
1430	ZSTD_initFseState(DStatePtr: &seqState.stateOffb, bitD: &seqState.DStream, dt: dctx->OFTptr);
1431	ZSTD_initFseState(DStatePtr: &seqState.stateML, bitD: &seqState.DStream, dt: dctx->MLTptr);
1432	assert(dst != NULL);
1433
1434	ZSTD_STATIC_ASSERT(
1435	BIT_DStream_unfinished < BIT_DStream_completed &&
1436	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1437	BIT_DStream_completed < BIT_DStream_overflow);
1438
1439	/ decompress without overrunning litPtr begins /
1440	{ seq_t sequence = {`0`,`0`,`0`}; / some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) /
1441	/ Align the decompression loop to 32 + 16 bytes.*
1442	*
1443	* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1444	* speed swings based on the alignment of the decompression loop. This
1445	* performance swing is caused by parts of the decompression loop falling
1446	* out of the DSB. The entire decompression loop should fit in the DSB,
1447	* when it can't we get much worse performance. You can measure if you've
1448	* hit the good case or the bad case with this perf command for some
1449	* compressed file test.zst:
1450	*
1451	* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1452	* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1453	*
1454	* If you see most cycles served out of the MITE you've hit the bad case.
1455	* If you see most cycles served out of the DSB you've hit the good case.
1456	* If it is pretty even then you may be in an okay case.
1457	*
1458	* This issue has been reproduced on the following CPUs:
1459	* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1460	* Use Instruments->Counters to get DSB/MITE cycles.
1461	* I never got performance swings, but I was able to
1462	* go from the good case of mostly DSB to half of the
1463	* cycles served from MITE.
1464	* - Coffeelake: Intel i9-9900k
1465	* - Coffeelake: Intel i7-9700k
1466	*
1467	* I haven't been able to reproduce the instability or DSB misses on any
1468	* of the following CPUS:
1469	* - Haswell
1470	* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1471	* - Skylake
1472	*
1473	* Alignment is done for each of the three major decompression loops:
1474	* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1475	* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1476	* - ZSTD_decompressSequences_body
1477	* Alignment choices are made to minimize large swings on bad cases and influence on performance
1478	* from changes external to this code, rather than to overoptimize on the current commit.
1479	*
1480	* If you are seeing performance stability this script can help test.
1481	* It tests on 4 commits in zstd where I saw performance change.
1482	*
1483	* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1484	*/
1485	#if defined(__x86_64__)
1486	__asm__(".p2align 6");
1487	# if __GNUC__ >= 7
1488	/ good for gcc-7, gcc-9, and gcc-11 /
1489	__asm__("nop");
1490	__asm__(".p2align 5");
1491	__asm__("nop");
1492	__asm__(".p2align 4");
1493	# if __GNUC__ == 8 \|\| __GNUC__ == 10
1494	/ good for gcc-8 and gcc-10 /
1495	__asm__("nop");
1496	__asm__(".p2align 3");
1497	# endif
1498	# endif
1499	#endif
1500
1501	/ Handle the initial state where litBuffer is currently split between dst and litExtraBuffer /
1502	for ( ; nbSeq; nbSeq--) {
1503	sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset, isLastSeq: nbSeq==`1`);
1504	if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
1505	{ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, oend_w: litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: vBase, dictEnd);
1506	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1507	assert(!ZSTD_isError(oneSeqSize));
1508	ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1509	#endif
1510	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1511	return oneSeqSize;
1512	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1513	op += oneSeqSize;
1514	} }
1515	DEBUGLOG(`6`, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
1516
1517	/ If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd /
1518	if (nbSeq > `0`) {
1519	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1520	DEBUGLOG(`6`, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
1521	if (leftoverLit) {
1522	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1523	ZSTD_safecopyDstBeforeSrc(op, ip: litPtr, length: leftoverLit);
1524	sequence.litLength -= leftoverLit;
1525	op += leftoverLit;
1526	}
1527	litPtr = dctx->litExtraBuffer;
1528	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1529	dctx->litBufferLocation = ZSTD_not_in_dst;
1530	{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: vBase, dictEnd);
1531	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1532	assert(!ZSTD_isError(oneSeqSize));
1533	ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1534	#endif
1535	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1536	return oneSeqSize;
1537	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1538	op += oneSeqSize;
1539	}
1540	nbSeq--;
1541	}
1542	}
1543
1544	if (nbSeq > `0`) {
1545	/ there is remaining lit from extra buffer /
1546
1547	#if defined(__x86_64__)
1548	__asm__(".p2align 6");
1549	__asm__("nop");
1550	# if __GNUC__ != 7
1551	/ worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang /
1552	__asm__(".p2align 4");
1553	__asm__("nop");
1554	__asm__(".p2align 3");
1555	# elif __GNUC__ >= 11
1556	__asm__(".p2align 3");
1557	# else
1558	__asm__(".p2align 5");
1559	__asm__("nop");
1560	__asm__(".p2align 3");
1561	# endif
1562	#endif
1563
1564	for ( ; nbSeq ; nbSeq--) {
1565	seq_t const sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset, isLastSeq: nbSeq==`1`);
1566	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: vBase, dictEnd);
1567	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1568	assert(!ZSTD_isError(oneSeqSize));
1569	ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1570	#endif
1571	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1572	return oneSeqSize;
1573	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1574	op += oneSeqSize;
1575	}
1576	}
1577
1578	/ check if reached exact end /
1579	DEBUGLOG(`5`, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1580	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1581	DEBUGLOG(`5`, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
1582	RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1583	/ save reps for next block /
1584	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1585	}
1586
1587	/ last literal segment /
1588	if (dctx->litBufferLocation == ZSTD_split) {
1589	/ split hasn't been reached yet, first get dst then copy litExtraBuffer /
1590	size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1591	DEBUGLOG(`6`, "copy last literals from segment : %u", (U32)lastLLSize);
1592	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1593	if (op != NULL) {
1594	ZSTD_memmove(op, litPtr, lastLLSize);
1595	op += lastLLSize;
1596	}
1597	litPtr = dctx->litExtraBuffer;
1598	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1599	dctx->litBufferLocation = ZSTD_not_in_dst;
1600	}
1601	/ copy last literals from internal buffer /
1602	{ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1603	DEBUGLOG(`6`, "copy last literals from internal buffer : %u", (U32)lastLLSize);
1604	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1605	if (op != NULL) {
1606	ZSTD_memcpy(op, litPtr, lastLLSize);
1607	op += lastLLSize;
1608	} }
1609
1610	DEBUGLOG(`6`, "decoded block of size %u bytes", (U32)(op - ostart));
1611	return (size_t)(op - ostart);
1612	}
1613
1614	FORCE_INLINE_TEMPLATE size_t
1615	DONT_VECTORIZE
1616	ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1617	void* dst, size_t maxDstSize,
1618	const void* seqStart, size_t seqSize, int nbSeq,
1619	const ZSTD_longOffset_e isLongOffset)
1620	{
1621	const BYTE* ip = (const BYTE*)seqStart;
1622	const BYTE* const iend = ip + seqSize;
1623	BYTE* const ostart = (BYTE*)dst;
1624	BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ptr: ostart, add: maxDstSize) : dctx->litBuffer;
1625	BYTE* op = ostart;
1626	const BYTE* litPtr = dctx->litPtr;
1627	const BYTE* const litEnd = litPtr + dctx->litSize;
1628	const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1629	const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1630	const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1631	DEBUGLOG(`5`, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1632
1633	/ Regen sequences /
1634	if (nbSeq) {
1635	seqState_t seqState;
1636	dctx->fseEntropy = `1`;
1637	{ U32 i; for (i = `0`; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1638	RETURN_ERROR_IF(
1639	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1640	corruption_detected, "");
1641	ZSTD_initFseState(DStatePtr: &seqState.stateLL, bitD: &seqState.DStream, dt: dctx->LLTptr);
1642	ZSTD_initFseState(DStatePtr: &seqState.stateOffb, bitD: &seqState.DStream, dt: dctx->OFTptr);
1643	ZSTD_initFseState(DStatePtr: &seqState.stateML, bitD: &seqState.DStream, dt: dctx->MLTptr);
1644	assert(dst != NULL);
1645
1646	#if defined(__x86_64__)
1647	__asm__(".p2align 6");
1648	__asm__("nop");
1649	# if __GNUC__ >= 7
1650	__asm__(".p2align 5");
1651	__asm__("nop");
1652	__asm__(".p2align 3");
1653	# else
1654	__asm__(".p2align 4");
1655	__asm__("nop");
1656	__asm__(".p2align 3");
1657	# endif
1658	#endif
1659
1660	for ( ; nbSeq ; nbSeq--) {
1661	seq_t const sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset, isLastSeq: nbSeq==`1`);
1662	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, litPtr: &litPtr, litLimit: litEnd, prefixStart, virtualStart: vBase, dictEnd);
1663	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1664	assert(!ZSTD_isError(oneSeqSize));
1665	ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1666	#endif
1667	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1668	return oneSeqSize;
1669	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1670	op += oneSeqSize;
1671	}
1672
1673	/ check if reached exact end /
1674	assert(nbSeq == `0`);
1675	RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1676	/ save reps for next block /
1677	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1678	}
1679
1680	/ last literal segment /
1681	{ size_t const lastLLSize = (size_t)(litEnd - litPtr);
1682	DEBUGLOG(`6`, "copy last literals : %u", (U32)lastLLSize);
1683	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1684	if (op != NULL) {
1685	ZSTD_memcpy(op, litPtr, lastLLSize);
1686	op += lastLLSize;
1687	} }
1688
1689	DEBUGLOG(`6`, "decoded block of size %u bytes", (U32)(op - ostart));
1690	return (size_t)(op - ostart);
1691	}
1692
1693	static size_t
1694	ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1695	void* dst, size_t maxDstSize,
1696	const void* seqStart, size_t seqSize, int nbSeq,
1697	const ZSTD_longOffset_e isLongOffset)
1698	{
1699	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1700	}
1701
1702	static size_t
1703	ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1704	void* dst, size_t maxDstSize,
1705	const void* seqStart, size_t seqSize, int nbSeq,
1706	const ZSTD_longOffset_e isLongOffset)
1707	{
1708	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1709	}
1710	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1711
1712	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1713
1714	FORCE_INLINE_TEMPLATE
1715
1716	size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1717	const BYTE* const prefixStart, const BYTE* const dictEnd)
1718	{
1719	prefetchPos += sequence.litLength;
1720	{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1721	/ note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.*
1722	* No consequence though : memory address is only used for prefetching, not for dereferencing */
1723	const BYTE* const match = ZSTD_wrappedPtrSub(ptr: ZSTD_wrappedPtrAdd(ptr: matchBase, add: prefetchPos), sub: sequence.offset);
1724	PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); / note : it's safe to invoke PREFETCH() on any memory address, including invalid ones /
1725	}
1726	return prefetchPos + sequence.matchLength;
1727	}
1728
1729	/ This decoding function employs prefetching*
1730	* to reduce latency impact of cache misses.
1731	* It's generally employed when block contains a significant portion of long-distance matches
1732	* or when coupled with a "cold" dictionary */
1733	FORCE_INLINE_TEMPLATE size_t
1734	ZSTD_decompressSequencesLong_body(
1735	ZSTD_DCtx* dctx,
1736	void* dst, size_t maxDstSize,
1737	const void* seqStart, size_t seqSize, int nbSeq,
1738	const ZSTD_longOffset_e isLongOffset)
1739	{
1740	const BYTE* ip = (const BYTE*)seqStart;
1741	const BYTE* const iend = ip + seqSize;
1742	BYTE* const ostart = (BYTE*)dst;
1743	BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ptr: ostart, add: maxDstSize);
1744	BYTE* op = ostart;
1745	const BYTE* litPtr = dctx->litPtr;
1746	const BYTE* litBufferEnd = dctx->litBufferEnd;
1747	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1748	const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1749	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1750
1751	/ Regen sequences /
1752	if (nbSeq) {
1753	#define STORED_SEQS 8
1754	#define STORED_SEQS_MASK (STORED_SEQS-1)
1755	#define ADVANCED_SEQS STORED_SEQS
1756	seq_t sequences[STORED_SEQS];
1757	int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1758	seqState_t seqState;
1759	int seqNb;
1760	size_t prefetchPos = (size_t)(op-prefixStart); / track position relative to prefixStart /
1761
1762	dctx->fseEntropy = `1`;
1763	{ int i; for (i=`0`; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1764	assert(dst != NULL);
1765	assert(iend >= ip);
1766	RETURN_ERROR_IF(
1767	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1768	corruption_detected, "");
1769	ZSTD_initFseState(DStatePtr: &seqState.stateLL, bitD: &seqState.DStream, dt: dctx->LLTptr);
1770	ZSTD_initFseState(DStatePtr: &seqState.stateOffb, bitD: &seqState.DStream, dt: dctx->OFTptr);
1771	ZSTD_initFseState(DStatePtr: &seqState.stateML, bitD: &seqState.DStream, dt: dctx->MLTptr);
1772
1773	/ prepare in advance /
1774	for (seqNb=`0`; seqNb<seqAdvance; seqNb++) {
1775	seq_t const sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset, isLastSeq: seqNb == nbSeq-`1`);
1776	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1777	sequences[seqNb] = sequence;
1778	}
1779
1780	/ decompress without stomping litBuffer /
1781	for (; seqNb < nbSeq; seqNb++) {
1782	seq_t sequence = ZSTD_decodeSequence(seqState: &seqState, longOffsets: isLongOffset, isLastSeq: seqNb == nbSeq-`1`);
1783
1784	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
1785	/ lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer /
1786	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1787	if (leftoverLit)
1788	{
1789	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1790	ZSTD_safecopyDstBeforeSrc(op, ip: litPtr, length: leftoverLit);
1791	sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1792	op += leftoverLit;
1793	}
1794	litPtr = dctx->litExtraBuffer;
1795	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1796	dctx->litBufferLocation = ZSTD_not_in_dst;
1797	{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence: sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1798	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1799	assert(!ZSTD_isError(oneSeqSize));
1800	ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1801	#endif
1802	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1803
1804	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1805	sequences[seqNb & STORED_SEQS_MASK] = sequence;
1806	op += oneSeqSize;
1807	} }
1808	else
1809	{
1810	/ lit buffer is either wholly contained in first or second split, or not split at all/
1811	size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1812	ZSTD_execSequenceSplitLitBuffer(op, oend, oend_w: litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequence: sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd) :
1813	ZSTD_execSequence(op, oend, sequence: sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1814	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1815	assert(!ZSTD_isError(oneSeqSize));
1816	ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1817	#endif
1818	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1819
1820	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1821	sequences[seqNb & STORED_SEQS_MASK] = sequence;
1822	op += oneSeqSize;
1823	}
1824	}
1825	RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1826
1827	/ finish queue /
1828	seqNb -= seqAdvance;
1829	for ( ; seqNb<nbSeq ; seqNb++) {
1830	seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1831	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
1832	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1833	if (leftoverLit) {
1834	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1835	ZSTD_safecopyDstBeforeSrc(op, ip: litPtr, length: leftoverLit);
1836	sequence->litLength -= leftoverLit;
1837	op += leftoverLit;
1838	}
1839	litPtr = dctx->litExtraBuffer;
1840	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1841	dctx->litBufferLocation = ZSTD_not_in_dst;
1842	{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence: *sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1843	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1844	assert(!ZSTD_isError(oneSeqSize));
1845	ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1846	#endif
1847	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1848	op += oneSeqSize;
1849	}
1850	}
1851	else
1852	{
1853	size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1854	ZSTD_execSequenceSplitLitBuffer(op, oend, oend_w: litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, sequence: *sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd) :
1855	ZSTD_execSequence(op, oend, sequence: *sequence, litPtr: &litPtr, litLimit: litBufferEnd, prefixStart, virtualStart: dictStart, dictEnd);
1856	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1857	assert(!ZSTD_isError(oneSeqSize));
1858	ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1859	#endif
1860	if (ZSTD_isError(code: oneSeqSize)) return oneSeqSize;
1861	op += oneSeqSize;
1862	}
1863	}
1864
1865	/ save reps for next block /
1866	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1867	}
1868
1869	/ last literal segment /
1870	if (dctx->litBufferLocation == ZSTD_split) { / first deplete literal buffer in dst, then copy litExtraBuffer /
1871	size_t const lastLLSize = litBufferEnd - litPtr;
1872	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1873	if (op != NULL) {
1874	ZSTD_memmove(op, litPtr, lastLLSize);
1875	op += lastLLSize;
1876	}
1877	litPtr = dctx->litExtraBuffer;
1878	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1879	}
1880	{ size_t const lastLLSize = litBufferEnd - litPtr;
1881	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1882	if (op != NULL) {
1883	ZSTD_memmove(op, litPtr, lastLLSize);
1884	op += lastLLSize;
1885	}
1886	}
1887
1888	return (size_t)(op - ostart);
1889	}
1890
1891	static size_t
1892	ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1893	void* dst, size_t maxDstSize,
1894	const void* seqStart, size_t seqSize, int nbSeq,
1895	const ZSTD_longOffset_e isLongOffset)
1896	{
1897	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1898	}
1899	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1900
1901
1902
1903	#if DYNAMIC_BMI2
1904
1905	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1906	static BMI2_TARGET_ATTRIBUTE size_t
1907	DONT_VECTORIZE
1908	ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1909	void* dst, size_t maxDstSize,
1910	const void* seqStart, size_t seqSize, int nbSeq,
1911	const ZSTD_longOffset_e isLongOffset)
1912	{
1913	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1914	}
1915	static BMI2_TARGET_ATTRIBUTE size_t
1916	DONT_VECTORIZE
1917	ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1918	void* dst, size_t maxDstSize,
1919	const void* seqStart, size_t seqSize, int nbSeq,
1920	const ZSTD_longOffset_e isLongOffset)
1921	{
1922	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1923	}
1924	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1925
1926	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1927	static BMI2_TARGET_ATTRIBUTE size_t
1928	ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1929	void* dst, size_t maxDstSize,
1930	const void* seqStart, size_t seqSize, int nbSeq,
1931	const ZSTD_longOffset_e isLongOffset)
1932	{
1933	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1934	}
1935	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1936
1937	#endif /* DYNAMIC_BMI2 */
1938
1939	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1940	static size_t
1941	ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1942	const void* seqStart, size_t seqSize, int nbSeq,
1943	const ZSTD_longOffset_e isLongOffset)
1944	{
1945	DEBUGLOG(`5`, "ZSTD_decompressSequences");
1946	#if DYNAMIC_BMI2
1947	if (ZSTD_DCtx_get_bmi2(dctx)) {
1948	return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1949	}
1950	#endif
1951	return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1952	}
1953	static size_t
1954	ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1955	const void* seqStart, size_t seqSize, int nbSeq,
1956	const ZSTD_longOffset_e isLongOffset)
1957	{
1958	DEBUGLOG(`5`, "ZSTD_decompressSequencesSplitLitBuffer");
1959	#if DYNAMIC_BMI2
1960	if (ZSTD_DCtx_get_bmi2(dctx)) {
1961	return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1962	}
1963	#endif
1964	return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1965	}
1966	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1967
1968
1969	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1970	/ ZSTD_decompressSequencesLong() :*
1971	* decompression function triggered when a minimum share of offsets is considered "long",
1972	* aka out of cache.
1973	* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1974	* This function will try to mitigate main memory latency through the use of prefetching */
1975	static size_t
1976	ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1977	void* dst, size_t maxDstSize,
1978	const void* seqStart, size_t seqSize, int nbSeq,
1979	const ZSTD_longOffset_e isLongOffset)
1980	{
1981	DEBUGLOG(`5`, "ZSTD_decompressSequencesLong");
1982	#if DYNAMIC_BMI2
1983	if (ZSTD_DCtx_get_bmi2(dctx)) {
1984	return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1985	}
1986	#endif
1987	return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1988	}
1989	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1990
1991
1992	/*
1993	* @returns The total size of the history referenceable by zstd, including
1994	* both the prefix and the extDict. At @p op any offset larger than this
1995	* is invalid.
1996	*/
1997	static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1998	{
1999	return (size_t)(op - virtualStart);
2000	}
2001
2002	typedef struct {
2003	unsigned longOffsetShare;
2004	unsigned maxNbAdditionalBits;
2005	} ZSTD_OffsetInfo;
2006
2007	/ ZSTD_getOffsetInfo() :*
2008	* condition : offTable must be valid
2009	* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
2010	* compared to maximum possible of (1<<OffFSELog),
2011	* as well as the maximum number additional bits required.
2012	*/
2013	static ZSTD_OffsetInfo
2014	ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
2015	{
2016	ZSTD_OffsetInfo info = {`0`, `0`};
2017	/ If nbSeq == 0, then the offTable is uninitialized, but we have*
2018	* no sequences, so both values should be 0.
2019	*/
2020	if (nbSeq != `0`) {
2021	const void* ptr = offTable;
2022	U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[`0`].tableLog;
2023	const ZSTD_seqSymbol* table = offTable + `1`;
2024	U32 const max = `1` << tableLog;
2025	U32 u;
2026	DEBUGLOG(`5`, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2027
2028	assert(max <= (`1` << OffFSELog)); / max not too large /
2029	for (u=`0`; u<max; u++) {
2030	info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2031	if (table[u].nbAdditionalBits > `22`) info.longOffsetShare += `1`;
2032	}
2033
2034	assert(tableLog <= OffFSELog);
2035	info.longOffsetShare <<= (OffFSELog - tableLog); / scale to OffFSELog /
2036	}
2037
2038	return info;
2039	}
2040
2041	/*
2042	* @returns The maximum offset we can decode in one read of our bitstream, without
2043	* reloading more bits in the middle of the offset bits read. Any offsets larger
2044	* than this must use the long offset decoder.
2045	*/
2046	static size_t ZSTD_maxShortOffset(void)
2047	{
2048	if (MEM_64bits()) {
2049	/ We can decode any offset without reloading bits.*
2050	* This might change if the max window size grows.
2051	*/
2052	ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= `31`);
2053	return (size_t)-`1`;
2054	} else {
2055	/ The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.*
2056	* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2057	* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2058	*/
2059	size_t const maxOffbase = ((size_t)`1` << (STREAM_ACCUMULATOR_MIN + `1`)) - `1`;
2060	size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2061	assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2062	return maxOffset;
2063	}
2064	}
2065
2066	size_t
2067	ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2068	void* dst, size_t dstCapacity,
2069	const void* src, size_t srcSize, const streaming_operation streaming)
2070	{ / blockType == blockCompressed /
2071	const BYTE* ip = (const BYTE*)src;
2072	DEBUGLOG(`5`, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
2073
2074	/ Note : the wording of the specification*
2075	* allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
2076	* This generally does not happen, as it makes little sense,
2077	* since an uncompressed block would feature same size and have no decompression cost.
2078	* Also, note that decoder from reference libzstd before < v1.5.4
2079	* would consider this edge case as an error.
2080	* As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
2081	* for broader compatibility with the deployed ecosystem of zstd decoders */
2082	RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
2083
2084	/ Decode literals section /
2085	{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2086	DEBUGLOG(`5`, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
2087	if (ZSTD_isError(code: litCSize)) return litCSize;
2088	ip += litCSize;
2089	srcSize -= litCSize;
2090	}
2091
2092	/ Build Decoding Tables /
2093	{
2094	/ Compute the maximum block size, which must also work when !frame and fParams are unset.*
2095	* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2096	*/
2097	size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
2098	size_t const totalHistorySize = ZSTD_totalHistorySize(op: ZSTD_maybeNullPtrAdd(ptr: (BYTE)dst, add: blockSizeMax), virtualStart: (BYTE const**)dctx->virtualStart);
2099	/ isLongOffset must be true if there are long offsets.*
2100	* Offsets are long if they are larger than ZSTD_maxShortOffset().
2101	* We don't expect that to be the case in 64-bit mode.
2102	*
2103	* We check here to see if our history is large enough to allow long offsets.
2104	* If it isn't, then we can't possible have (valid) long offsets. If the offset
2105	* is invalid, then it is okay to read it incorrectly.
2106	*
2107	* If isLongOffsets is true, then we will later check our decoding table to see
2108	* if it is even possible to generate long offsets.
2109	*/
2110	ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2111	/ These macros control at build-time which decompressor implementation*
2112	* we use. If neither is defined, we do some inspection and dispatch at
2113	* runtime.
2114	*/
2115	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2116	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2117	int usePrefetchDecoder = dctx->ddictIsCold;
2118	#else
2119	/ Set to 1 to avoid computing offset info if we don't need to.*
2120	* Otherwise this value is ignored.
2121	*/
2122	int usePrefetchDecoder = `1`;
2123	#endif
2124	int nbSeq;
2125	size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, nbSeqPtr: &nbSeq, src: ip, srcSize);
2126	if (ZSTD_isError(code: seqHSize)) return seqHSize;
2127	ip += seqHSize;
2128	srcSize -= seqHSize;
2129
2130	RETURN_ERROR_IF((dst == NULL \|\| dstCapacity == `0`) && nbSeq > `0`, dstSize_tooSmall, "NULL not handled");
2131	RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-`1`) - (size_t)dst < (size_t)(`1` << `20`), dstSize_tooSmall,
2132	"invalid dst");
2133
2134	/ If we could potentially have long offsets, or we might want to use the prefetch decoder,*
2135	* compute information about the share of long offsets, and the maximum nbAdditionalBits.
2136	* NOTE: could probably use a larger nbSeq limit
2137	*/
2138	if (isLongOffset \|\| (!usePrefetchDecoder && (totalHistorySize > (`1u` << `24`)) && (nbSeq > `8`))) {
2139	ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(offTable: dctx->OFTptr, nbSeq);
2140	if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2141	/ If isLongOffset, but the maximum number of additional bits that we see in our table is small*
2142	* enough, then we know it is impossible to have too long an offset in this block, so we can
2143	* use the regular offset decoder.
2144	*/
2145	isLongOffset = ZSTD_lo_isRegularOffset;
2146	}
2147	if (!usePrefetchDecoder) {
2148	U32 const minShare = MEM_64bits() ? `7` : `20`; / heuristic values, correspond to 2.73% and 7.81% /
2149	usePrefetchDecoder = (info.longOffsetShare >= minShare);
2150	}
2151	}
2152
2153	dctx->ddictIsCold = `0`;
2154
2155	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2156	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2157	if (usePrefetchDecoder) {
2158	#else
2159	(void)usePrefetchDecoder;
2160	{
2161	#endif
2162	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2163	return ZSTD_decompressSequencesLong(dctx, dst, maxDstSize: dstCapacity, seqStart: ip, seqSize: srcSize, nbSeq, isLongOffset);
2164	#endif
2165	}
2166
2167	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2168	/ else /
2169	if (dctx->litBufferLocation == ZSTD_split)
2170	return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, maxDstSize: dstCapacity, seqStart: ip, seqSize: srcSize, nbSeq, isLongOffset);
2171	else
2172	return ZSTD_decompressSequences(dctx, dst, maxDstSize: dstCapacity, seqStart: ip, seqSize: srcSize, nbSeq, isLongOffset);
2173	#endif
2174	}
2175	}
2176
2177
2178	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
2179	void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2180	{
2181	if (dst != dctx->previousDstEnd && dstSize > `0`) { / not contiguous /
2182	dctx->dictEnd = dctx->previousDstEnd;
2183	dctx->virtualStart = (const char)dst - ((const* char)(dctx->previousDstEnd) - (const* char*)(dctx->prefixStart));
2184	dctx->prefixStart = dst;
2185	dctx->previousDstEnd = dst;
2186	}
2187	}
2188
2189
2190	size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2191	void* dst, size_t dstCapacity,
2192	const void* src, size_t srcSize)
2193	{
2194	size_t dSize;
2195	dctx->isFrameDecompression = `0`;
2196	ZSTD_checkContinuity(dctx, dst, dstSize: dstCapacity);
2197	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, streaming: not_streaming);
2198	FORWARD_IF_ERROR(dSize, "");
2199	dctx->previousDstEnd = (char*)dst + dSize;
2200	return dSize;
2201	}
2202
2203
2204	/ NOTE: Must just wrap ZSTD_decompressBlock_deprecated() /
2205	size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2206	void* dst, size_t dstCapacity,
2207	const void* src, size_t srcSize)
2208	{
2209	return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2210	}
2211

Browse the source code of Linux/lib/zstd/decompress/zstd_decompress_block.c