xz_dec_lzma2.c source code [Linux/lib/xz/xz_dec_lzma2.c]

1	// SPDX-License-Identifier: 0BSD
2
3	/*
4	* LZMA2 decoder
5	*
6	* Authors: Lasse Collin <lasse.collin@tukaani.org>
7	* Igor Pavlov <https://7-zip.org/>
8	*/
9
10	#include "xz_private.h"
11	#include "xz_lzma2.h"
12
13	/*
14	* Range decoder initialization eats the first five bytes of each LZMA chunk.
15	*/
16	#define RC_INIT_BYTES 5
17
/*
 * Minimum number of usable input bytes needed to safely decode one LZMA
 * symbol. The worst case is that we decode 22 bits using probabilities and
 * 26 direct bits. This may consume at most 20 bytes of input. However,
 * lzma_main() does an extra normalization before returning, thus we
 * need to put 21 here.
 */
25	#define LZMA_IN_REQUIRED 21
26
27	/*
28	* Dictionary (history buffer)
29	*
30	* These are always true:
31	* start <= pos <= full <= end
32	* pos <= limit <= end
33	*
34	* In multi-call mode, also these are true:
35	* end == size
36	* size <= size_max
37	* allocated <= size
38	*
39	* Most of these variables are size_t to support single-call mode,
40	* in which the dictionary variables address the actual output
41	* buffer directly.
42	*/
43	struct dictionary {
44	/ Beginning of the history buffer /
45	uint8_t *buf;
46
47	/ Old position in buf (before decoding more data) /
48	size_t start;
49
50	/ Position in buf /
51	size_t pos;
52
53	/*
54	* How full dictionary is. This is used to detect corrupt input that
55	* would read beyond the beginning of the uncompressed stream.
56	*/
57	size_t full;
58
59	/ Write limit; we don't write to buf[limit] or later bytes. /
60	size_t limit;
61
62	/*
63	* End of the dictionary buffer. In multi-call mode, this is
64	* the same as the dictionary size. In single-call mode, this
65	* indicates the size of the output buffer.
66	*/
67	size_t end;
68
69	/*
70	* Size of the dictionary as specified in Block Header. This is used
71	* together with "full" to detect corrupt input that would make us
72	* read beyond the beginning of the uncompressed stream.
73	*/
74	uint32_t size;
75
76	/*
77	* Maximum allowed dictionary size in multi-call mode.
78	* This is ignored in single-call mode.
79	*/
80	uint32_t size_max;
81
82	/*
83	* Amount of memory currently allocated for the dictionary.
84	* This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
85	* size_max is always the same as the allocated size.)
86	*/
87	uint32_t allocated;
88
89	/ Operation mode /
90	enum xz_mode mode;
91	};
92
/* Range decoder */
struct rc_dec {
	/* Current width of the coding interval */
	uint32_t range;

	/* Current code value; compared against bounds derived from range */
	uint32_t code;

	/*
	 * Number of initializing bytes remaining to be read
	 * by rc_read_init().
	 */
	uint32_t init_bytes_left;

	/*
	 * Buffer from which we read our input. It can be either
	 * temp.buf or the caller-provided input buffer.
	 */
	const uint8_t *in;

	/* Index of the next byte to read from "in" */
	size_t in_pos;

	/*
	 * Decoding of new symbols stops once in_pos exceeds this value
	 * (see rc_limit_exceeded()).
	 */
	size_t in_limit;
};
112
113	/ Probabilities for a length decoder. /
114	struct lzma_len_dec {
115	/ Probability of match length being at least 10 /
116	uint16_t choice;
117
118	/ Probability of match length being at least 18 /
119	uint16_t choice2;
120
121	/ Probabilities for match lengths 2-9 /
122	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
123
124	/ Probabilities for match lengths 10-17 /
125	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
126
127	/ Probabilities for match lengths 18-273 /
128	uint16_t high[LEN_HIGH_SYMBOLS];
129	};
130
131	struct lzma_dec {
132	/ Distances of latest four matches /
133	uint32_t rep0;
134	uint32_t rep1;
135	uint32_t rep2;
136	uint32_t rep3;
137
138	/ Types of the most recently seen LZMA symbols /
139	enum lzma_state state;
140
141	/*
142	* Length of a match. This is updated so that dict_repeat can
143	* be called again to finish repeating the whole match.
144	*/
145	uint32_t len;
146
147	/*
148	* LZMA properties or related bit masks (number of literal
149	* context bits, a mask derived from the number of literal
150	* position bits, and a mask derived from the number
151	* position bits)
152	*/
153	uint32_t lc;
154	uint32_t literal_pos_mask; / (1 << lp) - 1 /
155	uint32_t pos_mask; / (1 << pb) - 1 /
156
157	/ If 1, it's a match. Otherwise it's a single 8-bit literal. /
158	uint16_t is_match[STATES][POS_STATES_MAX];
159
160	/ If 1, it's a repeated match. The distance is one of rep0 .. rep3. /
161	uint16_t is_rep[STATES];
162
163	/*
164	* If 0, distance of a repeated match is rep0.
165	* Otherwise check is_rep1.
166	*/
167	uint16_t is_rep0[STATES];
168
169	/*
170	* If 0, distance of a repeated match is rep1.
171	* Otherwise check is_rep2.
172	*/
173	uint16_t is_rep1[STATES];
174
175	/ If 0, distance of a repeated match is rep2. Otherwise it is rep3. /
176	uint16_t is_rep2[STATES];
177
178	/*
179	* If 1, the repeated match has length of one byte. Otherwise
180	* the length is decoded from rep_len_decoder.
181	*/
182	uint16_t is_rep0_long[STATES][POS_STATES_MAX];
183
184	/*
185	* Probability tree for the highest two bits of the match
186	* distance. There is a separate probability tree for match
187	* lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
188	*/
189	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
190
191	/*
192	* Probility trees for additional bits for match distance
193	* when the distance is in the range [4, 127].
194	*/
195	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
196
197	/*
198	* Probability tree for the lowest four bits of a match
199	* distance that is equal to or greater than 128.
200	*/
201	uint16_t dist_align[ALIGN_SIZE];
202
203	/ Length of a normal match /
204	struct lzma_len_dec match_len_dec;
205
206	/ Length of a repeated match /
207	struct lzma_len_dec rep_len_dec;
208
209	/ Probabilities of literals /
210	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
211	};
212
/* State of the LZMA2 container (chunk) layer. */
struct lzma2_dec {
	/* Position in xz_dec_lzma2_run(). */
	enum lzma2_seq {
		SEQ_CONTROL,
		SEQ_UNCOMPRESSED_1,
		SEQ_UNCOMPRESSED_2,
		SEQ_COMPRESSED_0,
		SEQ_COMPRESSED_1,
		SEQ_PROPERTIES,
		SEQ_LZMA_PREPARE,
		SEQ_LZMA_RUN,
		SEQ_COPY
	} sequence;

	/* Next position after decoding the compressed size of the chunk. */
	enum lzma2_seq next_sequence;

	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
	uint32_t uncompressed;

	/*
	 * Compressed size of LZMA chunk or compressed/uncompressed
	 * size of uncompressed chunk (64 KiB at maximum)
	 */
	uint32_t compressed;

	/*
	 * True if dictionary reset is needed. xz_dec_lzma2_reset() sets
	 * this to true, and it is cleared when a chunk that resets the
	 * dictionary is seen; any other chunk type is rejected while
	 * this is still true.
	 */
	bool need_dict_reset;

	/*
	 * True if new LZMA properties are needed. This is set to true
	 * whenever the dictionary is reset and cleared when an LZMA chunk
	 * that carries new properties is seen; an LZMA chunk without
	 * new properties is rejected while this is still true.
	 */
	bool need_props;

#ifdef XZ_DEC_MICROLZMA
	bool pedantic_microlzma;
#endif
};
255
256	struct xz_dec_lzma2 {
257	/*
258	* The order below is important on x86 to reduce code size and
259	* it shouldn't hurt on other platforms. Everything up to and
260	* including lzma.pos_mask are in the first 128 bytes on x86-32,
261	* which allows using smaller instructions to access those
262	* variables. On x86-64, fewer variables fit into the first 128
263	* bytes, but this is still the best order without sacrificing
264	* the readability by splitting the structures.
265	*/
266	struct rc_dec rc;
267	struct dictionary dict;
268	struct lzma2_dec lzma2;
269	struct lzma_dec lzma;
270
271	/*
272	* Temporary buffer which holds small number of input bytes between
273	* decoder calls. See lzma2_lzma() for details.
274	*/
275	struct {
276	uint32_t size;
277	uint8_t buf[`3` * LZMA_IN_REQUIRED];
278	} temp;
279	};
280
281	/**************
282	* Dictionary *
283	**************/
284
285	/*
286	* Reset the dictionary state. When in single-call mode, set up the beginning
287	* of the dictionary to point to the actual output buffer.
288	*/
289	static void dict_reset(struct dictionary dict, struct* xz_buf *b)
290	{
291	if (DEC_IS_SINGLE(dict->mode)) {
292	dict->buf = b->out + b->out_pos;
293	dict->end = b->out_size - b->out_pos;
294	}
295
296	dict->start = `0`;
297	dict->pos = `0`;
298	dict->limit = `0`;
299	dict->full = `0`;
300	}
301
302	/ Set dictionary write limit /
303	static void dict_limit(struct dictionary *dict, size_t out_max)
304	{
305	if (dict->end - dict->pos <= out_max)
306	dict->limit = dict->end;
307	else
308	dict->limit = dict->pos + out_max;
309	}
310
311	/ Return true if at least one byte can be written into the dictionary. /
312	static inline bool dict_has_space(const struct dictionary *dict)
313	{
314	return dict->pos < dict->limit;
315	}
316
317	/*
318	* Get a byte from the dictionary at the given distance. The distance is
319	* assumed to valid, or as a special case, zero when the dictionary is
320	* still empty. This special case is needed for single-call decoding to
321	* avoid writing a '\0' to the end of the destination buffer.
322	*/
323	static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
324	{
325	size_t offset = dict->pos - dist - `1`;
326
327	if (dist >= dict->pos)
328	offset += dict->end;
329
330	return dict->full > `0` ? dict->buf[offset] : `0`;
331	}
332
333	/*
334	* Put one byte into the dictionary. It is assumed that there is space for it.
335	*/
336	static inline void dict_put(struct dictionary *dict, uint8_t byte)
337	{
338	dict->buf[dict->pos++] = byte;
339
340	if (dict->full < dict->pos)
341	dict->full = dict->pos;
342	}
343
344	/*
345	* Repeat given number of bytes from the given distance. If the distance is
346	* invalid, false is returned. On success, true is returned and *len is
347	* updated to indicate how many bytes were left to be repeated.
348	*/
349	static bool dict_repeat(struct dictionary dict, uint32_t len, uint32_t dist)
350	{
351	size_t back;
352	uint32_t left;
353
354	if (dist >= dict->full \|\| dist >= dict->size)
355	return false;
356
357	left = min_t(size_t, dict->limit - dict->pos, *len);
358	*len -= left;
359
360	back = dict->pos - dist - `1`;
361	if (dist >= dict->pos)
362	back += dict->end;
363
364	do {
365	dict->buf[dict->pos++] = dict->buf[back++];
366	if (back == dict->end)
367	back = `0`;
368	} while (--left > `0`);
369
370	if (dict->full < dict->pos)
371	dict->full = dict->pos;
372
373	return true;
374	}
375
376	/ Copy uncompressed data as is from input to dictionary and output buffers. /
377	static void dict_uncompressed(struct dictionary dict, struct* xz_buf *b,
378	uint32_t *left)
379	{
380	size_t copy_size;
381
382	while (*left > `0` && b->in_pos < b->in_size
383	&& b->out_pos < b->out_size) {
384	copy_size = min(b->in_size - b->in_pos,
385	b->out_size - b->out_pos);
386	if (copy_size > dict->end - dict->pos)
387	copy_size = dict->end - dict->pos;
388	if (copy_size > *left)
389	copy_size = *left;
390
391	*left -= copy_size;
392
393	/*
394	* If doing in-place decompression in single-call mode and the
395	* uncompressed size of the file is larger than the caller
396	* thought (i.e. it is invalid input!), the buffers below may
397	* overlap and cause undefined behavior with memcpy().
398	* With valid inputs memcpy() would be fine here.
399	*/
400	memmove(dest: dict->buf + dict->pos, src: b->in + b->in_pos, count: copy_size);
401	dict->pos += copy_size;
402
403	if (dict->full < dict->pos)
404	dict->full = dict->pos;
405
406	if (DEC_IS_MULTI(dict->mode)) {
407	if (dict->pos == dict->end)
408	dict->pos = `0`;
409
410	/*
411	* Like above but for multi-call mode: use memmove()
412	* to avoid undefined behavior with invalid input.
413	*/
414	memmove(dest: b->out + b->out_pos, src: b->in + b->in_pos,
415	count: copy_size);
416	}
417
418	dict->start = dict->pos;
419
420	b->out_pos += copy_size;
421	b->in_pos += copy_size;
422	}
423	}
424
425	#ifdef XZ_DEC_MICROLZMA
426	# define DICT_FLUSH_SUPPORTS_SKIPPING true
427	#else
428	# define DICT_FLUSH_SUPPORTS_SKIPPING false
429	#endif
430
431	/*
432	* Flush pending data from dictionary to b->out. It is assumed that there is
433	* enough space in b->out. This is guaranteed because caller uses dict_limit()
434	* before decoding data into the dictionary.
435	*/
436	static uint32_t dict_flush(struct dictionary dict, struct* xz_buf *b)
437	{
438	size_t copy_size = dict->pos - dict->start;
439
440	if (DEC_IS_MULTI(dict->mode)) {
441	if (dict->pos == dict->end)
442	dict->pos = `0`;
443
444	/*
445	* These buffers cannot overlap even if doing in-place
446	* decompression because in multi-call mode dict->buf
447	* has been allocated by us in this file; it's not
448	* provided by the caller like in single-call mode.
449	*
450	* With MicroLZMA, b->out can be NULL to skip bytes that
451	* the caller doesn't need. This cannot be done with XZ
452	* because it would break BCJ filters.
453	*/
454	if (!DICT_FLUSH_SUPPORTS_SKIPPING \|\| b->out != NULL)
455	memcpy(to: b->out + b->out_pos, from: dict->buf + dict->start,
456	len: copy_size);
457	}
458
459	dict->start = dict->pos;
460	b->out_pos += copy_size;
461	return copy_size;
462	}
463
464	/*****************
465	* Range decoder *
466	*****************/
467
468	/ Reset the range decoder. /
469	static void rc_reset(struct rc_dec *rc)
470	{
471	rc->range = (uint32_t)-`1`;
472	rc->code = `0`;
473	rc->init_bytes_left = RC_INIT_BYTES;
474	}
475
476	/*
477	* Read the first five initial bytes into rc->code if they haven't been
478	* read already. (Yes, the first byte gets completely ignored.)
479	*/
480	static bool rc_read_init(struct rc_dec rc, struct* xz_buf *b)
481	{
482	while (rc->init_bytes_left > `0`) {
483	if (b->in_pos == b->in_size)
484	return false;
485
486	rc->code = (rc->code << `8`) + b->in[b->in_pos++];
487	--rc->init_bytes_left;
488	}
489
490	return true;
491	}
492
493	/ Return true if there may not be enough input for the next decoding loop. /
494	static inline bool rc_limit_exceeded(const struct rc_dec *rc)
495	{
496	return rc->in_pos > rc->in_limit;
497	}
498
499	/*
500	* Return true if it is possible (from point of view of range decoder) that
501	* we have reached the end of the LZMA chunk.
502	*/
503	static inline bool rc_is_finished(const struct rc_dec *rc)
504	{
505	return rc->code == `0`;
506	}
507
508	/ Read the next input byte if needed. /
509	static __always_inline void rc_normalize(struct rc_dec *rc)
510	{
511	if (rc->range < RC_TOP_VALUE) {
512	rc->range <<= RC_SHIFT_BITS;
513	rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
514	}
515	}
516
517	/*
518	* Decode one bit. In some versions, this function has been split in three
519	* functions so that the compiler is supposed to be able to more easily avoid
520	* an extra branch. In this particular version of the LZMA decoder, this
521	* doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
522	* on x86). Using a non-split version results in nicer looking code too.
523	*
524	* NOTE: This must return an int. Do not make it return a bool or the speed
525	* of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
526	* and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
527	*/
528	static __always_inline int rc_bit(struct rc_dec rc, uint16_t prob)
529	{
530	uint32_t bound;
531	int bit;
532
533	rc_normalize(rc);
534	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
535	if (rc->code < bound) {
536	rc->range = bound;
537	prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
538	bit = `0`;
539	} else {
540	rc->range -= bound;
541	rc->code -= bound;
542	prob -= prob >> RC_MOVE_BITS;
543	bit = `1`;
544	}
545
546	return bit;
547	}
548
/*
 * Decode a bittree starting from the most significant bit.
 *
 * Starting from 1, the symbol is shifted left and the decoded bit appended
 * until it reaches limit. The returned value therefore still includes the
 * leading marker bit; callers subtract limit to get the plain value.
 */
static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
					   uint16_t *probs, uint32_t limit)
{
	uint32_t symbol = 1;

	do {
		if (rc_bit(rc, &probs[symbol]))
			symbol = (symbol << 1) + 1;
		else
			symbol <<= 1;
	} while (symbol < limit);

	return symbol;
}
564
/*
 * Decode a bittree starting from the least significant bit.
 *
 * limit is the number of bits to decode. Each decoded 1-bit at index i
 * adds (1 << i) to *dest, so *dest must already hold the base value.
 */
static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
					       uint16_t *probs,
					       uint32_t *dest, uint32_t limit)
{
	uint32_t symbol = 1;
	uint32_t i = 0;

	do {
		if (rc_bit(rc, &probs[symbol])) {
			symbol = (symbol << 1) + 1;
			*dest += 1 << i;
		} else {
			symbol <<= 1;
		}
	} while (++i < limit);
}
582
583	/ Decode direct bits (fixed fifty-fifty probability) /
584	static inline void rc_direct(struct rc_dec rc, uint32_t dest, uint32_t limit)
585	{
586	uint32_t mask;
587
588	do {
589	rc_normalize(rc);
590	rc->range >>= `1`;
591	rc->code -= rc->range;
592	mask = (uint32_t)`0` - (rc->code >> `31`);
593	rc->code += rc->range & mask;
594	dest = (dest << `1`) + (mask + `1`);
595	} while (--limit > `0`);
596	}
597
598	/********
599	* LZMA *
600	********/
601
602	/ Get pointer to literal coder probability array. /
603	static uint16_t lzma_literal_probs(struct* xz_dec_lzma2 *s)
604	{
605	uint32_t prev_byte = dict_get(dict: &s->dict, dist: `0`);
606	uint32_t low = prev_byte >> (`8` - s->lzma.lc);
607	uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
608	return s->lzma.literal[low + high];
609	}
610
611	/ Decode a literal (one 8-bit byte) /
612	static void lzma_literal(struct xz_dec_lzma2 *s)
613	{
614	uint16_t *probs;
615	uint32_t symbol;
616	uint32_t match_byte;
617	uint32_t match_bit;
618	uint32_t offset;
619	uint32_t i;
620
621	probs = lzma_literal_probs(s);
622
623	if (lzma_state_is_literal(state: s->lzma.state)) {
624	symbol = rc_bittree(rc: &s->rc, probs, limit: `0x100`);
625	} else {
626	symbol = `1`;
627	match_byte = dict_get(dict: &s->dict, dist: s->lzma.rep0) << `1`;
628	offset = `0x100`;
629
630	do {
631	match_bit = match_byte & offset;
632	match_byte <<= `1`;
633	i = offset + match_bit + symbol;
634
635	if (rc_bit(rc: &s->rc, prob: &probs[i])) {
636	symbol = (symbol << `1`) + `1`;
637	offset &= match_bit;
638	} else {
639	symbol <<= `1`;
640	offset &= ~match_bit;
641	}
642	} while (symbol < `0x100`);
643	}
644
645	dict_put(dict: &s->dict, byte: (uint8_t)symbol);
646	lzma_state_literal(state: &s->lzma.state);
647	}
648
649	/ Decode the length of the match into s->lzma.len. /
650	static void lzma_len(struct xz_dec_lzma2 s, struct* lzma_len_dec *l,
651	uint32_t pos_state)
652	{
653	uint16_t *probs;
654	uint32_t limit;
655
656	if (!rc_bit(rc: &s->rc, prob: &l->choice)) {
657	probs = l->low[pos_state];
658	limit = LEN_LOW_SYMBOLS;
659	s->lzma.len = MATCH_LEN_MIN;
660	} else {
661	if (!rc_bit(rc: &s->rc, prob: &l->choice2)) {
662	probs = l->mid[pos_state];
663	limit = LEN_MID_SYMBOLS;
664	s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
665	} else {
666	probs = l->high;
667	limit = LEN_HIGH_SYMBOLS;
668	s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
669	+ LEN_MID_SYMBOLS;
670	}
671	}
672
673	s->lzma.len += rc_bittree(rc: &s->rc, probs, limit) - limit;
674	}
675
676	/ Decode a match. The distance will be stored in s->lzma.rep0. /
677	static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
678	{
679	uint16_t *probs;
680	uint32_t dist_slot;
681	uint32_t limit;
682
683	lzma_state_match(state: &s->lzma.state);
684
685	s->lzma.rep3 = s->lzma.rep2;
686	s->lzma.rep2 = s->lzma.rep1;
687	s->lzma.rep1 = s->lzma.rep0;
688
689	lzma_len(s, l: &s->lzma.match_len_dec, pos_state);
690
691	probs = s->lzma.dist_slot[lzma_get_dist_state(len: s->lzma.len)];
692	dist_slot = rc_bittree(rc: &s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
693
694	if (dist_slot < DIST_MODEL_START) {
695	s->lzma.rep0 = dist_slot;
696	} else {
697	limit = (dist_slot >> `1`) - `1`;
698	s->lzma.rep0 = `2` + (dist_slot & `1`);
699
700	if (dist_slot < DIST_MODEL_END) {
701	s->lzma.rep0 <<= limit;
702	probs = s->lzma.dist_special + s->lzma.rep0
703	- dist_slot - `1`;
704	rc_bittree_reverse(rc: &s->rc, probs,
705	dest: &s->lzma.rep0, limit);
706	} else {
707	rc_direct(rc: &s->rc, dest: &s->lzma.rep0, limit: limit - ALIGN_BITS);
708	s->lzma.rep0 <<= ALIGN_BITS;
709	rc_bittree_reverse(rc: &s->rc, probs: s->lzma.dist_align,
710	dest: &s->lzma.rep0, ALIGN_BITS);
711	}
712	}
713	}
714
715	/*
716	* Decode a repeated match. The distance is one of the four most recently
717	* seen matches. The distance will be stored in s->lzma.rep0.
718	*/
719	static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
720	{
721	uint32_t tmp;
722
723	if (!rc_bit(rc: &s->rc, prob: &s->lzma.is_rep0[s->lzma.state])) {
724	if (!rc_bit(rc: &s->rc, prob: &s->lzma.is_rep0_long[
725	s->lzma.state][pos_state])) {
726	lzma_state_short_rep(state: &s->lzma.state);
727	s->lzma.len = `1`;
728	return;
729	}
730	} else {
731	if (!rc_bit(rc: &s->rc, prob: &s->lzma.is_rep1[s->lzma.state])) {
732	tmp = s->lzma.rep1;
733	} else {
734	if (!rc_bit(rc: &s->rc, prob: &s->lzma.is_rep2[s->lzma.state])) {
735	tmp = s->lzma.rep2;
736	} else {
737	tmp = s->lzma.rep3;
738	s->lzma.rep3 = s->lzma.rep2;
739	}
740
741	s->lzma.rep2 = s->lzma.rep1;
742	}
743
744	s->lzma.rep1 = s->lzma.rep0;
745	s->lzma.rep0 = tmp;
746	}
747
748	lzma_state_long_rep(state: &s->lzma.state);
749	lzma_len(s, l: &s->lzma.rep_len_dec, pos_state);
750	}
751
752	/ LZMA decoder core /
753	static bool lzma_main(struct xz_dec_lzma2 *s)
754	{
755	uint32_t pos_state;
756
757	/*
758	* If the dictionary was reached during the previous call, try to
759	* finish the possibly pending repeat in the dictionary.
760	*/
761	if (dict_has_space(dict: &s->dict) && s->lzma.len > `0`)
762	dict_repeat(dict: &s->dict, len: &s->lzma.len, dist: s->lzma.rep0);
763
764	/*
765	* Decode more LZMA symbols. One iteration may consume up to
766	* LZMA_IN_REQUIRED - 1 bytes.
767	*/
768	while (dict_has_space(dict: &s->dict) && !rc_limit_exceeded(rc: &s->rc)) {
769	pos_state = s->dict.pos & s->lzma.pos_mask;
770
771	if (!rc_bit(rc: &s->rc, prob: &s->lzma.is_match[
772	s->lzma.state][pos_state])) {
773	lzma_literal(s);
774	} else {
775	if (rc_bit(rc: &s->rc, prob: &s->lzma.is_rep[s->lzma.state]))
776	lzma_rep_match(s, pos_state);
777	else
778	lzma_match(s, pos_state);
779
780	if (!dict_repeat(dict: &s->dict, len: &s->lzma.len, dist: s->lzma.rep0))
781	return false;
782	}
783	}
784
785	/*
786	* Having the range decoder always normalized when we are outside
787	* this function makes it easier to correctly handle end of the chunk.
788	*/
789	rc_normalize(rc: &s->rc);
790
791	return true;
792	}
793
794	/*
795	* Reset the LZMA decoder and range decoder state. Dictionary is not reset
796	* here, because LZMA state may be reset without resetting the dictionary.
797	*/
798	static void lzma_reset(struct xz_dec_lzma2 *s)
799	{
800	uint16_t *probs;
801	size_t i;
802
803	s->lzma.state = STATE_LIT_LIT;
804	s->lzma.rep0 = `0`;
805	s->lzma.rep1 = `0`;
806	s->lzma.rep2 = `0`;
807	s->lzma.rep3 = `0`;
808	s->lzma.len = `0`;
809
810	/*
811	* All probabilities are initialized to the same value. This hack
812	* makes the code smaller by avoiding a separate loop for each
813	* probability array.
814	*
815	* This could be optimized so that only that part of literal
816	* probabilities that are actually required. In the common case
817	* we would write 12 KiB less.
818	*/
819	probs = s->lzma.is_match[`0`];
820	for (i = `0`; i < PROBS_TOTAL; ++i)
821	probs[i] = RC_BIT_MODEL_TOTAL / `2`;
822
823	rc_reset(rc: &s->rc);
824	}
825
826	/*
827	* Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
828	* from the decoded lp and pb values. On success, the LZMA decoder state is
829	* reset and true is returned.
830	*/
831	static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
832	{
833	if (props > (`4` * `5` + `4`) * `9` + `8`)
834	return false;
835
836	s->lzma.pos_mask = `0`;
837	while (props >= `9` * `5`) {
838	props -= `9` * `5`;
839	++s->lzma.pos_mask;
840	}
841
842	s->lzma.pos_mask = (`1` << s->lzma.pos_mask) - `1`;
843
844	s->lzma.literal_pos_mask = `0`;
845	while (props >= `9`) {
846	props -= `9`;
847	++s->lzma.literal_pos_mask;
848	}
849
850	s->lzma.lc = props;
851
852	if (s->lzma.lc + s->lzma.literal_pos_mask > `4`)
853	return false;
854
855	s->lzma.literal_pos_mask = (`1` << s->lzma.literal_pos_mask) - `1`;
856
857	lzma_reset(s);
858
859	return true;
860	}
861
862	/*********
863	* LZMA2 *
864	*********/
865
866	/*
867	* The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
868	* been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
869	* wrapper function takes care of making the LZMA decoder's assumption safe.
870	*
871	* As long as there is plenty of input left to be decoded in the current LZMA
872	* chunk, we decode directly from the caller-supplied input buffer until
873	* there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
874	* s->temp.buf, which (hopefully) gets filled on the next call to this
875	* function. We decode a few bytes from the temporary buffer so that we can
876	* continue decoding from the caller-supplied input buffer again.
877	*/
878	static bool lzma2_lzma(struct xz_dec_lzma2 s, struct* xz_buf *b)
879	{
880	size_t in_avail;
881	uint32_t tmp;
882
883	in_avail = b->in_size - b->in_pos;
884	if (s->temp.size > `0` \|\| s->lzma2.compressed == `0`) {
885	tmp = `2` * LZMA_IN_REQUIRED - s->temp.size;
886	if (tmp > s->lzma2.compressed - s->temp.size)
887	tmp = s->lzma2.compressed - s->temp.size;
888	if (tmp > in_avail)
889	tmp = in_avail;
890
891	memcpy(to: s->temp.buf + s->temp.size, from: b->in + b->in_pos, len: tmp);
892
893	if (s->temp.size + tmp == s->lzma2.compressed) {
894	memzero(s->temp.buf + s->temp.size + tmp,
895	sizeof(s->temp.buf)
896	- s->temp.size - tmp);
897	s->rc.in_limit = s->temp.size + tmp;
898	} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
899	s->temp.size += tmp;
900	b->in_pos += tmp;
901	return true;
902	} else {
903	s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
904	}
905
906	s->rc.in = s->temp.buf;
907	s->rc.in_pos = `0`;
908
909	if (!lzma_main(s) \|\| s->rc.in_pos > s->temp.size + tmp)
910	return false;
911
912	s->lzma2.compressed -= s->rc.in_pos;
913
914	if (s->rc.in_pos < s->temp.size) {
915	s->temp.size -= s->rc.in_pos;
916	memmove(dest: s->temp.buf, src: s->temp.buf + s->rc.in_pos,
917	count: s->temp.size);
918	return true;
919	}
920
921	b->in_pos += s->rc.in_pos - s->temp.size;
922	s->temp.size = `0`;
923	}
924
925	in_avail = b->in_size - b->in_pos;
926	if (in_avail >= LZMA_IN_REQUIRED) {
927	s->rc.in = b->in;
928	s->rc.in_pos = b->in_pos;
929
930	if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
931	s->rc.in_limit = b->in_pos + s->lzma2.compressed;
932	else
933	s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
934
935	if (!lzma_main(s))
936	return false;
937
938	in_avail = s->rc.in_pos - b->in_pos;
939	if (in_avail > s->lzma2.compressed)
940	return false;
941
942	s->lzma2.compressed -= in_avail;
943	b->in_pos = s->rc.in_pos;
944	}
945
946	in_avail = b->in_size - b->in_pos;
947	if (in_avail < LZMA_IN_REQUIRED) {
948	if (in_avail > s->lzma2.compressed)
949	in_avail = s->lzma2.compressed;
950
951	memcpy(to: s->temp.buf, from: b->in + b->in_pos, len: in_avail);
952	s->temp.size = in_avail;
953	b->in_pos += in_avail;
954	}
955
956	return true;
957	}
958
959	/*
960	* Take care of the LZMA2 control layer, and forward the job of actual LZMA
961	* decoding or copying of uncompressed chunks to other functions.
962	*/
963	enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 s, struct* xz_buf *b)
964	{
965	uint32_t tmp;
966
967	while (b->in_pos < b->in_size \|\| s->lzma2.sequence == SEQ_LZMA_RUN) {
968	switch (s->lzma2.sequence) {
969	case SEQ_CONTROL:
970	/*
971	* LZMA2 control byte
972	*
973	* Exact values:
974	* 0x00 End marker
975	* 0x01 Dictionary reset followed by
976	* an uncompressed chunk
977	* 0x02 Uncompressed chunk (no dictionary reset)
978	*
979	* Highest three bits (s->control & 0xE0):
980	* 0xE0 Dictionary reset, new properties and state
981	* reset, followed by LZMA compressed chunk
982	* 0xC0 New properties and state reset, followed
983	* by LZMA compressed chunk (no dictionary
984	* reset)
985	* 0xA0 State reset using old properties,
986	* followed by LZMA compressed chunk (no
987	* dictionary reset)
988	* 0x80 LZMA chunk (no dictionary or state reset)
989	*
990	* For LZMA compressed chunks, the lowest five bits
991	* (s->control & 1F) are the highest bits of the
992	* uncompressed size (bits 16-20).
993	*
994	* A new LZMA2 stream must begin with a dictionary
995	* reset. The first LZMA chunk must set new
996	* properties and reset the LZMA state.
997	*
998	* Values that don't match anything described above
999	* are invalid and we return XZ_DATA_ERROR.
1000	*/
1001	tmp = b->in[b->in_pos++];
1002
1003	if (tmp == `0x00`)
1004	return XZ_STREAM_END;
1005
1006	if (tmp >= `0xE0` \|\| tmp == `0x01`) {
1007	s->lzma2.need_props = true;
1008	s->lzma2.need_dict_reset = false;
1009	dict_reset(dict: &s->dict, b);
1010	} else if (s->lzma2.need_dict_reset) {
1011	return XZ_DATA_ERROR;
1012	}
1013
1014	if (tmp >= `0x80`) {
1015	s->lzma2.uncompressed = (tmp & `0x1F`) << `16`;
1016	s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
1017
1018	if (tmp >= `0xC0`) {
1019	/*
1020	* When there are new properties,
1021	* state reset is done at
1022	* SEQ_PROPERTIES.
1023	*/
1024	s->lzma2.need_props = false;
1025	s->lzma2.next_sequence
1026	= SEQ_PROPERTIES;
1027
1028	} else if (s->lzma2.need_props) {
1029	return XZ_DATA_ERROR;
1030
1031	} else {
1032	s->lzma2.next_sequence
1033	= SEQ_LZMA_PREPARE;
1034	if (tmp >= `0xA0`)
1035	lzma_reset(s);
1036	}
1037	} else {
1038	if (tmp > `0x02`)
1039	return XZ_DATA_ERROR;
1040
1041	s->lzma2.sequence = SEQ_COMPRESSED_0;
1042	s->lzma2.next_sequence = SEQ_COPY;
1043	}
1044
1045	break;
1046
1047	case SEQ_UNCOMPRESSED_1:
1048	s->lzma2.uncompressed
1049	+= (uint32_t)b->in[b->in_pos++] << `8`;
1050	s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
1051	break;
1052
1053	case SEQ_UNCOMPRESSED_2:
1054	s->lzma2.uncompressed
1055	+= (uint32_t)b->in[b->in_pos++] + `1`;
1056	s->lzma2.sequence = SEQ_COMPRESSED_0;
1057	break;
1058
1059	case SEQ_COMPRESSED_0:
1060	s->lzma2.compressed
1061	= (uint32_t)b->in[b->in_pos++] << `8`;
1062	s->lzma2.sequence = SEQ_COMPRESSED_1;
1063	break;
1064
1065	case SEQ_COMPRESSED_1:
1066	s->lzma2.compressed
1067	+= (uint32_t)b->in[b->in_pos++] + `1`;
1068	s->lzma2.sequence = s->lzma2.next_sequence;
1069	break;
1070
1071	case SEQ_PROPERTIES:
1072	if (!lzma_props(s, props: b->in[b->in_pos++]))
1073	return XZ_DATA_ERROR;
1074
1075	s->lzma2.sequence = SEQ_LZMA_PREPARE;
1076
1077	fallthrough;
1078
1079	case SEQ_LZMA_PREPARE:
1080	if (s->lzma2.compressed < RC_INIT_BYTES)
1081	return XZ_DATA_ERROR;
1082
1083	if (!rc_read_init(rc: &s->rc, b))
1084	return XZ_OK;
1085
1086	s->lzma2.compressed -= RC_INIT_BYTES;
1087	s->lzma2.sequence = SEQ_LZMA_RUN;
1088
1089	fallthrough;
1090
1091	case SEQ_LZMA_RUN:
1092	/*
1093	* Set dictionary limit to indicate how much we want
1094	* to be encoded at maximum. Decode new data into the
1095	* dictionary. Flush the new data from dictionary to
1096	* b->out. Check if we finished decoding this chunk.
1097	* In case the dictionary got full but we didn't fill
1098	* the output buffer yet, we may run this loop
1099	* multiple times without changing s->lzma2.sequence.
1100	*/
1101	dict_limit(dict: &s->dict, min_t(size_t,
1102	b->out_size - b->out_pos,
1103	s->lzma2.uncompressed));
1104	if (!lzma2_lzma(s, b))
1105	return XZ_DATA_ERROR;
1106
1107	s->lzma2.uncompressed -= dict_flush(dict: &s->dict, b);
1108
1109	if (s->lzma2.uncompressed == `0`) {
1110	if (s->lzma2.compressed > `0` \|\| s->lzma.len > `0`
1111	\|\| !rc_is_finished(rc: &s->rc))
1112	return XZ_DATA_ERROR;
1113
1114	rc_reset(rc: &s->rc);
1115	s->lzma2.sequence = SEQ_CONTROL;
1116
1117	} else if (b->out_pos == b->out_size
1118	\|\| (b->in_pos == b->in_size
1119	&& s->temp.size
1120	< s->lzma2.compressed)) {
1121	return XZ_OK;
1122	}
1123
1124	break;
1125
1126	case SEQ_COPY:
1127	dict_uncompressed(dict: &s->dict, b, left: &s->lzma2.compressed);
1128	if (s->lzma2.compressed > `0`)
1129	return XZ_OK;
1130
1131	s->lzma2.sequence = SEQ_CONTROL;
1132	break;
1133	}
1134	}
1135
1136	return XZ_OK;
1137	}
1138
1139	struct xz_dec_lzma2 xz_dec_lzma2_create(enum* xz_mode mode, uint32_t dict_max)
1140	{
1141	struct xz_dec_lzma2 s = kmalloc(sizeof(s), GFP_KERNEL);
1142	if (s == NULL)
1143	return NULL;
1144
1145	s->dict.mode = mode;
1146	s->dict.size_max = dict_max;
1147
1148	if (DEC_IS_PREALLOC(mode)) {
1149	s->dict.buf = vmalloc(dict_max);
1150	if (s->dict.buf == NULL) {
1151	kfree(objp: s);
1152	return NULL;
1153	}
1154	} else if (DEC_IS_DYNALLOC(mode)) {
1155	s->dict.buf = NULL;
1156	s->dict.allocated = `0`;
1157	}
1158
1159	return s;
1160	}
1161
1162	enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
1163	{
1164	/ This limits dictionary size to 3 GiB to keep parsing simpler. /
1165	if (props > `39`)
1166	return XZ_OPTIONS_ERROR;
1167
1168	s->dict.size = `2` + (props & `1`);
1169	s->dict.size <<= (props >> `1`) + `11`;
1170
1171	if (DEC_IS_MULTI(s->dict.mode)) {
1172	if (s->dict.size > s->dict.size_max)
1173	return XZ_MEMLIMIT_ERROR;
1174
1175	s->dict.end = s->dict.size;
1176
1177	if (DEC_IS_DYNALLOC(s->dict.mode)) {
1178	if (s->dict.allocated < s->dict.size) {
1179	s->dict.allocated = s->dict.size;
1180	vfree(addr: s->dict.buf);
1181	s->dict.buf = vmalloc(s->dict.size);
1182	if (s->dict.buf == NULL) {
1183	s->dict.allocated = `0`;
1184	return XZ_MEM_ERROR;
1185	}
1186	}
1187	}
1188	}
1189
1190	s->lzma2.sequence = SEQ_CONTROL;
1191	s->lzma2.need_dict_reset = true;
1192
1193	s->temp.size = `0`;
1194
1195	return XZ_OK;
1196	}
1197
1198	void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1199	{
1200	if (DEC_IS_MULTI(s->dict.mode))
1201	vfree(addr: s->dict.buf);
1202
1203	kfree(objp: s);
1204	}
1205
1206	#ifdef XZ_DEC_MICROLZMA
1207	/ This is a wrapper struct to have a nice struct name in the public API. /
1208	struct xz_dec_microlzma {
1209	struct xz_dec_lzma2 s;
1210	};
1211
1212	enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
1213	struct xz_buf *b)
1214	{
1215	struct xz_dec_lzma2 *s = &s_ptr->s;
1216
1217	/*
1218	* sequence is SEQ_PROPERTIES before the first input byte,
1219	* SEQ_LZMA_PREPARE until a total of five bytes have been read,
1220	* and SEQ_LZMA_RUN for the rest of the input stream.
1221	*/
1222	if (s->lzma2.sequence != SEQ_LZMA_RUN) {
1223	if (s->lzma2.sequence == SEQ_PROPERTIES) {
1224	/ One byte is needed for the props. /
1225	if (b->in_pos >= b->in_size)
1226	return XZ_OK;
1227
1228	/*
1229	* Don't increment b->in_pos here. The same byte is
1230	* also passed to rc_read_init() which will ignore it.
1231	*/
1232	if (!lzma_props(s, ~b->in[b->in_pos]))
1233	return XZ_DATA_ERROR;
1234
1235	s->lzma2.sequence = SEQ_LZMA_PREPARE;
1236	}
1237
1238	/*
1239	* xz_dec_microlzma_reset() doesn't validate the compressed
1240	* size so we do it here. We have to limit the maximum size
1241	* to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
1242	* round number and much more than users of this code should
1243	* ever need.
1244	*/
1245	if (s->lzma2.compressed < RC_INIT_BYTES
1246	\|\| s->lzma2.compressed > (`3U` << `30`))
1247	return XZ_DATA_ERROR;
1248
1249	if (!rc_read_init(&s->rc, b))
1250	return XZ_OK;
1251
1252	s->lzma2.compressed -= RC_INIT_BYTES;
1253	s->lzma2.sequence = SEQ_LZMA_RUN;
1254
1255	dict_reset(&s->dict, b);
1256	}
1257
1258	/ This is to allow increasing b->out_size between calls. /
1259	if (DEC_IS_SINGLE(s->dict.mode))
1260	s->dict.end = b->out_size - b->out_pos;
1261
1262	while (true) {
1263	dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
1264	s->lzma2.uncompressed));
1265
1266	if (!lzma2_lzma(s, b))
1267	return XZ_DATA_ERROR;
1268
1269	s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1270
1271	if (s->lzma2.uncompressed == `0`) {
1272	if (s->lzma2.pedantic_microlzma) {
1273	if (s->lzma2.compressed > `0` \|\| s->lzma.len > `0`
1274	\|\| !rc_is_finished(&s->rc))
1275	return XZ_DATA_ERROR;
1276	}
1277
1278	return XZ_STREAM_END;
1279	}
1280
1281	if (b->out_pos == b->out_size)
1282	return XZ_OK;
1283
1284	if (b->in_pos == b->in_size
1285	&& s->temp.size < s->lzma2.compressed)
1286	return XZ_OK;
1287	}
1288	}
1289
1290	struct xz_dec_microlzma xz_dec_microlzma_alloc(enum* xz_mode mode,
1291	uint32_t dict_size)
1292	{
1293	struct xz_dec_microlzma *s;
1294
1295	/ Restrict dict_size to the same range as in the LZMA2 code. /
1296	if (dict_size < `4096` \|\| dict_size > (`3U` << `30`))
1297	return NULL;
1298
1299	s = kmalloc(sizeof(*s), GFP_KERNEL);
1300	if (s == NULL)
1301	return NULL;
1302
1303	s->s.dict.mode = mode;
1304	s->s.dict.size = dict_size;
1305
1306	if (DEC_IS_MULTI(mode)) {
1307	s->s.dict.end = dict_size;
1308
1309	s->s.dict.buf = vmalloc(dict_size);
1310	if (s->s.dict.buf == NULL) {
1311	kfree(s);
1312	return NULL;
1313	}
1314	}
1315
1316	return s;
1317	}
1318
1319	void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
1320	uint32_t uncomp_size, int uncomp_size_is_exact)
1321	{
1322	/*
1323	* comp_size is validated in xz_dec_microlzma_run().
1324	* uncomp_size can safely be anything.
1325	*/
1326	s->s.lzma2.compressed = comp_size;
1327	s->s.lzma2.uncompressed = uncomp_size;
1328	s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;
1329
1330	s->s.lzma2.sequence = SEQ_PROPERTIES;
1331	s->s.temp.size = `0`;
1332	}
1333
1334	void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
1335	{
1336	if (DEC_IS_MULTI(s->s.dict.mode))
1337	vfree(s->s.dict.buf);
1338
1339	kfree(s);
1340	}
1341	#endif
1342

Browse the source code of Linux/lib/xz/xz_dec_lzma2.c