| 1 | // SPDX-License-Identifier: 0BSD | 
|---|
| 2 |  | 
|---|
| 3 | /* | 
|---|
| 4 | * .xz Stream decoder | 
|---|
| 5 | * | 
|---|
| 6 | * Author: Lasse Collin <lasse.collin@tukaani.org> | 
|---|
| 7 | */ | 
|---|
| 8 |  | 
|---|
| 9 | #include "xz_private.h" | 
|---|
| 10 | #include "xz_stream.h" | 
|---|
| 11 |  | 
|---|
| 12 | /* Hash used to validate the Index field */ | 
|---|
| 13 | struct xz_dec_hash { | 
|---|
| 14 | vli_type unpadded; | 
|---|
| 15 | vli_type uncompressed; | 
|---|
| 16 | uint32_t crc32; | 
|---|
| 17 | }; | 
|---|
| 18 |  | 
|---|
| 19 | struct xz_dec { | 
|---|
| 20 | /* Position in dec_main() */ | 
|---|
| 21 | enum { | 
|---|
| 22 | , | 
|---|
| 23 | SEQ_BLOCK_START, | 
|---|
| 24 | , | 
|---|
| 25 | SEQ_BLOCK_UNCOMPRESS, | 
|---|
| 26 | SEQ_BLOCK_PADDING, | 
|---|
| 27 | SEQ_BLOCK_CHECK, | 
|---|
| 28 | SEQ_INDEX, | 
|---|
| 29 | SEQ_INDEX_PADDING, | 
|---|
| 30 | SEQ_INDEX_CRC32, | 
|---|
| 31 |  | 
|---|
| 32 | } sequence; | 
|---|
| 33 |  | 
|---|
| 34 | /* Position in variable-length integers and Check fields */ | 
|---|
| 35 | uint32_t pos; | 
|---|
| 36 |  | 
|---|
| 37 | /* Variable-length integer decoded by dec_vli() */ | 
|---|
| 38 | vli_type vli; | 
|---|
| 39 |  | 
|---|
| 40 | /* Saved in_pos and out_pos */ | 
|---|
| 41 | size_t in_start; | 
|---|
| 42 | size_t out_start; | 
|---|
| 43 |  | 
|---|
| 44 | /* CRC32 value in Block or Index */ | 
|---|
| 45 | uint32_t crc32; | 
|---|
| 46 |  | 
|---|
| 47 | /* Type of the integrity check calculated from uncompressed data */ | 
|---|
| 48 | enum xz_check check_type; | 
|---|
| 49 |  | 
|---|
| 50 | /* Operation mode */ | 
|---|
| 51 | enum xz_mode mode; | 
|---|
| 52 |  | 
|---|
| 53 | /* | 
|---|
| 54 | * True if the next call to xz_dec_run() is allowed to return | 
|---|
| 55 | * XZ_BUF_ERROR. | 
|---|
| 56 | */ | 
|---|
| 57 | bool allow_buf_error; | 
|---|
| 58 |  | 
|---|
| 59 | /* Information stored in Block Header */ | 
|---|
| 60 | struct { | 
|---|
| 61 | /* | 
|---|
| 62 | * Value stored in the Compressed Size field, or | 
|---|
| 63 | * VLI_UNKNOWN if Compressed Size is not present. | 
|---|
| 64 | */ | 
|---|
| 65 | vli_type compressed; | 
|---|
| 66 |  | 
|---|
| 67 | /* | 
|---|
| 68 | * Value stored in the Uncompressed Size field, or | 
|---|
| 69 | * VLI_UNKNOWN if Uncompressed Size is not present. | 
|---|
| 70 | */ | 
|---|
| 71 | vli_type uncompressed; | 
|---|
| 72 |  | 
|---|
| 73 | /* Size of the Block Header field */ | 
|---|
| 74 | uint32_t size; | 
|---|
| 75 | } ; | 
|---|
| 76 |  | 
|---|
| 77 | /* Information collected when decoding Blocks */ | 
|---|
| 78 | struct { | 
|---|
| 79 | /* Observed compressed size of the current Block */ | 
|---|
| 80 | vli_type compressed; | 
|---|
| 81 |  | 
|---|
| 82 | /* Observed uncompressed size of the current Block */ | 
|---|
| 83 | vli_type uncompressed; | 
|---|
| 84 |  | 
|---|
| 85 | /* Number of Blocks decoded so far */ | 
|---|
| 86 | vli_type count; | 
|---|
| 87 |  | 
|---|
| 88 | /* | 
|---|
| 89 | * Hash calculated from the Block sizes. This is used to | 
|---|
| 90 | * validate the Index field. | 
|---|
| 91 | */ | 
|---|
| 92 | struct xz_dec_hash hash; | 
|---|
| 93 | } block; | 
|---|
| 94 |  | 
|---|
| 95 | /* Variables needed when verifying the Index field */ | 
|---|
| 96 | struct { | 
|---|
| 97 | /* Position in dec_index() */ | 
|---|
| 98 | enum { | 
|---|
| 99 | SEQ_INDEX_COUNT, | 
|---|
| 100 | SEQ_INDEX_UNPADDED, | 
|---|
| 101 | SEQ_INDEX_UNCOMPRESSED | 
|---|
| 102 | } sequence; | 
|---|
| 103 |  | 
|---|
| 104 | /* Size of the Index in bytes */ | 
|---|
| 105 | vli_type size; | 
|---|
| 106 |  | 
|---|
| 107 | /* Number of Records (matches block.count in valid files) */ | 
|---|
| 108 | vli_type count; | 
|---|
| 109 |  | 
|---|
| 110 | /* | 
|---|
| 111 | * Hash calculated from the Records (matches block.hash in | 
|---|
| 112 | * valid files). | 
|---|
| 113 | */ | 
|---|
| 114 | struct xz_dec_hash hash; | 
|---|
| 115 | } index; | 
|---|
| 116 |  | 
|---|
| 117 | /* | 
|---|
| 118 | * Temporary buffer needed to hold Stream Header, Block Header, | 
|---|
| 119 | * and Stream Footer. The Block Header is the biggest (1 KiB) | 
|---|
| 120 | * so we reserve space according to that. buf[] has to be aligned | 
|---|
| 121 | * to a multiple of four bytes; the size_t variables before it | 
|---|
| 122 | * should guarantee this. | 
|---|
| 123 | */ | 
|---|
| 124 | struct { | 
|---|
| 125 | size_t pos; | 
|---|
| 126 | size_t size; | 
|---|
| 127 | uint8_t buf[1024]; | 
|---|
| 128 | } temp; | 
|---|
| 129 |  | 
|---|
| 130 | struct xz_dec_lzma2 *lzma2; | 
|---|
| 131 |  | 
|---|
| 132 | #ifdef XZ_DEC_BCJ | 
|---|
| 133 | struct xz_dec_bcj *bcj; | 
|---|
| 134 | bool bcj_active; | 
|---|
| 135 | #endif | 
|---|
| 136 | }; | 
|---|
| 137 |  | 
|---|
#ifdef XZ_DEC_ANY_CHECK
/*
 * Sizes (in bytes) of the Check field with different Check IDs.
 * Indexed by the Check ID stored in s->check_type (0..15).
 */
static const uint8_t check_sizes[16] = {
	0,
	4, 4, 4,
	8, 8, 8,
	16, 16, 16,
	32, 32, 32,
	64, 64, 64
};
#endif
|---|
| 149 |  | 
|---|
| 150 | /* | 
|---|
| 151 | * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller | 
|---|
| 152 | * must have set s->temp.pos to indicate how much data we are supposed | 
|---|
| 153 | * to copy into s->temp.buf. Return true once s->temp.pos has reached | 
|---|
| 154 | * s->temp.size. | 
|---|
| 155 | */ | 
|---|
| 156 | static bool fill_temp(struct xz_dec *s, struct xz_buf *b) | 
|---|
| 157 | { | 
|---|
| 158 | size_t copy_size = min_t(size_t, | 
|---|
| 159 | b->in_size - b->in_pos, s->temp.size - s->temp.pos); | 
|---|
| 160 |  | 
|---|
| 161 | memcpy(to: s->temp.buf + s->temp.pos, from: b->in + b->in_pos, len: copy_size); | 
|---|
| 162 | b->in_pos += copy_size; | 
|---|
| 163 | s->temp.pos += copy_size; | 
|---|
| 164 |  | 
|---|
| 165 | if (s->temp.pos == s->temp.size) { | 
|---|
| 166 | s->temp.pos = 0; | 
|---|
| 167 | return true; | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | return false; | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | /* Decode a variable-length integer (little-endian base-128 encoding) */ | 
|---|
| 174 | static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, | 
|---|
| 175 | size_t *in_pos, size_t in_size) | 
|---|
| 176 | { | 
|---|
| 177 | uint8_t byte; | 
|---|
| 178 |  | 
|---|
| 179 | if (s->pos == 0) | 
|---|
| 180 | s->vli = 0; | 
|---|
| 181 |  | 
|---|
| 182 | while (*in_pos < in_size) { | 
|---|
| 183 | byte = in[*in_pos]; | 
|---|
| 184 | ++*in_pos; | 
|---|
| 185 |  | 
|---|
| 186 | s->vli |= (vli_type)(byte & 0x7F) << s->pos; | 
|---|
| 187 |  | 
|---|
| 188 | if ((byte & 0x80) == 0) { | 
|---|
| 189 | /* Don't allow non-minimal encodings. */ | 
|---|
| 190 | if (byte == 0 && s->pos != 0) | 
|---|
| 191 | return XZ_DATA_ERROR; | 
|---|
| 192 |  | 
|---|
| 193 | s->pos = 0; | 
|---|
| 194 | return XZ_STREAM_END; | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | s->pos += 7; | 
|---|
| 198 | if (s->pos == 7 * VLI_BYTES_MAX) | 
|---|
| 199 | return XZ_DATA_ERROR; | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
| 202 | return XZ_OK; | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | /* | 
|---|
| 206 | * Decode the Compressed Data field from a Block. Update and validate | 
|---|
| 207 | * the observed compressed and uncompressed sizes of the Block so that | 
|---|
| 208 | * they don't exceed the values possibly stored in the Block Header | 
|---|
| 209 | * (validation assumes that no integer overflow occurs, since vli_type | 
|---|
| 210 | * is normally uint64_t). Update the CRC32 if presence of the CRC32 | 
|---|
| 211 | * field was indicated in Stream Header. | 
|---|
| 212 | * | 
|---|
| 213 | * Once the decoding is finished, validate that the observed sizes match | 
|---|
| 214 | * the sizes possibly stored in the Block Header. Update the hash and | 
|---|
| 215 | * Block count, which are later used to validate the Index field. | 
|---|
| 216 | */ | 
|---|
| 217 | static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) | 
|---|
| 218 | { | 
|---|
| 219 | enum xz_ret ret; | 
|---|
| 220 |  | 
|---|
| 221 | s->in_start = b->in_pos; | 
|---|
| 222 | s->out_start = b->out_pos; | 
|---|
| 223 |  | 
|---|
| 224 | #ifdef XZ_DEC_BCJ | 
|---|
| 225 | if (s->bcj_active) | 
|---|
| 226 | ret = xz_dec_bcj_run(s: s->bcj, lzma2: s->lzma2, b); | 
|---|
| 227 | else | 
|---|
| 228 | #endif | 
|---|
| 229 | ret = xz_dec_lzma2_run(s: s->lzma2, b); | 
|---|
| 230 |  | 
|---|
| 231 | s->block.compressed += b->in_pos - s->in_start; | 
|---|
| 232 | s->block.uncompressed += b->out_pos - s->out_start; | 
|---|
| 233 |  | 
|---|
| 234 | /* | 
|---|
| 235 | * There is no need to separately check for VLI_UNKNOWN, since | 
|---|
| 236 | * the observed sizes are always smaller than VLI_UNKNOWN. | 
|---|
| 237 | */ | 
|---|
| 238 | if (s->block.compressed > s->block_header.compressed | 
|---|
| 239 | || s->block.uncompressed | 
|---|
| 240 | > s->block_header.uncompressed) | 
|---|
| 241 | return XZ_DATA_ERROR; | 
|---|
| 242 |  | 
|---|
| 243 | if (s->check_type == XZ_CHECK_CRC32) | 
|---|
| 244 | s->crc32 = xz_crc32(b->out + s->out_start, | 
|---|
| 245 | b->out_pos - s->out_start, s->crc32); | 
|---|
| 246 |  | 
|---|
| 247 | if (ret == XZ_STREAM_END) { | 
|---|
| 248 | if (s->block_header.compressed != VLI_UNKNOWN | 
|---|
| 249 | && s->block_header.compressed | 
|---|
| 250 | != s->block.compressed) | 
|---|
| 251 | return XZ_DATA_ERROR; | 
|---|
| 252 |  | 
|---|
| 253 | if (s->block_header.uncompressed != VLI_UNKNOWN | 
|---|
| 254 | && s->block_header.uncompressed | 
|---|
| 255 | != s->block.uncompressed) | 
|---|
| 256 | return XZ_DATA_ERROR; | 
|---|
| 257 |  | 
|---|
| 258 | s->block.hash.unpadded += s->block_header.size | 
|---|
| 259 | + s->block.compressed; | 
|---|
| 260 |  | 
|---|
| 261 | #ifdef XZ_DEC_ANY_CHECK | 
|---|
| 262 | s->block.hash.unpadded += check_sizes[s->check_type]; | 
|---|
| 263 | #else | 
|---|
| 264 | if (s->check_type == XZ_CHECK_CRC32) | 
|---|
| 265 | s->block.hash.unpadded += 4; | 
|---|
| 266 | #endif | 
|---|
| 267 |  | 
|---|
| 268 | s->block.hash.uncompressed += s->block.uncompressed; | 
|---|
| 269 | s->block.hash.crc32 = xz_crc32( | 
|---|
| 270 | (const uint8_t *)&s->block.hash, | 
|---|
| 271 | sizeof(s->block.hash), s->block.hash.crc32); | 
|---|
| 272 |  | 
|---|
| 273 | ++s->block.count; | 
|---|
| 274 | } | 
|---|
| 275 |  | 
|---|
| 276 | return ret; | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | /* Update the Index size and the CRC32 value. */ | 
|---|
| 280 | static void index_update(struct xz_dec *s, const struct xz_buf *b) | 
|---|
| 281 | { | 
|---|
| 282 | size_t in_used = b->in_pos - s->in_start; | 
|---|
| 283 | s->index.size += in_used; | 
|---|
| 284 | s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); | 
|---|
| 285 | } | 
|---|
| 286 |  | 
|---|
| 287 | /* | 
|---|
| 288 | * Decode the Number of Records, Unpadded Size, and Uncompressed Size | 
|---|
| 289 | * fields from the Index field. That is, Index Padding and CRC32 are not | 
|---|
| 290 | * decoded by this function. | 
|---|
| 291 | * | 
|---|
| 292 | * This can return XZ_OK (more input needed), XZ_STREAM_END (everything | 
|---|
| 293 | * successfully decoded), or XZ_DATA_ERROR (input is corrupt). | 
|---|
| 294 | */ | 
|---|
| 295 | static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) | 
|---|
| 296 | { | 
|---|
| 297 | enum xz_ret ret; | 
|---|
| 298 |  | 
|---|
| 299 | do { | 
|---|
| 300 | ret = dec_vli(s, in: b->in, in_pos: &b->in_pos, in_size: b->in_size); | 
|---|
| 301 | if (ret != XZ_STREAM_END) { | 
|---|
| 302 | index_update(s, b); | 
|---|
| 303 | return ret; | 
|---|
| 304 | } | 
|---|
| 305 |  | 
|---|
| 306 | switch (s->index.sequence) { | 
|---|
| 307 | case SEQ_INDEX_COUNT: | 
|---|
| 308 | s->index.count = s->vli; | 
|---|
| 309 |  | 
|---|
| 310 | /* | 
|---|
| 311 | * Validate that the Number of Records field | 
|---|
| 312 | * indicates the same number of Records as | 
|---|
| 313 | * there were Blocks in the Stream. | 
|---|
| 314 | */ | 
|---|
| 315 | if (s->index.count != s->block.count) | 
|---|
| 316 | return XZ_DATA_ERROR; | 
|---|
| 317 |  | 
|---|
| 318 | s->index.sequence = SEQ_INDEX_UNPADDED; | 
|---|
| 319 | break; | 
|---|
| 320 |  | 
|---|
| 321 | case SEQ_INDEX_UNPADDED: | 
|---|
| 322 | s->index.hash.unpadded += s->vli; | 
|---|
| 323 | s->index.sequence = SEQ_INDEX_UNCOMPRESSED; | 
|---|
| 324 | break; | 
|---|
| 325 |  | 
|---|
| 326 | case SEQ_INDEX_UNCOMPRESSED: | 
|---|
| 327 | s->index.hash.uncompressed += s->vli; | 
|---|
| 328 | s->index.hash.crc32 = xz_crc32( | 
|---|
| 329 | (const uint8_t *)&s->index.hash, | 
|---|
| 330 | sizeof(s->index.hash), | 
|---|
| 331 | s->index.hash.crc32); | 
|---|
| 332 | --s->index.count; | 
|---|
| 333 | s->index.sequence = SEQ_INDEX_UNPADDED; | 
|---|
| 334 | break; | 
|---|
| 335 | } | 
|---|
| 336 | } while (s->index.count > 0); | 
|---|
| 337 |  | 
|---|
| 338 | return XZ_STREAM_END; | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | /* | 
|---|
| 342 | * Validate that the next four input bytes match the value of s->crc32. | 
|---|
| 343 | * s->pos must be zero when starting to validate the first byte. | 
|---|
| 344 | */ | 
|---|
| 345 | static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b) | 
|---|
| 346 | { | 
|---|
| 347 | do { | 
|---|
| 348 | if (b->in_pos == b->in_size) | 
|---|
| 349 | return XZ_OK; | 
|---|
| 350 |  | 
|---|
| 351 | if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) | 
|---|
| 352 | return XZ_DATA_ERROR; | 
|---|
| 353 |  | 
|---|
| 354 | s->pos += 8; | 
|---|
| 355 |  | 
|---|
| 356 | } while (s->pos < 32); | 
|---|
| 357 |  | 
|---|
| 358 | s->crc32 = 0; | 
|---|
| 359 | s->pos = 0; | 
|---|
| 360 |  | 
|---|
| 361 | return XZ_STREAM_END; | 
|---|
| 362 | } | 
|---|
| 363 |  | 
|---|
#ifdef XZ_DEC_ANY_CHECK
/*
 * Skip over the Check field when the Check ID is not supported.
 * s->pos counts the bytes skipped so far, so skipping can resume on a
 * later call. Returns true once the whole Check field has been skipped
 * over (s->pos is then reset to zero), false if more input is needed.
 */
static bool check_skip(struct xz_dec *s, struct xz_buf *b)
{
	while (s->pos < check_sizes[s->check_type]) {
		if (b->in_pos == b->in_size)
			return false;

		++b->in_pos;
		++s->pos;
	}

	s->pos = 0;

	return true;
}
#endif
|---|
| 384 |  | 
|---|
| 385 | /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ | 
|---|
| 386 | static enum xz_ret (struct xz_dec *s) | 
|---|
| 387 | { | 
|---|
| 388 | if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) | 
|---|
| 389 | return XZ_FORMAT_ERROR; | 
|---|
| 390 |  | 
|---|
| 391 | if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) | 
|---|
| 392 | != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) | 
|---|
| 393 | return XZ_DATA_ERROR; | 
|---|
| 394 |  | 
|---|
| 395 | if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) | 
|---|
| 396 | return XZ_OPTIONS_ERROR; | 
|---|
| 397 |  | 
|---|
| 398 | /* | 
|---|
| 399 | * Of integrity checks, we support only none (Check ID = 0) and | 
|---|
| 400 | * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, | 
|---|
| 401 | * we will accept other check types too, but then the check won't | 
|---|
| 402 | * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. | 
|---|
| 403 | */ | 
|---|
| 404 | if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX) | 
|---|
| 405 | return XZ_OPTIONS_ERROR; | 
|---|
| 406 |  | 
|---|
| 407 | s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; | 
|---|
| 408 |  | 
|---|
| 409 | #ifdef XZ_DEC_ANY_CHECK | 
|---|
| 410 | if (s->check_type > XZ_CHECK_CRC32) | 
|---|
| 411 | return XZ_UNSUPPORTED_CHECK; | 
|---|
| 412 | #else | 
|---|
| 413 | if (s->check_type > XZ_CHECK_CRC32) | 
|---|
| 414 | return XZ_OPTIONS_ERROR; | 
|---|
| 415 | #endif | 
|---|
| 416 |  | 
|---|
| 417 | return XZ_OK; | 
|---|
| 418 | } | 
|---|
| 419 |  | 
|---|
| 420 | /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ | 
|---|
| 421 | static enum xz_ret (struct xz_dec *s) | 
|---|
| 422 | { | 
|---|
| 423 | if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) | 
|---|
| 424 | return XZ_DATA_ERROR; | 
|---|
| 425 |  | 
|---|
| 426 | if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) | 
|---|
| 427 | return XZ_DATA_ERROR; | 
|---|
| 428 |  | 
|---|
| 429 | /* | 
|---|
| 430 | * Validate Backward Size. Note that we never added the size of the | 
|---|
| 431 | * Index CRC32 field to s->index.size, thus we use s->index.size / 4 | 
|---|
| 432 | * instead of s->index.size / 4 - 1. | 
|---|
| 433 | */ | 
|---|
| 434 | if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) | 
|---|
| 435 | return XZ_DATA_ERROR; | 
|---|
| 436 |  | 
|---|
| 437 | if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) | 
|---|
| 438 | return XZ_DATA_ERROR; | 
|---|
| 439 |  | 
|---|
| 440 | /* | 
|---|
| 441 | * Use XZ_STREAM_END instead of XZ_OK to be more convenient | 
|---|
| 442 | * for the caller. | 
|---|
| 443 | */ | 
|---|
| 444 | return XZ_STREAM_END; | 
|---|
| 445 | } | 
|---|
| 446 |  | 
|---|
| 447 | /* Decode the Block Header and initialize the filter chain. */ | 
|---|
| 448 | static enum xz_ret (struct xz_dec *s) | 
|---|
| 449 | { | 
|---|
| 450 | enum xz_ret ret; | 
|---|
| 451 |  | 
|---|
| 452 | /* | 
|---|
| 453 | * Validate the CRC32. We know that the temp buffer is at least | 
|---|
| 454 | * eight bytes so this is safe. | 
|---|
| 455 | */ | 
|---|
| 456 | s->temp.size -= 4; | 
|---|
| 457 | if (xz_crc32(s->temp.buf, s->temp.size, 0) | 
|---|
| 458 | != get_le32(s->temp.buf + s->temp.size)) | 
|---|
| 459 | return XZ_DATA_ERROR; | 
|---|
| 460 |  | 
|---|
| 461 | s->temp.pos = 2; | 
|---|
| 462 |  | 
|---|
| 463 | /* | 
|---|
| 464 | * Catch unsupported Block Flags. We support only one or two filters | 
|---|
| 465 | * in the chain, so we catch that with the same test. | 
|---|
| 466 | */ | 
|---|
| 467 | #ifdef XZ_DEC_BCJ | 
|---|
| 468 | if (s->temp.buf[1] & 0x3E) | 
|---|
| 469 | #else | 
|---|
| 470 | if (s->temp.buf[1] & 0x3F) | 
|---|
| 471 | #endif | 
|---|
| 472 | return XZ_OPTIONS_ERROR; | 
|---|
| 473 |  | 
|---|
| 474 | /* Compressed Size */ | 
|---|
| 475 | if (s->temp.buf[1] & 0x40) { | 
|---|
| 476 | if (dec_vli(s, in: s->temp.buf, in_pos: &s->temp.pos, in_size: s->temp.size) | 
|---|
| 477 | != XZ_STREAM_END) | 
|---|
| 478 | return XZ_DATA_ERROR; | 
|---|
| 479 |  | 
|---|
| 480 | s->block_header.compressed = s->vli; | 
|---|
| 481 | } else { | 
|---|
| 482 | s->block_header.compressed = VLI_UNKNOWN; | 
|---|
| 483 | } | 
|---|
| 484 |  | 
|---|
| 485 | /* Uncompressed Size */ | 
|---|
| 486 | if (s->temp.buf[1] & 0x80) { | 
|---|
| 487 | if (dec_vli(s, in: s->temp.buf, in_pos: &s->temp.pos, in_size: s->temp.size) | 
|---|
| 488 | != XZ_STREAM_END) | 
|---|
| 489 | return XZ_DATA_ERROR; | 
|---|
| 490 |  | 
|---|
| 491 | s->block_header.uncompressed = s->vli; | 
|---|
| 492 | } else { | 
|---|
| 493 | s->block_header.uncompressed = VLI_UNKNOWN; | 
|---|
| 494 | } | 
|---|
| 495 |  | 
|---|
| 496 | #ifdef XZ_DEC_BCJ | 
|---|
| 497 | /* If there are two filters, the first one must be a BCJ filter. */ | 
|---|
| 498 | s->bcj_active = s->temp.buf[1] & 0x01; | 
|---|
| 499 | if (s->bcj_active) { | 
|---|
| 500 | if (s->temp.size - s->temp.pos < 2) | 
|---|
| 501 | return XZ_OPTIONS_ERROR; | 
|---|
| 502 |  | 
|---|
| 503 | ret = xz_dec_bcj_reset(s: s->bcj, id: s->temp.buf[s->temp.pos++]); | 
|---|
| 504 | if (ret != XZ_OK) | 
|---|
| 505 | return ret; | 
|---|
| 506 |  | 
|---|
| 507 | /* | 
|---|
| 508 | * We don't support custom start offset, | 
|---|
| 509 | * so Size of Properties must be zero. | 
|---|
| 510 | */ | 
|---|
| 511 | if (s->temp.buf[s->temp.pos++] != 0x00) | 
|---|
| 512 | return XZ_OPTIONS_ERROR; | 
|---|
| 513 | } | 
|---|
| 514 | #endif | 
|---|
| 515 |  | 
|---|
| 516 | /* Valid Filter Flags always take at least two bytes. */ | 
|---|
| 517 | if (s->temp.size - s->temp.pos < 2) | 
|---|
| 518 | return XZ_DATA_ERROR; | 
|---|
| 519 |  | 
|---|
| 520 | /* Filter ID = LZMA2 */ | 
|---|
| 521 | if (s->temp.buf[s->temp.pos++] != 0x21) | 
|---|
| 522 | return XZ_OPTIONS_ERROR; | 
|---|
| 523 |  | 
|---|
| 524 | /* Size of Properties = 1-byte Filter Properties */ | 
|---|
| 525 | if (s->temp.buf[s->temp.pos++] != 0x01) | 
|---|
| 526 | return XZ_OPTIONS_ERROR; | 
|---|
| 527 |  | 
|---|
| 528 | /* Filter Properties contains LZMA2 dictionary size. */ | 
|---|
| 529 | if (s->temp.size - s->temp.pos < 1) | 
|---|
| 530 | return XZ_DATA_ERROR; | 
|---|
| 531 |  | 
|---|
| 532 | ret = xz_dec_lzma2_reset(s: s->lzma2, props: s->temp.buf[s->temp.pos++]); | 
|---|
| 533 | if (ret != XZ_OK) | 
|---|
| 534 | return ret; | 
|---|
| 535 |  | 
|---|
| 536 | /* The rest must be Header Padding. */ | 
|---|
| 537 | while (s->temp.pos < s->temp.size) | 
|---|
| 538 | if (s->temp.buf[s->temp.pos++] != 0x00) | 
|---|
| 539 | return XZ_OPTIONS_ERROR; | 
|---|
| 540 |  | 
|---|
| 541 | s->temp.pos = 0; | 
|---|
| 542 | s->block.compressed = 0; | 
|---|
| 543 | s->block.uncompressed = 0; | 
|---|
| 544 |  | 
|---|
| 545 | return XZ_OK; | 
|---|
| 546 | } | 
|---|
| 547 |  | 
|---|
| 548 | static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) | 
|---|
| 549 | { | 
|---|
| 550 | enum xz_ret ret; | 
|---|
| 551 |  | 
|---|
| 552 | /* | 
|---|
| 553 | * Store the start position for the case when we are in the middle | 
|---|
| 554 | * of the Index field. | 
|---|
| 555 | */ | 
|---|
| 556 | s->in_start = b->in_pos; | 
|---|
| 557 |  | 
|---|
| 558 | while (true) { | 
|---|
| 559 | switch (s->sequence) { | 
|---|
| 560 | case SEQ_STREAM_HEADER: | 
|---|
| 561 | /* | 
|---|
| 562 | * Stream Header is copied to s->temp, and then | 
|---|
| 563 | * decoded from there. This way if the caller | 
|---|
| 564 | * gives us only little input at a time, we can | 
|---|
| 565 | * still keep the Stream Header decoding code | 
|---|
| 566 | * simple. Similar approach is used in many places | 
|---|
| 567 | * in this file. | 
|---|
| 568 | */ | 
|---|
| 569 | if (!fill_temp(s, b)) | 
|---|
| 570 | return XZ_OK; | 
|---|
| 571 |  | 
|---|
| 572 | /* | 
|---|
| 573 | * If dec_stream_header() returns | 
|---|
| 574 | * XZ_UNSUPPORTED_CHECK, it is still possible | 
|---|
| 575 | * to continue decoding if working in multi-call | 
|---|
| 576 | * mode. Thus, update s->sequence before calling | 
|---|
| 577 | * dec_stream_header(). | 
|---|
| 578 | */ | 
|---|
| 579 | s->sequence = SEQ_BLOCK_START; | 
|---|
| 580 |  | 
|---|
| 581 | ret = dec_stream_header(s); | 
|---|
| 582 | if (ret != XZ_OK) | 
|---|
| 583 | return ret; | 
|---|
| 584 |  | 
|---|
| 585 | fallthrough; | 
|---|
| 586 |  | 
|---|
| 587 | case SEQ_BLOCK_START: | 
|---|
| 588 | /* We need one byte of input to continue. */ | 
|---|
| 589 | if (b->in_pos == b->in_size) | 
|---|
| 590 | return XZ_OK; | 
|---|
| 591 |  | 
|---|
| 592 | /* See if this is the beginning of the Index field. */ | 
|---|
| 593 | if (b->in[b->in_pos] == 0) { | 
|---|
| 594 | s->in_start = b->in_pos++; | 
|---|
| 595 | s->sequence = SEQ_INDEX; | 
|---|
| 596 | break; | 
|---|
| 597 | } | 
|---|
| 598 |  | 
|---|
| 599 | /* | 
|---|
| 600 | * Calculate the size of the Block Header and | 
|---|
| 601 | * prepare to decode it. | 
|---|
| 602 | */ | 
|---|
| 603 | s->block_header.size | 
|---|
| 604 | = ((uint32_t)b->in[b->in_pos] + 1) * 4; | 
|---|
| 605 |  | 
|---|
| 606 | s->temp.size = s->block_header.size; | 
|---|
| 607 | s->temp.pos = 0; | 
|---|
| 608 | s->sequence = SEQ_BLOCK_HEADER; | 
|---|
| 609 |  | 
|---|
| 610 | fallthrough; | 
|---|
| 611 |  | 
|---|
| 612 | case SEQ_BLOCK_HEADER: | 
|---|
| 613 | if (!fill_temp(s, b)) | 
|---|
| 614 | return XZ_OK; | 
|---|
| 615 |  | 
|---|
| 616 | ret = dec_block_header(s); | 
|---|
| 617 | if (ret != XZ_OK) | 
|---|
| 618 | return ret; | 
|---|
| 619 |  | 
|---|
| 620 | s->sequence = SEQ_BLOCK_UNCOMPRESS; | 
|---|
| 621 |  | 
|---|
| 622 | fallthrough; | 
|---|
| 623 |  | 
|---|
| 624 | case SEQ_BLOCK_UNCOMPRESS: | 
|---|
| 625 | ret = dec_block(s, b); | 
|---|
| 626 | if (ret != XZ_STREAM_END) | 
|---|
| 627 | return ret; | 
|---|
| 628 |  | 
|---|
| 629 | s->sequence = SEQ_BLOCK_PADDING; | 
|---|
| 630 |  | 
|---|
| 631 | fallthrough; | 
|---|
| 632 |  | 
|---|
| 633 | case SEQ_BLOCK_PADDING: | 
|---|
| 634 | /* | 
|---|
| 635 | * Size of Compressed Data + Block Padding | 
|---|
| 636 | * must be a multiple of four. We don't need | 
|---|
| 637 | * s->block.compressed for anything else | 
|---|
| 638 | * anymore, so we use it here to test the size | 
|---|
| 639 | * of the Block Padding field. | 
|---|
| 640 | */ | 
|---|
| 641 | while (s->block.compressed & 3) { | 
|---|
| 642 | if (b->in_pos == b->in_size) | 
|---|
| 643 | return XZ_OK; | 
|---|
| 644 |  | 
|---|
| 645 | if (b->in[b->in_pos++] != 0) | 
|---|
| 646 | return XZ_DATA_ERROR; | 
|---|
| 647 |  | 
|---|
| 648 | ++s->block.compressed; | 
|---|
| 649 | } | 
|---|
| 650 |  | 
|---|
| 651 | s->sequence = SEQ_BLOCK_CHECK; | 
|---|
| 652 |  | 
|---|
| 653 | fallthrough; | 
|---|
| 654 |  | 
|---|
| 655 | case SEQ_BLOCK_CHECK: | 
|---|
| 656 | if (s->check_type == XZ_CHECK_CRC32) { | 
|---|
| 657 | ret = crc32_validate(s, b); | 
|---|
| 658 | if (ret != XZ_STREAM_END) | 
|---|
| 659 | return ret; | 
|---|
| 660 | } | 
|---|
| 661 | #ifdef XZ_DEC_ANY_CHECK | 
|---|
| 662 | else if (!check_skip(s, b)) { | 
|---|
| 663 | return XZ_OK; | 
|---|
| 664 | } | 
|---|
| 665 | #endif | 
|---|
| 666 |  | 
|---|
| 667 | s->sequence = SEQ_BLOCK_START; | 
|---|
| 668 | break; | 
|---|
| 669 |  | 
|---|
| 670 | case SEQ_INDEX: | 
|---|
| 671 | ret = dec_index(s, b); | 
|---|
| 672 | if (ret != XZ_STREAM_END) | 
|---|
| 673 | return ret; | 
|---|
| 674 |  | 
|---|
| 675 | s->sequence = SEQ_INDEX_PADDING; | 
|---|
| 676 |  | 
|---|
| 677 | fallthrough; | 
|---|
| 678 |  | 
|---|
| 679 | case SEQ_INDEX_PADDING: | 
|---|
| 680 | while ((s->index.size + (b->in_pos - s->in_start)) | 
|---|
| 681 | & 3) { | 
|---|
| 682 | if (b->in_pos == b->in_size) { | 
|---|
| 683 | index_update(s, b); | 
|---|
| 684 | return XZ_OK; | 
|---|
| 685 | } | 
|---|
| 686 |  | 
|---|
| 687 | if (b->in[b->in_pos++] != 0) | 
|---|
| 688 | return XZ_DATA_ERROR; | 
|---|
| 689 | } | 
|---|
| 690 |  | 
|---|
| 691 | /* Finish the CRC32 value and Index size. */ | 
|---|
| 692 | index_update(s, b); | 
|---|
| 693 |  | 
|---|
| 694 | /* Compare the hashes to validate the Index field. */ | 
|---|
| 695 | if (!memeq(&s->block.hash, &s->index.hash, | 
|---|
| 696 | sizeof(s->block.hash))) | 
|---|
| 697 | return XZ_DATA_ERROR; | 
|---|
| 698 |  | 
|---|
| 699 | s->sequence = SEQ_INDEX_CRC32; | 
|---|
| 700 |  | 
|---|
| 701 | fallthrough; | 
|---|
| 702 |  | 
|---|
| 703 | case SEQ_INDEX_CRC32: | 
|---|
| 704 | ret = crc32_validate(s, b); | 
|---|
| 705 | if (ret != XZ_STREAM_END) | 
|---|
| 706 | return ret; | 
|---|
| 707 |  | 
|---|
| 708 | s->temp.size = STREAM_HEADER_SIZE; | 
|---|
| 709 | s->sequence = SEQ_STREAM_FOOTER; | 
|---|
| 710 |  | 
|---|
| 711 | fallthrough; | 
|---|
| 712 |  | 
|---|
| 713 | case SEQ_STREAM_FOOTER: | 
|---|
| 714 | if (!fill_temp(s, b)) | 
|---|
| 715 | return XZ_OK; | 
|---|
| 716 |  | 
|---|
| 717 | return dec_stream_footer(s); | 
|---|
| 718 | } | 
|---|
| 719 | } | 
|---|
| 720 |  | 
|---|
| 721 | /* Never reached */ | 
|---|
| 722 | } | 
|---|
| 723 |  | 
|---|
| 724 | /* | 
|---|
| 725 | * xz_dec_run() is a wrapper for dec_main() to handle some special cases in | 
|---|
| 726 | * multi-call and single-call decoding. | 
|---|
| 727 | * | 
|---|
| 728 | * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we | 
|---|
| 729 | * are not going to make any progress anymore. This is to prevent the caller | 
|---|
| 730 | * from calling us infinitely when the input file is truncated or otherwise | 
|---|
| 731 | * corrupt. Since zlib-style API allows that the caller fills the input buffer | 
|---|
| 732 | * only when the decoder doesn't produce any new output, we have to be careful | 
|---|
| 733 | * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only | 
|---|
| 734 | * after the second consecutive call to xz_dec_run() that makes no progress. | 
|---|
| 735 | * | 
|---|
| 736 | * In single-call mode, if we couldn't decode everything and no error | 
|---|
| 737 | * occurred, either the input is truncated or the output buffer is too small. | 
|---|
| 738 | * Since we know that the last input byte never produces any output, we know | 
|---|
| 739 | * that if all the input was consumed and decoding wasn't finished, the file | 
|---|
| 740 | * must be corrupt. Otherwise the output buffer has to be too small or the | 
|---|
| 741 | * file is corrupt in a way that decoding it produces too big output. | 
|---|
| 742 | * | 
|---|
| 743 | * If single-call decoding fails, we reset b->in_pos and b->out_pos back to | 
|---|
| 744 | * their original values. This is because with some filter chains there won't | 
|---|
| 745 | * be any valid uncompressed data in the output buffer unless the decoding | 
|---|
| 746 | * actually succeeds (that's the price to pay of using the output buffer as | 
|---|
| 747 | * the workspace). | 
|---|
| 748 | */ | 
|---|
| 749 | enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) | 
|---|
| 750 | { | 
|---|
| 751 | size_t in_start; | 
|---|
| 752 | size_t out_start; | 
|---|
| 753 | enum xz_ret ret; | 
|---|
| 754 |  | 
|---|
| 755 | if (DEC_IS_SINGLE(s->mode)) | 
|---|
| 756 | xz_dec_reset(s); | 
|---|
| 757 |  | 
|---|
| 758 | in_start = b->in_pos; | 
|---|
| 759 | out_start = b->out_pos; | 
|---|
| 760 | ret = dec_main(s, b); | 
|---|
| 761 |  | 
|---|
| 762 | if (DEC_IS_SINGLE(s->mode)) { | 
|---|
| 763 | if (ret == XZ_OK) | 
|---|
| 764 | ret = b->in_pos == b->in_size | 
|---|
| 765 | ? XZ_DATA_ERROR : XZ_BUF_ERROR; | 
|---|
| 766 |  | 
|---|
| 767 | if (ret != XZ_STREAM_END) { | 
|---|
| 768 | b->in_pos = in_start; | 
|---|
| 769 | b->out_pos = out_start; | 
|---|
| 770 | } | 
|---|
| 771 |  | 
|---|
| 772 | } else if (ret == XZ_OK && in_start == b->in_pos | 
|---|
| 773 | && out_start == b->out_pos) { | 
|---|
| 774 | if (s->allow_buf_error) | 
|---|
| 775 | ret = XZ_BUF_ERROR; | 
|---|
| 776 |  | 
|---|
| 777 | s->allow_buf_error = true; | 
|---|
| 778 | } else { | 
|---|
| 779 | s->allow_buf_error = false; | 
|---|
| 780 | } | 
|---|
| 781 |  | 
|---|
| 782 | return ret; | 
|---|
| 783 | } | 
|---|
| 784 |  | 
|---|
| 785 | struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) | 
|---|
| 786 | { | 
|---|
| 787 | struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); | 
|---|
| 788 | if (s == NULL) | 
|---|
| 789 | return NULL; | 
|---|
| 790 |  | 
|---|
| 791 | s->mode = mode; | 
|---|
| 792 |  | 
|---|
| 793 | #ifdef XZ_DEC_BCJ | 
|---|
| 794 | s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); | 
|---|
| 795 | if (s->bcj == NULL) | 
|---|
| 796 | goto error_bcj; | 
|---|
| 797 | #endif | 
|---|
| 798 |  | 
|---|
| 799 | s->lzma2 = xz_dec_lzma2_create(mode, dict_max); | 
|---|
| 800 | if (s->lzma2 == NULL) | 
|---|
| 801 | goto error_lzma2; | 
|---|
| 802 |  | 
|---|
| 803 | xz_dec_reset(s); | 
|---|
| 804 | return s; | 
|---|
| 805 |  | 
|---|
| 806 | error_lzma2: | 
|---|
| 807 | #ifdef XZ_DEC_BCJ | 
|---|
| 808 | xz_dec_bcj_end(s->bcj); | 
|---|
| 809 | error_bcj: | 
|---|
| 810 | #endif | 
|---|
| 811 | kfree(objp: s); | 
|---|
| 812 | return NULL; | 
|---|
| 813 | } | 
|---|
| 814 |  | 
|---|
| 815 | void xz_dec_reset(struct xz_dec *s) | 
|---|
| 816 | { | 
|---|
| 817 | s->sequence = SEQ_STREAM_HEADER; | 
|---|
| 818 | s->allow_buf_error = false; | 
|---|
| 819 | s->pos = 0; | 
|---|
| 820 | s->crc32 = 0; | 
|---|
| 821 | memzero(&s->block, sizeof(s->block)); | 
|---|
| 822 | memzero(&s->index, sizeof(s->index)); | 
|---|
| 823 | s->temp.pos = 0; | 
|---|
| 824 | s->temp.size = STREAM_HEADER_SIZE; | 
|---|
| 825 | } | 
|---|
| 826 |  | 
|---|
| 827 | void xz_dec_end(struct xz_dec *s) | 
|---|
| 828 | { | 
|---|
| 829 | if (s != NULL) { | 
|---|
| 830 | xz_dec_lzma2_end(s: s->lzma2); | 
|---|
| 831 | #ifdef XZ_DEC_BCJ | 
|---|
| 832 | xz_dec_bcj_end(s->bcj); | 
|---|
| 833 | #endif | 
|---|
| 834 | kfree(objp: s); | 
|---|
| 835 | } | 
|---|
| 836 | } | 
|---|
| 837 |  | 
|---|