| 1 | // SPDX-License-Identifier: 0BSD | 
|---|
| 2 |  | 
|---|
| 3 | /* | 
|---|
| 4 | * Branch/Call/Jump (BCJ) filter decoders | 
|---|
| 5 | * | 
|---|
| 6 | * Authors: Lasse Collin <lasse.collin@tukaani.org> | 
|---|
| 7 | *          Igor Pavlov <https://7-zip.org/> | 
|---|
| 8 | */ | 
|---|
| 9 |  | 
|---|
| 10 | #include "xz_private.h" | 
|---|
| 11 |  | 
|---|
| 12 | /* | 
|---|
| 13 | * The rest of the file is inside this ifdef. It makes things a little more | 
|---|
| 14 | * convenient when building without support for any BCJ filters. | 
|---|
| 15 | */ | 
|---|
| 16 | #ifdef XZ_DEC_BCJ | 
|---|
| 17 |  | 
|---|
| 18 | struct xz_dec_bcj { | 
|---|
| 19 | /* Type of the BCJ filter being used */ | 
|---|
| 20 | enum { | 
|---|
| 21 | BCJ_X86 = 4,        /* x86 or x86-64 */ | 
|---|
| 22 | BCJ_POWERPC = 5,    /* Big endian only */ | 
|---|
| 23 | BCJ_IA64 = 6,       /* Big or little endian */ | 
|---|
| 24 | BCJ_ARM = 7,        /* Little endian only */ | 
|---|
| 25 | BCJ_ARMTHUMB = 8,   /* Little endian only */ | 
|---|
| 26 | BCJ_SPARC = 9,      /* Big or little endian */ | 
|---|
| 27 | BCJ_ARM64 = 10,     /* AArch64 */ | 
|---|
| 28 | BCJ_RISCV = 11      /* RV32GQC_Zfh, RV64GQC_Zfh */ | 
|---|
| 29 | } type; | 
|---|
| 30 |  | 
|---|
| 31 | /* | 
|---|
| 32 | * Return value of the next filter in the chain. We need to preserve | 
|---|
| 33 | * this information across calls, because we must not call the next | 
|---|
| 34 | * filter anymore once it has returned XZ_STREAM_END. | 
|---|
| 35 | */ | 
|---|
| 36 | enum xz_ret ret; | 
|---|
| 37 |  | 
|---|
| 38 | /* True if we are operating in single-call mode. */ | 
|---|
| 39 | bool single_call; | 
|---|
| 40 |  | 
|---|
| 41 | /* | 
|---|
| 42 | * Absolute position relative to the beginning of the uncompressed | 
|---|
| 43 | * data (in a single .xz Block). We care only about the lowest 32 | 
|---|
| 44 | * bits so this doesn't need to be uint64_t even with big files. | 
|---|
| 45 | */ | 
|---|
| 46 | uint32_t pos; | 
|---|
| 47 |  | 
|---|
| 48 | /* x86 filter state */ | 
|---|
| 49 | uint32_t x86_prev_mask; | 
|---|
| 50 |  | 
|---|
| 51 | /* Temporary space to hold the variables from struct xz_buf */ | 
|---|
| 52 | uint8_t *out; | 
|---|
| 53 | size_t out_pos; | 
|---|
| 54 | size_t out_size; | 
|---|
| 55 |  | 
|---|
| 56 | struct { | 
|---|
| 57 | /* Amount of already filtered data in the beginning of buf */ | 
|---|
| 58 | size_t filtered; | 
|---|
| 59 |  | 
|---|
| 60 | /* Total amount of data currently stored in buf  */ | 
|---|
| 61 | size_t size; | 
|---|
| 62 |  | 
|---|
| 63 | /* | 
|---|
| 64 | * Buffer to hold a mix of filtered and unfiltered data. This | 
|---|
| 65 | * needs to be big enough to hold Alignment + 2 * Look-ahead: | 
|---|
| 66 | * | 
|---|
| 67 | * Type         Alignment   Look-ahead | 
|---|
| 68 | * x86              1           4 | 
|---|
| 69 | * PowerPC          4           0 | 
|---|
| 70 | * IA-64           16           0 | 
|---|
| 71 | * ARM              4           0 | 
|---|
| 72 | * ARM-Thumb        2           2 | 
|---|
| 73 | * SPARC            4           0 | 
|---|
| 74 | */ | 
|---|
| 75 | uint8_t buf[16]; | 
|---|
| 76 | } temp; | 
|---|
| 77 | }; | 
|---|
| 78 |  | 
|---|
| 79 | #ifdef XZ_DEC_X86 | 
|---|
| 80 | /* | 
|---|
| 81 | * This is used to test the most significant byte of a memory address | 
|---|
| 82 | * in an x86 instruction. | 
|---|
| 83 | */ | 
|---|
| 84 | static inline int bcj_x86_test_msbyte(uint8_t b) | 
|---|
| 85 | { | 
|---|
| 86 | return b == 0x00 || b == 0xFF; | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 90 | { | 
|---|
| 91 | static const bool mask_to_allowed_status[8] | 
|---|
| 92 | = { true, true, true, false, true, false, false, false }; | 
|---|
| 93 |  | 
|---|
| 94 | static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; | 
|---|
| 95 |  | 
|---|
| 96 | size_t i; | 
|---|
| 97 | size_t prev_pos = (size_t)-1; | 
|---|
| 98 | uint32_t prev_mask = s->x86_prev_mask; | 
|---|
| 99 | uint32_t src; | 
|---|
| 100 | uint32_t dest; | 
|---|
| 101 | uint32_t j; | 
|---|
| 102 | uint8_t b; | 
|---|
| 103 |  | 
|---|
| 104 | if (size <= 4) | 
|---|
| 105 | return 0; | 
|---|
| 106 |  | 
|---|
| 107 | size -= 4; | 
|---|
| 108 | for (i = 0; i < size; ++i) { | 
|---|
| 109 | if ((buf[i] & 0xFE) != 0xE8) | 
|---|
| 110 | continue; | 
|---|
| 111 |  | 
|---|
| 112 | prev_pos = i - prev_pos; | 
|---|
| 113 | if (prev_pos > 3) { | 
|---|
| 114 | prev_mask = 0; | 
|---|
| 115 | } else { | 
|---|
| 116 | prev_mask = (prev_mask << (prev_pos - 1)) & 7; | 
|---|
| 117 | if (prev_mask != 0) { | 
|---|
| 118 | b = buf[i + 4 - mask_to_bit_num[prev_mask]]; | 
|---|
| 119 | if (!mask_to_allowed_status[prev_mask] | 
|---|
| 120 | || bcj_x86_test_msbyte(b)) { | 
|---|
| 121 | prev_pos = i; | 
|---|
| 122 | prev_mask = (prev_mask << 1) | 1; | 
|---|
| 123 | continue; | 
|---|
| 124 | } | 
|---|
| 125 | } | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | prev_pos = i; | 
|---|
| 129 |  | 
|---|
| 130 | if (bcj_x86_test_msbyte(b: buf[i + 4])) { | 
|---|
| 131 | src = get_unaligned_le32(p: buf + i + 1); | 
|---|
| 132 | while (true) { | 
|---|
| 133 | dest = src - (s->pos + (uint32_t)i + 5); | 
|---|
| 134 | if (prev_mask == 0) | 
|---|
| 135 | break; | 
|---|
| 136 |  | 
|---|
| 137 | j = mask_to_bit_num[prev_mask] * 8; | 
|---|
| 138 | b = (uint8_t)(dest >> (24 - j)); | 
|---|
| 139 | if (!bcj_x86_test_msbyte(b)) | 
|---|
| 140 | break; | 
|---|
| 141 |  | 
|---|
| 142 | src = dest ^ (((uint32_t)1 << (32 - j)) - 1); | 
|---|
| 143 | } | 
|---|
| 144 |  | 
|---|
| 145 | dest &= 0x01FFFFFF; | 
|---|
| 146 | dest |= (uint32_t)0 - (dest & 0x01000000); | 
|---|
| 147 | put_unaligned_le32(val: dest, p: buf + i + 1); | 
|---|
| 148 | i += 4; | 
|---|
| 149 | } else { | 
|---|
| 150 | prev_mask = (prev_mask << 1) | 1; | 
|---|
| 151 | } | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | prev_pos = i - prev_pos; | 
|---|
| 155 | s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); | 
|---|
| 156 | return i; | 
|---|
| 157 | } | 
|---|
| 158 | #endif | 
|---|
| 159 |  | 
|---|
| 160 | #ifdef XZ_DEC_POWERPC | 
|---|
| 161 | static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 162 | { | 
|---|
| 163 | size_t i; | 
|---|
| 164 | uint32_t instr; | 
|---|
| 165 |  | 
|---|
| 166 | size &= ~(size_t)3; | 
|---|
| 167 |  | 
|---|
| 168 | for (i = 0; i < size; i += 4) { | 
|---|
| 169 | instr = get_unaligned_be32(p: buf + i); | 
|---|
| 170 | if ((instr & 0xFC000003) == 0x48000001) { | 
|---|
| 171 | instr &= 0x03FFFFFC; | 
|---|
| 172 | instr -= s->pos + (uint32_t)i; | 
|---|
| 173 | instr &= 0x03FFFFFC; | 
|---|
| 174 | instr |= 0x48000001; | 
|---|
| 175 | put_unaligned_be32(val: instr, p: buf + i); | 
|---|
| 176 | } | 
|---|
| 177 | } | 
|---|
| 178 |  | 
|---|
| 179 | return i; | 
|---|
| 180 | } | 
|---|
| 181 | #endif | 
|---|
| 182 |  | 
|---|
| 183 | #ifdef XZ_DEC_IA64 | 
|---|
| 184 | static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 185 | { | 
|---|
| 186 | static const uint8_t branch_table[32] = { | 
|---|
| 187 | 0, 0, 0, 0, 0, 0, 0, 0, | 
|---|
| 188 | 0, 0, 0, 0, 0, 0, 0, 0, | 
|---|
| 189 | 4, 4, 6, 6, 0, 0, 7, 7, | 
|---|
| 190 | 4, 4, 0, 0, 4, 4, 0, 0 | 
|---|
| 191 | }; | 
|---|
| 192 |  | 
|---|
| 193 | /* | 
|---|
| 194 | * The local variables take a little bit stack space, but it's less | 
|---|
| 195 | * than what LZMA2 decoder takes, so it doesn't make sense to reduce | 
|---|
| 196 | * stack usage here without doing that for the LZMA2 decoder too. | 
|---|
| 197 | */ | 
|---|
| 198 |  | 
|---|
| 199 | /* Loop counters */ | 
|---|
| 200 | size_t i; | 
|---|
| 201 | size_t j; | 
|---|
| 202 |  | 
|---|
| 203 | /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ | 
|---|
| 204 | uint32_t slot; | 
|---|
| 205 |  | 
|---|
| 206 | /* Bitwise offset of the instruction indicated by slot */ | 
|---|
| 207 | uint32_t bit_pos; | 
|---|
| 208 |  | 
|---|
| 209 | /* bit_pos split into byte and bit parts */ | 
|---|
| 210 | uint32_t byte_pos; | 
|---|
| 211 | uint32_t bit_res; | 
|---|
| 212 |  | 
|---|
| 213 | /* Address part of an instruction */ | 
|---|
| 214 | uint32_t addr; | 
|---|
| 215 |  | 
|---|
| 216 | /* Mask used to detect which instructions to convert */ | 
|---|
| 217 | uint32_t mask; | 
|---|
| 218 |  | 
|---|
| 219 | /* 41-bit instruction stored somewhere in the lowest 48 bits */ | 
|---|
| 220 | uint64_t instr; | 
|---|
| 221 |  | 
|---|
| 222 | /* Instruction normalized with bit_res for easier manipulation */ | 
|---|
| 223 | uint64_t norm; | 
|---|
| 224 |  | 
|---|
| 225 | size &= ~(size_t)15; | 
|---|
| 226 |  | 
|---|
| 227 | for (i = 0; i < size; i += 16) { | 
|---|
| 228 | mask = branch_table[buf[i] & 0x1F]; | 
|---|
| 229 | for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { | 
|---|
| 230 | if (((mask >> slot) & 1) == 0) | 
|---|
| 231 | continue; | 
|---|
| 232 |  | 
|---|
| 233 | byte_pos = bit_pos >> 3; | 
|---|
| 234 | bit_res = bit_pos & 7; | 
|---|
| 235 | instr = 0; | 
|---|
| 236 | for (j = 0; j < 6; ++j) | 
|---|
| 237 | instr |= (uint64_t)(buf[i + j + byte_pos]) | 
|---|
| 238 | << (8 * j); | 
|---|
| 239 |  | 
|---|
| 240 | norm = instr >> bit_res; | 
|---|
| 241 |  | 
|---|
| 242 | if (((norm >> 37) & 0x0F) == 0x05 | 
|---|
| 243 | && ((norm >> 9) & 0x07) == 0) { | 
|---|
| 244 | addr = (norm >> 13) & 0x0FFFFF; | 
|---|
| 245 | addr |= ((uint32_t)(norm >> 36) & 1) << 20; | 
|---|
| 246 | addr <<= 4; | 
|---|
| 247 | addr -= s->pos + (uint32_t)i; | 
|---|
| 248 | addr >>= 4; | 
|---|
| 249 |  | 
|---|
| 250 | norm &= ~((uint64_t)0x8FFFFF << 13); | 
|---|
| 251 | norm |= (uint64_t)(addr & 0x0FFFFF) << 13; | 
|---|
| 252 | norm |= (uint64_t)(addr & 0x100000) | 
|---|
| 253 | << (36 - 20); | 
|---|
| 254 |  | 
|---|
| 255 | instr &= (1 << bit_res) - 1; | 
|---|
| 256 | instr |= norm << bit_res; | 
|---|
| 257 |  | 
|---|
| 258 | for (j = 0; j < 6; j++) | 
|---|
| 259 | buf[i + j + byte_pos] | 
|---|
| 260 | = (uint8_t)(instr >> (8 * j)); | 
|---|
| 261 | } | 
|---|
| 262 | } | 
|---|
| 263 | } | 
|---|
| 264 |  | 
|---|
| 265 | return i; | 
|---|
| 266 | } | 
|---|
| 267 | #endif | 
|---|
| 268 |  | 
|---|
| 269 | #ifdef XZ_DEC_ARM | 
|---|
| 270 | static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 271 | { | 
|---|
| 272 | size_t i; | 
|---|
| 273 | uint32_t addr; | 
|---|
| 274 |  | 
|---|
| 275 | size &= ~(size_t)3; | 
|---|
| 276 |  | 
|---|
| 277 | for (i = 0; i < size; i += 4) { | 
|---|
| 278 | if (buf[i + 3] == 0xEB) { | 
|---|
| 279 | addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) | 
|---|
| 280 | | ((uint32_t)buf[i + 2] << 16); | 
|---|
| 281 | addr <<= 2; | 
|---|
| 282 | addr -= s->pos + (uint32_t)i + 8; | 
|---|
| 283 | addr >>= 2; | 
|---|
| 284 | buf[i] = (uint8_t)addr; | 
|---|
| 285 | buf[i + 1] = (uint8_t)(addr >> 8); | 
|---|
| 286 | buf[i + 2] = (uint8_t)(addr >> 16); | 
|---|
| 287 | } | 
|---|
| 288 | } | 
|---|
| 289 |  | 
|---|
| 290 | return i; | 
|---|
| 291 | } | 
|---|
| 292 | #endif | 
|---|
| 293 |  | 
|---|
| 294 | #ifdef XZ_DEC_ARMTHUMB | 
|---|
| 295 | static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 296 | { | 
|---|
| 297 | size_t i; | 
|---|
| 298 | uint32_t addr; | 
|---|
| 299 |  | 
|---|
| 300 | if (size < 4) | 
|---|
| 301 | return 0; | 
|---|
| 302 |  | 
|---|
| 303 | size -= 4; | 
|---|
| 304 |  | 
|---|
| 305 | for (i = 0; i <= size; i += 2) { | 
|---|
| 306 | if ((buf[i + 1] & 0xF8) == 0xF0 | 
|---|
| 307 | && (buf[i + 3] & 0xF8) == 0xF8) { | 
|---|
| 308 | addr = (((uint32_t)buf[i + 1] & 0x07) << 19) | 
|---|
| 309 | | ((uint32_t)buf[i] << 11) | 
|---|
| 310 | | (((uint32_t)buf[i + 3] & 0x07) << 8) | 
|---|
| 311 | | (uint32_t)buf[i + 2]; | 
|---|
| 312 | addr <<= 1; | 
|---|
| 313 | addr -= s->pos + (uint32_t)i + 4; | 
|---|
| 314 | addr >>= 1; | 
|---|
| 315 | buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); | 
|---|
| 316 | buf[i] = (uint8_t)(addr >> 11); | 
|---|
| 317 | buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); | 
|---|
| 318 | buf[i + 2] = (uint8_t)addr; | 
|---|
| 319 | i += 2; | 
|---|
| 320 | } | 
|---|
| 321 | } | 
|---|
| 322 |  | 
|---|
| 323 | return i; | 
|---|
| 324 | } | 
|---|
| 325 | #endif | 
|---|
| 326 |  | 
|---|
| 327 | #ifdef XZ_DEC_SPARC | 
|---|
| 328 | static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 329 | { | 
|---|
| 330 | size_t i; | 
|---|
| 331 | uint32_t instr; | 
|---|
| 332 |  | 
|---|
| 333 | size &= ~(size_t)3; | 
|---|
| 334 |  | 
|---|
| 335 | for (i = 0; i < size; i += 4) { | 
|---|
| 336 | instr = get_unaligned_be32(p: buf + i); | 
|---|
| 337 | if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { | 
|---|
| 338 | instr <<= 2; | 
|---|
| 339 | instr -= s->pos + (uint32_t)i; | 
|---|
| 340 | instr >>= 2; | 
|---|
| 341 | instr = ((uint32_t)0x40000000 - (instr & 0x400000)) | 
|---|
| 342 | | 0x40000000 | (instr & 0x3FFFFF); | 
|---|
| 343 | put_unaligned_be32(val: instr, p: buf + i); | 
|---|
| 344 | } | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
| 347 | return i; | 
|---|
| 348 | } | 
|---|
| 349 | #endif | 
|---|
| 350 |  | 
|---|
| 351 | #ifdef XZ_DEC_ARM64 | 
|---|
| 352 | static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 353 | { | 
|---|
| 354 | size_t i; | 
|---|
| 355 | uint32_t instr; | 
|---|
| 356 | uint32_t addr; | 
|---|
| 357 |  | 
|---|
| 358 | size &= ~(size_t)3; | 
|---|
| 359 |  | 
|---|
| 360 | for (i = 0; i < size; i += 4) { | 
|---|
| 361 | instr = get_unaligned_le32(p: buf + i); | 
|---|
| 362 |  | 
|---|
| 363 | if ((instr >> 26) == 0x25) { | 
|---|
| 364 | /* BL instruction */ | 
|---|
| 365 | addr = instr - ((s->pos + (uint32_t)i) >> 2); | 
|---|
| 366 | instr = 0x94000000 | (addr & 0x03FFFFFF); | 
|---|
| 367 | put_unaligned_le32(val: instr, p: buf + i); | 
|---|
| 368 |  | 
|---|
| 369 | } else if ((instr & 0x9F000000) == 0x90000000) { | 
|---|
| 370 | /* ADRP instruction */ | 
|---|
| 371 | addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC); | 
|---|
| 372 |  | 
|---|
| 373 | /* Only convert values in the range +/-512 MiB. */ | 
|---|
| 374 | if ((addr + 0x020000) & 0x1C0000) | 
|---|
| 375 | continue; | 
|---|
| 376 |  | 
|---|
| 377 | addr -= (s->pos + (uint32_t)i) >> 12; | 
|---|
| 378 |  | 
|---|
| 379 | instr &= 0x9000001F; | 
|---|
| 380 | instr |= (addr & 3) << 29; | 
|---|
| 381 | instr |= (addr & 0x03FFFC) << 3; | 
|---|
| 382 | instr |= (0U - (addr & 0x020000)) & 0xE00000; | 
|---|
| 383 |  | 
|---|
| 384 | put_unaligned_le32(val: instr, p: buf + i); | 
|---|
| 385 | } | 
|---|
| 386 | } | 
|---|
| 387 |  | 
|---|
| 388 | return i; | 
|---|
| 389 | } | 
|---|
| 390 | #endif | 
|---|
| 391 |  | 
|---|
| 392 | #ifdef XZ_DEC_RISCV | 
|---|
| 393 | static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | 
|---|
| 394 | { | 
|---|
| 395 | size_t i; | 
|---|
| 396 | uint32_t b1; | 
|---|
| 397 | uint32_t b2; | 
|---|
| 398 | uint32_t b3; | 
|---|
| 399 | uint32_t instr; | 
|---|
| 400 | uint32_t instr2; | 
|---|
| 401 | uint32_t instr2_rs1; | 
|---|
| 402 | uint32_t addr; | 
|---|
| 403 |  | 
|---|
| 404 | if (size < 8) | 
|---|
| 405 | return 0; | 
|---|
| 406 |  | 
|---|
| 407 | size -= 8; | 
|---|
| 408 |  | 
|---|
| 409 | for (i = 0; i <= size; i += 2) { | 
|---|
| 410 | instr = buf[i]; | 
|---|
| 411 |  | 
|---|
| 412 | if (instr == 0xEF) { | 
|---|
| 413 | /* JAL */ | 
|---|
| 414 | b1 = buf[i + 1]; | 
|---|
| 415 | if ((b1 & 0x0D) != 0) | 
|---|
| 416 | continue; | 
|---|
| 417 |  | 
|---|
| 418 | b2 = buf[i + 2]; | 
|---|
| 419 | b3 = buf[i + 3]; | 
|---|
| 420 |  | 
|---|
| 421 | addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1); | 
|---|
| 422 | addr -= s->pos + (uint32_t)i; | 
|---|
| 423 |  | 
|---|
| 424 | buf[i + 1] = (uint8_t)((b1 & 0x0F) | 
|---|
| 425 | | ((addr >> 8) & 0xF0)); | 
|---|
| 426 |  | 
|---|
| 427 | buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F) | 
|---|
| 428 | | ((addr >> 7) & 0x10) | 
|---|
| 429 | | ((addr << 4) & 0xE0)); | 
|---|
| 430 |  | 
|---|
| 431 | buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F) | 
|---|
| 432 | | ((addr >> 13) & 0x80)); | 
|---|
| 433 |  | 
|---|
| 434 | i += 4 - 2; | 
|---|
| 435 |  | 
|---|
| 436 | } else if ((instr & 0x7F) == 0x17) { | 
|---|
| 437 | /* AUIPC */ | 
|---|
| 438 | instr |= (uint32_t)buf[i + 1] << 8; | 
|---|
| 439 | instr |= (uint32_t)buf[i + 2] << 16; | 
|---|
| 440 | instr |= (uint32_t)buf[i + 3] << 24; | 
|---|
| 441 |  | 
|---|
| 442 | if (instr & 0xE80) { | 
|---|
| 443 | /* AUIPC's rd doesn't equal x0 or x2. */ | 
|---|
| 444 | instr2 = get_unaligned_le32(p: buf + i + 4); | 
|---|
| 445 |  | 
|---|
| 446 | if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) { | 
|---|
| 447 | i += 6 - 2; | 
|---|
| 448 | continue; | 
|---|
| 449 | } | 
|---|
| 450 |  | 
|---|
| 451 | addr = (instr & 0xFFFFF000) + (instr2 >> 20); | 
|---|
| 452 |  | 
|---|
| 453 | instr = 0x17 | (2 << 7) | (instr2 << 12); | 
|---|
| 454 | instr2 = addr; | 
|---|
| 455 | } else { | 
|---|
| 456 | /* AUIPC's rd equals x0 or x2. */ | 
|---|
| 457 | instr2_rs1 = instr >> 27; | 
|---|
| 458 |  | 
|---|
| 459 | if ((uint32_t)((instr - 0x3117) << 18) | 
|---|
| 460 | >= (instr2_rs1 & 0x1D)) { | 
|---|
| 461 | i += 4 - 2; | 
|---|
| 462 | continue; | 
|---|
| 463 | } | 
|---|
| 464 |  | 
|---|
| 465 | addr = get_unaligned_be32(p: buf + i + 4); | 
|---|
| 466 | addr -= s->pos + (uint32_t)i; | 
|---|
| 467 |  | 
|---|
| 468 | instr2 = (instr >> 12) | (addr << 20); | 
|---|
| 469 |  | 
|---|
| 470 | instr = 0x17 | (instr2_rs1 << 7) | 
|---|
| 471 | | ((addr + 0x800) & 0xFFFFF000); | 
|---|
| 472 | } | 
|---|
| 473 |  | 
|---|
| 474 | put_unaligned_le32(val: instr, p: buf + i); | 
|---|
| 475 | put_unaligned_le32(val: instr2, p: buf + i + 4); | 
|---|
| 476 |  | 
|---|
| 477 | i += 8 - 2; | 
|---|
| 478 | } | 
|---|
| 479 | } | 
|---|
| 480 |  | 
|---|
| 481 | return i; | 
|---|
| 482 | } | 
|---|
| 483 | #endif | 
|---|
| 484 |  | 
|---|
| 485 | /* | 
|---|
| 486 | * Apply the selected BCJ filter. Update *pos and s->pos to match the amount | 
|---|
| 487 | * of data that got filtered. | 
|---|
| 488 | * | 
|---|
| 489 | * NOTE: This is implemented as a switch statement to avoid using function | 
|---|
| 490 | * pointers, which could be problematic in the kernel boot code, which must | 
|---|
| 491 | * avoid pointers to static data (at least on x86). | 
|---|
| 492 | */ | 
|---|
| 493 | static void bcj_apply(struct xz_dec_bcj *s, | 
|---|
| 494 | uint8_t *buf, size_t *pos, size_t size) | 
|---|
| 495 | { | 
|---|
| 496 | size_t filtered; | 
|---|
| 497 |  | 
|---|
| 498 | buf += *pos; | 
|---|
| 499 | size -= *pos; | 
|---|
| 500 |  | 
|---|
| 501 | switch (s->type) { | 
|---|
| 502 | #ifdef XZ_DEC_X86 | 
|---|
| 503 | case BCJ_X86: | 
|---|
| 504 | filtered = bcj_x86(s, buf, size); | 
|---|
| 505 | break; | 
|---|
| 506 | #endif | 
|---|
| 507 | #ifdef XZ_DEC_POWERPC | 
|---|
| 508 | case BCJ_POWERPC: | 
|---|
| 509 | filtered = bcj_powerpc(s, buf, size); | 
|---|
| 510 | break; | 
|---|
| 511 | #endif | 
|---|
| 512 | #ifdef XZ_DEC_IA64 | 
|---|
| 513 | case BCJ_IA64: | 
|---|
| 514 | filtered = bcj_ia64(s, buf, size); | 
|---|
| 515 | break; | 
|---|
| 516 | #endif | 
|---|
| 517 | #ifdef XZ_DEC_ARM | 
|---|
| 518 | case BCJ_ARM: | 
|---|
| 519 | filtered = bcj_arm(s, buf, size); | 
|---|
| 520 | break; | 
|---|
| 521 | #endif | 
|---|
| 522 | #ifdef XZ_DEC_ARMTHUMB | 
|---|
| 523 | case BCJ_ARMTHUMB: | 
|---|
| 524 | filtered = bcj_armthumb(s, buf, size); | 
|---|
| 525 | break; | 
|---|
| 526 | #endif | 
|---|
| 527 | #ifdef XZ_DEC_SPARC | 
|---|
| 528 | case BCJ_SPARC: | 
|---|
| 529 | filtered = bcj_sparc(s, buf, size); | 
|---|
| 530 | break; | 
|---|
| 531 | #endif | 
|---|
| 532 | #ifdef XZ_DEC_ARM64 | 
|---|
| 533 | case BCJ_ARM64: | 
|---|
| 534 | filtered = bcj_arm64(s, buf, size); | 
|---|
| 535 | break; | 
|---|
| 536 | #endif | 
|---|
| 537 | #ifdef XZ_DEC_RISCV | 
|---|
| 538 | case BCJ_RISCV: | 
|---|
| 539 | filtered = bcj_riscv(s, buf, size); | 
|---|
| 540 | break; | 
|---|
| 541 | #endif | 
|---|
| 542 | default: | 
|---|
| 543 | /* Never reached but silence compiler warnings. */ | 
|---|
| 544 | filtered = 0; | 
|---|
| 545 | break; | 
|---|
| 546 | } | 
|---|
| 547 |  | 
|---|
| 548 | *pos += filtered; | 
|---|
| 549 | s->pos += filtered; | 
|---|
| 550 | } | 
|---|
| 551 |  | 
|---|
| 552 | /* | 
|---|
| 553 | * Flush pending filtered data from temp to the output buffer. | 
|---|
| 554 | * Move the remaining mixture of possibly filtered and unfiltered | 
|---|
| 555 | * data to the beginning of temp. | 
|---|
| 556 | */ | 
|---|
| 557 | static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) | 
|---|
| 558 | { | 
|---|
| 559 | size_t copy_size; | 
|---|
| 560 |  | 
|---|
| 561 | copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); | 
|---|
| 562 | memcpy(to: b->out + b->out_pos, from: s->temp.buf, len: copy_size); | 
|---|
| 563 | b->out_pos += copy_size; | 
|---|
| 564 |  | 
|---|
| 565 | s->temp.filtered -= copy_size; | 
|---|
| 566 | s->temp.size -= copy_size; | 
|---|
| 567 | memmove(dest: s->temp.buf, src: s->temp.buf + copy_size, count: s->temp.size); | 
|---|
| 568 | } | 
|---|
| 569 |  | 
|---|
| 570 | /* | 
|---|
| 571 | * The BCJ filter functions are primitive in sense that they process the | 
|---|
| 572 | * data in chunks of 1-16 bytes. To hide this issue, this function does | 
|---|
| 573 | * some buffering. | 
|---|
| 574 | */ | 
|---|
| 575 | enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2, | 
|---|
| 576 | struct xz_buf *b) | 
|---|
| 577 | { | 
|---|
| 578 | size_t out_start; | 
|---|
| 579 |  | 
|---|
| 580 | /* | 
|---|
| 581 | * Flush pending already filtered data to the output buffer. Return | 
|---|
| 582 | * immediately if we couldn't flush everything, or if the next | 
|---|
| 583 | * filter in the chain had already returned XZ_STREAM_END. | 
|---|
| 584 | */ | 
|---|
| 585 | if (s->temp.filtered > 0) { | 
|---|
| 586 | bcj_flush(s, b); | 
|---|
| 587 | if (s->temp.filtered > 0) | 
|---|
| 588 | return XZ_OK; | 
|---|
| 589 |  | 
|---|
| 590 | if (s->ret == XZ_STREAM_END) | 
|---|
| 591 | return XZ_STREAM_END; | 
|---|
| 592 | } | 
|---|
| 593 |  | 
|---|
| 594 | /* | 
|---|
| 595 | * If we have more output space than what is currently pending in | 
|---|
| 596 | * temp, copy the unfiltered data from temp to the output buffer | 
|---|
| 597 | * and try to fill the output buffer by decoding more data from the | 
|---|
| 598 | * next filter in the chain. Apply the BCJ filter on the new data | 
|---|
| 599 | * in the output buffer. If everything cannot be filtered, copy it | 
|---|
| 600 | * to temp and rewind the output buffer position accordingly. | 
|---|
| 601 | * | 
|---|
| 602 | * This needs to be always run when temp.size == 0 to handle a special | 
|---|
| 603 | * case where the output buffer is full and the next filter has no | 
|---|
| 604 | * more output coming but hasn't returned XZ_STREAM_END yet. | 
|---|
| 605 | */ | 
|---|
| 606 | if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) { | 
|---|
| 607 | out_start = b->out_pos; | 
|---|
| 608 | memcpy(to: b->out + b->out_pos, from: s->temp.buf, len: s->temp.size); | 
|---|
| 609 | b->out_pos += s->temp.size; | 
|---|
| 610 |  | 
|---|
| 611 | s->ret = xz_dec_lzma2_run(s: lzma2, b); | 
|---|
| 612 | if (s->ret != XZ_STREAM_END | 
|---|
| 613 | && (s->ret != XZ_OK || s->single_call)) | 
|---|
| 614 | return s->ret; | 
|---|
| 615 |  | 
|---|
| 616 | bcj_apply(s, buf: b->out, pos: &out_start, size: b->out_pos); | 
|---|
| 617 |  | 
|---|
| 618 | /* | 
|---|
| 619 | * As an exception, if the next filter returned XZ_STREAM_END, | 
|---|
| 620 | * we can do that too, since the last few bytes that remain | 
|---|
| 621 | * unfiltered are meant to remain unfiltered. | 
|---|
| 622 | */ | 
|---|
| 623 | if (s->ret == XZ_STREAM_END) | 
|---|
| 624 | return XZ_STREAM_END; | 
|---|
| 625 |  | 
|---|
| 626 | s->temp.size = b->out_pos - out_start; | 
|---|
| 627 | b->out_pos -= s->temp.size; | 
|---|
| 628 | memcpy(to: s->temp.buf, from: b->out + b->out_pos, len: s->temp.size); | 
|---|
| 629 |  | 
|---|
| 630 | /* | 
|---|
| 631 | * If there wasn't enough input to the next filter to fill | 
|---|
| 632 | * the output buffer with unfiltered data, there's no point | 
|---|
| 633 | * to try decoding more data to temp. | 
|---|
| 634 | */ | 
|---|
| 635 | if (b->out_pos + s->temp.size < b->out_size) | 
|---|
| 636 | return XZ_OK; | 
|---|
| 637 | } | 
|---|
| 638 |  | 
|---|
| 639 | /* | 
|---|
| 640 | * We have unfiltered data in temp. If the output buffer isn't full | 
|---|
| 641 | * yet, try to fill the temp buffer by decoding more data from the | 
|---|
| 642 | * next filter. Apply the BCJ filter on temp. Then we hopefully can | 
|---|
| 643 | * fill the actual output buffer by copying filtered data from temp. | 
|---|
| 644 | * A mix of filtered and unfiltered data may be left in temp; it will | 
|---|
| 645 | * be taken care on the next call to this function. | 
|---|
| 646 | */ | 
|---|
| 647 | if (b->out_pos < b->out_size) { | 
|---|
| 648 | /* Make b->out{,_pos,_size} temporarily point to s->temp. */ | 
|---|
| 649 | s->out = b->out; | 
|---|
| 650 | s->out_pos = b->out_pos; | 
|---|
| 651 | s->out_size = b->out_size; | 
|---|
| 652 | b->out = s->temp.buf; | 
|---|
| 653 | b->out_pos = s->temp.size; | 
|---|
| 654 | b->out_size = sizeof(s->temp.buf); | 
|---|
| 655 |  | 
|---|
| 656 | s->ret = xz_dec_lzma2_run(s: lzma2, b); | 
|---|
| 657 |  | 
|---|
| 658 | s->temp.size = b->out_pos; | 
|---|
| 659 | b->out = s->out; | 
|---|
| 660 | b->out_pos = s->out_pos; | 
|---|
| 661 | b->out_size = s->out_size; | 
|---|
| 662 |  | 
|---|
| 663 | if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) | 
|---|
| 664 | return s->ret; | 
|---|
| 665 |  | 
|---|
| 666 | bcj_apply(s, buf: s->temp.buf, pos: &s->temp.filtered, size: s->temp.size); | 
|---|
| 667 |  | 
|---|
| 668 | /* | 
|---|
| 669 | * If the next filter returned XZ_STREAM_END, we mark that | 
|---|
| 670 | * everything is filtered, since the last unfiltered bytes | 
|---|
| 671 | * of the stream are meant to be left as is. | 
|---|
| 672 | */ | 
|---|
| 673 | if (s->ret == XZ_STREAM_END) | 
|---|
| 674 | s->temp.filtered = s->temp.size; | 
|---|
| 675 |  | 
|---|
| 676 | bcj_flush(s, b); | 
|---|
| 677 | if (s->temp.filtered > 0) | 
|---|
| 678 | return XZ_OK; | 
|---|
| 679 | } | 
|---|
| 680 |  | 
|---|
| 681 | return s->ret; | 
|---|
| 682 | } | 
|---|
| 683 |  | 
|---|
| 684 | struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) | 
|---|
| 685 | { | 
|---|
| 686 | struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); | 
|---|
| 687 | if (s != NULL) | 
|---|
| 688 | s->single_call = single_call; | 
|---|
| 689 |  | 
|---|
| 690 | return s; | 
|---|
| 691 | } | 
|---|
| 692 |  | 
|---|
| 693 | enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) | 
|---|
| 694 | { | 
|---|
| 695 | switch (id) { | 
|---|
| 696 | #ifdef XZ_DEC_X86 | 
|---|
| 697 | case BCJ_X86: | 
|---|
| 698 | #endif | 
|---|
| 699 | #ifdef XZ_DEC_POWERPC | 
|---|
| 700 | case BCJ_POWERPC: | 
|---|
| 701 | #endif | 
|---|
| 702 | #ifdef XZ_DEC_IA64 | 
|---|
| 703 | case BCJ_IA64: | 
|---|
| 704 | #endif | 
|---|
| 705 | #ifdef XZ_DEC_ARM | 
|---|
| 706 | case BCJ_ARM: | 
|---|
| 707 | #endif | 
|---|
| 708 | #ifdef XZ_DEC_ARMTHUMB | 
|---|
| 709 | case BCJ_ARMTHUMB: | 
|---|
| 710 | #endif | 
|---|
| 711 | #ifdef XZ_DEC_SPARC | 
|---|
| 712 | case BCJ_SPARC: | 
|---|
| 713 | #endif | 
|---|
| 714 | #ifdef XZ_DEC_ARM64 | 
|---|
| 715 | case BCJ_ARM64: | 
|---|
| 716 | #endif | 
|---|
| 717 | #ifdef XZ_DEC_RISCV | 
|---|
| 718 | case BCJ_RISCV: | 
|---|
| 719 | #endif | 
|---|
| 720 | break; | 
|---|
| 721 |  | 
|---|
| 722 | default: | 
|---|
| 723 | /* Unsupported Filter ID */ | 
|---|
| 724 | return XZ_OPTIONS_ERROR; | 
|---|
| 725 | } | 
|---|
| 726 |  | 
|---|
| 727 | s->type = id; | 
|---|
| 728 | s->ret = XZ_OK; | 
|---|
| 729 | s->pos = 0; | 
|---|
| 730 | s->x86_prev_mask = 0; | 
|---|
| 731 | s->temp.filtered = 0; | 
|---|
| 732 | s->temp.size = 0; | 
|---|
| 733 |  | 
|---|
| 734 | return XZ_OK; | 
|---|
| 735 | } | 
|---|
| 736 |  | 
|---|
| 737 | #endif | 
|---|
| 738 |  | 
|---|