| 1 | // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause | 
|---|
| 2 | /* | 
|---|
| 3 | * Copyright (c) Meta Platforms, Inc. and affiliates. | 
|---|
| 4 | * All rights reserved. | 
|---|
| 5 | * | 
|---|
| 6 | * This source code is licensed under both the BSD-style license (found in the | 
|---|
| 7 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | 
|---|
| 8 | * in the COPYING file in the root directory of this source tree). | 
|---|
| 9 | * You may select, at your option, one of the above-listed licenses. | 
|---|
| 10 | */ | 
|---|
| 11 |  | 
|---|
/* zstd_ddict.c :
 * concentrates all logic that needs to know the internals of the ZSTD_DDict object */
|---|
| 14 |  | 
|---|
| 15 | /*-******************************************************* | 
|---|
| 16 | *  Dependencies | 
|---|
| 17 | *********************************************************/ | 
|---|
| 18 | #include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */ | 
|---|
| 19 | #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ | 
|---|
| 20 | #include "../common/cpu.h"         /* bmi2 */ | 
|---|
| 21 | #include "../common/mem.h"         /* low level memory routines */ | 
|---|
| 22 | #define FSE_STATIC_LINKING_ONLY | 
|---|
| 23 | #include "../common/fse.h" | 
|---|
| 24 | #include "../common/huf.h" | 
|---|
| 25 | #include "zstd_decompress_internal.h" | 
|---|
| 26 | #include "zstd_ddict.h" | 
|---|
| 27 |  | 
|---|
| 28 |  | 
|---|
| 29 |  | 
|---|
| 30 |  | 
|---|
| 31 | /*-******************************************************* | 
|---|
| 32 | *  Types | 
|---|
| 33 | *********************************************************/ | 
|---|
| 34 | struct ZSTD_DDict_s { | 
|---|
| 35 | void* dictBuffer; | 
|---|
| 36 | const void* dictContent; | 
|---|
| 37 | size_t dictSize; | 
|---|
| 38 | ZSTD_entropyDTables_t entropy; | 
|---|
| 39 | U32 dictID; | 
|---|
| 40 | U32 entropyPresent; | 
|---|
| 41 | ZSTD_customMem cMem; | 
|---|
| 42 | };  /* typedef'd to ZSTD_DDict within "zstd.h" */ | 
|---|
| 43 |  | 
|---|
| 44 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) | 
|---|
| 45 | { | 
|---|
| 46 | assert(ddict != NULL); | 
|---|
| 47 | return ddict->dictContent; | 
|---|
| 48 | } | 
|---|
| 49 |  | 
|---|
| 50 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) | 
|---|
| 51 | { | 
|---|
| 52 | assert(ddict != NULL); | 
|---|
| 53 | return ddict->dictSize; | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) | 
|---|
| 57 | { | 
|---|
| 58 | DEBUGLOG(4, "ZSTD_copyDDictParameters"); | 
|---|
| 59 | assert(dctx != NULL); | 
|---|
| 60 | assert(ddict != NULL); | 
|---|
| 61 | dctx->dictID = ddict->dictID; | 
|---|
| 62 | dctx->prefixStart = ddict->dictContent; | 
|---|
| 63 | dctx->virtualStart = ddict->dictContent; | 
|---|
| 64 | dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; | 
|---|
| 65 | dctx->previousDstEnd = dctx->dictEnd; | 
|---|
| 66 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION | 
|---|
| 67 | dctx->dictContentBeginForFuzzing = dctx->prefixStart; | 
|---|
| 68 | dctx->dictContentEndForFuzzing = dctx->previousDstEnd; | 
|---|
| 69 | #endif | 
|---|
| 70 | if (ddict->entropyPresent) { | 
|---|
| 71 | dctx->litEntropy = 1; | 
|---|
| 72 | dctx->fseEntropy = 1; | 
|---|
| 73 | dctx->LLTptr = ddict->entropy.LLTable; | 
|---|
| 74 | dctx->MLTptr = ddict->entropy.MLTable; | 
|---|
| 75 | dctx->OFTptr = ddict->entropy.OFTable; | 
|---|
| 76 | dctx->HUFptr = ddict->entropy.hufTable; | 
|---|
| 77 | dctx->entropy.rep[0] = ddict->entropy.rep[0]; | 
|---|
| 78 | dctx->entropy.rep[1] = ddict->entropy.rep[1]; | 
|---|
| 79 | dctx->entropy.rep[2] = ddict->entropy.rep[2]; | 
|---|
| 80 | } else { | 
|---|
| 81 | dctx->litEntropy = 0; | 
|---|
| 82 | dctx->fseEntropy = 0; | 
|---|
| 83 | } | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 |  | 
|---|
| 87 | static size_t | 
|---|
| 88 | ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, | 
|---|
| 89 | ZSTD_dictContentType_e dictContentType) | 
|---|
| 90 | { | 
|---|
| 91 | ddict->dictID = 0; | 
|---|
| 92 | ddict->entropyPresent = 0; | 
|---|
| 93 | if (dictContentType == ZSTD_dct_rawContent) return 0; | 
|---|
| 94 |  | 
|---|
| 95 | if (ddict->dictSize < 8) { | 
|---|
| 96 | if (dictContentType == ZSTD_dct_fullDict) | 
|---|
| 97 | return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */ | 
|---|
| 98 | return 0;   /* pure content mode */ | 
|---|
| 99 | } | 
|---|
| 100 | {   U32 const magic = MEM_readLE32(memPtr: ddict->dictContent); | 
|---|
| 101 | if (magic != ZSTD_MAGIC_DICTIONARY) { | 
|---|
| 102 | if (dictContentType == ZSTD_dct_fullDict) | 
|---|
| 103 | return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */ | 
|---|
| 104 | return 0;   /* pure content mode */ | 
|---|
| 105 | } | 
|---|
| 106 | } | 
|---|
| 107 | ddict->dictID = MEM_readLE32(memPtr: (const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); | 
|---|
| 108 |  | 
|---|
| 109 | /* load entropy tables */ | 
|---|
| 110 | RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( | 
|---|
| 111 | &ddict->entropy, ddict->dictContent, ddict->dictSize)), | 
|---|
| 112 | dictionary_corrupted, ""); | 
|---|
| 113 | ddict->entropyPresent = 1; | 
|---|
| 114 | return 0; | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 |  | 
|---|
| 118 | static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, | 
|---|
| 119 | const void* dict, size_t dictSize, | 
|---|
| 120 | ZSTD_dictLoadMethod_e dictLoadMethod, | 
|---|
| 121 | ZSTD_dictContentType_e dictContentType) | 
|---|
| 122 | { | 
|---|
| 123 | if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { | 
|---|
| 124 | ddict->dictBuffer = NULL; | 
|---|
| 125 | ddict->dictContent = dict; | 
|---|
| 126 | if (!dict) dictSize = 0; | 
|---|
| 127 | } else { | 
|---|
| 128 | void* const internalBuffer = ZSTD_customMalloc(size: dictSize, customMem: ddict->cMem); | 
|---|
| 129 | ddict->dictBuffer = internalBuffer; | 
|---|
| 130 | ddict->dictContent = internalBuffer; | 
|---|
| 131 | if (!internalBuffer) return ERROR(memory_allocation); | 
|---|
| 132 | ZSTD_memcpy(internalBuffer, dict, dictSize); | 
|---|
| 133 | } | 
|---|
| 134 | ddict->dictSize = dictSize; | 
|---|
| 135 | ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */ | 
|---|
| 136 |  | 
|---|
| 137 | /* parse dictionary content */ | 
|---|
| 138 | FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); | 
|---|
| 139 |  | 
|---|
| 140 | return 0; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, | 
|---|
| 144 | ZSTD_dictLoadMethod_e dictLoadMethod, | 
|---|
| 145 | ZSTD_dictContentType_e dictContentType, | 
|---|
| 146 | ZSTD_customMem customMem) | 
|---|
| 147 | { | 
|---|
| 148 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; | 
|---|
| 149 |  | 
|---|
| 150 | {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(size: sizeof(ZSTD_DDict), customMem); | 
|---|
| 151 | if (ddict == NULL) return NULL; | 
|---|
| 152 | ddict->cMem = customMem; | 
|---|
| 153 | {   size_t const initResult = ZSTD_initDDict_internal(ddict, | 
|---|
| 154 | dict, dictSize, | 
|---|
| 155 | dictLoadMethod, dictContentType); | 
|---|
| 156 | if (ZSTD_isError(code: initResult)) { | 
|---|
| 157 | ZSTD_freeDDict(ddict); | 
|---|
| 158 | return NULL; | 
|---|
| 159 | }   } | 
|---|
| 160 | return ddict; | 
|---|
| 161 | } | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 | /*! ZSTD_createDDict() : | 
|---|
| 165 | *   Create a digested dictionary, to start decompression without startup delay. | 
|---|
| 166 | *   `dict` content is copied inside DDict. | 
|---|
| 167 | *   Consequently, `dict` can be released after `ZSTD_DDict` creation */ | 
|---|
| 168 | ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) | 
|---|
| 169 | { | 
|---|
| 170 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; | 
|---|
| 171 | return ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod: ZSTD_dlm_byCopy, dictContentType: ZSTD_dct_auto, customMem: allocator); | 
|---|
| 172 | } | 
|---|
| 173 |  | 
|---|
| 174 | /*! ZSTD_createDDict_byReference() : | 
|---|
| 175 | *  Create a digested dictionary, to start decompression without startup delay. | 
|---|
| 176 | *  Dictionary content is simply referenced, it will be accessed during decompression. | 
|---|
| 177 | *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ | 
|---|
| 178 | ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) | 
|---|
| 179 | { | 
|---|
| 180 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; | 
|---|
| 181 | return ZSTD_createDDict_advanced(dict: dictBuffer, dictSize, dictLoadMethod: ZSTD_dlm_byRef, dictContentType: ZSTD_dct_auto, customMem: allocator); | 
|---|
| 182 | } | 
|---|
| 183 |  | 
|---|
| 184 |  | 
|---|
| 185 | const ZSTD_DDict* ZSTD_initStaticDDict( | 
|---|
| 186 | void* sBuffer, size_t sBufferSize, | 
|---|
| 187 | const void* dict, size_t dictSize, | 
|---|
| 188 | ZSTD_dictLoadMethod_e dictLoadMethod, | 
|---|
| 189 | ZSTD_dictContentType_e dictContentType) | 
|---|
| 190 | { | 
|---|
| 191 | size_t const neededSpace = sizeof(ZSTD_DDict) | 
|---|
| 192 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); | 
|---|
| 193 | ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; | 
|---|
| 194 | assert(sBuffer != NULL); | 
|---|
| 195 | assert(dict != NULL); | 
|---|
| 196 | if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */ | 
|---|
| 197 | if (sBufferSize < neededSpace) return NULL; | 
|---|
| 198 | if (dictLoadMethod == ZSTD_dlm_byCopy) { | 
|---|
| 199 | ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */ | 
|---|
| 200 | dict = ddict+1; | 
|---|
| 201 | } | 
|---|
| 202 | if (ZSTD_isError( code: ZSTD_initDDict_internal(ddict, | 
|---|
| 203 | dict, dictSize, | 
|---|
| 204 | dictLoadMethod: ZSTD_dlm_byRef, dictContentType) )) | 
|---|
| 205 | return NULL; | 
|---|
| 206 | return ddict; | 
|---|
| 207 | } | 
|---|
| 208 |  | 
|---|
| 209 |  | 
|---|
| 210 | size_t ZSTD_freeDDict(ZSTD_DDict* ddict) | 
|---|
| 211 | { | 
|---|
| 212 | if (ddict==NULL) return 0;   /* support free on NULL */ | 
|---|
| 213 | {   ZSTD_customMem const cMem = ddict->cMem; | 
|---|
| 214 | ZSTD_customFree(ptr: ddict->dictBuffer, customMem: cMem); | 
|---|
| 215 | ZSTD_customFree(ptr: ddict, customMem: cMem); | 
|---|
| 216 | return 0; | 
|---|
| 217 | } | 
|---|
| 218 | } | 
|---|
| 219 |  | 
|---|
| 220 | /*! ZSTD_estimateDDictSize() : | 
|---|
| 221 | *  Estimate amount of memory that will be needed to create a dictionary for decompression. | 
|---|
| 222 | *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ | 
|---|
| 223 | size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) | 
|---|
| 224 | { | 
|---|
| 225 | return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); | 
|---|
| 226 | } | 
|---|
| 227 |  | 
|---|
| 228 | size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) | 
|---|
| 229 | { | 
|---|
| 230 | if (ddict==NULL) return 0;   /* support sizeof on NULL */ | 
|---|
| 231 | return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; | 
|---|
| 232 | } | 
|---|
| 233 |  | 
|---|
| 234 | /*! ZSTD_getDictID_fromDDict() : | 
|---|
| 235 | *  Provides the dictID of the dictionary loaded into `ddict`. | 
|---|
| 236 | *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | 
|---|
| 237 | *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | 
|---|
| 238 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) | 
|---|
| 239 | { | 
|---|
| 240 | if (ddict==NULL) return 0; | 
|---|
| 241 | return ddict->dictID; | 
|---|
| 242 | } | 
|---|
| 243 |  | 
|---|