/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"


/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

/* *********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/* ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/* ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;
    symbolEncodingType_e ofType;
    symbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/* ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  @return : 0 on success, or an error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);
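
/* Illustrative call sketch (an assumption about typical usage, not a fixed contract) :
 * callers inside lib/compress generally feed it the cctx's block states and entropy workspace :
 *
 *     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 *     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&cctx->seqStore,
 *                          &cctx->blockState.prevCBlock->entropy,
 *                          &cctx->blockState.nextCBlock->entropy,
 *                          &cctx->appliedParams,
 *                          &entropyMetadata,
 *                          cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
 */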

/* *******************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
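
/* Reading-state example (illustrative) : for a store with size == 2,
 * a reader paused halfway through the first sequence has
 *     pos == 0 and 0 < posInSequence < seq[0].litLength + seq[0].matchLength.
 * Fully consuming seq[0] advances to pos == 1, posInSequence == 0;
 * pos == size means the whole store has been consumed. */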

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

typedef struct {
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
/*
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/*
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams (except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    struct xxh64_state xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */

    /* Tracing */

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);

MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}
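
/* Worked examples (derived from the table above) :
 *     ZSTD_LLcode(10)  == 10                              -- lengths 0..15 map to themselves
 *     ZSTD_LLcode(18)  == 17                              -- lengths 16..63 share codes in groups
 *     ZSTD_LLcode(100) == ZSTD_highbit32(100) + 19 == 25  -- larger lengths use a log2-based code
 */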

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because that's the format in which it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
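
/* Worked examples : with MINMATCH == 3,
 *     matchLength ==   5 => mlBase ==   2 => ZSTD_MLcode(2)  ==  2
 *     matchLength ==  50 => mlBase ==  47 => ZSTD_MLcode(47) == 37
 *     matchLength == 200 => mlBase == 197 => ZSTD_highbit32(197) + 36 == 7 + 36 == 43
 */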

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}
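
/* Header layout note : the 24-bit block header packs, from lowest bit up,
 * lastBlock (1 bit), block type (2 bits), and block size (21 bits).
 * Worked example : srcSize == 100, lastBlock == 1, bt_raw == 0 gives
 *     cBlockHeader24 = 1 + (0<<1) + (100<<3) = 801 = 0x000321,
 * written little-endian as bytes 21 03 00, followed by the 100 raw bytes. */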

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}


/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compressed block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}
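
/* Worked example : for srcSize == 1000,
 *     strat < ZSTD_btultra       => minlog == 6 => minGain == (1000>>6) + 2 == 17 bytes
 *     strat == ZSTD_btultra2 (9) => minlog == 8 => minGain == (1000>>8) + 2 ==  5 bytes
 */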

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() variant that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
#define STORE_OFFSET(o)  (assert((o)>0), (o) + ZSTD_REP_MOVE)
#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)
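
/* Round-trip examples for the stored offset sum-type (ZSTD_REP_MOVE == 2) :
 *     STORE_REPCODE_1   == 0   => STORED_IS_REPCODE(0), STORED_REPCODE(0) == 1
 *     STORE_OFFSET(123) == 125 => STORED_IS_OFFSET(125), STORED_OFFSET(125) == 123
 * So stored values 0..2 denote repcodes 1..3, and values > 2 denote real offsets :
 * a single U32 carries both cases, as expected by ZSTD_storeSeq() below. */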

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
 *  @offBase_minus1 : Users should use the macros STORE_REPCODE_X and STORE_OFFSET().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase_minus1,
              size_t matchLength)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case : we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);

    /* match Length */
    assert(matchLength >= MINMATCH);
    {   size_t const mlBase = matchLength - MINMATCH;
        if (mlBase>0xFFFF) {
            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
        }
        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
    }

    seqStorePtr->sequences++;
}
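
/* Illustrative call sketch (hypothetical values) : storing a match of 7 bytes
 * at offset 123, preceded by 5 literals starting at `ip` :
 *
 *     ZSTD_storeSeq(seqStore, 5, ip, iend, STORE_OFFSET(123), 7);
 *
 * and storing a repcode-1 match of 4 bytes with no preceding literals :
 *
 *     ZSTD_storeSeq(seqStore, 0, ip, iend, STORE_REPCODE_1, 4);
 *
 * In both cases matchLength (7, 4) is >= MINMATCH, as required. */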

/* ZSTD_updateRep() :
 * updates in-place @rep (array of repeat offsets)
 * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
 */
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
        rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = STORED_OFFSET(offBase_minus1);
    } else {   /* repcode */
        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            rep[1] = rep[0];
            rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            /* nothing to do */
        }
    }
}
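
/* Worked example : starting from rep == {4, 8, 16} :
 *     full offset 32 (offBase_minus1 == STORE_OFFSET(32) == 34)      => rep becomes {32, 4, 8}
 *     repcode 2 with ll0 == 0 (offBase_minus1 == 1, so repCode == 1) => rep becomes {8, 4, 16}
 *     repcode 1 with ll0 == 0 (offBase_minus1 == 0, so repCode == 0) => rep is unchanged
 * The ll0 flag shifts the repcode interpretation by one when litLength == 0,
 * in keeping with the format's repcode semantics. */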

typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
    repcodes_t newReps;
    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
    return newReps;
}


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}

/* ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues, starting again from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
 *  Hashes
 ***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}

/* ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}
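
/* Example : ZSTD_ipow(3, 5) == 243, computed in O(log exponent) multiplies
 * by square-and-multiply (3^5 == 3^4 * 3^1, all modulo 2^64).
 * Used below to precompute prime8bytes^(length-1) for the rolling hash. */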

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/* ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/* ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/* ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/* ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
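
/* Rolling-hash invariant (a small illustrative check, not part of the library) :
 * rotating out the oldest byte and rotating in the next one must equal a fresh
 * hash of the shifted window :
 *
 *     BYTE const data[5] = { 1, 2, 3, 4, 5 };
 *     U64 const pp = ZSTD_rollingHash_primePower(4);
 *     U64 h = ZSTD_rollingHash_compute(data, 4);           // hash of bytes 0..3
 *     h = ZSTD_rollingHash_rotate(h, data[0], data[4], pp);
 *     assert(h == ZSTD_rollingHash_compute(data + 1, 4));  // hash of bytes 1..4
 */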

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/*
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/*
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/*
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif

/*
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}

/*
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{
    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
            return 1;
        }
    }
    return curr > ZSTD_CURRENT_MAX;
}

/*
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}

/*
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}
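
/* Worked example (hypothetical numbers) : with blockEndIdx == 5000000,
 * maxDist == 1<<20 and no dictionary (loadedDictEnd == 0) :
 *     blockEndIdx > maxDist, so newLowLimit == 5000000 - (1<<20) == 3951424,
 * and lowLimit/dictLimit are raised to that index if they were lower,
 * invalidating matches further than one window away. */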
1148  
1149  /* Similar to ZSTD_window_enforceMaxDist(),
1150   * but only invalidates dictionary
1151   * when input progresses beyond window size.
1152   * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1153   *              loadedDictEnd uses same referential as window->base
1154   *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if the window size is reached anywhere within the next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}

/*
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and the
 * previous extDict is forgotten. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
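
/* Illustrative sketch (not compiled) : how a block-level driver, with
 * hypothetical names, might chain the window helpers above.
 * It mirrors the general pattern only, not the exact call sites
 * within lib/compress. */
#if 0
static void example_streamBlocks(ZSTD_matchState_t* ms,
                                 const BYTE* ip, const BYTE* iend,
                                 size_t blockSize, U32 windowSize)
{
    ZSTD_window_t* const window = &ms->window;
    while (ip < iend) {
        const BYTE* const blockEnd = (ip + blockSize < iend) ? ip + blockSize : iend;
        /* register the new input segment; detects non-contiguous buffers */
        ZSTD_window_update(window, ip, (size_t)(blockEnd - ip), 0);
        /* discard indexes further than windowSize, possibly invalidating the dictionary */
        ZSTD_window_enforceMaxDist(window, blockEnd, windowSize,
                                   &ms->loadedDictEnd, &ms->dictMatchState);
        /* ... compress the block [ip, blockEnd) here ... */
        ip = blockEnd;
    }
}
#endif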

/*
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}

/*
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.dictLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}
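
/* Illustrative sketch (not compiled) : a match finder would typically clamp
 * its candidate search with these helpers (`ip` is a hypothetical position
 * within the current source buffer). */
#if 0
U32 const curr = (U32)(ip - ms->window.base);
U32 const lowestMatch  = ZSTD_getLowestMatchIndex(ms, curr, ms->cParams.windowLog);
U32 const lowestPrefix = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
/* candidate indexes below lowestMatch are rejected;
 * candidates in [lowestMatch, lowestPrefix) are read from the extDict segment */
#endif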


/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}
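
/* Note (added for clarity) : ZSTD_fWeight(v) returns roughly log2(v+1) + 1,
 * computed in fixed point with 8 fractional bits.
 * Worked example : rawStat=15 => newStat=16, hb=4,
 * BWeight = 4*256 = 1024, FWeight = (16<<8)>>4 = 256,
 * weight = 1280, returned as 1280/256 = 5.0 (== log2(16) + 1).
 * The +1 terms cancel in differences, so fWeight(sum) - fWeight(stat)
 * approximates log2(sum/stat), i.e. the predicted bit cost of a symbol. */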

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}
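
/* Example (illustrative) : given a hypothetical U32 llCount[MaxLL+1] array of
 * literal-length code frequencies, ZSTD_debugTable(llCount, MaxLL) prints one
 * line per symbol in the form `symbol: frequency (estimated bit cost)`. */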

#endif


/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at the beginning of a valid zstd dictionary.
 * @return : size of the dictionary header (magic number + dict ID + entropy tables)
 * assumptions : magic number is assumed already checked,
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually-set compression parameters.
 * Note : srcSizeHint == 0 means the source size is really 0 (it is not a marker for "unknown").
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict, size_t dictSize,
                                 const ZSTD_CCtx_params* params);

/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (< ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
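
/* Illustrative sketch (assumptions : ZSTD_blockHeaderSize is 3, and a raw,
 * empty, last block encodes as header value 1, i.e. bytes {0x01,0x00,0x00}) :
 *     BYTE out[3];
 *     size_t const r = ZSTD_writeLastEmptyBlock(out, sizeof(out));
 *     assert(!ZSTD_isError(r) && r == ZSTD_blockHeaderSize);
 */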


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
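
/* Illustrative sketch (not compiled; offsets and lengths are hypothetical and,
 * per the NOTE above, unverified by zstd) : describing "abcabc" as 3 literals
 * followed by a 3-byte match at distance 3 :
 *     rawSeq seqs[1];
 *     seqs[0].offset = 3;       // raw match distance
 *     seqs[0].litLength = 3;    // literals "abc" preceding the match
 *     seqs[0].matchLength = 3;  // match copies "abc"
 *     FORWARD_IF_ERROR(ZSTD_referenceExternalSequences(cctx, seqs, 1), "");
 */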

/* ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/* ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */