1  /* ******************************************************************
2   * bitstream
3   * Part of FSE library
4   * Copyright (c) Yann Collet, Facebook, Inc.
5   *
6   * You can contact the author at :
7   * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8   *
9   * This source code is licensed under both the BSD-style license (found in the
10   * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11   * in the COPYING file in the root directory of this source tree).
12   * You may select, at your option, one of the above-listed licenses.
13  ****************************************************************** */
14  #ifndef BITSTREAM_H_MODULE
15  #define BITSTREAM_H_MODULE
16  
17  /*
18  *  This API consists of small unitary functions, which must be inlined for best performance.
19  *  Since link-time-optimization is not available for all compilers,
20  *  these functions are defined into a .h to be included.
21  */
22  
23  /*-****************************************
24  *  Dependencies
25  ******************************************/
26  #include "mem.h"            /* unaligned access routines */
27  #include "compiler.h"       /* UNLIKELY() */
28  #include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
29  #include "error_private.h"  /* error codes and messages */
30  
31  
32  /*=========================================
33  *  Target specific
34  =========================================*/
35  
36  #define STREAM_ACCUMULATOR_MIN_32  25
37  #define STREAM_ACCUMULATOR_MIN_64  57
38  #define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
39  
40  
41  /*-******************************************
42  *  bitStream encoding API (write forward)
43  ********************************************/
44  /* bitStream can mix input from multiple sources.
45   * A critical property of these streams is that they encode and decode in **reverse** direction.
46   * So the first bit sequence you add will be the last to be read, like a LIFO stack.
47   */
48  typedef struct {
49      size_t bitContainer;
50      unsigned bitPos;
51      char*  startPtr;
52      char*  ptr;
53      char*  endPtr;
54  } BIT_CStream_t;
55  
56  MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
57  MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
58  MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
59  MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
60  
61  /* Start with initCStream, providing the size of buffer to write into.
62  *  bitStream will never write outside of this buffer.
63  *  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
64  *
65  *  bits are first added to a local register.
66  *  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
67  *  Writing data into memory is an explicit operation, performed by the flushBits function.
68  *  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
69  *  After a flushBits, a maximum of 7 bits might still be stored into local register.
70  *
71  *  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
72  *
73  *  Last operation is to close the bitStream.
74  *  The function returns the final size of CStream in bytes.
75  *  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
76  */
77  
78  
79  /*-********************************************
80  *  bitStream decoding API (read backward)
81  **********************************************/
82  typedef struct {
83      size_t   bitContainer;
84      unsigned bitsConsumed;
85      const char* ptr;
86      const char* start;
87      const char* limitPtr;
88  } BIT_DStream_t;
89  
90  typedef enum { BIT_DStream_unfinished = 0,
91                 BIT_DStream_endOfBuffer = 1,
92                 BIT_DStream_completed = 2,
93                 BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
94                 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
95  
96  MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
97  MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
98  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
99  MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
100  
101  
102  /* Start by invoking BIT_initDStream().
103  *  A chunk of the bitStream is then stored into a local register.
104  *  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
105  *  You can then retrieve bitFields stored into the local register, **in reverse order**.
106  *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
107  *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
108  *  Otherwise, it can be less than that, so proceed accordingly.
109  *  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
110  */
111  
112  
113  /*-****************************************
114  *  unsafe API
115  ******************************************/
116  MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
117  /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
118  
119  MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
120  /* unsafe version; does not check buffer overflow */
121  
122  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
123  /* faster, but works only if nbBits >= 1 */
124  
125  
126  
127  /*-**************************************************************
128  *  Internal functions
129  ****************************************************************/
BIT_highbit32(U32 val)130  MEM_STATIC unsigned BIT_highbit32 (U32 val)
131  {
132      assert(val != 0);
133      {
134  #   if (__GNUC__ >= 3)   /* Use GCC Intrinsic */
135          return __builtin_clz (val) ^ 31;
136  #   else   /* Software version */
137          static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
138                                                   11, 14, 16, 18, 22, 25,  3, 30,
139                                                    8, 12, 20, 28, 15, 17, 24,  7,
140                                                   19, 27, 23,  6, 26,  5,  4, 31 };
141          U32 v = val;
142          v |= v >> 1;
143          v |= v >> 2;
144          v |= v >> 4;
145          v |= v >> 8;
146          v |= v >> 16;
147          return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
148  #   endif
149      }
150  }
151  
152  /*=====    Local Constants   =====*/
153  static const unsigned BIT_mask[] = {
154      0,          1,         3,         7,         0xF,       0x1F,
155      0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
156      0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
157      0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
158      0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
159      0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
160  #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
161  
162  /*-**************************************************************
163  *  bitStream encoding
164  ****************************************************************/
165  /*! BIT_initCStream() :
166   *  `dstCapacity` must be > sizeof(size_t)
167   *  @return : 0 if success,
168   *            otherwise an error code (can be tested using ERR_isError()) */
BIT_initCStream(BIT_CStream_t * bitC,void * startPtr,size_t dstCapacity)169  MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
170                                    void* startPtr, size_t dstCapacity)
171  {
172      bitC->bitContainer = 0;
173      bitC->bitPos = 0;
174      bitC->startPtr = (char*)startPtr;
175      bitC->ptr = bitC->startPtr;
176      bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
177      if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
178      return 0;
179  }
180  
181  /*! BIT_addBits() :
182   *  can add up to 31 bits into `bitC`.
183   *  Note : does not check for register overflow ! */
BIT_addBits(BIT_CStream_t * bitC,size_t value,unsigned nbBits)184  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
185                              size_t value, unsigned nbBits)
186  {
187      DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
188      assert(nbBits < BIT_MASK_SIZE);
189      assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
190      bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
191      bitC->bitPos += nbBits;
192  }
193  
194  /*! BIT_addBitsFast() :
195   *  works only if `value` is _clean_,
196   *  meaning all high bits above nbBits are 0 */
BIT_addBitsFast(BIT_CStream_t * bitC,size_t value,unsigned nbBits)197  MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
198                                  size_t value, unsigned nbBits)
199  {
200      assert((value>>nbBits) == 0);
201      assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
202      bitC->bitContainer |= value << bitC->bitPos;
203      bitC->bitPos += nbBits;
204  }
205  
206  /*! BIT_flushBitsFast() :
207   *  assumption : bitContainer has not overflowed
208   *  unsafe version; does not check buffer overflow */
BIT_flushBitsFast(BIT_CStream_t * bitC)209  MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
210  {
211      size_t const nbBytes = bitC->bitPos >> 3;
212      assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
213      assert(bitC->ptr <= bitC->endPtr);
214      MEM_writeLEST(bitC->ptr, bitC->bitContainer);
215      bitC->ptr += nbBytes;
216      bitC->bitPos &= 7;
217      bitC->bitContainer >>= nbBytes*8;
218  }
219  
220  /*! BIT_flushBits() :
221   *  assumption : bitContainer has not overflowed
222   *  safe version; check for buffer overflow, and prevents it.
223   *  note : does not signal buffer overflow.
224   *  overflow will be revealed later on using BIT_closeCStream() */
BIT_flushBits(BIT_CStream_t * bitC)225  MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
226  {
227      size_t const nbBytes = bitC->bitPos >> 3;
228      assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
229      assert(bitC->ptr <= bitC->endPtr);
230      MEM_writeLEST(bitC->ptr, bitC->bitContainer);
231      bitC->ptr += nbBytes;
232      if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
233      bitC->bitPos &= 7;
234      bitC->bitContainer >>= nbBytes*8;
235  }
236  
237  /*! BIT_closeCStream() :
238   *  @return : size of CStream, in bytes,
239   *            or 0 if it could not fit into dstBuffer */
BIT_closeCStream(BIT_CStream_t * bitC)240  MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
241  {
242      BIT_addBitsFast(bitC, 1, 1);   /* endMark */
243      BIT_flushBits(bitC);
244      if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
245      return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
246  }
247  
248  
249  /*-********************************************************
250  *  bitStream decoding
251  **********************************************************/
252  /*! BIT_initDStream() :
253   *  Initialize a BIT_DStream_t.
254   * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
255   * `srcSize` must be the *exact* size of the bitStream, in bytes.
256   * @return : size of stream (== srcSize), or an errorCode if a problem is detected
257   */
BIT_initDStream(BIT_DStream_t * bitD,const void * srcBuffer,size_t srcSize)258  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
259  {
260      if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
261  
262      bitD->start = (const char*)srcBuffer;
263      bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
264  
265      if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
266          bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
267          bitD->bitContainer = MEM_readLEST(bitD->ptr);
268          { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
269            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
270            if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
271      } else {
272          bitD->ptr   = bitD->start;
273          bitD->bitContainer = *(const BYTE*)(bitD->start);
274          switch(srcSize)
275          {
276          case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
277                  ZSTD_FALLTHROUGH;
278  
279          case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
280                  ZSTD_FALLTHROUGH;
281  
282          case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
283                  ZSTD_FALLTHROUGH;
284  
285          case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
286                  ZSTD_FALLTHROUGH;
287  
288          case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
289                  ZSTD_FALLTHROUGH;
290  
291          case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
292                  ZSTD_FALLTHROUGH;
293  
294          default: break;
295          }
296          {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
297              bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
298              if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
299          }
300          bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
301      }
302  
303      return srcSize;
304  }
305  
BIT_getUpperBits(size_t bitContainer,U32 const start)306  MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
307  {
308      return bitContainer >> start;
309  }
310  
BIT_getMiddleBits(size_t bitContainer,U32 const start,U32 const nbBits)311  MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
312  {
313      U32 const regMask = sizeof(bitContainer)*8 - 1;
314      /* if start > regMask, bitstream is corrupted, and result is undefined */
315      assert(nbBits < BIT_MASK_SIZE);
316      /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
317       * than accessing memory. When bmi2 instruction is not present, we consider
318       * such cpus old (pre-Haswell, 2013) and their performance is not of that
319       * importance.
320       */
321  #if defined(__x86_64__) || defined(_M_X86)
322      return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
323  #else
324      return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
325  #endif
326  }
327  
BIT_getLowerBits(size_t bitContainer,U32 const nbBits)328  MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
329  {
330      assert(nbBits < BIT_MASK_SIZE);
331      return bitContainer & BIT_mask[nbBits];
332  }
333  
334  /*! BIT_lookBits() :
335   *  Provides next n bits from local register.
336   *  local register is not modified.
337   *  On 32-bits, maxNbBits==24.
338   *  On 64-bits, maxNbBits==56.
339   * @return : value extracted */
BIT_lookBits(const BIT_DStream_t * bitD,U32 nbBits)340  MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
341  {
342      /* arbitrate between double-shift and shift+mask */
343  #if 1
344      /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
345       * bitstream is likely corrupted, and result is undefined */
346      return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
347  #else
348      /* this code path is slower on my os-x laptop */
349      U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
350      return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
351  #endif
352  }
353  
354  /*! BIT_lookBitsFast() :
355   *  unsafe version; only works if nbBits >= 1 */
BIT_lookBitsFast(const BIT_DStream_t * bitD,U32 nbBits)356  MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
357  {
358      U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
359      assert(nbBits >= 1);
360      return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
361  }
362  
BIT_skipBits(BIT_DStream_t * bitD,U32 nbBits)363  MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
364  {
365      bitD->bitsConsumed += nbBits;
366  }
367  
368  /*! BIT_readBits() :
369   *  Read (consume) next n bits from local register and update.
370   *  Pay attention to not read more than nbBits contained into local register.
371   * @return : extracted value. */
BIT_readBits(BIT_DStream_t * bitD,unsigned nbBits)372  MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
373  {
374      size_t const value = BIT_lookBits(bitD, nbBits);
375      BIT_skipBits(bitD, nbBits);
376      return value;
377  }
378  
379  /*! BIT_readBitsFast() :
380   *  unsafe version; only works only if nbBits >= 1 */
BIT_readBitsFast(BIT_DStream_t * bitD,unsigned nbBits)381  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
382  {
383      size_t const value = BIT_lookBitsFast(bitD, nbBits);
384      assert(nbBits >= 1);
385      BIT_skipBits(bitD, nbBits);
386      return value;
387  }
388  
389  /*! BIT_reloadDStreamFast() :
390   *  Similar to BIT_reloadDStream(), but with two differences:
391   *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
392   *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
393   *     point you must use BIT_reloadDStream() to reload.
394   */
BIT_reloadDStreamFast(BIT_DStream_t * bitD)395  MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
396  {
397      if (UNLIKELY(bitD->ptr < bitD->limitPtr))
398          return BIT_DStream_overflow;
399      assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
400      bitD->ptr -= bitD->bitsConsumed >> 3;
401      bitD->bitsConsumed &= 7;
402      bitD->bitContainer = MEM_readLEST(bitD->ptr);
403      return BIT_DStream_unfinished;
404  }
405  
406  /*! BIT_reloadDStream() :
407   *  Refill `bitD` from buffer previously set in BIT_initDStream() .
408   *  This function is safe, it guarantees it will not read beyond src buffer.
409   * @return : status of `BIT_DStream_t` internal register.
410   *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
BIT_reloadDStream(BIT_DStream_t * bitD)411  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
412  {
413      if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
414          return BIT_DStream_overflow;
415  
416      if (bitD->ptr >= bitD->limitPtr) {
417          return BIT_reloadDStreamFast(bitD);
418      }
419      if (bitD->ptr == bitD->start) {
420          if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
421          return BIT_DStream_completed;
422      }
423      /* start < ptr < limitPtr */
424      {   U32 nbBytes = bitD->bitsConsumed >> 3;
425          BIT_DStream_status result = BIT_DStream_unfinished;
426          if (bitD->ptr - nbBytes < bitD->start) {
427              nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
428              result = BIT_DStream_endOfBuffer;
429          }
430          bitD->ptr -= nbBytes;
431          bitD->bitsConsumed -= nbBytes*8;
432          bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
433          return result;
434      }
435  }
436  
437  /*! BIT_endOfDStream() :
438   * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
439   */
BIT_endOfDStream(const BIT_DStream_t * DStream)440  MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
441  {
442      return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
443  }
444  
445  
446  #endif /* BITSTREAM_H_MODULE */
447