- Copy Brotli algorithm 3rd party source code for tool Cc: Liming Gao <liming.gao@intel.com> Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Bell Song <binx.song@intel.com> Reviewed-by: Liming Gao <liming.gao@intel.com>
		
			
				
	
	
		
			185 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			185 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Copyright 2013 Google Inc. All Rights Reserved.
 | 
						|
 | 
						|
   Distributed under MIT license.
 | 
						|
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 | 
						|
*/
 | 
						|
 | 
						|
/* Functions to map previous bytes into a context id. */
 | 
						|
 | 
						|
#ifndef BROTLI_ENC_CONTEXT_H_
 | 
						|
#define BROTLI_ENC_CONTEXT_H_
 | 
						|
 | 
						|
#include "../common/types.h"
 | 
						|
#include "../common/port.h"
 | 
						|
 | 
						|
#if defined(__cplusplus) || defined(c_plusplus)
 | 
						|
extern "C" {
 | 
						|
#endif
 | 
						|
 | 
						|
/* Second-order context lookup table for UTF8 byte streams.
 | 
						|
 | 
						|
   If p1 and p2 are the previous two bytes, we calculate the context as
 | 
						|
 | 
						|
     context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
 | 
						|
 | 
						|
   If the previous two bytes are ASCII characters (i.e. < 128), this will be
 | 
						|
   equivalent to
 | 
						|
 | 
						|
     context = 4 * context1(p1) + context2(p2),
 | 
						|
 | 
						|
   where context1 is based on the previous byte in the following way:
 | 
						|
 | 
						|
     0  : non-ASCII control
 | 
						|
     1  : \t, \n, \r
 | 
						|
     2  : space
 | 
						|
     3  : other punctuation
 | 
						|
     4  : " '
 | 
						|
     5  : %
 | 
						|
     6  : ( < [ {
 | 
						|
     7  : ) > ] }
 | 
						|
     8  : , ; :
 | 
						|
     9  : .
 | 
						|
     10 : =
 | 
						|
     11 : number
 | 
						|
     12 : upper-case vowel
 | 
						|
     13 : upper-case consonant
 | 
						|
     14 : lower-case vowel
 | 
						|
     15 : lower-case consonant
 | 
						|
 | 
						|
   and context2 is based on the second last byte:
 | 
						|
 | 
						|
     0 : control, space
 | 
						|
     1 : punctuation
 | 
						|
     2 : upper-case letter, number
 | 
						|
     3 : lower-case letter
 | 
						|
 | 
						|
   If the last byte is ASCII, and the second last byte is not (in a valid UTF8
 | 
						|
   stream it will be a continuation byte, value between 128 and 191), the
 | 
						|
   context is the same as if the second last byte was an ASCII control or space.
 | 
						|
 | 
						|
   If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
 | 
						|
   be a continuation byte and the context id is 2 or 3 depending on the LSB of
 | 
						|
   the last byte and to a lesser extent on the second last byte if it is ASCII.
 | 
						|
 | 
						|
   If the last byte is a UTF8 continuation byte, the second last byte can be:
 | 
						|
     - continuation byte: the next byte is probably ASCII or lead byte (assuming
 | 
						|
       4-byte UTF8 characters are rare) and the context id is 0 or 1.
 | 
						|
     - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
 | 
						|
     - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
 | 
						|
 | 
						|
   The possible value combinations of the previous two bytes, the range of
 | 
						|
   context ids and the type of the next byte is summarized in the table below:
 | 
						|
 | 
						|
   |--------\-----------------------------------------------------------------|
 | 
						|
   |         \                         Last byte                              |
 | 
						|
   | Second   \---------------------------------------------------------------|
 | 
						|
   | last byte \    ASCII            |   cont. byte        |   lead byte      |
 | 
						|
   |            \   (0-127)          |   (128-191)         |   (192-)         |
 | 
						|
   |=============|===================|=====================|==================|
 | 
						|
   |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
 | 
						|
   |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
 | 
						|
   |-------------|-------------------|---------------------|------------------|
 | 
						|
   |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
 | 
						|
   |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
 | 
						|
   |-------------|-------------------|---------------------|------------------|
 | 
						|
   |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
 | 
						|
   |  (192-207)  |                   |  context: 0 - 1     |                  |
 | 
						|
   |-------------|-------------------|---------------------|------------------|
 | 
						|
   |  lead byte  | not valid         |  next: cont.        |  not valid       |
 | 
						|
   |  (208-)     |                   |  context: 2 - 3     |                  |
 | 
						|
   |-------------|-------------------|---------------------|------------------|
 | 
						|
*/
 | 
						|
static const uint8_t kUTF8ContextLookup[512] = {
 | 
						|
  /* Last byte. */
 | 
						|
  /* */
 | 
						|
  /* ASCII range. */
 | 
						|
   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
 | 
						|
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 | 
						|
   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
 | 
						|
  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
 | 
						|
  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
 | 
						|
  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
 | 
						|
  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
 | 
						|
  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
 | 
						|
  /* UTF8 continuation byte range. */
 | 
						|
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 | 
						|
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 | 
						|
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 | 
						|
  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
 | 
						|
  /* UTF8 lead byte range. */
 | 
						|
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 | 
						|
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 | 
						|
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 | 
						|
  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
 | 
						|
  /* Second last byte. */
 | 
						|
  /* */
 | 
						|
  /* ASCII range. */
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
 | 
						|
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
 | 
						|
  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 | 
						|
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
 | 
						|
  /* UTF8 continuation byte range. */
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  /* UTF8 lead byte range. */
 | 
						|
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 | 
						|
};
 | 
						|
 | 
						|
/* Context lookup table for small signed integers. */
 | 
						|
static const uint8_t kSigned3BitContextLookup[] = {
 | 
						|
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 | 
						|
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 | 
						|
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 | 
						|
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 | 
						|
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 | 
						|
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 | 
						|
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 | 
						|
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 | 
						|
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 | 
						|
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 | 
						|
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 | 
						|
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 | 
						|
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 | 
						|
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
 | 
						|
};
 | 
						|
 | 
						|
typedef enum ContextType {
 | 
						|
  CONTEXT_LSB6         = 0,
 | 
						|
  CONTEXT_MSB6         = 1,
 | 
						|
  CONTEXT_UTF8         = 2,
 | 
						|
  CONTEXT_SIGNED       = 3
 | 
						|
} ContextType;
 | 
						|
 | 
						|
static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
 | 
						|
  switch (mode) {
 | 
						|
    case CONTEXT_LSB6:
 | 
						|
      return p1 & 0x3f;
 | 
						|
    case CONTEXT_MSB6:
 | 
						|
      return (uint8_t)(p1 >> 2);
 | 
						|
    case CONTEXT_UTF8:
 | 
						|
      return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
 | 
						|
    case CONTEXT_SIGNED:
 | 
						|
      return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
 | 
						|
                       kSigned3BitContextLookup[p2]);
 | 
						|
    default:
 | 
						|
      return 0;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
#if defined(__cplusplus) || defined(c_plusplus)
 | 
						|
}  /* extern "C" */
 | 
						|
#endif
 | 
						|
 | 
						|
#endif  /* BROTLI_ENC_CONTEXT_H_ */
 |