- Copy Brotli algorithm 3rd party source code for tool Cc: Liming Gao <liming.gao@intel.com> Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Bell Song <binx.song@intel.com> Reviewed-by: Liming Gao <liming.gao@intel.com>
		
			
				
	
	
		
			86 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			86 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* Copyright 2013 Google Inc. All Rights Reserved.
 | |
| 
 | |
|    Distributed under MIT license.
 | |
|    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 | |
| */
 | |
| 
 | |
| /* Heuristics for deciding about the UTF8-ness of strings. */
 | |
| 
 | |
| #include "./utf8_util.h"
 | |
| 
 | |
| #include "../common/types.h"
 | |
| 
 | |
| #if defined(__cplusplus) || defined(c_plusplus)
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
/* Tries to decode one UTF8-encoded code point from the start of |input|.

   At most |size| bytes are examined; input[0] is always read.

   On success stores the decoded code point in |*symbol| and returns the number
   of bytes it occupies (1 to 4). A sequence is accepted only when the decoded
   value is above the range covered by the shorter forms; 4-byte values must
   also be <= 0x10ffff. A zero byte is not accepted as a 1-byte symbol.

   Otherwise stores (0x110000 | input[0]) in |*symbol|, i.e. a value outside
   the accepted code point range, and returns 1. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  const uint8_t lead = input[0];

  /* The lead-byte patterns tested below are mutually exclusive, so at most one
     branch is entered. */
  if ((lead & 0x80) == 0) {
    /* ASCII: 0xxxxxxx. A zero byte falls through to the fallback. */
    if (lead != 0) {
      *symbol = lead;
      return 1;
    }
  } else if ((lead & 0xe0) == 0xc0) {
    /* 2-byte UTF8: 110xxxxx 10xxxxxx */
    if (size >= 2u && (input[1] & 0xc0) == 0x80) {
      const int code_point = ((lead & 0x1f) << 6) | (input[1] & 0x3f);
      if (code_point > 0x7f) {
        *symbol = code_point;
        return 2;
      }
    }
  } else if ((lead & 0xf0) == 0xe0) {
    /* 3-byte UTF8: 1110xxxx 10xxxxxx 10xxxxxx */
    if (size >= 3u &&
        (input[1] & 0xc0) == 0x80 &&
        (input[2] & 0xc0) == 0x80) {
      const int code_point = ((lead & 0x0f) << 12) |
                             ((input[1] & 0x3f) << 6) |
                             (input[2] & 0x3f);
      if (code_point > 0x7ff) {
        *symbol = code_point;
        return 3;
      }
    }
  } else if ((lead & 0xf8) == 0xf0) {
    /* 4-byte UTF8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if (size >= 4u &&
        (input[1] & 0xc0) == 0x80 &&
        (input[2] & 0xc0) == 0x80 &&
        (input[3] & 0xc0) == 0x80) {
      const int code_point = ((lead & 0x07) << 18) |
                             ((input[1] & 0x3f) << 12) |
                             ((input[2] & 0x3f) << 6) |
                             (input[3] & 0x3f);
      if (code_point > 0xffff && code_point <= 0x10ffff) {
        *symbol = code_point;
        return 4;
      }
    }
  }

  /* Not UTF8, emit a special symbol above the UTF8-code space */
  *symbol = 0x110000 | lead;
  return 1;
}
 | |
| 
 | |
| /* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
 | |
| BROTLI_BOOL BrotliIsMostlyUTF8(
 | |
|     const uint8_t* data, const size_t pos, const size_t mask,
 | |
|     const size_t length, const double min_fraction) {
 | |
|   size_t size_utf8 = 0;
 | |
|   size_t i = 0;
 | |
|   while (i < length) {
 | |
|     int symbol;
 | |
|     size_t bytes_read =
 | |
|         BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
 | |
|     i += bytes_read;
 | |
|     if (symbol < 0x110000) size_utf8 += bytes_read;
 | |
|   }
 | |
|   return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
 | |
| }
 | |
| 
 | |
| #if defined(__cplusplus) || defined(c_plusplus)
 | |
| }  /* extern "C" */
 | |
| #endif
 |