MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=2066 Update Oniguruma to the latest version v6.9.3. Oniguruma https://github.com/kkos/oniguruma This release is the security fix release. It includes the changes: Fixed CVE-2019-13224 Fixed CVE-2019-13225 Fixed many problems (found by libfuzzer programs) Verify VS2015, GCC5 build. Verify RegularExpressionProtocol GetInfo() and Match() function. Cc: Jian J Wang <jian.j.wang@intel.com> Cc: Hao A Wu <hao.a.wu@intel.com> Cc: Cinnamon Shia <cinnamon.shia@hpe.com> Signed-off-by: Liming Gao <liming.gao@intel.com> Reviewed-by: Hao A Wu <hao.a.wu@intel.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
unicode.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -504,6 +504,281 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_UNICODE_WORD_BREAK
|
||||
|
||||
enum WB_TYPE {
|
||||
WB_Any = 0,
|
||||
WB_ALetter,
|
||||
WB_CR,
|
||||
WB_Double_Quote,
|
||||
WB_Extend,
|
||||
WB_ExtendNumLet,
|
||||
WB_Format,
|
||||
WB_Hebrew_Letter,
|
||||
WB_Katakana,
|
||||
WB_LF,
|
||||
WB_MidLetter,
|
||||
WB_MidNum,
|
||||
WB_MidNumLet,
|
||||
WB_Newline,
|
||||
WB_Numeric,
|
||||
WB_Regional_Indicator,
|
||||
WB_Single_Quote,
|
||||
WB_WSegSpace,
|
||||
WB_ZWJ,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint start;
|
||||
OnigCodePoint end;
|
||||
enum WB_TYPE type;
|
||||
} WB_RANGE_TYPE;
|
||||
|
||||
#include "unicode_wb_data.c"
|
||||
|
||||
static enum WB_TYPE
|
||||
wb_get_type(OnigCodePoint code)
|
||||
{
|
||||
OnigCodePoint low, high, x;
|
||||
enum WB_TYPE type;
|
||||
|
||||
for (low = 0, high = (OnigCodePoint )WB_RANGE_NUM; low < high; ) {
|
||||
x = (low + high) >> 1;
|
||||
if (code > WB_RANGES[x].end)
|
||||
low = x + 1;
|
||||
else
|
||||
high = x;
|
||||
}
|
||||
|
||||
type = (low < (OnigCodePoint )WB_RANGE_NUM &&
|
||||
code >= WB_RANGES[low].start) ?
|
||||
WB_RANGES[low].type : WB_Any;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
#define IS_WB_IGNORE_TAIL(t) ((t) == WB_Extend || (t) == WB_Format || (t) == WB_ZWJ)
|
||||
#define IS_WB_AHLetter(t) ((t) == WB_ALetter || (t) == WB_Hebrew_Letter)
|
||||
#define IS_WB_MidNumLetQ(t) ((t) == WB_MidNumLet || (t) == WB_Single_Quote)
|
||||
|
||||
static int
|
||||
wb_get_next_main_code(OnigEncoding enc, UChar* p, const UChar* end,
|
||||
OnigCodePoint* rcode, enum WB_TYPE* rtype)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
enum WB_TYPE type;
|
||||
|
||||
while (TRUE) {
|
||||
p += enclen(enc, p);
|
||||
if (p >= end) break;
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
type = wb_get_type(code);
|
||||
if (! IS_WB_IGNORE_TAIL(type)) {
|
||||
*rcode = code;
|
||||
*rtype = type;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_wb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
const UChar* start, const UChar* end)
|
||||
{
|
||||
int r;
|
||||
UChar* pp;
|
||||
OnigCodePoint cfrom;
|
||||
OnigCodePoint cfrom2;
|
||||
OnigCodePoint cto;
|
||||
OnigCodePoint cto2;
|
||||
enum WB_TYPE from;
|
||||
enum WB_TYPE from2;
|
||||
enum WB_TYPE to;
|
||||
enum WB_TYPE to2;
|
||||
|
||||
/* WB1: sot / Any */
|
||||
if (p == start) return TRUE;
|
||||
/* WB2: Any / eot */
|
||||
if (p == end) return TRUE;
|
||||
|
||||
if (IS_NULL(prev)) {
|
||||
prev = onigenc_get_prev_char_head(enc, start, p);
|
||||
if (IS_NULL(prev)) return TRUE;
|
||||
}
|
||||
|
||||
cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
cto = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
|
||||
from = wb_get_type(cfrom);
|
||||
to = wb_get_type(cto);
|
||||
|
||||
/* short cut */
|
||||
if (from == 0 && to == 0) goto WB999;
|
||||
|
||||
/* WB3: CR + LF */
|
||||
if (from == WB_CR && to == WB_LF) return FALSE;
|
||||
|
||||
/* WB3a: (Newline|CR|LF) / */
|
||||
if (from == WB_Newline || from == WB_CR || from == WB_LF) return TRUE;
|
||||
/* WB3b: / (Newline|CR|LF) */
|
||||
if (to == WB_Newline || to == WB_CR || to == WB_LF) return TRUE;
|
||||
|
||||
/* WB3c: ZWJ + {Extended_Pictographic} */
|
||||
if (from == WB_ZWJ) {
|
||||
if (onigenc_unicode_is_code_ctype(cto, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* WB3d: WSegSpace + WSegSpace */
|
||||
if (from == WB_WSegSpace && to == WB_WSegSpace) return FALSE;
|
||||
|
||||
/* WB4: X (Extend|Format|ZWJ)* -> X */
|
||||
if (IS_WB_IGNORE_TAIL(to)) return FALSE;
|
||||
if (IS_WB_IGNORE_TAIL(from)) {
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from = wb_get_type(cfrom);
|
||||
if (! IS_WB_IGNORE_TAIL(from))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_WB_AHLetter(from)) {
|
||||
/* WB5: AHLetter + AHLetter */
|
||||
if (IS_WB_AHLetter(to)) return FALSE;
|
||||
|
||||
/* WB6: AHLetter + (MidLetter | MidNumLetQ) AHLetter */
|
||||
if (to == WB_MidLetter || IS_WB_MidNumLetQ(to)) {
|
||||
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
|
||||
if (r == 1) {
|
||||
if (IS_WB_AHLetter(to2)) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WB7: AHLetter (MidLetter | MidNumLetQ) + AHLetter */
|
||||
if (from == WB_MidLetter || IS_WB_MidNumLetQ(from)) {
|
||||
if (IS_WB_AHLetter(to)) {
|
||||
from2 = WB_Any;
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (! IS_WB_IGNORE_TAIL(from2))
|
||||
break;
|
||||
}
|
||||
|
||||
if (IS_WB_AHLetter(from2)) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (from == WB_Hebrew_Letter) {
|
||||
/* WB7a: Hebrew_Letter + Single_Quote */
|
||||
if (to == WB_Single_Quote) return FALSE;
|
||||
|
||||
/* WB7b: Hebrew_Letter + Double_Quote Hebrew_Letter */
|
||||
if (to == WB_Double_Quote) {
|
||||
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
|
||||
if (r == 1) {
|
||||
if (to2 == WB_Hebrew_Letter) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WB7c: Hebrew_Letter Double_Quote + Hebrew_Letter */
|
||||
if (from == WB_Double_Quote) {
|
||||
if (to == WB_Hebrew_Letter) {
|
||||
from2 = WB_Any;
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (! IS_WB_IGNORE_TAIL(from2))
|
||||
break;
|
||||
}
|
||||
|
||||
if (from2 == WB_Hebrew_Letter) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (to == WB_Numeric) {
|
||||
/* WB8: Numeric + Numeric */
|
||||
if (from == WB_Numeric) return FALSE;
|
||||
|
||||
/* WB9: AHLetter + Numeric */
|
||||
if (IS_WB_AHLetter(from)) return FALSE;
|
||||
|
||||
/* WB11: Numeric (MidNum | MidNumLetQ) + Numeric */
|
||||
if (from == WB_MidNum || IS_WB_MidNumLetQ(from)) {
|
||||
from2 = WB_Any;
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (! IS_WB_IGNORE_TAIL(from2))
|
||||
break;
|
||||
}
|
||||
|
||||
if (from2 == WB_Numeric) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (from == WB_Numeric) {
|
||||
/* WB10: Numeric + AHLetter */
|
||||
if (IS_WB_AHLetter(to)) return FALSE;
|
||||
|
||||
/* WB12: Numeric + (MidNum | MidNumLetQ) Numeric */
|
||||
if (to == WB_MidNum || IS_WB_MidNumLetQ(to)) {
|
||||
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
|
||||
if (r == 1) {
|
||||
if (to2 == WB_Numeric) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WB13: Katakana + Katakana */
|
||||
if (from == WB_Katakana && to == WB_Katakana) return FALSE;
|
||||
|
||||
/* WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) + ExtendNumLet */
|
||||
if (IS_WB_AHLetter(from) || from == WB_Numeric || from == WB_Katakana
|
||||
|| from == WB_ExtendNumLet) {
|
||||
if (to == WB_ExtendNumLet) return FALSE;
|
||||
}
|
||||
|
||||
/* WB13b: ExtendNumLet + (AHLetter | Numeric | Katakana) */
|
||||
if (from == WB_ExtendNumLet) {
|
||||
if (IS_WB_AHLetter(to) || to == WB_Numeric || to == WB_Katakana)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/* WB15: sot (RI RI)* RI + RI */
|
||||
/* WB16: [^RI] (RI RI)* RI + RI */
|
||||
if (from == WB_Regional_Indicator && to == WB_Regional_Indicator) {
|
||||
int n = 0;
|
||||
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (from2 != WB_Regional_Indicator)
|
||||
break;
|
||||
|
||||
n++;
|
||||
}
|
||||
if ((n % 2) == 0) return FALSE;
|
||||
}
|
||||
|
||||
WB999:
|
||||
/* WB999: Any / Any */
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#endif /* USE_UNICODE_WORD_BREAK */
|
||||
|
||||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
|
||||
enum EGCB_BREAK_TYPE {
|
||||
@@ -657,8 +932,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
|
||||
if (from == 0x000d && to == 0x000a) return 0;
|
||||
else return 1;
|
||||
return from != 0x000d || to != 0x000a;
|
||||
}
|
||||
|
||||
btype = unicode_egcb_is_break_2code(from, to);
|
||||
@@ -701,8 +975,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
return 1;
|
||||
|
||||
#else
|
||||
if (from == 0x000d && to == 0x000a) return 0;
|
||||
else return 1;
|
||||
return from != 0x000d || to != 0x000a;
|
||||
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
||||
}
|
||||
|
||||
@@ -729,6 +1002,7 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
|
||||
int len;
|
||||
int c;
|
||||
char* s;
|
||||
UChar* uname;
|
||||
|
||||
if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
|
||||
return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
|
||||
@@ -741,10 +1015,11 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
|
||||
if (s == 0)
|
||||
return ONIGERR_MEMORY;
|
||||
|
||||
uname = (UChar* )name;
|
||||
n = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
c = name[i];
|
||||
if (c <= 0 || c >= 0x80) {
|
||||
c = uname[i];
|
||||
if (c < 0x20 || c >= 0x80) {
|
||||
xfree(s);
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
Reference in New Issue
Block a user