MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3

BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=2066 Update Oniguruma to the latest version, v6.9.3. Oniguruma: https://github.com/kkos/oniguruma This release is a security fix release. It includes the following changes: Fixed CVE-2019-13224 Fixed CVE-2019-13225 Fixed many problems (found by libfuzzer programs) Verified the VS2015 and GCC5 builds. Verified the RegularExpressionProtocol GetInfo() and Match() functions. Cc: Jian J Wang <jian.j.wang@intel.com> Cc: Hao A Wu <hao.a.wu@intel.com> Cc: Cinnamon Shia <cinnamon.shia@hpe.com> Signed-off-by: Liming Gao <liming.gao@intel.com> Reviewed-by: Hao A Wu <hao.a.wu@intel.com>
2019-08-08 19:53:03 +08:00
parent ecc32c90ee
commit b26691c471
25 changed files with 16407 additions and 13648 deletions
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c
+++ b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c
@@ -2,7 +2,7 @@
  utf16_le.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
 static int
 utf16le_code_to_mbclen(OnigCodePoint code)
 {
-  return (code > 0xffff ? 4 : 2);
+  if (code > 0xffff) {
+    if (code > 0x10ffff)
+      return ONIGERR_INVALID_CODE_POINT_VALUE;
+    else
+      return 4;
+  }
+  else {
+    return 2;
+  }
 }

 static int
@@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
  const UChar* end1 = end - 1;

  while (p < end1) {
-    p += utf16le_mbc_enc_len(p);
+    int len = utf16le_mbc_enc_len(p);
+    if (len == 4) {
+      if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
+        return FALSE;
+    }
+    else
+      if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+        return FALSE;
+
+    p += len;
  }

  if (p != end)
@@ -184,7 +201,7 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)

 static int
 utf16le_mbc_case_fold(OnigCaseFoldType flag,
-		      const UChar** pp, const UChar* end, UChar* fold)
+                      const UChar** pp, const UChar* end, UChar* fold)
 {
  const UChar* p = *pp;

@@ -207,13 +224,13 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
  }
  else
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
-					 fold);
+                                         fold);
 }

 #if 0
 static int
 utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
-			 const UChar* end)
+                         const UChar* end)
 {
  const UChar* p = *pp;

@@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
    s--;
  }

-  if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+  if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
+      UTF16_IS_SURROGATE_FIRST(*(s-1)))
    s -= 2;

  return (UChar* )s;
@@ -263,7 +281,7 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
 {
  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
-						    flag, p, end, items);
+                                                    flag, p, end, items);
 }

 OnigEncodingType OnigEncodingUTF16_LE = {
@@ -286,6 +304,6 @@ OnigEncodingType OnigEncodingUTF16_LE = {
  init,
  0, /* is_initialized */
  is_valid_mbc_string,
-  ENC_FLAG_UNICODE,
+  ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
  0, 0
 };