Five of the added checks are not logically necessary; they exist only to pass static analysis. For more detail, please refer to the comments in the code. The remaining changes have all been accepted by the owner and are reasonable. Cc: Liming Gao <liming.gao@intel.com> Contributed-under: TianoCore Contribution Agreement 1.1 Signed-off-by: Dongao Guo <dongao.guo@intel.com> Reviewed-by: Liming Gao <liming.gao@intel.com>
6582 lines
159 KiB
C
6582 lines
159 KiB
C
/**********************************************************************
|
|
regcomp.c - Oniguruma (regular expression library)
|
|
**********************************************************************/
|
|
/*-
|
|
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "regparse.h"
|
|
|
|
/* Default case-fold flag; read and written through
   onig_get_default_case_fold_flag() / onig_set_default_case_fold_flag(). */
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
|
|
|
|
#if 0
|
|
/* Growable stack of int values (this region is compiled out by "#if 0"). */
typedef struct {
  int  n;      /* number of values currently stored */
  int  alloc;  /* capacity of v, counted in elements */
  int* v;      /* storage for the values */
} int_stack;
|
|
|
|
static int
|
|
make_int_stack(int_stack** rs, int init_size)
|
|
{
|
|
int_stack* s;
|
|
int* v;
|
|
|
|
*rs = 0;
|
|
|
|
s = xmalloc(sizeof(*s));
|
|
if (IS_NULL(s)) return ONIGERR_MEMORY;
|
|
|
|
v = (int* )xmalloc(sizeof(int) * init_size);
|
|
if (IS_NULL(v)) {
|
|
xfree(s);
|
|
return ONIGERR_MEMORY;
|
|
}
|
|
|
|
s->n = 0;
|
|
s->alloc = init_size;
|
|
s->v = v;
|
|
|
|
*rs = s;
|
|
return ONIG_NORMAL;
|
|
}
|
|
|
|
static void
|
|
free_int_stack(int_stack* s)
|
|
{
|
|
if (IS_NOT_NULL(s)) {
|
|
if (IS_NOT_NULL(s->v))
|
|
xfree(s->v);
|
|
xfree(s);
|
|
}
|
|
}
|
|
|
|
static int
|
|
int_stack_push(int_stack* s, int v)
|
|
{
|
|
if (s->n >= s->alloc) {
|
|
int new_size = s->alloc * 2;
|
|
int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);
|
|
if (IS_NULL(nv)) return ONIGERR_MEMORY;
|
|
|
|
s->alloc = new_size;
|
|
s->v = nv;
|
|
}
|
|
|
|
s->v[s->n] = v;
|
|
s->n++;
|
|
return ONIG_NORMAL;
|
|
}
|
|
|
|
static int
|
|
int_stack_pop(int_stack* s)
|
|
{
|
|
int v;
|
|
|
|
#ifdef ONIG_DEBUG
|
|
if (s->n <= 0) {
|
|
fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
v = s->v[s->n];
|
|
s->n--;
|
|
return v;
|
|
}
|
|
#endif
|
|
|
|
extern OnigCaseFoldType
|
|
onig_get_default_case_fold_flag(void)
|
|
{
|
|
return OnigDefaultCaseFoldFlag;
|
|
}
|
|
|
|
extern int
|
|
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
|
{
|
|
OnigDefaultCaseFoldFlag = case_fold_flag;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Compare the product x * y with v without overflowing int.
 *
 * Returns  1 when x * y > v, or when the product would exceed INT_MAX,
 *          0 when x * y == v,
 *         -1 when x * y < v, and also whenever x or y is zero
 *            (v is not consulted in that case).
 *
 * The overflow guard is written for positive operands; the callers in this
 * file pass compiled lengths and repeat counts.
 */
static int
int_multiply_cmp(int x, int y, int v)
{
  int xy;

  if (x == 0 || y == 0) return -1;

  /* x * y still fits when x == INT_MAX / y, so only a strictly larger x
     is treated as overflow */
  if (x > INT_MAX / y) return 1;

  xy = x * y;
  if (xy > v) return 1;
  return (xy == v) ? 0 : -1;
}
|
|
|
|
|
|
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
|
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
|
#endif
|
|
|
|
static void
|
|
swap_node(Node* a, Node* b)
|
|
{
|
|
Node c;
|
|
|
|
c = *a; *a = *b; *b = c;
|
|
|
|
if (NODE_TYPE(a) == NODE_STRING) {
|
|
StrNode* sn = STR_(a);
|
|
if (sn->capa == 0) {
|
|
int len = (int )(sn->end - sn->s);
|
|
sn->s = sn->buf;
|
|
sn->end = sn->s + len;
|
|
}
|
|
}
|
|
|
|
if (NODE_TYPE(b) == NODE_STRING) {
|
|
StrNode* sn = STR_(b);
|
|
if (sn->capa == 0) {
|
|
int len = (int )(sn->end - sn->s);
|
|
sn->s = sn->buf;
|
|
sn->end = sn->s + len;
|
|
}
|
|
}
|
|
}
|
|
|
|
static OnigLen
|
|
distance_add(OnigLen d1, OnigLen d2)
|
|
{
|
|
if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
|
|
return INFINITE_LEN;
|
|
else {
|
|
if (d1 <= INFINITE_LEN - d2) return d1 + d2;
|
|
else return INFINITE_LEN;
|
|
}
|
|
}
|
|
|
|
static OnigLen
|
|
distance_multiply(OnigLen d, int m)
|
|
{
|
|
if (m == 0) return 0;
|
|
|
|
if (d < INFINITE_LEN / m)
|
|
return d * m;
|
|
else
|
|
return INFINITE_LEN;
|
|
}
|
|
|
|
static int
|
|
bitset_is_empty(BitSetRef bs)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < (int )BITSET_SIZE; i++) {
|
|
if (bs[i] != 0) return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
extern int
|
|
onig_bbuf_init(BBuf* buf, int size)
|
|
{
|
|
if (size <= 0) {
|
|
size = 0;
|
|
buf->p = NULL;
|
|
}
|
|
else {
|
|
buf->p = (UChar* )xmalloc(size);
|
|
if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
|
|
}
|
|
|
|
buf->alloc = size;
|
|
buf->used = 0;
|
|
return 0;
|
|
}
|
|
|
|
|
|
#ifdef USE_CALL
|
|
|
|
static int
|
|
unset_addr_list_init(UnsetAddrList* list, int size)
|
|
{
|
|
UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
|
|
CHECK_NULL_RETURN_MEMERR(p);
|
|
|
|
list->num = 0;
|
|
list->alloc = size;
|
|
list->us = p;
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
unset_addr_list_end(UnsetAddrList* list)
|
|
{
|
|
if (IS_NOT_NULL(list->us))
|
|
xfree(list->us);
|
|
}
|
|
|
|
static int
|
|
unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
|
|
{
|
|
UnsetAddr* p;
|
|
int size;
|
|
|
|
if (list->num >= list->alloc) {
|
|
size = list->alloc * 2;
|
|
p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);
|
|
CHECK_NULL_RETURN_MEMERR(p);
|
|
list->alloc = size;
|
|
list->us = p;
|
|
}
|
|
|
|
list->us[list->num].offset = offset;
|
|
list->us[list->num].target = node;
|
|
list->num++;
|
|
return 0;
|
|
}
|
|
#endif /* USE_CALL */
|
|
|
|
|
|
static int
|
|
add_opcode(regex_t* reg, int opcode)
|
|
{
|
|
BB_ADD1(reg, opcode);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_rel_addr(regex_t* reg, int addr)
|
|
{
|
|
RelAddrType ra = (RelAddrType )addr;
|
|
|
|
BB_ADD(reg, &ra, SIZE_RELADDR);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_abs_addr(regex_t* reg, int addr)
|
|
{
|
|
AbsAddrType ra = (AbsAddrType )addr;
|
|
|
|
BB_ADD(reg, &ra, SIZE_ABSADDR);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_length(regex_t* reg, int len)
|
|
{
|
|
LengthType l = (LengthType )len;
|
|
|
|
BB_ADD(reg, &l, SIZE_LENGTH);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_mem_num(regex_t* reg, int num)
|
|
{
|
|
MemNumType n = (MemNumType )num;
|
|
|
|
BB_ADD(reg, &n, SIZE_MEMNUM);
|
|
return 0;
|
|
}
|
|
|
|
#if 0
/* Compiled out: append addr, converted to PointerType, as SIZE_POINTER bytes. */
static int
add_pointer(regex_t* reg, void* addr)
{
  PointerType pointer;

  pointer = (PointerType )addr;
  BB_ADD(reg, &pointer, SIZE_POINTER);
  return 0;
}
#endif
|
|
|
|
static int
|
|
add_option(regex_t* reg, OnigOptionType option)
|
|
{
|
|
BB_ADD(reg, &option, SIZE_OPTION);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_save_type(regex_t* reg, enum SaveType type)
|
|
{
|
|
SaveType t = (SaveType )type;
|
|
|
|
BB_ADD(reg, &t, SIZE_SAVE_TYPE);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_update_var_type(regex_t* reg, enum UpdateVarType type)
|
|
{
|
|
UpdateVarType t = (UpdateVarType )type;
|
|
|
|
BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_mode(regex_t* reg, ModeType mode)
|
|
{
|
|
BB_ADD(reg, &mode, SIZE_MODE);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
|
|
{
|
|
int r;
|
|
|
|
r = add_opcode(reg, opcode);
|
|
if (r != 0) return r;
|
|
r = add_rel_addr(reg, addr);
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
add_bytes(regex_t* reg, UChar* bytes, int len)
|
|
{
|
|
BB_ADD(reg, bytes, len);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_bitset(regex_t* reg, BitSetRef bs)
|
|
{
|
|
BB_ADD(reg, bs, SIZE_BITSET);
|
|
return 0;
|
|
}
|
|
|
|
static int compile_length_tree(Node* node, regex_t* reg);
|
|
static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
|
|
|
|
|
|
/* True for the exact-string opcodes that are followed by a string-length
   operand (see add_compile_string_length() / add_compile_string()). */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
  ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N || \
   (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || \
   (op) == OP_EXACTN_IC)
|
|
|
|
static int
|
|
select_str_opcode(int mb_len, int str_len, int ignore_case)
|
|
{
|
|
int op;
|
|
|
|
if (ignore_case) {
|
|
switch (str_len) {
|
|
case 1: op = OP_EXACT1_IC; break;
|
|
default: op = OP_EXACTN_IC; break;
|
|
}
|
|
}
|
|
else {
|
|
switch (mb_len) {
|
|
case 1:
|
|
switch (str_len) {
|
|
case 1: op = OP_EXACT1; break;
|
|
case 2: op = OP_EXACT2; break;
|
|
case 3: op = OP_EXACT3; break;
|
|
case 4: op = OP_EXACT4; break;
|
|
case 5: op = OP_EXACT5; break;
|
|
default: op = OP_EXACTN; break;
|
|
}
|
|
break;
|
|
|
|
case 2:
|
|
switch (str_len) {
|
|
case 1: op = OP_EXACTMB2N1; break;
|
|
case 2: op = OP_EXACTMB2N2; break;
|
|
case 3: op = OP_EXACTMB2N3; break;
|
|
default: op = OP_EXACTMB2N; break;
|
|
}
|
|
break;
|
|
|
|
case 3:
|
|
op = OP_EXACTMB3N;
|
|
break;
|
|
|
|
default:
|
|
op = OP_EXACTMBN;
|
|
break;
|
|
}
|
|
}
|
|
return op;
|
|
}
|
|
|
|
static int
|
|
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
|
|
{
|
|
int r;
|
|
int saved_num_null_check = reg->num_null_check;
|
|
|
|
if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
|
|
r = add_opcode(reg, OP_EMPTY_CHECK_START);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
|
|
if (r != 0) return r;
|
|
reg->num_null_check++;
|
|
}
|
|
|
|
r = compile_tree(node, reg, env);
|
|
if (r != 0) return r;
|
|
|
|
if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
|
|
if (empty_info == QUANT_BODY_IS_EMPTY)
|
|
r = add_opcode(reg, OP_EMPTY_CHECK_END);
|
|
else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)
|
|
r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);
|
|
else if (empty_info == QUANT_BODY_IS_EMPTY_REC)
|
|
r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
|
|
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
|
|
}
|
|
return r;
|
|
}
|
|
|
|
#ifdef USE_CALL
|
|
static int
|
|
compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r;
|
|
|
|
r = add_opcode(reg, OP_CALL);
|
|
if (r != 0) return r;
|
|
r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),
|
|
NODE_CALL_BODY(node));
|
|
if (r != 0) return r;
|
|
r = add_abs_addr(reg, 0 /*dummy addr.*/);
|
|
return r;
|
|
}
|
|
#endif
|
|
|
|
static int
|
|
compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int i, r;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
r = compile_tree(node, reg, env);
|
|
if (r != 0) return r;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
|
|
regex_t* reg ARG_UNUSED, int ignore_case)
|
|
{
|
|
int len;
|
|
int op = select_str_opcode(mb_len, str_len, ignore_case);
|
|
|
|
len = SIZE_OPCODE;
|
|
|
|
if (op == OP_EXACTMBN) len += SIZE_LENGTH;
|
|
if (IS_NEED_STR_LEN_OP_EXACT(op))
|
|
len += SIZE_LENGTH;
|
|
|
|
len += mb_len * str_len;
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
add_compile_string(UChar* s, int mb_len, int str_len,
|
|
regex_t* reg, int ignore_case)
|
|
{
|
|
int op = select_str_opcode(mb_len, str_len, ignore_case);
|
|
add_opcode(reg, op);
|
|
|
|
if (op == OP_EXACTMBN)
|
|
add_length(reg, mb_len);
|
|
|
|
if (IS_NEED_STR_LEN_OP_EXACT(op)) {
|
|
if (op == OP_EXACTN_IC)
|
|
add_length(reg, mb_len * str_len);
|
|
else
|
|
add_length(reg, str_len);
|
|
}
|
|
|
|
add_bytes(reg, s, mb_len * str_len);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
compile_length_string_node(Node* node, regex_t* reg)
|
|
{
|
|
int rlen, r, len, prev_len, slen, ambig;
|
|
UChar *p, *prev;
|
|
StrNode* sn;
|
|
OnigEncoding enc = reg->enc;
|
|
|
|
sn = STR_(node);
|
|
if (sn->end <= sn->s)
|
|
return 0;
|
|
|
|
ambig = NODE_STRING_IS_AMBIG(node);
|
|
|
|
p = prev = sn->s;
|
|
prev_len = enclen(enc, p);
|
|
p += prev_len;
|
|
slen = 1;
|
|
rlen = 0;
|
|
|
|
for (; p < sn->end; ) {
|
|
len = enclen(enc, p);
|
|
if (len == prev_len) {
|
|
slen++;
|
|
}
|
|
else {
|
|
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
|
|
rlen += r;
|
|
prev = p;
|
|
slen = 1;
|
|
prev_len = len;
|
|
}
|
|
p += len;
|
|
}
|
|
|
|
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
|
|
rlen += r;
|
|
return rlen;
|
|
}
|
|
|
|
static int
|
|
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
|
|
{
|
|
if (sn->end <= sn->s)
|
|
return 0;
|
|
|
|
return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
|
|
reg, 0);
|
|
}
|
|
|
|
static int
|
|
compile_string_node(Node* node, regex_t* reg)
|
|
{
|
|
int r, len, prev_len, slen, ambig;
|
|
UChar *p, *prev, *end;
|
|
StrNode* sn;
|
|
OnigEncoding enc = reg->enc;
|
|
|
|
sn = STR_(node);
|
|
if (sn->end <= sn->s)
|
|
return 0;
|
|
|
|
end = sn->end;
|
|
ambig = NODE_STRING_IS_AMBIG(node);
|
|
|
|
p = prev = sn->s;
|
|
prev_len = enclen(enc, p);
|
|
p += prev_len;
|
|
slen = 1;
|
|
|
|
for (; p < end; ) {
|
|
len = enclen(enc, p);
|
|
if (len == prev_len) {
|
|
slen++;
|
|
}
|
|
else {
|
|
r = add_compile_string(prev, prev_len, slen, reg, ambig);
|
|
if (r != 0) return r;
|
|
|
|
prev = p;
|
|
slen = 1;
|
|
prev_len = len;
|
|
}
|
|
|
|
p += len;
|
|
}
|
|
|
|
return add_compile_string(prev, prev_len, slen, reg, ambig);
|
|
}
|
|
|
|
static int
|
|
compile_string_raw_node(StrNode* sn, regex_t* reg)
|
|
{
|
|
if (sn->end <= sn->s)
|
|
return 0;
|
|
|
|
return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
|
|
}
|
|
|
|
static int
|
|
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
|
|
{
|
|
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
|
add_length(reg, mbuf->used);
|
|
return add_bytes(reg, mbuf->p, mbuf->used);
|
|
#else
|
|
int r, pad_size;
|
|
UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
|
|
|
|
GET_ALIGNMENT_PAD_SIZE(p, pad_size);
|
|
add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
|
|
if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
|
|
|
r = add_bytes(reg, mbuf->p, mbuf->used);
|
|
|
|
/* padding for return value from compile_length_cclass_node() to be fix. */
|
|
pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
|
|
if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
|
return r;
|
|
#endif
|
|
}
|
|
|
|
static int
|
|
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
|
|
{
|
|
int len;
|
|
|
|
if (IS_NULL(cc->mbuf)) {
|
|
len = SIZE_OPCODE + SIZE_BITSET;
|
|
}
|
|
else {
|
|
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
|
len = SIZE_OPCODE;
|
|
}
|
|
else {
|
|
len = SIZE_OPCODE + SIZE_BITSET;
|
|
}
|
|
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
|
len += SIZE_LENGTH + cc->mbuf->used;
|
|
#else
|
|
len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
|
|
#endif
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
compile_cclass_node(CClassNode* cc, regex_t* reg)
|
|
{
|
|
int r;
|
|
|
|
if (IS_NULL(cc->mbuf)) {
|
|
if (IS_NCCLASS_NOT(cc))
|
|
add_opcode(reg, OP_CCLASS_NOT);
|
|
else
|
|
add_opcode(reg, OP_CCLASS);
|
|
|
|
r = add_bitset(reg, cc->bs);
|
|
}
|
|
else {
|
|
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
|
if (IS_NCCLASS_NOT(cc))
|
|
add_opcode(reg, OP_CCLASS_MB_NOT);
|
|
else
|
|
add_opcode(reg, OP_CCLASS_MB);
|
|
|
|
r = add_multi_byte_cclass(cc->mbuf, reg);
|
|
}
|
|
else {
|
|
if (IS_NCCLASS_NOT(cc))
|
|
add_opcode(reg, OP_CCLASS_MIX_NOT);
|
|
else
|
|
add_opcode(reg, OP_CCLASS_MIX);
|
|
|
|
r = add_bitset(reg, cc->bs);
|
|
if (r != 0) return r;
|
|
r = add_multi_byte_cclass(cc->mbuf, reg);
|
|
}
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
|
{
|
|
#define REPEAT_RANGE_ALLOC 4
|
|
|
|
OnigRepeatRange* p;
|
|
|
|
if (reg->repeat_range_alloc == 0) {
|
|
p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
|
|
CHECK_NULL_RETURN_MEMERR(p);
|
|
reg->repeat_range = p;
|
|
reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
|
|
}
|
|
else if (reg->repeat_range_alloc <= id) {
|
|
int n;
|
|
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
|
|
p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
|
|
sizeof(OnigRepeatRange) * n,
|
|
sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
|
|
CHECK_NULL_RETURN_MEMERR(p);
|
|
reg->repeat_range = p;
|
|
reg->repeat_range_alloc = n;
|
|
}
|
|
else {
|
|
p = reg->repeat_range;
|
|
}
|
|
|
|
p[id].lower = lower;
|
|
p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
|
|
regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r;
|
|
int num_repeat = reg->num_repeat;
|
|
|
|
r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
|
reg->num_repeat++;
|
|
if (r != 0) return r;
|
|
r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
|
|
if (r != 0) return r;
|
|
|
|
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
|
|
if (r != 0) return r;
|
|
|
|
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
|
|
if (r != 0) return r;
|
|
|
|
if (
|
|
#ifdef USE_CALL
|
|
NODE_IS_IN_MULTI_ENTRY(qn) ||
|
|
#endif
|
|
NODE_IS_IN_REAL_REPEAT(qn)) {
|
|
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
|
|
}
|
|
else {
|
|
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
|
|
}
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
is_anychar_infinite_greedy(QuantNode* qn)
|
|
{
|
|
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
|
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
/* Upper limit on (body length * repeat count) for expanding a quantifier
   body inline; compared with int_multiply_cmp() in the quantifier compilers. */
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
/* NOTE(review): expects a variable named ckn in scope; no use is visible in
   this part of the file. */
#define CKN_ON   (ckn > 0)
|
|
|
|
static int
|
|
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
|
|
{
|
|
int len, mod_tlen;
|
|
int infinite = IS_REPEAT_INFINITE(qn->upper);
|
|
enum QuantBodyEmpty empty_info = qn->body_empty_info;
|
|
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
|
|
|
|
if (tlen < 0) return tlen;
|
|
if (tlen == 0) return 0;
|
|
|
|
/* anychar repeat */
|
|
if (is_anychar_infinite_greedy(qn)) {
|
|
if (qn->lower <= 1 ||
|
|
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
|
|
if (IS_NOT_NULL(qn->next_head_exact))
|
|
return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
|
|
else
|
|
return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
|
|
}
|
|
}
|
|
|
|
if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
|
|
mod_tlen = tlen;
|
|
else
|
|
mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
|
|
|
|
if (infinite &&
|
|
(qn->lower <= 1 ||
|
|
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
|
|
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
|
len = SIZE_OP_JUMP;
|
|
}
|
|
else {
|
|
len = tlen * qn->lower;
|
|
}
|
|
|
|
if (qn->greedy) {
|
|
if (IS_NOT_NULL(qn->head_exact))
|
|
len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
|
|
else if (IS_NOT_NULL(qn->next_head_exact))
|
|
len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
|
|
else
|
|
len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
|
|
}
|
|
else
|
|
len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
|
|
}
|
|
else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
|
len = SIZE_OP_JUMP + tlen;
|
|
}
|
|
else if (!infinite && qn->greedy &&
|
|
(qn->upper == 1 ||
|
|
int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
|
|
QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
|
|
len = tlen * qn->lower;
|
|
len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
|
|
}
|
|
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
|
len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
|
|
}
|
|
else {
|
|
len = SIZE_OP_REPEAT_INC
|
|
+ mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int i, r, mod_tlen;
|
|
int infinite = IS_REPEAT_INFINITE(qn->upper);
|
|
enum QuantBodyEmpty empty_info = qn->body_empty_info;
|
|
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
|
|
|
|
if (tlen < 0) return tlen;
|
|
if (tlen == 0) return 0;
|
|
|
|
if (is_anychar_infinite_greedy(qn) &&
|
|
(qn->lower <= 1 ||
|
|
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
|
|
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
|
|
if (r != 0) return r;
|
|
if (IS_NOT_NULL(qn->next_head_exact)) {
|
|
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
|
|
r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
|
else
|
|
r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
|
if (r != 0) return r;
|
|
return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
|
|
}
|
|
else {
|
|
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
|
|
return add_opcode(reg, OP_ANYCHAR_ML_STAR);
|
|
else
|
|
return add_opcode(reg, OP_ANYCHAR_STAR);
|
|
}
|
|
}
|
|
|
|
if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
|
|
mod_tlen = tlen;
|
|
else
|
|
mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
|
|
|
|
if (infinite &&
|
|
(qn->lower <= 1 ||
|
|
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
|
|
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
|
if (qn->greedy) {
|
|
if (IS_NOT_NULL(qn->head_exact))
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
|
|
else if (IS_NOT_NULL(qn->next_head_exact))
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
|
|
else
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
|
|
}
|
|
else {
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
|
|
}
|
|
if (r != 0) return r;
|
|
}
|
|
else {
|
|
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
|
|
if (r != 0) return r;
|
|
}
|
|
|
|
if (qn->greedy) {
|
|
if (IS_NOT_NULL(qn->head_exact)) {
|
|
r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
|
|
mod_tlen + SIZE_OP_JUMP);
|
|
if (r != 0) return r;
|
|
add_bytes(reg, STR_(qn->head_exact)->s, 1);
|
|
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode_rel_addr(reg, OP_JUMP,
|
|
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
|
|
}
|
|
else if (IS_NOT_NULL(qn->next_head_exact)) {
|
|
r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
|
|
mod_tlen + SIZE_OP_JUMP);
|
|
if (r != 0) return r;
|
|
add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
|
|
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode_rel_addr(reg, OP_JUMP,
|
|
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
|
|
}
|
|
else {
|
|
r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
|
if (r != 0) return r;
|
|
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode_rel_addr(reg, OP_JUMP,
|
|
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
|
|
}
|
|
}
|
|
else {
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
|
if (r != 0) return r;
|
|
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
|
}
|
|
}
|
|
else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
|
|
}
|
|
else if (! infinite && qn->greedy &&
|
|
(qn->upper == 1 ||
|
|
int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
|
|
QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
|
|
int n = qn->upper - qn->lower;
|
|
|
|
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
|
|
if (r != 0) return r;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
r = add_opcode_rel_addr(reg, OP_PUSH,
|
|
(n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
|
|
if (r != 0) return r;
|
|
}
|
|
}
|
|
else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
|
r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
|
if (r != 0) return r;
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
|
|
}
|
|
else {
|
|
r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_length_option_node(EnclosureNode* node, regex_t* reg)
|
|
{
|
|
int tlen;
|
|
OnigOptionType prev = reg->options;
|
|
|
|
reg->options = node->o.options;
|
|
tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
|
|
reg->options = prev;
|
|
|
|
return tlen;
|
|
}
|
|
|
|
static int
|
|
compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r;
|
|
OnigOptionType prev = reg->options;
|
|
|
|
reg->options = node->o.options;
|
|
r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
|
|
reg->options = prev;
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
|
|
{
|
|
int len;
|
|
int tlen;
|
|
|
|
if (node->type == ENCLOSURE_OPTION)
|
|
return compile_length_option_node(node, reg);
|
|
|
|
if (NODE_ENCLOSURE_BODY(node)) {
|
|
tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
|
|
if (tlen < 0) return tlen;
|
|
}
|
|
else
|
|
tlen = 0;
|
|
|
|
switch (node->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
#ifdef USE_CALL
|
|
|
|
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
|
|
len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
|
|
return len;
|
|
}
|
|
|
|
if (NODE_IS_CALLED(node)) {
|
|
len = SIZE_OP_MEMORY_START_PUSH + tlen
|
|
+ SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
|
|
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
|
|
len += (NODE_IS_RECURSION(node)
|
|
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
|
else
|
|
len += (NODE_IS_RECURSION(node)
|
|
? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
|
}
|
|
else if (NODE_IS_RECURSION(node)) {
|
|
len = SIZE_OP_MEMORY_START_PUSH;
|
|
len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
|
|
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
|
|
len = SIZE_OP_MEMORY_START_PUSH;
|
|
else
|
|
len = SIZE_OP_MEMORY_START;
|
|
|
|
len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
|
|
? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
|
|
QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
|
|
tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
|
|
if (tlen < 0) return tlen;
|
|
|
|
len = tlen * qn->lower
|
|
+ SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
|
|
}
|
|
else {
|
|
len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
Node* cond = NODE_ENCLOSURE_BODY(node);
|
|
Node* Then = node->te.Then;
|
|
Node* Else = node->te.Else;
|
|
|
|
len = compile_length_tree(cond, reg);
|
|
if (len < 0) return len;
|
|
len += SIZE_OP_PUSH;
|
|
len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
|
|
|
|
if (IS_NOT_NULL(Then)) {
|
|
tlen = compile_length_tree(Then, reg);
|
|
if (tlen < 0) return tlen;
|
|
len += tlen;
|
|
}
|
|
|
|
if (IS_NOT_NULL(Else)) {
|
|
len += SIZE_OP_JUMP;
|
|
tlen = compile_length_tree(Else, reg);
|
|
if (tlen < 0) return tlen;
|
|
len += tlen;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
return ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
/* Forward declaration; the definition is not in this part of the file. */
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
|
|
|
|
static int
|
|
compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r;
|
|
int len;
|
|
|
|
#ifdef USE_CALL
|
|
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
|
|
r = add_opcode(reg, OP_CALL);
|
|
if (r != 0) return r;
|
|
node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
|
|
NODE_STATUS_ADD(node, ADDR_FIXED);
|
|
r = add_abs_addr(reg, (int )node->m.called_addr);
|
|
if (r != 0) return r;
|
|
len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
|
|
len += SIZE_OP_RETURN;
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
|
if (r != 0) return r;
|
|
|
|
r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_RETURN);
|
|
return r;
|
|
}
|
|
|
|
if (NODE_IS_CALLED(node)) {
|
|
r = add_opcode(reg, OP_CALL);
|
|
if (r != 0) return r;
|
|
node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
|
|
NODE_STATUS_ADD(node, ADDR_FIXED);
|
|
r = add_abs_addr(reg, (int )node->m.called_addr);
|
|
if (r != 0) return r;
|
|
len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
|
|
len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
|
|
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
|
|
len += (NODE_IS_RECURSION(node)
|
|
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
|
else
|
|
len += (NODE_IS_RECURSION(node)
|
|
? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
|
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
|
if (r != 0) return r;
|
|
}
|
|
#endif
|
|
|
|
if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
|
|
r = add_opcode(reg, OP_MEMORY_START_PUSH);
|
|
else
|
|
r = add_opcode(reg, OP_MEMORY_START);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, node->m.regnum);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
|
|
if (r != 0) return r;
|
|
|
|
#ifdef USE_CALL
|
|
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
|
|
r = add_opcode(reg, (NODE_IS_RECURSION(node)
|
|
? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
|
|
else
|
|
r = add_opcode(reg, (NODE_IS_RECURSION(node)
|
|
? OP_MEMORY_END_REC : OP_MEMORY_END));
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, node->m.regnum);
|
|
if (NODE_IS_CALLED(node)) {
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_RETURN);
|
|
}
|
|
#else
|
|
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
|
|
r = add_opcode(reg, OP_MEMORY_END_PUSH);
|
|
else
|
|
r = add_opcode(reg, OP_MEMORY_END);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, node->m.regnum);
|
|
#endif
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r, len;
|
|
|
|
switch (node->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
r = compile_enclosure_memory_node(node, reg, env);
|
|
break;
|
|
|
|
case ENCLOSURE_OPTION:
|
|
r = compile_option_node(node, reg, env);
|
|
break;
|
|
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
|
|
QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
|
|
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
|
|
if (r != 0) return r;
|
|
|
|
len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
|
|
if (len < 0) return len;
|
|
|
|
r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_POP_OUT);
|
|
if (r != 0) return r;
|
|
r = add_opcode_rel_addr(reg, OP_JUMP,
|
|
-((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));
|
|
}
|
|
else {
|
|
r = add_opcode(reg, OP_ATOMIC_START);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_ATOMIC_END);
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
int cond_len, then_len, jump_len;
|
|
Node* cond = NODE_ENCLOSURE_BODY(node);
|
|
Node* Then = node->te.Then;
|
|
Node* Else = node->te.Else;
|
|
|
|
r = add_opcode(reg, OP_ATOMIC_START);
|
|
if (r != 0) return r;
|
|
|
|
cond_len = compile_length_tree(cond, reg);
|
|
if (cond_len < 0) return cond_len;
|
|
if (IS_NOT_NULL(Then)) {
|
|
then_len = compile_length_tree(Then, reg);
|
|
if (then_len < 0) return then_len;
|
|
}
|
|
else
|
|
then_len = 0;
|
|
|
|
jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
|
|
if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
|
|
|
|
r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);
|
|
if (r != 0) return r;
|
|
r = compile_tree(cond, reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_ATOMIC_END);
|
|
if (r != 0) return r;
|
|
|
|
if (IS_NOT_NULL(Then)) {
|
|
r = compile_tree(Then, reg, env);
|
|
if (r != 0) return r;
|
|
}
|
|
|
|
if (IS_NOT_NULL(Else)) {
|
|
int else_len = compile_length_tree(Else, reg);
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, else_len);
|
|
if (r != 0) return r;
|
|
r = compile_tree(Else, reg, env);
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
return ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_length_anchor_node(AnchorNode* node, regex_t* reg)
|
|
{
|
|
int len;
|
|
int tlen = 0;
|
|
|
|
if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
|
|
tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
|
|
if (tlen < 0) return tlen;
|
|
}
|
|
|
|
switch (node->type) {
|
|
case ANCHOR_PREC_READ:
|
|
len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
|
|
break;
|
|
case ANCHOR_PREC_READ_NOT:
|
|
len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
|
|
break;
|
|
case ANCHOR_LOOK_BEHIND:
|
|
len = SIZE_OP_LOOK_BEHIND + tlen;
|
|
break;
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
|
|
break;
|
|
|
|
case ANCHOR_WORD_BOUNDARY:
|
|
case ANCHOR_NO_WORD_BOUNDARY:
|
|
#ifdef USE_WORD_BEGIN_END
|
|
case ANCHOR_WORD_BEGIN:
|
|
case ANCHOR_WORD_END:
|
|
#endif
|
|
len = SIZE_OP_WORD_BOUNDARY;
|
|
break;
|
|
|
|
case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
|
|
case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
|
|
len = SIZE_OPCODE;
|
|
break;
|
|
|
|
default:
|
|
len = SIZE_OPCODE;
|
|
break;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r, len;
|
|
enum OpCode op;
|
|
|
|
switch (node->type) {
|
|
case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
|
|
case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
|
|
case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
|
|
case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
|
|
case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
|
|
case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
|
|
|
|
case ANCHOR_WORD_BOUNDARY:
|
|
op = OP_WORD_BOUNDARY;
|
|
word:
|
|
r = add_opcode(reg, op);
|
|
if (r != 0) return r;
|
|
r = add_mode(reg, (ModeType )node->ascii_mode);
|
|
break;
|
|
|
|
case ANCHOR_NO_WORD_BOUNDARY:
|
|
op = OP_NO_WORD_BOUNDARY; goto word;
|
|
break;
|
|
#ifdef USE_WORD_BEGIN_END
|
|
case ANCHOR_WORD_BEGIN:
|
|
op = OP_WORD_BEGIN; goto word;
|
|
break;
|
|
case ANCHOR_WORD_END:
|
|
op = OP_WORD_END; goto word;
|
|
break;
|
|
#endif
|
|
|
|
case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
|
|
r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
|
|
break;
|
|
|
|
case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
|
|
r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
|
|
break;
|
|
|
|
case ANCHOR_PREC_READ:
|
|
r = add_opcode(reg, OP_PREC_READ_START);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_PREC_READ_END);
|
|
break;
|
|
|
|
case ANCHOR_PREC_READ_NOT:
|
|
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
|
|
if (len < 0) return len;
|
|
r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_PREC_READ_NOT_END);
|
|
break;
|
|
|
|
case ANCHOR_LOOK_BEHIND:
|
|
{
|
|
int n;
|
|
r = add_opcode(reg, OP_LOOK_BEHIND);
|
|
if (r != 0) return r;
|
|
if (node->char_len < 0) {
|
|
r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
|
|
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
|
}
|
|
else
|
|
n = node->char_len;
|
|
|
|
r = add_length(reg, n);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
|
|
}
|
|
break;
|
|
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
{
|
|
int n;
|
|
|
|
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
|
|
r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,
|
|
len + SIZE_OP_LOOK_BEHIND_NOT_END);
|
|
if (r != 0) return r;
|
|
if (node->char_len < 0) {
|
|
r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
|
|
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
|
}
|
|
else
|
|
n = node->char_len;
|
|
r = add_length(reg, n);
|
|
if (r != 0) return r;
|
|
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
|
|
if (r != 0) return r;
|
|
r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
return ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_gimmick_node(GimmickNode* node, regex_t* reg)
|
|
{
|
|
int r;
|
|
|
|
switch (node->type) {
|
|
case GIMMICK_FAIL:
|
|
r = add_opcode(reg, OP_FAIL);
|
|
break;
|
|
|
|
case GIMMICK_KEEP:
|
|
r = add_opcode(reg, OP_PUSH_SAVE_VAL);
|
|
if (r != 0) return r;
|
|
r = add_save_type(reg, SAVE_KEEP);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, node->id);
|
|
break;
|
|
|
|
case GIMMICK_SAVE:
|
|
r = add_opcode(reg, OP_PUSH_SAVE_VAL);
|
|
if (r != 0) return r;
|
|
r = add_save_type(reg, node->detail_type);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, node->id);
|
|
break;
|
|
|
|
case GIMMICK_UPDATE_VAR:
|
|
r = add_opcode(reg, OP_UPDATE_VAR);
|
|
if (r != 0) return r;
|
|
r = add_update_var_type(reg, node->detail_type);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, node->id);
|
|
break;
|
|
|
|
#ifdef USE_CALLOUT
|
|
case GIMMICK_CALLOUT:
|
|
switch (node->detail_type) {
|
|
case ONIG_CALLOUT_OF_CONTENTS:
|
|
case ONIG_CALLOUT_OF_NAME:
|
|
{
|
|
r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?
|
|
OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);
|
|
if (r != 0) return r;
|
|
if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
|
|
r = add_mem_num(reg, node->id);
|
|
if (r != 0) return r;
|
|
}
|
|
r = add_mem_num(reg, node->num);
|
|
if (r != 0) return r;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
r = ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
|
|
{
|
|
int len;
|
|
|
|
switch (node->type) {
|
|
case GIMMICK_FAIL:
|
|
len = SIZE_OP_FAIL;
|
|
break;
|
|
|
|
case GIMMICK_KEEP:
|
|
case GIMMICK_SAVE:
|
|
len = SIZE_OP_PUSH_SAVE_VAL;
|
|
break;
|
|
|
|
case GIMMICK_UPDATE_VAR:
|
|
len = SIZE_OP_UPDATE_VAR;
|
|
break;
|
|
|
|
#ifdef USE_CALLOUT
|
|
case GIMMICK_CALLOUT:
|
|
switch (node->detail_type) {
|
|
case ONIG_CALLOUT_OF_CONTENTS:
|
|
len = SIZE_OP_CALLOUT_CONTENTS;
|
|
break;
|
|
case ONIG_CALLOUT_OF_NAME:
|
|
len = SIZE_OP_CALLOUT_NAME;
|
|
break;
|
|
|
|
default:
|
|
len = ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
compile_length_tree(Node* node, regex_t* reg)
|
|
{
|
|
int len, r;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
len = 0;
|
|
do {
|
|
r = compile_length_tree(NODE_CAR(node), reg);
|
|
if (r < 0) return r;
|
|
len += r;
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
r = len;
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
{
|
|
int n;
|
|
|
|
n = r = 0;
|
|
do {
|
|
r += compile_length_tree(NODE_CAR(node), reg);
|
|
n++;
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
if (NODE_STRING_IS_RAW(node))
|
|
r = compile_length_string_raw_node(STR_(node), reg);
|
|
else
|
|
r = compile_length_string_node(node, reg);
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
r = compile_length_cclass_node(CCLASS_(node), reg);
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
r = SIZE_OPCODE;
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
{
|
|
BackRefNode* br = BACKREF_(node);
|
|
|
|
if (NODE_IS_CHECKER(node)) {
|
|
#ifdef USE_BACKREF_WITH_LEVEL
|
|
if (NODE_IS_NEST_LEVEL(node)) {
|
|
r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
|
}
|
|
else
|
|
#endif
|
|
r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
|
}
|
|
else {
|
|
#ifdef USE_BACKREF_WITH_LEVEL
|
|
if (NODE_IS_NEST_LEVEL(node)) {
|
|
r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
|
|
SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
|
}
|
|
else
|
|
#endif
|
|
if (br->back_num == 1) {
|
|
r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
|
|
? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
|
|
}
|
|
else {
|
|
r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
r = SIZE_OP_CALL;
|
|
break;
|
|
#endif
|
|
|
|
case NODE_QUANT:
|
|
r = compile_length_quantifier_node(QUANT_(node), reg);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
r = compile_length_enclosure_node(ENCLOSURE_(node), reg);
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
r = compile_length_anchor_node(ANCHOR_(node), reg);
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
r = compile_length_gimmick_node(GIMMICK_(node), reg);
|
|
break;
|
|
|
|
default:
|
|
return ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
compile_tree(Node* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int n, len, pos, r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
do {
|
|
r = compile_tree(NODE_CAR(node), reg, env);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
{
|
|
Node* x = node;
|
|
len = 0;
|
|
do {
|
|
len += compile_length_tree(NODE_CAR(x), reg);
|
|
if (IS_NOT_NULL(NODE_CDR(x))) {
|
|
len += SIZE_OP_PUSH + SIZE_OP_JUMP;
|
|
}
|
|
} while (IS_NOT_NULL(x = NODE_CDR(x)));
|
|
pos = reg->used + len; /* goal position */
|
|
|
|
do {
|
|
len = compile_length_tree(NODE_CAR(node), reg);
|
|
if (IS_NOT_NULL(NODE_CDR(node))) {
|
|
enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
|
|
r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);
|
|
if (r != 0) break;
|
|
}
|
|
r = compile_tree(NODE_CAR(node), reg, env);
|
|
if (r != 0) break;
|
|
if (IS_NOT_NULL(NODE_CDR(node))) {
|
|
len = pos - (reg->used + SIZE_OP_JUMP);
|
|
r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
|
if (r != 0) break;
|
|
}
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
if (NODE_STRING_IS_RAW(node))
|
|
r = compile_string_raw_node(STR_(node), reg);
|
|
else
|
|
r = compile_string_node(node, reg);
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
r = compile_cclass_node(CCLASS_(node), reg);
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
{
|
|
int op;
|
|
|
|
switch (CTYPE_(node)->ctype) {
|
|
case CTYPE_ANYCHAR:
|
|
if (IS_MULTILINE(CTYPE_OPTION(node, reg)))
|
|
r = add_opcode(reg, OP_ANYCHAR_ML);
|
|
else
|
|
r = add_opcode(reg, OP_ANYCHAR);
|
|
break;
|
|
|
|
case ONIGENC_CTYPE_WORD:
|
|
if (CTYPE_(node)->ascii_mode == 0) {
|
|
op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
|
|
}
|
|
else {
|
|
op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
|
|
}
|
|
r = add_opcode(reg, op);
|
|
break;
|
|
|
|
default:
|
|
return ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
{
|
|
BackRefNode* br = BACKREF_(node);
|
|
|
|
if (NODE_IS_CHECKER(node)) {
|
|
#ifdef USE_BACKREF_WITH_LEVEL
|
|
if (NODE_IS_NEST_LEVEL(node)) {
|
|
r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);
|
|
if (r != 0) return r;
|
|
r = add_length(reg, br->nest_level);
|
|
if (r != 0) return r;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
r = add_opcode(reg, OP_BACKREF_CHECK);
|
|
if (r != 0) return r;
|
|
}
|
|
|
|
goto add_bacref_mems;
|
|
}
|
|
else {
|
|
#ifdef USE_BACKREF_WITH_LEVEL
|
|
if (NODE_IS_NEST_LEVEL(node)) {
|
|
r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
|
|
if (r != 0) return r;
|
|
r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
|
|
if (r != 0) return r;
|
|
r = add_length(reg, br->nest_level);
|
|
if (r != 0) return r;
|
|
|
|
goto add_bacref_mems;
|
|
}
|
|
else
|
|
#endif
|
|
if (br->back_num == 1) {
|
|
n = br->back_static[0];
|
|
if (IS_IGNORECASE(reg->options)) {
|
|
r = add_opcode(reg, OP_BACKREF_N_IC);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, n);
|
|
}
|
|
else {
|
|
switch (n) {
|
|
case 1: r = add_opcode(reg, OP_BACKREF1); break;
|
|
case 2: r = add_opcode(reg, OP_BACKREF2); break;
|
|
default:
|
|
r = add_opcode(reg, OP_BACKREF_N);
|
|
if (r != 0) return r;
|
|
r = add_mem_num(reg, n);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
int i;
|
|
int* p;
|
|
|
|
if (IS_IGNORECASE(reg->options)) {
|
|
r = add_opcode(reg, OP_BACKREF_MULTI_IC);
|
|
}
|
|
else {
|
|
r = add_opcode(reg, OP_BACKREF_MULTI);
|
|
}
|
|
if (r != 0) return r;
|
|
|
|
add_bacref_mems:
|
|
r = add_length(reg, br->back_num);
|
|
if (r != 0) return r;
|
|
p = BACKREFS_P(br);
|
|
for (i = br->back_num - 1; i >= 0; i--) {
|
|
r = add_mem_num(reg, p[i]);
|
|
if (r != 0) return r;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
r = compile_call(CALL_(node), reg, env);
|
|
break;
|
|
#endif
|
|
|
|
case NODE_QUANT:
|
|
r = compile_quantifier_node(QUANT_(node), reg, env);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
r = compile_enclosure_node(ENCLOSURE_(node), reg, env);
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
r = compile_anchor_node(ANCHOR_(node), reg, env);
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
r = compile_gimmick_node(GIMMICK_(node), reg);
|
|
break;
|
|
|
|
default:
|
|
#ifdef ONIG_DEBUG
|
|
fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
|
{
|
|
int r = 0;
|
|
Node* node = *plink;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = noname_disable_map(&(NODE_CAR(node)), map, counter);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
Node** ptarget = &(NODE_BODY(node));
|
|
Node* old = *ptarget;
|
|
r = noname_disable_map(ptarget, map, counter);
|
|
if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
|
|
onig_reduce_nested_quantifier(node, *ptarget);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (NODE_IS_NAMED_GROUP(node)) {
|
|
(*counter)++;
|
|
map[en->m.regnum].new_val = *counter;
|
|
en->m.regnum = *counter;
|
|
r = noname_disable_map(&(NODE_BODY(node)), map, counter);
|
|
}
|
|
else {
|
|
*plink = NODE_BODY(node);
|
|
NODE_BODY(node) = NULL_NODE;
|
|
onig_node_free(node);
|
|
r = noname_disable_map(plink, map, counter);
|
|
}
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);
|
|
if (r != 0) return r;
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = noname_disable_map(&(en->te.Then), map, counter);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = noname_disable_map(&(en->te.Else), map, counter);
|
|
if (r != 0) return r;
|
|
}
|
|
}
|
|
else
|
|
r = noname_disable_map(&(NODE_BODY(node)), map, counter);
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (IS_NOT_NULL(NODE_BODY(node)))
|
|
r = noname_disable_map(&(NODE_BODY(node)), map, counter);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
renumber_node_backref(Node* node, GroupNumRemap* map)
|
|
{
|
|
int i, pos, n, old_num;
|
|
int *backs;
|
|
BackRefNode* bn = BACKREF_(node);
|
|
|
|
if (! NODE_IS_BY_NAME(node))
|
|
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
|
|
|
old_num = bn->back_num;
|
|
if (IS_NULL(bn->back_dynamic))
|
|
backs = bn->back_static;
|
|
else
|
|
backs = bn->back_dynamic;
|
|
|
|
for (i = 0, pos = 0; i < old_num; i++) {
|
|
n = map[backs[i]].new_val;
|
|
if (n > 0) {
|
|
backs[pos] = n;
|
|
pos++;
|
|
}
|
|
}
|
|
|
|
bn->back_num = pos;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
renumber_by_map(Node* node, GroupNumRemap* map)
|
|
{
|
|
int r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = renumber_by_map(NODE_CAR(node), map);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
r = renumber_by_map(NODE_BODY(node), map);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
r = renumber_by_map(NODE_BODY(node), map);
|
|
if (r != 0) return r;
|
|
|
|
if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = renumber_by_map(en->te.Then, map);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = renumber_by_map(en->te.Else, map);
|
|
if (r != 0) return r;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
r = renumber_node_backref(node, map);
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (IS_NOT_NULL(NODE_BODY(node)))
|
|
r = renumber_by_map(NODE_BODY(node), map);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
numbered_ref_check(Node* node)
|
|
{
|
|
int r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = numbered_ref_check(NODE_CAR(node));
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (IS_NULL(NODE_BODY(node)))
|
|
break;
|
|
/* fall */
|
|
case NODE_QUANT:
|
|
r = numbered_ref_check(NODE_BODY(node));
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
r = numbered_ref_check(NODE_BODY(node));
|
|
if (r != 0) return r;
|
|
|
|
if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = numbered_ref_check(en->te.Then);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = numbered_ref_check(en->te.Else);
|
|
if (r != 0) return r;
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
if (! NODE_IS_BY_NAME(node))
|
|
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r, i, pos, counter;
|
|
int result;
|
|
MemStatusType loc;
|
|
GroupNumRemap* map;
|
|
|
|
map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
|
|
CHECK_NULL_RETURN_MEMERR(map);
|
|
for (i = 1; i <= env->num_mem; i++) {
|
|
map[i].new_val = 0;
|
|
}
|
|
counter = 0;
|
|
r = noname_disable_map(root, map, &counter);
|
|
if (r != 0) return r;
|
|
|
|
r = renumber_by_map(*root, map);
|
|
if (r != 0) return r;
|
|
|
|
for (i = 1, pos = 1; i <= env->num_mem; i++) {
|
|
if (map[i].new_val > 0) {
|
|
SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
|
|
pos++;
|
|
}
|
|
}
|
|
|
|
loc = env->capture_history;
|
|
MEM_STATUS_CLEAR(env->capture_history);
|
|
for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
|
|
if (MEM_STATUS_AT(loc, i)) {
|
|
MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
|
|
}
|
|
}
|
|
|
|
env->num_mem = env->num_named;
|
|
reg->num_mem = env->num_named;
|
|
result = onig_renumber_name_table(reg, map);
|
|
xfree(map);
|
|
return result;
|
|
}
|
|
|
|
#ifdef USE_CALL
/*
 * Patch every recorded location in uslist with the called address of its
 * target enclosure: SIZE_ABSADDR bytes are written into reg at the entry's
 * offset.
 *
 * Returns 0 on success, or ONIGERR_PARSER_BUG as soon as a target whose
 * address is not yet fixed is found.
 */
static int
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
{
  int idx;

  for (idx = 0; idx < uslist->num; idx++) {
    Node* target = uslist->us[idx].target;
    int offset = uslist->us[idx].offset;
    AbsAddrType addr;

    if (! NODE_IS_ADDR_FIXED(target))
      return ONIGERR_PARSER_BUG;

    addr = ENCLOSURE_(target)->m.called_addr;
    BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);
  }

  return 0;
}
#endif
|
|
|
|
|
|
#define GET_CHAR_LEN_VARLEN -1
|
|
#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
|
|
|
|
/* fixed size pattern node only */
|
|
static int
|
|
get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
|
|
{
|
|
int tlen;
|
|
int r = 0;
|
|
|
|
level++;
|
|
*len = 0;
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
do {
|
|
r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
|
|
if (r == 0)
|
|
*len = distance_add(*len, tlen);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
{
|
|
int tlen2;
|
|
int varlen = 0;
|
|
|
|
r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
|
|
while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
|
|
r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);
|
|
if (r == 0) {
|
|
if (tlen != tlen2)
|
|
varlen = 1;
|
|
}
|
|
}
|
|
if (r == 0) {
|
|
if (varlen != 0) {
|
|
if (level == 1)
|
|
r = GET_CHAR_LEN_TOP_ALT_VARLEN;
|
|
else
|
|
r = GET_CHAR_LEN_VARLEN;
|
|
}
|
|
else
|
|
*len = tlen;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
StrNode* sn = STR_(node);
|
|
UChar *s = sn->s;
|
|
|
|
while (s < sn->end) {
|
|
s += enclen(reg->enc, s);
|
|
(*len)++;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
|
|
if (qn->lower == qn->upper) {
|
|
if (qn->upper == 0) {
|
|
*len = 0;
|
|
}
|
|
else {
|
|
r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
|
|
if (r == 0)
|
|
*len = distance_multiply(tlen, qn->lower);
|
|
}
|
|
}
|
|
else
|
|
r = GET_CHAR_LEN_VARLEN;
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
if (! NODE_IS_RECURSION(node))
|
|
r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
|
|
else
|
|
r = GET_CHAR_LEN_VARLEN;
|
|
break;
|
|
#endif
|
|
|
|
case NODE_CTYPE:
|
|
case NODE_CCLASS:
|
|
*len = 1;
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
switch (en->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
#ifdef USE_CALL
|
|
if (NODE_IS_CLEN_FIXED(node))
|
|
*len = en->char_len;
|
|
else {
|
|
r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
|
|
if (r == 0) {
|
|
en->char_len = *len;
|
|
NODE_STATUS_ADD(node, CLEN_FIXED);
|
|
}
|
|
}
|
|
break;
|
|
#endif
|
|
case ENCLOSURE_OPTION:
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
|
|
break;
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
int clen, elen;
|
|
|
|
r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);
|
|
if (r == 0) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = get_char_length_tree1(en->te.Then, reg, &tlen, level);
|
|
if (r != 0) break;
|
|
}
|
|
else tlen = 0;
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = get_char_length_tree1(en->te.Else, reg, &elen, level);
|
|
if (r != 0) break;
|
|
}
|
|
else elen = 0;
|
|
|
|
if (clen + tlen != elen) {
|
|
r = GET_CHAR_LEN_VARLEN;
|
|
}
|
|
else {
|
|
*len = elen;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
case NODE_GIMMICK:
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
if (NODE_IS_CHECKER(node))
|
|
break;
|
|
/* fall */
|
|
default:
|
|
r = GET_CHAR_LEN_VARLEN;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
get_char_length_tree(Node* node, regex_t* reg, int* len)
|
|
{
|
|
return get_char_length_tree1(node, reg, len, 0);
|
|
}
|
|
|
|
/* x is not included y ==> 1 : 0 */
|
|
static int
|
|
is_exclusive(Node* x, Node* y, regex_t* reg)
|
|
{
|
|
int i, len;
|
|
OnigCodePoint code;
|
|
UChar *p;
|
|
NodeType ytype;
|
|
|
|
retry:
|
|
ytype = NODE_TYPE(y);
|
|
switch (NODE_TYPE(x)) {
|
|
case NODE_CTYPE:
|
|
{
|
|
if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
|
|
CTYPE_(y)->ctype == CTYPE_ANYCHAR)
|
|
break;
|
|
|
|
switch (ytype) {
|
|
case NODE_CTYPE:
|
|
if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
|
|
CTYPE_(y)->not != CTYPE_(x)->not &&
|
|
CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
|
|
return 1;
|
|
else
|
|
return 0;
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
swap:
|
|
{
|
|
Node* tmp;
|
|
tmp = x; x = y; y = tmp;
|
|
goto retry;
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
goto swap;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
{
|
|
int range;
|
|
CClassNode* xc = CCLASS_(x);
|
|
|
|
switch (ytype) {
|
|
case NODE_CTYPE:
|
|
switch (CTYPE_(y)->ctype) {
|
|
case CTYPE_ANYCHAR:
|
|
return 0;
|
|
break;
|
|
|
|
case ONIGENC_CTYPE_WORD:
|
|
if (CTYPE_(y)->not == 0) {
|
|
if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
|
|
range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
|
|
for (i = 0; i < range; i++) {
|
|
if (BITSET_AT(xc->bs, i)) {
|
|
if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
else {
|
|
if (IS_NOT_NULL(xc->mbuf)) return 0;
|
|
if (IS_NCCLASS_NOT(xc)) return 0;
|
|
|
|
range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
|
|
for (i = 0; i < range; i++) {
|
|
if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
|
|
if (BITSET_AT(xc->bs, i))
|
|
return 0;
|
|
}
|
|
}
|
|
for (i = range; i < SINGLE_BYTE_SIZE; i++) {
|
|
if (BITSET_AT(xc->bs, i)) return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
{
|
|
int v;
|
|
CClassNode* yc = CCLASS_(y);
|
|
|
|
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
|
v = BITSET_AT(xc->bs, i);
|
|
if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
|
|
v = BITSET_AT(yc->bs, i);
|
|
if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
|
|
(v == 0 && IS_NCCLASS_NOT(yc)))
|
|
return 0;
|
|
}
|
|
}
|
|
if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
|
|
(IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
goto swap;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
StrNode* xs = STR_(x);
|
|
|
|
if (NODE_STRING_LEN(x) == 0)
|
|
break;
|
|
|
|
switch (ytype) {
|
|
case NODE_CTYPE:
|
|
switch (CTYPE_(y)->ctype) {
|
|
case CTYPE_ANYCHAR:
|
|
break;
|
|
|
|
case ONIGENC_CTYPE_WORD:
|
|
if (CTYPE_(y)->ascii_mode == 0) {
|
|
if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
|
|
return CTYPE_(y)->not;
|
|
else
|
|
return !(CTYPE_(y)->not);
|
|
}
|
|
else {
|
|
if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
|
|
return CTYPE_(y)->not;
|
|
else
|
|
return !(CTYPE_(y)->not);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
{
|
|
CClassNode* cc = CCLASS_(y);
|
|
|
|
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
|
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
|
return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
UChar *q;
|
|
StrNode* ys = STR_(y);
|
|
|
|
len = NODE_STRING_LEN(x);
|
|
if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
|
|
if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
|
|
/* tiny version */
|
|
return 0;
|
|
}
|
|
else {
|
|
for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
|
|
if (*p != *q) return 1;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static Node*
|
|
get_head_value_node(Node* node, int exact, regex_t* reg)
|
|
{
|
|
Node* n = NULL_NODE;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_BACKREF:
|
|
case NODE_ALT:
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
#endif
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
|
break;
|
|
/* fall */
|
|
case NODE_CCLASS:
|
|
if (exact == 0) {
|
|
n = node;
|
|
}
|
|
break;
|
|
|
|
case NODE_LIST:
|
|
n = get_head_value_node(NODE_CAR(node), exact, reg);
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
StrNode* sn = STR_(node);
|
|
|
|
if (sn->end <= sn->s)
|
|
break;
|
|
|
|
if (exact != 0 &&
|
|
!NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
|
|
}
|
|
else {
|
|
n = node;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
if (qn->lower > 0) {
|
|
if (IS_NOT_NULL(qn->head_exact))
|
|
n = qn->head_exact;
|
|
else
|
|
n = get_head_value_node(NODE_BODY(node), exact, reg);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
switch (en->type) {
|
|
case ENCLOSURE_OPTION:
|
|
{
|
|
OnigOptionType options = reg->options;
|
|
|
|
reg->options = ENCLOSURE_(node)->o.options;
|
|
n = get_head_value_node(NODE_BODY(node), exact, reg);
|
|
reg->options = options;
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_MEMORY:
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
case ENCLOSURE_IF_ELSE:
|
|
n = get_head_value_node(NODE_BODY(node), exact, reg);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (ANCHOR_(node)->type == ANCHOR_PREC_READ)
|
|
n = get_head_value_node(NODE_BODY(node), exact, reg);
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return n;
|
|
}
|
|
|
|
static int
|
|
check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
|
|
{
|
|
NodeType type;
|
|
int r = 0;
|
|
|
|
type = NODE_TYPE(node);
|
|
if ((NODE_TYPE2BIT(type) & type_mask) == 0)
|
|
return 1;
|
|
|
|
switch (type) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,
|
|
anchor_mask);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
if (((1<<en->type) & enclosure_mask) == 0)
|
|
return 1;
|
|
|
|
r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
|
|
if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);
|
|
if (r != 0) break;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
type = ANCHOR_(node)->type;
|
|
if ((type & anchor_mask) == 0)
|
|
return 1;
|
|
|
|
if (IS_NOT_NULL(NODE_BODY(node)))
|
|
r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
default:
|
|
break;
|
|
}
|
|
return r;
|
|
}
|
|
|
|
static OnigLen
|
|
tree_min_len(Node* node, ScanEnv* env)
|
|
{
|
|
OnigLen len;
|
|
OnigLen tmin;
|
|
|
|
len = 0;
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_BACKREF:
|
|
if (! NODE_IS_CHECKER(node)) {
|
|
int i;
|
|
int* backs;
|
|
MemEnv* mem_env = SCANENV_MEMENV(env);
|
|
BackRefNode* br = BACKREF_(node);
|
|
if (NODE_IS_RECURSION(node)) break;
|
|
|
|
backs = BACKREFS_P(br);
|
|
len = tree_min_len(mem_env[backs[0]].node, env);
|
|
for (i = 1; i < br->back_num; i++) {
|
|
tmin = tree_min_len(mem_env[backs[i]].node, env);
|
|
if (len > tmin) len = tmin;
|
|
}
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
{
|
|
Node* t = NODE_BODY(node);
|
|
if (NODE_IS_RECURSION(node)) {
|
|
if (NODE_IS_MIN_FIXED(t))
|
|
len = ENCLOSURE_(t)->min_len;
|
|
}
|
|
else
|
|
len = tree_min_len(t, env);
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
case NODE_LIST:
|
|
do {
|
|
tmin = tree_min_len(NODE_CAR(node), env);
|
|
len = distance_add(len, tmin);
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
{
|
|
Node *x, *y;
|
|
y = node;
|
|
do {
|
|
x = NODE_CAR(y);
|
|
tmin = tree_min_len(x, env);
|
|
if (y == node) len = tmin;
|
|
else if (len > tmin) len = tmin;
|
|
} while (IS_NOT_NULL(y = NODE_CDR(y)));
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
StrNode* sn = STR_(node);
|
|
len = (int )(sn->end - sn->s);
|
|
}
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
case NODE_CCLASS:
|
|
len = ONIGENC_MBC_MINLEN(env->enc);
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
|
|
if (qn->lower > 0) {
|
|
len = tree_min_len(NODE_BODY(node), env);
|
|
len = distance_multiply(len, qn->lower);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
switch (en->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
if (NODE_IS_MIN_FIXED(node))
|
|
len = en->min_len;
|
|
else {
|
|
if (NODE_IS_MARK1(node))
|
|
len = 0; /* recursive */
|
|
else {
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
len = tree_min_len(NODE_BODY(node), env);
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
|
|
en->min_len = len;
|
|
NODE_STATUS_ADD(node, MIN_FIXED);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_OPTION:
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
len = tree_min_len(NODE_BODY(node), env);
|
|
break;
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
OnigLen elen;
|
|
|
|
len = tree_min_len(NODE_BODY(node), env);
|
|
if (IS_NOT_NULL(en->te.Then))
|
|
len += tree_min_len(en->te.Then, env);
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
elen = tree_min_len(en->te.Else, env);
|
|
else elen = 0;
|
|
|
|
if (elen < len) len = elen;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
{
|
|
GimmickNode* g = GIMMICK_(node);
|
|
if (g->type == GIMMICK_FAIL) {
|
|
len = INFINITE_LEN;
|
|
break;
|
|
}
|
|
}
|
|
/* fall */
|
|
case NODE_ANCHOR:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static OnigLen
|
|
tree_max_len(Node* node, ScanEnv* env)
|
|
{
|
|
OnigLen len;
|
|
OnigLen tmax;
|
|
|
|
len = 0;
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
do {
|
|
tmax = tree_max_len(NODE_CAR(node), env);
|
|
len = distance_add(len, tmax);
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
do {
|
|
tmax = tree_max_len(NODE_CAR(node), env);
|
|
if (len < tmax) len = tmax;
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
StrNode* sn = STR_(node);
|
|
len = (OnigLen )(sn->end - sn->s);
|
|
}
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
case NODE_CCLASS:
|
|
len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
if (! NODE_IS_CHECKER(node)) {
|
|
int i;
|
|
int* backs;
|
|
MemEnv* mem_env = SCANENV_MEMENV(env);
|
|
BackRefNode* br = BACKREF_(node);
|
|
if (NODE_IS_RECURSION(node)) {
|
|
len = INFINITE_LEN;
|
|
break;
|
|
}
|
|
backs = BACKREFS_P(br);
|
|
for (i = 0; i < br->back_num; i++) {
|
|
tmax = tree_max_len(mem_env[backs[i]].node, env);
|
|
if (len < tmax) len = tmax;
|
|
}
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
if (! NODE_IS_RECURSION(node))
|
|
len = tree_max_len(NODE_BODY(node), env);
|
|
else
|
|
len = INFINITE_LEN;
|
|
break;
|
|
#endif
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
|
|
if (qn->upper != 0) {
|
|
len = tree_max_len(NODE_BODY(node), env);
|
|
if (len != 0) {
|
|
if (! IS_REPEAT_INFINITE(qn->upper))
|
|
len = distance_multiply(len, qn->upper);
|
|
else
|
|
len = INFINITE_LEN;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
switch (en->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
if (NODE_IS_MAX_FIXED(node))
|
|
len = en->max_len;
|
|
else {
|
|
if (NODE_IS_MARK1(node))
|
|
len = INFINITE_LEN;
|
|
else {
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
len = tree_max_len(NODE_BODY(node), env);
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
|
|
en->max_len = len;
|
|
NODE_STATUS_ADD(node, MAX_FIXED);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_OPTION:
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
len = tree_max_len(NODE_BODY(node), env);
|
|
break;
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
OnigLen tlen, elen;
|
|
|
|
len = tree_max_len(NODE_BODY(node), env);
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
tlen = tree_max_len(en->te.Then, env);
|
|
len = distance_add(len, tlen);
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
elen = tree_max_len(en->te.Else, env);
|
|
else elen = 0;
|
|
|
|
if (elen > len) len = elen;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
case NODE_GIMMICK:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
check_backrefs(Node* node, ScanEnv* env)
|
|
{
|
|
int r;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = check_backrefs(NODE_CAR(node), env);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
|
|
r = 0;
|
|
break;
|
|
}
|
|
/* fall */
|
|
case NODE_QUANT:
|
|
r = check_backrefs(NODE_BODY(node), env);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
r = check_backrefs(NODE_BODY(node), env);
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (r != 0) return r;
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = check_backrefs(en->te.Then, env);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = check_backrefs(en->te.Else, env);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
{
|
|
int i;
|
|
BackRefNode* br = BACKREF_(node);
|
|
int* backs = BACKREFS_P(br);
|
|
MemEnv* mem_env = SCANENV_MEMENV(env);
|
|
|
|
for (i = 0; i < br->back_num; i++) {
|
|
if (backs[i] > env->num_mem)
|
|
return ONIGERR_INVALID_BACKREF;
|
|
|
|
NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
|
|
}
|
|
r = 0;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
r = 0;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
|
|
#ifdef USE_CALL
|
|
|
|
/* Result bits of infinite_recursive_call_check(). */
/* a path through the subtree reaches the group being checked */
#define RECURSION_EXIST       (1<<0)
/* every path does so: AND-ed over alternatives, dropped when lower == 0 */
#define RECURSION_MUST        (1<<1)
/* the group is re-entered while `head` is still set, i.e. before any
   list element with a non-zero minimum length was passed */
#define RECURSION_INFINITE    (1<<2)
|
|
|
|
static int
|
|
infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
|
|
{
|
|
int ret;
|
|
int r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
{
|
|
Node *x;
|
|
OnigLen min;
|
|
|
|
x = node;
|
|
do {
|
|
ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
|
|
if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
|
|
r |= ret;
|
|
if (head != 0) {
|
|
min = tree_min_len(NODE_CAR(x), env);
|
|
if (min != 0) head = 0;
|
|
}
|
|
} while (IS_NOT_NULL(x = NODE_CDR(x)));
|
|
}
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
{
|
|
int must;
|
|
|
|
must = RECURSION_MUST;
|
|
do {
|
|
ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
|
|
if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
|
|
|
|
r |= (ret & RECURSION_EXIST);
|
|
must &= ret;
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
r |= must;
|
|
}
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
|
|
if (r < 0) return r;
|
|
if ((r & RECURSION_MUST) != 0) {
|
|
if (QUANT_(node)->lower == 0)
|
|
r &= ~RECURSION_MUST;
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
|
|
break;
|
|
/* fall */
|
|
case NODE_CALL:
|
|
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (NODE_IS_MARK2(node))
|
|
return 0;
|
|
else if (NODE_IS_MARK1(node))
|
|
return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
|
|
: RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
|
|
else {
|
|
NODE_STATUS_ADD(node, MARK2);
|
|
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
|
|
NODE_STATUS_REMOVE(node, MARK2);
|
|
}
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
int eret;
|
|
|
|
ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
|
|
if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
|
|
r |= ret;
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
OnigLen min;
|
|
if (head != 0) {
|
|
min = tree_min_len(NODE_BODY(node), env);
|
|
}
|
|
else min = 0;
|
|
|
|
ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
|
|
if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
|
|
r |= ret;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
eret = infinite_recursive_call_check(en->te.Else, env, head);
|
|
if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
|
|
r |= (eret & RECURSION_EXIST);
|
|
if ((eret & RECURSION_MUST) == 0)
|
|
r &= ~RECURSION_MUST;
|
|
}
|
|
}
|
|
else {
|
|
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
|
|
{
|
|
int r;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
|
|
r = 0;
|
|
break;
|
|
}
|
|
/* fall */
|
|
case NODE_QUANT:
|
|
r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
|
|
int ret;
|
|
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
|
|
ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);
|
|
if (ret < 0) return ret;
|
|
else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
|
|
return ONIGERR_NEVER_ENDING_RECURSION;
|
|
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
}
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = infinite_recursive_call_check_trav(en->te.Then, env);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = infinite_recursive_call_check_trav(en->te.Else, env);
|
|
if (r != 0) return r;
|
|
}
|
|
}
|
|
}
|
|
|
|
r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
|
|
break;
|
|
|
|
default:
|
|
r = 0;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
recursive_call_check(Node* node)
|
|
{
|
|
int r;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
r = 0;
|
|
do {
|
|
r |= recursive_call_check(NODE_CAR(node));
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
|
|
r = 0;
|
|
break;
|
|
}
|
|
/* fall */
|
|
case NODE_QUANT:
|
|
r = recursive_call_check(NODE_BODY(node));
|
|
break;
|
|
|
|
case NODE_CALL:
|
|
r = recursive_call_check(NODE_BODY(node));
|
|
if (r != 0) {
|
|
if (NODE_IS_MARK1(NODE_BODY(node)))
|
|
NODE_STATUS_ADD(node, RECURSION);
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (NODE_IS_MARK2(node))
|
|
return 0;
|
|
else if (NODE_IS_MARK1(node))
|
|
return 1; /* recursion */
|
|
else {
|
|
NODE_STATUS_ADD(node, MARK2);
|
|
r = recursive_call_check(NODE_BODY(node));
|
|
NODE_STATUS_REMOVE(node, MARK2);
|
|
}
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
r = 0;
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r |= recursive_call_check(en->te.Then);
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r |= recursive_call_check(en->te.Else);
|
|
}
|
|
r |= recursive_call_check(NODE_BODY(node));
|
|
}
|
|
else {
|
|
r = recursive_call_check(NODE_BODY(node));
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
r = 0;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/* `state` bit of recursive_call_check_trav(): the traversal is inside a
   memory group flagged RECURSION */
#define IN_RECURSION         (1<<0)
/* return value of recursive_call_check_trav(): the subtree contains a
   memory group flagged CALLED */
#define FOUND_CALLED_NODE    1
|
|
|
|
static int
|
|
recursive_call_check_trav(Node* node, ScanEnv* env, int state)
|
|
{
|
|
int r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
{
|
|
int ret;
|
|
do {
|
|
ret = recursive_call_check_trav(NODE_CAR(node), env, state);
|
|
if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
|
|
else if (ret < 0) return ret;
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
}
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
r = recursive_call_check_trav(NODE_BODY(node), env, state);
|
|
if (QUANT_(node)->upper == 0) {
|
|
if (r == FOUND_CALLED_NODE)
|
|
QUANT_(node)->is_refered = 1;
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
{
|
|
AnchorNode* an = ANCHOR_(node);
|
|
if (ANCHOR_HAS_BODY(an))
|
|
r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
int ret;
|
|
int state1;
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
|
|
if (! NODE_IS_RECURSION(node)) {
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
r = recursive_call_check(NODE_BODY(node));
|
|
if (r != 0)
|
|
NODE_STATUS_ADD(node, RECURSION);
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
}
|
|
|
|
if (NODE_IS_CALLED(node))
|
|
r = FOUND_CALLED_NODE;
|
|
}
|
|
}
|
|
|
|
state1 = state;
|
|
if (NODE_IS_RECURSION(node))
|
|
state1 |= IN_RECURSION;
|
|
|
|
ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
|
|
if (ret == FOUND_CALLED_NODE)
|
|
r = FOUND_CALLED_NODE;
|
|
|
|
if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
ret = recursive_call_check_trav(en->te.Then, env, state1);
|
|
if (ret == FOUND_CALLED_NODE)
|
|
r = FOUND_CALLED_NODE;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
ret = recursive_call_check_trav(en->te.Else, env, state1);
|
|
if (ret == FOUND_CALLED_NODE)
|
|
r = FOUND_CALLED_NODE;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
#endif
|
|
|
|
/* divide different length alternatives in look-behind.
|
|
(?<=A|B) ==> (?<=A)|(?<=B)
|
|
(?<!A|B) ==> (?<!A)(?<!B)
|
|
*/
|
|
static int
|
|
divide_look_behind_alternatives(Node* node)
|
|
{
|
|
Node *head, *np, *insert_node;
|
|
AnchorNode* an = ANCHOR_(node);
|
|
int anc_type = an->type;
|
|
|
|
head = NODE_ANCHOR_BODY(an);
|
|
np = NODE_CAR(head);
|
|
swap_node(node, head);
|
|
NODE_CAR(node) = head;
|
|
NODE_BODY(head) = np;
|
|
|
|
np = node;
|
|
while (IS_NOT_NULL(np = NODE_CDR(np))) {
|
|
insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
|
|
CHECK_NULL_RETURN_MEMERR(insert_node);
|
|
NODE_BODY(insert_node) = NODE_CAR(np);
|
|
NODE_CAR(np) = insert_node;
|
|
}
|
|
|
|
if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
|
|
np = node;
|
|
do {
|
|
NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */
|
|
} while (IS_NOT_NULL(np = NODE_CDR(np)));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
|
|
{
|
|
int r, len;
|
|
AnchorNode* an = ANCHOR_(node);
|
|
|
|
r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);
|
|
if (r == 0)
|
|
an->char_len = len;
|
|
else if (r == GET_CHAR_LEN_VARLEN)
|
|
r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
|
else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
|
|
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
|
|
r = divide_look_behind_alternatives(node);
|
|
else
|
|
r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
next_setup(Node* node, Node* next_node, regex_t* reg)
|
|
{
|
|
NodeType type;
|
|
|
|
retry:
|
|
type = NODE_TYPE(node);
|
|
if (type == NODE_QUANT) {
|
|
QuantNode* qn = QUANT_(node);
|
|
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
|
|
#ifdef USE_QUANT_PEEK_NEXT
|
|
Node* n = get_head_value_node(next_node, 1, reg);
|
|
/* '\0': for UTF-16BE etc... */
|
|
if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
|
|
qn->next_head_exact = n;
|
|
}
|
|
#endif
|
|
/* automatic posseivation a*b ==> (?>a*)b */
|
|
if (qn->lower <= 1) {
|
|
if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
|
|
Node *x, *y;
|
|
x = get_head_value_node(NODE_BODY(node), 0, reg);
|
|
if (IS_NOT_NULL(x)) {
|
|
y = get_head_value_node(next_node, 0, reg);
|
|
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
|
|
Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
|
|
CHECK_NULL_RETURN_MEMERR(en);
|
|
NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
|
|
swap_node(node, en);
|
|
NODE_BODY(node) = en;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (type == NODE_ENCLOSURE) {
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
node = NODE_BODY(node);
|
|
goto retry;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
update_string_node_case_fold(regex_t* reg, Node *node)
|
|
{
|
|
UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
|
UChar *sbuf, *ebuf, *sp;
|
|
int r, i, len, sbuf_size;
|
|
StrNode* sn = STR_(node);
|
|
|
|
end = sn->end;
|
|
sbuf_size = (int )(end - sn->s) * 2;
|
|
sbuf = (UChar* )xmalloc(sbuf_size);
|
|
CHECK_NULL_RETURN_MEMERR(sbuf);
|
|
ebuf = sbuf + sbuf_size;
|
|
|
|
sp = sbuf;
|
|
p = sn->s;
|
|
while (p < end) {
|
|
len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
|
|
for (i = 0; i < len; i++) {
|
|
if (sp >= ebuf) {
|
|
sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
|
|
CHECK_NULL_RETURN_MEMERR(sbuf);
|
|
sp = sbuf + sbuf_size;
|
|
sbuf_size *= 2;
|
|
ebuf = sbuf + sbuf_size;
|
|
}
|
|
|
|
*sp++ = buf[i];
|
|
}
|
|
}
|
|
|
|
r = onig_node_str_set(node, sbuf, sp);
|
|
if (r != 0) {
|
|
xfree(sbuf);
|
|
return r;
|
|
}
|
|
|
|
xfree(sbuf);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
|
|
{
|
|
int r;
|
|
Node *node;
|
|
|
|
node = onig_node_new_str(s, end);
|
|
if (IS_NULL(node)) return ONIGERR_MEMORY;
|
|
|
|
r = update_string_node_case_fold(reg, node);
|
|
if (r != 0) {
|
|
onig_node_free(node);
|
|
return r;
|
|
}
|
|
|
|
NODE_STRING_SET_AMBIG(node);
|
|
NODE_STRING_SET_DONT_GET_OPT_INFO(node);
|
|
*rnode = node;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
|
|
int slen, UChar *end, regex_t* reg, Node **rnode)
|
|
{
|
|
int r, i, j;
|
|
int len;
|
|
int varlen;
|
|
Node *anode, *var_anode, *snode, *xnode, *an;
|
|
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
|
|
|
*rnode = var_anode = NULL_NODE;
|
|
|
|
varlen = 0;
|
|
for (i = 0; i < item_num; i++) {
|
|
if (items[i].byte_len != slen) {
|
|
varlen = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (varlen != 0) {
|
|
*rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
|
if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
|
|
|
|
xnode = onig_node_new_list(NULL, NULL);
|
|
if (IS_NULL(xnode)) goto mem_err;
|
|
NODE_CAR(var_anode) = xnode;
|
|
|
|
anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
|
if (IS_NULL(anode)) goto mem_err;
|
|
NODE_CAR(xnode) = anode;
|
|
}
|
|
else {
|
|
*rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
|
if (IS_NULL(anode)) return ONIGERR_MEMORY;
|
|
}
|
|
|
|
snode = onig_node_new_str(p, p + slen);
|
|
if (IS_NULL(snode)) goto mem_err;
|
|
|
|
NODE_CAR(anode) = snode;
|
|
|
|
for (i = 0; i < item_num; i++) {
|
|
snode = onig_node_new_str(NULL, NULL);
|
|
if (IS_NULL(snode)) goto mem_err;
|
|
|
|
for (j = 0; j < items[i].code_len; j++) {
|
|
len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
|
|
if (len < 0) {
|
|
r = len;
|
|
goto mem_err2;
|
|
}
|
|
|
|
r = onig_node_str_cat(snode, buf, buf + len);
|
|
if (r != 0) goto mem_err2;
|
|
}
|
|
|
|
an = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
|
if (IS_NULL(an)) {
|
|
goto mem_err2;
|
|
}
|
|
//The NULL pointer check is not necessary. It is added just for pass static
|
|
//analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"
|
|
//in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"
|
|
//in line 3510 will be executed, so the null pointer has been checked before
|
|
//deferenced in line 3584.
|
|
if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
|
|
Node *rem;
|
|
UChar *q = p + items[i].byte_len;
|
|
|
|
if (q < end) {
|
|
r = expand_case_fold_make_rem_string(&rem, q, end, reg);
|
|
if (r != 0) {
|
|
onig_node_free(an);
|
|
goto mem_err2;
|
|
}
|
|
|
|
xnode = onig_node_list_add(NULL_NODE, snode);
|
|
if (IS_NULL(xnode)) {
|
|
onig_node_free(an);
|
|
onig_node_free(rem);
|
|
goto mem_err2;
|
|
}
|
|
if (IS_NULL(onig_node_list_add(xnode, rem))) {
|
|
onig_node_free(an);
|
|
onig_node_free(xnode);
|
|
onig_node_free(rem);
|
|
goto mem_err;
|
|
}
|
|
|
|
NODE_CAR(an) = xnode;
|
|
}
|
|
else {
|
|
NODE_CAR(an) = snode;
|
|
}
|
|
|
|
NODE_CDR(var_anode) = an;
|
|
var_anode = an;
|
|
}
|
|
else {
|
|
NODE_CAR(an) = snode;
|
|
NODE_CDR(anode) = an;
|
|
anode = an;
|
|
}
|
|
}
|
|
|
|
return varlen;
|
|
|
|
mem_err2:
|
|
onig_node_free(snode);
|
|
|
|
mem_err:
|
|
onig_node_free(*rnode);
|
|
|
|
return ONIGERR_MEMORY;
|
|
}
|
|
|
|
static int
|
|
expand_case_fold_string(Node* node, regex_t* reg)
|
|
{
|
|
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
|
|
|
|
int r, n, len, alt_num;
|
|
UChar *start, *end, *p;
|
|
Node *top_root, *root, *snode, *prev_node;
|
|
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
|
StrNode* sn = STR_(node);
|
|
|
|
if (NODE_STRING_IS_AMBIG(node)) return 0;
|
|
|
|
start = sn->s;
|
|
end = sn->end;
|
|
if (start >= end) return 0;
|
|
|
|
r = 0;
|
|
top_root = root = prev_node = snode = NULL_NODE;
|
|
alt_num = 1;
|
|
p = start;
|
|
while (p < end) {
|
|
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,
|
|
items);
|
|
if (n < 0) {
|
|
r = n;
|
|
goto err;
|
|
}
|
|
|
|
len = enclen(reg->enc, p);
|
|
|
|
if (n == 0) {
|
|
if (IS_NULL(snode)) {
|
|
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
|
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
|
if (IS_NULL(root)) {
|
|
onig_node_free(prev_node);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
|
|
prev_node = snode = onig_node_new_str(NULL, NULL);
|
|
if (IS_NULL(snode)) goto mem_err;
|
|
if (IS_NOT_NULL(root)) {
|
|
if (IS_NULL(onig_node_list_add(root, snode))) {
|
|
onig_node_free(snode);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
}
|
|
|
|
r = onig_node_str_cat(snode, p, p + len);
|
|
if (r != 0) goto err;
|
|
}
|
|
else {
|
|
alt_num *= (n + 1);
|
|
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
|
|
|
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
|
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
|
if (IS_NULL(root)) {
|
|
onig_node_free(prev_node);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
|
|
r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
|
|
if (r < 0) goto mem_err;
|
|
if (r == 1) {
|
|
if (IS_NULL(root)) {
|
|
top_root = prev_node;
|
|
}
|
|
else {
|
|
if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
|
onig_node_free(prev_node);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
|
|
root = NODE_CAR(prev_node);
|
|
}
|
|
else { /* r == 0 */
|
|
if (IS_NOT_NULL(root)) {
|
|
if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
|
onig_node_free(prev_node);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
}
|
|
|
|
snode = NULL_NODE;
|
|
}
|
|
|
|
p += len;
|
|
}
|
|
|
|
if (p < end) {
|
|
Node *srem;
|
|
|
|
r = expand_case_fold_make_rem_string(&srem, p, end, reg);
|
|
if (r != 0) goto mem_err;
|
|
|
|
if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
|
|
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
|
if (IS_NULL(root)) {
|
|
onig_node_free(srem);
|
|
onig_node_free(prev_node);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
|
|
if (IS_NULL(root)) {
|
|
prev_node = srem;
|
|
}
|
|
else {
|
|
if (IS_NULL(onig_node_list_add(root, srem))) {
|
|
onig_node_free(srem);
|
|
goto mem_err;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ending */
|
|
top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
|
|
swap_node(node, top_root);
|
|
onig_node_free(top_root);
|
|
return 0;
|
|
|
|
mem_err:
|
|
r = ONIGERR_MEMORY;
|
|
|
|
err:
|
|
onig_node_free(top_root);
|
|
return r;
|
|
}
|
|
|
|
#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
|
|
static enum QuantBodyEmpty
|
|
quantifiers_memory_node_info(Node* node)
|
|
{
|
|
int r = QUANT_BODY_IS_EMPTY;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
{
|
|
int v;
|
|
do {
|
|
v = quantifiers_memory_node_info(NODE_CAR(node));
|
|
if (v > r) r = v;
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
if (NODE_IS_RECURSION(node)) {
|
|
return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
|
|
}
|
|
else
|
|
r = quantifiers_memory_node_info(NODE_BODY(node));
|
|
break;
|
|
#endif
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
if (qn->upper != 0) {
|
|
r = quantifiers_memory_node_info(NODE_BODY(node));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
switch (en->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
if (NODE_IS_RECURSION(node)) {
|
|
return QUANT_BODY_IS_EMPTY_REC;
|
|
}
|
|
return QUANT_BODY_IS_EMPTY_MEM;
|
|
break;
|
|
|
|
case ENCLOSURE_OPTION:
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
r = quantifiers_memory_node_info(NODE_BODY(node));
|
|
break;
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
int v;
|
|
r = quantifiers_memory_node_info(NODE_BODY(node));
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
v = quantifiers_memory_node_info(en->te.Then);
|
|
if (v > r) r = v;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
v = quantifiers_memory_node_info(en->te.Else);
|
|
if (v > r) r = v;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
case NODE_STRING:
|
|
case NODE_CTYPE:
|
|
case NODE_CCLASS:
|
|
case NODE_ANCHOR:
|
|
case NODE_GIMMICK:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
|
|
|
|
|
|
/* Context bits carried in the `state` argument of the setup passes. */
#define IN_ALT          (1<<0)  /* below an alternation */
#define IN_NOT          (1<<1)  /* below a negative look-ahead/look-behind */
#define IN_REAL_REPEAT  (1<<2)  /* below a quantifier with upper >= 2 or infinite */
#define IN_VAR_REPEAT   (1<<3)  /* below a quantifier with lower != upper */
#define IN_ZERO_REPEAT  (1<<4)  /* below a quantifier with upper == 0 */
#define IN_MULTI_ENTRY  (1<<5)  /* NOTE(review): presumably a group entered
                                   from more than one place; confirm in
                                   setup_called_state() */
|
|
|
|
#ifdef USE_CALL
|
|
|
|
#ifdef __GNUC__
|
|
__inline
|
|
#endif
|
|
static int
|
|
setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
|
|
{
|
|
MemEnv* mem_env = SCANENV_MEMENV(env);
|
|
|
|
if (cn->by_number != 0) {
|
|
int gnum = cn->group_num;
|
|
|
|
if (env->num_named > 0 &&
|
|
IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
|
! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
|
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
|
}
|
|
|
|
if (gnum > env->num_mem) {
|
|
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
|
|
cn->name, cn->name_end);
|
|
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
|
|
}
|
|
|
|
set_call_attr:
|
|
NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
|
|
if (IS_NULL(NODE_CALL_BODY(cn))) {
|
|
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
|
|
cn->name, cn->name_end);
|
|
return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
|
}
|
|
}
|
|
else {
|
|
int *refs;
|
|
|
|
int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
|
|
if (n <= 0) {
|
|
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
|
|
cn->name, cn->name_end);
|
|
return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
|
}
|
|
else if (n > 1) {
|
|
onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
|
|
cn->name, cn->name_end);
|
|
return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
|
|
}
|
|
else {
|
|
cn->group_num = refs[0];
|
|
goto set_call_attr;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
setup_call2_call(Node* node)
|
|
{
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
setup_call2_call(NODE_CAR(node));
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
setup_call2_call(NODE_BODY(node));
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
|
|
setup_call2_call(NODE_BODY(node));
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (! NODE_IS_MARK1(node)) {
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
setup_call2_call(NODE_BODY(node));
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
}
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
setup_call2_call(NODE_BODY(node));
|
|
if (IS_NOT_NULL(en->te.Then))
|
|
setup_call2_call(en->te.Then);
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
setup_call2_call(en->te.Else);
|
|
}
|
|
else {
|
|
setup_call2_call(NODE_BODY(node));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CALL:
|
|
if (! NODE_IS_MARK1(node)) {
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
{
|
|
CallNode* cn = CALL_(node);
|
|
Node* called = NODE_CALL_BODY(cn);
|
|
|
|
cn->entry_count++;
|
|
|
|
NODE_STATUS_ADD(called, CALLED);
|
|
ENCLOSURE_(called)->m.entry_count++;
|
|
setup_call2_call(called);
|
|
}
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static int
|
|
setup_call(Node* node, ScanEnv* env, int state)
|
|
{
|
|
int r;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = setup_call(NODE_CAR(node), env, state);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
if (QUANT_(node)->upper == 0)
|
|
state |= IN_ZERO_REPEAT;
|
|
|
|
r = setup_call(NODE_BODY(node), env, state);
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
|
|
r = setup_call(NODE_BODY(node), env, state);
|
|
else
|
|
r = 0;
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if ((state & IN_ZERO_REPEAT) != 0) {
|
|
NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
|
|
ENCLOSURE_(node)->m.entry_count--;
|
|
}
|
|
r = setup_call(NODE_BODY(node), env, state);
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
r = setup_call(NODE_BODY(node), env, state);
|
|
if (r != 0) return r;
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = setup_call(en->te.Then, env, state);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
r = setup_call(en->te.Else, env, state);
|
|
}
|
|
else
|
|
r = setup_call(NODE_BODY(node), env, state);
|
|
}
|
|
break;
|
|
|
|
case NODE_CALL:
|
|
if ((state & IN_ZERO_REPEAT) != 0) {
|
|
NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
|
|
CALL_(node)->entry_count--;
|
|
}
|
|
|
|
r = setup_call_node_call(CALL_(node), env, state);
|
|
break;
|
|
|
|
default:
|
|
r = 0;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
setup_call2(Node* node)
|
|
{
|
|
int r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
do {
|
|
r = setup_call2(NODE_CAR(node));
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
if (QUANT_(node)->upper != 0)
|
|
r = setup_call2(NODE_BODY(node));
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
|
|
r = setup_call2(NODE_BODY(node));
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
if (! NODE_IS_IN_ZERO_REPEAT(node))
|
|
r = setup_call2(NODE_BODY(node));
|
|
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (r != 0) return r;
|
|
if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = setup_call2(en->te.Then);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
r = setup_call2(en->te.Else);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CALL:
|
|
if (! NODE_IS_IN_ZERO_REPEAT(node)) {
|
|
setup_call2_call(node);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
|
|
static void
|
|
setup_called_state_call(Node* node, int state)
|
|
{
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_ALT:
|
|
state |= IN_ALT;
|
|
/* fall */
|
|
case NODE_LIST:
|
|
do {
|
|
setup_called_state_call(NODE_CAR(node), state);
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
|
|
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
|
|
state |= IN_REAL_REPEAT;
|
|
if (qn->lower != qn->upper)
|
|
state |= IN_VAR_REPEAT;
|
|
|
|
setup_called_state_call(NODE_QUANT_BODY(qn), state);
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
{
|
|
AnchorNode* an = ANCHOR_(node);
|
|
|
|
switch (an->type) {
|
|
case ANCHOR_PREC_READ_NOT:
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
state |= IN_NOT;
|
|
/* fall */
|
|
case ANCHOR_PREC_READ:
|
|
case ANCHOR_LOOK_BEHIND:
|
|
setup_called_state_call(NODE_ANCHOR_BODY(an), state);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
if (en->type == ENCLOSURE_MEMORY) {
|
|
if (NODE_IS_MARK1(node)) {
|
|
if ((~en->m.called_state & state) != 0) {
|
|
en->m.called_state |= state;
|
|
setup_called_state_call(NODE_BODY(node), state);
|
|
}
|
|
}
|
|
else {
|
|
NODE_STATUS_ADD(node, MARK1);
|
|
en->m.called_state |= state;
|
|
setup_called_state_call(NODE_BODY(node), state);
|
|
NODE_STATUS_REMOVE(node, MARK1);
|
|
}
|
|
}
|
|
else if (en->type == ENCLOSURE_IF_ELSE) {
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
setup_called_state_call(en->te.Then, state);
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
setup_called_state_call(en->te.Else, state);
|
|
}
|
|
else {
|
|
setup_called_state_call(NODE_BODY(node), state);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CALL:
|
|
setup_called_state_call(NODE_BODY(node), state);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
setup_called_state(Node* node, int state)
|
|
{
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_ALT:
|
|
state |= IN_ALT;
|
|
/* fall */
|
|
case NODE_LIST:
|
|
do {
|
|
setup_called_state(NODE_CAR(node), state);
|
|
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
setup_called_state_call(node, state);
|
|
break;
|
|
#endif
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
switch (en->type) {
|
|
case ENCLOSURE_MEMORY:
|
|
if (en->m.entry_count > 1)
|
|
state |= IN_MULTI_ENTRY;
|
|
|
|
en->m.called_state |= state;
|
|
/* fall */
|
|
case ENCLOSURE_OPTION:
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
setup_called_state(NODE_BODY(node), state);
|
|
break;
|
|
case ENCLOSURE_IF_ELSE:
|
|
setup_called_state(NODE_BODY(node), state);
|
|
if (IS_NOT_NULL(en->te.Then))
|
|
setup_called_state(en->te.Then, state);
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
setup_called_state(en->te.Else, state);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
QuantNode* qn = QUANT_(node);
|
|
|
|
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
|
|
state |= IN_REAL_REPEAT;
|
|
if (qn->lower != qn->upper)
|
|
state |= IN_VAR_REPEAT;
|
|
|
|
setup_called_state(NODE_QUANT_BODY(qn), state);
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
{
|
|
AnchorNode* an = ANCHOR_(node);
|
|
|
|
switch (an->type) {
|
|
case ANCHOR_PREC_READ_NOT:
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
state |= IN_NOT;
|
|
/* fall */
|
|
case ANCHOR_PREC_READ:
|
|
case ANCHOR_LOOK_BEHIND:
|
|
setup_called_state(NODE_ANCHOR_BODY(an), state);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
case NODE_STRING:
|
|
case NODE_CTYPE:
|
|
case NODE_CCLASS:
|
|
case NODE_GIMMICK:
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
#endif /* USE_CALL */
|
|
|
|
|
|
static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
|
|
|
|
#ifdef __GNUC__
|
|
__inline
|
|
#endif
|
|
static int
|
|
setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
|
|
{
|
|
/* allowed node types in look-behind */
|
|
#define ALLOWED_TYPE_IN_LB \
|
|
( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
|
|
| NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \
|
|
| NODE_BIT_CALL | NODE_BIT_GIMMICK)
|
|
|
|
#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
|
|
#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
|
|
|
|
#define ALLOWED_ANCHOR_IN_LB \
|
|
( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
|
|
| ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
|
|
| ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
|
|
| ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
|
|
| ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
|
|
|
|
#define ALLOWED_ANCHOR_IN_LB_NOT \
|
|
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
|
|
| ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
|
|
| ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
|
|
| ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
|
|
| ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
|
|
|
|
int r;
|
|
AnchorNode* an = ANCHOR_(node);
|
|
|
|
switch (an->type) {
|
|
case ANCHOR_PREC_READ:
|
|
r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
|
|
break;
|
|
case ANCHOR_PREC_READ_NOT:
|
|
r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
|
|
break;
|
|
|
|
case ANCHOR_LOOK_BEHIND:
|
|
{
|
|
r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
|
|
ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);
|
|
if (r < 0) return r;
|
|
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
|
r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
|
|
if (r != 0) return r;
|
|
r = setup_look_behind(node, reg, env);
|
|
}
|
|
break;
|
|
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
{
|
|
r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
|
|
ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
|
|
if (r < 0) return r;
|
|
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
|
r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
|
|
if (r != 0) return r;
|
|
r = setup_look_behind(node, reg, env);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
r = 0;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
#ifdef __GNUC__
|
|
__inline
|
|
#endif
|
|
static int
|
|
setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
|
|
{
|
|
int r;
|
|
OnigLen d;
|
|
QuantNode* qn = QUANT_(node);
|
|
Node* body = NODE_BODY(node);
|
|
|
|
if ((state & IN_REAL_REPEAT) != 0) {
|
|
NODE_STATUS_ADD(node, IN_REAL_REPEAT);
|
|
}
|
|
if ((state & IN_MULTI_ENTRY) != 0) {
|
|
NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
|
|
}
|
|
|
|
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
|
|
d = tree_min_len(body, env);
|
|
if (d == 0) {
|
|
#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
|
|
qn->body_empty_info = quantifiers_memory_node_info(body);
|
|
if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
|
|
if (NODE_TYPE(body) == NODE_ENCLOSURE &&
|
|
ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {
|
|
MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);
|
|
}
|
|
}
|
|
#else
|
|
qn->body_empty_info = QUANT_BODY_IS_EMPTY;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
|
|
state |= IN_REAL_REPEAT;
|
|
if (qn->lower != qn->upper)
|
|
state |= IN_VAR_REPEAT;
|
|
|
|
r = setup_tree(body, reg, state, env);
|
|
if (r != 0) return r;
|
|
|
|
/* expand string */
|
|
#define EXPAND_STRING_MAX_LENGTH 100
|
|
if (NODE_TYPE(body) == NODE_STRING) {
|
|
if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
|
|
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
|
|
int len = NODE_STRING_LEN(body);
|
|
StrNode* sn = STR_(body);
|
|
|
|
if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
|
|
int i, n = qn->lower;
|
|
onig_node_conv_to_str_node(node, STR_(body)->flag);
|
|
for (i = 0; i < n; i++) {
|
|
r = onig_node_str_cat(node, sn->s, sn->end);
|
|
if (r != 0) return r;
|
|
}
|
|
onig_node_free(body);
|
|
return r;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
|
if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {
|
|
if (NODE_TYPE(body) == NODE_QUANT) {
|
|
QuantNode* tqn = QUANT_(body);
|
|
if (IS_NOT_NULL(tqn->head_exact)) {
|
|
qn->head_exact = tqn->head_exact;
|
|
tqn->head_exact = NULL;
|
|
}
|
|
}
|
|
else {
|
|
qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return r;
|
|
}
|
|
|
|
/* setup_tree does the following work.
|
|
1. check empty loop. (set qn->body_empty_info)
|
|
2. expand ignore-case in char class.
|
|
3. set memory status bit flags. (reg->mem_stats)
|
|
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
|
|
5. find invalid patterns in look-behind.
|
|
6. expand repeated string.
|
|
*/
|
|
static int
|
|
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
|
{
|
|
int r = 0;
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
{
|
|
Node* prev = NULL_NODE;
|
|
do {
|
|
r = setup_tree(NODE_CAR(node), reg, state, env);
|
|
if (IS_NOT_NULL(prev) && r == 0) {
|
|
r = next_setup(prev, NODE_CAR(node), reg);
|
|
}
|
|
prev = NODE_CAR(node);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
}
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
do {
|
|
r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
|
|
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
|
|
r = expand_case_fold_string(node, reg);
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
{
|
|
int i;
|
|
int* p;
|
|
BackRefNode* br = BACKREF_(node);
|
|
p = BACKREFS_P(br);
|
|
for (i = 0; i < br->back_num; i++) {
|
|
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
|
MEM_STATUS_ON(env->backrefed_mem, p[i]);
|
|
MEM_STATUS_ON(env->bt_mem_start, p[i]);
|
|
#ifdef USE_BACKREF_WITH_LEVEL
|
|
if (NODE_IS_NEST_LEVEL(node)) {
|
|
MEM_STATUS_ON(env->bt_mem_end, p[i]);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
switch (en->type) {
|
|
case ENCLOSURE_OPTION:
|
|
{
|
|
OnigOptionType options = reg->options;
|
|
reg->options = ENCLOSURE_(node)->o.options;
|
|
r = setup_tree(NODE_BODY(node), reg, state, env);
|
|
reg->options = options;
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_MEMORY:
|
|
#ifdef USE_CALL
|
|
state |= en->m.called_state;
|
|
#endif
|
|
|
|
if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
|
|
|| NODE_IS_RECURSION(node)) {
|
|
MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
|
|
}
|
|
r = setup_tree(NODE_BODY(node), reg, state, env);
|
|
break;
|
|
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
{
|
|
Node* target = NODE_BODY(node);
|
|
r = setup_tree(target, reg, state, env);
|
|
if (NODE_TYPE(target) == NODE_QUANT) {
|
|
QuantNode* tqn = QUANT_(target);
|
|
if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
|
|
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
|
|
if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
|
|
NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_IF_ELSE:
|
|
r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
|
|
if (r != 0) return r;
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
|
|
if (r != 0) return r;
|
|
}
|
|
if (IS_NOT_NULL(en->te.Else))
|
|
r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_QUANT:
|
|
r = setup_quant(node, reg, state, env);
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
r = setup_anchor(node, reg, state, env);
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
#endif
|
|
case NODE_CTYPE:
|
|
case NODE_CCLASS:
|
|
case NODE_GIMMICK:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/* set skip map for Boyer-Moore search */
|
|
static int
|
|
set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
|
|
UChar skip[], int** int_skip)
|
|
{
|
|
int i, len;
|
|
|
|
len = (int )(end - s);
|
|
if (len < ONIG_CHAR_TABLE_SIZE) {
|
|
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
|
|
|
|
for (i = 0; i < len - 1; i++)
|
|
skip[s[i]] = len - 1 - i;
|
|
}
|
|
else {
|
|
if (IS_NULL(*int_skip)) {
|
|
*int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
|
if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
|
}
|
|
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
|
|
|
|
for (i = 0; i < len - 1; i++)
|
|
(*int_skip)[s[i]] = len - 1 - i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#define OPT_EXACT_MAXLEN 24
|
|
|
|
typedef struct {
|
|
OnigLen min; /* min byte length */
|
|
OnigLen max; /* max byte length */
|
|
} MinMax;
|
|
|
|
typedef struct {
|
|
MinMax mmd;
|
|
OnigEncoding enc;
|
|
OnigOptionType options;
|
|
OnigCaseFoldType case_fold_flag;
|
|
ScanEnv* scan_env;
|
|
} OptEnv;
|
|
|
|
/* Anchor bit sets; add_opt_anc_info() files a bit on the side chosen by is_left(). */
typedef struct {
  int left;
  int right;
} OptAnc;
|
|
|
|
typedef struct {
|
|
MinMax mmd; /* position */
|
|
OptAnc anc;
|
|
int reach_end;
|
|
int ignore_case;
|
|
int len;
|
|
UChar s[OPT_EXACT_MAXLEN];
|
|
} OptExact;
|
|
|
|
typedef struct {
|
|
MinMax mmd; /* position */
|
|
OptAnc anc;
|
|
int value; /* weighted value */
|
|
UChar map[ONIG_CHAR_TABLE_SIZE];
|
|
} OptMap;
|
|
|
|
typedef struct {
|
|
MinMax len;
|
|
OptAnc anc;
|
|
OptExact exb; /* boundary */
|
|
OptExact exm; /* middle */
|
|
OptExact expr; /* prec read (?=...) */
|
|
OptMap map; /* boundary */
|
|
} NodeOpt;
|
|
|
|
|
|
static int
|
|
map_position_value(OnigEncoding enc, int i)
|
|
{
|
|
static const short int Vals[] = {
|
|
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
|
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
|
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
|
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
|
};
|
|
|
|
if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
|
|
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
|
return 20;
|
|
else
|
|
return (int )Vals[i];
|
|
}
|
|
else
|
|
return 4; /* Take it easy. */
|
|
}
|
|
|
|
static int
|
|
distance_value(MinMax* mm)
|
|
{
|
|
/* 1000 / (min-max-dist + 1) */
|
|
static const short int dist_vals[] = {
|
|
1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
|
|
91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
|
|
48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
|
|
32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
|
|
24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
|
|
20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
|
|
16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
|
|
14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
|
|
12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
|
|
11, 11, 11, 11, 11, 10, 10, 10, 10, 10
|
|
};
|
|
|
|
OnigLen d;
|
|
|
|
if (mm->max == INFINITE_LEN) return 0;
|
|
|
|
d = mm->max - mm->min;
|
|
if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
|
|
/* return dist_vals[d] * 16 / (mm->min + 12); */
|
|
return (int )dist_vals[d];
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
|
|
{
|
|
if (v2 <= 0) return -1;
|
|
if (v1 <= 0) return 1;
|
|
|
|
v1 *= distance_value(d1);
|
|
v2 *= distance_value(d2);
|
|
|
|
if (v2 > v1) return 1;
|
|
if (v2 < v1) return -1;
|
|
|
|
if (d2->min < d1->min) return 1;
|
|
if (d2->min > d1->min) return -1;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
is_equal_mml(MinMax* a, MinMax* b)
|
|
{
|
|
return (a->min == b->min && a->max == b->max) ? 1 : 0;
|
|
}
|
|
|
|
static void
|
|
set_mml(MinMax* l, OnigLen min, OnigLen max)
|
|
{
|
|
l->min = min;
|
|
l->max = max;
|
|
}
|
|
|
|
static void
|
|
clear_mml(MinMax* l)
|
|
{
|
|
l->min = l->max = 0;
|
|
}
|
|
|
|
static void
|
|
copy_mml(MinMax* to, MinMax* from)
|
|
{
|
|
to->min = from->min;
|
|
to->max = from->max;
|
|
}
|
|
|
|
static void
|
|
add_mml(MinMax* to, MinMax* from)
|
|
{
|
|
to->min = distance_add(to->min, from->min);
|
|
to->max = distance_add(to->max, from->max);
|
|
}
|
|
|
|
static void
|
|
alt_merge_mml(MinMax* to, MinMax* from)
|
|
{
|
|
if (to->min > from->min) to->min = from->min;
|
|
if (to->max < from->max) to->max = from->max;
|
|
}
|
|
|
|
static void
|
|
copy_opt_env(OptEnv* to, OptEnv* from)
|
|
{
|
|
*to = *from;
|
|
}
|
|
|
|
static void
|
|
clear_opt_anc_info(OptAnc* a)
|
|
{
|
|
a->left = 0;
|
|
a->right = 0;
|
|
}
|
|
|
|
static void
|
|
copy_opt_anc_info(OptAnc* to, OptAnc* from)
|
|
{
|
|
*to = *from;
|
|
}
|
|
|
|
static void
|
|
concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
|
|
OnigLen left_len, OnigLen right_len)
|
|
{
|
|
clear_opt_anc_info(to);
|
|
|
|
to->left = left->left;
|
|
if (left_len == 0) {
|
|
to->left |= right->left;
|
|
}
|
|
|
|
to->right = right->right;
|
|
if (right_len == 0) {
|
|
to->right |= left->right;
|
|
}
|
|
else {
|
|
to->right |= (left->right & ANCHOR_PREC_READ_NOT);
|
|
}
|
|
}
|
|
|
|
static int
|
|
is_left(int a)
|
|
{
|
|
if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||
|
|
a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
is_set_opt_anc_info(OptAnc* to, int anc)
|
|
{
|
|
if ((to->left & anc) != 0) return 1;
|
|
|
|
return ((to->right & anc) != 0 ? 1 : 0);
|
|
}
|
|
|
|
static void
|
|
add_opt_anc_info(OptAnc* to, int anc)
|
|
{
|
|
if (is_left(anc))
|
|
to->left |= anc;
|
|
else
|
|
to->right |= anc;
|
|
}
|
|
|
|
static void
|
|
remove_opt_anc_info(OptAnc* to, int anc)
|
|
{
|
|
if (is_left(anc))
|
|
to->left &= ~anc;
|
|
else
|
|
to->right &= ~anc;
|
|
}
|
|
|
|
static void
|
|
alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
|
|
{
|
|
to->left &= add->left;
|
|
to->right &= add->right;
|
|
}
|
|
|
|
static int
|
|
is_full_opt_exact(OptExact* e)
|
|
{
|
|
return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);
|
|
}
|
|
|
|
static void
|
|
clear_opt_exact(OptExact* e)
|
|
{
|
|
clear_mml(&e->mmd);
|
|
clear_opt_anc_info(&e->anc);
|
|
e->reach_end = 0;
|
|
e->ignore_case = 0;
|
|
e->len = 0;
|
|
e->s[0] = '\0';
|
|
}
|
|
|
|
static void
|
|
copy_opt_exact(OptExact* to, OptExact* from)
|
|
{
|
|
*to = *from;
|
|
}
|
|
|
|
static int
|
|
concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
|
|
{
|
|
int i, j, len, r;
|
|
UChar *p, *end;
|
|
OptAnc tanc;
|
|
|
|
if (! to->ignore_case && add->ignore_case) {
|
|
if (to->len >= add->len) return 0; /* avoid */
|
|
|
|
to->ignore_case = 1;
|
|
}
|
|
|
|
r = 0;
|
|
p = add->s;
|
|
end = p + add->len;
|
|
for (i = to->len; p < end; ) {
|
|
len = enclen(enc, p);
|
|
if (i + len > OPT_EXACT_MAXLEN) {
|
|
r = 1; /* 1:full */
|
|
break;
|
|
}
|
|
for (j = 0; j < len && p < end; j++)
|
|
to->s[i++] = *p++;
|
|
}
|
|
|
|
to->len = i;
|
|
to->reach_end = (p == end ? add->reach_end : 0);
|
|
|
|
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
|
|
if (! to->reach_end) tanc.right = 0;
|
|
copy_opt_anc_info(&to->anc, &tanc);
|
|
|
|
return r;
|
|
}
|
|
|
|
static void
|
|
concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
|
|
{
|
|
int i, j, len;
|
|
UChar *p;
|
|
|
|
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
|
|
len = enclen(enc, p);
|
|
if (i + len > OPT_EXACT_MAXLEN) break;
|
|
for (j = 0; j < len && p < end; j++)
|
|
to->s[i++] = *p++;
|
|
}
|
|
|
|
to->len = i;
|
|
}
|
|
|
|
static void
|
|
alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
|
|
{
|
|
int i, j, len;
|
|
|
|
if (add->len == 0 || to->len == 0) {
|
|
clear_opt_exact(to);
|
|
return ;
|
|
}
|
|
|
|
if (! is_equal_mml(&to->mmd, &add->mmd)) {
|
|
clear_opt_exact(to);
|
|
return ;
|
|
}
|
|
|
|
for (i = 0; i < to->len && i < add->len; ) {
|
|
if (to->s[i] != add->s[i]) break;
|
|
len = enclen(env->enc, to->s + i);
|
|
|
|
for (j = 1; j < len; j++) {
|
|
if (to->s[i+j] != add->s[i+j]) break;
|
|
}
|
|
if (j < len) break;
|
|
i += len;
|
|
}
|
|
|
|
if (! add->reach_end || i < add->len || i < to->len) {
|
|
to->reach_end = 0;
|
|
}
|
|
to->len = i;
|
|
to->ignore_case |= add->ignore_case;
|
|
|
|
alt_merge_opt_anc_info(&to->anc, &add->anc);
|
|
if (! to->reach_end) to->anc.right = 0;
|
|
}
|
|
|
|
static void
|
|
select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
|
|
{
|
|
int vn, va;
|
|
|
|
vn = now->len;
|
|
va = alt->len;
|
|
|
|
if (va == 0) {
|
|
return ;
|
|
}
|
|
else if (vn == 0) {
|
|
copy_opt_exact(now, alt);
|
|
return ;
|
|
}
|
|
else if (vn <= 2 && va <= 2) {
|
|
/* ByteValTable[x] is big value --> low price */
|
|
va = map_position_value(enc, now->s[0]);
|
|
vn = map_position_value(enc, alt->s[0]);
|
|
|
|
if (now->len > 1) vn += 5;
|
|
if (alt->len > 1) va += 5;
|
|
}
|
|
|
|
if (now->ignore_case == 0) vn *= 2;
|
|
if (alt->ignore_case == 0) va *= 2;
|
|
|
|
if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
|
|
copy_opt_exact(now, alt);
|
|
}
|
|
|
|
static void
|
|
clear_opt_map(OptMap* map)
|
|
{
|
|
static const OptMap clean_info = {
|
|
{0, 0}, {0, 0}, 0,
|
|
{
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
}
|
|
};
|
|
|
|
xmemcpy(map, &clean_info, sizeof(OptMap));
|
|
}
|
|
|
|
static void
|
|
copy_opt_map(OptMap* to, OptMap* from)
|
|
{
|
|
xmemcpy(to,from,sizeof(OptMap));
|
|
}
|
|
|
|
static void
|
|
add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
|
|
{
|
|
if (m->map[c] == 0) {
|
|
m->map[c] = 1;
|
|
m->value += map_position_value(enc, c);
|
|
}
|
|
}
|
|
|
|
static int
|
|
add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
|
|
OnigEncoding enc, OnigCaseFoldType fold_flag)
|
|
{
|
|
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
|
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
|
int i, n;
|
|
|
|
add_char_opt_map(map, p[0], enc);
|
|
|
|
fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
|
|
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
|
|
if (n < 0) return n;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
|
|
add_char_opt_map(map, buf[0], enc);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
select_opt_map(OptMap* now, OptMap* alt)
|
|
{
|
|
static int z = 1<<15; /* 32768: something big value */
|
|
|
|
int vn, va;
|
|
|
|
if (alt->value == 0) return ;
|
|
if (now->value == 0) {
|
|
copy_opt_map(now, alt);
|
|
return ;
|
|
}
|
|
|
|
vn = z / now->value;
|
|
va = z / alt->value;
|
|
if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
|
|
copy_opt_map(now, alt);
|
|
}
|
|
|
|
static int
|
|
comp_opt_exact_or_map(OptExact* e, OptMap* m)
|
|
{
|
|
#define COMP_EM_BASE 20
|
|
int ae, am;
|
|
|
|
if (m->value <= 0) return -1;
|
|
|
|
ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
|
|
am = COMP_EM_BASE * 5 * 2 / m->value;
|
|
return comp_distance_value(&e->mmd, &m->mmd, ae, am);
|
|
}
|
|
|
|
static void
|
|
alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
|
|
{
|
|
int i, val;
|
|
|
|
/* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
|
|
if (to->value == 0) return ;
|
|
if (add->value == 0 || to->mmd.max < add->mmd.min) {
|
|
clear_opt_map(to);
|
|
return ;
|
|
}
|
|
|
|
alt_merge_mml(&to->mmd, &add->mmd);
|
|
|
|
val = 0;
|
|
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
|
if (add->map[i])
|
|
to->map[i] = 1;
|
|
|
|
if (to->map[i])
|
|
val += map_position_value(enc, i);
|
|
}
|
|
to->value = val;
|
|
|
|
alt_merge_opt_anc_info(&to->anc, &add->anc);
|
|
}
|
|
|
|
static void
|
|
set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)
|
|
{
|
|
copy_mml(&(opt->exb.mmd), plen);
|
|
copy_mml(&(opt->expr.mmd), plen);
|
|
copy_mml(&(opt->map.mmd), plen);
|
|
}
|
|
|
|
static void
|
|
clear_node_opt_info(NodeOpt* opt)
|
|
{
|
|
clear_mml(&opt->len);
|
|
clear_opt_anc_info(&opt->anc);
|
|
clear_opt_exact(&opt->exb);
|
|
clear_opt_exact(&opt->exm);
|
|
clear_opt_exact(&opt->expr);
|
|
clear_opt_map(&opt->map);
|
|
}
|
|
|
|
static void
|
|
copy_node_opt_info(NodeOpt* to, NodeOpt* from)
|
|
{
|
|
xmemcpy(to,from,sizeof(NodeOpt));
|
|
}
|
|
|
|
static void
|
|
concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
|
|
{
|
|
int exb_reach, exm_reach;
|
|
OptAnc tanc;
|
|
|
|
concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
|
|
copy_opt_anc_info(&to->anc, &tanc);
|
|
|
|
if (add->exb.len > 0 && to->len.max == 0) {
|
|
concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);
|
|
copy_opt_anc_info(&add->exb.anc, &tanc);
|
|
}
|
|
|
|
if (add->map.value > 0 && to->len.max == 0) {
|
|
if (add->map.mmd.max == 0)
|
|
add->map.anc.left |= to->anc.left;
|
|
}
|
|
|
|
exb_reach = to->exb.reach_end;
|
|
exm_reach = to->exm.reach_end;
|
|
|
|
if (add->len.max != 0)
|
|
to->exb.reach_end = to->exm.reach_end = 0;
|
|
|
|
if (add->exb.len > 0) {
|
|
if (exb_reach) {
|
|
concat_opt_exact(&to->exb, &add->exb, enc);
|
|
clear_opt_exact(&add->exb);
|
|
}
|
|
else if (exm_reach) {
|
|
concat_opt_exact(&to->exm, &add->exb, enc);
|
|
clear_opt_exact(&add->exb);
|
|
}
|
|
}
|
|
select_opt_exact(enc, &to->exm, &add->exb);
|
|
select_opt_exact(enc, &to->exm, &add->exm);
|
|
|
|
if (to->expr.len > 0) {
|
|
if (add->len.max > 0) {
|
|
if (to->expr.len > (int )add->len.max)
|
|
to->expr.len = add->len.max;
|
|
|
|
if (to->expr.mmd.max == 0)
|
|
select_opt_exact(enc, &to->exb, &to->expr);
|
|
else
|
|
select_opt_exact(enc, &to->exm, &to->expr);
|
|
}
|
|
}
|
|
else if (add->expr.len > 0) {
|
|
copy_opt_exact(&to->expr, &add->expr);
|
|
}
|
|
|
|
select_opt_map(&to->map, &add->map);
|
|
add_mml(&to->len, &add->len);
|
|
}
|
|
|
|
static void
|
|
alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
|
|
{
|
|
alt_merge_opt_anc_info(&to->anc, &add->anc);
|
|
alt_merge_opt_exact(&to->exb, &add->exb, env);
|
|
alt_merge_opt_exact(&to->exm, &add->exm, env);
|
|
alt_merge_opt_exact(&to->expr, &add->expr, env);
|
|
alt_merge_opt_map(env->enc, &to->map, &add->map);
|
|
|
|
alt_merge_mml(&to->len, &add->len);
|
|
}
|
|
|
|
|
|
#define MAX_NODE_OPT_INFO_REF_COUNT 5
|
|
|
|
static int
|
|
optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
|
|
{
|
|
int i;
|
|
int r;
|
|
NodeOpt xo;
|
|
OnigEncoding enc;
|
|
|
|
r = 0;
|
|
enc = env->enc;
|
|
clear_node_opt_info(opt);
|
|
set_bound_node_opt_info(opt, &env->mmd);
|
|
|
|
switch (NODE_TYPE(node)) {
|
|
case NODE_LIST:
|
|
{
|
|
OptEnv nenv;
|
|
Node* nd = node;
|
|
|
|
copy_opt_env(&nenv, env);
|
|
do {
|
|
r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
|
|
if (r == 0) {
|
|
add_mml(&nenv.mmd, &xo.len);
|
|
concat_left_node_opt_info(enc, opt, &xo);
|
|
}
|
|
} while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
|
|
}
|
|
break;
|
|
|
|
case NODE_ALT:
|
|
{
|
|
Node* nd = node;
|
|
|
|
do {
|
|
r = optimize_nodes(NODE_CAR(nd), &xo, env);
|
|
if (r == 0) {
|
|
if (nd == node) copy_node_opt_info(opt, &xo);
|
|
else alt_merge_node_opt_info(opt, &xo, env);
|
|
}
|
|
} while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
{
|
|
StrNode* sn = STR_(node);
|
|
int slen = (int )(sn->end - sn->s);
|
|
/* int is_raw = NODE_STRING_IS_RAW(node); */
|
|
|
|
if (! NODE_STRING_IS_AMBIG(node)) {
|
|
concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
|
|
if (slen > 0) {
|
|
add_char_opt_map(&opt->map, *(sn->s), enc);
|
|
}
|
|
set_mml(&opt->len, slen, slen);
|
|
}
|
|
else {
|
|
int max;
|
|
|
|
if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
|
|
int n = onigenc_strlen(enc, sn->s, sn->end);
|
|
max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
|
|
}
|
|
else {
|
|
concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
|
|
opt->exb.ignore_case = 1;
|
|
|
|
if (slen > 0) {
|
|
r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
|
|
enc, env->case_fold_flag);
|
|
if (r != 0) break;
|
|
}
|
|
|
|
max = slen;
|
|
}
|
|
|
|
set_mml(&opt->len, slen, max);
|
|
}
|
|
|
|
if (opt->exb.len == slen)
|
|
opt->exb.reach_end = 1;
|
|
}
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
{
|
|
int z;
|
|
CClassNode* cc = CCLASS_(node);
|
|
|
|
/* no need to check ignore case. (set in setup_tree()) */
|
|
|
|
if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
|
|
OnigLen min = ONIGENC_MBC_MINLEN(enc);
|
|
OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
|
|
|
|
set_mml(&opt->len, min, max);
|
|
}
|
|
else {
|
|
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
|
z = BITSET_AT(cc->bs, i);
|
|
if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
|
|
add_char_opt_map(&opt->map, (UChar )i, enc);
|
|
}
|
|
}
|
|
set_mml(&opt->len, 1, 1);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
{
|
|
int min, max;
|
|
int range;
|
|
|
|
max = ONIGENC_MBC_MAXLEN_DIST(enc);
|
|
|
|
if (max == 1) {
|
|
min = 1;
|
|
|
|
switch (CTYPE_(node)->ctype) {
|
|
case CTYPE_ANYCHAR:
|
|
break;
|
|
|
|
case ONIGENC_CTYPE_WORD:
|
|
range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
|
|
if (CTYPE_(node)->not != 0) {
|
|
for (i = 0; i < range; i++) {
|
|
if (! ONIGENC_IS_CODE_WORD(enc, i)) {
|
|
add_char_opt_map(&opt->map, (UChar )i, enc);
|
|
}
|
|
}
|
|
for (i = range; i < SINGLE_BYTE_SIZE; i++) {
|
|
add_char_opt_map(&opt->map, (UChar )i, enc);
|
|
}
|
|
}
|
|
else {
|
|
for (i = 0; i < range; i++) {
|
|
if (ONIGENC_IS_CODE_WORD(enc, i)) {
|
|
add_char_opt_map(&opt->map, (UChar )i, enc);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
min = ONIGENC_MBC_MINLEN(enc);
|
|
}
|
|
set_mml(&opt->len, min, max);
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
switch (ANCHOR_(node)->type) {
|
|
case ANCHOR_BEGIN_BUF:
|
|
case ANCHOR_BEGIN_POSITION:
|
|
case ANCHOR_BEGIN_LINE:
|
|
case ANCHOR_END_BUF:
|
|
case ANCHOR_SEMI_END_BUF:
|
|
case ANCHOR_END_LINE:
|
|
case ANCHOR_PREC_READ_NOT:
|
|
case ANCHOR_LOOK_BEHIND:
|
|
add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
|
|
break;
|
|
|
|
case ANCHOR_PREC_READ:
|
|
{
|
|
r = optimize_nodes(NODE_BODY(node), &xo, env);
|
|
if (r == 0) {
|
|
if (xo.exb.len > 0)
|
|
copy_opt_exact(&opt->expr, &xo.exb);
|
|
else if (xo.exm.len > 0)
|
|
copy_opt_exact(&opt->expr, &xo.exm);
|
|
|
|
opt->expr.reach_end = 0;
|
|
|
|
if (xo.map.value > 0)
|
|
copy_opt_map(&opt->map, &xo.map);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
if (! NODE_IS_CHECKER(node)) {
|
|
int* backs;
|
|
OnigLen min, max, tmin, tmax;
|
|
MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
|
|
BackRefNode* br = BACKREF_(node);
|
|
|
|
if (NODE_IS_RECURSION(node)) {
|
|
set_mml(&opt->len, 0, INFINITE_LEN);
|
|
break;
|
|
}
|
|
backs = BACKREFS_P(br);
|
|
min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
|
|
max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
|
|
for (i = 1; i < br->back_num; i++) {
|
|
tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
|
|
tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
|
|
if (min > tmin) min = tmin;
|
|
if (max < tmax) max = tmax;
|
|
}
|
|
set_mml(&opt->len, min, max);
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
if (NODE_IS_RECURSION(node))
|
|
set_mml(&opt->len, 0, INFINITE_LEN);
|
|
else {
|
|
OnigOptionType save = env->options;
|
|
env->options = ENCLOSURE_(NODE_BODY(node))->o.options;
|
|
r = optimize_nodes(NODE_BODY(node), opt, env);
|
|
env->options = save;
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
case NODE_QUANT:
|
|
{
|
|
OnigLen min, max;
|
|
QuantNode* qn = QUANT_(node);
|
|
|
|
r = optimize_nodes(NODE_BODY(node), &xo, env);
|
|
if (r != 0) break;
|
|
|
|
if (qn->lower > 0) {
|
|
copy_node_opt_info(opt, &xo);
|
|
if (xo.exb.len > 0) {
|
|
if (xo.exb.reach_end) {
|
|
for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
|
|
int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);
|
|
if (rc > 0) break;
|
|
}
|
|
if (i < qn->lower) opt->exb.reach_end = 0;
|
|
}
|
|
}
|
|
|
|
if (qn->lower != qn->upper) {
|
|
opt->exb.reach_end = 0;
|
|
opt->exm.reach_end = 0;
|
|
}
|
|
if (qn->lower > 1)
|
|
opt->exm.reach_end = 0;
|
|
}
|
|
|
|
if (IS_REPEAT_INFINITE(qn->upper)) {
|
|
if (env->mmd.max == 0 &&
|
|
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
|
|
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
|
|
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);
|
|
else
|
|
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);
|
|
}
|
|
|
|
max = (xo.len.max > 0 ? INFINITE_LEN : 0);
|
|
}
|
|
else {
|
|
max = distance_multiply(xo.len.max, qn->upper);
|
|
}
|
|
|
|
min = distance_multiply(xo.len.min, qn->lower);
|
|
set_mml(&opt->len, min, max);
|
|
}
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
{
|
|
EnclosureNode* en = ENCLOSURE_(node);
|
|
|
|
switch (en->type) {
|
|
case ENCLOSURE_OPTION:
|
|
{
|
|
OnigOptionType save = env->options;
|
|
|
|
env->options = en->o.options;
|
|
r = optimize_nodes(NODE_BODY(node), opt, env);
|
|
env->options = save;
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_MEMORY:
|
|
#ifdef USE_CALL
|
|
en->opt_count++;
|
|
if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
|
|
OnigLen min, max;
|
|
|
|
min = 0;
|
|
max = INFINITE_LEN;
|
|
if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
|
|
if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
|
|
set_mml(&opt->len, min, max);
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
r = optimize_nodes(NODE_BODY(node), opt, env);
|
|
if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {
|
|
if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
|
|
remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
r = optimize_nodes(NODE_BODY(node), opt, env);
|
|
break;
|
|
|
|
case ENCLOSURE_IF_ELSE:
|
|
{
|
|
OptEnv nenv;
|
|
|
|
copy_opt_env(&nenv, env);
|
|
r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);
|
|
if (r == 0) {
|
|
add_mml(&nenv.mmd, &xo.len);
|
|
concat_left_node_opt_info(enc, opt, &xo);
|
|
if (IS_NOT_NULL(en->te.Then)) {
|
|
r = optimize_nodes(en->te.Then, &xo, &nenv);
|
|
if (r == 0) {
|
|
concat_left_node_opt_info(enc, opt, &xo);
|
|
}
|
|
}
|
|
|
|
if (IS_NOT_NULL(en->te.Else)) {
|
|
r = optimize_nodes(en->te.Else, &xo, env);
|
|
if (r == 0)
|
|
alt_merge_node_opt_info(opt, &xo, env);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
break;
|
|
|
|
default:
|
|
#ifdef ONIG_DEBUG
|
|
fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));
|
|
#endif
|
|
r = ONIGERR_TYPE_BUG;
|
|
break;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
static int
|
|
set_optimize_exact(regex_t* reg, OptExact* e)
|
|
{
|
|
int r;
|
|
|
|
if (e->len == 0) return 0;
|
|
|
|
if (e->ignore_case) {
|
|
reg->exact = (UChar* )xmalloc(e->len);
|
|
CHECK_NULL_RETURN_MEMERR(reg->exact);
|
|
xmemcpy(reg->exact, e->s, e->len);
|
|
reg->exact_end = reg->exact + e->len;
|
|
reg->optimize = OPTIMIZE_EXACT_IC;
|
|
}
|
|
else {
|
|
int allow_reverse;
|
|
|
|
reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);
|
|
CHECK_NULL_RETURN_MEMERR(reg->exact);
|
|
reg->exact_end = reg->exact + e->len;
|
|
|
|
allow_reverse =
|
|
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
|
|
|
|
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
|
r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
|
|
reg->map, &(reg->int_map));
|
|
if (r != 0) return r;
|
|
|
|
reg->optimize = (allow_reverse != 0
|
|
? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);
|
|
}
|
|
else {
|
|
reg->optimize = OPTIMIZE_EXACT;
|
|
}
|
|
}
|
|
|
|
reg->dmin = e->mmd.min;
|
|
reg->dmax = e->mmd.max;
|
|
|
|
if (reg->dmin != INFINITE_LEN) {
|
|
reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
set_optimize_map(regex_t* reg, OptMap* m)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
|
reg->map[i] = m->map[i];
|
|
|
|
reg->optimize = OPTIMIZE_MAP;
|
|
reg->dmin = m->mmd.min;
|
|
reg->dmax = m->mmd.max;
|
|
|
|
if (reg->dmin != INFINITE_LEN) {
|
|
reg->threshold_len = reg->dmin + 1;
|
|
}
|
|
}
|
|
|
|
static void
|
|
set_sub_anchor(regex_t* reg, OptAnc* anc)
|
|
{
|
|
reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;
|
|
reg->sub_anchor |= anc->right & ANCHOR_END_LINE;
|
|
}
|
|
|
|
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
|
static void print_optimize_info(FILE* f, regex_t* reg);
|
|
#endif
|
|
|
|
static int
|
|
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
|
{
|
|
int r;
|
|
NodeOpt opt;
|
|
OptEnv env;
|
|
|
|
env.enc = reg->enc;
|
|
env.options = reg->options;
|
|
env.case_fold_flag = reg->case_fold_flag;
|
|
env.scan_env = scan_env;
|
|
clear_mml(&env.mmd);
|
|
|
|
r = optimize_nodes(node, &opt, &env);
|
|
if (r != 0) return r;
|
|
|
|
reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |
|
|
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |
|
|
ANCHOR_LOOK_BEHIND);
|
|
|
|
if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
|
|
reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;
|
|
|
|
reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
|
|
ANCHOR_PREC_READ_NOT);
|
|
|
|
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
|
|
reg->anchor_dmin = opt.len.min;
|
|
reg->anchor_dmax = opt.len.max;
|
|
}
|
|
|
|
if (opt.exb.len > 0 || opt.exm.len > 0) {
|
|
select_opt_exact(reg->enc, &opt.exb, &opt.exm);
|
|
if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {
|
|
goto set_map;
|
|
}
|
|
else {
|
|
r = set_optimize_exact(reg, &opt.exb);
|
|
set_sub_anchor(reg, &opt.exb.anc);
|
|
}
|
|
}
|
|
else if (opt.map.value > 0) {
|
|
set_map:
|
|
set_optimize_map(reg, &opt.map);
|
|
set_sub_anchor(reg, &opt.map.anc);
|
|
}
|
|
else {
|
|
reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;
|
|
if (opt.len.max == 0)
|
|
reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;
|
|
}
|
|
|
|
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
|
print_optimize_info(stderr, reg);
|
|
#endif
|
|
return r;
|
|
}
|
|
|
|
static void
|
|
clear_optimize_info(regex_t* reg)
|
|
{
|
|
reg->optimize = OPTIMIZE_NONE;
|
|
reg->anchor = 0;
|
|
reg->anchor_dmin = 0;
|
|
reg->anchor_dmax = 0;
|
|
reg->sub_anchor = 0;
|
|
reg->exact_end = (UChar* )NULL;
|
|
reg->threshold_len = 0;
|
|
if (IS_NOT_NULL(reg->exact)) {
|
|
xfree(reg->exact);
|
|
reg->exact = (UChar* )NULL;
|
|
}
|
|
}
|
|
|
|
#ifdef ONIG_DEBUG

/*
 * Debug helper: write the pattern [s, end) to `fp` as "\nPATTERN: /.../\n".
 *
 * When the encoding's minimum character length is greater than one, the
 * text is decoded code point by code point; code points >= 0x80 are
 * written as " 0x%04x " and the others as a single character.  Otherwise
 * the bytes are written out unchanged.
 */
static void print_enc_string(FILE* fp, OnigEncoding enc,
                             const UChar *s, const UChar *end)
{
  fprintf(fp, "\nPATTERN: /");

  if (ONIGENC_MBC_MINLEN(enc) <= 1) {
    const UChar *byte;

    for (byte = s; byte < end; byte++)
      fputc((int )*byte, fp);
  }
  else {
    const UChar *cur;

    for (cur = s; cur < end; cur += enclen(enc, cur)) {
      OnigCodePoint cp = ONIGENC_MBC_TO_CODE(enc, cur, end);

      if (cp < 0x80)
        fputc((int )cp, fp);
      else
        fprintf(fp, " 0x%04x ", (int )cp);
    }
  }

  fprintf(fp, "/\n");
}

#endif /* ONIG_DEBUG */
|
|
|
|
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
|
|
|
static void
|
|
print_distance_range(FILE* f, OnigLen a, OnigLen b)
|
|
{
|
|
if (a == INFINITE_LEN)
|
|
fputs("inf", f);
|
|
else
|
|
fprintf(f, "(%u)", a);
|
|
|
|
fputs("-", f);
|
|
|
|
if (b == INFINITE_LEN)
|
|
fputs("inf", f);
|
|
else
|
|
fprintf(f, "(%u)", b);
|
|
}
|
|
|
|
static void
|
|
print_anchor(FILE* f, int anchor)
|
|
{
|
|
int q = 0;
|
|
|
|
fprintf(f, "[");
|
|
|
|
if (anchor & ANCHOR_BEGIN_BUF) {
|
|
fprintf(f, "begin-buf");
|
|
q = 1;
|
|
}
|
|
if (anchor & ANCHOR_BEGIN_LINE) {
|
|
if (q) fprintf(f, ", ");
|
|
q = 1;
|
|
fprintf(f, "begin-line");
|
|
}
|
|
if (anchor & ANCHOR_BEGIN_POSITION) {
|
|
if (q) fprintf(f, ", ");
|
|
q = 1;
|
|
fprintf(f, "begin-pos");
|
|
}
|
|
if (anchor & ANCHOR_END_BUF) {
|
|
if (q) fprintf(f, ", ");
|
|
q = 1;
|
|
fprintf(f, "end-buf");
|
|
}
|
|
if (anchor & ANCHOR_SEMI_END_BUF) {
|
|
if (q) fprintf(f, ", ");
|
|
q = 1;
|
|
fprintf(f, "semi-end-buf");
|
|
}
|
|
if (anchor & ANCHOR_END_LINE) {
|
|
if (q) fprintf(f, ", ");
|
|
q = 1;
|
|
fprintf(f, "end-line");
|
|
}
|
|
if (anchor & ANCHOR_ANYCHAR_INF) {
|
|
if (q) fprintf(f, ", ");
|
|
q = 1;
|
|
fprintf(f, "anychar-inf");
|
|
}
|
|
if (anchor & ANCHOR_ANYCHAR_INF_ML) {
|
|
if (q) fprintf(f, ", ");
|
|
fprintf(f, "anychar-inf-ml");
|
|
}
|
|
|
|
fprintf(f, "]");
|
|
}
|
|
|
|
static void
|
|
print_optimize_info(FILE* f, regex_t* reg)
|
|
{
|
|
static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
|
|
"EXACT_IC", "MAP" };
|
|
|
|
fprintf(f, "optimize: %s\n", on[reg->optimize]);
|
|
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
|
|
if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
|
|
print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
|
|
fprintf(f, "\n");
|
|
|
|
if (reg->optimize) {
|
|
fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
|
|
fprintf(f, "\n");
|
|
}
|
|
fprintf(f, "\n");
|
|
|
|
if (reg->exact) {
|
|
UChar *p;
|
|
fprintf(f, "exact: [");
|
|
for (p = reg->exact; p < reg->exact_end; p++) {
|
|
fputc(*p, f);
|
|
}
|
|
fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
|
|
}
|
|
else if (reg->optimize & OPTIMIZE_MAP) {
|
|
int c, i, n = 0;
|
|
|
|
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
|
if (reg->map[i]) n++;
|
|
|
|
fprintf(f, "map: n=%d\n", n);
|
|
if (n > 0) {
|
|
c = 0;
|
|
fputc('[', f);
|
|
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
|
if (reg->map[i] != 0) {
|
|
if (c > 0) fputs(", ", f);
|
|
c++;
|
|
if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
|
|
ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
|
|
fputc(i, f);
|
|
else
|
|
fprintf(f, "%d", i);
|
|
}
|
|
}
|
|
fprintf(f, "]\n");
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
extern RegexExt*
|
|
onig_get_regex_ext(regex_t* reg)
|
|
{
|
|
if (IS_NULL(REG_EXTP(reg))) {
|
|
RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
|
|
if (IS_NULL(ext)) return 0;
|
|
|
|
ext->pattern = 0;
|
|
ext->pattern_end = 0;
|
|
#ifdef USE_CALLOUT
|
|
ext->tag_table = 0;
|
|
ext->callout_num = 0;
|
|
ext->callout_list_alloc = 0;
|
|
ext->callout_list = 0;
|
|
#endif
|
|
|
|
REG_EXTPL(reg) = (void* )ext;
|
|
}
|
|
|
|
return REG_EXTP(reg);
|
|
}
|
|
|
|
static void
|
|
free_regex_ext(RegexExt* ext)
|
|
{
|
|
if (IS_NOT_NULL(ext)) {
|
|
if (IS_NOT_NULL(ext->pattern))
|
|
xfree((void* )ext->pattern);
|
|
|
|
#ifdef USE_CALLOUT
|
|
if (IS_NOT_NULL(ext->tag_table))
|
|
onig_callout_tag_table_free(ext->tag_table);
|
|
|
|
if (IS_NOT_NULL(ext->callout_list))
|
|
onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
|
|
#endif
|
|
|
|
xfree(ext);
|
|
}
|
|
}
|
|
|
|
extern int
|
|
onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
|
|
{
|
|
RegexExt* ext;
|
|
UChar* s;
|
|
|
|
ext = onig_get_regex_ext(reg);
|
|
CHECK_NULL_RETURN_MEMERR(ext);
|
|
|
|
s = onigenc_strdup(reg->enc, pattern, pattern_end);
|
|
CHECK_NULL_RETURN_MEMERR(s);
|
|
|
|
ext->pattern = s;
|
|
ext->pattern_end = s + (pattern_end - pattern);
|
|
|
|
return ONIG_NORMAL;
|
|
}
|
|
|
|
|
|
extern void
|
|
onig_free_body(regex_t* reg)
|
|
{
|
|
if (IS_NOT_NULL(reg)) {
|
|
if (IS_NOT_NULL(reg->p)) xfree(reg->p);
|
|
if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
|
|
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
|
|
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
|
|
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
|
|
if (IS_NOT_NULL(REG_EXTP(reg))) {
|
|
free_regex_ext(REG_EXTP(reg));
|
|
REG_EXTPL(reg) = 0;
|
|
}
|
|
|
|
onig_names_free(reg);
|
|
}
|
|
}
|
|
|
|
extern void
|
|
onig_free(regex_t* reg)
|
|
{
|
|
if (IS_NOT_NULL(reg)) {
|
|
onig_free_body(reg);
|
|
xfree(reg);
|
|
}
|
|
}
|
|
|
|
#define REGEX_TRANSFER(to,from) do {\
|
|
onig_free_body(to);\
|
|
xmemcpy(to, from, sizeof(regex_t));\
|
|
xfree(from);\
|
|
} while (0)
|
|
|
|
extern void
|
|
onig_transfer(regex_t* to, regex_t* from)
|
|
{
|
|
REGEX_TRANSFER(to, from);
|
|
}
|
|
|
|
|
|
#ifdef ONIG_DEBUG_PARSE
|
|
static void print_tree P_((FILE* f, Node* node));
|
|
#endif
|
|
|
|
extern int
|
|
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
|
OnigErrorInfo* einfo)
|
|
{
|
|
#define COMPILE_INIT_SIZE 20
|
|
|
|
int r, init_size;
|
|
Node* root;
|
|
ScanEnv scan_env;
|
|
#ifdef USE_CALL
|
|
UnsetAddrList uslist;
|
|
#endif
|
|
|
|
root = 0;
|
|
if (IS_NOT_NULL(einfo)) {
|
|
einfo->enc = reg->enc;
|
|
einfo->par = (UChar* )NULL;
|
|
}
|
|
|
|
#ifdef ONIG_DEBUG
|
|
print_enc_string(stderr, reg->enc, pattern, pattern_end);
|
|
#endif
|
|
|
|
if (reg->alloc == 0) {
|
|
init_size = (int )(pattern_end - pattern) * 2;
|
|
if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
|
|
r = BB_INIT(reg, init_size);
|
|
if (r != 0) goto end;
|
|
}
|
|
else
|
|
reg->used = 0;
|
|
|
|
reg->num_mem = 0;
|
|
reg->num_repeat = 0;
|
|
reg->num_null_check = 0;
|
|
reg->repeat_range_alloc = 0;
|
|
reg->repeat_range = (OnigRepeatRange* )NULL;
|
|
|
|
r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
|
|
if (r != 0) goto err;
|
|
|
|
/* mixed use named group and no-named group */
|
|
if (scan_env.num_named > 0 &&
|
|
IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
|
! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
|
if (scan_env.num_named != scan_env.num_mem)
|
|
r = disable_noname_group_capture(&root, reg, &scan_env);
|
|
else
|
|
r = numbered_ref_check(root);
|
|
|
|
if (r != 0) goto err;
|
|
}
|
|
|
|
r = check_backrefs(root, &scan_env);
|
|
if (r != 0) goto err;
|
|
|
|
#ifdef USE_CALL
|
|
if (scan_env.num_call > 0) {
|
|
r = unset_addr_list_init(&uslist, scan_env.num_call);
|
|
if (r != 0) goto err;
|
|
scan_env.unset_addr_list = &uslist;
|
|
r = setup_call(root, &scan_env, 0);
|
|
if (r != 0) goto err_unset;
|
|
r = setup_call2(root);
|
|
if (r != 0) goto err_unset;
|
|
r = recursive_call_check_trav(root, &scan_env, 0);
|
|
if (r < 0) goto err_unset;
|
|
r = infinite_recursive_call_check_trav(root, &scan_env);
|
|
if (r != 0) goto err_unset;
|
|
|
|
setup_called_state(root, 0);
|
|
}
|
|
|
|
reg->num_call = scan_env.num_call;
|
|
#endif
|
|
|
|
r = setup_tree(root, reg, 0, &scan_env);
|
|
if (r != 0) goto err_unset;
|
|
|
|
#ifdef ONIG_DEBUG_PARSE
|
|
print_tree(stderr, root);
|
|
#endif
|
|
|
|
reg->capture_history = scan_env.capture_history;
|
|
reg->bt_mem_start = scan_env.bt_mem_start;
|
|
reg->bt_mem_start |= reg->capture_history;
|
|
if (IS_FIND_CONDITION(reg->options))
|
|
MEM_STATUS_ON_ALL(reg->bt_mem_end);
|
|
else {
|
|
reg->bt_mem_end = scan_env.bt_mem_end;
|
|
reg->bt_mem_end |= reg->capture_history;
|
|
}
|
|
reg->bt_mem_start |= reg->bt_mem_end;
|
|
|
|
clear_optimize_info(reg);
|
|
#ifndef ONIG_DONT_OPTIMIZE
|
|
r = set_optimize_info_from_tree(root, reg, &scan_env);
|
|
if (r != 0) goto err_unset;
|
|
#endif
|
|
|
|
if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
|
|
xfree(scan_env.mem_env_dynamic);
|
|
scan_env.mem_env_dynamic = (MemEnv* )NULL;
|
|
}
|
|
|
|
r = compile_tree(root, reg, &scan_env);
|
|
if (r == 0) {
|
|
if (scan_env.keep_num > 0) {
|
|
r = add_opcode(reg, OP_UPDATE_VAR);
|
|
if (r != 0) goto err;
|
|
r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);
|
|
if (r != 0) goto err;
|
|
r = add_mem_num(reg, 0 /* not used */);
|
|
if (r != 0) goto err;
|
|
}
|
|
|
|
r = add_opcode(reg, OP_END);
|
|
#ifdef USE_CALL
|
|
if (scan_env.num_call > 0) {
|
|
r = fix_unset_addr_list(&uslist, reg);
|
|
unset_addr_list_end(&uslist);
|
|
if (r != 0) goto err;
|
|
}
|
|
#endif
|
|
|
|
if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
|
|
#ifdef USE_CALLOUT
|
|
|| (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)
|
|
#endif
|
|
)
|
|
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
|
|
else {
|
|
if (reg->bt_mem_start != 0)
|
|
reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
|
|
else
|
|
reg->stack_pop_level = STACK_POP_LEVEL_FREE;
|
|
}
|
|
}
|
|
#ifdef USE_CALL
|
|
else if (scan_env.num_call > 0) {
|
|
unset_addr_list_end(&uslist);
|
|
}
|
|
#endif
|
|
onig_node_free(root);
|
|
|
|
#ifdef ONIG_DEBUG_COMPILE
|
|
onig_print_names(stderr, reg);
|
|
onig_print_compiled_byte_code_list(stderr, reg);
|
|
#endif
|
|
|
|
end:
|
|
return r;
|
|
|
|
err_unset:
|
|
#ifdef USE_CALL
|
|
if (scan_env.num_call > 0) {
|
|
unset_addr_list_end(&uslist);
|
|
}
|
|
#endif
|
|
err:
|
|
if (IS_NOT_NULL(scan_env.error)) {
|
|
if (IS_NOT_NULL(einfo)) {
|
|
einfo->par = scan_env.error;
|
|
einfo->par_end = scan_env.error_end;
|
|
}
|
|
}
|
|
|
|
onig_node_free(root);
|
|
if (IS_NOT_NULL(scan_env.mem_env_dynamic))
|
|
xfree(scan_env.mem_env_dynamic);
|
|
return r;
|
|
}
|
|
|
|
|
|
static int onig_inited = 0;
|
|
|
|
extern int
|
|
onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
|
|
OnigEncoding enc, OnigSyntaxType* syntax)
|
|
{
|
|
int r;
|
|
|
|
xmemset(reg, 0, sizeof(*reg));
|
|
|
|
if (onig_inited == 0) {
|
|
#if 0
|
|
return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
|
|
#else
|
|
r = onig_initialize(&enc, 1);
|
|
if (r != 0)
|
|
return ONIGERR_FAIL_TO_INITIALIZE;
|
|
|
|
onig_warning("You didn't call onig_initialize() explicitly");
|
|
#endif
|
|
}
|
|
|
|
if (IS_NULL(reg))
|
|
return ONIGERR_INVALID_ARGUMENT;
|
|
|
|
if (ONIGENC_IS_UNDEF(enc))
|
|
return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
|
|
|
|
if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
|
|
== (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
|
|
return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
|
|
}
|
|
|
|
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
|
|
option |= syntax->options;
|
|
option &= ~ONIG_OPTION_SINGLELINE;
|
|
}
|
|
else
|
|
option |= syntax->options;
|
|
|
|
(reg)->enc = enc;
|
|
(reg)->options = option;
|
|
(reg)->syntax = syntax;
|
|
(reg)->optimize = 0;
|
|
(reg)->exact = (UChar* )NULL;
|
|
(reg)->int_map = (int* )NULL;
|
|
(reg)->int_map_backward = (int* )NULL;
|
|
REG_EXTPL(reg) = NULL;
|
|
|
|
(reg)->p = (UChar* )NULL;
|
|
(reg)->alloc = 0;
|
|
(reg)->used = 0;
|
|
(reg)->name_table = (void* )NULL;
|
|
|
|
(reg)->case_fold_flag = case_fold_flag;
|
|
return 0;
|
|
}
|
|
|
|
extern int
|
|
onig_new_without_alloc(regex_t* reg,
|
|
const UChar* pattern, const UChar* pattern_end,
|
|
OnigOptionType option, OnigEncoding enc,
|
|
OnigSyntaxType* syntax, OnigErrorInfo* einfo)
|
|
{
|
|
int r;
|
|
|
|
r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
|
if (r != 0) return r;
|
|
|
|
r = onig_compile(reg, pattern, pattern_end, einfo);
|
|
return r;
|
|
}
|
|
|
|
extern int
|
|
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
|
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
|
OnigErrorInfo* einfo)
|
|
{
|
|
int r;
|
|
|
|
*reg = (regex_t* )xmalloc(sizeof(regex_t));
|
|
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
|
|
|
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
|
if (r != 0) goto err;
|
|
|
|
r = onig_compile(*reg, pattern, pattern_end, einfo);
|
|
if (r != 0) {
|
|
err:
|
|
onig_free(*reg);
|
|
*reg = NULL;
|
|
}
|
|
return r;
|
|
}
|
|
|
|
extern int
|
|
onig_initialize(OnigEncoding encodings[], int n)
|
|
{
|
|
int i;
|
|
int r;
|
|
|
|
if (onig_inited != 0)
|
|
return 0;
|
|
|
|
onigenc_init();
|
|
|
|
onig_inited = 1;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
OnigEncoding enc = encodings[i];
|
|
r = onig_initialize_encoding(enc);
|
|
if (r != 0)
|
|
return r;
|
|
}
|
|
|
|
return ONIG_NORMAL;
|
|
}
|
|
|
|
/* One registered end-of-library callback in a singly linked list. */
typedef struct EndCallListItem {
  struct EndCallListItem* next;
  void (*func)(void);
} EndCallListItemType;

/* Head of the callback list; the most recently added item comes first. */
static EndCallListItemType* EndCallTop;

/*
 * Register `func` to be run by exec_end_call_list().  The new entry is
 * pushed at the head of the list.  When the entry cannot be allocated
 * the request is dropped silently.
 */
extern void onig_add_end_call(void (*func)(void))
{
  EndCallListItemType* entry =
    (EndCallListItemType* )xmalloc(sizeof(*entry));

  if (IS_NULL(entry)) return ;

  entry->func = func;
  entry->next = EndCallTop;
  EndCallTop  = entry;
}
|
|
|
|
static void
|
|
exec_end_call_list(void)
|
|
{
|
|
EndCallListItemType* prev;
|
|
void (*func)(void);
|
|
|
|
while (EndCallTop != 0) {
|
|
func = EndCallTop->func;
|
|
(*func)();
|
|
|
|
prev = EndCallTop;
|
|
EndCallTop = EndCallTop->next;
|
|
xfree(prev);
|
|
}
|
|
}
|
|
|
|
extern int
|
|
onig_end(void)
|
|
{
|
|
exec_end_call_list();
|
|
|
|
#ifdef USE_CALLOUT
|
|
onig_global_callout_names_free();
|
|
#endif
|
|
|
|
onigenc_end();
|
|
|
|
onig_inited = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
extern int
|
|
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
|
|
{
|
|
OnigCodePoint n, *data;
|
|
OnigCodePoint low, high, x;
|
|
|
|
GET_CODE_POINT(n, p);
|
|
data = (OnigCodePoint* )p;
|
|
data++;
|
|
|
|
for (low = 0, high = n; low < high; ) {
|
|
x = (low + high) >> 1;
|
|
if (code > data[x * 2 + 1])
|
|
low = x + 1;
|
|
else
|
|
high = x;
|
|
}
|
|
|
|
return ((low < n && code >= data[low * 2]) ? 1 : 0);
|
|
}
|
|
|
|
extern int
|
|
onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
|
|
{
|
|
int found;
|
|
CClassNode* cc = (CClassNode* )cc_arg;
|
|
|
|
if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
|
|
if (IS_NULL(cc->mbuf)) {
|
|
found = 0;
|
|
}
|
|
else {
|
|
found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
|
|
}
|
|
}
|
|
else {
|
|
found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
|
|
}
|
|
|
|
if (IS_NCCLASS_NOT(cc))
|
|
return !found;
|
|
else
|
|
return found;
|
|
}
|
|
|
|
extern int
|
|
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
|
{
|
|
int len;
|
|
|
|
if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
|
len = 2;
|
|
}
|
|
else {
|
|
len = ONIGENC_CODE_TO_MBCLEN(enc, code);
|
|
}
|
|
return onig_is_code_in_cc_len(len, code, cc);
|
|
}
|
|
|
|
|
|
#ifdef ONIG_DEBUG_PARSE
|
|
|
|
static void
|
|
p_string(FILE* f, int len, UChar* s)
|
|
{
|
|
fputs(":", f);
|
|
while (len-- > 0) { fputc(*s++, f); }
|
|
}
|
|
|
|
/* Debug helper: write `indent` space characters to `f`. */
static void
Indent(FILE* f, int indent)
{
  int remaining = indent;

  while (remaining > 0) {
    putc(' ', f);
    remaining--;
  }
}
|
|
|
|
static void
|
|
print_indent_tree(FILE* f, Node* node, int indent)
|
|
{
|
|
int i;
|
|
NodeType type;
|
|
UChar* p;
|
|
int add = 3;
|
|
|
|
Indent(f, indent);
|
|
if (IS_NULL(node)) {
|
|
fprintf(f, "ERROR: null node!!!\n");
|
|
exit (0);
|
|
}
|
|
|
|
type = NODE_TYPE(node);
|
|
switch (type) {
|
|
case NODE_LIST:
|
|
case NODE_ALT:
|
|
if (type == NODE_LIST)
|
|
fprintf(f, "<list:%p>\n", node);
|
|
else
|
|
fprintf(f, "<alt:%p>\n", node);
|
|
|
|
print_indent_tree(f, NODE_CAR(node), indent + add);
|
|
while (IS_NOT_NULL(node = NODE_CDR(node))) {
|
|
if (NODE_TYPE(node) != type) {
|
|
fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
|
|
exit(0);
|
|
}
|
|
print_indent_tree(f, NODE_CAR(node), indent + add);
|
|
}
|
|
break;
|
|
|
|
case NODE_STRING:
|
|
fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);
|
|
for (p = STR_(node)->s; p < STR_(node)->end; p++) {
|
|
if (*p >= 0x20 && *p < 0x7f)
|
|
fputc(*p, f);
|
|
else {
|
|
fprintf(f, " 0x%02x", *p);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CCLASS:
|
|
fprintf(f, "<cclass:%p>", node);
|
|
if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
|
|
if (CCLASS_(node)->mbuf) {
|
|
BBuf* bbuf = CCLASS_(node)->mbuf;
|
|
for (i = 0; i < bbuf->used; i++) {
|
|
if (i > 0) fprintf(f, ",");
|
|
fprintf(f, "%0x", bbuf->p[i]);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case NODE_CTYPE:
|
|
fprintf(f, "<ctype:%p> ", node);
|
|
switch (CTYPE_(node)->ctype) {
|
|
case CTYPE_ANYCHAR:
|
|
fprintf(f, "<anychar:%p>", node);
|
|
break;
|
|
|
|
case ONIGENC_CTYPE_WORD:
|
|
if (CTYPE_(node)->not != 0)
|
|
fputs("not word", f);
|
|
else
|
|
fputs("word", f);
|
|
|
|
if (CTYPE_(node)->ascii_mode != 0)
|
|
fputs(" (ascii)", f);
|
|
|
|
break;
|
|
|
|
default:
|
|
fprintf(f, "ERROR: undefined ctype.\n");
|
|
exit(0);
|
|
}
|
|
break;
|
|
|
|
case NODE_ANCHOR:
|
|
fprintf(f, "<anchor:%p> ", node);
|
|
switch (ANCHOR_(node)->type) {
|
|
case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
|
|
case ANCHOR_END_BUF: fputs("end buf", f); break;
|
|
case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
|
|
case ANCHOR_END_LINE: fputs("end line", f); break;
|
|
case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
|
|
case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
|
|
|
|
case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;
|
|
case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
|
|
#ifdef USE_WORD_BEGIN_END
|
|
case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
|
|
case ANCHOR_WORD_END: fputs("word end", f); break;
|
|
#endif
|
|
case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
|
|
fputs("extended-grapheme-cluster boundary", f); break;
|
|
case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
|
|
fputs("no-extended-grapheme-cluster boundary", f); break;
|
|
case ANCHOR_PREC_READ:
|
|
fprintf(f, "prec read\n");
|
|
print_indent_tree(f, NODE_BODY(node), indent + add);
|
|
break;
|
|
case ANCHOR_PREC_READ_NOT:
|
|
fprintf(f, "prec read not\n");
|
|
print_indent_tree(f, NODE_BODY(node), indent + add);
|
|
break;
|
|
case ANCHOR_LOOK_BEHIND:
|
|
fprintf(f, "look behind\n");
|
|
print_indent_tree(f, NODE_BODY(node), indent + add);
|
|
break;
|
|
case ANCHOR_LOOK_BEHIND_NOT:
|
|
fprintf(f, "look behind not\n");
|
|
print_indent_tree(f, NODE_BODY(node), indent + add);
|
|
break;
|
|
|
|
default:
|
|
fprintf(f, "ERROR: undefined anchor type.\n");
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case NODE_BACKREF:
|
|
{
|
|
int* p;
|
|
BackRefNode* br = BACKREF_(node);
|
|
p = BACKREFS_P(br);
|
|
fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
|
|
for (i = 0; i < br->back_num; i++) {
|
|
if (i > 0) fputs(", ", f);
|
|
fprintf(f, "%d", p[i]);
|
|
}
|
|
}
|
|
break;
|
|
|
|
#ifdef USE_CALL
|
|
case NODE_CALL:
|
|
{
|
|
CallNode* cn = CALL_(node);
|
|
fprintf(f, "<call:%p>", node);
|
|
p_string(f, cn->name_end - cn->name, cn->name);
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
case NODE_QUANT:
|
|
fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
|
|
QUANT_(node)->lower, QUANT_(node)->upper,
|
|
(QUANT_(node)->greedy ? "" : "?"));
|
|
print_indent_tree(f, NODE_BODY(node), indent + add);
|
|
break;
|
|
|
|
case NODE_ENCLOSURE:
|
|
fprintf(f, "<enclosure:%p> ", node);
|
|
switch (ENCLOSURE_(node)->type) {
|
|
case ENCLOSURE_OPTION:
|
|
fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);
|
|
break;
|
|
case ENCLOSURE_MEMORY:
|
|
fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);
|
|
break;
|
|
case ENCLOSURE_STOP_BACKTRACK:
|
|
fprintf(f, "stop-bt");
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
fprintf(f, "\n");
|
|
print_indent_tree(f, NODE_BODY(node), indent + add);
|
|
break;
|
|
|
|
case NODE_GIMMICK:
|
|
fprintf(f, "<gimmick:%p> ", node);
|
|
switch (GIMMICK_(node)->type) {
|
|
case GIMMICK_FAIL:
|
|
fprintf(f, "fail");
|
|
break;
|
|
case GIMMICK_KEEP:
|
|
fprintf(f, "keep:%d", GIMMICK_(node)->id);
|
|
break;
|
|
case GIMMICK_SAVE:
|
|
fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
|
|
break;
|
|
case GIMMICK_UPDATE_VAR:
|
|
fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
|
|
break;
|
|
#ifdef USE_CALLOUT
|
|
case GIMMICK_CALLOUT:
|
|
switch (GIMMICK_(node)->detail_type) {
|
|
case ONIG_CALLOUT_OF_CONTENTS:
|
|
fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
|
|
break;
|
|
case ONIG_CALLOUT_OF_NAME:
|
|
fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
break;
|
|
|
|
default:
|
|
fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
|
|
break;
|
|
}
|
|
|
|
if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
|
|
type != NODE_ENCLOSURE)
|
|
fprintf(f, "\n");
|
|
fflush(f);
|
|
}
|
|
|
|
static void
|
|
print_tree(FILE* f, Node* node)
|
|
{
|
|
print_indent_tree(f, node, 0);
|
|
}
|
|
#endif
|