git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@8964 6f19259b-4bc3-4df7-8a09-765794883524
		
			
				
	
	
		
			587 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			587 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * This file contains code for
 | |
|  *
 | |
|  *      int rexpr(char *expr, char *s);
 | |
|  *
 | |
|  * which answers
 | |
|  *
 | |
|  *      1 if 's' is in the language described by the regular expression 'expr'
 | |
|  *      0 if it is not
 | |
|  *     -1 if the regular expression is invalid
 | |
|  *
 | |
|  * Language membership is determined by constructing a non-deterministic
 | |
|  * finite automaton (NFA) from the regular expression.  A depth-
 | |
|  * first-search is performed on the NFA (graph) to check for a match of 's'.
 | |
|  * Each non-epsilon arc consumes one character from 's'.  Backtracking is
 | |
|  * performed to check all possible paths through the NFA.
 | |
|  *
 | |
|  * Regular expressions follow the meta-language:
 | |
|  *
 | |
|  * <regExpr>        ::= <andExpr> ( '|' <andExpr> )*
 | |
|  *
 | |
|  * <andExpr>        ::= <expr> ( <expr> )*
 | |
|  *
 | |
|  * <expr>           ::= {'~'} '[' <atomList> ']' <repeatSymbol>
 | |
|  *                      | '(' <regExpr> ')' <repeatSymbol>
 | |
|  *                      | '{' <regExpr> '}' <repeatSymbol>
 | |
|  *                      | <atom> <repeatSymbol>
 | |
|  *
 | |
|  * <repeatSymbol>   ::= { '*' | '+' }
 | |
|  *
 | |
|  * <atomList>       ::= <atom> ( <atom> )*
 | |
|  *                      | { <atomList> } <atom> '-' <atom> { <atomList> }
 | |
|  *
 | |
|  * <atom>           ::= Token[Atom]
 | |
|  *
 | |
|  * Notes:
 | |
|  *		~	means complement the set in [..]. i.e. all characters not listed
 | |
|  *		*	means match 0 or more times (can be on expression or atom)
 | |
|  *		+	means match 1 or more times (can be on expression or atom)
 | |
|  *		{}	optional
 | |
|  *		()	grouping
 | |
|  *		[]	set of atoms
 | |
|  *		x-y	all characters from x to y (found only in [..])
 | |
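|  *		\xx	the character with decimal value xx (values above 255 are clamped to 255); \n, \t and \r give newline, tab and carriage return, any other escaped character stands for itself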
 | |
|  *
 | |
|  * Examples:
 | |
|  *		[a-z]+
 | |
|  *			match 1 or more lower-case letters (e.g. variable)
 | |
|  *
 | |
|  *		0x[0-9A-Fa-f]+
 | |
|  *			match a hex number with 0x on front (e.g. 0xA1FF)
 | |
|  *
 | |
|  *		[0-9]+.[0-9]+{e[0-9]+}
 | |
|  *			match a floating point number (e.g. 3.14e21)
 | |
|  *
 | |
|  * Code example:
 | |
|  *		if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) == 1 ) then str is keyword (note: -1 means the expression itself is invalid)
 | |
|  *
 | |
|  * Terence Parr
 | |
|  * Purdue University
 | |
|  * April 1991
 | |
|  */
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <ctype.h>
 | |
| #ifdef __STDC__
 | |
| #include <stdlib.h>
 | |
| #else
 | |
| #include <malloc.h>
 | |
| #endif
 | |
| #include "rexpr.h"
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static int regExpr( GraphPtr g );
 | |
| static int andExpr( GraphPtr g );
 | |
| static int expr( GraphPtr g );
 | |
| static int repeatSymbol( GraphPtr g );
 | |
| static int atomList( char *p, int complement );
 | |
| static void next( void );
 | |
| static ArcPtr newGraphArc( void );
 | |
| static NodePtr newNode( void );
 | |
| static int ArcBetweenGraphNode( NodePtr i, NodePtr j, int label );
 | |
| static Graph BuildNFA_atom( int label );
 | |
| static Graph BuildNFA_AB( Graph A, Graph B );
 | |
| static Graph BuildNFA_AorB( Graph A, Graph B );
 | |
| static Graph BuildNFA_set( char *s );
 | |
| static Graph BuildNFA_Astar( Graph A );
 | |
| static Graph BuildNFA_Aplus( Graph A );
 | |
| static Graph BuildNFA_Aoptional( Graph A );
 | |
| #else
 | |
| static int regExpr();
 | |
| static int andExpr();
 | |
| static int expr();
 | |
| static int repeatSymbol();
 | |
| static int atomList();
 | |
| static void next();
 | |
| static ArcPtr newGraphArc();
 | |
| static NodePtr newNode();
 | |
| static int ArcBetweenGraphNode();
 | |
| static Graph BuildNFA_atom();
 | |
| static Graph BuildNFA_AB();
 | |
| static Graph BuildNFA_AorB();
 | |
| static Graph BuildNFA_set();
 | |
| static Graph BuildNFA_Astar();
 | |
| static Graph BuildNFA_Aplus();
 | |
| static Graph BuildNFA_Aoptional();
 | |
| #endif
 | |
| 
 | |
| static char *_c;
 | |
| static int token, tokchar;
 | |
| static NodePtr accept;
 | |
| static NodePtr freelist = NULL;
 | |
| 
 | |
| /*
 | |
|  * return 1 if s in language described by expr
 | |
|  *        0 if s is not
 | |
|  *       -1 if expr is an invalid regular expression
 | |
|  */
 | |
| #ifdef __USE_PROTOS
 | |
| static int rexpr(char *expr,char *s)
 | |
| #else
 | |
| static int rexpr(expr, s)
 | |
| char *expr, *s;
 | |
| #endif
 | |
| {
 | |
| 	NodePtr p,q;
 | |
| 	Graph nfa;
 | |
| 	int result;
 | |
| 
 | |
| 	fprintf(stderr, "rexpr(%s,%s);\n", expr,s);
 | |
| 	freelist = NULL;
 | |
| 	_c = expr;
 | |
| 	next();
 | |
| 	if ( regExpr(&nfa) == -1 ) return -1;
 | |
| 	accept = nfa.right;
 | |
| 	result = match(nfa.left, s);
 | |
| 	/* free all your memory */
 | |
| 	p = q = freelist;
 | |
| 	while ( p!=NULL ) { q = p->track; free(p); p = q; }
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * do a depth-first-search on the NFA looking for a path from start to
 | |
|  * accept state labelled with the characters of 's'.
 | |
|  */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static int match(NodePtr automaton,char *s)
 | |
| #else
 | |
| static int match(automaton, s)
 | |
| NodePtr automaton;
 | |
| char *s;
 | |
| #endif
 | |
| {
 | |
| 	ArcPtr p;
 | |
| 	
 | |
| 	if ( automaton == accept && *s == '\0' ) return 1;	/* match */
 | |
| 
 | |
| 	for (p=automaton->arcs; p!=NULL; p=p->next)			/* try all arcs */
 | |
| 	{
 | |
| 		if ( p->label == Epsilon )
 | |
| 		{
 | |
| 			if ( match(p->target, s) ) return 1;
 | |
| 		}
 | |
| 		else if ( p->label == *s )
 | |
| 				if ( match(p->target, s+1) ) return 1;
 | |
| 	}
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * <regExpr>        ::= <andExpr> ( '|' {<andExpr>} )*
 | |
|  *
 | |
|  * Return -1 if syntax error
 | |
|  * Return  0 if none found
 | |
|  * Return  1 if a regExrp was found
 | |
|  */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static int regExpr(GraphPtr g)
 | |
| #else
 | |
| static int regExpr(g)
 | |
| GraphPtr g;
 | |
| #endif
 | |
| {
 | |
| 	Graph g1, g2;
 | |
| 	
 | |
| 	if ( andExpr(&g1) == -1 )
 | |
| 	{
 | |
| 		return -1;
 | |
| 	}
 | |
| 	
 | |
| 	while ( token == '|' )
 | |
| 	{
 | |
| 		int a;
 | |
| 		next();
 | |
| 		a = andExpr(&g2);
 | |
| 		if ( a == -1 ) return -1;	/* syntax error below */
 | |
| 		else if ( !a ) return 1;	/* empty alternative */
 | |
| 		g1 = BuildNFA_AorB(g1, g2);
 | |
| 	}
 | |
| 	
 | |
| 	if ( token!='\0' ) return -1;
 | |
| 
 | |
| 	*g = g1;
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * <andExpr>        ::= <expr> ( <expr> )*
 | |
|  */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static int andExpr(GraphPtr g)
 | |
| #else
 | |
| static int andExpr(g)
 | |
| GraphPtr g;
 | |
| #endif
 | |
| {
 | |
| 	Graph g1, g2;
 | |
| 	
 | |
| 	if ( expr(&g1) == -1 )
 | |
| 	{
 | |
| 		return -1;
 | |
| 	}
 | |
| 	
 | |
| 	while ( token==Atom || token=='{' || token=='(' || token=='~' || token=='[' )
 | |
| 	{
 | |
| 		if (expr(&g2) == -1) return -1;
 | |
| 		g1 = BuildNFA_AB(g1, g2);
 | |
| 	}
 | |
| 	
 | |
| 	*g = g1;
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * <expr>           ::=    {'~'} '[' <atomList> ']' <repeatSymbol>
 | |
|  *                      | '(' <regExpr> ')' <repeatSymbol>
 | |
|  *                      | '{' <regExpr> '}' <repeatSymbol>
 | |
|  *                      | <atom> <repeatSymbol>
 | |
|  */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static int expr(GraphPtr g)
 | |
| #else
 | |
| static int expr(g)
 | |
| GraphPtr g;
 | |
| #endif
 | |
| {
 | |
| 	int complement = 0;
 | |
| 	char s[257];    /* alloc space for string of char in [] */
 | |
| 	
 | |
| 	if ( token == '~' || token == '[' )
 | |
| 	{
 | |
| 		if ( token == '~' ) {complement = 1; next();}
 | |
| 		if ( token != '[' ) return -1;
 | |
| 		next();
 | |
| 		if ( atomList( s, complement ) == -1 ) return -1;
 | |
| 		*g = BuildNFA_set( s );
 | |
| 		if ( token != ']' ) return -1;
 | |
| 		next();
 | |
| 		repeatSymbol( g );
 | |
| 		return 1;
 | |
| 	}
 | |
| 	if ( token == '(' )
 | |
| 	{
 | |
| 		next();
 | |
| 		if ( regExpr( g ) == -1 ) return -1;
 | |
| 		if ( token != ')' ) return -1;
 | |
| 		next();
 | |
| 		repeatSymbol( g );
 | |
| 		return 1;
 | |
| 	}
 | |
| 	if ( token == '{' )
 | |
| 	{
 | |
| 		next();
 | |
| 		if ( regExpr( g ) == -1 ) return -1;
 | |
| 		if ( token != '}' ) return -1;
 | |
| 		next();
 | |
| 		/* S p e c i a l  C a s e   O p t i o n a l  {  } */
 | |
| 		if ( token != '*' && token != '+' )
 | |
| 		{
 | |
| 			*g = BuildNFA_Aoptional( *g );
 | |
| 		}
 | |
| 		repeatSymbol( g );
 | |
| 		return 1;
 | |
| 	}
 | |
| 	if ( token == Atom )
 | |
| 	{
 | |
| 		*g = BuildNFA_atom( tokchar );
 | |
| 		next();
 | |
| 		repeatSymbol( g );
 | |
| 		return 1;
 | |
| 	}
 | |
| 	
 | |
| 	return -1;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * <repeatSymbol>   ::= { '*' | '+' }
 | |
|  */
 | |
| #ifdef __USE_PROTOS
 | |
| static int repeatSymbol(GraphPtr g)
 | |
| #else
 | |
| static int repeatSymbol(g)
 | |
| GraphPtr g;
 | |
| #endif
 | |
| {
 | |
| 	switch ( token )
 | |
| 	{
 | |
| 		case '*' : *g = BuildNFA_Astar( *g ); next(); break;
 | |
| 		case '+' : *g = BuildNFA_Aplus( *g ); next(); break;
 | |
| 	}
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * <atomList>       ::= <atom> { <atom> }*
 | |
|  *                      { <atomList> } <atom> '-' <atom> { <atomList> }
 | |
|  *
 | |
|  * a-b is same as ab
 | |
|  * q-a is same as q
 | |
|  */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static int atomList(char *p, int complement)
 | |
| #else
 | |
| static int atomList(p, complement)
 | |
| char *p;
 | |
| int complement;
 | |
| #endif
 | |
| {
 | |
| 	static unsigned char set[256];		/* no duplicates */
 | |
| 	int first, last, i;
 | |
| 	char *s = p;
 | |
| 	
 | |
| 	if ( token != Atom ) return -1;
 | |
| 	
 | |
| 	for (i=0; i<256; i++) set[i] = 0;
 | |
| 	while ( token == Atom )
 | |
| 	{
 | |
| 		if ( !set[tokchar] ) *s++ = tokchar;
 | |
| 		set[tokchar] = 1;    			/* Add atom to set */
 | |
| 		next();
 | |
| 		if ( token == '-' )         	/* have we found '-' */
 | |
| 		{
 | |
| 			first = *(s-1);             /* Get last char */
 | |
| 			next();
 | |
| 			if ( token != Atom ) return -1;
 | |
| 			else
 | |
| 			{
 | |
| 				last = tokchar;
 | |
| 			}
 | |
| 			for (i = first+1; i <= last; i++)
 | |
| 			{
 | |
| 				if ( !set[tokchar] ) *s++ = i;
 | |
| 				set[i] = 1;    			/* Add atom to set */
 | |
| 			}
 | |
| 			next();
 | |
| 		}
 | |
| 	}
 | |
| 	*s = '\0';
 | |
| 	if ( complement )
 | |
| 	{
 | |
| 		for (i=0; i<256; i++) set[i] = !set[i];
 | |
| 		for (i=1,s=p; i<256; i++) if ( set[i] ) *s++ = i;
 | |
| 		*s = '\0';
 | |
| 	}
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| /* a somewhat stupid lexical analyzer */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static void next(void)
 | |
| #else
 | |
| static void next()
 | |
| #endif
 | |
| {
 | |
| 	while ( *_c==' ' || *_c=='\t' || *_c=='\n' ) _c++;
 | |
| 	if ( *_c=='\\' )
 | |
| 	{
 | |
| 		_c++;
 | |
| 		if ( isdigit(*_c) )
 | |
| 		{
 | |
| 			int n=0;
 | |
| 			while ( isdigit(*_c) )
 | |
| 			{
 | |
| 				n = n*10 + (*_c++ - '0');
 | |
| 			}
 | |
| 			if ( n>255 ) n=255;
 | |
| 			tokchar = n;
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			switch (*_c)
 | |
| 			{
 | |
| 				case 'n' : tokchar = '\n'; break;
 | |
| 				case 't' : tokchar = '\t'; break;
 | |
| 				case 'r' : tokchar = '\r'; break;
 | |
| 				default  : tokchar = *_c;
 | |
| 			}
 | |
| 			_c++;
 | |
| 		}
 | |
| 		token = Atom;
 | |
| 	}
 | |
| 	else if ( isgraph(*_c) && *_c!='[' && *_c!='(' && *_c!='{' &&
 | |
| 			  *_c!='-' && *_c!='}' && *_c!=')' && *_c!=']' &&
 | |
| 			  *_c!='+' && *_c!='*' && *_c!='~' && *_c!='|' )
 | |
| 	{
 | |
| 		token = Atom;
 | |
| 		tokchar = *_c++;
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		token = tokchar = *_c++;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /* N F A  B u i l d i n g  R o u t i n e s */
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static ArcPtr newGraphArc(void)
 | |
| #else
 | |
| static ArcPtr newGraphArc()
 | |
| #endif
 | |
| {
 | |
| 	ArcPtr p;
 | |
| 	p = (ArcPtr) calloc(1, sizeof(Arc));
 | |
| 	if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}
 | |
| 	if ( freelist != NULL ) p->track = (ArcPtr) freelist;
 | |
| 	freelist = (NodePtr) p;
 | |
| 	return p;
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static NodePtr newNode(void)
 | |
| #else
 | |
| static NodePtr newNode()
 | |
| #endif
 | |
| {
 | |
| 	NodePtr p;
 | |
| 	p = (NodePtr) calloc(1, sizeof(Node));
 | |
| 	if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}
 | |
| 	if ( freelist != NULL ) p->track = freelist;
 | |
| 	freelist = p;
 | |
| 	return p;
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static void ArcBetweenGraphNodes(NodePtr i,NodePtr j,int label)
 | |
| #else
 | |
| static void ArcBetweenGraphNodes(i, j, label)
 | |
| NodePtr i, j;
 | |
| int label;
 | |
| #endif
 | |
| {
 | |
| 	ArcPtr a;
 | |
| 	
 | |
| 	a = newGraphArc();
 | |
| 	if ( i->arcs == NULL ) i->arctail = i->arcs = a;
 | |
| 	else {(i->arctail)->next = a; i->arctail = a;}
 | |
| 	a->label = label;
 | |
| 	a->target = j;
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_atom(int label)
 | |
| #else
 | |
| static Graph BuildNFA_atom(label)
 | |
| int label;
 | |
| #endif
 | |
| {
 | |
| 	Graph g;
 | |
| 	
 | |
| 	g.left = newNode();
 | |
| 	g.right = newNode();
 | |
| 	ArcBetweenGraphNodes(g.left, g.right, label);
 | |
| 	return( g );
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_AB(Graph A,Graph B)
 | |
| #else
 | |
| static Graph BuildNFA_AB(A, B)
 | |
| Graph A, B;
 | |
| #endif
 | |
| {
 | |
| 	Graph g;
 | |
| 	
 | |
| 	ArcBetweenGraphNodes(A.right, B.left, Epsilon);
 | |
| 	g.left = A.left;
 | |
| 	g.right = B.right;
 | |
| 	return( g );
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_AorB(Graph A,Graph B)
 | |
| #else
 | |
| static Graph BuildNFA_AorB(A, B)
 | |
| Graph A, B;
 | |
| #endif
 | |
| {
 | |
| 	Graph g;
 | |
| 	
 | |
| 	g.left = newNode();
 | |
| 	ArcBetweenGraphNodes(g.left, A.left, Epsilon);
 | |
| 	ArcBetweenGraphNodes(g.left, B.left, Epsilon);
 | |
| 	g.right = newNode();
 | |
| 	ArcBetweenGraphNodes(A.right, g.right, Epsilon);
 | |
| 	ArcBetweenGraphNodes(B.right, g.right, Epsilon);
 | |
| 	return( g );
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_set(char *s)
 | |
| #else
 | |
| static Graph BuildNFA_set( s )
 | |
| char *s;
 | |
| #endif
 | |
| {
 | |
| 	Graph g;
 | |
| 	
 | |
| 	if ( s == NULL ) return g;
 | |
| 	
 | |
| 	g.left = newNode();
 | |
| 	g.right = newNode();
 | |
| 	while ( *s != '\0' )
 | |
| 	{
 | |
| 		ArcBetweenGraphNodes(g.left, g.right, *s++);
 | |
| 	}
 | |
| 	return g;
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_Astar(Graph A)
 | |
| #else
 | |
| static Graph BuildNFA_Astar( A )
 | |
| Graph A;
 | |
| #endif
 | |
| {
 | |
| 	Graph g;
 | |
| 
 | |
| 	g.left = newNode();
 | |
| 	g.right = newNode();
 | |
| 	
 | |
| 	ArcBetweenGraphNodes(g.left, A.left, Epsilon);
 | |
| 	ArcBetweenGraphNodes(g.left, g.right, Epsilon);
 | |
| 	ArcBetweenGraphNodes(A.right, g.right, Epsilon);
 | |
| 	ArcBetweenGraphNodes(A.right, A.left, Epsilon);
 | |
| 	
 | |
| 	return( g );
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_Aplus(Graph A)
 | |
| #else
 | |
| static Graph BuildNFA_Aplus( A )
 | |
| Graph A;
 | |
| #endif
 | |
| {
 | |
| 	ArcBetweenGraphNodes(A.right, A.left, Epsilon);
 | |
| 	
 | |
| 	return( A );
 | |
| }
 | |
| 
 | |
| #ifdef __USE_PROTOS
 | |
| static Graph BuildNFA_Aoptional(Graph A)
 | |
| #else
 | |
| static Graph BuildNFA_Aoptional( A )
 | |
| Graph A;
 | |
| #endif
 | |
| {
 | |
| 	Graph g;
 | |
| 	
 | |
| 	g.left = newNode();
 | |
| 	g.right = newNode();
 | |
| 	
 | |
| 	ArcBetweenGraphNodes(g.left, A.left, Epsilon);
 | |
| 	ArcBetweenGraphNodes(g.left, g.right, Epsilon);
 | |
| 	ArcBetweenGraphNodes(A.right, g.right, Epsilon);
 | |
| 	
 | |
| 	return( g );
 | |
| }
 |