310 lines
7.5 KiB
C
310 lines
7.5 KiB
C
|
/*
|
||
|
* testRegexp.c: simple module for testing regular expressions
|
||
|
*
|
||
|
* See Copyright for the status of this software.
|
||
|
*
|
||
|
* Daniel Veillard <veillard@redhat.com>
|
||
|
*/
|
||
|
|
||
|
#include "libxml.h"
|
||
|
#ifdef LIBXML_AUTOMATA_ENABLED
|
||
|
#include <string.h>
|
||
|
|
||
|
#include <libxml/tree.h>
|
||
|
#include <libxml/xmlautomata.h>
|
||
|
|
||
|
static int scanNumber(char **ptr) {
|
||
|
int ret = 0;
|
||
|
char *cur;
|
||
|
|
||
|
cur = *ptr;
|
||
|
while ((*cur >= '0') && (*cur <= '9')) {
|
||
|
ret = ret * 10 + (*cur - '0');
|
||
|
cur++;
|
||
|
}
|
||
|
*ptr = cur;
|
||
|
return(ret);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
testRegexpFile(const char *filename) {
|
||
|
FILE *input;
|
||
|
char expr[5000];
|
||
|
int len;
|
||
|
int ret;
|
||
|
int i;
|
||
|
xmlAutomataPtr am;
|
||
|
xmlAutomataStatePtr states[1000];
|
||
|
xmlRegexpPtr regexp = NULL;
|
||
|
xmlRegExecCtxtPtr exec = NULL;
|
||
|
|
||
|
for (i = 0;i<1000;i++)
|
||
|
states[i] = NULL;
|
||
|
|
||
|
input = fopen(filename, "r");
|
||
|
if (input == NULL) {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Cannot open %s for reading\n", filename);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
am = xmlNewAutomata();
|
||
|
if (am == NULL) {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Cannot create automata\n");
|
||
|
fclose(input);
|
||
|
return;
|
||
|
}
|
||
|
states[0] = xmlAutomataGetInitState(am);
|
||
|
if (states[0] == NULL) {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Cannot get start state\n");
|
||
|
xmlFreeAutomata(am);
|
||
|
fclose(input);
|
||
|
return;
|
||
|
}
|
||
|
ret = 0;
|
||
|
|
||
|
while (fgets(expr, 4500, input) != NULL) {
|
||
|
if (expr[0] == '#')
|
||
|
continue;
|
||
|
len = strlen(expr);
|
||
|
len--;
|
||
|
while ((len >= 0) &&
|
||
|
((expr[len] == '\n') || (expr[len] == '\t') ||
|
||
|
(expr[len] == '\r') || (expr[len] == ' '))) len--;
|
||
|
expr[len + 1] = 0;
|
||
|
if (len >= 0) {
|
||
|
if ((am != NULL) && (expr[0] == 't') && (expr[1] == ' ')) {
|
||
|
char *ptr = &expr[2];
|
||
|
int from, to;
|
||
|
|
||
|
from = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
if (states[from] == NULL)
|
||
|
states[from] = xmlAutomataNewState(am);
|
||
|
ptr++;
|
||
|
to = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
if (states[to] == NULL)
|
||
|
states[to] = xmlAutomataNewState(am);
|
||
|
ptr++;
|
||
|
xmlAutomataNewTransition(am, states[from], states[to],
|
||
|
BAD_CAST ptr, NULL);
|
||
|
} else if ((am != NULL) && (expr[0] == 'e') && (expr[1] == ' ')) {
|
||
|
char *ptr = &expr[2];
|
||
|
int from, to;
|
||
|
|
||
|
from = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
if (states[from] == NULL)
|
||
|
states[from] = xmlAutomataNewState(am);
|
||
|
ptr++;
|
||
|
to = scanNumber(&ptr);
|
||
|
if (states[to] == NULL)
|
||
|
states[to] = xmlAutomataNewState(am);
|
||
|
xmlAutomataNewEpsilon(am, states[from], states[to]);
|
||
|
} else if ((am != NULL) && (expr[0] == 'f') && (expr[1] == ' ')) {
|
||
|
char *ptr = &expr[2];
|
||
|
int state;
|
||
|
|
||
|
state = scanNumber(&ptr);
|
||
|
if (states[state] == NULL) {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad state %d : %s\n", state, expr);
|
||
|
break;
|
||
|
}
|
||
|
xmlAutomataSetFinalState(am, states[state]);
|
||
|
} else if ((am != NULL) && (expr[0] == 'c') && (expr[1] == ' ')) {
|
||
|
char *ptr = &expr[2];
|
||
|
int from, to;
|
||
|
int min, max;
|
||
|
|
||
|
from = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
if (states[from] == NULL)
|
||
|
states[from] = xmlAutomataNewState(am);
|
||
|
ptr++;
|
||
|
to = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
if (states[to] == NULL)
|
||
|
states[to] = xmlAutomataNewState(am);
|
||
|
ptr++;
|
||
|
min = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
ptr++;
|
||
|
max = scanNumber(&ptr);
|
||
|
if (*ptr != ' ') {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Bad line %s\n", expr);
|
||
|
break;
|
||
|
}
|
||
|
ptr++;
|
||
|
xmlAutomataNewCountTrans(am, states[from], states[to],
|
||
|
BAD_CAST ptr, min, max, NULL);
|
||
|
} else if ((am != NULL) && (expr[0] == '-') && (expr[1] == '-')) {
|
||
|
/* end of the automata */
|
||
|
regexp = xmlAutomataCompile(am);
|
||
|
xmlFreeAutomata(am);
|
||
|
am = NULL;
|
||
|
if (regexp == NULL) {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Failed to compile the automata");
|
||
|
break;
|
||
|
}
|
||
|
} else if ((expr[0] == '=') && (expr[1] == '>')) {
|
||
|
if (regexp == NULL) {
|
||
|
printf("=> failed not compiled\n");
|
||
|
} else {
|
||
|
if (exec == NULL)
|
||
|
exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
|
||
|
if (ret == 0) {
|
||
|
ret = xmlRegExecPushString(exec, NULL, NULL);
|
||
|
}
|
||
|
if (ret == 1)
|
||
|
printf("=> Passed\n");
|
||
|
else if ((ret == 0) || (ret == -1))
|
||
|
printf("=> Failed\n");
|
||
|
else if (ret < 0)
|
||
|
printf("=> Error\n");
|
||
|
xmlRegFreeExecCtxt(exec);
|
||
|
exec = NULL;
|
||
|
}
|
||
|
ret = 0;
|
||
|
} else if (regexp != NULL) {
|
||
|
if (exec == NULL)
|
||
|
exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
|
||
|
ret = xmlRegExecPushString(exec, BAD_CAST expr, NULL);
|
||
|
} else {
|
||
|
xmlGenericError(xmlGenericErrorContext,
|
||
|
"Unexpected line %s\n", expr);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
fclose(input);
|
||
|
if (regexp != NULL)
|
||
|
xmlRegFreeRegexp(regexp);
|
||
|
if (exec != NULL)
|
||
|
xmlRegFreeExecCtxt(exec);
|
||
|
if (am != NULL)
|
||
|
xmlFreeAutomata(am);
|
||
|
}
|
||
|
|
||
|
int main(int argc, char **argv) {
|
||
|
|
||
|
xmlInitMemory();
|
||
|
|
||
|
if (argc == 1) {
|
||
|
int ret;
|
||
|
xmlAutomataPtr am;
|
||
|
xmlAutomataStatePtr start, cur;
|
||
|
xmlRegexpPtr regexp;
|
||
|
xmlRegExecCtxtPtr exec;
|
||
|
|
||
|
am = xmlNewAutomata();
|
||
|
start = xmlAutomataGetInitState(am);
|
||
|
|
||
|
/* generate a[ba]*a */
|
||
|
cur = xmlAutomataNewTransition(am, start, NULL, BAD_CAST"a", NULL);
|
||
|
xmlAutomataNewTransition(am, cur, cur, BAD_CAST"b", NULL);
|
||
|
xmlAutomataNewTransition(am, cur, cur, BAD_CAST"a", NULL);
|
||
|
cur = xmlAutomataNewCountTrans(am, cur, NULL, BAD_CAST"a", 2, 3, NULL);
|
||
|
xmlAutomataSetFinalState(am, cur);
|
||
|
|
||
|
/* compile it in a regexp and free the automata */
|
||
|
regexp = xmlAutomataCompile(am);
|
||
|
xmlFreeAutomata(am);
|
||
|
|
||
|
/* test the regexp */
|
||
|
xmlRegexpPrint(stdout, regexp);
|
||
|
exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
|
||
|
ret = xmlRegExecPushString(exec, BAD_CAST"a", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
ret =xmlRegExecPushString(exec, BAD_CAST"b", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
if (ret == 0) {
|
||
|
ret = xmlRegExecPushString(exec, NULL, NULL);
|
||
|
if (ret == 1)
|
||
|
printf("final\n");
|
||
|
else if (ret < 0)
|
||
|
printf("error\n");
|
||
|
}
|
||
|
xmlRegFreeExecCtxt(exec);
|
||
|
|
||
|
/* free the regexp */
|
||
|
xmlRegFreeRegexp(regexp);
|
||
|
} else {
|
||
|
int i;
|
||
|
|
||
|
for (i = 1;i < argc;i++)
|
||
|
testRegexpFile(argv[i]);
|
||
|
}
|
||
|
|
||
|
xmlCleanupParser();
|
||
|
xmlMemoryDump();
|
||
|
return(0);
|
||
|
}
|
||
|
|
||
|
#else
|
||
|
#include <stdio.h>
|
||
|
int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
|
||
|
printf("%s : Automata support not compiled in\n", argv[0]);
|
||
|
return(0);
|
||
|
}
|
||
|
#endif /* LIBXML_AUTOMATA_ENABLED */
|