/*
 * Name aliases.  The generic identifiers matcher/dissect/backref are mapped
 * onto s-prefixed names, and then (second group) onto l-prefixed names.
 * Both groups define the same three macros, so presumably each group sits
 * under its own conditional that is not part of this excerpt -- TODO confirm.
 */
60#define matcher smatcher
63#define dissect sdissect
64#define backref sbackref
71#define matcher lmatcher
74#define dissect ldissect
75#define backref lbackref
/*
 * Forward declarations for the file-local (static) matching routines.
 * matcher() and step() take the struct re_guts directly; dissect(),
 * backref(), fast() and slow() take a struct match plus a start/stop
 * pointer pair and a startst/stopst pair of sopno values.  backref()
 * additionally takes a sopno "lev".
 */
105static int matcher(
struct re_guts *g,
char *
string,
size_t nmatch,
regmatch_t pmatch[],
int eflags);
106static char * dissect(
struct match *
m,
char *start,
char *stop, sopno startst, sopno stopst);
107static char * backref(
struct match *
m,
char *start,
char *stop, sopno startst, sopno stopst, sopno lev);
108static char *
fast(
struct match *
m,
char *start,
char *stop, sopno startst, sopno stopst);
109static char * slow(
struct match *
m,
char *start,
char *stop, sopno startst, sopno stopst);
/* step() returns a `states` value computed from bef, ch and aft. */
110static states step(
struct re_guts *g, sopno start, sopno stop, states bef,
int ch, states aft);
/*
 * Pseudo-character codes, all expressed as offsets from BOL (BOL itself is
 * defined outside this excerpt).  NONCHAR(c) is true for any code above
 * CHAR_MAX, i.e. for values that cannot be an ordinary char; NNONCHAR is
 * the distance from CHAR_MAX to CODEMAX.
 */
113#define BOLEOL (BOL+2)
114#define NOTHING (BOL+3)
117#define CODEMAX (BOL+5)
118#define NONCHAR(c) ((c) > CHAR_MAX)
119#define NNONCHAR (CODEMAX-CHAR_MAX)
/*
 * Forward declarations for the tracing helpers: print() writes to the FILE
 * passed as `d`, at() takes a title plus a start/stop pointer pair and two
 * sopno values, and pchar() turns an int code into a string.
 */
121static void print(
struct match *
m,
char *caption, states st,
int ch, FILE *d);
124static void at(
struct match *
m,
char *title,
char *start,
char *stop, sopno startst, sopno stopst);
127 static char * pchar(
int ch);
/*
 * Tracing helpers.  All of them rely on a local variable named `m` being in
 * scope at the point of use.
 *   SP(t, s, c)  - forwards to print(), writing to stdout.
 *   AT(...)      - forwards to at().
 *   NOTE(str)    - prints "=<str>" and a newline when the REG_TRACE bit is
 *                  set in m->eflags.
 * AT is defined a second time with an empty body; presumably that is the
 * non-debug variant selected by a conditional not shown in this excerpt.
 */
136#define SP(t, s, c) print(m, t, s, c, stdout)
137#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
138#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
141#define AT(t, p1, p2, s1, s2)
/*
 * matcher - top-level matching routine.
 *
 * NOTE: this is an excerpt; the embedded source line numbers jump, so
 * statements between the fragments below are not shown.
 *
 * Parameters (types per the forward declaration):
 *   g       compiled pattern (struct re_guts *)
 *   string  subject text
 *   nmatch  number of entries the caller provides in pmatch[]
 *   pmatch  in: search window when REG_STARTEND is set; out: match offsets
 *   eflags  execution flags
 * Returns int; the return statements themselves are not visible here.
 *
 * Visible flow: choose the search window, pre-scan for g->must, run fast()
 * and then slow() from m->coldp, fill sub-match offsets through dissect()
 * or backref(), copy the offsets into pmatch[], and free the m->pmatch and
 * m->lastpos work arrays.
 */
151matcher(g,
string, nmatch, pmatch, eflags)
161 register struct match *
m = &mv;
/* first and last state indices handed to fast()/slow()/dissect()/backref() */
163 register const sopno gf = g->firststate+1;
164 register const sopno gl = g->laststate;
169 if (g->cflags&REG_NOSUB)
/* REG_STARTEND: the window is given by pmatch[0].rm_so / rm_eo */
171 if (eflags&REG_STARTEND) {
172 start =
string + pmatch[0].rm_so;
173 stop =
string + pmatch[0].rm_eo;
176 stop = start + strlen(start);
/* look for the g->must substring (g->mlen bytes) somewhere in the window */
182 if (g->must != NULL) {
183 for (dp = start; dp < stop; dp++)
184 if (*dp == g->must[0] && stop - dp >= g->mlen &&
185 memcmp(dp, g->must, (
size_t)g->mlen) == 0)
208 endp =
fast(
m, start, stop, gf, gl);
213 if (nmatch == 0 && !g->backrefs)
217 assert(
m->coldp != NULL);
219 NOTE(
"finding start");
220 endp = slow(
m,
m->coldp, stop, gf, gl);
223 assert(
m->coldp <
m->endp);
226 if (nmatch == 1 && !g->backrefs)
230 if (
m->pmatch == NULL)
233 if (
m->pmatch == NULL) {
/* mark every sub-match 1..nsub as unset */
237 for (i = 1; i <=
m->g->nsub; i++)
238 m->pmatch[i].rm_so =
m->pmatch[i].rm_eo = -1;
/* condition in front of the dissect() call: no back references, REG_BACKR off */
239 if (!g->backrefs && !(
m->eflags&REG_BACKR)) {
241 dp = dissect(
m,
m->coldp, endp, gf, gl);
/* m->lastpos gets nplus+1 slots, allocated only if still NULL */
243 if (g->nplus > 0 &&
m->lastpos == NULL)
244 m->lastpos = (
char **)malloc((g->nplus+1) *
246 if (g->nplus > 0 &&
m->lastpos == NULL) {
251 NOTE(
"backref dissect");
252 dp = backref(
m,
m->coldp, endp, gf, gl, (sopno)0);
259 assert(g->nplus == 0 ||
m->lastpos != NULL);
261 if (dp != NULL || endp <= m->coldp)
/* retry with the end limit pulled back by one character */
264 endp = slow(
m,
m->coldp, endp-1, gf, gl);
269 for (i = 1; i <=
m->g->nsub; i++) {
270 assert(
m->pmatch[i].rm_so == -1);
271 assert(
m->pmatch[i].rm_eo == -1);
274 NOTE(
"backoff dissect");
275 dp = backref(
m,
m->coldp, endp, gf, gl, (sopno)0);
277 assert(dp == NULL || dp == endp);
283 start =
m->coldp + 1;
284 assert(start <= stop);
/* offsets are reported relative to m->offp */
289 pmatch[0].rm_so =
m->coldp -
m->offp;
290 pmatch[0].rm_eo = endp -
m->offp;
293 assert(
m->pmatch != NULL);
294 for (i = 1; i < nmatch; i++)
296 pmatch[i] =
m->pmatch[i];
298 pmatch[i].rm_so = -1;
299 pmatch[i].rm_eo = -1;
/* release the work arrays */
303 if (
m->pmatch != NULL)
304 free((
char *)
m->pmatch);
305 if (
m->lastpos != NULL)
306 free((
char *)
m->lastpos);
/*
 * dissect - work out sub-match offsets for a span already known to match.
 *
 * NOTE: this is an excerpt; the embedded source line numbers jump, so the
 * case labels and several statements between the fragments are not shown.
 *
 * Visible behaviour: walks the strip from startst to stopst one sub-piece
 * (ss..es) at a time, uses slow() to find where the current piece ends
 * (`rest`) and to check that the remainder still matches (`tail`), recurses
 * into nested pieces, and records m->pmatch[i].rm_so / rm_eo as offsets
 * from m->offp.  Returns char * per the forward declaration.
 */
317dissect(
m, start, stop, startst, stopst)
335 register char *oldssp;
338 AT(
"diss", start, stop, startst, stopst);
340 for (ss = startst; ss < stopst; ss = es) {
/* advance es past the current piece */
343 switch (OP(
m->g->strip[es])) {
346 es += OPND(
m->g->strip[es]);
349 while (OP(
m->g->strip[es]) != O_CH)
350 es += OPND(
m->g->strip[es]);
/* dispatch on the operator at ss */
356 switch (OP(
m->g->strip[ss])) {
381 rest = slow(
m, sp, stp, ss, es);
382 assert(rest != NULL);
384 tail = slow(
m, rest, stop, es, stopst);
394 if (slow(
m, sp, rest, ssub, esub) != NULL) {
395 dp = dissect(
m, sp, rest, ssub, esub);
405 rest = slow(
m, sp, stp, ss, es);
406 assert(rest != NULL);
408 tail = slow(
m, rest, stop, es, stopst);
420 sep = slow(
m, ssp, rest, ssub, esub);
421 if (sep == NULL || sep == ssp)
432 assert(slow(
m, ssp, sep, ssub, esub) == rest);
433 dp = dissect(
m, ssp, sep, ssub, esub);
441 rest = slow(
m, sp, stp, ss, es);
442 assert(rest != NULL);
444 tail = slow(
m, rest, stop, es, stopst);
/* alternation: step through the OOR1/OOR2-delimited branches up to O_CH */
452 esub = ss + OPND(
m->g->strip[ss]) - 1;
453 assert(OP(
m->g->strip[esub]) == OOR1);
455 if (slow(
m, sp, rest, ssub, esub) == rest)
458 assert(OP(
m->g->strip[esub]) == OOR1);
460 assert(OP(
m->g->strip[esub]) == OOR2);
462 esub += OPND(
m->g->strip[esub]);
463 if (OP(
m->g->strip[esub]) == OOR2)
466 assert(OP(
m->g->strip[esub]) == O_CH);
468 dp = dissect(
m, sp, rest, ssub, esub);
/* record the start offset of sub-match i */
480 i = OPND(
m->g->strip[ss]);
481 assert(0 < i && i <= m->g->nsub);
482 m->pmatch[i].rm_so = sp -
m->offp;
/* record the end offset of sub-match i */
485 i = OPND(
m->g->strip[ss]);
486 assert(0 < i && i <= m->g->nsub);
487 m->pmatch[i].rm_eo = sp -
m->offp;
/*
 * backref - recursive matcher that also handles back references.
 *
 * NOTE: this is an excerpt; the embedded source line numbers jump, so the
 * case labels and several statements between the fragments are not shown.
 *
 * Visible behaviour: a first loop consumes "easy" operators (literal char,
 * character set, line and word boundary tests) until `hard` is set; the
 * remaining code handles one harder operator and recurses on the rest of
 * the strip.  `lev` indexes m->lastpos[].  Returns char * per the forward
 * declaration.
 */
505backref(
m, start, stop, startst, stopst, lev)
523 register regoff_t offsave;
526 AT(
"back", start, stop, startst, stopst);
531 for (ss = startst; !hard && ss < stopst; ss++)
532 switch (OP(s =
m->g->strip[ss])) {
/* literal: the next input char must equal the operand; it is consumed */
534 if (sp == stop || *sp++ != (
char)OPND(s))
/* character set: the next input char must be a member; it is consumed */
543 cs = &
m->g->sets[OPND(s)];
544 if (sp == stop || !CHIN(cs, *sp++))
/*
 * Beginning-of-line test.
 * NOTE(review): when sp == m->beginp and REG_NOTBOL is set, the second
 * operand evaluates *(sp-1); confirm a readable byte always precedes
 * m->beginp in that case.
 */
548 if ( (sp ==
m->beginp && !(
m->eflags&REG_NOTBOL)) ||
549 (sp < m->endp && *(sp-1) ==
'\n' &&
550 (
m->g->cflags&REG_NEWLINE)) )
/* end-of-line test */
556 if ( (sp ==
m->endp && !(
m->eflags&REG_NOTEOL)) ||
557 (sp < m->endp && *sp ==
'\n' &&
558 (
m->g->cflags&REG_NEWLINE)) )
/* start-of-word test: line start or non-word char before, word char at sp */
564 if (( (sp ==
m->beginp && !(
m->eflags&REG_NOTBOL)) ||
565 (sp < m->endp && *(sp-1) ==
'\n' &&
566 (
m->g->cflags&REG_NEWLINE)) ||
568 !ISWORD(*(sp-1))) ) &&
569 (sp < m->endp && ISWORD(*sp)) )
/* end-of-word test: line end or non-word char at sp, word char before */
575 if (( (sp ==
m->endp && !(
m->eflags&REG_NOTEOL)) ||
576 (sp < m->endp && *sp ==
'\n' &&
577 (
m->g->cflags&REG_NEWLINE)) ||
578 (sp < m->endp && !ISWORD(*sp)) ) &&
579 (sp >
m->beginp && ISWORD(*(sp-1))) )
590 assert(OP(s) == OOR2);
592 }
while (OP(s =
m->g->strip[ss]) != O_CH);
607 AT(
"hard", sp, stop, ss, stopst);
/* back reference: compare the input at sp with the text of sub-match i */
612 assert(0 < i && i <= m->g->nsub);
613 if (
m->pmatch[i].rm_eo == -1)
615 assert(
m->pmatch[i].rm_so != -1);
616 len =
m->pmatch[i].rm_eo -
m->pmatch[i].rm_so;
617 assert(stop -
m->beginp >= len);
620 ssp =
m->offp +
m->pmatch[i].rm_so;
621 if (memcmp(sp, ssp, len) != 0)
623 while (
m->g->strip[ss] != SOP(O_BACK, i))
625 return(backref(
m, sp+len, stop, ss+1, stopst, lev));
628 dp = backref(
m, sp, stop, ss+1, stopst, lev);
631 return(backref(
m, sp, stop, ss+OPND(s)+1, stopst, lev));
/* entering a repetition: remember sp in the next m->lastpos slot */
634 assert(
m->lastpos != NULL);
635 assert(lev+1 <=
m->g->nplus);
636 m->lastpos[lev+1] = sp;
637 return(backref(
m, sp, stop, ss+1, stopst, lev+1));
/* sp unchanged since the last visit at this level: leave the repetition */
640 if (sp ==
m->lastpos[lev])
641 return(backref(
m, sp, stop, ss+1, stopst, lev-1));
643 m->lastpos[lev] = sp;
644 dp = backref(
m, sp, stop, ss-OPND(s)+1, stopst, lev);
646 return(backref(
m, sp, stop, ss+1, stopst, lev-1));
/* alternation: try each OOR1/OOR2-delimited branch up to O_CH */
652 esub = ss + OPND(s) - 1;
653 assert(OP(
m->g->strip[esub]) == OOR1);
655 dp = backref(
m, sp, stop, ssub, esub, lev);
659 if (OP(
m->g->strip[esub]) == O_CH)
662 assert(OP(
m->g->strip[esub]) == OOR2);
664 esub += OPND(
m->g->strip[esub]);
665 if (OP(
m->g->strip[esub]) == OOR2)
668 assert(OP(
m->g->strip[esub]) == O_CH);
/* set rm_so of sub-match i for the recursive call; old value kept in offsave */
673 assert(0 < i && i <= m->g->nsub);
674 offsave =
m->pmatch[i].rm_so;
675 m->pmatch[i].rm_so = sp -
m->offp;
676 dp = backref(
m, sp, stop, ss+1, stopst, lev);
679 m->pmatch[i].rm_so = offsave;
/* same save/set/restore pattern for rm_eo of sub-match i */
684 assert(0 < i && i <= m->g->nsub);
685 offsave =
m->pmatch[i].rm_eo;
686 m->pmatch[i].rm_eo = sp -
m->offp;
687 dp = backref(
m, sp, stop, ss+1, stopst, lev);
690 m->pmatch[i].rm_eo = offsave;
/* dummy return; judging by the string it exists only to quiet the compiler */
701 return "shut up gcc";
/*
 * fast - step the state set across the input (first pass used by matcher()).
 *
 * NOTE: this is an excerpt; the embedded source line numbers jump, so the
 * loop header and several statements between the fragments are not shown.
 *
 * Visible behaviour: starts from m->st, feeds step() with NOTHING, then for
 * each position derives a flag code (BOL/EOL/BOLEOL, BOW/EOW) from the
 * previous char `lastc` and the current char `c`, feeds that to step(), and
 * finally feeds the character itself.  OUT stands in for "no character" at
 * m->beginp and m->endp.  Returns char * per the forward declaration.
 */
710fast(
m, start, stop, startst, stopst)
717 register states st =
m->st;
718 register states fresh =
m->fresh;
719 register states tmp =
m->tmp;
720 register char *p = start;
/* character before the window, or OUT when the window starts at beginp */
721 register int c = (start ==
m->beginp) ? OUT : *(start-1);
725 register char *coldp;
729 st = step(
m->g, startst, stopst, st, NOTHING, st);
736 c = (p ==
m->endp) ? OUT : *p;
/* beginning of line: after a newline (REG_NEWLINE) or at the very start */
743 if ( (lastc ==
'\n' &&
m->g->cflags&REG_NEWLINE) ||
744 (lastc == OUT && !(
m->eflags&REG_NOTBOL)) ) {
/* end of line: before a newline (REG_NEWLINE) or at the very end */
748 if ( (c ==
'\n' &&
m->g->cflags&REG_NEWLINE) ||
749 (c == OUT && !(
m->eflags&REG_NOTEOL)) ) {
750 flagch = (flagch == BOL) ? BOLEOL : EOL;
755 st = step(
m->g, startst, stopst, st, flagch, st);
/* word boundaries, judged from lastc and c */
760 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
761 (c != OUT && ISWORD(c)) ) {
764 if ( (lastc != OUT && ISWORD(lastc)) &&
765 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
768 if (flagch == BOW || flagch == EOW) {
769 st = step(
m->g, startst, stopst, st, flagch, st);
/* stop once the final state is reached or the input is exhausted */
774 if (ISSET(st, stopst) || p == stop)
781 st = step(
m->g, startst, stopst, tmp, c, st);
783 assert(EQ(step(
m->g, startst, stopst, st, NOTHING, st), st));
787 assert(coldp != NULL);
789 if (ISSET(st, stopst))
/*
 * slow - step the state set across the input, with tracing.
 *
 * NOTE: this is an excerpt; the embedded source line numbers jump, so the
 * loop header and several statements between the fragments are not shown.
 *
 * Visible behaviour: same stepping scheme as fast() (NOTHING, then a
 * BOL/EOL/BOLEOL or BOW/EOW flag code, then the character itself), plus
 * AT/SP trace calls.  It tests ISSET(st, stopst) at each position and stops
 * when the state set equals m->empty or p reaches stop.  Returns char * per
 * the forward declaration.
 */
801slow(
m, start, stop, startst, stopst)
808 register states st =
m->st;
809 register states empty =
m->empty;
810 register states tmp =
m->tmp;
811 register char *p = start;
/* character before the window, or OUT when the window starts at beginp */
812 register int c = (start ==
m->beginp) ? OUT : *(start-1);
816 register char *matchp;
818 AT(
"slow", start, stop, startst, stopst);
821 SP(
"sstart", st, *p);
822 st = step(
m->g, startst, stopst, st, NOTHING, st);
827 c = (p ==
m->endp) ? OUT : *p;
/* beginning of line: after a newline (REG_NEWLINE) or at the very start */
832 if ( (lastc ==
'\n' &&
m->g->cflags&REG_NEWLINE) ||
833 (lastc == OUT && !(
m->eflags&REG_NOTBOL)) ) {
/* end of line: before a newline (REG_NEWLINE) or at the very end */
837 if ( (c ==
'\n' &&
m->g->cflags&REG_NEWLINE) ||
838 (c == OUT && !(
m->eflags&REG_NOTEOL)) ) {
839 flagch = (flagch == BOL) ? BOLEOL : EOL;
844 st = step(
m->g, startst, stopst, st, flagch, st);
845 SP(
"sboleol", st, c);
/* word boundaries, judged from lastc and c */
849 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
850 (c != OUT && ISWORD(c)) ) {
853 if ( (lastc != OUT && ISWORD(lastc)) &&
854 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
857 if (flagch == BOW || flagch == EOW) {
858 st = step(
m->g, startst, stopst, st, flagch, st);
859 SP(
"sboweow", st, c);
863 if (ISSET(st, stopst))
/* nothing left alive, or input exhausted */
865 if (EQ(st, empty) || p == stop)
872 st = step(
m->g, startst, stopst, tmp, c, st);
874 assert(EQ(step(
m->g, startst, stopst, st, NOTHING, st), st));
/*
 * step - compute the state set `aft` reached from `bef` on input code `ch`.
 *
 * NOTE: this is an excerpt; the embedded source line numbers jump, so the
 * switch and its case labels between the fragments are not shown.
 *
 * Visible behaviour: walks the strip from start to stop, keeping `here` in
 * step with pc.  `ch` is either an ordinary character or one of the
 * pseudo-codes (BOL, EOL, BOLEOL, ...); NONCHAR(ch) tells them apart.
 * Returns a `states` value per the forward declaration.
 */
897step(g, start, stop, bef, ch, aft)
908 register onestate here;
912 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
916 assert(pc == stop-1);
/* literal character operand */
920 assert(!NONCHAR(ch) || ch != (
char)OPND(s));
921 if (ch == (
char)OPND(s))
/* line-boundary pseudo-codes; BOLEOL satisfies both tests */
925 if (ch == BOL || ch == BOLEOL)
929 if (ch == EOL || ch == BOLEOL)
/* character set: only real characters can be members */
945 cs = &g->sets[OPND(s)];
946 if (!NONCHAR(ch) && CHIN(cs, ch))
/* backward link: i records whether the target was already set beforehand */
958 i = ISSETBACK(aft, OPND(s));
959 BACK(aft, aft, OPND(s));
960 if (!i && ISSETBACK(aft, OPND(s))) {
968 FWD(aft, aft, OPND(s));
979 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
980 FWD(aft, aft, OPND(s));
983 if (ISSTATEIN(aft, here)) {
985 OP(s = g->strip[pc+look]) != O_CH;
987 assert(OP(s) == OOR2);
993 if (OP(g->strip[pc+OPND(s)]) != O_CH) {
994 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
995 FWD(aft, aft, OPND(s));
/*
 * print - trace helper: write a caption, the current character code and a
 * list of state numbers to stream d.
 *
 * NOTE: this is an excerpt; lines between the numbered fragments (including
 * the test that selects which states are listed) are not shown.
 *
 * Does its output only when the REG_TRACE bit is set in m->eflags (the
 * statement under the guard is not visible).  The first listed state is
 * preceded by a tab, later ones by ", ".
 */
1019print(
m, caption, st, ch, d)
1028 register int first = 1;
1030 if (!(
m->eflags&REG_TRACE))
1033 fprintf(d,
"%s", caption);
1035 fprintf(d,
" %s", pchar(ch));
1036 for (i = 0; i < g->nstates; i++)
1038 fprintf(d,
"%s%d", (first) ?
"\t" :
", ", i);
/*
 * at - trace helper: print a title, the characters at *start and *stop, and
 * the startst/stopst values (as longs) on stdout.
 *
 * NOTE: this is an excerpt; lines between the numbered fragments are not
 * shown.  Output is guarded by the REG_TRACE bit in m->eflags (the statement
 * under the guard is not visible).
 *
 * The two pchar() results are printed by separate printf() calls;
 * presumably pchar() returns a shared static buffer, so both could not be
 * used in one call -- TODO confirm.
 */
1052at(
m, title, start, stop, startst, stopst)
1060 if (!(
m->eflags&REG_TRACE))
1063 printf(
"%s %s-", title, pchar(*start));
1064 printf(
"%s ", pchar(*stop));
1065 printf(
"%ld-%ld\n", (
long)startst, (
long)stopst);
/*
 * Body fragment that formats the int code `ch` into the static buffer pbuf;
 * presumably this is pchar(), whose header line is not part of this excerpt.
 * Printable characters and ' ' are written as themselves, everything else
 * as a backslash followed by the value in octal.
 *
 * NOTE(review): pbuf holds 10 bytes.  "\\%o" applied to a negative or very
 * large int can need more than that (backslash + up to 11 octal digits +
 * NUL with a 32-bit int); confirm callers only pass small values.
 */
1085 static char pbuf[10];
1087 if (isprint((uch)ch) || ch ==
' ')
1088 sprintf(pbuf,
"%c", ch);
1090 sprintf(pbuf,
"\\%o", ch);
@ fast
If on, allow fast machine code; if off (the default), disable it.