YAP 7.1.0
scanner.c
Go to the documentation of this file.
1/*************************************************************************
2 * *
3 * YAP Prolog *
4 * *
5 * Yap Prolog was developed at NCCUP - Universidade do Porto *
6 * *
7 * Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003 *
8 * *
9 **************************************************************************
10 * *
11 * File: %W% %G% *
12 * Last rev: 22-1-03 *
13 * mods: *
14 * comments: Prolog's scanner *
15 * *
16 *************************************************************************/
17
46#include "Yap.h"
47#include "YapEval.h"
48#include "YapHeap.h"
49#include "Yatom.h"
50#include "alloc.h"
51#include "yapio.h"
52
53
54/* stuff we want to use in standard YAP code */
55#include "YapText.h"
56#if _MSC_VER || defined(__MINGW32__)
57#if HAVE_FINITE == 1
58#undef HAVE_FINITE
59#endif
60#include <windows.h>
61#endif
62#include "iopreds.h"
63#if HAVE_STRING_H
64#include <string.h>
65#endif
66#if HAVE_WCTYPE_H
67#include <wctype.h>
68#endif
69#if O_LOCALE
70#include "locale.h"
71#endif
72
73/* You just can't trust some machines */
74#define my_isxdigit(C, SU, SL) \
75 (chtype(C) == NU || (C >= 'A' && C <= (SU)) || (C >= 'a' && C <= (SL)))
76#define my_isupper(C) (C >= 'A' && C <= 'Z')
77#define my_islower(C) (C >= 'a' && C <= 'z')
78
79static Term float_send(char *, int);
80static Term get_num(int *, int *, struct stream_desc *, int, char **, size_t *);
81
86static void Yap_setCurrentSourceLocation(struct stream_desc *s) {
87 CACHE_REGS
88#if HAVE_SOCKET
89 if (s->status & Socket_Stream_f)
90 LOCAL_SourceFileName = AtomSocket;
91 else
92#endif
93 if (s->status & Pipe_Stream_f)
94 LOCAL_SourceFileName = AtomPipe;
95 else if (s->status & InMemory_Stream_f)
96 LOCAL_SourceFileName = s->name;
97 else
98 LOCAL_SourceFileName = s->name;
99 LOCAL_SourceFileLineno = s->linecount;
100}
101
102/* token table with some help from Richard O'Keefe's PD scanner */
103char_kind_t Yap_chtype0[NUMBER_OF_CHARS + 1] = {
104 EF,
105 /* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si
106
107 */
108 BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
109
110 /* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us
111 */
112 BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
113
114 /* sp ! " # $ % & ' ( ) * + , - . / */
115 BS, SL, DC, SY, SY, CC, SY, QT, BK, BK, SY, SY, BK, SY, SY, SY,
116
117 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
118 NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SY, SL, SY, SY, SY, SY,
119
120 /* @ A B C D E F G H I J K L M N O */
121 SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
122
123 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
124 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, BK, SY, BK, SY, UL,
125
126 /* ` a b c d e f g h i j k l m n o */
127 SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
128
129 /* p q r s t u v w x y z { | } ~ del */
130 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BK, BK, BK, SY, BS,
131
132 /* 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
133 */
134 BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
135
136 /* 144 145 ’ 147 148 149 150 151 152 153 154 155 156 157 158 159
137 */
138 BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
139
140 /* ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ */
141 BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SY, SY, SY,
142
143 /* ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ */
144 SY, SY, LC, LC, SY, SY, SY, SY, SY, LC, LC, SY, SY, SY, SY, SY,
145
146 /* À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï */
147 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
148
149/* Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß */
150#ifdef vms
151 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, LC,
152#else
153 UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC,
154#endif
155 /* à á â ã ä å æ ç è é ê ë ì í î ï */
156 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
157
158/* ð ñ ò ó ô õ ö ÷ ø ù ú û ü cannot write the last
159 * three because of lcc */
160#ifdef vms
161 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC
162#else
163 LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC
164#endif
165};
166
167// standard get char, uses conversion table
168// and converts to wide
169static inline int getchr(struct stream_desc *inp) {
170 /* if (inp != inp0) { fprintf(stderr,"\n %s
171 * **********************************\n", AtomName(inp->name)); */
172 /* inp0 = inp; */
173 /* } */
174 int sno = inp - GLOBAL_Stream;
175 int ch = inp->stream_wgetc_for_read(sno);
176 // fprintf(stderr,"%c", ch);
177 return ch;
178}
179// get char for quoted data, eg, quoted atoms and so on
180// converts to wide
181static inline int getchrq(struct stream_desc *inp) {
182 int ch = inp->stream_wgetc(inp - GLOBAL_Stream);
183 return ch;
184}
185
186/* in case there is an overflow */
187typedef struct scanner_extra_alloc {
188 struct scanner_extra_alloc *next;
189 void *filler;
191
192#define CodeSpaceError(t, p, l) CodeSpaceError__(t, p, l PASS_REGS)
193static TokEntry *CodeSpaceError__(TokEntry *t, TokEntry *p,
194 TokEntry *l USES_REGS) {
195 LOCAL_Error_TYPE = RESOURCE_ERROR_HEAP;
196 LOCAL_ErrorMessage = "Code Space Overflow";
197 if (t) {
198 t->Tok = eot_tok;
199 t->TokInfo = TermOutOfHeapError;
200 }
201 /* serious error now */
202 return l;
203}
204
205#define TrailSpaceError(t, l) TrailSpaceError__(t, l PASS_REGS)
206static TokEntry *TrailSpaceError__(TokEntry *t, TokEntry *l USES_REGS) {
207 LOCAL_ErrorMessage = "Trail Overflow";
208 LOCAL_Error_TYPE = RESOURCE_ERROR_TRAIL;
209 if (t) {
210 t->Tok = eot_tok;
211 t->TokInfo = TermOutOfTrailError;
212 }
213 return l;
214}
215extern double atof(const char *);
216
217static Term float_send(char *s, int sign) {
218 Float f = (Float)(sign * atof(s));
219#if HAVE_ISFINITE || defined(isfinite)
220 if (trueGlobalPrologFlag(ISO_FLAG)) { /* iso */
221 if (!isfinite(f)) {
222 CACHE_REGS
223 LOCAL_ErrorMessage = "Float overflow while scanning";
224 return (MkEvalFl(f));
225 }
226 }
227#elif HAVE_FINITE
228 if (trueGlobalPrologFlag(ISO_FLAG)) { /* iso */
229 if (!finite(f)) {
230 LOCAL_ErrorMessage = "Float overflow while scanning";
231 return (MkEvalFl(f));
232 }
233 }
234#endif
235 {
236 CACHE_REGS
237 return MkFloatTerm(f);
238 }
239}
240
241/* we have an overflow at s */
242static Term read_int_overflow(const char *s, Int base, Int val, int sign) {
243#ifdef USE_GMP
244 /* try to scan it as a bignum */
245 mpz_t new;
246 Term t;
247
248 mpz_init_set_str(new, s, base);
249 if (sign < 0)
250 mpz_neg(new, new);
251 t = Yap_MkBigIntTerm(new);
252 mpz_clear(new);
253 return t;
254#else
255 CACHE_REGS
256 /* try to scan it as a float */
257 return MkIntegerTerm(val);
258#endif
259}
260
261static wchar_t read_quoted_char(int *scan_nextp, struct stream_desc *st) {
262 int ch;
263
264/* escape sequence */
265do_switch:
266 ch = getchrq(st);
267 switch (ch) {
268 case 10:
269 return 0;
270 case '\\':
271 return '\\';
272 case 'a':
273 return '\a';
274 case 'b':
275 return '\b';
276 case 'c':
277 while (chtype((ch = getchrq(st))) == BS)
278 ;
279 {
280 if (ch == '\\') {
281 goto do_switch;
282 }
283 return ch;
284 }
285 case 'd':
286 return 127;
287 case 'e':
288 return '\x1B'; /* <ESC>, a.k.a. \e */
289 case 'f':
290 return '\f';
291 case 'n':
292 return '\n';
293 case 'r':
294 return '\r';
295 case 's': /* space */
296 return ' ';
297 case 't':
298 return '\t';
299 case 'u': {
300 int i;
301 wchar_t wc = '\0';
302
303 for (i = 0; i < 4; i++) {
304 ch = getchrq(st);
305 if (ch >= '0' && ch <= '9') {
306 wc += (ch - '0') << ((3 - i) * 4);
307 } else if (ch >= 'a' && ch <= 'f') {
308 wc += ((ch - 'a') + 10) << ((3 - i) * 4);
309 } else if (ch >= 'A' && ch <= 'F') {
310 wc += ((ch - 'A') + 10) << ((3 - i) * 4);
311 } else {
312 return Yap_encoding_error(wc, 1, st);
313 }
314 }
315 return wc;
316 }
317 case 'U': {
318 int i;
319 wchar_t wc = '\0';
320
321 for (i = 0; i < 8; i++) {
322 ch = getchrq(st);
323 if (ch >= '0' && ch <= '9') {
324 wc += (ch - '0') << ((7 - i) * 4);
325 } else if (ch >= 'a' && ch <= 'f') {
326 wc += ((ch - 'a') + 10) << ((7 - i) * 4);
327 } else if (ch >= 'A' && ch <= 'F') {
328 wc += ((ch - 'A') + 10) << ((7 - i) * 4);
329 } else {
330 return Yap_encoding_error(wc, 1, st);
331 }
332 }
333 return wc;
334 }
335 case 'v':
336 return '\v';
337 case 'z': /* Prolog end-of-file */
338 return Yap_encoding_error(ch, 1, st);
339 case '\'':
340 return '\'';
341 case '"':
342 return '"';
343 case '`':
344 return '`';
345 case '^':
346 if (trueGlobalPrologFlag(ISO_FLAG)) {
347 return Yap_encoding_error(ch, 1, st);
348 } else {
349 ch = getchrq(st);
350 if (ch == '?') { /* delete character */
351 return 127;
352 } else if (ch >= 'a' && ch < 'z') { /* hexa */
353 return ch - 'a';
354 } else if (ch >= 'A' && ch < 'Z') { /* hexa */
355 return ch - 'A';
356 } else {
357 return '^';
358 }
359 }
360 case '0':
361 case '1':
362 case '2':
363 case '3':
364 case '4':
365 case '5':
366 case '6':
367 case '7':
368 /* character in octal: maximum of 3 digits, terminates with \ */
369 /* follow ISO */
370 {
371 unsigned char so_far = ch - '0';
372 ch = getchrq(st);
373 if (ch >= '0' && ch < '8') { /* octal */
374 so_far = so_far * 8 + (ch - '0');
375 ch = getchrq(st);
376 if (ch >= '0' && ch < '8') { /* octal */
377 so_far = so_far * 8 + (ch - '0');
378 ch = getchrq(st);
379 if (ch != '\\') {
380 return Yap_encoding_error(ch, 1, st);
381 }
382 return so_far;
383 } else if (ch == '\\') {
384 return so_far;
385 } else {
386 return Yap_encoding_error(ch, 1, st);
387 }
388 } else if (ch == '\\') {
389 return so_far;
390 } else {
391 return Yap_encoding_error(ch, 1, st);
392 }
393 }
394 case 'x':
395 /* hexadecimal character (YAP allows empty hexadecimal */
396 {
397 unsigned char so_far = 0;
398 ch = getchrq(st);
399 if (my_isxdigit(ch, 'f', 'F')) { /* hexa */
400 so_far =
401 so_far * 16 + (chtype(ch) == NU
402 ? ch - '0'
403 : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
404 ch = getchrq(st);
405 if (my_isxdigit(ch, 'f', 'F')) { /* hexa */
406 so_far =
407 so_far * 16 + (chtype(ch) == NU
408 ? ch - '0'
409 : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
410 ch = getchrq(st);
411 if (ch == '\\') {
412 return so_far;
413 } else {
414 return Yap_encoding_error(ch, 1, st);
415 }
416 } else if (ch == '\\') {
417 return so_far;
418 } else {
419 return Yap_encoding_error(ch, 1, st);
420 }
421 } else if (ch == '\\') {
422 return so_far;
423 } else {
424 return Yap_encoding_error(ch, 1, st);
425 }
426 }
427 default:
428 /* reject sequence. The ISO standard does not
429 consider this sequence legal, whereas SICStus would
430 eat up the escape sequence. */
431 return Yap_encoding_error(ch, 1, st);
432 }
433}
434
435#define number_overflow() \
436 { \
437 imgsz = Yap_Min(imgsz * 2, imgsz); \
438 char *nbuf; \
439 nbuf = Realloc(buf, imgsz); \
440 left = imgsz - max_size; \
441 max_size = imgsz; \
442 buf = nbuf; \
443 }
444
445/* reads a number, either integer or float */
446
447static Term get_num(int *chp, int *chbuffp, StreamDesc *st, int sign,
448 char **bufp, size_t *szp) {
449 int ch = *chp;
450 Int val = 0L, base = ch - '0';
451 int might_be_float = TRUE, has_overflow = FALSE;
452 const unsigned char *decimalpoint;
453 char *buf0 = *bufp, *sp = buf0, *buf = buf0;
454 size_t imgsz = *szp, max_size = imgsz, left = max_size - 2;
455
456 *sp++ = ch;
457 ch = getchr(st);
458 /*
459 * because of things like 00'2, 03'2 and even better 12'2, I need to
460 * do this (have mercy)
461 */
462 if (chtype(ch) == NU) {
463 *sp++ = ch;
464 if (--left == 0)
465 number_overflow();
466 base = 10 * base + ch - '0';
467 ch = getchr(st);
468 }
469 if (ch == '\'') {
470 if (base > 36) {
471 return Yap_symbol_encoding_error(ch, 1, st, "Admissible bases are 11..36");
472 }
473 might_be_float = FALSE;
474 if (--left == 0)
475 number_overflow();
476 *sp++ = ch;
477 ch = getchr(st);
478 if (base == 0) {
479 CACHE_REGS
480 wchar_t ascii = ch;
481 int scan_extra = TRUE;
482
483 if (ch == '\\' &&
484 Yap_GetModuleEntry(CurrentModule)->flags & M_CHARESCAPE) {
485 ascii = read_quoted_char(&scan_extra, st);
486 }
487 /* a quick way to represent ASCII */
488 if (scan_extra)
489 *chp = getchr(st);
490 if (sign == -1) {
491 return MkIntegerTerm(-ascii);
492 }
493 return MkIntegerTerm(ascii);
494 } else if (base >= 10 && base <= 36) {
495 int upper_case = 'A' - 11 + base;
496 int lower_case = 'a' - 11 + base;
497
498 while (my_isxdigit(ch, upper_case, lower_case)) {
499 Int oval = val;
500 int chval =
501 (chtype(ch) == NU ? ch - '0'
502 : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
503 if (--left == 0)
504 number_overflow();
505 *sp++ = ch;
506 val = oval * base + chval;
507 if (oval != (val - chval) / base) /* overflow */
508 has_overflow = (has_overflow || TRUE);
509 ch = getchr(st);
510 }
511 }
512 } else if (ch == 'x' && base == 0) {
513 might_be_float = FALSE;
514 if (--left == 0)
515 number_overflow();
516 *sp++ = ch;
517 ch = getchr(st);
518 if (!my_isxdigit(ch, 'F', 'f')) {
519 return Yap_symbol_encoding_error(ch, 1, st, "invalid hexadecimal digit");
520 }
521 while (my_isxdigit(ch, 'F', 'f')) {
522 Int oval = val;
523 int chval =
524 (chtype(ch) == NU ? ch - '0'
525 : (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
526 if (--left == 0)
527 number_overflow();
528 *sp++ = ch;
529 val = val * 16 + chval;
530 if (oval != (val - chval) / 16) /* overflow */
531 has_overflow = TRUE;
532 ch = getchr(st);
533 }
534 *chp = ch;
535 } else if (ch == 'o' && base == 0) {
536 might_be_float = false;
537 base = 8;
538 ch = getchr(st);
539 if (ch < '0' || ch > '7') {
540 return Yap_symbol_encoding_error(ch, 1, st, "invalid octal digit");
541 }
542 } else if (ch == 'b' && base == 0) {
543 might_be_float = false;
544 base = 2;
545 ch = getchr(st);
546 if (ch < '0' || ch > '1') {
547 return Yap_symbol_encoding_error(ch, 1, st, "invalid octal digit");
548 }
549
550 } else {
551 val = base;
552 base = 10;
553 }
554 while (chtype(ch) == NU) {
555 Int oval = val;
556 if (!(val == 0 && ch == '0') || has_overflow) {
557 if (--left == 0)
558 number_overflow();
559 *sp++ = ch;
560 }
561 if (ch - '0' >= base) {
562 CACHE_REGS
563 if (sign == -1)
564 return MkIntegerTerm(-val);
565 return MkIntegerTerm(val);
566 }
567 if (oval > Int_MAX / 10 - (ch - '0')) /* overflow */
568 has_overflow = true;
569 else
570 val = val * base + ch - '0';
571 ch = getchr(st);
572 }
573 if (might_be_float && (ch == '.' || ch == 'e' || ch == 'E')) {
574 int has_dot = (ch == '.');
575 if (has_dot) {
576 unsigned char *dp;
577 int dc;
578 if (chtype(ch = getchr(st)) != NU) {
579 if (ch == 'e' || ch == 'E') {
580 if (trueGlobalPrologFlag(ISO_FLAG))
581 return Yap_symbol_encoding_error(ch, 1, st, "e/E float format not allowed in ISO mode");
582 } else { /* followed by a letter, end of term? */
583 CACHE_REGS
584 sp[0] = '\0';
585 *chbuffp = '.';
586 *chp = ch;
587 if (has_overflow)
588 return read_int_overflow(buf, base, val, sign);
589 if (sign == -1)
590 return MkIntegerTerm(-val);
591 return MkIntegerTerm(val);
592 }
593 }
594#if O_LOCALE
595 if ((decimalpoint = (unsigned char *)(localeconv()->decimal_point)) ==
596 NULL)
597#endif
598 decimalpoint = (const unsigned char *)".";
599 dp = (unsigned char *)decimalpoint;
600 /* translate . to current locale */
601 while ((dc = *dp++) != '\0') {
602 *sp++ = dc;
603 if (--left == 0)
604 number_overflow();
605 }
606 /* numbers after . */
607 if (chtype(ch) == NU) {
608 do {
609 if (--left == 0)
610 number_overflow();
611 *sp++ = ch;
612 } while (chtype(ch = getchr(st)) == NU);
613 }
614 }
615 if (ch == 'e' || ch == 'E') {
616 if (--left == 0)
617 number_overflow();
618 *sp++ = ch;
619 ch = getchr(st);
620 if (ch == '-') {
621 if (--left == 0)
622 number_overflow();
623 *sp++ = '-';
624 ch = getchr(st);
625 } else if (ch == '+') {
626 ch = getchr(st);
627 }
628 if (chtype(ch) != NU) {
629 CACHE_REGS
630 if (has_dot)
631 return float_send(buf, sign);
632 return MkIntegerTerm(sign * val);
633 }
634 do {
635 if (--left == 0)
636 number_overflow();
637 *sp++ = ch;
638 } while (chtype(ch = getchr(st)) == NU);
639 }
640 *sp = '\0';
641 *chp = ch;
642 return float_send(buf, sign);
643 } else if (has_overflow) {
644 *sp = '\0';
645 /* skip base */
646 *chp = ch;
647 if (buf[0] == '0' && buf[1] == 'x')
648 return read_int_overflow(buf + 2, 16, val, sign);
649 else if (buf[0] == '0' && buf[1] == 'o')
650 return read_int_overflow(buf + 2, 8, val, sign);
651 else if (buf[0] == '0' && buf[1] == 'b')
652 return read_int_overflow(buf + 2, 2, val, sign);
653 if (buf[1] == '\'')
654 return read_int_overflow(buf + 2, base, val, sign);
655 if (buf[2] == '\'')
656 return read_int_overflow(buf + 3, base, val, sign);
657 return read_int_overflow(buf, base, val, sign);
658 } else {
659 CACHE_REGS
660 *chp = ch;
661 return MkIntegerTerm(val * sign);
662 }
663}
664
667 CACHE_REGS
668 Term out;
669 int sign = 1;
670 int ch, cherr = 0;
671 char *ptr;
672 int lvl = push_text_stack();
673 LOCAL_VarTable = LOCAL_AnonVarTable = NULL;
674 LOCAL_VarList = LOCAL_VarTail = NULL;
675 if (!(ptr = Malloc(4096))) {
676 pop_text_stack(lvl);
677 Yap_ThrowError(RESOURCE_ERROR_HEAP, TermNil, "scanner: failed to allocate token image");
678 return 0;
679 }
680#if HAVE_ISWSPACE
681 while (iswspace(ch = getchr(inp)))
682 ;
683#else
684 while (isspace(ch = getchr(inp)))
685 ;
686#endif
687 TokEntry *tokptr = Malloc(sizeof(TokEntry));
688 tokptr->TokLine = GetCurInpLine(inp);
689 tokptr->TokPos = GetCurInpPos(inp);
690 tokptr->TokOffset = GetCurInpOffset(inp);
691 if (ch == '-') {
692 sign = -1;
693 ch = getchr(inp);
694 } else if (ch == '+') {
695 ch = getchr(inp);
696 }
697 if (chtype(ch) == NU) {
698 cherr = '\0';
699 if (ASP - HR < 1024) {
700 pop_text_stack(lvl);
701 Yap_ThrowError(RESOURCE_ERROR_STACK, TermNil, "scanner: failed to allocate token image");
702 return 0;
703 }
704 size_t sz = 1024;
705 char *buf = Malloc(sz);
706 out = get_num(&ch, &cherr, inp, sign, &buf, &sz); /* */
707 } else {
708
709 out = 0;
710 }
711#if HAVE_ISWSPACE
712 while (iswspace(ch = getchr(inp)))
713 ;
714#else
715 while (isspace(ch = getchr(inp)))
716 ;
717#endif
718 if (ch == EOFCHAR || (ch == '.' &&
719#if HAVE_ISWSPACE
720 (iswspace(ch = getchr(inp)) || ch == EOFCHAR)
721#else
722 (isspace(ch = getchr(inp)) || ch == EOFCHAR)
723#endif
724 )) {
725 pop_text_stack(lvl);
726
727}
728 return out;
729 }
730
731#define CHECK_SPACE() \
732 if (ASP - HR < 1024) { \
733 LOCAL_ErrorMessage = "Stack Overflow"; \
734 LOCAL_Error_TYPE = RESOURCE_ERROR_STACK; \
735 LOCAL_Error_Size = 0L; \
736 if (p) { \
737 p->Tok = Ord(kind = eot_tok); \
738 p->TokInfo = TermOutOfStackError; \
739 } \
740 /* serious error now */ \
741 return l; \
742 }
743
747Term Yap_tokRep(void *tokptre) {
748 CACHE_REGS
749 TokEntry *tokptr = tokptre;
750 Term info = tokptr->TokInfo;
751
752 switch (tokptr->Tok) {
753 case Name_tok:
754 if (!info) {
755 info = TermNil;
756 } else {
757 info = MkAtomTerm((Atom)info);
758 }
759 return Yap_MkApplTerm(Yap_MkFunctor(AtomAtom, 1), 1, &info);
760 case QuasiQuotes_tok:
761 info = MkAtomTerm(Yap_LookupAtom("<QQ>"));
762 return Yap_MkApplTerm(Yap_MkFunctor(AtomAtom, 1), 1, &info);
763 case Number_tok:
764 return Yap_MkApplTerm(Yap_MkFunctor(AtomNumber, 1), 1, &info);
765 break;
766 case Var_tok: {
767 Term t[2];
768 VarEntry *varinfo = (VarEntry *)info;
769 if ((t[0] = varinfo->VarAdr) == TermNil) {
770 t[0] = varinfo->VarAdr = MkVarTerm();
771 }
772 t[1] = MkAtomTerm((Atom)(varinfo->VarRep));
773 return Yap_MkApplTerm(Yap_MkFunctor(AtomGVar, 2), 2, t);
774 }
775 case String_tok:
776 return Yap_MkApplTerm(Yap_MkFunctor(AtomString, 1), 1, &info);
777 case BQString_tok:
778 return Yap_MkApplTerm(Yap_MkFunctor(AtomString, 1), 1, &info);
779 case Error_tok:
780 return MkAtomTerm(AtomError);
781 case eot_tok:
782 return MkAtomTerm(Yap_LookupAtom("EOT"));
783 case Ponctuation_tok:
784 if (info == Terml)
785 return TermBeginBracket;
786 else
787 return info;
788 }
789 return TermDot;
790}
791
792const char *Yap_tokText(void *tokptre) {
793 CACHE_REGS
794 TokEntry *tokptr = tokptre;
795 Term info = tokptr->TokInfo;
796
797 switch (tokptr->Tok) {
798 case eot_tok:
799 return "EOT";
800 case Ponctuation_tok:
801 if (info == Terml)
802 return "(";
803 case Error_tok:
804 case BQString_tok:
805 case String_tok:
806 case Name_tok:
807 return AtomOfTerm(info)->StrOfAE;
808 case QuasiQuotes_tok:
809 return "<QQ>";
810 case Number_tok:
811 if (IsIntegerTerm(info)) {
812 char *s = Malloc(36);
813 snprintf(s, 35, Int_FORMAT, IntegerOfTerm(info));
814 return s;
815 } else if (IsFloatTerm(info)) {
816 char *s = Malloc(64);
817 snprintf(s, 63, "%6g", FloatOfTerm(info));
818 return s;
819 } else {
820 size_t len = Yap_gmp_to_size(info, 10);
821 char *s = Malloc(len + 2);
822 return Yap_gmp_to_string(info, s, len + 1, 10);
823 }
824 break;
825 case Var_tok:
826 if (info == 0)
827 return "[]";
828 return ((Atom)info)->StrOfAE;
829 }
830 return ".";
831}
832
833static void open_comment(int ch, StreamDesc *st USES_REGS) {
834 CELL *h0 = HR;
835 HR += 5;
836 h0[0] = AbsAppl(h0 + 2);
837 h0[1] = TermNil;
838 if (!LOCAL_CommentsTail) {
839 /* first comment */
840 LOCAL_Comments = AbsPair(h0);
841 } else {
842 /* extra comment */
843 *LOCAL_CommentsTail = AbsPair(h0);
844 }
845 LOCAL_CommentsTail = h0 + 1;
846 h0 += 2;
847 h0[0] = (CELL)FunctorMinus;
848 h0[1] = Yap_StreamPosition(st - GLOBAL_Stream);
849 h0[2] = TermNil;
850 LOCAL_CommentsNextChar = h0 + 2;
851 LOCAL_CommentsBuff = (wchar_t *)Malloc(1024 * sizeof(wchar_t));
852 LOCAL_CommentsBuffLim = 1024;
853 LOCAL_CommentsBuff[0] = ch;
854 LOCAL_CommentsBuffPos = 1;
855}
856
857static void extend_comment(int ch USES_REGS) {
858 LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = ch;
859 LOCAL_CommentsBuffPos++;
860 if (LOCAL_CommentsBuffPos == LOCAL_CommentsBuffLim - 1) {
861 LOCAL_CommentsBuff = (wchar_t *)realloc(
862 LOCAL_CommentsBuff, sizeof(wchar_t) * (LOCAL_CommentsBuffLim + 4096));
863 LOCAL_CommentsBuffLim += 4096;
864 }
865}
866
867static void close_comment(USES_REGS1) {
868 LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = '\0';
869 *LOCAL_CommentsNextChar = Yap_WCharsToString(LOCAL_CommentsBuff PASS_REGS);
870 Free(LOCAL_CommentsBuff);
871 LOCAL_CommentsBuff = NULL;
872 LOCAL_CommentsBuffLim = 0;
873}
874
875// mark that we reached EOF,
876// next token will be end_of_file)
877static void mark_eof(struct stream_desc *st) {
878 st->status |= Push_Eof_Stream_f;
879}
880
881
882#define safe_add_ch_to_buff(ch) charp += put_utf8(charp, ch);
883
884#define add_ch_to_buff(ch) \
885 {\
886 if (ch == 10 && (trueGlobalPrologFlag(ISO_FLAG) || \
887 falseLocalPrologFlag(MULTILINE_QUOTED_TEXT_FLAG)))\
888 { \
889 /* in ISO a new line terminates a string */ \
890 }\
891 if (t) { t->Tok = Ord(kind = eot_tok); \
892 } \
893 charp += put_utf8(charp, ch); }
894
895TokEntry *Yap_tokenizer(void *st_, void *params_) {
896 struct stream_desc *st = st_;
897 scanner_params *params = params_;
898 CACHE_REGS
899 TokEntry *t, *l, *p;
900 enum TokenKinds kind;
901 int solo_flag = TRUE;
902 int32_t ch, och = ' ';
903 struct qq_struct_t *cur_qq = NULL;
904 int sign = 1;
905 size_t imgsz = 1024;
906 char *TokImage = Malloc(imgsz PASS_REGS);
907 bool store_comments = params->store_comments;
908
909 LOCAL_VarTable = NULL;
910 LOCAL_AnonVarTable = NULL;
911 l = NULL;
912 p = NULL; /* Just to make lint happy */
913 ch = getchr(st);
914 while (chtype(ch) == BS) {
915 och = ch;
916 ch = getchr(st);
917 }
918 params->tposOUTPUT = Yap_StreamPosition(st - GLOBAL_Stream);
919 Yap_setCurrentSourceLocation(st);
920 LOCAL_StartLineCount = st->linecount;
921 LOCAL_StartLinePos = st->linestart;
922 do {
923 int quote, isvar;
924 unsigned char *charp, *mp;
925 size_t len;
926
927 t = Malloc(sizeof(TokEntry));
928 t->TokNext = NULL;
929 if (t == NULL) {
930 return CodeSpaceError(t, p, l);
931 }
932 if (!l)
933 l = t;
934 else
935 p->TokNext = t;
936 p = t;
937 restart:
938 while (chtype(ch) == BS) {
939 ch = getchr(st);
940 }
941 t->TokPos = GetCurInpPos(st);
942 t->TokLine = GetCurInpLine(st);
943 t->TokOffset = GetCurInpOffset(st);
944
945 switch (chtype(ch)) {
946
947 case CC:
948 if (store_comments) {
949 open_comment(ch, st PASS_REGS);
950 continue_comment:
951 while ((ch = getchr(st)) != 10 && chtype(ch) != EF) {
952 extend_comment(ch PASS_REGS);
953 }
954 extend_comment(ch PASS_REGS);
955 if (chtype(ch) != EF) {
956 ch = getchr(st);
957 if (chtype(ch) == CC) {
958 extend_comment(ch PASS_REGS);
959 goto continue_comment;
960 }
961 }
962 close_comment(PASS_REGS1);
963 } else {
964 while ((ch = getchr(st)) != 10 && chtype(ch) != EF)
965 ;
966 }
967 if (chtype(ch) != EF) {
968 /* blank space */
969 if (t == l) {
970 /* we found a comment before reading characters */
971 while (chtype(ch) == BS) {
972 ch = getchr(st);
973 }
974 params->tposOUTPUT = Yap_StreamPosition(st - GLOBAL_Stream);
975 Yap_setCurrentSourceLocation(st);
976 }
977 goto restart;
978 } else {
979 t->Tok = Ord(kind = eot_tok);
980 mark_eof(st);
981 t->TokInfo = TermEof;
982 }
983 break;
984
985 case UC:
986 case UL:
987 case LC:
988 och = ch;
989 ch = getchr(st);
990 scan_name : {
991 charp = (unsigned char *)TokImage;
992 isvar = (chtype(och) != LC);
993 add_ch_to_buff(och);
994 for (; chtype(ch) <= NU; ch = getchr(st)) {
995 if (charp == (unsigned char *)TokImage + (imgsz - 1)) {
996 unsigned char *p0 = (unsigned char *)TokImage;
997 imgsz = Yap_Min(imgsz * 2, imgsz + 1024 * 1024 * 1024);
998 TokImage = Realloc(p0, imgsz);
999 if (TokImage == NULL) {
1000 return CodeSpaceError(t, p, l);
1001 }
1002 charp = (unsigned char *)TokImage + (charp - p0);
1003 }
1004 add_ch_to_buff(ch);
1005 }
1006 while (ch == '\'' && isvar && params->ce) {
1007 if (charp == (unsigned char *)AuxSp - 1024) {
1008 return CodeSpaceError(t, p, l);
1009 }
1010 safe_add_ch_to_buff(ch);
1011 ch = getchr(st);
1012 }
1013 add_ch_to_buff('\0');
1014 if (!isvar || (ch == '(' && params->vn_asfl) ||
1015 (TokImage[0] != '_' && params->vprefix)) {
1016 Atom ae;
1017 /* don't do this in iso */
1018 ae = Yap_LookupAtom(TokImage);
1019 if (ae == NIL) {
1020 return CodeSpaceError(t, p, l);
1021 }
1022 t->TokInfo = MkAtomTerm(ae);
1023 if (ch == '(')
1024 solo_flag = FALSE;
1025 t->Tok = Ord(kind = Name_tok);
1026 } else {
1027 VarEntry *ve = Yap_LookupVar((const char *)TokImage);
1028 t->TokInfo = Unsigned(ve);
1029 if (cur_qq) {
1030 ve->refs++;
1031 }
1032 t->Tok = Ord(kind = Var_tok);
1033 }
1034
1035 } break;
1036
1037 case NU: {
1038 int cherr;
1039 int cha;
1040 sign = 1;
1041
1042 scan_number:
1043 cha = ch;
1044 cherr = 0;
1045 CHECK_SPACE();
1046 if ((t->TokInfo = get_num(&cha, &cherr, st, sign, &TokImage, &imgsz)) ==
1047 0L) {
1048 if (t->TokInfo == 0) {
1049 p->Tok = eot_tok;
1050 }
1051 /* serious error now */
1052 return l;
1053 }
1054 ch = cha;
1055 if (cherr) {
1056 TokEntry *e;
1057 t->Tok = Number_tok;
1058 t->TokPos = GetCurInpPos(st);
1059 t->TokLine = GetCurInpLine(st);
1060 t->TokOffset = GetCurInpOffset(st);
1061 e = Malloc(sizeof(TokEntry));
1062 if (e == NULL) {
1063 return TrailSpaceError(p, l);
1064
1065 } else {
1066 e->TokNext = NULL;
1067 }
1068 t->TokNext = e;
1069 t = e;
1070 p = e;
1071 switch (cherr) {
1072 case 'e':
1073 case 'E':
1074 och = cherr;
1075 goto scan_name;
1076 break;
1077 case '=':
1078 case '_':
1079 /* handle error while parsing a float */
1080 {
1081 TokEntry *e2;
1082
1083 t->Tok = Ord(Var_tok);
1084 t->TokInfo = (Term)Yap_LookupVar("E");
1085 t->TokPos = GetCurInpPos(st);
1086 t->TokLine = GetCurInpLine(st);
1087 t->TokOffset= GetCurInpOffset(st);
1088 e2 = Malloc(sizeof(TokEntry));
1089 if (e2 == NULL) {
1090 return TrailSpaceError(p, l);
1091 } else {
1092 e2->TokNext = NULL;
1093 }
1094 t->TokNext = e2;
1095 t = e2;
1096 p = e2;
1097 if (cherr == '=')
1098 och = '+';
1099 else
1100 och = '-';
1101 }
1102 goto enter_symbol;
1103 case '+':
1104 case '-':
1105 /* handle error while parsing a float */
1106 {
1107 TokEntry *e2;
1108
1109 if (chtype(ch) == NU) {
1110 if (och == '-')
1111 sign = -1;
1112 else
1113 sign = 1;
1114 goto scan_number;
1115 }
1116 t->Tok = Name_tok;
1117 if (ch == '(')
1118 solo_flag = FALSE;
1119 t->TokInfo = MkAtomTerm(AtomE);
1120 t->TokLine = GetCurInpLine(st);
1121 t->TokPos = GetCurInpPos(st);
1122 t->TokOffset= GetCurInpOffset(st);
1123 e2 = Malloc(sizeof(TokEntry));
1124 if (e2 == NULL) {
1125 return TrailSpaceError(p, l);
1126 } else {
1127 e2->TokNext = NULL;
1128 }
1129 t->TokNext = e2;
1130 t = e2;
1131 p = e2;
1132 if (cherr == '=')
1133 och = '+';
1134 else
1135 och = '-';
1136 }
1137 goto enter_symbol;
1138 default:
1139 och = cherr;
1140 goto enter_symbol;
1141 }
1142 } else {
1143 t->Tok = Ord(kind = Number_tok);
1144 }
1145 } break;
1146
1147 case QT:
1148 case DC:
1149 quoted_string:
1150 charp = (unsigned char *)TokImage;
1151 quote = ch;
1152 len = 0;
1153 ch = getchrq(st);
1154
1155 while (TRUE) {
1156 if (charp > (unsigned char *)TokImage + (imgsz - 1)) {
1157 size_t sz = charp - (unsigned char *)TokImage;
1158 TokImage =
1159 Realloc(TokImage, (imgsz = Yap_Min(imgsz * 2, imgsz + MBYTE)));
1160 if (TokImage == NULL) {
1161 return CodeSpaceError(t, p, l);
1162 }
1163 charp = (unsigned char *)TokImage + sz;
1164 break;
1165 }
1166 if (ch == 10 && (trueGlobalPrologFlag(ISO_FLAG) ||
1167 trueLocalPrologFlag(MULTILINE_QUOTED_TEXT_FLAG))) {
1168 t->TokInfo = Yap_CharsToTDQ((char *)TokImage, CurrentModule,
1169 LOCAL_encoding PASS_REGS);
1170 Yap_bad_nl_error(t->TokInfo, st); /* in ISO a new linea terminates a string */
1171 break;
1172 }
1173 else if (ch == EOFCHAR) {
1174 break;
1175 }
1176 else if (ch == quote) {
1177 ch = getchrq(st);
1178 if (ch != quote)
1179 break;
1180 add_ch_to_buff(ch);
1181 ch = getchrq(st);
1182 } else if (ch == '\\' &&
1183 Yap_GetModuleEntry(CurrentModule)->flags & M_CHARESCAPE) {
1184 int scan_next = TRUE;
1185 if ((ch = read_quoted_char(&scan_next, st))) {
1186 safe_add_ch_to_buff(ch);
1187 }
1188 if (scan_next) {
1189 ch = getchrq(st);
1190 }
1191 } else {
1192 add_ch_to_buff(ch);
1193 ch = getchrq(st);
1194 }
1195 ++len;
1196 }
1197 *charp = '\0';
1198 if (quote == '"') {
1199 t->TokInfo = Yap_CharsToTDQ((char *)TokImage, CurrentModule,
1200 LOCAL_encoding PASS_REGS);
1201
1202 if (!(t->TokInfo)) {
1203 return CodeSpaceError(t, p, l);
1204 }
1205 if (IsAtomTerm(t->TokInfo)) {
1206 t->Tok = Ord(kind = Name_tok);
1207 } else {
1208 t->Tok = Ord(kind = String_tok);
1209 }
1210 } else if (quote == '`') {
1211 t->TokInfo = Yap_CharsToTBQ((char *)TokImage, CurrentModule,
1212 LOCAL_encoding PASS_REGS);
1213 if (!(t->TokInfo)) {
1214 return CodeSpaceError(t, p, l);
1215 }
1216 if (IsAtomTerm(t->TokInfo)) {
1217 t->Tok = Ord(kind = Name_tok);
1218 } else {
1219 t->Tok = Ord(kind = String_tok);
1220 }
1221 } else {
1222 t->TokInfo = MkAtomTerm(Yap_LookupAtom(TokImage));
1223 if (!(t->TokInfo)) {
1224 return CodeSpaceError(t, p, l);
1225 }
1226 t->Tok = Ord(kind = Name_tok);
1227 if (ch == '(')
1228 solo_flag = false;
1229 }
1230 break;
1231
1232 case BS:
1233 if (ch == '\0') {
1234 int pch;
1235 t->Tok = Ord(kind = eot_tok);
1236 pch = Yap_peek(st - GLOBAL_Stream);
1237 if (chtype(pch) == EF) {
1238 mark_eof(st);
1239 } else {
1240 if (params->get_eot_blank)
1241 getchr(st);
1242 }
1243 t->TokInfo = TermEof;
1244 return l;
1245 } else
1246 ch = getchr(st);
1247 break;
1248 case SY: {
1249 int pch;
1250 if (ch == '.' && (pch = getchr(st)) &&
1251 (chtype(pch) == BS || chtype(pch) == EF || pch == '%')) {
1252 if (chtype(ch) != EF)
1253 ch = pch;
1254 t->Tok = Ord(kind = eot_tok);
1255 // consume...
1256 if (pch == '%') {
1257 t->TokInfo = TermNewLine;
1258 return l;
1259 }
1260 return l;
1261 }
1262 if (ch == '`')
1263 goto quoted_string;
1264 if (ch != '.') {
1265 och = ch;
1266 ch = getchr(st);
1267 } else {
1268 och = ch;
1269 ch = pch;
1270 }
1271 if (och == '.') {
1272 if (chtype(ch) == BS || chtype(ch) == EF || ch == '%') {
1273 t->Tok = Ord(kind = eot_tok);
1274 if (ch == '%') {
1275 t->TokInfo = TermNewLine;
1276 return l;
1277 }
1278 if (chtype(ch) == EF) {
1279 mark_eof(st);
1280 t->TokInfo = TermEof;
1281 } else {
1282 t->TokInfo = TermNewLine;
1283 }
1284 return l;
1285 }
1286 }
1287 if (och == '/' && ch == '*') {
1288 if (store_comments) {
1289 CHECK_SPACE();
1290 open_comment('/', st PASS_REGS);
1291 while ((och != '*' || ch != '/') && chtype(ch) != EF) {
1292 och = ch;
1293 CHECK_SPACE();
1294 extend_comment(ch PASS_REGS);
1295 ch = getchr(st);
1296 }
1297 if (chtype(ch) != EF) {
1298 CHECK_SPACE();
1299 extend_comment(ch PASS_REGS);
1300 }
1301 close_comment(PASS_REGS1);
1302 } else {
1303 while ((och != '*' || ch != '/') && chtype(ch) != EF) {
1304 och = ch;
1305 ch = getchr(st);
1306 }
1307 }
1308 if (chtype(ch) == EF) {
1309 t->Tok = Ord(kind = eot_tok);
1310 t->TokInfo = TermEof;
1311 break;
1312 } else {
1313 /* leave comments */
1314 ch = getchr(st);
1315 if (t == l) {
1316 /* we found a comment before reading characters */
1317 while (chtype(ch) == BS) {
1318 ch = getchr(st);
1319 }
1320 CHECK_SPACE();
1321 params->tposOUTPUT = Yap_StreamPosition(st - GLOBAL_Stream);
1322 Yap_setCurrentSourceLocation(st);
1323 }
1324 }
1325 goto restart;
1326 }
1327 }
1328 enter_symbol:
1329 if (och == '.' && (chtype(ch) == BS || chtype(ch) == EF || ch == '%')) {
1330 t->Tok = Ord(kind = eot_tok);
1331 if (ch == '%') {
1332 t->TokInfo = TermNewLine;
1333 return l;
1334 }
1335 if (chtype(ch) == EF) {
1336 mark_eof(st);
1337 t->TokInfo = TermEof;
1338 } else {
1339 t->TokInfo = TermNl;
1340 }
1341 return l;
1342 } else {
1343 Atom ae;
1344 charp = (unsigned char *)TokImage;
1345 add_ch_to_buff(och);
1346 for (; chtype(ch) == SY; ch = getchr(st)) {
1347 if (charp >= (unsigned char *)TokImage + (imgsz - 10)) {
1348 size_t sz = charp - (unsigned char *)TokImage;
1349 imgsz = Yap_Min(imgsz * 2, imgsz + MBYTE);
1350 TokImage = Realloc(TokImage, imgsz);
1351 if (!TokImage) {
1352 return CodeSpaceError(t, p, l);
1353 }
1354 charp = (unsigned char *)TokImage + sz;
1355 }
1356 add_ch_to_buff(ch);
1357 }
1358 add_ch_to_buff('\0');
1359 ae = Yap_LookupAtom(TokImage);
1360 if (ae == NIL) {
1361 return CodeSpaceError(t, p, l);
1362 }
1363 t->TokInfo = MkAtomTerm(ae);
1364 if (t->TokInfo == (CELL)NIL) {
1365 return CodeSpaceError(t, p, l);
1366 }
1367 t->Tok = Ord(kind = Name_tok);
1368 if (ch == '(')
1369 solo_flag = false;
1370 else
1371 solo_flag = true;
1372 }
1373 break;
1374
1375 case SL: {
1376 unsigned char chs[2];
1377 chs[0] = ch;
1378 chs[1] = '\0';
1379 ch = getchr(st);
1380 t->TokInfo = MkAtomTerm(Yap_ULookupAtom(chs));
1381 t->Tok = Ord(kind = Name_tok);
1382 if (ch == '(')
1383 solo_flag = FALSE;
1384 } break;
1385
1386 case BK:
1387 och = ch;
1388 ch = getchr(st);
1389 {
1390 unsigned char *chs;
1391 charp = chs = (unsigned char *)TokImage;
1392 add_ch_to_buff(och);
1393 charp[0] = '\0';
1394 t->TokInfo = MkAtomTerm(Yap_ULookupAtom(chs));
1395 }
1396 if (och == '(') {
1397 while (chtype(ch) == BS) {
1398 ch = getchr(st);
1399 }
1400 if (ch == ')') {
1401 t->TokInfo = TermEmptyBrackets;
1402 t->Tok = Ord(kind = Name_tok);
1403 ch = getchr(st);
1404 solo_flag = FALSE;
1405 break;
1406 } else if (!solo_flag) {
1407 t->TokInfo = Terml;
1408 solo_flag = TRUE;
1409 }
1410 } else if (och == '[') {
1411 while (chtype(ch) == BS) {
1412 ch = getchr(st);
1413 };
1414 if (ch == ']') {
1415 t->TokInfo = TermNil;
1416 t->Tok = Ord(kind = Name_tok);
1417 ch = getchr(st);
1418 solo_flag = FALSE;
1419 break;
1420 }
1421 } else if (och == '{') {
1422 if (ch == '|') {
1423 qq_t *qq = (qq_t *)calloc(sizeof(qq_t), 1);
1424 if (!qq) {
1425 LOCAL_ErrorMessage = "not enough heap space to read in quasi quote";
1426 t->Tok = Ord(kind = eot_tok);
1427 t->TokInfo = TermOutOfHeapError;
1428 return l;
1429 }
1430 if (cur_qq) {
1431 LOCAL_ErrorMessage = "quasi quote in quasi quote";
1432 // Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
1433 t->Tok = Ord(kind = eot_tok);
1434 t->TokInfo = TermOutOfHeapError;
1435 return l;
1436 } else {
1437 cur_qq = qq;
1438 }
1439 t->TokInfo = (CELL)qq;
1440 if (st->status & Seekable_Stream_f) {
1441 qq->start.byteno = fseek(st->file, 0, 0);
1442 } else {
1443 qq->start.byteno = st->charcount - 1;
1444 }
1445 qq->start.lineno = st->linecount;
1446 qq->start.linepos = st->charcount + 1 - st->linestart;
1447 qq->start.charno = st->charcount + 1;
1448 t->Tok = Ord(kind = QuasiQuotes_tok);
1449 ch = getchr(st);
1450 solo_flag = FALSE;
1451 break;
1452 }
1453 while (chtype(ch) == BS) {
1454 ch = getchr(st);
1455 };
1456 if (ch == '}') {
1457 t->TokInfo = TermBraces;
1458 t->Tok = Ord(kind = Name_tok);
1459 ch = getchr(st);
1460 solo_flag = FALSE;
1461 break;
1462 }
1463 } else if (och == '|' && ch == '|') {
1464 qq_t *qq = cur_qq;
1465 if (!qq) {
1466 LOCAL_ErrorMessage = "quasi quoted's || without {|";
1467 // Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
1468 cur_qq = NULL;
1469 t->Tok = Ord(kind = eot_tok);
1470 t->TokInfo = TermError;
1471 return l;
1472 }
1473 cur_qq = NULL;
1474 t->TokInfo = (CELL)qq;
1475 if (st->status & Seekable_Stream_f) {
1476 qq->mid.byteno = fseek(st->file, 0, 0);
1477 } else {
1478 qq->mid.byteno = st->charcount - 1;
1479 }
1480 qq->mid.lineno = st->linecount;
1481 qq->mid.linepos = st->charcount+1-st->linestart;
1482 qq->mid.charno = st->charcount - 1;
1483 t->Tok = Ord(kind = QuasiQuotes_tok);
1484 ch = getchr(st);
1485 charp = (unsigned char *)TokImage;
1486 quote = ch;
1487 len = 0;
1488 ch = getchrq(st);
1489
1490 while (TRUE) {
1491 if (ch == '|') {
1492 ch = getchrq(st);
1493 if (ch != '}') {
1494 } else {
1495 charp += put_utf8((unsigned char *)charp, och);
1496 charp += put_utf8((unsigned char *)charp, ch);
1497 /* we're done */
1498 break;
1499 }
1500 } else if (chtype(ch) == EF) {
1501 mark_eof(st);
1502 t->Tok = Ord(kind = eot_tok);
1503 t->TokInfo = TermOutOfHeapError;
1504 break;
1505 } else {
1506 charp += put_utf8(charp, ch);
1507 ch = getchrq(st);
1508 }
1509 }
1510 len = charp - (unsigned char *)TokImage;
1511 mp = malloc(len + 1);
1512 if (mp == NULL) {
1513 LOCAL_ErrorMessage = "not enough heap space to read in quasi quote";
1514 t->Tok = Ord(kind = eot_tok);
1515 t->TokInfo = TermOutOfHeapError;
1516 return l;
1517 }
1518 strncpy((char *)mp, (const char *)TokImage, len + 1);
1519 qq->text = (unsigned char *)mp;
1520 if (st->status & Seekable_Stream_f) {
1521 qq->end.byteno = fseek(st->file, 0, 0);
1522 } else {
1523 qq->end.byteno = st->charcount - 1;
1524 }
1525 qq->end.lineno = st->linecount;
1526 qq->end.linepos = st->charcount - st->linestart;
1527 qq->end.charno = st->charcount - 1;
1528 if (!(t->TokInfo)) {
1529 return CodeSpaceError(t, p, l);
1530 }
1531 // Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
1532 solo_flag = FALSE;
1533 ch = getchr(st);
1534 break;
1535 }
1536 t->Tok = Ord(kind = Ponctuation_tok);
1537 break;
1538 case EF:
1539 mark_eof(st);
1540 t->Tok = Ord(kind = eot_tok);
1541 t->TokInfo = TermEof;
1542 return l;
1543
1544 default: {
1545 kind = Error_tok;
1546 char err[1024];
1547 snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n",
1548 ch, ch, chtype(ch));
1549 }
1550 t->Tok = Ord(kind = eot_tok);
1551 t->TokInfo = TermEof;
1552 }
1553 if (LOCAL_ErrorMessage) {
1554 /* insert an error token to inform the system of what happened */
1555 TokEntry *e = Malloc(sizeof(TokEntry));
1556 if (e == NULL) {
1557 return TrailSpaceError(p, l);
1558 }
1559 p->TokNext = e;
1560 e->Tok = Error_tok;
1561 e->TokInfo = MkAtomTerm(Yap_LookupAtom(LOCAL_ErrorMessage));
1562 e->TokPos = GetCurInpPos(st);
1563 e->TokLine = GetCurInpLine(st);
1564 e->TokOffset= GetCurInpOffset(st);
1565 e->TokNext = NULL;
1566 LOCAL_ErrorMessage = NULL;
1567 p = e;
1568 }
1569 } while (kind != eot_tok);
1570
1571 return (l);
1572}
1573
1578 CACHE_REGS
1579 LOCAL_Comments = TermNil;
1580 LOCAL_CommentsNextChar = LOCAL_CommentsTail = NULL;
1581 if (LOCAL_CommentsBuff) {
1582 LOCAL_CommentsBuff = NULL;
1583 }
1584 LOCAL_CommentsBuffLim = 0;
1585}
1586
Main definitions.
int Yap_symbol_encoding_error(YAP_Int ch, int code, struct stream_desc *st, const char *s)
This is a bug while encoding a symbol, and should always result in a syntax error.
Definition: chartypes.c:127
Term Yap_scan_num(StreamDesc *inp)
This routine is used when we need to parse a string into a number.
Definition: scanner.c:666
void Yap_clean_tokenizer(void)
terminate scanning: just closes the comment store
Definition: scanner.c:1577
Term Yap_tokRep(void *tokptre)
convert a token to text
Definition: scanner.c:747
void * Malloc(size_t sz USES_REGS)
allocate a temporary text block
Definition: alloc.c:1759
bool store_comments
Access to commen.
Definition: yapio.h:88
int(* stream_wgetc)(int)
function the stream uses for reading an octet
Definition: YapStreams.h:257
int(* stream_wgetc_for_read)(int)
direct handle to stream in that space
Definition: YapStreams.h:261