YAP 7.1.0
chartypes.c
Go to the documentation of this file.
1/*************************************************************************
2 * *
3 * YAP Prolog *
4 * *
5 * Yap Prolog was developed at NCCUP - Universidade do Porto *
6 * *
7 * Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-1997 *
8 * *
9 **************************************************************************
10 * *
11 * File: charcodes.c *
12 * Last rev: 5/2/88 *
13 * mods: *
14 * comments: Character codes and character conversion *
15 * *
16 *************************************************************************/
17#ifdef SCCS
18static char SccsId[] = "%W% %G%";
19#endif
31
37/*
38 * This file includes the definition of a character properties.
39 *
40 */
41
42#include "Yap.h"
43#include "YapHeap.h"
44#include "YapText.h"
45#include "Yatom.h"
46#include "yapio.h"
47#include <stdlib.h>
48#if HAVE_UNISTD_H
49#include <unistd.h>
50#endif
51#if HAVE_STDARG_H
52#include <stdarg.h>
53#endif
54#if HAVE_CTYPE_H
55#include <ctype.h>
56#endif
57#if HAVE_WCTYPE_H
58#include <wctype.h>
59#endif
60#if HAVE_LOCALE_H
61#include <locale.h>
62#endif
63#ifdef _WIN32
64#if HAVE_IO_H
65/* Windows */
66#include <io.h>
67#endif
68#if HAVE_SOCKET
69#include <winsock2.h>
70#endif
71#include <windows.h>
72#ifndef S_ISDIR
73#define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR)
74#endif
75#endif
76#include "YapEval.h"
77#include "iopreds.h"
78
79static Int p_change_type_of_char(USES_REGS1);
80
81int Yap_encoding_error(YAP_Int ch, int code, struct stream_desc *st) {
82 CACHE_REGS
83 // if (LOCAL_encoding_errors == TermIgnore)
84 // return ch;
85 if (st->status & RepFail_Prolog_f)
86 return -1;
87 if (true||st->status & RepError_Prolog_f ||
88 trueGlobalPrologFlag(ISO_FLAG))
89 Yap_ThrowError(SYNTAX_ERROR, MkIntTerm(code), "encoding error at stream %d %s:%lu, character %lu",st-GLOBAL_Stream,
90 AtomName((Atom)st->name), st->linecount, st->charcount);
91 fprintf(stderr,"encoding error at stream %ld %s:%lu, character %lu",st-GLOBAL_Stream,
92 RepAtom(st->name)->StrOfAE, st->linecount, st->charcount);
93 return 0;
94}
95
96
97int Yap_bad_nl_error( Term string, struct stream_desc *st) {
98 CACHE_REGS
99 // if (LOCAL_encoding_errors == TermIgnore)
100 // return ch;
101if (trueLocalPrologFlag(MULTILINE_QUOTED_TEXT_FLAG)||
102 trueGlobalPrologFlag(ISO_FLAG)) {
103 if (st->status & RepFail_Prolog_f)
104 return -1;
105 if (st->status & RepError_Prolog_f) {
106 Yap_ThrowError(SYNTAX_ERROR, string, "%s:%lu:0 error: quoted text terminates on newline",
107 AtomName((Atom)st->name), st->linecount);
108 return 0;
109 }else {
110 fprintf(stderr, "%s:%lu:0 warning: quoted text terminates on newline",
111 AtomName((Atom)
112 st->name), st->linecount);
113 return 0;
114 }
115 }
116 return -1;
117}
118
127int Yap_symbol_encoding_error(YAP_Int ch, int code, struct stream_desc *st, const char *s) {
128 CACHE_REGS
129 Yap_ThrowError__(AtomName(st->name), "parser", st->linecount, SYNTAX_ERROR, MkIntTerm(ch),
130 "encoding error at character %l %s", code, s);
131 return 0;
132}
133
134Term Yap_StringToNumberTerm(const char *s, encoding_t *encp, bool error_on) {
135 CACHE_REGS
136 int sno;
137 Atom nat = AtomEmptyBrackets;
138 sno = Yap_open_buf_read_stream(NULL, s, strlen(s), encp, MEM_BUF_USER, nat, TermEvaluable);
139 if (sno < 0)
140 return FALSE;
141 if (encp)
142 GLOBAL_Stream[sno].encoding = *encp;
143 else
144 GLOBAL_Stream[sno].encoding = LOCAL_encoding;
145#ifdef __ANDROID__
146 while (*s && isblank(*s) && Yap_wide_chtype(*s) == BS)
147 s++;
148#endif
149 GLOBAL_Stream[sno].status |= CloseOnException_Stream_f;
150 if (error_on) {
151 GLOBAL_Stream[sno].status |= RepFail_Prolog_f;
152 return 0;
153 }
154 int i = push_text_stack();
155 Term t = Yap_scan_num(GLOBAL_Stream + sno);
156 Yap_CloseStream(sno);
157 UNLOCK(GLOBAL_Stream[sno].streamlock);
158 pop_text_stack(i);
159 return t;
160}
161
162const char *encvs[] = {"LANG", "LC_ALL", "LC_CTYPE", NULL};
163
164// where we can fins an encoding
165typedef struct enc_map {
166 const char *s;
167 encoding_t e;
168} enc_map_t;
169
170static enc_map_t ematches[] = {
171 {"UTF-8", ENC_ISO_UTF8},
172 {"utf8", ENC_ISO_UTF8},
173 {"UTF-16", ENC_UTF16_LE}, // ok, this is a very bad name
174 {"UCS-2", ENC_UTF16_LE}, // ok, this is probably gone by now
175 {"ISO-LATIN1", ENC_ISO_LATIN1},
176 {"ISO-8859-1", ENC_ISO_LATIN1},
177 {"Windows-1252", ENC_ISO_LATIN1}, // almost, but not quite
178 {"CP-1252", ENC_ISO_LATIN1},
179 {"C", ENC_ISO_ASCII},
180#ifdef _WIN32
181 {NULL, ENC_ISO_ASCII}
182#else
183 {NULL, ENC_ISO_UTF8}
184#endif
185};
186
187static encoding_t enc_os_default(encoding_t rc) {
188 // by default, return UTF-8
189 // note that we match the C locale to UTF8/16, as all Unix machines will work
190 // on UNICODE.
191 // WIN32 we will rely on BOM
192
193 if (rc == ENC_ISO_ASCII) {
194 return ENC_ISO_UTF8;
195 }
196 return rc;
197}
198
199encoding_t Yap_SystemEncoding(void) {
200 int i = -1;
201 while (i == -1 || encvs[i]) {
202 char *v;
203 if (i == -1) {
204 if ((v = setlocale(LC_CTYPE, NULL)) == NULL || !strcmp(v, "C")) {
205 if ((v = getenv("LC_CTYPE")))
206 setlocale(LC_CTYPE, v);
207 else if ((v = getenv("LANG")))
208 setlocale(LC_CTYPE, v);
209 }
210 } else {
211 v = getenv(encvs[i]);
212 }
213 if (v) {
214 int j = 0;
215 const char *coding;
216 while ((coding = ematches[j].s) != NULL) {
217 char *v1;
218 if ((v1 = strstr(v, coding)) && strlen(v1) == strlen(coding)) {
219 return ematches[j].e;
220 }
221 j++;
222 }
223 }
224 i++;
225 }
226 return ENC_ISO_ASCII;
227}
228
229static encoding_t DefaultEncoding(void) {
230 return enc_os_default(Yap_SystemEncoding());
231}
232
233encoding_t Yap_DefaultEncoding(void) {
234 CACHE_REGS
235 return LOCAL_encoding;
236}
237
238void Yap_SetDefaultEncoding(encoding_t new_encoding) {
239 CACHE_REGS
240 LOCAL_encoding = new_encoding;
241}
242
243static Int get_default_encoding(USES_REGS1) {
244 Term out = MkIntegerTerm(Yap_DefaultEncoding());
245 return Yap_unify(ARG1, out);
246}
247
248static Int p_encoding(USES_REGS1) { /* '$encoding'(Stream,N) */
249 int sno =
250 Yap_CheckStream(ARG1, Input_Stream_f | Output_Stream_f, "encoding/2");
251 Term t = Deref(ARG2);
252 if (sno < 0)
253 return FALSE;
254 if (IsVarTerm(t)) {
255 UNLOCK(GLOBAL_Stream[sno].streamlock);
256 return Yap_unify(ARG2, MkIntegerTerm(GLOBAL_Stream[sno].encoding));
257 }
258 GLOBAL_Stream[sno].encoding = IntegerOfTerm(Deref(ARG2));
259 UNLOCK(GLOBAL_Stream[sno].streamlock);
260 return TRUE;
261}
262
263static int get_char(Term t) {
264 if (IsVarTerm(t = Deref(t))) {
265 Yap_ThrowError(INSTANTIATION_ERROR, t, NULL);
266 return 0;
267 }
268 if (!IsAtomTerm(t)) {
269 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, NULL);
270 return 0;
271 }
272 Atom at = AtomOfTerm(t);
273 unsigned char *s = RepAtom(at)->UStrOfAE;
274 utf8proc_int32_t c;
275 s += get_utf8(s, 1, &c);
276 return c;
277 if (s[0] != '\0') {
278 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, NULL);
279 return 0;
280 }
281 return c;
282}
283
284static int get_code(Term t) {
285 if (IsVarTerm(t = Deref(t))) {
286 Yap_ThrowError(INSTANTIATION_ERROR, t, NULL);
287 return 0;
288 }
289 if (!IsIntegerTerm(t)) {
290 Yap_ThrowError(TYPE_ERROR_CHARACTER_CODE, t, NULL);
291 return 0;
292 }
293 Int ch = IntegerOfTerm(t);
294 if (ch < -1) {
295 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
296 return 0;
297 }
298 return ch;
299}
300
301static int get_char_or_code(Term t, bool *is_char) {
302 if (!IsAtomTerm(t)) {
303 if (!IsIntegerTerm(t)) {
304 Yap_ThrowError(TYPE_ERROR_CHARACTER, t, NULL);
305 return 0;
306 }
307 Int ch = IntegerOfTerm(t);
308 if (ch < -1) {
309 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
310 return 0;
311 }
312 *is_char = false;
313 return ch;
314 }
315 unsigned char *s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
316 int val;
317 s0 += get_utf8(s0, 1, &val);
318 if (s0[0] != '\0') {
319 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, NULL);
320 return 0;
321 }
322 *is_char = true;
323 return val;
324}
325
326static Int toupper2(USES_REGS1) {
327 bool is_char = false;
328 Term t;
329 if (!IsVarTerm(t = Deref(ARG1))) {
330 Int out = get_char_or_code(t, &is_char), uout;
331 if (out < 128)
332 uout = toupper(out);
333 else
334 uout = utf8proc_toupper(out);
335 if (is_char)
336 return Yap_unify(ARG2, MkCharTerm(uout));
337 else
338 return Yap_unify(ARG2, MkIntegerTerm(uout));
339 } else if (!IsVarTerm(t = Deref(ARG2))) {
340 Int uout = get_char_or_code(t, &is_char), out;
341 char_kind_t charp = Yap_wide_chtype(uout);
342 if (charp == UC) {
343 if (uout < 128)
344 out = tolower(uout);
345 else
346 out = utf8proc_tolower(uout);
347 } else {
348 out = uout;
349 }
350 if (is_char)
351 return Yap_unify(ARG1, MkCharTerm(out));
352 else
353 return Yap_unify(ARG1, MkIntegerTerm(out));
354 } else {
355 Yap_ThrowError(INSTANTIATION_ERROR, ARG1, NULL);
356 }
357 return false;
358}
359
360static Int tolower2(USES_REGS1) {
361 bool is_char = false;
362 Term t;
363 if (!IsVarTerm(t = Deref(ARG1))) {
364 bool is_char = false;
365 Int out = get_char_or_code(ARG1, &is_char), uout;
366 if (out < 128)
367 uout = tolower(out);
368 else
369 uout = utf8proc_tolower(out);
370 if (is_char)
371 return Yap_unify(ARG2, MkCharTerm(uout));
372 else
373 return Yap_unify(ARG2, MkIntegerTerm(uout));
374 } else if (IsVarTerm(t = Deref(ARG2))) {
375 Int uout = get_char_or_code(t, &is_char), out;
376 char_kind_t charp = Yap_wide_chtype(uout);
377 if (charp == LC) {
378 if (uout < 128)
379 out = toupper(uout);
380 else
381 out = utf8proc_toupper(uout);
382 } else {
383 out = uout;
384 }
385 if (is_char)
386 return Yap_unify(ARG1, MkCharTerm(out));
387 else
388 return Yap_unify(ARG1, MkIntegerTerm(out));
389 } else {
390 Yap_ThrowError(INSTANTIATION_ERROR, ARG1, NULL);
391 }
392 return false;
393}
394
395static Int
396 p_change_type_of_char(USES_REGS1) { /* change_type_of_char(+char,+type) */
397 Term t1 = Deref(ARG1);
398 Term t2 = Deref(ARG2);
399 if (!IsVarTerm(t1) && !IsIntegerTerm(t1))
400 return FALSE;
401 if (!IsVarTerm(t2) && !IsIntegerTerm(t2))
402 return FALSE;
403 Yap_chtype[IntegerOfTerm(t1)] = IntegerOfTerm(t2);
404 return TRUE;
405}
406
407static Int char_type_alnum(USES_REGS1) {
408 int ch = get_char(ARG1);
409 char_kind_t k = Yap_wide_chtype(ch);
410 return k == UC || k == LC || k == NU;
411}
412
413static Int char_type_alpha(USES_REGS1) {
414 int ch = get_char(ARG1);
415 char_kind_t k = Yap_wide_chtype(ch);
416 return k == UC || k == LC;
417}
418
419static Int char_type_csym(USES_REGS1) {
420 int ch = get_char(ARG1);
421 char_kind_t k = Yap_wide_chtype(ch);
422 return k >= UC && k <= NU;
423}
424
425static Int char_type_csymf(USES_REGS1) {
426 int ch = get_char(ARG1);
427 char_kind_t k = Yap_wide_chtype(ch);
428 return k >= UC && k <= LC;
429}
430
431static Int char_type_ascii(USES_REGS1) {
432 int ch = get_char(ARG1);
433 return isascii(ch);
434}
435
436static Int char_type_white(USES_REGS1) {
437 int ch = get_char(ARG1);
438 if (ch < 256) {
439 char_kind_t k = Yap_chtype[ch];
440 return k == BS;
441 }
442 utf8proc_category_t ct = utf8proc_category(ch);
443 return ct == UTF8PROC_CATEGORY_ZS;
444}
445
446static Int char_type_cntrl(USES_REGS1) {
447 Int ch = get_char(ARG1);
448 char_kind_t k = Yap_wide_chtype(ch);
449 return k == BG;
450}
451
452static Int char_type_digit(USES_REGS1) {
453 Int ch = get_char(ARG1);
454 char_kind_t k = Yap_wide_chtype(ch);
455 return k == NU;
456}
457
458static Int char_type_xdigit(USES_REGS1) {
459 Int ch = get_char(ARG1);
460#if HAVE_ISWXDIGIT
461 return iswxdigit(ch);
462#elif HAVE_ISWHEXNUMBER
463 return iswhexnumber(ch);
464#else
465 return iswdigit(ch) || ((ch >= 'a' && ch <= 'f') && (ch >= 'A' && ch <= 'F'));
466#endif
467}
468
469static Int char_type_graph(USES_REGS1) {
470 Int ch = get_char(ARG1);
471 return iswgraph(ch);
472}
473
474static Int char_type_lower(USES_REGS1) {
475 char_kind_t k;
476 int ch = get_char(ARG1);
477 if (ch < 256) {
478 k = Yap_chtype[ch];
479 } else {
480 k = Yap_wide_chtype(ch);
481 }
482 return k == LC;
483}
484
485static Int char_type_upper(USES_REGS1) {
486 char_kind_t k;
487 int ch = get_char(ARG1);
488 if (ch < 256) {
489 k = Yap_chtype[ch];
490 } else {
491 k = Yap_wide_chtype(ch);
492 }
493 return k == UC;
494}
495
496static Int char_type_punct(USES_REGS1) {
497 int ch = get_char(ARG1);
498 char_kind_t k = Yap_wide_chtype(ch);
499 return k >= QT && k <= BK;
500}
501
502static Int char_type_space(USES_REGS1) {
503 int ch = get_char(ARG1);
504char_kind_t k = Yap_wide_chtype(ch);
505 return k >= QT && k <= BK;
506}
507
508static Int char_type_end_of_file(USES_REGS1) {
509 Int ch = get_char(ARG1);
510 return ch == WEOF || ch == -1;
511}
512
513static Int char_type_end_of_line(USES_REGS1) {
514 Int ch = get_char(ARG1);
515 if (ch < 256) {
516 return ch >= 10 && ch <= 13;
517 }
518 utf8proc_category_t ct = utf8proc_category(ch);
519 return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
520}
521
522static Int char_type_newline(USES_REGS1) {
523 Int ch = get_char(ARG1);
524 if (ch < 256) {
525 return ch == 10;
526 }
527 return false;
528}
529
530static Int char_type_period(USES_REGS1) {
531 Int ch = get_char(ARG1);
532 return ch == '.' || ch == '!' || ch == '?';
533}
534
535static Int char_type_quote(USES_REGS1) {
536 Int ch = get_char(ARG1);
537 utf8proc_category_t ct = utf8proc_category(ch);
538 return ct == UTF8PROC_CATEGORY_PI || ct == UTF8PROC_CATEGORY_PF;
539}
540
541static Int char_type_paren(USES_REGS1) {
542 Int ch = get_char(ARG1);
543 utf8proc_category_t ct = utf8proc_category(ch);
544 return ct == UTF8PROC_CATEGORY_PS || ct == UTF8PROC_CATEGORY_PE;
545}
546
547static Int char_type_prolog_var_start(USES_REGS1) {
548 Int ch = get_char(ARG1);
549 char_kind_t k = Yap_wide_chtype(ch);
550 return k == LC || ch == '_';
551}
552
553static Int char_type_prolog_atom_start(USES_REGS1) {
554 Int ch = get_char(ARG1);
555 char_kind_t k = Yap_wide_chtype(ch);
556 return k == UC;
557}
558
559static Int char_type_prolog_identifier_continue(USES_REGS1) {
560 int ch = get_char(ARG1);
561 char_kind_t k = Yap_wide_chtype(ch);
562 return k >= UC && k <= NU;
563}
564
565static Int char_type_prolog_prolog_symbol(USES_REGS1) {
566 int ch = get_char(ARG1);
567 char_kind_t k = Yap_wide_chtype(ch);
568 return k == SL || k == SY;
569}
570
571static Int code_type_alnum(USES_REGS1) {
572 int ch = get_code(ARG1);
573 char_kind_t k = Yap_wide_chtype(ch);
574 return k == UC || k == LC || k == NU;
575}
576
577static Int code_type_alpha(USES_REGS1) {
578 int ch = get_code(ARG1);
579 char_kind_t k = Yap_wide_chtype(ch);
580 return k == UC || k == LC;
581}
582
583static Int code_type_csym(USES_REGS1) {
584 int ch = get_code(ARG1);
585 char_kind_t k = Yap_wide_chtype(ch);
586 return k >= UC && k <= NU;
587}
588
589static Int code_type_csymf(USES_REGS1) {
590 int ch = get_code(ARG1);
591 char_kind_t k = Yap_wide_chtype(ch);
592 return k >= UC && k <= LC;
593}
594
595static Int code_type_ascii(USES_REGS1) {
596 int ch = get_code(ARG1);
597 return isascii(ch);
598}
599
600static Int code_type_white(USES_REGS1) {
601 int ch = get_code(ARG1);
602 if (ch < 256) {
603 char_kind_t k = Yap_chtype[ch];
604 return k == BS;
605 }
606 utf8proc_category_t ct = utf8proc_category(ch);
607 return ct == UTF8PROC_CATEGORY_ZS;
608}
609
610static Int code_type_cntrl(USES_REGS1) {
611 Int ch = get_code(ARG1);
612 char_kind_t k = Yap_wide_chtype(ch);
613 return k == BG;
614}
615
616static Int code_type_digit(USES_REGS1) {
617 Int ch = get_code(ARG1);
618 char_kind_t k = Yap_wide_chtype(ch);
619 return k == NU;
620}
621
622static Int code_type_xdigit(USES_REGS1) {
623 Int ch = get_code(ARG1);
624#if HAVE_ISWXDIGIT
625 return iswxdigit(ch);
626#elif HAVE_ISWHEXNUMBER
627 return iswhexnumber(ch);
628#else
629 return iswdigit(ch) || ((ch >= 'a' && ch <= 'f') && (ch >= 'A' && ch <= 'F'));
630#endif
631}
632
633static Int code_type_graph(USES_REGS1) {
634 Int ch = get_code(ARG1);
635 return iswgraph(ch);
636}
637
638static Int code_type_lower(USES_REGS1) {
639 int ch = get_code(ARG1);
640 char_kind_t k = Yap_wide_chtype(ch);
641 return k == LC;
642}
643
644static Int code_type_upper(USES_REGS1) {
645 int ch = get_code(ARG1);
646 char_kind_t k = Yap_wide_chtype(ch);
647 return k == UC;
648}
649
650static Int code_type_punct(USES_REGS1) {
651 int ch = get_char(ARG1);
652 if (ch < 256) {
653 char_kind_t k = Yap_chtype[ch];
654 return k >= QT && k <= BK;
655 }
656 return false;
657}
658
659static Int code_type_space(USES_REGS1) {
660 int ch = get_code(ARG1);
661 if (ch < 256) {
662 char_kind_t k = Yap_chtype[ch];
663 return k == BS;
664 }
665 utf8proc_category_t ct = utf8proc_category(ch);
666 return ct == UTF8PROC_CATEGORY_ZS;
667}
668
669static Int code_type_end_of_file(USES_REGS1) {
670 Int ch = get_code(ARG1);
671 return ch == WEOF || ch == -1;
672}
673
674static Int code_type_end_of_line(USES_REGS1) {
675 Int ch = get_code(ARG1);
676 if (ch < 256) {
677 return ch >= 10 && ch <= 13;
678 }
679 utf8proc_category_t ct = utf8proc_category(ch);
680 return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
681}
682
683static Int code_type_newline(USES_REGS1) {
684 Int ch = get_code(ARG1);
685 if (ch < 256) {
686 return ch == 10;
687 }
688 return false;
689}
690
691static Int code_type_period(USES_REGS1) {
692 Int ch = get_code(ARG1);
693 return ch == '.' || ch == '!' || ch == '?';
694}
695
696static Int code_type_quote(USES_REGS1) {
697 Int ch = get_code(ARG1);
698 utf8proc_category_t ct = utf8proc_category(ch);
699 return ct == UTF8PROC_CATEGORY_PI || ct == UTF8PROC_CATEGORY_PF;
700}
701
702static Int code_type_paren(USES_REGS1) {
703 Int ch = get_code(ARG1);
704 utf8proc_category_t ct = utf8proc_category(ch);
705 return ct == UTF8PROC_CATEGORY_PS || ct == UTF8PROC_CATEGORY_PE;
706}
707
708static Int code_type_prolog_var_start(USES_REGS1) {
709 Int ch = get_code(ARG1);
710 char_kind_t k = Yap_wide_chtype(ch);
711 return k == LC || ch == '_';
712}
713
714static Int code_type_prolog_atom_start(USES_REGS1) {
715 Int ch = get_code(ARG1);
716 char_kind_t k = Yap_wide_chtype(ch);
717 return k == UC;
718}
719
720static Int code_type_prolog_identifier_continue(USES_REGS1) {
721 int ch = get_code(ARG1);
722 char_kind_t k = Yap_wide_chtype(ch);
723 return k >= UC && k <= NU;
724}
725
726static Int code_type_prolog_prolog_symbol(USES_REGS1) {
727 int ch = get_code(ARG1);
728 char_kind_t k = Yap_wide_chtype(ch);
729 return k == SL || k == SY;
730}
731
732int ISOWGetc(int sno) {
733 int ch = GLOBAL_Stream[sno].stream_wgetc(sno);
734 if (ch != EOF && GLOBAL_CharConversionTable != NULL) {
735
736 if (ch < NUMBER_OF_CHARS) {
737 /* only do this in ASCII */
738 return GLOBAL_CharConversionTable[ch];
739 }
740 }
741 return ch;
742}
743
744static Int p_force_char_conversion(USES_REGS1) {
745 int i;
746
747 /* don't actually enable it until someone tries to add a conversion */
748 if (GLOBAL_CharConversionTable2 == NULL)
749 return (TRUE);
750 for (i = 0; i < MaxStreams; i++) {
751 if (!(GLOBAL_Stream[i].status & Free_Stream_f))
752 GLOBAL_Stream[i].stream_wgetc_for_read = ISOWGetc;
753 }
754 GLOBAL_CharConversionTable = GLOBAL_CharConversionTable2;
755 return (TRUE);
756}
757
758static Int p_disable_char_conversion(USES_REGS1) {
759 int i;
760
761 for (i = 0; i < MaxStreams; i++) {
762 if (!(GLOBAL_Stream[i].status & Free_Stream_f))
763 GLOBAL_Stream[i].stream_wgetc_for_read = GLOBAL_Stream[i].stream_wgetc;
764 }
765 GLOBAL_CharConversionTable = NULL;
766 return (TRUE);
767}
768
769static Int char_conversion(USES_REGS1) {
770 Term t = Deref(ARG1), t1 = Deref(ARG2);
771 unsigned char *s0, *s1;
772
773 if (IsVarTerm(t)) {
774 Yap_ThrowError(INSTANTIATION_ERROR, t, "char_conversion/2");
775 return (FALSE);
776 }
777 if (!IsAtomTerm(t)) {
778 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
779 return (FALSE);
780 }
781 s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
782 if (s0[1] != '\0') {
783 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
784 return (FALSE);
785 }
786 if (IsVarTerm(t1)) {
787 Yap_ThrowError(INSTANTIATION_ERROR, t1, "char_conversion/2");
788 return (FALSE);
789 }
790 if (!IsAtomTerm(t1)) {
791 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
792 return (FALSE);
793 }
794 s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
795 if (s1[1] != '\0') {
796 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
797 return (FALSE);
798 }
799 /* check if we do have a table for converting characters */
800 if (GLOBAL_CharConversionTable2 == NULL) {
801 int i;
802
803 /* don't create a table if we don't need to */
804 if (s0[0] == s1[0])
805 return (TRUE);
806 GLOBAL_CharConversionTable2 =
807 Yap_AllocCodeSpace(NUMBER_OF_CHARS * sizeof(char));
808 while (GLOBAL_CharConversionTable2 == NULL) {
809 if (!Yap_growheap(FALSE, NUMBER_OF_CHARS * sizeof(char), NULL)) {
810 Yap_ThrowError(RESOURCE_ERROR_HEAP, TermNil, LOCAL_ErrorMessage);
811 return (FALSE);
812 }
813 }
814 if (trueGlobalPrologFlag(CHAR_CONVERSION_FLAG)) {
815 CACHE_REGS
816 if (p_force_char_conversion(PASS_REGS1) == FALSE)
817 return (FALSE);
818 }
819 for (i = 0; i < NUMBER_OF_CHARS; i++)
820 GLOBAL_CharConversionTable2[i] = i;
821 }
822 /* just add the new entry */
823 GLOBAL_CharConversionTable2[(int)s0[0]] = s1[0];
824 /* done */
825 return (TRUE);
826}
827
828static Int p_current_char_conversion(USES_REGS1) {
829 Term t, t1;
830 unsigned char *s0, *s1;
831
832 if (GLOBAL_CharConversionTable == NULL) {
833 return (FALSE);
834 }
835 t = Deref(ARG1);
836 if (IsVarTerm(t)) {
837 Yap_ThrowError(INSTANTIATION_ERROR, t, "current_char_conversion/2");
838 return (FALSE);
839 }
840 if (!IsAtomTerm(t)) {
841 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
842 return (FALSE);
843 }
844 s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
845 if (s0[1] != '\0') {
846 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
847 return (FALSE);
848 }
849 t1 = Deref(ARG2);
850 if (IsVarTerm(t1)) {
851 char out[2];
852 if (GLOBAL_CharConversionTable[(int)s0[0]] == '\0')
853 return (FALSE);
854 out[0] = GLOBAL_CharConversionTable[(int)s0[0]];
855 out[1] = '\0';
856 return (Yap_unify(ARG2, MkAtomTerm(Yap_LookupAtom(out))));
857 }
858 if (!IsAtomTerm(t1)) {
859 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
860 return (FALSE);
861 }
862 s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
863 if (s1[1] != '\0') {
864 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
865 return (FALSE);
866 } else {
867 return (GLOBAL_CharConversionTable[(int)s0[0]] == '\0' &&
868 GLOBAL_CharConversionTable[(int)s0[0]] == s1[0]);
869 }
870}
871
872static Int p_all_char_conversions(USES_REGS1) {
873 Term out = TermNil;
874 int i;
875
876 if (GLOBAL_CharConversionTable == NULL) {
877 return (FALSE);
878 }
879 for (i = NUMBER_OF_CHARS; i > 0;) {
880 i--;
881 if (GLOBAL_CharConversionTable[i] != '\0') {
882 Term t1, t2;
883 char s[2];
884 s[1] = '\0';
885 s[0] = GLOBAL_CharConversionTable[i];
886 t1 = MkAtomTerm(Yap_LookupAtom(s));
887 out = MkPairTerm(t1, out);
888 s[0] = i;
889 t2 = MkAtomTerm(Yap_LookupAtom(s));
890 out = MkPairTerm(t2, out);
891 }
892 }
893 return (Yap_unify(ARG1, out));
894}
895
896void Yap_InitChtypes(void) {
897 CACHE_REGS
898 LOCAL_encoding = DefaultEncoding();
899 Yap_InitCPred("$change_type_of_char", 2, p_change_type_of_char,
900 SafePredFlag | SyncPredFlag | HiddenPredFlag);
901 Yap_InitCPred("toupper", 2, toupper2, SafePredFlag);
902 Yap_InitCPred("tolower", 2, tolower2, SafePredFlag);
903 Yap_InitCPred("char_conversion", 2, char_conversion, SyncPredFlag);
904
905 Yap_InitCPred("$get_default_encoding", 1, get_default_encoding,
906 SafePredFlag | HiddenPredFlag);
907
908 Yap_InitCPred("$encoding", 2, p_encoding, SafePredFlag | SyncPredFlag),
909
910 Yap_InitCPred("$current_char_conversion", 2, p_current_char_conversion,
911 SyncPredFlag | HiddenPredFlag);
912 Yap_InitCPred("$all_char_conversions", 1, p_all_char_conversions,
913 SyncPredFlag | HiddenPredFlag);
914 Yap_InitCPred("$force_char_conversion", 0, p_force_char_conversion,
915 SyncPredFlag | HiddenPredFlag);
916 Yap_InitCPred("$disable_char_conversion", 0, p_disable_char_conversion,
917 SyncPredFlag | HiddenPredFlag);
918 // CurrentModule = CHTYPE_MODULE;
919 Yap_InitCPred("char_type_alnum", 1, char_type_alnum, SafePredFlag);
920 Yap_InitCPred("char_type_alpha", 1, char_type_alpha, SafePredFlag);
921 Yap_InitCPred("char_type_csym", 1, char_type_csym, SafePredFlag);
922 Yap_InitCPred("char_type_csymf", 1, char_type_csymf, SafePredFlag);
923 Yap_InitCPred("char_type_ascii", 1, char_type_ascii, SafePredFlag);
924 Yap_InitCPred("char_type_white", 1, char_type_white, SafePredFlag);
925 Yap_InitCPred("char_type_cntrl", 1, char_type_cntrl, SafePredFlag);
926 Yap_InitCPred("char_type_digit", 1, char_type_digit, SafePredFlag);
927 Yap_InitCPred("char_type_xdigit", 1, char_type_xdigit, SafePredFlag);
928 Yap_InitCPred("char_type_graph", 1, char_type_graph, SafePredFlag);
929 Yap_InitCPred("char_type_lower", 1, char_type_lower, SafePredFlag);
930 Yap_InitCPred("char_type_upper", 1, char_type_upper, SafePredFlag);
931 Yap_InitCPred("char_type_punct", 1, char_type_punct, SafePredFlag);
932 Yap_InitCPred("char_type_space", 1, char_type_space, SafePredFlag);
933 Yap_InitCPred("char_type_end_of_file", 1, char_type_end_of_file,
934 SafePredFlag);
935 Yap_InitCPred("char_type_end_of_line", 1, char_type_end_of_line,
936 SafePredFlag);
937 Yap_InitCPred("char_type_newline", 1, char_type_newline, SafePredFlag);
938 Yap_InitCPred("char_type_period", 1, char_type_period, SafePredFlag);
939 Yap_InitCPred("char_type_quote", 1, char_type_quote, SafePredFlag);
940 Yap_InitCPred("char_type_paren", 1, char_type_paren, SafePredFlag);
941 Yap_InitCPred("char_type_prolog_var_start", 1, char_type_prolog_var_start,
942 SafePredFlag);
943 Yap_InitCPred("char_type_prolog_atom_start", 1, char_type_prolog_atom_start,
944 SafePredFlag);
945 Yap_InitCPred("char_type_prolog_identifier_continue", 1,
946 char_type_prolog_identifier_continue, SafePredFlag);
947 Yap_InitCPred("char_type_prolog_prolog_symbol", 1,
948 char_type_prolog_prolog_symbol, SafePredFlag);
949 Yap_InitCPred("code_type_alnum", 1, code_type_alnum, SafePredFlag);
950 Yap_InitCPred("code_type_alpha", 1, code_type_alpha, SafePredFlag);
951 Yap_InitCPred("code_type_csym", 1, code_type_csym, SafePredFlag);
952 Yap_InitCPred("code_type_csymf", 1, code_type_csymf, SafePredFlag);
953 Yap_InitCPred("code_type_ascii", 1, code_type_ascii, SafePredFlag);
954 Yap_InitCPred("code_type_white", 1, code_type_white, SafePredFlag);
955 Yap_InitCPred("code_type_cntrl", 1, code_type_cntrl, SafePredFlag);
956 Yap_InitCPred("code_type_digit", 1, code_type_digit, SafePredFlag);
957 Yap_InitCPred("code_type_xdigit", 1, code_type_xdigit, SafePredFlag);
958 Yap_InitCPred("code_type_graph", 1, code_type_graph, SafePredFlag);
959 Yap_InitCPred("code_type_lower", 1, code_type_lower, SafePredFlag);
960 Yap_InitCPred("code_type_upper", 1, code_type_upper, SafePredFlag);
961 Yap_InitCPred("code_type_punct", 1, code_type_punct, SafePredFlag);
962 Yap_InitCPred("code_type_space", 1, code_type_space, SafePredFlag);
963 Yap_InitCPred("code_type_end_of_file", 1, code_type_end_of_file,
964 SafePredFlag);
965 Yap_InitCPred("code_type_end_of_line", 1, code_type_end_of_line,
966 SafePredFlag);
967 Yap_InitCPred("code_type_newline", 1, code_type_newline, SafePredFlag);
968 Yap_InitCPred("code_type_period", 1, code_type_period, SafePredFlag);
969 Yap_InitCPred("code_type_quote", 1, code_type_quote, SafePredFlag);
970 Yap_InitCPred("code_type_paren", 1, code_type_paren, SafePredFlag);
971 Yap_InitCPred("code_type_prolog_var_start", 1, code_type_prolog_var_start,
972 SafePredFlag);
973 Yap_InitCPred("code_type_prolog_atom_start", 1, code_type_prolog_atom_start,
974 SafePredFlag);
975 Yap_InitCPred("code_type_prolog_identifier_continue", 1,
976 code_type_prolog_identifier_continue, SafePredFlag);
977 Yap_InitCPred("code_type_prolog_prolog_symbol", 1,
978 code_type_prolog_prolog_symbol, SafePredFlag);
979 CurrentModule = PROLOG_MODULE;
980}
Main definitions.
int Yap_symbol_encoding_error(YAP_Int ch, int code, struct stream_desc *st, const char *s)
This is a bug while encoding a symbol, and should always result in a syntax error.
Definition: chartypes.c:127
Term Yap_scan_num(StreamDesc *inp)
This routine is used when we need to parse a string into a number.
Definition: scanner.c:666
void Yap_ThrowError__(const char *file, const char *function, int lineno, yap_error_number type, Term where, const char *msg,...)
Throw an error directly to the error handler.
Definition: errors.c:789
@ char_conversion
Writable flag telling whether a character conversion table is used when reading terms.
Definition: YapGFlagInfo.h:159
@ encoding
support for coding systens, YAP relies on UTF-8 internally
Definition: YapLFlagInfo.h:83
char_kind_t Yap_wide_chtype(int ch)
Definition: text.c:60