YAP 7.1.0
text.c
Go to the documentation of this file.
1/*************************************************************************
2 * *
3 * YAP Prolog *
4 * *
5 * Yap Prolog was developed at NCCUP - Universidade do Porto *
6 * *
7 * Copyright L.Damas, V. Santos Costa and Universidade do Porto 1985-- *
8 * *
9 **************************************************************************
10 * *
11 * File: strings.c *
12 * comments: General-conversion of character sequences. *
13 * *
14 * Last rev: $Date: 2008-07-24 16:02:00 $,$Author: vsc $ *
15 * *
16 *************************************************************************/
17
32#include "Yap.h"
33#include "YapEval.h"
34#include "YapHeap.h"
35#include "YapStreams.h"
36#include "YapText.h"
37#include "Yatom.h"
38#include "yapio.h"
39
40#include <YapText.h>
41#include <string.h>
42#include <wchar.h>
43
#ifndef HAVE_WCSNLEN
/* Return the smaller of two sizes. */
inline static size_t min_size(size_t i, size_t j) { return (i < j ? i : j); }

/*
 * Fallback for wcsnlen(): number of wide characters in s before the first
 * L'\0', looking at no more than n characters.
 *
 * The scan stops at n, so a buffer of n characters without a terminator is
 * never read past its end (a wcslen()-based fallback would keep reading).
 */
inline static size_t yap_wcsnlen(const wchar_t *s, size_t n) {
  size_t len = 0;

  while (len < n && s[len] != L'\0') {
    len++;
  }
  return len;
}
#define wcsnlen(S, N) yap_wcsnlen((S), (N))
#endif
48
#if !defined(HAVE_STPCPY) && !defined(__APPLE__)
/*
 * Fallback for stpcpy(): copy the NUL-terminated string src into dst and
 * return a pointer to the terminating NUL written in dst.
 *
 * The helper has a project-local name: identifiers starting with two
 * underscores are reserved for the implementation, and the C library may
 * already declare its own __stpcpy.
 */
inline static char *yap_stpcpy(char *dst, const char *src) {
  size_t len = strlen(src);

  memcpy(dst, src, len + 1); /* the + 1 copies the terminator as well */
  return dst + len;
}
#define stpcpy yap_stpcpy
#endif
55
/* Fallback: when nothing included above defines NAN, define it as the
   expression 0.0 / 0.0. */
56#ifndef NAN
57#define NAN (0.0 / 0.0)
58#endif
59
60inline char_kind_t Yap_wide_chtype(int ch) {
61 if (ch < 256)
62 return Yap_chtype[ch];
63 switch (utf8proc_category(ch)) {
64 case UTF8PROC_CATEGORY_CN:
65 return BG;
66 case UTF8PROC_CATEGORY_LU:
67 return UC;
68 case UTF8PROC_CATEGORY_LL:
69 return LC;
70 case UTF8PROC_CATEGORY_LT:
71 return UC;
72 case UTF8PROC_CATEGORY_LM:
73 return LC;
74 case UTF8PROC_CATEGORY_LO:
75 return LC;
76 case UTF8PROC_CATEGORY_MN:
77 return BG;
78 case UTF8PROC_CATEGORY_MC:
79 return BK;
80 case UTF8PROC_CATEGORY_ME:
81 return BK;
82 case UTF8PROC_CATEGORY_ND:
83 return NU;
84 case UTF8PROC_CATEGORY_NL:
85 return NU;
86 case UTF8PROC_CATEGORY_NO:
87 return NU;
88 case UTF8PROC_CATEGORY_PC:
89 return SL;
90 case UTF8PROC_CATEGORY_PD:
91 return SY;
92 case UTF8PROC_CATEGORY_PS:
93 return BK;
94 case UTF8PROC_CATEGORY_PE:
95 return BK;
96 case UTF8PROC_CATEGORY_PI:
97 return QT;
98 case UTF8PROC_CATEGORY_PF:
99 return QT;
100 case UTF8PROC_CATEGORY_PO:
101 return SL;
102 case UTF8PROC_CATEGORY_SM:
103 return SY;
104 case UTF8PROC_CATEGORY_SC:
105 return SY;
106 case UTF8PROC_CATEGORY_SK:
110 return LC;
111 case UTF8PROC_CATEGORY_SO:
112 return SL;
113 case UTF8PROC_CATEGORY_ZS:
114 return BS;
115 case UTF8PROC_CATEGORY_ZL:
116 return BS;
117 case UTF8PROC_CATEGORY_ZP:
118 return BS;
119 case UTF8PROC_CATEGORY_CC:
120 return BG;
121 case UTF8PROC_CATEGORY_CF:
122 return BG;
123 case UTF8PROC_CATEGORY_CS:
124 return BG;
125 case UTF8PROC_CATEGORY_CO:
126 return BG;
127 }
128 return BS;
129}
130
131static Term Globalize(Term v USES_REGS) {
132 if (!IsVarTerm(v = Deref(v))) {
133 return v;
134 }
135 if (VarOfTerm(v) > HR && VarOfTerm(v) < LCL0) {
136 Bind_Local(VarOfTerm(v), MkVarTerm());
137 v = Deref(v);
138 }
139 return v;
140}
141
142static void *codes2buf(Term t0, void *b0, bool get_codes,
143 bool fixed USES_REGS) {
144 unsigned char *st0, *st, ar[16];
145 Term t = t0;
146 size_t length = 0;
147
148 if (t == TermNil) {
149 st0 = Malloc(4);
150 st0[0] = 0;
151 return st0;
152 }
153 if (!IsPairTerm(t)) {
154 Yap_ThrowError(TYPE_ERROR_LIST, t, "scanning list of codes");
155 return NULL;
156 }
157 bool codes = IsIntegerTerm(HeadOfTerm(t));
158 if (get_codes != codes && fixed) {
159 if (codes) {
160 Yap_ThrowError(TYPE_ERROR_INTEGER, HeadOfTerm(t),
161 "scanning list of codes");
162 } else {
163 Yap_ThrowError(TYPE_ERROR_ATOM, HeadOfTerm(t), "scanning list of atoms");
164 }
165 }
166 if (codes) {
167 while (IsPairTerm(t)) {
168 Term hd = HeadOfTerm(t);
169 if (IsVarTerm(hd)) {
170 Yap_ThrowError(INSTANTIATION_ERROR, hd, "scanning list of codes");
171 return NULL;
172 }
173 if (!IsIntegerTerm(hd)) {
174 Yap_ThrowError(TYPE_ERROR_CHARACTER_CODE, hd, "scanning list of codes");
175 return NULL;
176 }
177 Int code = IntegerOfTerm(hd);
178 if (code < 0) {
179 Yap_ThrowError(REPRESENTATION_ERROR_CHARACTER_CODE, hd,
180 "scanning list of character codes, found %d", code);
181 return NULL;
182 }else if (code == 0) {
183 length += 2;
184 } else {
185 length += put_utf8(ar, code);
186 }
187 t = TailOfTerm(t);
188 if (IsVarTerm(t)) {
189 Yap_ThrowError(INSTANTIATION_ERROR, t, "scanning list of codes");
190 return NULL;
191 }
192 if (!IsPairTerm(t) && t != TermNil) {
193 Yap_ThrowError(TYPE_ERROR_LIST, t, "scanning list of codes");
194 return NULL;
195 }
196 }
197 } else {
198 while (IsPairTerm(t)) {
199 Term hd = HeadOfTerm(t);
200 if (IsVarTerm(hd)) {
201 Yap_ThrowError(INSTANTIATION_ERROR, hd, "scanning list of codes");
202 return NULL;
203 }
204 if (!IsAtomTerm(hd)) {
205 Yap_ThrowError(TYPE_ERROR_CHARACTER, hd, "scanning list of texts");
206 return NULL;
207 }
208 const char *code = RepAtom(AtomOfTerm(hd))->StrOfAE;
209 if (code < 0) {
210 Yap_ThrowError(TYPE_ERROR_CHARACTER, hd, "scanning list of atoms");
211 return NULL;
212 } else if (code == 0) {
213 length += 2;
214 } else {
215 length += strlen(code);
216 }
217 t = TailOfTerm(t);
218 if (IsVarTerm(t)) {
219 Yap_ThrowError(INSTANTIATION_ERROR, t, "scanning list of codes");
220 return NULL;
221 }
222 if (!IsPairTerm(t) && t != TermNil) {
223 Yap_ThrowError(TYPE_ERROR_LIST, t, "scanning list of codes");
224 return NULL;
225 }
226 }
227 }
228
229 if (!IsVarTerm(t)) {
230 if (t != TermNil) {
231 Yap_ThrowError(TYPE_ERROR_LIST, t, "scanning list of codes");
232 return NULL;
233 }
234 }
235
236 st0 = st = Malloc(length + 1);
237 t = t0;
238 if (codes) {
239 while (IsPairTerm(t)) {
240 Term hd = HeadOfTerm(t);
241 Int code = IntegerOfTerm(hd);
242
243 if (code == 0) {
244 st[0] = 0xC0;
245 st[1] = 0x80;
246st +=2;
247 } else
248 st = st + put_utf8(st, code);
249 t = TailOfTerm(t);
250 }
251 } else {
252 while (IsPairTerm(t)) {
253 Term hd = HeadOfTerm(t);
254 const char *code = RepAtom(AtomOfTerm(hd))->StrOfAE;
255 st = (unsigned char *)stpcpy((char *)st, code);
256 t = TailOfTerm(t);
257 }
258 }
259 st[0] = '\0';
260
261 return st0;
262}
263
264static unsigned char *latin2utf8(seq_tv_t *inp) {
265 unsigned char *b0 = inp->val.uc;
266 size_t sz = strlen(inp->val.c);
267 sz *= 2;
268 int ch;
269 unsigned char *buf = Malloc(sz + 1), *pt = buf;
270 if (!buf)
271 return NULL;
272 while ((ch = *b0++)) {
273 int off = put_utf8(pt, ch);
274 if (off < 0) {
275 continue;
276 }
277 pt += off;
278 }
279 *pt++ = '\0';
280 return buf;
281}
282
283static unsigned char *wchar2utf8(seq_tv_t *inp) {
284 size_t sz = wcslen(inp->val.w) * 4;
285 wchar_t *b0 = inp->val.w;
286 unsigned char *buf = Malloc(sz + 1), *pt = buf;
287 int ch;
288 if (!buf)
289 return NULL;
290 while ((ch = *b0++))
291 pt += put_utf8(pt, ch);
292 *pt++ = '\0';
293 return buf;
294}
295
296
297static void *slice(size_t min, size_t max, const unsigned char *buf USES_REGS) {
298size_t sz = strlen((char*)buf)+1;
299if (max >sz) max = sz;
300 unsigned char *nbuf = Malloc(sz + 1);
301 const unsigned char *ptr = skip_utf8(buf, min);
302 unsigned char *nptr = nbuf;
303 utf8proc_int32_t chr;
304
305 while (min++ < max && *ptr) {
306 ptr += get_utf8(ptr, -1, &chr);
307 nptr += put_utf8(nptr, chr);
308 if (nptr-nbuf > sz-64) {
309 size_t delta = nptr-nbuf;
310 nbuf = Realloc(nbuf,4096);
311 nptr = nbuf+delta;
312 }
313 }
314 nptr[0] = '\0';
315 return nbuf;
316}
317
318
319static unsigned char *Yap_ListOfCodesToBuffer(unsigned char *buf, Term t,
320 seq_tv_t *inp USES_REGS) {
321 bool codes = true, fixed = true;
322 unsigned char *nbuf = codes2buf(t, buf, codes, fixed PASS_REGS);
323 return nbuf;
324}
325
326static unsigned char *Yap_ListOfAtomsToBuffer(unsigned char *buf, Term t,
327 seq_tv_t *inp USES_REGS) {
328 bool codes = false;
329 unsigned char *nbuf = codes2buf(t, buf, codes, true PASS_REGS);
330 return nbuf;
331}
332
333static unsigned char *Yap_ListToBuffer(unsigned char *buf, Term t,
334 seq_tv_t *inp USES_REGS) {
335 return codes2buf(t, buf, NULL, false PASS_REGS);
336}
337
338static yap_error_number gen_type_error(int flags) {
339 if ((flags & (YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT |
340 YAP_STRING_FLOAT | YAP_STRING_ATOMS_CODES | YAP_STRING_BIG)) ==
341 (YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT |
342 YAP_STRING_ATOMS_CODES | YAP_STRING_BIG))
343 return TYPE_ERROR_TEXT;
344 if ((flags & (YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT |
345 YAP_STRING_FLOAT | YAP_STRING_BIG)) ==
346 (YAP_STRING_STRING | YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT |
347 YAP_STRING_BIG))
348 return TYPE_ERROR_ATOMIC;
349 if ((flags & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG)) ==
350 (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG))
351 return TYPE_ERROR_NUMBER;
352 if (flags & YAP_STRING_ATOM)
353 return TYPE_ERROR_ATOM;
354 if (flags & YAP_STRING_STRING)
355 return TYPE_ERROR_STRING;
356 if (flags & (YAP_STRING_CODES | YAP_STRING_ATOMS))
357 return TYPE_ERROR_LIST;
358 return TYPE_ERROR_NUMBER;
359}
360
361// static int cnt;
362
/*
 * Turn the text described by inp into a NUL-terminated byte buffer.
 *
 * inp->type selects how inp->val is read:
 *  - neither YAP_STRING_CHARS nor YAP_STRING_WCHARS: inp->val.t is a term.
 *    Atoms, strings, lists of codes/atoms, integers, floats and big integers
 *    are converted when the matching YAP_STRING_* flag is set; with
 *    YAP_STRING_TERM any term is written through Yap_TermToBuffer().
 *  - YAP_STRING_CHARS: inp->val.uc is returned unchanged, except for
 *    ENC_ISO_LATIN1 input, which goes through latin2utf8().
 *  - YAP_STRING_WCHARS: inp->val.w goes through wchar2utf8().
 *
 * Returns the buffer, or NULL on failure. Errors are raised through
 * Yap_ThrowError(). Buffers allocated here leave the function through
 * pop_output_text_stack(lvl, ...); pointers into existing data (atom or
 * string text with YAP_STRING_WITH_BUFFER, caller-supplied chars) are
 * returned after a plain pop_text_stack(lvl).
 */
363unsigned char *Yap_readText(seq_tv_t *inp USES_REGS) {
/* POPRET(x): return x after handing it to pop_output_text_stack() for the
   level pushed below. */
364#define POPRET(x) return pop_output_text_stack(lvl, x)
365 int lvl = push_text_stack();
366 yap_error_number err = YAP_NO_ERROR;
367 /* we know what the term is */
368 if (!(inp->type & (YAP_STRING_CHARS | YAP_STRING_WCHARS))) {
/* NOTE(review): the mask below does not name YAP_STRING_STRING, yet inpt is
   compared against values that include it further down -- confirm whether
   those comparisons can ever succeed. */
369 seq_type_t inpt = inp->type & (YAP_STRING_TERM|YAP_STRING_ATOM|YAP_STRING_ATOMS_CODES);
/* The type checks are skipped when YAP_STRING_TERM is requested. */
370 if (!(inpt & YAP_STRING_TERM)) {
371 if (IsVarTerm(inp->val.t)) {
372 err = INSTANTIATION_ERROR;
373 } else if (!IsAtomTerm(inp->val.t) && inpt == YAP_STRING_ATOM) {
374 err = TYPE_ERROR_ATOM;
375 } else if (!IsStringTerm(inp->val.t) && inpt == YAP_STRING_STRING) {
376 err = TYPE_ERROR_STRING;
377 } else if (!IsPairOrNilTerm(inp->val.t) && !IsStringTerm(inp->val.t) &&
378 inpt == (YAP_STRING_ATOMS_CODES | YAP_STRING_STRING)) {
379 err = TYPE_ERROR_LIST;
380 } else if (!IsPairOrNilTerm(inp->val.t) && !IsStringTerm(inp->val.t) &&
381 !IsAtomTerm(inp->val.t) && !(inp->type & YAP_STRING_DATUM)) {
382 err = TYPE_ERROR_TEXT;
383 }
384 if (err) {
385 pop_text_stack(lvl);
386 Yap_ThrowError(err,
387 inp->val.t, "while converting term %s", Yap_TermToBuffer(
388 inp->val.t, Handle_cyclics_f|Quote_illegal_f | Handle_vars_f));
389 }
390 }
/* [] with YAP_STRING_PREFER_LIST is the empty text: four zero bytes. */
391 if ((inp->val.t == TermNil) && inp->type & YAP_STRING_PREFER_LIST )
392 {
393 char *out = Malloc(4);
394 memset(out, 0, 4);
395 POPRET( out );
396 }
/* Atom: the empty atom gives four zero bytes; with YAP_STRING_WITH_BUFFER
   the atom's own text is returned; otherwise the text is copied. */
397 if (IsAtomTerm(inp->val.t) && inp->type & YAP_STRING_ATOM) {
398 // this is a term, extract to a buffer, and representation is wide
399 // Yap_DebugPlWriteln(inp->val.t);
400 Atom at = AtomOfTerm(inp->val.t);
401 if (RepAtom(at)->UStrOfAE[0] == 0) {
402 char *out = Malloc(4);
403 memset(out, 0, 4);
404 POPRET( out );
405 }
406 if (inp->type & YAP_STRING_WITH_BUFFER) {
407 pop_text_stack(lvl);
408 return at->UStrOfAE;
409 }
410 {
411 size_t sz = strlen(at->StrOfAE);
412 char *out = Malloc(sz + 1);
413 strcpy(out, at->StrOfAE);
414 POPRET( out );
415 }
416 }
/* String term: same three cases as for atoms; the copying case also sets
   YAP_STRING_IN_TMP in inp->type. */
417 if (IsStringTerm(inp->val.t) && inp->type & YAP_STRING_STRING) {
418 // this is a term, extract to a buffer, and representation is wide
419 // Yap_DebugPlWriteln(inp->val.t);
420 const char *s = StringOfTerm(inp->val.t);
421 if (s[0] == 0) {
422 char *out = Malloc(4);
423 memset(out, 0, 4);
424 POPRET( out );
425
426 }
427 if (inp->type & YAP_STRING_WITH_BUFFER) {
428 pop_text_stack(lvl);
429 return (unsigned char *)UStringOfTerm(inp->val.t);
430 }
431 {
432 inp->type |= YAP_STRING_IN_TMP;
433 size_t sz = strlen(s);
434 char *out = Malloc(sz + 1);
435 strcpy(out, s);
436 POPRET( out );
437 }
/* List: both kinds accepted, codes only, or atoms only, in that order. */
438 } else if (IsPairOrNilTerm(inp->val.t)) {
439 if (((inp->type & (YAP_STRING_CODES | YAP_STRING_ATOMS)) ==
440 (YAP_STRING_CODES | YAP_STRING_ATOMS))) {
441 // Yap_DebugPlWriteln(inp->val.t);
442 char * out = (char *)Yap_ListToBuffer(NULL, inp->val.t, inp PASS_REGS);
443 POPRET( out );
444 // this is a term, extract to a sfer, and representation is wide
445 }
446 if (inp->type & YAP_STRING_CODES) {
447 // Yap_DebugPlWriteln(inp->val.t);
448 char *out = (char *)Yap_ListOfCodesToBuffer(NULL, inp->val.t, inp PASS_REGS);
449 // this is a term, extract to a sfer, and representation is wide
450 POPRET( out );
451 }
452 if (inp->type & YAP_STRING_ATOMS) {
453 // Yap_DebugPlWriteln(inp->val.t);
454 char *out = (char *)Yap_ListOfAtomsToBuffer(NULL, inp->val.t, inp PASS_REGS);
455 // this is a term, extract to a buffer, and representation is wide
456 POPRET( out );
457 }
458 }
/* Integer: printed with Int_FORMAT into a buffer of 2 * MaxTmp() bytes. */
459 if (inp->type & YAP_STRING_INT && IsIntegerTerm(inp->val.t)) {
460 // ASCII, so both LATIN1 and UTF-8
461 // Yap_DebugPlWriteln(inp->val.t);
462 char * out = Malloc(2 * MaxTmp(PASS_REGS1));
463 if (snprintf(out, MaxTmp(PASS_REGS1) - 1, Int_FORMAT,
464 IntegerOfTerm(inp->val.t)) < 0) {
465 AUX_ERROR(inp->val.t, 2 * MaxTmp(PASS_REGS1), out, char);
466 }
467 POPRET( out );
468 }
/* Float: formatted by Yap_FormatFloat(); NULL is returned if that fails. */
469 if (inp->type & YAP_STRING_FLOAT && IsFloatTerm(inp->val.t)) {
470 char *out = Malloc(2 * MaxTmp(PASS_REGS1));
471 if (!Yap_FormatFloat(FloatOfTerm(inp->val.t), &out, 1024)) {
472 pop_text_stack(lvl);
473 return NULL;
474 }
475 POPRET(out);
476 }
/* Big integer: written in base 10 by Yap_mpz_to_string(). */
477 if (inp->type & YAP_STRING_BIG && IsBigIntTerm(inp->val.t)) {
478 // Yap_DebugPlWriteln(inp->val.t);
479 char *out = Malloc(MaxTmp());
480 if (!Yap_mpz_to_string(Yap_BigIntOfTerm(inp->val.t), out, MaxTmp() - 1,
481 10)) {
482 AUX_ERROR(inp->val.t, MaxTmp(PASS_REGS1), out, char);
483 }
484 POPRET(out);
485 }
/* Any other term is accepted only when YAP_STRING_TERM is set. */
486 if (inp->type & YAP_STRING_TERM) {
487 pop_text_stack(lvl);
488 return (unsigned char *)Yap_TermToBuffer(inp->val.t, 0);
489 }
490 pop_text_stack(lvl);
491 Yap_ThrowError(TYPE_ERROR_TEXT,
492 inp->val.t, "while converting term %s", Yap_TermToBuffer(
493 inp->val.t, Handle_cyclics_f|Quote_illegal_f | Handle_vars_f));
494
495 return NULL;
496 }
497
/* 8-bit input: only ENC_ISO_LATIN1 is converted; ENC_ISO_ASCII and every
   other encoding return the caller's buffer as it is. */
498 if (inp->type & YAP_STRING_CHARS) {
499 if (inp->enc == ENC_ISO_ASCII) {
500 pop_text_stack(lvl);
501 return inp->val.uc;
502 }
503
504 if (inp->enc == ENC_ISO_LATIN1) {
505 POPRET( (char*)latin2utf8(inp));
506 }
507
508 pop_text_stack(lvl);
509
510 return inp->val.uc;
511 }
/* Wide-character input is always converted. */
512 if (inp->type & YAP_STRING_WCHARS) {
513 // printf("%S\n",inp->val.w);
514 POPRET( (char *)wchar2utf8(inp) );
515 }
516 pop_text_stack(lvl);
517 Yap_ThrowError(TYPE_ERROR_TEXT,
518 TermNil, "Bad text input ");
519 return NULL;
520}
521
522static Term write_strings(unsigned char *s0, seq_tv_t *out USES_REGS) {
523 size_t max;
524
525 if (s0 && s0[0]) max = strlen((char *)s0);
526 else max = 0;
527
528 if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
529 if (out->type & YAP_STRING_NCHARS)
530 if (out->type & YAP_STRING_TRUNC && out->max < max) {
531 max = out->max;
532 s0[max] = '\0';
533 }
534 }
535
536 out->val.t = MkUStringTerm(s0);
537
538 return out->val.t;
539}
540
541static Term write_atoms(void *s0, seq_tv_t *out USES_REGS) {
542 Term t = AbsPair(HR);
543 char *s1 = (char *)s0;
544 size_t sz = 0;
545 size_t max = strlen(s1);
546 if (s1[0] == '\0') {
547 out->val.t = TermNil;
548 return TermNil;
549 }
550 if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
551 if (out->type & YAP_STRING_TRUNC && out->max < max)
552 max = out->max;
553 }
554
555 unsigned char *s = s0, *lim = s + strnlen((char *)s, max);
556 unsigned char *cp = s;
557 unsigned char w[10];
558 int wp = 0;
559 LOCAL_TERM_ERROR(t, 2 * (lim - s));
560 while (cp < lim && *cp) {
561 utf8proc_int32_t chr;
562 CELL *cl;
563 s += get_utf8(s, -1, &chr);
564 if (chr == '\0') {
565 w[0] = '\0';
566 break;
567 }
568 wp = put_utf8(w, chr);
569 w[wp] = '\0';
570 cl = HR;
571 HR += 2;
572 cl[0] = MkAtomTerm(Yap_ULookupAtom(w));
573 cl[1] = AbsPair(HR);
574 sz++;
575 if (sz == max)
576 break;
577 }
578 if (out->type & YAP_STRING_DIFF) {
579 if (sz == 0)
580 t = out->dif;
581 else
582 HR[-1] = Globalize(out->dif PASS_REGS);
583 } else {
584 if (sz == 0)
585 t = TermNil;
586 else
587 HR[-1] = TermNil;
588 }
589 out->val.t = t;
590 return (t);
591}
592
593static Term write_codes(void *s0, seq_tv_t *out USES_REGS) {
594 Term t;
595 size_t sz = strlen(s0);
596 if (sz == 0) {
597 if (out->type & YAP_STRING_DIFF) {
598 out->val.t = Globalize(out->dif PASS_REGS);
599 } else {
600 out->val.t = TermNil;
601 }
602 return out->val.t;
603 }
604 unsigned char *s = s0, *lim = s + sz;
605 unsigned char *cp = s;
606
607 t = AbsPair(HR);
608 LOCAL_TERM_ERROR(t, 2 * (lim - s));
609 t = AbsPair(HR);
610 while (*cp) {
611 utf8proc_int32_t chr;
612 CELL *cl;
613 cp += get_utf8(cp, -1, &chr);
614 if (chr == '\0')
615 break;
616 cl = HR;
617 HR += 2;
618 cl[0] = MkIntegerTerm(chr);
619 cl[1] = AbsPair(HR);
620 }
621 if (sz == 0) {
622 HR[-1] = Globalize(out->dif PASS_REGS);
623 } else {
624 HR[-1] = TermNil;
625 }
626 out->val.t = t;
627 return (t);
628}
629
630static Atom write_atom(void *s0, seq_tv_t *out USES_REGS) {
631 unsigned char *s = s0;
632 int32_t ch;
633 if (s[0] == '\0') {
634 return Yap_LookupAtom("");
635 }
636 size_t leng = strlen(s0);
637 if (strlen_utf8(s0) <= leng) {
638 return Yap_LookupAtom(s0);
639 } else {
640 size_t n = get_utf8(s, -1, &ch);
641 unsigned char *buf = Malloc(n + 1);
642 memmove(buf, s0, n + 1);
643 return Yap_ULookupAtom(buf);
644 }
645}
646
647void *write_buffer(unsigned char *s0, seq_tv_t *out USES_REGS) {
648 int l = push_text_stack();
649 size_t leng = strlen((char *)s0);
650 size_t min = 0, max = leng;
651 if (out->enc == ENC_ISO_UTF8) {
652 if (out->val.uc == NULL) { // this should always be the case
653 out->val.uc = Malloc(leng + 1);
654 strcpy(out->val.c, (char *)s0);
655 } else if (out->val.uc != s0) {
656 out->val.c = Malloc(leng + 1);
657 strcpy(out->val.c, (char *)s0);
658 }
659 } else if (out->enc == ENC_ISO_LATIN1) {
660
661 unsigned char *s = s0;
662 unsigned char *cp = s;
663 unsigned char *buf = out->val.uc;
664 if (!buf) {
665 pop_text_stack(l);
666 return NULL;
667 }
668 while (*cp) {
669 utf8proc_int32_t chr;
670 int off = get_utf8(cp, -1, &chr);
671 if (off <= 0 || chr > 255) {
672 pop_text_stack(l);
673 return NULL;
674 }
675 if (off == max)
676 break;
677 cp += off;
678 *buf++ = chr;
679 }
680 if (max >= min)
681 *buf++ = '\0';
682 else
683 while (max < min) {
684 utf8proc_int32_t chr;
685 max++;
686 cp += get_utf8(cp, -1, &chr);
687 *buf++ = chr;
688 }
689 } else if (out->enc == ENC_WCHAR) {
690 unsigned char *s = s0, *lim = s + (max = strnlen((char *)s0, max));
691 unsigned char *cp = s;
692 wchar_t *buf0, *buf;
693
694 buf = buf0 = out->val.w;
695 if (!buf) {
696 pop_text_stack(l);
697 return NULL;
698 }
699 while (*cp && cp < lim) {
700 utf8proc_int32_t chr;
701 cp += get_utf8(cp, -1, &chr);
702 *buf++ = chr;
703 }
704 if (max >= min)
705 *buf++ = '\0';
706 else
707 while (max < min) {
708 utf8proc_int32_t chr;
709 max++;
710 cp += get_utf8(cp, -1, &chr);
711 *buf++ = chr;
712 }
713 *buf = '\0';
714 } else {
715 // no other encodings are supported.
716 pop_text_stack(l);
717 return NULL;
718 }
719 out->val.c = pop_output_text_stack(l, out->val.c);
720 return out->val.c;
721}
722
723static size_t write_length(const unsigned char *s0, seq_tv_t *out USES_REGS) {
724 return strlen_utf8(s0);
725}
726
727static Term write_number(unsigned char *s, seq_tv_t *out USES_REGS) {
728 Term t;
729
730 t = Yap_StringToNumberTerm((char *)s, &out->enc,false);
731 LOCAL_delay = false;
732 return t;
733}
734
735static Term string_to_term(void *s, seq_tv_t *out USES_REGS) {
736 Term o;
737 int lvl = push_text_stack();
739 yap_error_descriptor_t *old = Yap_pushErrorContext(true, new_error, LOCAL_ActiveError);
740 o = out->val.t = Yap_BufferToTerm(s, TermNil);
741 LOCAL_ActiveError = Yap_popErrorContext(true, false, old);
742 pop_text_stack(lvl);
743 return o;
744}
745
746bool write_Text(unsigned char *inp, seq_tv_t *out USES_REGS) {
747 /* we know what the term is */
748 out->val.t = 0;
749 if (out->type == 0) {
750 return true;
751 }
752
753 if (out->type & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG)) {
754 if ((out->val.t = write_number(
755 inp, out PASS_REGS)) != 0L) {
756 // Yap_DebugPlWriteln(out->val.t);
757
758 return true;
759 }
760
761 if (!(out->type & YAP_STRING_ATOM))
762 {
763 return false;
764 }
765 }
766 if (out->type & (YAP_STRING_ATOM)) {
767
768
769 if ((out->val.a = write_atom(inp, out PASS_REGS)) != NIL) {
770 Atom at = out->val.a;
771 if (at && (out->type & YAP_STRING_OUTPUT_TERM))
772 out->val.t = MkAtomTerm(at);
773 // Yap_DebugPlWriteln(out->val.t);
774 return at != NIL;
775 }
776 }
777 if (out->type & YAP_STRING_DATUM) {
778 if ((out->val.t = string_to_term(inp, out PASS_REGS)) != 0L)
779 return out->val.t != 0;
780 }
781
782 switch (out->type & YAP_TYPE_MASK) {
783 case YAP_STRING_CHARS: {
784 void *room = write_buffer(inp, out PASS_REGS);
785 // printf("%s\n", out->val.c);
786 return room != NULL;
787 }
788 case YAP_STRING_WCHARS: {
789 void *room = write_buffer(inp, out PASS_REGS);
790 // printf("%S\n", out->val.w);
791 return room != NULL;
792 }
793 case YAP_STRING_STRING:
794 out->val.t = write_strings(inp, out PASS_REGS);
795 // Yap_DebugPlWriteln(out->val.t);
796 return out->val.t != 0;
797 case YAP_STRING_ATOMS:
798 out->val.t = write_atoms(inp, out PASS_REGS);
799 // Yap_DebugPlWriteln(out->val.t);
800 return out->val.t != 0;
801 case YAP_STRING_CODES:
802 out->val.t = write_codes(inp, out PASS_REGS);
803 // Yap_DebugPlWriteln(out->val.t);
804 return out->val.t != 0;
805 case YAP_STRING_LENGTH:
806 out->val.l = write_length(inp, out PASS_REGS);
807 // printf("s\n",out->val.l);
808 return out->val.l != (size_t)(-1);
809 case YAP_STRING_ATOM:
810 out->val.a = write_atom(inp, out PASS_REGS);
811 // Yap_DebugPlWriteln(out->val.t);
812 return out->val.a != NULL;
813 case YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG:
814 out->val.t = write_number(inp, out PASS_REGS);
815 // Yap_DebugPlWriteln(out->val.t);
816 if (out->val.t==0 &&(out->type & (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG |YAP_STRING_ATOM | YAP_STRING_STRING| YAP_STRING_TERM)) ==
817 (YAP_STRING_INT | YAP_STRING_FLOAT | YAP_STRING_BIG))
818 Yap_ThrowError(TYPE_ERROR_NUMBER, MkUStringTerm(inp), NULL);
819 return out->val.t != 0;
820 default: { return true; }
821 }
822 return false;
823}
824
825static size_t upcase(void *s0, seq_tv_t *out USES_REGS) {
826
827 unsigned char *s = s0;
828 while (*s) {
829 // assumes the two code have always the same size;
830 utf8proc_int32_t chr;
831 get_utf8(s, -1, &chr);
832 chr = utf8proc_toupper(chr);
833 s += put_utf8(s, chr);
834 }
835 return true;
836}
837
838static size_t downcase(void *s0, seq_tv_t *out USES_REGS) {
839
840 unsigned char *s = s0;
841 while (*s) {
842 // assumes the two code have always the same size;
843 utf8proc_int32_t chr;
844 get_utf8(s, -1, &chr);
845 chr = utf8proc_tolower(chr);
846 s += put_utf8(s, chr);
847 }
848 return true;
849}
850
851bool Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS) {
852 unsigned char *buf;
853 bool rc;
854
855 /*
856 //printf(stderr, "[ %d ", n++) ;
857 if (inp->type & (YAP_STRING_TERM|YAP_STRING_ATOM|YAP_STRING_ATOMS_CODES
858 |YAP_STRING_STRING))
859 //Yap_DebugPlWriteln(inp->val.t);
860 else if (inp->type & YAP_STRING_WCHARS) fprintf(stderr,"S %S\n", inp->val
861 .w);
862 else fprintf(stderr,"s %s\n", inp->val.c);
863 */
864 // cnt++;
865 int l = push_text_stack();
866 buf = Yap_readText(inp PASS_REGS);
867 if (!buf) {
868 pop_text_stack(l);
869 yap_error_number err;
870 if (!(err=gen_type_error(inp->type)))
871 err = TYPE_ERROR_TEXT;
872 return 0L;
873 }
874 if (buf[0]) {
875 size_t leng = strlen_utf8(buf);
876 if (out->type & (YAP_STRING_NCHARS | YAP_STRING_TRUNC)) {
877 if (out->max < leng) {
878 const unsigned char *ptr = skip_utf8(buf, out->max);
879 size_t diff = (ptr - buf);
880 char *nbuf = Malloc(diff + 1);
881 memmove(nbuf, buf, diff);
882 nbuf[diff] = '\0';
883 leng = diff;
884 }
885 // else if (out->type & YAP_STRING_NCHARS &&
886 // const unsigned char *ptr = skip_utf8(buf)
887 }
888 if (out->type & (YAP_STRING_UPCASE | YAP_STRING_DOWNCASE)) {
889 if (out->type & YAP_STRING_UPCASE) {
890 if (!upcase(buf, out)) {
891 pop_text_stack(l);
892 return false;
893 }
894 }
895 if (out->type & YAP_STRING_DOWNCASE) {
896 if (!downcase(buf, out)) {
897 pop_text_stack(l);
898 return false;
899 }
900 }
901 }
902 }
903 rc = write_Text(buf, out PASS_REGS);
904 /* fprintf(stderr, " -> ");
905 if (!rc) fprintf(stderr, "NULL");
906 else if (out->type &
907 (YAP_STRING_TERM|YAP_STRING_ATOMS_CODES
908 |YAP_STRING_STRING)) //Yap_DebugPlWrite(out->val.t);
909 else if (out->type &
910 YAP_STRING_ATOM) //Yap_DebugPlWriteln(MkAtomTerm(out->val.a));
911 else if (out->type & YAP_STRING_WCHARS) fprintf(stderr, "%S",
912 out->val.w);
913 else
914 fprintf(stderr, "%s", out->val.c);
915 fprintf(stderr, "\n]\n"); */
916 out->val.uc = pop_output_text_stack(l,out->val.uc);
917 return rc;
918}
919
920
921//
922// Out must be an atom or a string
923
924//
925// Out must be an atom or a string
926bool Yap_Concat_Text(int tot, seq_tv_t inp[], seq_tv_t *out USES_REGS) {
927 unsigned char *buf;
928 int i;
929 size_t avai, extra;
930 unsigned char *nbuf=NULL;
931
932 int lvl = push_text_stack();
933 buf = Malloc(tot * 256);
934 buf[0] = '\0';
935 if (!buf) {
936 pop_text_stack(lvl);
937 return NULL;
938 }
939 for (i = 0; i < tot; i++) {
940 Term t = inp[i].val.t;
941 if (IsAtomTerm(t) && inp[i].type & YAP_STRING_ATOM) {
942 nbuf = RepAtom(AtomOfTerm(t))->UStrOfAE;
943 } else {
944 nbuf = Yap_readText(inp + i PASS_REGS);
945 }
946 if (!nbuf) {
947 // pop_text_stack(lvl);
948 //return NULL;
949 continue;
950 }
951 // if (!nbuf[0])
952 // continue;
953 if (nbuf && nbuf[0]) {
954 size_t sz = strlen((char*)nbuf);
955 avai = (strlen((char *)buf) - 1 - sz);
956 if (avai < sz) {
957 extra= (tot-i)*sz+256;
958 buf = Realloc(buf, extra);
959 }
960 strcat((char*)buf,(char*)nbuf);
961 }
962 }
963
964 bool rc = write_Text(buf, out PASS_REGS);
965 pop_text_stack( lvl );
966
967 return rc;
968}
969
970//
971bool Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp,
972 seq_tv_t outv[] USES_REGS) {
973 int lvl = push_text_stack();
974 const unsigned char *buf;
975 size_t b_l, u_l;
976
977 inp->type |= YAP_STRING_IN_TMP;
978 buf = Yap_readText(inp PASS_REGS);
979 if (!buf) {
980 pop_text_stack(lvl);
981 return false;
982 }
983 b_l = strlen((char *)buf);
984 if (b_l == 0) {
985 pop_text_stack(lvl);
986 return false;
987 }
988 u_l = strlen_utf8(buf);
989 if (!cuts) {
990 if (n == 2) {
991 size_t b_l0, b_l1, u_l0, u_l1;
992 unsigned char *buf0, *buf1;
993
994 if (outv[0].val.t) {
995 buf0 = Yap_readText(outv PASS_REGS);
996 if (!buf0) {
997 return false;
998 }
999 b_l0 = strlen((const char *)buf0);
1000 if (memcmp(buf, buf0, b_l0) != 0) {
1001 pop_text_stack(lvl);
1002 return false;
1003 }
1004 u_l0 = strlen_utf8(buf0);
1005 u_l1 = u_l - u_l0;
1006
1007 b_l1 = b_l - b_l0;
1008 buf1 = slice(u_l0, u_l, buf PASS_REGS);
1009 b_l1 = strlen((const char *)buf1);
1010 bool rc = write_Text(buf1, outv + 1 PASS_REGS);
1011 pop_text_stack(lvl);
1012 if (!rc) {
1013 return false;
1014 }
1015 return rc;
1016 } else /* if (outv[1].val.t) */ {
1017 buf1 = Yap_readText(outv + 1 PASS_REGS);
1018 if (!buf1) {
1019 pop_text_stack(lvl);
1020 return false;
1021 }
1022 b_l1 = strlen((char *)buf1);
1023 u_l1 = strlen_utf8(buf1);
1024 b_l0 = b_l - b_l1;
1025 u_l0 = u_l - u_l1;
1026 if (memcmp(skip_utf8((const unsigned char *)buf, b_l0), buf1, b_l1) !=
1027 0) {
1028 pop_text_stack(lvl);
1029 return false;
1030 }
1031 buf0 = slice(0, u_l0, buf PASS_REGS);
1032 buf0 = pop_output_text_stack(lvl, buf0);
1033 bool rc = write_Text(buf0, outv PASS_REGS);
1034 return rc;
1035 }
1036 }
1037 }
1038 int i, next;
1039 for (i = 0; i < n; i++) {
1040 if (i == 0)
1041 next = 0;
1042 else
1043 next = cuts[i - 1];
1044 if (i > 0 && cuts[i] == 0)
1045 break;
1046 void *bufi = slice(next, cuts[i], buf PASS_REGS);
1047 bufi = pop_output_text_stack(lvl, bufi);
1048 if (!write_Text(bufi, outv + i PASS_REGS)) {
1049 return false;
1050 }
1051 }
1052 pop_text_stack(lvl);
1053
1054 return true;
1055}
1056
1068const char *Yap_PredIndicatorToUTF8String(PredEntry *ap, char *s0, size_t sz) {
1069 CACHE_REGS
1070 Atom at;
1071 arity_t arity = 0;
1072 Functor f;
1073 char *s, *smax;
1074 s = s0;
1075 smax = s + sz;
1076 Term tmod = ap->ModuleOfPred;
1077 if (tmod) {
1078 char *sn = Yap_AtomToUTF8Text(AtomOfTerm(tmod));
1079 stpcpy(s, sn);
1080 if (smax - s > 1) {
1081 strcat(s, ":");
1082 } else {
1083 return NULL;
1084 }
1085 s++;
1086 } else {
1087 if (smax - s > strlen("prolog:")) {
1088 s = strcpy(s, "prolog:");
1089 } else {
1090 return NULL;
1091 }
1092 }
1093 // follows the actual functor
1094 if (ap->ModuleOfPred == IDB_MODULE) {
1095 if (ap->PredFlags & NumberDBPredFlag) {
1096 Int key = ap->src.IndxId;
1097 snprintf(s, smax - s, "%" PRIdPTR, key);
1098 return s0;
1099 } else if (ap->PredFlags & AtomDBPredFlag) {
1100 at = (Atom)(ap->FunctorOfPred);
1101 if (!stpcpy(s, Yap_AtomToUTF8Text(at)))
1102 return NULL;
1103 } else {
1104 f = ap->FunctorOfPred;
1105 at = NameOfFunctor(f);
1106 arity = ArityOfFunctor(f);
1107 }
1108 } else {
1109 arity = ap->ArityOfPE;
1110 if (arity) {
1111 at = NameOfFunctor(ap->FunctorOfPred);
1112 } else {
1113 at = (Atom)(ap->FunctorOfPred);
1114 }
1115 }
1116 if (!stpcpy(s, Yap_AtomToUTF8Text(at))) {
1117 return NULL;
1118 }
1119 s += strlen(s);
1120 snprintf(s, smax - s, "/%" PRIdPTR, arity);
1121 return s0;
1122}
1123
1133Term Yap_MkTextTerm(const char *s, int guide USES_REGS) {
1134 if (guide == YAP_STRING_ATOM) {
1135 return MkAtomTerm(Yap_LookupAtom(s));
1136 } else if (guide == YAP_STRING_STRING) {
1137 return MkStringTerm(s);
1138 } else if (guide == YAP_STRING_ATOMS) {
1139 return Yap_CharsToListOfAtoms(s, ENC_ISO_UTF8 PASS_REGS);
1140 } else {
1141 return Yap_CharsToListOfCodes(s, ENC_ISO_UTF8 PASS_REGS);
1142 }
1143}
Main definitions.
void * Malloc(size_t sz USES_REGS)
allocate a temporary text block
Definition: alloc.c:1759
yap_error_descriptor_t * Yap_pushErrorContext(bool link, yap_error_descriptor_t *new_error, yap_error_descriptor_t *old_error)
add a new error descriptor, either to the top of the stack, or as the top;
Definition: errors.c:746
@ write_strings
Writable flag telling whether the system should write lists of integers that are writable character c...
Definition: YapGFlagInfo.h:837
Definition: Yatom.h:544
all we need to know about an error/throw
Definition: YapError.h:205
char_kind_t Yap_wide_chtype(int ch)
Definition: text.c:60
Term Yap_MkTextTerm(const char *s, int guide USES_REGS)
Convert from a text buffer (8-bit) to a term that has the same type as Tguide
Definition: text.c:1133
const char * Yap_PredIndicatorToUTF8String(PredEntry *ap, char *s0, size_t sz)
Convert from a predicate structure to an UTF-8 string of the form.
Definition: text.c:1068