YAP 7.1.0
YapUTF8.h
Go to the documentation of this file.
1/*************************************************************************
2 * *
3 * YAP Prolog *
4 * *
5 * Yap Prolog was developed at NCCUP - Universidade do Porto *
6 * *
7 * Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-1997 *
8 * *
9 **************************************************************************
10 * *
11 * File: YapUTF8.h *
12 * Last rev: 5/2/88 *
13 * mods: *
14 * comments: UTF-8 encoding, decoding and length helpers built on utf8proc *
15 * *
16 *************************************************************************/
17#ifndef YAP_UTF8_H
18#define YAP_UTF8_H
19
/* Largest character code accepted: 0xffff when SIZEOF_WCHAR_T is 2,
 * 0x10ffff in every other configuration. */
#if SIZEOF_WCHAR_T != 2
#define CHARCODE_MAX 0x10ffff
#else
#define CHARCODE_MAX 0xffff
#endif
32
33#include "utf8proc.h"
34
35#ifndef INLINE_ONLY
36#define INLINE_ONLY
37#endif
38
39
40inline static utf8proc_ssize_t get_utf8(const utf8proc_uint8_t *ptr,
41 size_t n,
42 utf8proc_int32_t *valp) {
43 utf8proc_ssize_t rc = utf8proc_iterate(ptr, n, valp);
44 if (rc <= 0) {
45 if (ptr[0] == 0xC0 && ptr[1] == 0x80) {
46 *valp = 0;
47 return 2;
48 }
49 LOCAL_ActiveError->errorNo = REPRESENTATION_ERROR_IN_CHARACTER_CODE;
50 }
51 return rc < 1 ? 1 : rc;
52}
53
54inline static utf8proc_ssize_t put_xutf8(utf8proc_uint8_t *ptr,
55 utf8proc_int32_t val) {
56 if (val == 0) {
57 ptr[0] = 0xC0;
58 ptr[1] = 0x80;
59 return 2;
60 }
61 utf8proc_ssize_t rc = utf8proc_encode_char(val, ptr);
62 if (rc <= 0) {
63
64 LOCAL_ActiveError->errorNo = REPRESENTATION_ERROR_CHARACTER_CODE;
65 }
66 return rc < 1 ? 1 : rc;
67}
68
69
70inline static utf8proc_ssize_t put_utf8(utf8proc_uint8_t *ptr,
71 utf8proc_int32_t val) {
72 utf8proc_ssize_t rc = utf8proc_encode_char(val, ptr);
73 if (rc <= 0) {
74
75 LOCAL_ActiveError->errorNo = REPRESENTATION_ERROR_CHARACTER_CODE;
76 }
77 return rc < 1 ? 1 : rc;
78}
79
80inline static const utf8proc_uint8_t *skip_utf8(const utf8proc_uint8_t *pt,
81 utf8proc_ssize_t n) {
82 utf8proc_ssize_t i;
83 utf8proc_int32_t b;
84 for (i = 0; i < n; i++) {
85 utf8proc_ssize_t l = utf8proc_iterate(pt, -1, &b);
86 if (b == 0)
87 return pt;
88 if (l < 0) {
89 // LOCAL_ActiveError->errorNo = REPRESENTATION_ERROR_CHARACTER_CODE;
90 } else {
91 pt += l;
92 }
93 }
94 return pt;
95}
96
97inline static utf8proc_ssize_t utf8_nof(utf8proc_int32_t val) {
98 return utf8proc_charwidth(val);
99}
100
101inline static utf8proc_ssize_t strlen_utf8(const utf8proc_uint8_t *pt) {
102 utf8proc_ssize_t rc = 0;
103 utf8proc_int32_t b;
104 while (true) {
105 utf8proc_ssize_t l = utf8proc_iterate(pt, -1, &b);
106 if (b == 0)
107 return rc;
108 else if (l > 0) {
109 pt += l;
110 rc++;
111 } else {
112 // LOCAL_ActiveError->errorNo = REPRESENTATION_ERROR_CHARACTER_CODE;
113 pt++;
114 }
115 }
116 return rc;
117}
118
119inline static utf8proc_ssize_t strlen_latin_utf8(const unsigned char *pt) {
120 utf8proc_ssize_t rc = 0;
121 utf8proc_uint8_t b;
122 while (true) {
123 utf8proc_ssize_t l = utf8proc_encode_char(*pt, &b);
124 if (l<0) {
125 pt++;
126 }
127 if (b == 0)
128 return rc;
129 pt++;
130 rc += l;
131 }
132 return rc;
133}
134
135inline static utf8proc_ssize_t strnlen_latin_utf8(const unsigned char *pt,
136 size_t max) {
137 utf8proc_ssize_t rc = 0;
138 utf8proc_uint8_t b;
139 while (true) {
140 utf8proc_ssize_t l = utf8proc_encode_char(*pt, &b);
141 if (b == 0)
142 return rc;
143 pt++;
144 if (l > 0)
145 rc += l;
146 if (--max == 0)
147 return rc;
148 }
149 return rc;
150}
151
152inline static utf8proc_ssize_t strlen_ucs2_utf8(const wchar_t *pt) {
153 utf8proc_ssize_t rc = 0;
154 utf8proc_uint8_t b;
155 while (true) {
156 utf8proc_ssize_t l = utf8proc_encode_char(*pt, &b);
157 if (b == 0)
158 return rc;
159 if (l < 0)
160 continue;
161 pt++;
162 rc += l;
163 }
164 return rc;
165}
166
167inline static utf8proc_ssize_t strnlen_ucs2_utf8(const wchar_t *pt,
168 size_t max) {
169 utf8proc_ssize_t rc = 0;
170 utf8proc_uint8_t b;
171 while (true) {
172 utf8proc_ssize_t l = utf8proc_encode_char(*pt, &b);
173 if (b == 0)
174 return rc;
175 pt++;
176 rc += l;
177 if (--max == 0)
178 return rc;
179 }
180 return rc;
181}
182
183inline static int cmpn_utf8(const utf8proc_uint8_t *pt1,
184 const utf8proc_uint8_t *pt2, utf8proc_ssize_t n) {
185 utf8proc_ssize_t i;
186 utf8proc_int32_t b;
187 for (i = 0; i < n; i++) {
188 if (pt1[0] != pt2[0])
189 return pt1[0] - pt2[0];
190 utf8proc_ssize_t l = utf8proc_iterate(pt1, -1, &b);
191 if (l == 2) {
192 if (pt1[1] != pt2[1])
193 return pt1[1] - pt2[1];
194 } else if (l == 3) {
195 if (pt1[2] != pt2[2])
196 return pt1[2] - pt2[2];
197 } else if (l == 4) {
198 if (pt1[3] != pt2[3])
199 return pt1[3] - pt2[3];
200 }
201 pt1 += l;
202 pt2 += l;
203 }
204 return 0;
205}
206
// UTF-16 surrogate arithmetic constants (both are uint32_t values).
//   LEAD_OFFSET      = 0xD800 - (0x10000 >> 10)
//   SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00, reduced modulo 2^32

#define LEAD_OFFSET ((uint32_t)(0xD800u - (0x10000u >> 10)))
#define SURROGATE_OFFSET                                                       \
  ((uint32_t)(0x10000u - (0xD800u << 10) - 0xDC00u))
212
213#endif