YAP 7.1.0
encoding.h
/*************************************************************************
*                                                                        *
*   YAP Prolog   %W% %G%                                                 *
*                                                                        *
*   Yap Prolog was developed at NCCUP - Universidade do Porto            *
*                                                                        *
* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003       *
*                                                                        *
**************************************************************************
*                                                                        *
* File:     encoding.h                                                   *
* Last rev: 22/1/03                                                      *
* mods:                                                                  *
* comments: UNICODE encoding support (based on SWI-Prolog)               *
*                                                                        *
*************************************************************************/
17
18#ifndef ENCODING_H
19
20#define ENCODING_H 1
21
22#include "YapConfig.h"
23#include "YapIOConfig.h"
24#include <string.h>
25#include "YapError.h"
26
27
/**
 * Text encodings known to this header.
 *
 * ENC_OCTET is 0; every other member occupies a single, distinct bit, which
 * is what lets seq_encoding() turn a member into a small index by locating
 * that bit.
 */
typedef enum YAP_encoding {
  ENC_OCTET = 0,             /* no bit set */
  ENC_ISO_LATIN1 = 1 << 0,   /*   1 */
  ENC_ISO_ASCII = 1 << 1,    /*   2 */
  ENC_ISO_ANSI = 1 << 2,     /*   4 */
  ENC_ISO_UTF8 = 1 << 3,     /*   8 */
  ENC_UTF16_BE = 1 << 4,     /*  16 */
  ENC_UTF16_LE = 1 << 5,     /*  32 */
  ENC_ISO_UTF32_BE = 1 << 6, /*  64 */
  ENC_ISO_UTF32_LE = 1 << 7, /* 128 */
  ENC_UCS2_BE = 1 << 8,      /* 256 */
  ENC_UCS2_LE = 1 << 9       /* 512 */
} encoding_t;
41
/*
 * ENC_WCHAR aliases the UTF-32 member whose byte order matches the build:
 * ENC_ISO_UTF32_BE when WORDS_BIGENDIAN is non-zero, ENC_ISO_UTF32_LE
 * otherwise.
 * NOTE(review): presumably meant as the encoding of in-memory wide
 * characters; this assumes a 32-bit wchar_t -- confirm for all targets.
 */
#if WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_BE
#else
#define ENC_WCHAR ENC_ISO_UTF32_LE
#endif
47
48#ifdef YAP_H
49
/*
 * Encoding accessors, defined outside this header.
 */

/* Returns an encoding_t. enc_id() in this header uses it as the result for
 * the name "default" when its enc_bom argument is ENC_OCTET.
 * NOTE(review): presumably the process-wide default encoding -- confirm. */
encoding_t Yap_DefaultEncoding(void);

/* NOTE(review): presumably the encoding implied by the host environment;
 * not used in this header -- confirm against the definition. */
encoding_t Yap_SystemEncoding(void);

/* NOTE(review): presumably replaces the value later returned by
 * Yap_DefaultEncoding() with new_encoding -- confirm. */
void Yap_SetDefaultEncoding(encoding_t new_encoding);
54
55#if HAVE_XLOCALE_H
/**
 * Dense, consecutive numbering (0..8) of the encodings, as opposed to the
 * one-bit-per-encoding values of encoding_t. The last member,
 * SEQ_ENC_ISO_UTF32_LE, is used to size the enc_locales array.
 */
typedef enum {
  SEQ_ENC_OCTET = 0,
  SEQ_ENC_ISO_LATIN1 = 1,
  SEQ_ENC_ISO_ASCII = 2,
  SEQ_ENC_ISO_ANSI = 3,
  SEQ_ENC_ISO_UTF8 = 4,
  SEQ_ENC_UTF16_BE = 5,
  SEQ_ENC_UTF16_LE = 6,
  SEQ_ENC_ISO_UTF32_BE = 7,
  SEQ_ENC_ISO_UTF32_LE = 8
} seq_encoding_t;
67
69static inline seq_encoding_t seq_encoding(encoding_t inp) {
70#if HAVE__BUILTIN_FFSLL
71 return __builtin_ffsll(inp);
72#elif HAVE_FFSLL
73 return ffsll(inp);
74#else
75 unsigned int out;
76 // supports max 16 different encodings.
77 if (inp == 0)
78 return 0L;
79 // if (inp & ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;}
80 if (inp & ((CELL)0xffL << 8)) {
81 inp >>= 8;
82 out += 8;
83 }
84 if (inp & ((CELL)0xfL << 4)) {
85 inp >>= 4;
86 out += 4;
87 }
88 if (inp & ((CELL)0x3L << 2)) {
89 inp >>= 2;
90 out += 2;
91 }
92 if (inp & ((CELL)0x1 << 1))
93 out++;
94#endif
95 return out;
96}
97
/*
 * Table with SEQ_ENC_ISO_UTF32_LE + 1 entries, i.e. one slot for every named
 * seq_encoding_t value; defined in another translation unit.
 * NOTE(review): presumably indexed by the result of seq_encoding() and
 * holding a per-encoding locale handle -- confirm against the definition,
 * including where the `xlocale` type name comes from.
 */
extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE + 1];
99#endif
100
101static inline const char *enc_name(encoding_t enc) {
102 switch (enc) {
103 case ENC_OCTET:
104 return "octet";
105 case ENC_ISO_LATIN1:
106 return "iso_latin_1";
107 case ENC_ISO_ASCII:
108 return "ascii";
109 case ENC_ISO_ANSI:
110 return "octet";
111 case ENC_ISO_UTF8:
112 return "utf8";
113 case ENC_UTF16_BE:
114 return "utf16_be";
115 case ENC_UTF16_LE:
116 return "utf16_le";
117 case ENC_UCS2_BE:
118 return "ucs2_be";
119 case ENC_UCS2_LE:
120 return "ucs2_le";
121 case ENC_ISO_UTF32_BE:
122 return "utf32_be";
123 case ENC_ISO_UTF32_LE:
124 return "utf32_le";
125 default:
126 return "thanks for watching!!";
127 }
128}
129
130static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
131 {
132 switch(s[0]) {
133 case 'd':
134 {
135 if (!strcmp(s, "default")) {
136 if (enc_bom != ENC_OCTET)
137 return enc_bom;
138 return Yap_DefaultEncoding();
139 } else {
140 return Yap_DefaultEncoding();
141 }
142 }
143 case 'i':
144 {
145 if (!strcmp(s, "iso_utf8"))
146 return ENC_ISO_UTF8;
147 if (!strcmp(s, "iso_latin_1"))
148 return ENC_ISO_LATIN1;
149 if (!strcmp(s, "iso_ascii"))
150 return ENC_ISO_ASCII;
151 if (!strcmp(s, "iso_ansi"))
152 return ENC_ISO_ANSI;
153 }
154 case 'I':
155 {
156 if (!strcmp(s, "ISO-8859-1"))
157 return ENC_ISO_LATIN1;
158 }
159 case 'o':
160 {
161 if (!strcmp(s, "octet"))
162 return ENC_OCTET;
163 }
164 case 'u':
165 {
166 if (!strcmp(s, "utf8"))
167 return ENC_ISO_UTF8;
168 if (!strcmp(s, "utf16_le"))
169 return ENC_UTF16_LE;
170 if (!strcmp(s, "utf16_be"))
171 return ENC_UTF16_BE;
172 if (!strcmp(s, "utf32_be"))
173 return ENC_ISO_UTF32_BE;
174 if (!strcmp(s, "utf32_le"))
175 return ENC_ISO_UTF32_LE;
176 if (!strcmp(s, "unicode_be"))
177 return ENC_UCS2_BE;
178 if (!strcmp(s, "unicode_le"))
179 return ENC_UCS2_LE;
180 }
181 case 'U':
182 {
183 if (!strcmp(s, "UTF-8"))
184 return ENC_ISO_UTF8;
185 if (!strcmp(s, "UTF-16")) {
186 if (enc_bom == ENC_UTF16_LE)
187 return ENC_UTF16_LE;
188 return ENC_UTF16_BE;
189 }
190 if (!strcmp(s, "UTF-16LE"))
191 return ENC_UTF16_LE;
192 if (!strcmp(s, "UTF-16BE"))
193 return ENC_UTF16_BE;
194 if (!strcmp(s, "UTF-32BE"))
195 return ENC_ISO_UTF32_BE;
196 if (!strcmp(s, "UTF-32LE"))
197 return ENC_ISO_UTF32_LE;
198 if (!strcmp(s, "US_ASCII"))
199 return ENC_ISO_ASCII;
200 if (!strcmp(s, "UTF-32")) {
201 if (enc_bom == ENC_ISO_UTF32_LE)
202 return ENC_ISO_UTF32_LE;
203 return ENC_ISO_UTF32_BE;
204 }
205 // just for SWI compat, this actually refers to
206 // UCS-2
207 }
208 if (!strcmp(s, "UCS-2")) {
209 if (enc_bom == ENC_UTF16_LE)
210 return ENC_UCS2_LE;
211 return ENC_UCS2_BE;
212 }
213 if (!strcmp(s, "UCS-2LE"))
214 return ENC_UCS2_LE;
215 if (!strcmp(s, "UCS-2BE"))
216 return ENC_UCS2_BE;
217
218 }
219
220 }
221
222}
223
224
225#endif
226
227#endif