YAP 7.1.0
YapEncoding.h
1/*************************************************************************
2* *
3* YAP Prolog %W% %G%
4* *
5* Yap Prolog was developed at NCCUP - Universidade do Porto *
6* *
7* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003 *
8* *
9**************************************************************************
10* *
11* File: YapEncoding.h *
12* Last rev: 22/1/03 *
13* mods: *
14* comments: UNICODE encoding support (based on SWI-Prolog) *
15* *
16*************************************************************************/
17
18#ifndef ENCODING_H
19
20#define ENCODING_H 1
21
22#include "YapError.h"
23#if HAVE_STRING_H
24
25#include <string.h>
26
27#endif
28
/**
 * Encoding identifiers.
 *
 * ENC_OCTET is zero; every other member is a distinct single-bit value,
 * which is what lets seq_encoding() turn a member into a small index by
 * locating its set bit.
 */
typedef enum YAP_encoding {
  ENC_OCTET = 0,
  ENC_ISO_LATIN1 = 1 << 0,   /*   1 */
  ENC_ISO_ASCII = 1 << 1,    /*   2 */
  ENC_ISO_ANSI = 1 << 2,     /*   4 */
  ENC_ISO_UTF8 = 1 << 3,     /*   8 */
  ENC_UTF16_BE = 1 << 4,     /*  16 */
  ENC_UTF16_LE = 1 << 5,     /*  32 */
  ENC_ISO_UTF32_BE = 1 << 6, /*  64 */
  ENC_ISO_UTF32_LE = 1 << 7, /* 128 */
  ENC_UCS2_BE = 1 << 8,      /* 256 */
  ENC_UCS2_LE = 1 << 9       /* 512 */
} encoding_t;
42
/*
 * ENC_WCHAR names the UTF-32 flavour whose byte order follows
 * WORDS_BIGENDIAN (presumably provided by the build configuration):
 * little-endian unless that macro evaluates to non-zero.
 */
#if !WORDS_BIGENDIAN
#define ENC_WCHAR ENC_ISO_UTF32_LE
#else
#define ENC_WCHAR ENC_ISO_UTF32_BE
#endif
48
49#ifdef YAP_H
50
52encoding_t Yap_DefaultEncoding(void);
53encoding_t Yap_SystemEncoding(void);
54void Yap_SetDefaultEncoding(encoding_t new_encoding);
55
56#if HAVE_XLOCALE_H
/**
 * Consecutive numbering of the encodings.
 *
 * Each value equals what seq_encoding() yields for the encoding_t member of
 * the same name: 0 for ENC_OCTET, otherwise one plus the index of the
 * member's set bit.
 */
typedef enum {
  SEQ_ENC_OCTET = 0,
  SEQ_ENC_ISO_LATIN1 = 1,
  SEQ_ENC_ISO_ASCII = 2,
  SEQ_ENC_ISO_ANSI = 3,
  SEQ_ENC_ISO_UTF8 = 4,
  SEQ_ENC_UTF16_BE = 5,
  SEQ_ENC_UTF16_LE = 6,
  SEQ_ENC_ISO_UTF32_BE = 7,
  SEQ_ENC_ISO_UTF32_LE = 8
} seq_encoding_t;
68
70static inline seq_encoding_t seq_encoding(encoding_t inp) {
71#if HAVE__BUILTIN_FFSLL
72 return __builtin_ffsll(inp);
73#elif HAVE_FFSLL
74 return ffsll(inp);
75#else
76 unsigned int out;
77 // supports max 16 different encodings.
78 if (inp == 0)
79 return 0L;
80 // if (inp & ((CELL)0xffffL << 16)) {inp >>= 16; out += 16;}
81 if (inp & ((CELL)0xffL << 8)) {
82 inp >>= 8;
83 out += 8;
84 }
85 if (inp & ((CELL)0xfL << 4)) {
86 inp >>= 4;
87 out += 4;
88 }
89 if (inp & ((CELL)0x3L << 2)) {
90 inp >>= 2;
91 out += 2;
92 }
93 if (inp & ((CELL)0x1 << 1))
94 out++;
95#endif
96 return out;
97}
98
99extern xlocale enc_locales[SEQ_ENC_ISO_UTF32_LE + 1];
100#endif
101
102static inline const char *enc_name(encoding_t enc) {
103 switch (enc) {
104 case ENC_OCTET:
105 return "octet";
106 case ENC_ISO_LATIN1:
107 return "iso_latin_1";
108 case ENC_ISO_ASCII:
109 return "ascii";
110 case ENC_ISO_ANSI:
111 return "octet";
112 case ENC_ISO_UTF8:
113 return "utf8";
114 case ENC_UTF16_BE:
115 return "utf16_be";
116 case ENC_UTF16_LE:
117 return "utf16_le";
118 case ENC_UCS2_BE:
119 return "ucs2_be";
120 case ENC_UCS2_LE:
121 return "ucs2_le";
122 case ENC_ISO_UTF32_BE:
123 return "utf32_be";
124 case ENC_ISO_UTF32_LE:
125 return "utf32_le";
126 default:
127 return "thanks for watching!!";
128 }
129}
130
131static inline encoding_t enc_id(const char *s, encoding_t enc_bom) {
132 {
133 if (!strcmp(s, "iso_utf8"))
134 return ENC_ISO_UTF8;
135 if (!strcmp(s, "utf8"))
136 return ENC_ISO_UTF8;
137 if (!strcmp(s, "UTF-8"))
138 return ENC_ISO_UTF8;
139 if (!strcmp(s, "utf16_le"))
140 return ENC_UTF16_LE;
141 if (!strcmp(s, "utf16_be"))
142 return ENC_UTF16_BE;
143 if (!strcmp(s, "UTF-16")) {
144 if (enc_bom == ENC_UTF16_LE)
145 return ENC_UTF16_LE;
146 return ENC_UTF16_BE;
147 }
148 if (!strcmp(s, "UTF-16LE"))
149 return ENC_UTF16_LE;
150 if (!strcmp(s, "UTF-16BE"))
151 return ENC_UTF16_BE;
152 if (!strcmp(s, "octet"))
153 return ENC_OCTET;
154 if (!strcmp(s, "iso_latin_1"))
155 return ENC_ISO_LATIN1;
156 if (!strcmp(s, "iso_ascii"))
157 return ENC_ISO_ASCII;
158 if (!strcmp(s, "iso_ansi"))
159 return ENC_ISO_ANSI;
160 if (!strcmp(s, "utf32_be"))
161 return ENC_ISO_UTF32_BE;
162 if (!strcmp(s, "utf32_le"))
163 return ENC_ISO_UTF32_LE;
164 if (!strcmp(s, "UTF-32")) {
165 if (enc_bom == ENC_ISO_UTF32_LE)
166 return ENC_ISO_UTF32_LE;
167 return ENC_ISO_UTF32_BE;
168 }
169 if (!strcmp(s, "UTF-32BE"))
170 return ENC_ISO_UTF32_BE;
171 if (!strcmp(s, "UTF-32LE"))
172 return ENC_ISO_UTF32_LE;
173 if (!strcmp(s, "ISO-8859-1"))
174 return ENC_ISO_LATIN1;
175 if (!strcmp(s, "US_ASCII"))
176 return ENC_ISO_ASCII;
177 // just for SWI compat, this actually refers to
178 // UCS-22
179 if (!strcmp(s, "unicode_be"))
180 return ENC_UCS2_BE;
181 if (!strcmp(s, "unicode_le"))
182 return ENC_UCS2_LE;
183 if (!strcmp(s, "UCS-2")) {
184 if (enc_bom == ENC_UTF16_LE)
185 return ENC_UCS2_LE;
186 return ENC_UCS2_BE;
187 }
188 if (!strcmp(s, "UCS-2LE"))
189 return ENC_UCS2_LE;
190 if (!strcmp(s, "UCS-2BE"))
191 return ENC_UCS2_BE;
192 if (!strcmp(s, "default")) {
193 if (enc_bom != ENC_OCTET)
194 return enc_bom;
195 return Yap_DefaultEncoding();
196 } else {
197 return Yap_DefaultEncoding();
198 }
199 }
200}
201
202#endif
203
204#endif