source: EcnlProtoTool/trunk/onigmo-5.15.0/src/regenc.c@ 279

Last change on this file since 279 was 279, checked in by coas-nagasima, 7 years ago

ファイルを追加、更新。

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
  • Property svn:mime-type set to text/x-csrc
File size: 28.0 KB
Line 
1/**********************************************************************
2 regenc.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
/* Library-wide default encoding, initialised to ONIG_ENCODING_INIT_DEFAULT.
 * It is returned by onigenc_get_default_encoding() and overwritten by
 * onigenc_set_default_encoding(). */
33OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
34
/* Initialisation hook for the encoding layer.
 * There is nothing to set up; the function always returns 0. */
extern int onigenc_init(void)
{
  return 0;
}
40
41extern OnigEncoding
42onigenc_get_default_encoding(void)
43{
44 return OnigEncDefaultCharEncoding;
45}
46
47extern int
48onigenc_set_default_encoding(OnigEncoding enc)
49{
50 OnigEncDefaultCharEncoding = enc;
51 return 0;
52}
53
54extern UChar*
55onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
56{
57 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
58 if (p < s) {
59 p += enclen(enc, p);
60 }
61 return p;
62}
63
64extern UChar*
65onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
66 const UChar* start, const UChar* s, const UChar** prev)
67{
68 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
69
70 if (p < s) {
71 if (prev) *prev = (const UChar* )p;
72 p += enclen(enc, p);
73 }
74 else {
75 if (prev) *prev = (const UChar* )NULL; /* Sorry */
76 }
77 return p;
78}
79
80extern UChar*
81onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
82{
83 if (s <= start)
84 return (UChar* )NULL;
85
86 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
87}
88
89extern UChar*
90onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
91{
92 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
93 if (s <= start)
94 return (UChar* )NULL;
95
96 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
97 }
98 return (UChar* )s;
99}
100
101extern UChar*
102onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
103{
104 UChar* q = (UChar* )p;
105 while (n-- > 0) {
106 q += ONIGENC_MBC_ENC_LEN(enc, q);
107 }
108 return (q <= end ? q : NULL);
109}
110
111extern int
112onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
113{
114 int n = 0;
115 UChar* q = (UChar* )p;
116
117 while (q < end) {
118 q += ONIGENC_MBC_ENC_LEN(enc, q);
119 n++;
120 }
121 return n;
122}
123
124extern int
125onigenc_strlen_null(OnigEncoding enc, const UChar* s)
126{
127 int n = 0;
128 UChar* p = (UChar* )s;
129
130 while (1) {
131 if (*p == '\0') {
132 UChar* q;
133 int len = ONIGENC_MBC_MINLEN(enc);
134
135 if (len == 1) return n;
136 q = p + 1;
137 while (len > 1) {
138 if (*q != '\0') break;
139 q++;
140 len--;
141 }
142 if (len == 1) return n;
143 }
144 p += ONIGENC_MBC_ENC_LEN(enc, p);
145 n++;
146 }
147}
148
149extern int
150onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
151{
152 UChar* start = (UChar* )s;
153 UChar* p = (UChar* )s;
154
155 while (1) {
156 if (*p == '\0') {
157 UChar* q;
158 int len = ONIGENC_MBC_MINLEN(enc);
159
160 if (len == 1) return (int )(p - start);
161 q = p + 1;
162 while (len > 1) {
163 if (*q != '\0') break;
164 q++;
165 len--;
166 }
167 if (len == 1) return (int )(p - start);
168 }
169 p += ONIGENC_MBC_ENC_LEN(enc, p);
170 }
171}
172
/* Byte-indexed ASCII lower-casing table (256 entries, 8 per row).
 * Indices 0x41-0x5a ('A'-'Z') hold 0x61-0x7a ('a'-'z'); every other index,
 * including 0x80-0xff, maps to itself. */
173const UChar OnigEncAsciiToLowerCaseTable[] = {
174 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
175 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
176 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
177 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
178 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
179 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
180 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
181 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
182 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
183 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
184 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
185 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
186 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
187 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
188 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
189 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
190 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
191 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
192 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
193 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
194 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
195 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
196 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
197 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
198 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
199 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
200 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
201 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
202 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
203 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
204 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
205 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
206};
207
/* Byte-indexed ASCII upper-casing table, compiled only when
 * USE_UPPER_CASE_TABLE is defined.
 * Indices 0x61-0x7a ('a'-'z') hold 0x41-0x5a ('A'-'Z'); every other index,
 * including 0x80-0xff, maps to itself. */
208#ifdef USE_UPPER_CASE_TABLE
209const UChar OnigEncAsciiToUpperCaseTable[256] = {
210 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
211 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
212 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
213 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
214 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
215 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
216 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
217 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
218 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
219 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
220 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
221 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
222 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
223 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
224 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
225 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
226 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
227 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
228 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
229 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
230 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
231 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
232 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
233 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
234 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
235 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
236 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
237 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
238 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
239 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
240 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
241 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
242};
243#endif
244
/* Byte-indexed character-class bit masks (256 entries, 8 per row).
 * Only indices 0x00-0x7f carry non-zero masks; all entries for 0x80-0xff
 * are 0x0000.
 * NOTE(review): the meaning of each bit is not visible in this file; it
 * presumably follows the ONIGENC_CTYPE_* numbering -- confirm in the headers. */
245const unsigned short OnigEncAsciiCtypeTable[256] = {
246 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
247 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
248 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
249 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
250 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
251 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
252 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
253 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
254 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
255 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
256 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
257 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
258 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
259 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
260 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
261 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
262 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
263 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
264 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
265 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
266 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
267 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
268 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
269 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
270 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
271 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
272 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
273 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
274 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
275 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
276 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
278};
279
/* Byte-indexed ISO-8859-1 lower-casing table (256 entries, 8 per row).
 * Indices 0x41-0x5a map to 0x61-0x7a, and indices 0xc0-0xde map to
 * 0xe0-0xfe, except 0xd7 which maps to itself. Index 0xdf and every other
 * index also map to themselves. */
280const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
281 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
282 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
283 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
284 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
285 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
286 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
287 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
288 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
289 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
290 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
291 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
292 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
293 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
294 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
295 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
296 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
297 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
298 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
299 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
300 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
301 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
302 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
303 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
304 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
305 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
306 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
307 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
308 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
309 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
310 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
311 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
312 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
313};
314
/* Byte-indexed ISO-8859-1 upper-casing table, compiled only when
 * USE_UPPER_CASE_TABLE is defined.
 * Indices 0x61-0x7a map to 0x41-0x5a, and indices 0xe0-0xfe map to
 * 0xc0-0xde, except 0xf7 which maps to itself. Index 0xff and every other
 * index also map to themselves. */
315#ifdef USE_UPPER_CASE_TABLE
316const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
317 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
318 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
319 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
320 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
321 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
322 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
323 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
324 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
325 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
326 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
327 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
328 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
329 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
330 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
331 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
332 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
333 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
334 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
335 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
336 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
337 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
338 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
339 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
340 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
341 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
342 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
343 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
344 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
345 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
346 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
347 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
348 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
349};
350#endif
351
352extern void
353onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
354{
355 /* nothing */
356 /* obsoleted. */
357}
358
359extern UChar*
360onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
361{
362 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
363}
364
365const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
366 { 0x41, 0x61 },
367 { 0x42, 0x62 },
368 { 0x43, 0x63 },
369 { 0x44, 0x64 },
370 { 0x45, 0x65 },
371 { 0x46, 0x66 },
372 { 0x47, 0x67 },
373 { 0x48, 0x68 },
374 { 0x49, 0x69 },
375 { 0x4a, 0x6a },
376 { 0x4b, 0x6b },
377 { 0x4c, 0x6c },
378 { 0x4d, 0x6d },
379 { 0x4e, 0x6e },
380 { 0x4f, 0x6f },
381 { 0x50, 0x70 },
382 { 0x51, 0x71 },
383 { 0x52, 0x72 },
384 { 0x53, 0x73 },
385 { 0x54, 0x74 },
386 { 0x55, 0x75 },
387 { 0x56, 0x76 },
388 { 0x57, 0x77 },
389 { 0x58, 0x78 },
390 { 0x59, 0x79 },
391 { 0x5a, 0x7a }
392};
393
394extern int
395onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
396 OnigApplyAllCaseFoldFunc f, void* arg)
397{
398 OnigCodePoint code;
399 int i, r;
400
401 for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
402 code = OnigAsciiLowerMap[i].to;
403 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
404 if (r != 0) return r;
405
406 code = OnigAsciiLowerMap[i].from;
407 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
408 if (r != 0) return r;
409 }
410
411 return 0;
412}
413
414extern int
415onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
416 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
417 OnigCaseFoldCodeItem items[])
418{
419 if (0x41 <= *p && *p <= 0x5a) {
420 items[0].byte_len = 1;
421 items[0].code_len = 1;
422 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
423 return 1;
424 }
425 else if (0x61 <= *p && *p <= 0x7a) {
426 items[0].byte_len = 1;
427 items[0].code_len = 1;
428 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
429 return 1;
430 }
431 else
432 return 0;
433}
434
435static int
436ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
437 OnigApplyAllCaseFoldFunc f, void* arg)
438{
439 OnigCodePoint ss[] = { 0x73, 0x73 };
440
441 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
442}
443
444extern int
445onigenc_apply_all_case_fold_with_map(int map_size,
446 const OnigPairCaseFoldCodes map[],
447 int ess_tsett_flag, OnigCaseFoldType flag,
448 OnigApplyAllCaseFoldFunc f, void* arg)
449{
450 OnigCodePoint code;
451 int i, r;
452
453 r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
454 if (r != 0) return r;
455
456 for (i = 0; i < map_size; i++) {
457 code = map[i].to;
458 r = (*f)(map[i].from, &code, 1, arg);
459 if (r != 0) return r;
460
461 code = map[i].from;
462 r = (*f)(map[i].to, &code, 1, arg);
463 if (r != 0) return r;
464 }
465
466 if (ess_tsett_flag != 0)
467 return ss_apply_all_case_fold(flag, f, arg);
468
469 return 0;
470}
471
472extern int
473onigenc_get_case_fold_codes_by_str_with_map(int map_size,
474 const OnigPairCaseFoldCodes map[],
475 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
476 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
477{
478 if (0x41 <= *p && *p <= 0x5a) {
479 items[0].byte_len = 1;
480 items[0].code_len = 1;
481 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
482 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
483 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
484 /* SS */
485 items[1].byte_len = 2;
486 items[1].code_len = 1;
487 items[1].code[0] = (OnigCodePoint )0xdf;
488 return 2;
489 }
490 else
491 return 1;
492 }
493 else if (0x61 <= *p && *p <= 0x7a) {
494 items[0].byte_len = 1;
495 items[0].code_len = 1;
496 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
497 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
498 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
499 /* ss */
500 items[1].byte_len = 2;
501 items[1].code_len = 1;
502 items[1].code[0] = (OnigCodePoint )0xdf;
503 return 2;
504 }
505 else
506 return 1;
507 }
508 else if (*p == 0xdf && ess_tsett_flag != 0) {
509 items[0].byte_len = 1;
510 items[0].code_len = 2;
511 items[0].code[0] = (OnigCodePoint )'s';
512 items[0].code[1] = (OnigCodePoint )'s';
513
514 items[1].byte_len = 1;
515 items[1].code_len = 2;
516 items[1].code[0] = (OnigCodePoint )'S';
517 items[1].code[1] = (OnigCodePoint )'S';
518
519 items[2].byte_len = 1;
520 items[2].code_len = 2;
521 items[2].code[0] = (OnigCodePoint )'s';
522 items[2].code[1] = (OnigCodePoint )'S';
523
524 items[3].byte_len = 1;
525 items[3].code_len = 2;
526 items[3].code[0] = (OnigCodePoint )'S';
527 items[3].code[1] = (OnigCodePoint )'s';
528
529 return 4;
530 }
531 else {
532 int i;
533
534 for (i = 0; i < map_size; i++) {
535 if (*p == map[i].from) {
536 items[0].byte_len = 1;
537 items[0].code_len = 1;
538 items[0].code[0] = map[i].to;
539 return 1;
540 }
541 else if (*p == map[i].to) {
542 items[0].byte_len = 1;
543 items[0].code_len = 1;
544 items[0].code[0] = map[i].from;
545 return 1;
546 }
547 }
548 }
549
550 return 0;
551}
552
553
554extern int
555onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
556 OnigCodePoint* sb_out ARG_UNUSED,
557 const OnigCodePoint* ranges[] ARG_UNUSED)
558{
559 return ONIG_NO_SUPPORT_CONFIG;
560}
561
562extern int
563onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
564{
565 if (p < end) {
566 if (*p == 0x0a) return 1;
567 }
568 return 0;
569}
570
571/* for single byte encodings */
572extern int
573onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
574 const UChar*end ARG_UNUSED, UChar* lower)
575{
576 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
577
578 (*p)++;
579 return 1; /* return byte length of converted char to lower */
580}
581
/* Disabled code: everything between #if 0 and #endif is compiled out.
 * As written, the function advances *pp by one byte and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG() for the byte it stepped over.
 * `flag` and `end` are unused. */
582#if 0
583extern int
584onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
585 const UChar** pp, const UChar* end ARG_UNUSED)
586{
587 const UChar* p = *pp;
588
589 (*pp)++;
590 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
591}
592#endif
593
594extern int
595onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
596{
597 return 1;
598}
599
600extern OnigCodePoint
601onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
602{
603 return (OnigCodePoint )(*p);
604}
605
606extern int
607onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
608{
609 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
610}
611
612extern int
613onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
614{
615 *buf = (UChar )(code & 0xff);
616 return 1;
617}
618
619extern UChar*
620onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
621 const UChar* s)
622{
623 return (UChar* )s;
624}
625
626extern int
627onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
628 const UChar* end ARG_UNUSED)
629{
630 return TRUE;
631}
632
633extern int
634onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
635 const UChar* end ARG_UNUSED)
636{
637 return FALSE;
638}
639
640extern int
641onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
642{
643 if (code < 128)
644 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
645 else
646 return FALSE;
647}
648
649extern OnigCodePoint
650onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
651{
652 int c, i, len;
653 OnigCodePoint n;
654
655 len = enclen(enc, p);
656 n = (OnigCodePoint )(*p++);
657 if (len == 1) return n;
658
659 for (i = 1; i < len; i++) {
660 if (p >= end) break;
661 c = *p++;
662 n <<= 8; n += c;
663 }
664 return n;
665}
666
667extern int
668onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
669 const UChar** pp, const UChar* end ARG_UNUSED,
670 UChar* lower)
671{
672 int len;
673 const UChar *p = *pp;
674
675 if (ONIGENC_IS_MBC_ASCII(p)) {
676 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
677 (*pp)++;
678 return 1;
679 }
680 else {
681 int i;
682
683 len = enclen(enc, p);
684 for (i = 0; i < len; i++) {
685 *lower++ = *p++;
686 }
687 (*pp) += len;
688 return len; /* return byte length of converted to lower char */
689 }
690}
691
/* Disabled code: everything between #if 0 and #endif is compiled out.
 * As written: for an ASCII byte it advances *pp by one and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG() of that byte; otherwise it advances
 * *pp by enclen() and returns FALSE. */
692#if 0
693extern int
694onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
695 const UChar** pp, const UChar* end ARG_UNUSED)
696{
697 const UChar* p = *pp;
698
699 if (ONIGENC_IS_MBC_ASCII(p)) {
700 (*pp)++;
701 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
702 }
703
704 (*pp) += enclen(enc, p);
705 return FALSE;
706}
707#endif
708
709extern int
710onigenc_mb2_code_to_mbclen(OnigCodePoint code)
711{
712 if ((code & 0xff00) != 0) return 2;
713 else return 1;
714}
715
716extern int
717onigenc_mb4_code_to_mbclen(OnigCodePoint code)
718{
719 if ((code & 0xff000000) != 0) return 4;
720 else if ((code & 0xff0000) != 0) return 3;
721 else if ((code & 0xff00) != 0) return 2;
722 else return 1;
723}
724
725extern int
726onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
727{
728 UChar *p = buf;
729
730 if ((code & 0xff00) != 0) {
731 *p++ = (UChar )((code >> 8) & 0xff);
732 }
733 *p++ = (UChar )(code & 0xff);
734
735#if 1
736 if (enclen(enc, buf) != (p - buf))
737 return ONIGERR_INVALID_CODE_POINT_VALUE;
738#endif
739 return (int )(p - buf);
740}
741
742extern int
743onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
744{
745 UChar *p = buf;
746
747 if ((code & 0xff000000) != 0) {
748 *p++ = (UChar )((code >> 24) & 0xff);
749 }
750 if ((code & 0xff0000) != 0 || p != buf) {
751 *p++ = (UChar )((code >> 16) & 0xff);
752 }
753 if ((code & 0xff00) != 0 || p != buf) {
754 *p++ = (UChar )((code >> 8) & 0xff);
755 }
756 *p++ = (UChar )(code & 0xff);
757
758#if 1
759 if (enclen(enc, buf) != (p - buf))
760 return ONIGERR_INVALID_CODE_POINT_VALUE;
761#endif
762 return (int )(p - buf);
763}
764
765extern int
766onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
767{
768 static const PosixBracketEntryType PBS[] = {
769 POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM),
770 POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA),
771 POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK),
772 POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL),
773 POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT),
774 POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH),
775 POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER),
776 POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT),
777 POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT),
778 POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE),
779 POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER),
780 POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT),
781 POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII),
782 POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD),
783 };
784
785 const PosixBracketEntryType *pb;
786 int len;
787
788 len = onigenc_strlen(enc, p, end);
789 for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
790 if (len == pb->len &&
791 onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
792 return pb->ctype;
793 }
794
795 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
796}
797
798extern int
799onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
800 unsigned int ctype)
801{
802 if (code < 128)
803 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
804 else {
805 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
806 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
807 }
808 }
809
810 return FALSE;
811}
812
813extern int
814onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
815 unsigned int ctype)
816{
817 if (code < 128)
818 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
819 else {
820 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
821 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
822 }
823 }
824
825 return FALSE;
826}
827
828extern int
829onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
830 const UChar* sascii /* ascii */, int n)
831{
832 int x, c;
833
834 while (n-- > 0) {
835 if (p >= end) return (int )(*sascii);
836
837 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
838 x = *sascii - c;
839 if (x) return x;
840
841 sascii++;
842 p += enclen(enc, p);
843 }
844 return 0;
845}
846
847extern int
848onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
849 const UChar* sascii /* ascii */, int n)
850{
851 int x, c;
852
853 while (n-- > 0) {
854 if (p >= end) return (int )(*sascii);
855
856 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
857 if (ONIGENC_IS_ASCII_CODE(c))
858 c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
859 x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
860 if (x) return x;
861
862 sascii++;
863 p += enclen(enc, p);
864 }
865 return 0;
866}
867
868/* Property management */
869static int
870resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
871{
872 size_t size;
873 const OnigCodePoint **list = *plist;
874
875 size = sizeof(OnigCodePoint*) * new_size;
876 if (IS_NULL(list)) {
877 list = (const OnigCodePoint** )xmalloc(size);
878 if (IS_NULL(list)) return ONIGERR_MEMORY;
879 }
880 else {
881 const OnigCodePoint **tmp;
882 tmp = (const OnigCodePoint** )xrealloc((void* )list, size);
883 if (IS_NULL(tmp)) return ONIGERR_MEMORY;
884 list = tmp;
885 }
886
887 *plist = list;
888 *psize = new_size;
889
890 return 0;
891}
892
893extern int
894onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
895 hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
896 int *psize)
897{
898#define PROP_INIT_SIZE 16
899
900 int r;
901
902 if (*psize <= *pnum) {
903 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
904 r = resize_property_list(new_size, plist, psize);
905 if (r != 0) return r;
906 }
907
908 (*plist)[*pnum] = prop;
909
910 if (ONIG_IS_NULL(*table)) {
911 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
912 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
913 }
914
915 *pnum = *pnum + 1;
916 onig_st_insert_strend(*table, name, name + strlen((char* )name),
917 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
918 return 0;
919}
920
921extern int
922onigenc_property_list_init(int (*f)(void))
923{
924 int r;
925
926 THREAD_ATOMIC_START;
927
928 r = f();
929
930 THREAD_ATOMIC_END;
931 return r;
932}
Note: See TracBrowser for help on using the repository browser.