source: asp3_tinet_ecnl_rx/trunk/app1_usb_watt_meter/src/jsonsl.c@ 364

Last change on this file since 364 was 364, checked in by coas-nagasima, 5 years ago

TINETとSocket APIなどを更新

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 53.2 KB
Line 
1/* Copyright (C) 2012-2015 Mark Nunberg.
2 *
3 * See included LICENSE file for license details.
4 */
5/* copy from LICENSE file
6Copyright (c) 2012-2015 M. Nunberg, mnunberg@haskalah.org
7
8Permission is hereby granted, free of charge, to any person obtaining
9a copy of this software and associated documentation files (the
10"Software"), to deal in the Software without restriction, including
11without limitation the rights to use, copy, modify, merge, publish,
12distribute, sublicense, and/or sell copies of the Software, and to
13permit persons to whom the Software is furnished to do so, subject to
14the following conditions:
15
16The above copyright notice and this permission notice shall be
17included in all copies or substantial portions of the Software.
18
19THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 */
27#include "jsonsl.h"
28#include <limits.h>
29#include <ctype.h>
30
31#ifdef JSONSL_USE_METRICS
32#define XMETRICS \
33 X(STRINGY_INSIGNIFICANT) \
34 X(STRINGY_SLOWPATH) \
35 X(ALLOWED_WHITESPACE) \
36 X(QUOTE_FASTPATH) \
37 X(SPECIAL_FASTPATH) \
38 X(SPECIAL_WSPOP) \
39 X(SPECIAL_SLOWPATH) \
40 X(GENERIC) \
41 X(STRUCTURAL_TOKEN) \
42 X(SPECIAL_SWITCHFIRST) \
43 X(STRINGY_CATCH) \
44 X(NUMBER_FASTPATH) \
45 X(ESCAPES) \
46 X(TOTAL) \
47
48struct jsonsl_metrics_st {
49#define X(m) \
50 unsigned long metric_##m;
51 XMETRICS
52#undef X
53};
54
55static struct jsonsl_metrics_st GlobalMetrics = { 0 };
56static unsigned long GenericCounter[0x100] = { 0 };
57static unsigned long StringyCatchCounter[0x100] = { 0 };
58
59#define INCR_METRIC(m) \
60 GlobalMetrics.metric_##m++;
61
62#define INCR_GENERIC(c) \
63 INCR_METRIC(GENERIC); \
64 GenericCounter[c]++; \
65
66#define INCR_STRINGY_CATCH(c) \
67 INCR_METRIC(STRINGY_CATCH); \
68 StringyCatchCounter[c]++;
69
70JSONSL_API
71void jsonsl_dump_global_metrics(void)
72{
73 int ii;
74 printf("JSONSL Metrics:\n");
75#define X(m) \
76 printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
77 (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
78 XMETRICS
79#undef X
80 printf("Generic Characters:\n");
81 for (ii = 0; ii < 0xff; ii++) {
82 if (GenericCounter[ii]) {
83 printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
84 }
85 }
86 printf("Weird string loop\n");
87 for (ii = 0; ii < 0xff; ii++) {
88 if (StringyCatchCounter[ii]) {
89 printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
90 }
91 }
92}
93
94#else
95#define INCR_METRIC(m)
96#define INCR_GENERIC(c)
97#define INCR_STRINGY_CATCH(c)
98JSONSL_API
99void jsonsl_dump_global_metrics(void) { }
100#endif /* JSONSL_USE_METRICS */
101
/* Expands to the ten `case` labels for the ASCII decimal digits; used inside
 * a switch on the current character. */
102#define CASE_DIGITS \
103case '1': \
104case '2': \
105case '3': \
106case '4': \
107case '5': \
108case '6': \
109case '7': \
110case '8': \
111case '9': \
112case '0':
113
/* Forward declarations of the character-classification helpers; their
 * definitions are not in this part of the file. */
114static unsigned extract_special(unsigned);
115static int is_special_end(unsigned);
116static int is_allowed_whitespace(unsigned);
117static int is_allowed_escape(unsigned);
118static int is_simple_char(unsigned);
119static char get_escape_equiv(unsigned);
120
121JSONSL_API
122jsonsl_t jsonsl_new(int nlevels)
123{
124 unsigned int ii;
125 struct jsonsl_st * jsn;
126
127 if (nlevels < 2) {
128 return NULL;
129 }
130
131 jsn = (struct jsonsl_st *)
132 calloc(1, sizeof (*jsn) +
133 ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
134 );
135
136 jsn->levels_max = (unsigned int) nlevels;
137 jsn->max_callback_level = UINT_MAX;
138 jsonsl_reset(jsn);
139 for (ii = 0; ii < jsn->levels_max; ii++) {
140 jsn->stack[ii].level = ii;
141 }
142 return jsn;
143}
144
145JSONSL_API
146void jsonsl_reset(jsonsl_t jsn)
147{
148 jsn->tok_last = 0;
149 jsn->can_insert = 1;
150 jsn->pos = 0;
151 jsn->level = 0;
152 jsn->stopfl = 0;
153 jsn->in_escape = 0;
154 jsn->expecting = 0;
155}
156
157JSONSL_API
158void jsonsl_destroy(jsonsl_t jsn)
159{
160 if (jsn) {
161 free(jsn);
162 }
163}
164
165
166#define FASTPARSE_EXHAUSTED 1
167#define FASTPARSE_BREAK 0
168
169/*
170 * This function is meant to accelerate string parsing, reducing the main loop's
171 * check if we are indeed a string.
172 *
173 * @param jsn the parser
174 * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
175 * @param[in,out] nbytes_p A pointer to the current size of the buffer
176 * @return true if all bytes have been exhausted (and thus the main loop can
177 * return), false if a special character was examined which requires greater
178 * examination.
179 */
180static int
181jsonsl__str_fastparse(jsonsl_t jsn,
182 const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
183{
184 const jsonsl_uchar_t *bytes = *bytes_p;
185 const jsonsl_uchar_t *end;
186 for (end = bytes + *nbytes_p; bytes != end; bytes++) {
187 if (
188#ifdef JSONSL_USE_WCHAR
189 *bytes >= 0x100 ||
190#endif /* JSONSL_USE_WCHAR */
191 (is_simple_char(*bytes))) {
192 INCR_METRIC(TOTAL);
193 INCR_METRIC(STRINGY_INSIGNIFICANT);
194 } else {
195 /* Once we're done here, re-calculate the position variables */
196 jsn->pos += (bytes - *bytes_p);
197 *nbytes_p -= (bytes - *bytes_p);
198 *bytes_p = bytes;
199 return FASTPARSE_BREAK;
200 }
201 }
202
203 /* Once we're done here, re-calculate the position variables */
204 jsn->pos += (bytes - *bytes_p);
205 return FASTPARSE_EXHAUSTED;
206}
207
208/* Functions exactly like str_fastparse, except it also accepts a 'state'
209 * argument, since the number's value is updated in the state. */
210static int
211jsonsl__num_fastparse(jsonsl_t jsn,
212 const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
213 struct jsonsl_state_st *state)
214{
215 int exhausted = 1;
216 size_t nbytes = *nbytes_p;
217 const jsonsl_uchar_t *bytes = *bytes_p;
218
219 for (; nbytes; nbytes--, bytes++) {
220 jsonsl_uchar_t c = *bytes;
221 if (isdigit(c)) {
222 INCR_METRIC(TOTAL);
223 INCR_METRIC(NUMBER_FASTPATH);
224 state->nelem = (state->nelem * 10) + (c - 0x30);
225 } else {
226 exhausted = 0;
227 break;
228 }
229 }
230 jsn->pos += (*nbytes_p - nbytes);
231 if (exhausted) {
232 return FASTPARSE_EXHAUSTED;
233 }
234 *nbytes_p = nbytes;
235 *bytes_p = bytes;
236 return FASTPARSE_BREAK;
237}
238
/**
 * Feed `nbytes` characters starting at `bytes` into the lexer.
 *
 * The function walks the input one character at a time (with fast paths for
 * string bodies and plain integers), advancing jsn->pos and pushing/popping
 * entries of jsn->stack as strings, hash keys, specials (numbers and
 * literals), lists and objects begin and end. PUSH/POP/UESCAPE callbacks are
 * invoked through DO_CALLBACK when enabled for the element type.
 *
 * The function returns early when:
 *  - a callback has set jsn->stopfl,
 *  - jsn->error_callback returns 0 for a reported error (a non-zero return
 *    makes the lexer re-examine the current character via GT_AGAIN),
 *  - the nesting limit is exceeded,
 *  - a fast-path scanner consumed the rest of the buffer, or
 *  - a \u escape was seen and jsn->return_UESCAPE is set.
 *
 * NOTE(review): jsn->error_callback is called without a NULL check; it
 * presumably must be set by the caller before feeding -- confirm.
 *
 * @param jsn    the lexer
 * @param bytes  input buffer; also stored in jsn->base
 * @param nbytes number of characters in `bytes`
 */
239JSONSL_API
240void
241jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
242{
243
/* Report error `eb`; retry the current character if the callback asks for
 * it (non-zero return), otherwise abort the feed. */
244#define INVOKE_ERROR(eb) \
245 if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
246 goto GT_AGAIN; \
247 } \
248 return;
249
/* Enter a new nesting level; the new state inherits ignore_callback from its
 * parent and records the current position as its start. */
250#define STACK_PUSH \
251 if (jsn->level >= (levels_max-1)) { \
252 jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
253 return; \
254 } \
255 state = jsn->stack + (++jsn->level); \
256 state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
257 state->pos_begin = jsn->pos;
258
259#define STACK_POP_NOPOS \
260 state->pos_cur = jsn->pos; \
261 state = jsn->stack + (--jsn->level);
262
263
264#define STACK_POP \
265 STACK_POP_NOPOS; \
266 state->pos_cur = jsn->pos;
267
/* Fire the POP callback for element type T, then drop back to the parent. */
268#define CALLBACK_AND_POP_NOPOS(T) \
269 state->pos_cur = jsn->pos; \
270 DO_CALLBACK(T, POP); \
271 state->nescapes = 0; \
272 state = jsn->stack + (--jsn->level);
273
274#define CALLBACK_AND_POP(T) \
275 CALLBACK_AND_POP_NOPOS(T); \
276 state->pos_cur = jsn->pos;
277
278#define SPECIAL_POP \
279 CALLBACK_AND_POP(SPECIAL); \
280 jsn->expecting = 0; \
281 jsn->tok_last = 0; \
282
283#define CUR_CHAR (*(jsonsl_uchar_t*)c)
284
/* Invoke the action-specific callback if set, else the generic one, provided
 * callbacks are enabled for type T, the state is shallower than
 * max_callback_level and not flagged ignore_callback. Aborts the feed if the
 * callback set stopfl. */
285#define DO_CALLBACK(T, action) \
286 if (jsn->call_##T && \
287 jsn->max_callback_level > state->level && \
288 state->ignore_callback == 0) { \
289 \
290 if (jsn->action_callback_##action) { \
291 jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
292 } else if (jsn->action_callback) { \
293 jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
294 } \
295 if (jsn->stopfl) { return; } \
296 }
297
298 /**
299 * Verifies that we are able to insert the (non-string) item into a hash.
300 */
301#define ENSURE_HVAL \
302 if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
303 INVOKE_ERROR(HKEY_EXPECTED); \
304 }
305
/* Compare the current character with the expected character of literal
 * `lit`, indexed by the distance from the start of the special. */
306#define VERIFY_SPECIAL(lit) \
307 if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
308 INVOKE_ERROR(SPECIAL_EXPECTED); \
309 }
310
311#define VERIFY_SPECIAL_CI(lit) \
312 if (tolower(CUR_CHAR) != (lit)[jsn->pos - state->pos_begin]) { \
313 INVOKE_ERROR(SPECIAL_EXPECTED); \
314 }
315
/* While inside a special, nescapes is reused as the literal's length counter. */
316#define STATE_SPECIAL_LENGTH \
317 (state)->nescapes
318
319#define IS_NORMAL_NUMBER \
320 ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
321 (state)->special_flags == JSONSL_SPECIALf_SIGNED)
322
/* While inside a number, tok_last records the class of the previous numeric
 * character: '1' digit, '.' point, 'e' exponent, '-' sign. */
323#define STATE_NUM_LAST jsn->tok_last
324
325#define CONTINUE_NEXT_CHAR() continue
326
327 const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
328 size_t levels_max = jsn->levels_max;
329 struct jsonsl_state_st *state = jsn->stack + jsn->level;
330 jsn->base = bytes;
331
332 for (; nbytes; nbytes--, jsn->pos++, c++) {
333 unsigned state_type;
334 INCR_METRIC(TOTAL);
335
 /* Re-entry point used by INVOKE_ERROR when the error callback asks to
  * continue: the same character is examined again. */
336 GT_AGAIN:
337 state_type = state->type;
338 /* Most common type is typically a string: */
339 if (state_type & JSONSL_Tf_STRINGY) {
340 /* Special escape handling for some stuff */
341 if (jsn->in_escape) {
342 jsn->in_escape = 0;
343 if (!is_allowed_escape(CUR_CHAR)) {
344 INVOKE_ERROR(ESCAPE_INVALID);
345 } else if (CUR_CHAR == 'u') {
346 DO_CALLBACK(UESCAPE, UESCAPE);
347 if (jsn->return_UESCAPE) {
348 return;
349 }
350 }
351 CONTINUE_NEXT_CHAR();
352 }
353
354 if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
355 FASTPARSE_EXHAUSTED) {
356 /* No need to readjust variables as we've exhausted the iterator */
357 return;
358 } else {
359 if (CUR_CHAR == '"') {
360 goto GT_QUOTE;
361 } else if (CUR_CHAR == '\\') {
362 goto GT_ESCAPE;
363 } else {
364 INVOKE_ERROR(WEIRD_WHITESPACE);
365 }
366 }
367 INCR_METRIC(STRINGY_SLOWPATH);
368
369 } else if (state_type == JSONSL_T_SPECIAL) {
370 /* Fast track for signed/unsigned */
371 if (IS_NORMAL_NUMBER) {
372 if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
373 FASTPARSE_EXHAUSTED) {
374 return;
375 } else {
376 goto GT_SPECIAL_NUMERIC;
377 }
378 } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
379#ifdef JSONSL_PARSE_NAN
380 if (CUR_CHAR == 'I' || CUR_CHAR == 'i') {
381 /* parsing -Infinity? */
382 state->special_flags = JSONSL_SPECIALf_NEG_INF;
383 CONTINUE_NEXT_CHAR();
384 }
385#endif
386
387 if (!isdigit(CUR_CHAR)) {
388 INVOKE_ERROR(INVALID_NUMBER);
389 }
390
 /* NOTE(review): INVOKE_ERROR above always leaves this path (goto or
  * return), so the final else below looks unreachable. */
391 if (CUR_CHAR == '0') {
392 state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
393 } else if (isdigit(CUR_CHAR)) {
394 state->special_flags = JSONSL_SPECIALf_SIGNED;
395 state->nelem = CUR_CHAR - 0x30;
396 } else {
397 INVOKE_ERROR(INVALID_NUMBER);
398 }
399 CONTINUE_NEXT_CHAR();
400
401 } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
402 if (isdigit(CUR_CHAR)) {
403 /* Following a zero! */
404 INVOKE_ERROR(INVALID_NUMBER);
405 }
406 /* Unset the 'zero' flag: */
407 if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
408 state->special_flags = JSONSL_SPECIALf_SIGNED;
409 } else {
410 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
411 }
412 goto GT_SPECIAL_NUMERIC;
413 }
414
415 if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) &&
416 !(state->special_flags & JSONSL_SPECIALf_INF)) {
 /* Number body: classify the character and remember its class in
  * STATE_NUM_LAST so that malformed sequences can be rejected. */
417 GT_SPECIAL_NUMERIC:
418 switch (CUR_CHAR) {
419 CASE_DIGITS
420 STATE_NUM_LAST = '1';
421 CONTINUE_NEXT_CHAR();
422
423 case '.':
424 if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
425 INVOKE_ERROR(INVALID_NUMBER);
426 }
427 state->special_flags |= JSONSL_SPECIALf_FLOAT;
428 STATE_NUM_LAST = '.';
429 CONTINUE_NEXT_CHAR();
430
431 case 'e':
432 case 'E':
433 if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
434 INVOKE_ERROR(INVALID_NUMBER);
435 }
436 state->special_flags |= JSONSL_SPECIALf_EXPONENT;
437 STATE_NUM_LAST = 'e';
438 CONTINUE_NEXT_CHAR();
439
440 case '-':
441 case '+':
442 if (STATE_NUM_LAST != 'e') {
443 INVOKE_ERROR(INVALID_NUMBER);
444 }
445 STATE_NUM_LAST = '-';
446 CONTINUE_NEXT_CHAR();
447
448 default:
449 if (is_special_end(CUR_CHAR)) {
450 goto GT_SPECIAL_POP;
451 }
452 INVOKE_ERROR(INVALID_NUMBER);
453 break;
454 }
455 }
456 /* else if (!NUMERIC) */
457 if (!is_special_end(CUR_CHAR)) {
458 STATE_SPECIAL_LENGTH++;
459
460 /* Verify TRUE, FALSE, NULL */
461 if (state->special_flags == JSONSL_SPECIALf_TRUE) {
462 VERIFY_SPECIAL("true");
463 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
464 VERIFY_SPECIAL("false");
465 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
466 VERIFY_SPECIAL("null");
467#ifdef JSONSL_PARSE_NAN
468 } else if (state->special_flags == JSONSL_SPECIALf_POS_INF) {
469 VERIFY_SPECIAL_CI("infinity");
470 } else if (state->special_flags == JSONSL_SPECIALf_NEG_INF) {
471 VERIFY_SPECIAL_CI("-infinity");
472 } else if (state->special_flags == JSONSL_SPECIALf_NAN) {
473 VERIFY_SPECIAL_CI("nan");
474 } else if (state->special_flags & JSONSL_SPECIALf_NULL ||
475 state->special_flags & JSONSL_SPECIALf_NAN) {
476 /* previous char was "n", are we parsing null or nan? */
477 if (CUR_CHAR != 'u') {
478 state->special_flags &= ~JSONSL_SPECIALf_NULL;
479 }
480
481 if (tolower(CUR_CHAR) != 'a') {
482 state->special_flags &= ~JSONSL_SPECIALf_NAN;
483 }
484#endif
485 }
486 INCR_METRIC(SPECIAL_FASTPATH);
487 CONTINUE_NEXT_CHAR();
488 }
489
 /* End of a special: validate that the number/literal is complete, fire
  * the POP callback, then handle the terminating character itself. */
490 GT_SPECIAL_POP:
491 jsn->can_insert = 0;
492 if (IS_NORMAL_NUMBER) {
493 /* Nothing */
494 } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
495 state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
496 /* 0 is unsigned! */
497 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
498 } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
499 /* Still in dash! */
500 INVOKE_ERROR(INVALID_NUMBER);
501 } else if (state->special_flags & JSONSL_SPECIALf_INF) {
502 if (STATE_SPECIAL_LENGTH != 8) {
503 INVOKE_ERROR(SPECIAL_INCOMPLETE);
504 }
505 state->nelem = 1;
506 } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
507 /* Check that we're not at the end of a token */
508 if (STATE_NUM_LAST != '1') {
509 INVOKE_ERROR(INVALID_NUMBER);
510 }
511 } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
512 if (STATE_SPECIAL_LENGTH != 4) {
513 INVOKE_ERROR(SPECIAL_INCOMPLETE);
514 }
515 state->nelem = 1;
516 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
517 if (STATE_SPECIAL_LENGTH != 5) {
518 INVOKE_ERROR(SPECIAL_INCOMPLETE);
519 }
520 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
521 if (STATE_SPECIAL_LENGTH != 4) {
522 INVOKE_ERROR(SPECIAL_INCOMPLETE);
523 }
524 }
525 SPECIAL_POP;
526 jsn->expecting = ',';
527 if (is_allowed_whitespace(CUR_CHAR)) {
528 CONTINUE_NEXT_CHAR();
529 }
530 /**
531 * This works because we have a non-whitespace token
532 * which is not a special token. If this is a structural
533 * character then it will be gracefully handled by the
534 * switch statement. Otherwise it will default to the 'special'
535 * state again,
536 */
537 goto GT_STRUCTURAL_TOKEN;
538 } else if (is_allowed_whitespace(CUR_CHAR)) {
539 INCR_METRIC(ALLOWED_WHITESPACE);
540 /* So we're not special. Harmless insignificant whitespace
541 * passthrough
542 */
543 CONTINUE_NEXT_CHAR();
544 } else if (extract_special(CUR_CHAR)) {
545 /* not a string, whitespace, or structural token. must be special */
546 goto GT_SPECIAL_BEGIN;
547 }
548
549 INCR_GENERIC(CUR_CHAR);
550
551 if (CUR_CHAR == '"') {
 /* A quote either closes the current string/key or opens a new one
  * inside the enclosing object or list. */
552 GT_QUOTE:
553 jsn->can_insert = 0;
554 switch (state_type) {
555
556 /* the end of a string or hash key */
557 case JSONSL_T_STRING:
558 CALLBACK_AND_POP(STRING);
559 CONTINUE_NEXT_CHAR();
560 case JSONSL_T_HKEY:
561 CALLBACK_AND_POP(HKEY);
562 CONTINUE_NEXT_CHAR();
563
564 case JSONSL_T_OBJECT:
565 state->nelem++;
566 if ( (state->nelem-1) % 2 ) {
567 /* Odd, this must be a hash value */
568 if (jsn->tok_last != ':') {
569 INVOKE_ERROR(MISSING_TOKEN);
570 }
571 jsn->expecting = ','; /* Can't figure out what to expect next */
572 jsn->tok_last = 0;
573
574 STACK_PUSH;
575 state->type = JSONSL_T_STRING;
576 DO_CALLBACK(STRING, PUSH);
577
578 } else {
579 /* hash key */
580 if (jsn->expecting != '"') {
581 INVOKE_ERROR(STRAY_TOKEN);
582 }
583 jsn->tok_last = 0;
584 jsn->expecting = ':';
585
586 STACK_PUSH;
587 state->type = JSONSL_T_HKEY;
588 DO_CALLBACK(HKEY, PUSH);
589 }
590 CONTINUE_NEXT_CHAR();
591
592 case JSONSL_T_LIST:
593 state->nelem++;
594 STACK_PUSH;
595 state->type = JSONSL_T_STRING;
596 jsn->expecting = ',';
597 jsn->tok_last = 0;
598 DO_CALLBACK(STRING, PUSH);
599 CONTINUE_NEXT_CHAR();
600
601 case JSONSL_T_SPECIAL:
602 INVOKE_ERROR(STRAY_TOKEN);
603 break;
604
605 default:
606 INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
607 break;
608 } /* switch(state->type) */
609 } else if (CUR_CHAR == '\\') {
610 GT_ESCAPE:
611 INCR_METRIC(ESCAPES);
612 /* Escape */
613 if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
614 INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
615 }
616 state->nescapes++;
617 jsn->in_escape = 1;
618 CONTINUE_NEXT_CHAR();
619 } /* " or \ */
620
621 GT_STRUCTURAL_TOKEN:
622 switch (CUR_CHAR) {
623 case ':':
624 INCR_METRIC(STRUCTURAL_TOKEN);
625 if (jsn->expecting != CUR_CHAR) {
626 INVOKE_ERROR(STRAY_TOKEN);
627 }
628 jsn->tok_last = ':';
629 jsn->can_insert = 1;
630 jsn->expecting = '"';
631 CONTINUE_NEXT_CHAR();
632
633 case ',':
634 INCR_METRIC(STRUCTURAL_TOKEN);
635 /**
636 * The comma is one of the more generic tokens.
637 * In the context of an OBJECT, the can_insert flag
638 * should never be set, and no other action is
639 * necessary.
640 */
641 if (jsn->expecting != CUR_CHAR) {
642 /* make this branch execute only when we haven't manually
643 * just placed the ',' in the expecting register.
644 */
645 INVOKE_ERROR(STRAY_TOKEN);
646 }
647
648 if (state->type == JSONSL_T_OBJECT) {
649 /* end of hash value, expect a string as a hash key */
650 jsn->expecting = '"';
651 } else {
652 jsn->can_insert = 1;
653 }
654
 /* NOTE(review): expecting is set to '"' unconditionally here, which
  * makes the assignment in the OBJECT branch above redundant. */
655 jsn->tok_last = ',';
656 jsn->expecting = '"';
657 CONTINUE_NEXT_CHAR();
658
659 /* new list or object */
660 /* hashes are more common */
661 case '{':
662 case '[':
663 INCR_METRIC(STRUCTURAL_TOKEN);
664 if (!jsn->can_insert) {
665 INVOKE_ERROR(CANT_INSERT);
666 }
667
668 ENSURE_HVAL;
669 state->nelem++;
670
671 STACK_PUSH;
672 /* because the constants match the opening delimiters, we can do this: */
673 state->type = CUR_CHAR;
674 state->nelem = 0;
675 jsn->can_insert = 1;
676 if (CUR_CHAR == '{') {
677 /* If we're a hash, we expect a key first, which is quoted */
678 jsn->expecting = '"';
679 }
680 if (CUR_CHAR == JSONSL_T_OBJECT) {
681 DO_CALLBACK(OBJECT, PUSH);
682 } else {
683 DO_CALLBACK(LIST, PUSH);
684 }
685 jsn->tok_last = 0;
686 CONTINUE_NEXT_CHAR();
687
688 /* closing of list or object */
689 case '}':
690 case ']':
691 INCR_METRIC(STRUCTURAL_TOKEN);
692 if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
693 INVOKE_ERROR(TRAILING_COMMA);
694 }
695
 /* NOTE(review): the level is decremented before the bracket type is
  * checked, with no visible guard for level 0; if the error callback
  * then asks to continue, `state` still points at the old level --
  * confirm this is intended. */
696 jsn->can_insert = 0;
697 jsn->level--;
698 jsn->expecting = ',';
699 jsn->tok_last = 0;
700 if (CUR_CHAR == ']') {
701 if (state->type != '[') {
702 INVOKE_ERROR(BRACKET_MISMATCH);
703 }
704 DO_CALLBACK(LIST, POP);
705 } else {
706 if (state->type != '{') {
707 INVOKE_ERROR(BRACKET_MISMATCH);
708 } else if (state->nelem && state->nelem % 2 != 0) {
709 INVOKE_ERROR(VALUE_EXPECTED);
710 }
711 DO_CALLBACK(OBJECT, POP);
712 }
713 state = jsn->stack + jsn->level;
714 state->pos_cur = jsn->pos;
715 CONTINUE_NEXT_CHAR();
716
717 default:
718 GT_SPECIAL_BEGIN:
719 /**
720 * Not a string, not a structural token, and not benign whitespace.
721 * Technically we should iterate over the character always, but since
722 * we are not doing full numerical/value decoding anyway (but only hinting),
723 * we only check upon entry.
724 */
725 if (state->type != JSONSL_T_SPECIAL) {
726 int special_flags = extract_special(CUR_CHAR);
727 if (!special_flags) {
728 /**
729 * Try to do some heuristics here anyway to figure out what kind of
730 * error this is. The 'special' case is a fallback scenario anyway.
731 */
732 if (CUR_CHAR == '\0') {
733 INVOKE_ERROR(FOUND_NULL_BYTE);
734 } else if (CUR_CHAR < 0x20) {
735 INVOKE_ERROR(WEIRD_WHITESPACE);
736 } else {
737 INVOKE_ERROR(SPECIAL_EXPECTED);
738 }
739 }
740 ENSURE_HVAL;
741 state->nelem++;
742 if (!jsn->can_insert) {
743 INVOKE_ERROR(CANT_INSERT);
744 }
745 STACK_PUSH;
746 state->type = JSONSL_T_SPECIAL;
747 state->special_flags = special_flags;
748 STATE_SPECIAL_LENGTH = 1;
749
 /* An unsigned number starts with its first digit already consumed;
  * everything else starts with a zero value. */
750 if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
751 state->nelem = CUR_CHAR - 0x30;
752 STATE_NUM_LAST = '1';
753 } else {
754 STATE_NUM_LAST = '-';
755 state->nelem = 0;
756 }
757 DO_CALLBACK(SPECIAL, PUSH);
758 }
759 CONTINUE_NEXT_CHAR();
760 }
761 }
762}
763
764JSONSL_API
765const char* jsonsl_strerror(jsonsl_error_t err)
766{
767 if (err == JSONSL_ERROR_SUCCESS) {
768 return "SUCCESS";
769 }
770#define X(t) \
771 if (err == JSONSL_ERROR_##t) \
772 return #t;
773 JSONSL_XERR;
774#undef X
775 return "<UNKNOWN_ERROR>";
776}
777
778JSONSL_API
779const char *jsonsl_strtype(jsonsl_type_t type)
780{
781#define X(o,c) \
782 if (type == JSONSL_T_##o) \
783 return #o;
784 JSONSL_XTYPE
785#undef X
786 return "UNKNOWN TYPE";
787
788}
789
790/*
791 *
792 * JPR/JSONPointer functions
793 *
794 *
795 */
796#ifndef JSONSL_NO_JPR
797static
798jsonsl_jpr_type_t
799populate_component(char *in,
800 struct jsonsl_jpr_component_st *component,
801 char **next,
802 jsonsl_error_t *errp)
803{
804 unsigned long pctval;
805 char *c = NULL, *outp = NULL, *end = NULL;
806 size_t input_len;
807 jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
808
809 if (*next == NULL || *(*next) == '\0') {
810 return JSONSL_PATH_NONE;
811 }
812
813 /* Replace the next / with a NULL */
814 *next = strstr(in, "/");
815 if (*next != NULL) {
816 *(*next) = '\0'; /* drop the forward slash */
817 input_len = *next - in;
818 end = *next;
819 *next += 1; /* next character after the '/' */
820 } else {
821 input_len = strlen(in);
822 end = in + input_len + 1;
823 }
824
825 component->pstr = in;
826
827 /* Check for special components of interest */
828 if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
829 /* Lone wildcard */
830 ret = JSONSL_PATH_WILDCARD;
831 goto GT_RET;
832 } else if (isdigit(*in)) {
833 /* ASCII Numeric */
834 char *endptr;
835 component->idx = strtoul(in, &endptr, 10);
836 if (endptr && *endptr == '\0') {
837 ret = JSONSL_PATH_NUMERIC;
838 goto GT_RET;
839 }
840 }
841
842 /* Default, it's a string */
843 ret = JSONSL_PATH_STRING;
844 for (c = outp = in; c < end; c++, outp++) {
845 char origc;
846 if (*c != '%') {
847 goto GT_ASSIGN;
848 }
849 /*
850 * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
851 */
852
853 /* Need %XX */
854 if (c+2 >= end) {
855 *errp = JSONSL_ERROR_PERCENT_BADHEX;
856 return JSONSL_PATH_INVALID;
857 }
858 if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
859 *errp = JSONSL_ERROR_PERCENT_BADHEX;
860 return JSONSL_PATH_INVALID;
861 }
862
863 /* Temporarily null-terminate the characters */
864 origc = *(c+3);
865 *(c+3) = '\0';
866 pctval = strtoul(c+1, NULL, 16);
867 *(c+3) = origc;
868
869 *outp = (char) pctval;
870 c += 2;
871 continue;
872
873 GT_ASSIGN:
874 *outp = *c;
875 }
876 /* Null-terminate the string */
877 for (; outp < c; outp++) {
878 *outp = '\0';
879 }
880
881 GT_RET:
882 component->ptype = ret;
883 if (ret != JSONSL_PATH_WILDCARD) {
884 component->len = strlen(component->pstr);
885 }
886 return ret;
887}
888
889JSONSL_API
890jsonsl_jpr_t
891jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
892{
893 char *my_copy = NULL;
894 int count, curidx;
895 struct jsonsl_jpr_st *ret = NULL;
896 struct jsonsl_jpr_component_st *components = NULL;
897 size_t origlen;
898 jsonsl_error_t errstacked;
899
900#define JPR_BAIL(err) *errp = err; goto GT_ERROR;
901
902 if (errp == NULL) {
903 errp = &errstacked;
904 }
905
906 if (path == NULL || *path != '/') {
907 JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
908 }
909
910 count = 1;
911 path++;
912 {
913 const char *c = path;
914 for (; *c; c++) {
915 if (*c == '/') {
916 count++;
917 if (*(c+1) == '/') {
918 JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
919 }
920 }
921 }
922 }
923 if(*path) {
924 count++;
925 }
926
927 components = (struct jsonsl_jpr_component_st *)
928 malloc(sizeof(*components) * count);
929 if (!components) {
930 JPR_BAIL(JSONSL_ERROR_ENOMEM);
931 }
932
933 my_copy = (char *)malloc(strlen(path) + 1);
934 if (!my_copy) {
935 JPR_BAIL(JSONSL_ERROR_ENOMEM);
936 }
937
938 strcpy(my_copy, path);
939
940 components[0].ptype = JSONSL_PATH_ROOT;
941
942 if (*my_copy) {
943 char *cur = my_copy;
944 int pathret = JSONSL_PATH_STRING;
945 curidx = 1;
946 while (curidx < count) {
947 pathret = populate_component(cur, components + curidx, &cur, errp);
948 if (pathret > 0) {
949 curidx++;
950 } else {
951 break;
952 }
953 }
954
955 if (pathret == JSONSL_PATH_INVALID) {
956 JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
957 }
958 } else {
959 curidx = 1;
960 }
961
962 path--; /*revert path to leading '/' */
963 origlen = strlen(path) + 1;
964 ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
965 if (!ret) {
966 JPR_BAIL(JSONSL_ERROR_ENOMEM);
967 }
968 ret->orig = (char *)malloc(origlen);
969 if (!ret->orig) {
970 JPR_BAIL(JSONSL_ERROR_ENOMEM);
971 }
972 ret->components = components;
973 ret->ncomponents = curidx;
974 ret->basestr = my_copy;
975 ret->norig = origlen-1;
976 strcpy(ret->orig, path);
977
978 return ret;
979
980 GT_ERROR:
981 free(my_copy);
982 free(components);
983 if (ret) {
984 free(ret->orig);
985 }
986 free(ret);
987 return NULL;
988#undef JPR_BAIL
989}
990
991void jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
992{
993 free(jpr->components);
994 free(jpr->basestr);
995 free(jpr->orig);
996 free(jpr);
997}
998
999/**
1000 * Call when there is a possibility of a match, either as a final match or
1001 * as a path within a match
1002 * @param jpr The JPR path
1003 * @param component Component corresponding to the current element
1004 * @param prlevel The level of the *parent*
1005 * @param chtype The type of the child
1006 * @return Match status
1007 */
1008static jsonsl_jpr_match_t
1009jsonsl__match_continue(jsonsl_jpr_t jpr,
1010 const struct jsonsl_jpr_component_st *component,
1011 unsigned prlevel, unsigned chtype)
1012{
1013 const struct jsonsl_jpr_component_st *next_comp = component + 1;
1014 if (prlevel == jpr->ncomponents - 1) {
1015 /* This is the match. Check the expected type of the match against
1016 * the child */
1017 if (jpr->match_type == 0 || jpr->match_type == chtype) {
1018 return JSONSL_MATCH_COMPLETE;
1019 } else {
1020 return JSONSL_MATCH_TYPE_MISMATCH;
1021 }
1022 }
1023 if (chtype == JSONSL_T_LIST) {
1024 if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
1025 return JSONSL_MATCH_POSSIBLE;
1026 } else {
1027 return JSONSL_MATCH_TYPE_MISMATCH;
1028 }
1029 } else if (chtype == JSONSL_T_OBJECT) {
1030 if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
1031 return JSONSL_MATCH_TYPE_MISMATCH;
1032 } else {
1033 return JSONSL_MATCH_POSSIBLE;
1034 }
1035 } else {
1036 return JSONSL_MATCH_TYPE_MISMATCH;
1037 }
1038}
1039
1040JSONSL_API
1041jsonsl_jpr_match_t
1042jsonsl_path_match(jsonsl_jpr_t jpr,
1043 const struct jsonsl_state_st *parent,
1044 const struct jsonsl_state_st *child,
1045 const char *key, size_t nkey)
1046{
1047 const struct jsonsl_jpr_component_st *comp;
1048 if (!parent) {
1049 /* No parent. Return immediately since it's always a match */
1050 return jsonsl__match_continue(jpr, jpr->components, 0, child->type);
1051 }
1052
1053 comp = jpr->components + parent->level;
1054
1055 /* note that we don't need to verify the type of the match, this is
1056 * always done through the previous call to jsonsl__match_continue.
1057 * If we are in a POSSIBLE tree then we can be certain the types (at
1058 * least at this level) are correct */
1059 if (parent->type == JSONSL_T_OBJECT) {
1060 if (comp->len != nkey || strncmp(key, comp->pstr, nkey) != 0) {
1061 return JSONSL_MATCH_NOMATCH;
1062 }
1063 } else {
1064 if (comp->idx != parent->nelem - 1) {
1065 return JSONSL_MATCH_NOMATCH;
1066 }
1067 }
1068 return jsonsl__match_continue(jpr, comp, parent->level, child->type);
1069}
1070
1071JSONSL_API
1072jsonsl_jpr_match_t
1073jsonsl_jpr_match(jsonsl_jpr_t jpr,
1074 unsigned int parent_type,
1075 unsigned int parent_level,
1076 const char *key,
1077 size_t nkey)
1078{
1079 /* find our current component. This is the child level */
1080 int cmpret;
1081 struct jsonsl_jpr_component_st *p_component;
1082 p_component = jpr->components + parent_level;
1083
1084 if (parent_level >= jpr->ncomponents) {
1085 return JSONSL_MATCH_NOMATCH;
1086 }
1087
1088 /* Lone query for 'root' element. Always matches */
1089 if (parent_level == 0) {
1090 if (jpr->ncomponents == 1) {
1091 return JSONSL_MATCH_COMPLETE;
1092 } else {
1093 return JSONSL_MATCH_POSSIBLE;
1094 }
1095 }
1096
1097 /* Wildcard, always matches */
1098 if (p_component->ptype == JSONSL_PATH_WILDCARD) {
1099 if (parent_level == jpr->ncomponents-1) {
1100 return JSONSL_MATCH_COMPLETE;
1101 } else {
1102 return JSONSL_MATCH_POSSIBLE;
1103 }
1104 }
1105
1106 /* Check numeric array index. This gets its special block so we can avoid
1107 * string comparisons */
1108 if (p_component->ptype == JSONSL_PATH_NUMERIC) {
1109 if (parent_type == JSONSL_T_LIST) {
1110 if (p_component->idx != nkey) {
1111 /* Wrong index */
1112 return JSONSL_MATCH_NOMATCH;
1113 } else {
1114 if (parent_level == jpr->ncomponents-1) {
1115 /* This is the last element of the path */
1116 return JSONSL_MATCH_COMPLETE;
1117 } else {
1118 /* Intermediate element */
1119 return JSONSL_MATCH_POSSIBLE;
1120 }
1121 }
1122 } else if (p_component->is_arridx) {
1123 /* Numeric and an array index (set explicitly by user). But not
1124 * a list for a parent */
1125 return JSONSL_MATCH_TYPE_MISMATCH;
1126 }
1127 } else if (parent_type == JSONSL_T_LIST) {
1128 return JSONSL_MATCH_TYPE_MISMATCH;
1129 }
1130
1131 /* Check lengths */
1132 if (p_component->len != nkey) {
1133 return JSONSL_MATCH_NOMATCH;
1134 }
1135
1136 /* Check string comparison */
1137 cmpret = strncmp(p_component->pstr, key, nkey);
1138 if (cmpret == 0) {
1139 if (parent_level == jpr->ncomponents-1) {
1140 return JSONSL_MATCH_COMPLETE;
1141 } else {
1142 return JSONSL_MATCH_POSSIBLE;
1143 }
1144 }
1145
1146 return JSONSL_MATCH_NOMATCH;
1147}
1148
1149JSONSL_API
1150void jsonsl_jpr_match_state_init(jsonsl_t jsn,
1151 jsonsl_jpr_t *jprs,
1152 size_t njprs)
1153{
1154 size_t ii, *firstjmp;
1155 if (njprs == 0) {
1156 return;
1157 }
1158 jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs);
1159 jsn->jpr_count = njprs;
1160 jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max);
1161 memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs);
1162 /* Set the initial jump table values */
1163
1164 firstjmp = jsn->jpr_root;
1165 for (ii = 0; ii < njprs; ii++) {
1166 firstjmp[ii] = ii+1;
1167 }
1168}
1169
1170JSONSL_API
1171void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)
1172{
1173 if (jsn->jpr_count == 0) {
1174 return;
1175 }
1176
1177 free(jsn->jpr_root);
1178 free(jsn->jprs);
1179 jsn->jprs = NULL;
1180 jsn->jpr_root = NULL;
1181 jsn->jpr_count = 0;
1182}
1183
1184/**
1185 * This function should be called exactly once on each element...
1186 * This should also be called in recursive order, since we rely
1187 * on the parent having been initalized for a match.
1188 *
1189 * Since the parent is checked for a match as well, we maintain a 'serial' counter.
1190 * Whenever we traverse an element, we expect the serial to be the same as a global
1191 * integer. If they do not match, we re-initialize the context, and set the serial.
1192 *
1193 * This ensures a type of consistency without having a proactive reset by the
1194 * main lexer itself.
1195 *
1196 */
1197JSONSL_API
1198jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
1199 struct jsonsl_state_st *state,
1200 const char *key,
1201 size_t nkey,
1202 jsonsl_jpr_match_t *out)
1203{
1204 struct jsonsl_state_st *parent_state;
1205 jsonsl_jpr_t ret = NULL;
1206
1207 /* Jump and JPR tables for our own state and the parent state */
1208 size_t *jmptable, *pjmptable;
1209 size_t jmp_cur, ii, ourjmpidx;
1210
1211 if (!jsn->jpr_root) {
1212 *out = JSONSL_MATCH_NOMATCH;
1213 return NULL;
1214 }
1215
1216 pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1));
1217 jmptable = pjmptable + jsn->jpr_count;
1218
1219 /* If the parent cannot match, then invalidate it */
1220 if (*pjmptable == 0) {
1221 *jmptable = 0;
1222 *out = JSONSL_MATCH_NOMATCH;
1223 return NULL;
1224 }
1225
1226 parent_state = jsn->stack + state->level - 1;
1227
1228 if (parent_state->type == JSONSL_T_LIST) {
1229 nkey = (size_t) parent_state->nelem;
1230 }
1231
1232 *jmptable = 0;
1233 ourjmpidx = 0;
1234 memset(jmptable, 0, sizeof(int) * jsn->jpr_count);
1235
1236 for (ii = 0; ii < jsn->jpr_count; ii++) {
1237 jmp_cur = pjmptable[ii];
1238 if (jmp_cur) {
1239 jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1];
1240 *out = jsonsl_jpr_match(jpr,
1241 parent_state->type,
1242 parent_state->level,
1243 key, nkey);
1244 if (*out == JSONSL_MATCH_COMPLETE) {
1245 ret = jpr;
1246 *jmptable = 0;
1247 return ret;
1248 } else if (*out == JSONSL_MATCH_POSSIBLE) {
1249 jmptable[ourjmpidx] = ii+1;
1250 ourjmpidx++;
1251 }
1252 } else {
1253 break;
1254 }
1255 }
1256 if (!*jmptable) {
1257 *out = JSONSL_MATCH_NOMATCH;
1258 }
1259 return NULL;
1260}
1261
1262JSONSL_API
1263const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match)
1264{
1265#define X(T,v) \
1266 if ( match == JSONSL_MATCH_##T ) \
1267 return #T;
1268 JSONSL_XMATCH
1269#undef X
1270 return "<UNKNOWN>";
1271}
1272
1273#endif /* JSONSL_WITH_JPR */
1274
/**
 * Encode the code point `pt` as UTF-8 starting at `out`.
 *
 * Writes one to four bytes depending on the magnitude of `pt` and returns a
 * pointer to the byte following the last one written. No range validation
 * is performed on `pt`.
 */
static char *
jsonsl__writeutf8(uint32_t pt, char *out)
{
    if (pt < 0x80) {
        /* Single byte, value stored as-is */
        *out++ = (char)pt;
        return out;
    }

    if (pt < 0x800) {
        /* Two bytes: 110xxxxx 10xxxxxx */
        *out++ = (char)((pt >> 6) | 0xC0);
        *out++ = (char)((pt & 0x3F) | 0x80);
        return out;
    }

    if (pt < 0x10000) {
        /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        *out++ = (char)((pt >> 12) | 0xE0);
        *out++ = (char)(((pt >> 6) & 0x3F) | 0x80);
        *out++ = (char)((pt & 0x3F) | 0x80);
        return out;
    }

    /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    *out++ = (char)((pt >> 18) | 0xF0);
    *out++ = (char)(((pt >> 12) & 0x3F) | 0x80);
    *out++ = (char)(((pt >> 6) & 0x3F) | 0x80);
    *out++ = (char)((pt & 0x3F) | 0x80);
    return out;
}
1298
/* Thanks snej (https://github.com/mnunberg/jsonsl/issues/9) */
/* Convert one hexadecimal digit character (0-9, a-f, A-F) to its value
 * 0..15. Returns -1 for any other character. */
static int
jsonsl__digit2int(char ch) {
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return (ch - 'a') + 10;
    }
    if (ch >= 'A' && ch <= 'F') {
        return (ch - 'A') + 10;
    }
    return -1;
}
1316
/* Assume 's' is at least 4 bytes long */
/* Parse the four hexadecimal digits at 's' (most significant first) into a
 * 16-bit value. Returns -1 as soon as a non-hex character is found. */
static int
jsonsl__get_uescape_16(const char *s)
{
    int value = 0;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        int digit = jsonsl__digit2int(s[pos]);
        if (digit == -1) {
            return -1;
        }
        value = (value << 4) | digit;
    }
    return value;
}
1336
1337/**
1338 * Utility function to convert escape sequences
1339 */
1340JSONSL_API
1341size_t jsonsl_util_unescape_ex(const char *in,
1342 char *out,
1343 size_t len,
1344 const int toEscape[128],
1345 unsigned *oflags,
1346 jsonsl_error_t *err,
1347 const char **errat)
1348{
1349 const unsigned char *c = (const unsigned char*)in;
1350 char *begin_p = out;
1351 unsigned oflags_s;
1352 uint16_t last_codepoint = 0;
1353
1354 if (!oflags) {
1355 oflags = &oflags_s;
1356 }
1357 *oflags = 0;
1358
1359 #define UNESCAPE_BAIL(e,offset) \
1360 *err = JSONSL_ERROR_##e; \
1361 if (errat) { \
1362 *errat = (const char*)(c+ (ptrdiff_t)(offset)); \
1363 } \
1364 return 0;
1365
1366 for (; len; len--, c++, out++) {
1367 int uescval;
1368 if (*c != '\\') {
1369 /* Not an escape, so we don't care about this */
1370 goto GT_ASSIGN;
1371 }
1372
1373 if (len < 2) {
1374 UNESCAPE_BAIL(ESCAPE_INVALID, 0);
1375 }
1376 if (!is_allowed_escape(c[1])) {
1377 UNESCAPE_BAIL(ESCAPE_INVALID, 1)
1378 }
1379 if ((toEscape && toEscape[(unsigned char)c[1] & 0x7f] == 0 &&
1380 c[1] != '\\' && c[1] != '"')) {
1381 /* if we don't want to unescape this string, write the escape sequence to the output */
1382 *out++ = *c++;
1383 --len;
1384 goto GT_ASSIGN;
1385 }
1386
1387 if (c[1] != 'u') {
1388 /* simple skip-and-replace using pre-defined maps.
1389 * TODO: should the maps actually reflect the desired
1390 * replacement character in toEscape?
1391 */
1392 char esctmp = get_escape_equiv(c[1]);
1393 if (esctmp) {
1394 /* Check if there is a corresponding replacement */
1395 *out = esctmp;
1396 } else {
1397 /* Just gobble up the 'reverse-solidus' */
1398 *out = c[1];
1399 }
1400 len--;
1401 c++;
1402 /* do not assign, just continue */
1403 continue;
1404 }
1405
1406 /* next == 'u' */
1407 if (len < 6) {
1408 /* Need at least six characters.. */
1409 UNESCAPE_BAIL(UESCAPE_TOOSHORT, 2);
1410 }
1411
1412 uescval = jsonsl__get_uescape_16((const char *)c + 2);
1413 if (uescval == -1) {
1414 UNESCAPE_BAIL(PERCENT_BADHEX, -1);
1415 }
1416
1417 if (last_codepoint) {
1418 uint16_t w1 = last_codepoint, w2 = (uint16_t)uescval;
1419 uint32_t cp;
1420
1421 if (uescval < 0xDC00 || uescval > 0xDFFF) {
1422 UNESCAPE_BAIL(INVALID_CODEPOINT, -1);
1423 }
1424
1425 cp = (w1 & 0x3FF) << 10;
1426 cp |= (w2 & 0x3FF);
1427 cp += 0x10000;
1428
1429 out = jsonsl__writeutf8(cp, out) - 1;
1430 last_codepoint = 0;
1431
1432 } else if (uescval < 0xD800 || uescval > 0xDFFF) {
1433 *oflags |= JSONSL_SPECIALf_NONASCII;
1434 out = jsonsl__writeutf8(uescval, out) - 1;
1435
1436 } else if (uescval < 0xDC00) {
1437 *oflags |= JSONSL_SPECIALf_NONASCII;
1438 last_codepoint = (uint16_t)uescval;
1439 out--;
1440 } else {
1441 UNESCAPE_BAIL(INVALID_CODEPOINT, 2);
1442 }
1443
1444 /* Post uescape cleanup */
1445 len -= 5; /* Gobble up 5 chars after 'u' */
1446 c += 5;
1447 continue;
1448
1449 /* Only reached by previous branches */
1450 GT_ASSIGN:
1451 *out = *c;
1452 }
1453
1454 if (last_codepoint) {
1455 *err = JSONSL_ERROR_INVALID_CODEPOINT;
1456 return 0;
1457 }
1458
1459 *err = JSONSL_ERROR_SUCCESS;
1460 return out - begin_p;
1461}
1462
1463/**
1464 * Character Table definitions.
1465 * These were all generated via srcutil/genchartables.pl
1466 */
1467
1468/**
1469 * This table contains the beginnings of non-string
1470 * allowable (bareword) values.
1471 */
1472static unsigned short Special_Table[0x100] = {
1473 /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
1474 /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */
1475 /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */
1476 /* 0x2e */ 0,0, /* 0x2f */
1477 /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */
1478 /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */
1479 /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */
1480 /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */
1481 /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */
1482 /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */
1483 /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */
1484 /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */
1485 /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */
1486 /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */
1487 /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x48 */
1488 /* 0x49 */ JSONSL__INF_PROXY /* <I> */, /* 0x49 */
1489 /* 0x4a */ 0,0,0,0, /* 0x4d */
1490 /* 0x4e */ JSONSL__NAN_PROXY /* <N> */, /* 0x4e */
1491 /* 0x4f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */
1492 /* 0x66 */ JSONSL_SPECIALf_FALSE /* <f> */, /* 0x66 */
1493 /* 0x67 */ 0,0, /* 0x68 */
1494 /* 0x69 */ JSONSL__INF_PROXY /* <i> */, /* 0x69 */
1495 /* 0x6a */ 0,0,0,0, /* 0x6d */
1496 /* 0x6e */ JSONSL_SPECIALf_NULL|JSONSL__NAN_PROXY /* <n> */, /* 0x6e */
1497 /* 0x6f */ 0,0,0,0,0, /* 0x73 */
1498 /* 0x74 */ JSONSL_SPECIALf_TRUE /* <t> */, /* 0x74 */
1499 /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
1500 /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
1501 /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
1502 /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
1503 /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */
1504};
1505
/**
 * Contains characters which signal the termination of any of the 'special' bareword
 * values.
 *
 * Indexed by byte value; 1 marks a terminating character, 0 anything else.
 * The table is only ever read (through is_special_end()), hence const. The
 * final element (0xff) is left to implicit zero-initialization.
 */
static const int Special_Endings[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
    /* 0x09 */ 1 /* <TAB> */, /* 0x09 */
    /* 0x0a */ 1 /* <LF> */, /* 0x0a */
    /* 0x0b */ 0,0, /* 0x0c */
    /* 0x0d */ 1 /* <CR> */, /* 0x0d */
    /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 1 /* <SP> */, /* 0x20 */
    /* 0x21 */ 0, /* 0x21 */
    /* 0x22 */ 1 /* " */, /* 0x22 */
    /* 0x23 */ 0,0,0,0,0,0,0,0,0, /* 0x2b */
    /* 0x2c */ 1 /* , */, /* 0x2c */
    /* 0x2d */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x39 */
    /* 0x3a */ 1 /* : */, /* 0x3a */
    /* 0x3b */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5a */
    /* 0x5b */ 1 /* [ */, /* 0x5b */
    /* 0x5c */ 1 /* \ */, /* 0x5c */
    /* 0x5d */ 1 /* ] */, /* 0x5d */
    /* 0x5e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7a */
    /* 0x7b */ 1 /* { */, /* 0x7b */
    /* 0x7c */ 0, /* 0x7c */
    /* 0x7d */ 1 /* } */, /* 0x7d */
    /* 0x7e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9d */
    /* 0x9e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbd */
    /* 0xbe */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdd */
    /* 0xde */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfd */
    /* 0xfe */ 0 /* 0xfe */
};
1538
/**
 * This table contains entries for the allowed whitespace as per RFC 4627
 *
 * Indexed by byte value; 1 for TAB, LF, CR and SP, 0 for everything else.
 * The table is only ever read (through is_allowed_whitespace()), hence
 * const. The final element (0xff) is left to implicit zero-initialization.
 */
static const int Allowed_Whitespace[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
    /* 0x09 */ 1 /* <TAB> */, /* 0x09 */
    /* 0x0a */ 1 /* <LF> */, /* 0x0a */
    /* 0x0b */ 0,0, /* 0x0c */
    /* 0x0d */ 1 /* <CR> */, /* 0x0d */
    /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 1 /* <SP> */, /* 0x20 */
    /* 0x21 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40 */
    /* 0x41 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 */
    /* 0x61 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 */
    /* 0x81 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 */
    /* 0xa1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xc0 */
    /* 0xc1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 */
    /* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
1558
/**
 * Indexed by byte value. An entry of 1 marks a byte that is_simple_char()
 * reports as not simple: the control characters 0x00 (NUL) through 0x13
 * (DC3), the double quote and the backslash. All other entries are 0; the
 * final element (0xff) is left to implicit zero-initialization.
 */
static const int String_No_Passthrough[0x100] = {
    /* 0x00-0x07: NUL SOH STX ETX EOT ENQ ACK BEL */
    1,1,1,1,1,1,1,1,
    /* 0x08-0x0f: BS HT LF VT FF CR SO SI */
    1,1,1,1,1,1,1,1,
    /* 0x10-0x13: DLE DC1 DC2 DC3 */
    1,1,1,1,
    /* 0x14-0x21 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x22: <"> */
    1,
    /* 0x23-0x42 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x43-0x5b */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x5c: <\> */
    1,
    /* 0x5d-0x7c */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x7d-0x9c */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x9d-0xbc */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xbd-0xdc */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xdd-0xfc */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xfd-0xfe */
    0,0
};
1592
/**
 * Allowable two-character 'common' escapes:
 *
 * Indexed by the byte that follows the backslash; 1 for the characters
 * " / \ b f n r t u, 0 for everything else. The table is only ever read
 * (through is_allowed_escape()), hence const. The final element (0xff) is
 * left to implicit zero-initialization.
 */
static const int Allowed_Escapes[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 0,0, /* 0x21 */
    /* 0x22 */ 1 /* <"> */, /* 0x22 */
    /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2e */
    /* 0x2f */ 1 /* </> */, /* 0x2f */
    /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x4f */
    /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */
    /* 0x5c */ 1 /* <\> */, /* 0x5c */
    /* 0x5d */ 0,0,0,0,0, /* 0x61 */
    /* 0x62 */ 1 /* <b> */, /* 0x62 */
    /* 0x63 */ 0,0,0, /* 0x65 */
    /* 0x66 */ 1 /* <f> */, /* 0x66 */
    /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
    /* 0x6e */ 1 /* <n> */, /* 0x6e */
    /* 0x6f */ 0,0,0, /* 0x71 */
    /* 0x72 */ 1 /* <r> */, /* 0x72 */
    /* 0x73 */ 0, /* 0x73 */
    /* 0x74 */ 1 /* <t> */, /* 0x74 */
    /* 0x75 */ 1 /* <u> */, /* 0x75 */
    /* 0x76 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x95 */
    /* 0x96 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb5 */
    /* 0xb6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd5 */
    /* 0xd6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf5 */
    /* 0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */
};
1622
/**
 * This table contains the _values_ for a given (single) escaped character.
 *
 * Indexed by the escape letter: b, f, n, r and t map to the control
 * character they denote (8, 12, 10, 13, 9); every other entry is 0. The
 * table is only ever read (through get_escape_equiv()), hence const. The
 * final element (0xff) is left to implicit zero-initialization.
 */
static const unsigned char Escape_Equivs[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */
    /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */
    /* 0x60 */ 0,0, /* 0x61 */
    /* 0x62 */ 8 /* <b> */, /* 0x62 */
    /* 0x63 */ 0,0,0, /* 0x65 */
    /* 0x66 */ 12 /* <f> */, /* 0x66 */
    /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
    /* 0x6e */ 10 /* <n> */, /* 0x6e */
    /* 0x6f */ 0,0,0, /* 0x71 */
    /* 0x72 */ 13 /* <r> */, /* 0x72 */
    /* 0x73 */ 0, /* 0x73 */
    /* 0x74 */ 9 /* <t> */, /* 0x74 */
    /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
    /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
    /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
    /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
    /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
1646
1647/* Definitions of above-declared static functions */
1648static char get_escape_equiv(unsigned c) {
1649 return Escape_Equivs[c & 0xff];
1650}
1651static unsigned extract_special(unsigned c) {
1652 return Special_Table[c & 0xff];
1653}
1654static int is_special_end(unsigned c) {
1655 return Special_Endings[c & 0xff];
1656}
1657static int is_allowed_whitespace(unsigned c) {
1658 return c == ' ' || Allowed_Whitespace[c & 0xff];
1659}
1660static int is_allowed_escape(unsigned c) {
1661 return Allowed_Escapes[c & 0xff];
1662}
1663static int is_simple_char(unsigned c) {
1664 return !String_No_Passthrough[c & 0xff];
1665}
1666
1667/* Clean up all our macros! */
1668#undef INCR_METRIC
1669#undef INCR_GENERIC
1670#undef INCR_STRINGY_CATCH
1671#undef CASE_DIGITS
1672#undef INVOKE_ERROR
1673#undef STACK_PUSH
1674#undef STACK_POP_NOPOS
1675#undef STACK_POP
1676#undef CALLBACK_AND_POP_NOPOS
1677#undef CALLBACK_AND_POP
1678#undef SPECIAL_POP
1679#undef CUR_CHAR
1680#undef DO_CALLBACK
1681#undef ENSURE_HVAL
1682#undef VERIFY_SPECIAL
1683#undef STATE_SPECIAL_LENGTH
1684#undef IS_NORMAL_NUMBER
1685#undef STATE_NUM_LAST
1686#undef FASTPARSE_EXHAUSTED
1687#undef FASTPARSE_BREAK
Note: See TracBrowser for help on using the repository browser.