source: asp3_tinet_ecnl_arm/trunk/app1_usb_watt_meter/src/jsonsl.h@ 364

Last change on this file since 364 was 364, checked in by coas-nagasima, 5 years ago

TINETとSocket APIなどを更新

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-chdr;charset=UTF-8
File size: 31.3 KB
Line 
1/**
2 * JSON Simple/Stacked/Stateful Lexer.
3 * - Does not buffer data
4 * - Maintains state
5 * - Callback oriented
6 * - Lightweight and fast. One source file and one header file
7 *
8 * Copyright (C) 2012-2015 Mark Nunberg
9 * See included LICENSE file for license details.
10 */
11/* copy from LICENSE file
12Copyright (c) 2012-2015 M. Nunberg, mnunberg@haskalah.org
13
14Permission is hereby granted, free of charge, to any person obtaining
15a copy of this software and associated documentation files (the
16"Software"), to deal in the Software without restriction, including
17without limitation the rights to use, copy, modify, merge, publish,
18distribute, sublicense, and/or sell copies of the Software, and to
19permit persons to whom the Software is furnished to do so, subject to
20the following conditions:
21
22The above copyright notice and this permission notice shall be
23included in all copies or substantial portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 */
33#ifndef JSONSL_H_
34#define JSONSL_H_
35
36#include <stdio.h>
37#include <stdlib.h>
38#include <stddef.h>
39#include <string.h>
40#include <sys/types.h>
41#include <wchar.h>
42
43#ifdef __cplusplus
44extern "C" {
45#endif /* __cplusplus */
46
/*
 * Character types consumed by the lexer.
 *
 * With JSONSL_USE_WCHAR the lexer works on wide characters, otherwise on
 * plain bytes. The alias being introduced goes last in a typedef, so the
 * wide branch must read `typedef wchar_t jsonsl_char_t;`.
 *
 * C has no `unsigned wchar_t` (the signedness of wchar_t is chosen by the
 * implementation), so the wide "unsigned" alias is wchar_t itself. This
 * keeps both aliases the same size.
 * NOTE(review): confirm the implementation does not depend on
 * jsonsl_uchar_t being unsigned when JSONSL_USE_WCHAR is enabled.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
54
/*
 * Proxies for the NaN / Infinity special flags: they expand to the real
 * flags only when JSONSL_PARSE_NAN is defined, and to 0 otherwise.
 */
#ifndef JSONSL_PARSE_NAN
#define JSONSL__NAN_PROXY 0
#define JSONSL__INF_PROXY 0
#else
#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
#endif /* JSONSL_PARSE_NAN */
62
/*
 * Fixed-width integer types (adapted from http-parser.h, and possibly
 * others). <stdint.h> is used everywhere except on Windows toolchains that
 * are neither MinGW nor MSVC with _MSC_VER >= 1600; those get hand-written
 * typedefs built on the __intN extensions instead.
 */
#if !defined(_WIN32) || defined(__MINGW32__) || \
    (defined(_MSC_VER) && _MSC_VER >= 1600)
#include <stdint.h>
#else
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#if !defined(_MSC_VER) || _MSC_VER < 1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif
#endif
80
81
/*
 * When the application configures neither JSONSL_STATE_GENERIC nor
 * JSONSL_STATE_USER_FIELDS, fall back to the generic state layout.
 */
#if !defined(JSONSL_STATE_GENERIC) && !defined(JSONSL_STATE_USER_FIELDS)
#define JSONSL_STATE_GENERIC
#endif

/* The generic layout carries no user-supplied fields. */
#ifdef JSONSL_STATE_GENERIC
#define JSONSL_STATE_USER_FIELDS
#endif

/* Additional fields for the JPR component object; empty by default. */
#ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
#define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif
94
/*
 * Linkage decoration for the public functions. Export from a DLL only when
 * building for Windows with /DJSONSL_DLL, so that users who already use
 * this as a static or embedded library don't get confused.
 */
#ifndef JSONSL_API
#  if defined(_WIN32) && defined(JSONSL_DLL)
#    define JSONSL_API __declspec(dllexport)
#  else
#    define JSONSL_API
#  endif /* _WIN32 && JSONSL_DLL */
#endif /* !JSONSL_API */
107
/* Spelling of the inline keyword for the compiler in use. */
#ifndef JSONSL_INLINE
#  if defined(_MSC_VER)
#    define JSONSL_INLINE __inline
#  elif defined(__GNUC__)
#    define JSONSL_INLINE __inline__
#  else
#    define JSONSL_INLINE inline
#  endif /* _MSC_VER or __GNUC__ */
#endif /* JSONSL_INLINE */

#define JSONSL_MAX_LEVELS 512

/* Opaque handles: the lexer and a JSON pointer (JPR) path. */
struct jsonsl_st;
typedef struct jsonsl_st *jsonsl_t;
typedef struct jsonsl_jpr_st *jsonsl_jpr_t;
124
/**
 * Bits OR'd into the value of the types whose payload is written in
 * "quotes", i.e. T_HKEY and T_STRING; AND'ing a type against this flag is
 * true for those types.
 */
#define JSONSL_Tf_STRINGY 0xffff00

/**
 * The special JSON types.
 *
 * The values are chosen to aid speed (OBJECT and LIST are the character
 * literals of their opening tokens); each value is a character that tries
 * to be a mnemonic for the type it names.
 *
 * Any new type must fit into the printable ASCII range, so that AND'ing it
 * with 0x7f yields something meaningful.
 */
#define JSONSL_XTYPE \
    X(STRING, '"'|JSONSL_Tf_STRINGY) \
    X(HKEY, '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT, '{') \
    X(LIST, '[') \
    X(SPECIAL, '^') \
    X(UESCAPE, 'u')
typedef enum {
#define X(o, c) JSONSL_T_##o = c,
    JSONSL_XTYPE
#undef X
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
} jsonsl_type_t;
159
/**
 * Subtypes for T_SPECIAL. They are flags because more than one of them can
 * apply to a given object.
 */
#define JSONSL_XSPECIAL \
    X(NONE, 0) \
    X(SIGNED, 1<<0) \
    X(UNSIGNED, 1<<1) \
    X(TRUE, 1<<2) \
    X(FALSE, 1<<3) \
    X(NULL, 1<<4) \
    X(FLOAT, 1<<5) \
    X(EXPONENT, 1<<6) \
    X(NONASCII, 1<<7) \
    X(NAN, 1<<8) \
    X(INF, 1<<9)
typedef enum {
#define X(o, b) JSONSL_SPECIALf_##o = b,
    JSONSL_XSPECIAL
#undef X

    /* Handy flags for checking */
    JSONSL_SPECIALf_UNKNOWN = 1 << 10,

    /** @private */
    JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH = 1 << 12,
    /** @private */
    JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF),
    JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF | JSONSL_SPECIALf_SIGNED),

    /** Type is numeric */
    JSONSL_SPECIALf_NUMERIC =
        (JSONSL_SPECIALf_SIGNED | JSONSL_SPECIALf_UNSIGNED),

    /** Type is a boolean */
    JSONSL_SPECIALf_BOOLEAN =
        (JSONSL_SPECIALf_TRUE | JSONSL_SPECIALf_FALSE),

    /** Type is an "extended", non-integral (but still numeric) type */
    JSONSL_SPECIALf_NUMNOINT =
        (JSONSL_SPECIALf_FLOAT | JSONSL_SPECIALf_EXPONENT |
         JSONSL_SPECIALf_NAN | JSONSL_SPECIALf_INF)
} jsonsl_special_t;
206
207
/**
 * The stack (or other) events which trigger a callback. Like the type
 * constants, these values are mnemonic characters.
 */
#define JSONSL_XACTION \
    X(PUSH, '+') \
    X(POP, '-') \
    X(UESCAPE, 'U') \
    X(ERROR, '!')
typedef enum {
#define X(a, c) JSONSL_ACTION_##a = c,
    JSONSL_XACTION
#undef X
    JSONSL_ACTION_UNKNOWN = '?'
} jsonsl_action_t;
225
226
/**
 * Errors which may be reported while parsing JSON (or a JPR path). The
 * enumerators are numbered consecutively from JSONSL_ERROR_SUCCESS (0) in
 * the order listed here; JSONSL_ERROR_GENERIC comes last.
 */
#define JSONSL_XERR \
    /* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
    /* We were expecting a 'special' (numeric, true, false, null) */ \
    X(SPECIAL_EXPECTED) \
    /* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
    /* Found a stray token */ \
    X(STRAY_TOKEN) \
    /* We were expecting a token before this one */ \
    X(MISSING_TOKEN) \
    /* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
    /* Found a '\' outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
    /* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
    /* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
    /* Found a null byte in the middle of a string */ \
    X(FOUND_NULL_BYTE) \
    /* Current level exceeds the limit specified in the constructor */ \
    X(LEVELS_EXCEEDED) \
    /* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
    /* We expected a key, but got something else instead */ \
    X(HKEY_EXPECTED) \
    /* Got an illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
    /* Found a \u-escape followed by fewer than 4 hex digits */ \
    X(UESCAPE_TOOSHORT) \
    /* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
    /* Trailing comma */ \
    X(TRAILING_COMMA) \
    /* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
    /* Value is missing for object */ \
    X(VALUE_EXPECTED) \
    /* The following are for JPR stuff */ \
    \
    /* Found a literal '%' followed by only a single valid hex digit */ \
    X(PERCENT_BADHEX) \
    /* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
    /* Duplicate slash */ \
    X(JPR_DUPSLASH) \
    /* No leading root */ \
    X(JPR_NOROOT) \
    /* Allocation failure */ \
    X(ENOMEM) \
    /* Invalid unicode codepoint detected (in case of escapes) */ \
    X(INVALID_CODEPOINT)

typedef enum {
    JSONSL_ERROR_SUCCESS = 0,
#define X(e) JSONSL_ERROR_##e,
    JSONSL_XERR
#undef X
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
292
293
294/**
295 * A state is a single level of the stack.
296 * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
297 * will remain in tact until the item is popped.
298 *
299 * As a result, it means a parent state object may be accessed from a child
300 * object, (the parents fields will all be valid). This allows a user to create
301 * an ad-hoc hierarchy on top of the JSON one.
302 *
303 */
304struct jsonsl_state_st {
305 /**
306 * The JSON object type
307 */
308 unsigned type;
309
310 /** If this element is special, then its extended type is here */
311 unsigned special_flags;
312
313 /**
314 * The position (in terms of number of bytes since the first call to
315 * jsonsl_feed()) at which the state was first pushed. This includes
316 * opening tokens, if applicable.
317 *
318 * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
319 * be the position of the first quote.
320 *
321 * @see jsonsl_st::pos which contains the _current_ position and can be
322 * used during a POP callback to get the length of the element.
323 */
324 size_t pos_begin;
325
326 /**FIXME: This is redundant as the same information can be derived from
327 * jsonsl_st::pos at pop-time */
328 size_t pos_cur;
329
330 /**
331 * Level of recursion into nesting. This is mainly a convenience
332 * variable, as this can technically be deduced from the lexer's
333 * level parameter (though the logic is not that simple)
334 */
335 unsigned int level;
336
337
338 /**
339 * how many elements in the object/list.
340 * For objects (hashes), an element is either
341 * a key or a value. Thus for one complete pair,
342 * nelem will be 2.
343 *
344 * For special types, this will hold the sum of the digits.
345 * This only holds true for values which are simple signed/unsigned
346 * numbers. Otherwise a special flag is set, and extra handling is not
347 * performed.
348 */
349 uint64_t nelem;
350
351
352
353 /*TODO: merge this and special_flags into a union */
354
355
356 /**
357 * Useful for an opening nest, this will prevent a callback from being
358 * invoked on this item or any of its children
359 */
360 int ignore_callback;
361
362 /**
363 * Counter which is incremented each time an escape ('\') is encountered.
364 * This is used internally for non-string types and should only be
365 * inspected by the user if the state actually represents a string
366 * type.
367 */
368 unsigned int nescapes;
369
370 /**
371 * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
372 * the macro expansion happens here.
373 *
374 * You can use these fields to store hierarchical or 'tagging' information
375 * for specific objects.
376 *
377 * See the documentation above for the lifetime of the state object (i.e.
378 * if the private data points to allocated memory, it should be freed
379 * when the object is popped, as the state object will be re-used)
380 */
381#ifndef JSONSL_STATE_GENERIC
382 JSONSL_STATE_USER_FIELDS
383#else
384
385 /**
386 * Otherwise, this is a simple void * pointer for anything you want
387 */
388 void *data;
389#endif /* JSONSL_STATE_USER_FIELDS */
390};
391
/**
 * Number of elements in a list.
 * @param st The state; must be of type JSONSL_T_LIST
 * @return the element count of the list
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)

/**
 * Number of key-value pairs in an object. Keys and values are counted
 * separately in nelem, hence the division by two.
 * @param st The state; must be of type JSONSL_T_OBJECT
 * @return the pair count of the object
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)

/**
 * Numeric value of a special.
 * @param st The state; must be of type JSONSL_T_SPECIAL, with the
 *           JSONSL_SPECIALf_NUMERIC flag set in special_flags
 * @return the numeric value held by the state
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
411
412/*
 * So now we need some special structure for keeping the
 * JPR info in sync, preferably all in a single block
 * of memory (there's no need for separate allocations).
 * So we will define a 'table' with the following layout
417 *
418 * Level nPosbl JPR1_last JPR2_last JPR3_last
419 *
420 * 0 1 NOMATCH POSSIBLE POSSIBLE
421 * 1 0 NOMATCH NOMATCH COMPLETE
422 * [ table ends here because no further path is possible]
423 *
424 * Where the JPR..n corresponds to the number of JPRs
425 * requested, and nPosble is a quick flag to determine
426 *
427 * the number of possibilities. In the future this might
428 * be made into a proper 'jump' table,
429 *
430 * Since we always mark JPRs from the higher levels descending
431 * into the lower ones, a prospective child match would first
432 * look at the parent table to check the possibilities, and then
433 * see which ones were possible..
434 *
435 * Thus, the size of this blob would be (and these are all ints here)
436 * nLevels * nJPR * 2.
437 *
438 * the 'Width' of the table would be nJPR*2, and the 'height' would be
439 * nlevels
440 */
441
442/**
443 * This is called when a stack change ocurs.
444 *
445 * @param jsn The lexer
446 * @param action The type of action, this can be PUSH or POP
447 * @param state A pointer to the stack currently affected by the action
448 * @param at A pointer to the position of the input buffer which triggered
449 * this action.
450 */
451typedef void (*jsonsl_stack_callback)(
452 jsonsl_t jsn,
453 jsonsl_action_t action,
454 struct jsonsl_state_st* state,
455 const jsonsl_char_t *at);
456
457
458/**
459 * This is called when an error is encountered.
460 * Sometimes it's possible to 'erase' characters (by replacing them
461 * with whitespace). If you think you have corrected the error, you
462 * can return a true value, in which case the parser will backtrack
463 * and try again.
464 *
465 * @param jsn The lexer
466 * @param error The error which was thrown
467 * @param state the current state
468 * @param a pointer to the position of the input buffer which triggered
469 * the error. Note that this is not const, this is because you have the
470 * possibility of modifying the character in an attempt to correct the
471 * error
472 *
473 * @return zero to bail, nonzero to try again (this only makes sense if
474 * the input buffer has been modified by this callback)
475 */
476typedef int (*jsonsl_error_callback)(
477 jsonsl_t jsn,
478 jsonsl_error_t error,
479 struct jsonsl_state_st* state,
480 jsonsl_char_t *at);
481
482struct jsonsl_st {
483 /** Public, read-only */
484
485 /** This is the current level of the stack */
486 unsigned int level;
487
488 /** Flag set to indicate we should stop processing */
489 unsigned int stopfl;
490
491 /**
492 * This is the current position, relative to the beginning
493 * of the stream.
494 */
495 size_t pos;
496
497 /** This is the 'bytes' variable passed to feed() */
498 const jsonsl_char_t *base;
499
500 /** Callback invoked for PUSH actions */
501 jsonsl_stack_callback action_callback_PUSH;
502
503 /** Callback invoked for POP actions */
504 jsonsl_stack_callback action_callback_POP;
505
506 /** Default callback for any action, if neither PUSH or POP callbacks are defined */
507 jsonsl_stack_callback action_callback;
508
509 /**
510 * Do not invoke callbacks for objects deeper than this level.
511 * NOTE: This field establishes the lower bound for ignored callbacks,
512 * and is thus misnamed. `min_ignore_level` would actually make more
513 * sense, but we don't want to break API.
514 */
515 unsigned int max_callback_level;
516
517 /** The error callback. Invoked when an error happens. Should not be NULL */
518 jsonsl_error_callback error_callback;
519
520 /* these are boolean flags you can modify. You will be called
521 * about notification for each of these types if the corresponding
522 * variable is true.
523 */
524
525 /**
526 * @name Callback Booleans.
527 * These determine whether a callback is to be invoked for certain types of objects
528 * @{*/
529
530 /** Boolean flag to enable or disable the invokcation for events on this type*/
531 int call_SPECIAL;
532 int call_OBJECT;
533 int call_LIST;
534 int call_STRING;
535 int call_HKEY;
536 /*@}*/
537
538 /**
539 * @name u-Escape handling
540 * Special handling for the \\u-f00d type sequences. These are meant
541 * to be translated back into the corresponding octet(s).
542 * A special callback (if set) is invoked with *at=='u'. An application
543 * may wish to temporarily suspend parsing and handle the 'u-' sequence
544 * internally (or not).
545 */
546
547 /*@{*/
548
549 /** Callback to be invoked for a u-escape */
550 jsonsl_stack_callback action_callback_UESCAPE;
551
552 /** Boolean flag, whether to invoke the callback */
553 int call_UESCAPE;
554
555 /** Boolean flag, whether we should return after encountering a u-escape:
556 * the callback is invoked and then we return if this is true
557 */
558 int return_UESCAPE;
559 /*@}*/
560
561 struct {
562 int allow_trailing_comma;
563 } options;
564
565 /** Put anything here */
566 void *data;
567
568 /*@{*/
569 /** Private */
570 int in_escape;
571 char expecting;
572 char tok_last;
573 int can_insert;
574 unsigned int levels_max;
575
576#ifndef JSONSL_NO_JPR
577 size_t jpr_count;
578 jsonsl_jpr_t *jprs;
579
580 /* Root pointer for JPR matching information */
581 size_t *jpr_root;
582#endif /* JSONSL_NO_JPR */
583 /*@}*/
584
585 /**
586 * This is the stack. Its upper bound is levels_max, or the
587 * nlevels argument passed to jsonsl_new. If you modify this structure,
588 * make sure that this member is last.
589 */
590 struct jsonsl_state_st stack[1];
591};
592
593
594/**
595 * Creates a new lexer object, with capacity for recursion up to nlevels
596 *
597 * @param nlevels maximum recursion depth
598 */
599JSONSL_API
600jsonsl_t jsonsl_new(int nlevels);
601
602/**
603 * Feeds data into the lexer.
604 *
605 * @param jsn the lexer object
606 * @param bytes new data to be fed
607 * @param nbytes size of new data
608 */
609JSONSL_API
610void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
611
612/**
613 * Resets the internal parser state. This does not free the parser
614 * but does clean it internally, so that the next time feed() is called,
615 * it will be treated as a new stream
616 *
617 * @param jsn the lexer
618 */
619JSONSL_API
620void jsonsl_reset(jsonsl_t jsn);
621
622/**
623 * Frees the lexer, cleaning any allocated memory taken
624 *
625 * @param jsn the lexer
626 */
627JSONSL_API
628void jsonsl_destroy(jsonsl_t jsn);
629
630/**
631 * Gets the 'parent' element, given the current one
632 *
633 * @param jsn the lexer
634 * @param cur the current nest, which should be a struct jsonsl_nest_st
635 */
636static JSONSL_INLINE
637struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
638 const struct jsonsl_state_st *state)
639{
640 /* Don't complain about overriding array bounds */
641 if (state->level > 1) {
642 return jsn->stack + state->level - 1;
643 } else {
644 return NULL;
645 }
646}
647
648/**
649 * Gets the state of the last fully consumed child of this parent. This is
650 * only valid in the parent's POP callback.
651 *
652 * @param the lexer
653 * @return A pointer to the child.
654 */
655static JSONSL_INLINE
656struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
657 const struct jsonsl_state_st *parent)
658{
659 return jsn->stack + (parent->level + 1);
660}
661
662/**Call to instruct the parser to stop parsing and return. This is valid
663 * only from within a callback */
664static JSONSL_INLINE
665void jsonsl_stop(jsonsl_t jsn)
666{
667 jsn->stopfl = 1;
668}
669
670/**
671 * This enables receiving callbacks on all events. Doesn't do
672 * anything special but helps avoid some boilerplate.
673 * This does not touch the UESCAPE callbacks or flags.
674 */
675static JSONSL_INLINE
676void jsonsl_enable_all_callbacks(jsonsl_t jsn)
677{
678 jsn->call_HKEY = 1;
679 jsn->call_STRING = 1;
680 jsn->call_OBJECT = 1;
681 jsn->call_SPECIAL = 1;
682 jsn->call_LIST = 1;
683}
684
/**
 * True if the given state object can have children, i.e. it is a list or
 * an object.
 *
 * The argument is parenthesized so that pointer expressions such as
 * `p + 1` expand correctly. Note that it is evaluated twice, so it must
 * not have side effects.
 *
 * @param state pointer to a struct jsonsl_state_st
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
    (((state)->type == JSONSL_T_OBJECT) || ((state)->type == JSONSL_T_LIST))
691
692/**
693 * These two functions, dump a string representation
694 * of the error or type, respectively. They will never
695 * return NULL
696 */
697JSONSL_API
698const char* jsonsl_strerror(jsonsl_error_t err);
699JSONSL_API
700const char* jsonsl_strtype(jsonsl_type_t jt);
701
702/**
703 * Dumps global metrics to the screen. This is a noop unless
704 * jsonsl was compiled with JSONSL_USE_METRICS
705 */
706JSONSL_API
707void jsonsl_dump_global_metrics(void);
708
709/* This macro just here for editors to do code folding */
710#ifndef JSONSL_NO_JPR
711
712/**
713 * @name JSON Pointer API
714 *
715 * JSONPointer API. This isn't really related to the lexer (at least not yet)
716 * JSONPointer provides an extremely simple specification for providing
717 * locations within JSON objects. We will extend it a bit and allow for
718 * providing 'wildcard' characters by which to be able to 'query' the stream.
719 *
720 * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
721 *
722 * Currently I'm implementing the 'single query' API which can only use a single
723 * query component. In the future I will integrate my yet-to-be-published
724 * Boyer-Moore-esque prefix searching implementation, in order to allow
725 * multiple paths to be merged into one for quick and efficient searching.
726 *
727 *
 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'track' each
730 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
731 * callback for an object, we will check to see whether the index matching
732 * the component corresponding to the current level contains a match
733 * for our path.
734 *
735 * In order to do this properly, a structure must be maintained within the
736 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
738 * specification, and discarded by all which do not (thereby eliminating
739 * their children from inheriting it).
740 *
741 * A successful match is a complete one. One can provide multiple paths with
742 * multiple levels of matches e.g.
743 * /foo/bar/baz/^/blah
744 *
745 * @{
746 */
747
/** The wildcard character for path components; may be predefined by the
 * application to override the default. */
#if !defined(JSONSL_PATH_WILDCARD_CHAR)
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* JSONSL_PATH_WILDCARD_CHAR */
752
/** Results of matching a state against a JPR path. */
#define JSONSL_XMATCH \
    X(COMPLETE, 1) \
    X(POSSIBLE, 0) \
    X(NOMATCH, -1) \
    X(TYPE_MISMATCH, -2)

typedef enum {
#define X(T, v) JSONSL_MATCH_##T = v,
    JSONSL_XMATCH
#undef X
    /* NOTE(review): with no explicit value this follows TYPE_MISMATCH (-2)
     * and therefore evaluates to -1, the same value as NOMATCH. Confirm
     * whether the two are meant to be distinguishable. */
    JSONSL_MATCH_UNKNOWN
} jsonsl_jpr_match_t;
768
/** Kind of a single JPR path component. */
typedef enum {
    /* Special */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0,

    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD = 2,
    JSONSL_PATH_NUMERIC = 3,
    JSONSL_PATH_ROOT = 4
} jsonsl_jpr_type_t;
779
780struct jsonsl_jpr_component_st {
781 /** The string the component points to */
782 char *pstr;
783 /** if this is a numeric type, the number is 'cached' here */
784 unsigned long idx;
785 /** The length of the string */
786 size_t len;
787 /** The type of component (NUMERIC or STRING) */
788 jsonsl_jpr_type_t ptype;
789
790 /** Set this to true to enforce type checking between dict keys and array
791 * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
792 * that an array index is actually a child of a dictionary. */
793 short is_arridx;
794
795 /* Extra fields (for more advanced searches. Default is empty) */
796 JSONSL_JPR_COMPONENT_USER_FIELDS
797};
798
/** A JPR (JSON pointer) path. */
struct jsonsl_jpr_st {
    /** Path components, and how many of them there are */
    struct jsonsl_jpr_component_st *components;
    size_t ncomponents;

    /** Type of the match to be expected; if nonzero, it is compared
     * against the actual type */
    unsigned match_type;

    /** Base of the allocated string for the components */
    char *basestr;

    /** The original match string (useful for returning to the user) and
     * its length field */
    char *orig;
    size_t norig;
};
815
816/**
817 * Create a new JPR object.
818 *
819 * @param path the JSONPointer path specification.
820 * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
821 * then more details will be in this variable.
822 *
823 * @return a new jsonsl_jpr_t object, or NULL on error.
824 */
825JSONSL_API
826jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
827
828/**
829 * Destroy a JPR object
830 */
831JSONSL_API
832void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
833
834/**
835 * Match a JSON object against a type and specific level
836 *
837 * @param jpr the JPR object
838 * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
839 * @param parent_level the level of the parent
840 * @param key the 'key' of the child. If the parent is an array, this should be
841 * empty.
842 * @param nkey - the length of the key. If the parent is an array (T_LIST), then
843 * this should be the current index.
844 *
845 * NOTE: The key of the child means any kind of associative data related to the
846 * element. Thus: <<< { "foo" : [ >>,
847 * the opening array's key is "foo".
848 *
849 * @return a status constant. This indicates whether a match was excluded, possible,
850 * or successful.
851 */
852JSONSL_API
853jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
854 unsigned int parent_type,
855 unsigned int parent_level,
856 const char *key, size_t nkey);
857
858/**
859 * Alternate matching algorithm. This matching algorithm does not use
860 * JSONPointer but relies on a more structured searching mechanism. It
861 * assumes that there is a clear distinction between array indices and
862 * object keys. In this case, the jsonsl_path_component_st::ptype should
863 * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
864 * jsonsl_path_comonent_st::is_arridx field will be removed in a future
865 * version).
866 *
867 * @param jpr The path
868 * @param parent The parent structure. Can be NULL if this is the root object
869 * @param child The child structure. Should not be NULL
870 * @param key Object key, if an object
871 * @param nkey Length of object key
872 * @return Status constant if successful
873 *
874 * @note
875 * For successful matching, both the key and the path itself should be normalized
876 * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
877 * should currently be done in the application. Another version of this function
878 * may use a temporary buffer in such circumstances (allocated by the application).
879 *
880 * Since this function also checks the state of the child, it should only
881 * be called on PUSH callbacks, and not POP callbacks
882 */
883JSONSL_API
884jsonsl_jpr_match_t
885jsonsl_path_match(jsonsl_jpr_t jpr,
886 const struct jsonsl_state_st *parent,
887 const struct jsonsl_state_st *child,
888 const char *key, size_t nkey);
889
890
891/**
892 * Associate a set of JPR objects with a lexer instance.
893 * This should be called before the lexer has been fed any data (and
894 * behavior is undefined if you don't adhere to this).
895 *
896 * After using this function, you may subsequently call match_state() on
897 * given states (presumably from within the callbacks).
898 *
899 * Note that currently the first JPR is the quickest and comes
900 * pre-allocated with the state structure. Further JPR objects
901 * are chained.
902 *
903 * @param jsn The lexer
904 * @param jprs An array of jsonsl_jpr_t objects
905 * @param njprs How many elements in the jprs array.
906 */
907JSONSL_API
908void jsonsl_jpr_match_state_init(jsonsl_t jsn,
909 jsonsl_jpr_t *jprs,
910 size_t njprs);
911
912/**
913 * This follows the same semantics as the normal match,
914 * except we infer parent and type information from the relevant state objects.
915 * The match status (for all possible JPR objects) is set in the *out parameter.
916 *
917 * If a match has succeeded, then its JPR object will be returned. In all other
918 * instances, NULL is returned;
919 *
920 * @param jpr The jsonsl_jpr_t handle
921 * @param state The jsonsl_state_st which is a candidate
922 * @param key The hash key (if applicable, can be NULL if parent is list)
923 * @param nkey Length of hash key (if applicable, can be zero if parent is list)
924 * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
925 * the match result
926 *
927 * @return If a match was completed in full, then the JPR object containing
928 * the matching path will be returned. Otherwise, the return is NULL (note, this
929 * does not mean matching has failed, it can still be part of the match: check
930 * the out parameter).
931 */
932JSONSL_API
933jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
934 struct jsonsl_state_st *state,
935 const char *key,
936 size_t nkey,
937 jsonsl_jpr_match_t *out);
938
939
940/**
941 * Cleanup any memory allocated and any states set by
942 * match_state_init() and match_state()
943 * @param jsn The lexer
944 */
945JSONSL_API
946void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
947
948/**
949 * Return a string representation of the match result returned by match()
950 */
951JSONSL_API
952const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
953
954/* @}*/
955
956/**
957 * Utility function to convert escape sequences into their original form.
958 *
959 * The decoders I've sampled do not seem to specify a standard behavior of what
960 * to escape/unescape.
961 *
962 * RFC 4627 Mandates only that the quoute, backslash, and ASCII control
963 * characters (0x00-0x1f) be escaped. It is often common for applications
964 * to escape a '/' - however this may also be desired behavior. the JSON
965 * spec is not clear on this, and therefore jsonsl leaves it up to you.
966 *
967 * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
968 * true when dealing with 'u-escapes' which can be expressed perfectly fine
969 * as utf8. One use case for normalization is JPR string comparison, in which
970 * case two effectively equivalent strings may not match because one is using
971 * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
972 * an empty `toEscape` table, enabling only the `u` index.
973 *
974 * @param in The input string.
975 * @param out An allocated output (should be the same size as in)
976 * @param len the size of the buffer
977 * @param toEscape - A sparse array of characters to unescape. Characters
978 * which are not present in this array, e.g. toEscape['c'] == 0 will be
979 * ignored and passed to the output in their original form.
980 * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
981 * then this variable will have the SPECIALf_NONASCII flag on.
982 *
983 * @param err A pointer to an error variable. If an error ocurrs, it will be
984 * set in this variable
985 * @param errat If not null and an error occurs, this will be set to point
986 * to the position within the string at which the offending character was
987 * encountered.
988 *
989 * @return The effective size of the output buffer.
990 *
991 * @note
992 * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
993 * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
994 * which while still correct in some sense was confusing for many (especially
995 * considering that the inputs were variations of char).
996 *
997 * @note
998 * The output buffer will never be larger than the input buffer, since
999 * standard escape sequences (i.e. '\t') occupy two bytes in the source
1000 * but only one byte (when unescaped) in the output. Likewise u-escapes
1001 * (i.e. \uXXXX) will occupy six bytes in the source, but at the most
1002 * two bytes when escaped.
1003 */
1004JSONSL_API
1005size_t jsonsl_util_unescape_ex(const char *in,
1006 char *out,
1007 size_t len,
1008 const int toEscape[128],
1009 unsigned *oflags,
1010 jsonsl_error_t *err,
1011 const char **errat);
1012
/**
 * Convenience wrapper around jsonsl_util_unescape_ex() that passes NULL
 * for both `oflags` and `errat`, to avoid passing too many parameters.
 */
#define jsonsl_util_unescape(in, out, len, toEscape, err) \
    jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
1018
1019#endif /* JSONSL_NO_JPR */
1020
1021#ifdef __cplusplus
1022}
1023#endif /* __cplusplus */
1024
1025#endif /* JSONSL_H_ */
Note: See TracBrowser for help on using the repository browser.