1 | /**
|
---|
2 | * JSON Simple/Stacked/Stateful Lexer.
|
---|
3 | * - Does not buffer data
|
---|
4 | * - Maintains state
|
---|
5 | * - Callback oriented
|
---|
6 | * - Lightweight and fast. One source file and one header file
|
---|
7 | *
|
---|
8 | * Copyright (C) 2012-2015 Mark Nunberg
|
---|
9 | * See included LICENSE file for license details.
|
---|
10 | */
|
---|
11 | /* copy from LICENSE file
|
---|
12 | Copyright (c) 2012-2015 M. Nunberg, mnunberg@haskalah.org
|
---|
13 |
|
---|
14 | Permission is hereby granted, free of charge, to any person obtaining
|
---|
15 | a copy of this software and associated documentation files (the
|
---|
16 | "Software"), to deal in the Software without restriction, including
|
---|
17 | without limitation the rights to use, copy, modify, merge, publish,
|
---|
18 | distribute, sublicense, and/or sell copies of the Software, and to
|
---|
19 | permit persons to whom the Software is furnished to do so, subject to
|
---|
20 | the following conditions:
|
---|
21 |
|
---|
22 | The above copyright notice and this permission notice shall be
|
---|
23 | included in all copies or substantial portions of the Software.
|
---|
24 |
|
---|
25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
---|
26 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
---|
27 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
---|
28 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
---|
29 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
---|
30 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
---|
31 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
---|
32 | */
|
---|
33 | #ifndef JSONSL_H_
|
---|
34 | #define JSONSL_H_
|
---|
35 |
|
---|
36 | #include <stdio.h>
|
---|
37 | #include <stdlib.h>
|
---|
38 | #include <stddef.h>
|
---|
39 | #include <string.h>
|
---|
40 | #include <t_stddef.h>
|
---|
41 | #include <t_syslog.h>
|
---|
42 | #include <wchar.h>
|
---|
43 |
|
---|
44 | #ifdef __cplusplus
|
---|
45 | extern "C" {
|
---|
46 | #endif /* __cplusplus */
|
---|
47 |
|
---|
/**
 * Character types used by the lexer API.
 *
 * jsonsl_char_t is the element type of the buffers handed to the lexer;
 * jsonsl_uchar_t is its companion type for code that wants the "unsigned"
 * flavour of a character.
 *
 * When JSONSL_USE_WCHAR is defined both aliases are wchar_t. C has no
 * "unsigned wchar_t" type (wchar_t is a typedef, so it cannot take a sign
 * specifier), and the signedness of wchar_t is implementation-defined.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
|
---|
55 |
|
---|
/*
 * Fixed-width integer types (approach stolen from http-parser.h, and
 * possibly others).
 *
 * Everything except old non-MinGW Windows toolchains gets <stdint.h>.
 * Those old toolchains get hand-written typedefs built on the __intN
 * keywords instead, plus size_t/ssize_t on the very oldest ones.
 */
#if !defined(_WIN32) || defined(__MINGW32__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
#include <stdint.h>
#else
typedef __int8           int8_t;
typedef unsigned __int8  uint8_t;
typedef __int16          int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32          int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64          int64_t;
typedef unsigned __int64 uint64_t;
#if !defined(_MSC_VER) || _MSC_VER < 1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif
#endif
|
---|
73 |
|
---|
/*
 * Portability shim: toolchains other than MSVC may not provide sscanf_s(),
 * so map it onto plain sscanf().
 *
 * The second macro parameter (frmc) is the format string of the standard
 * sscanf_s(buffer, format, ...) signature and must be forwarded; dropping it
 * would make sscanf() interpret the first conversion argument as the format.
 *
 * Because the mapping is a plain pass-through, callers must not rely on the
 * extra buffer-size arguments sscanf_s() expects for %c, %s and %[.
 *
 * NOTE(review): assumes every caller uses the standard (buffer, format, ...)
 * argument order -- confirm against the call sites in the implementation.
 */
#ifndef _MSC_VER
#ifndef sscanf_s
#define sscanf_s(frm, frmc, ...) sscanf(frm, frmc, __VA_ARGS__)
#endif
#endif
|
---|
79 |
|
---|
/*
 * Per-state user data selection.
 *
 * If the user chose neither JSONSL_STATE_GENERIC nor a custom
 * JSONSL_STATE_USER_FIELDS, fall back to the generic layout. In the generic
 * layout JSONSL_STATE_USER_FIELDS is defined as empty.
 */
#if !(defined(JSONSL_STATE_GENERIC) || defined(JSONSL_STATE_USER_FIELDS))
#define JSONSL_STATE_GENERIC
#endif /* neither GENERIC nor USER_FIELDS */

#if defined(JSONSL_STATE_GENERIC)
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */

#if !defined(JSONSL_API)
/**
 * Export decoration for public functions.
 *
 * We require a /DJSONSL_DLL so that users already using this as a static
 * or embedded library don't get confused
 */
#if !defined(_WIN32) || !defined(JSONSL_DLL)
#define JSONSL_API
#else
#define JSONSL_API __declspec(dllexport)
#endif /* _WIN32 && JSONSL_DLL */

#endif /* !JSONSL_API */

/* Compiler-specific spelling of the inline keyword, unless already provided. */
#if !defined(JSONSL_INLINE)
#ifdef _MSC_VER
#define JSONSL_INLINE __inline
#elif defined(__GNUC__)
#define JSONSL_INLINE __inline__
#else
#define JSONSL_INLINE inline
#endif /* _MSC_VER or __GNUC__ */
#endif /* JSONSL_INLINE */
|
---|
110 |
|
---|
111 | #define JSONSL_MAX_LEVELS 512
|
---|
112 |
|
---|
113 | struct jsonsl_st;
|
---|
114 | typedef struct jsonsl_st *jsonsl_t;
|
---|
115 |
|
---|
116 | typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
|
---|
117 |
|
---|
118 | /**
|
---|
119 | * This flag is true when AND'd against a type whose value
|
---|
120 | * must be in "quotes" i.e. T_HKEY and T_STRING
|
---|
121 | */
|
---|
122 | #define JSONSL_Tf_STRINGY 0xffff00
|
---|
123 |
|
---|
124 | /**
|
---|
125 | * Constant representing the special JSON types.
|
---|
126 | * The values are special and aid in speed (the OBJECT and LIST
|
---|
127 | * values are the char literals of their openings).
|
---|
128 | *
|
---|
129 | * Their actual value is a character which attempts to resemble
|
---|
130 | * some mnemonic reference to the actual type.
|
---|
131 | *
|
---|
132 | * If new types are added, they must fit into the ASCII printable
|
---|
133 | * range (so they should be AND'd with 0x7f and yield something
|
---|
134 | * meaningful)
|
---|
135 | */
|
---|
136 | #define JSONSL_XTYPE \
|
---|
137 | X(STRING, '"'|JSONSL_Tf_STRINGY) \
|
---|
138 | X(HKEY, '#'|JSONSL_Tf_STRINGY) \
|
---|
139 | X(OBJECT, '{') \
|
---|
140 | X(LIST, '[') \
|
---|
141 | X(SPECIAL, '^') \
|
---|
142 | X(UESCAPE, 'u')
|
---|
143 | typedef enum {
|
---|
144 | #define X(o, c) \
|
---|
145 | JSONSL_T_##o = c,
|
---|
146 | JSONSL_XTYPE
|
---|
147 | JSONSL_T_UNKNOWN = '?',
|
---|
148 | /* Abstract 'root' object */
|
---|
149 | JSONSL_T_ROOT = 0
|
---|
150 | #undef X
|
---|
151 | } jsonsl_type_t;
|
---|
152 |
|
---|
153 | /**
|
---|
154 | * Subtypes for T_SPECIAL. We define them as flags
|
---|
155 | * because more than one type can be applied to a
|
---|
156 | * given object.
|
---|
157 | */
|
---|
158 |
|
---|
159 | #define JSONSL_XSPECIAL \
|
---|
160 | X(NONE, 0) \
|
---|
161 | X(SIGNED, 1<<0) \
|
---|
162 | X(UNSIGNED, 1<<1) \
|
---|
163 | X(TRUE, 1<<2) \
|
---|
164 | X(FALSE, 1<<3) \
|
---|
165 | X(NULL, 1<<4) \
|
---|
166 | X(FLOAT, 1<<5) \
|
---|
167 | X(EXPONENT, 1<<6) \
|
---|
168 | X(NONASCII, 1<<7)
|
---|
169 | typedef enum {
|
---|
170 | #define X(o,b) \
|
---|
171 | JSONSL_SPECIALf_##o = b,
|
---|
172 | JSONSL_XSPECIAL
|
---|
173 | #undef X
|
---|
174 | /* Handy flags for checking */
|
---|
175 | JSONSL_SPECIALf_UNKNOWN = 1 << 8,
|
---|
176 | JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED|JSONSL_SPECIALf_UNSIGNED),
|
---|
177 | JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),
|
---|
178 | /* For non-simple numeric types */
|
---|
179 | JSONSL_SPECIALf_NUMNOINT = (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT)
|
---|
180 | } jsonsl_special_t;
|
---|
181 |
|
---|
182 |
|
---|
183 | /**
|
---|
184 | * These are the various types of stack (or other) events
|
---|
185 | * which will trigger a callback.
|
---|
186 | * Like the type constants, these are also mnemonic
|
---|
187 | */
|
---|
188 | #define JSONSL_XACTION \
|
---|
189 | X(PUSH, '+') \
|
---|
190 | X(POP, '-') \
|
---|
191 | X(UESCAPE, 'U') \
|
---|
192 | X(ERROR, '!')
|
---|
193 | typedef enum {
|
---|
194 | #define X(a,c) \
|
---|
195 | JSONSL_ACTION_##a = c,
|
---|
196 | JSONSL_XACTION
|
---|
197 | JSONSL_ACTION_UNKNOWN = '?'
|
---|
198 | #undef X
|
---|
199 | } jsonsl_action_t;
|
---|
200 |
|
---|
201 |
|
---|
202 | /**
|
---|
203 | * Various errors which may be thrown while parsing JSON
|
---|
204 | */
|
---|
205 | #define JSONSL_XERR \
|
---|
206 | /* Trailing garbage characters */ \
|
---|
207 | X(GARBAGE_TRAILING) \
|
---|
208 | /* We were expecting a 'special' (numeric, true, false, null) */ \
|
---|
209 | X(SPECIAL_EXPECTED) \
|
---|
210 | /* Found a stray token */ \
|
---|
211 | X(STRAY_TOKEN) \
|
---|
212 | /* We were expecting a token before this one */ \
|
---|
213 | X(MISSING_TOKEN) \
|
---|
214 | /* Cannot insert because the container is not ready */ \
|
---|
215 | X(CANT_INSERT) \
|
---|
216 | /* Found a '\' outside a string */ \
|
---|
217 | X(ESCAPE_OUTSIDE_STRING) \
|
---|
218 | /* Found a ':' outside of a hash */ \
|
---|
219 | X(KEY_OUTSIDE_OBJECT) \
|
---|
220 | /* found a string outside of a container */ \
|
---|
221 | X(STRING_OUTSIDE_CONTAINER) \
|
---|
222 | /* Found a null byte in middle of string */ \
|
---|
223 | X(FOUND_NULL_BYTE) \
|
---|
224 | /* Current level exceeds limit specified in constructor */ \
|
---|
225 | X(LEVELS_EXCEEDED) \
|
---|
226 | /* Got a } as a result of an opening [ or vice versa */ \
|
---|
227 | X(BRACKET_MISMATCH) \
|
---|
228 | /* We expected a key, but got something else instead */ \
|
---|
229 | X(HKEY_EXPECTED) \
|
---|
230 | /* We got an illegal control character (bad whitespace or something) */ \
|
---|
231 | X(WEIRD_WHITESPACE) \
|
---|
232 | /* Found a \u-escape, but there were less than 4 following hex digits */ \
|
---|
233 | X(UESCAPE_TOOSHORT) \
|
---|
234 | /* Invalid two-character escape */ \
|
---|
235 | X(ESCAPE_INVALID) \
|
---|
236 | /* Trailing comma */ \
|
---|
237 | X(TRAILING_COMMA) \
|
---|
238 | /* An invalid number was passed in a numeric field */ \
|
---|
239 | X(INVALID_NUMBER) \
|
---|
240 | /* The following are for JPR Stuff */ \
|
---|
241 | \
|
---|
242 | /* Found a literal '%' but it was only followed by a single valid hex digit */ \
|
---|
243 | X(PERCENT_BADHEX) \
|
---|
244 | /* jsonpointer URI is malformed '/' */ \
|
---|
245 | X(JPR_BADPATH) \
|
---|
246 | /* Duplicate slash */ \
|
---|
247 | X(JPR_DUPSLASH) \
|
---|
248 | /* No leading root */ \
|
---|
249 | X(JPR_NOROOT) \
|
---|
250 | /* Allocation failure */ \
|
---|
251 | X(ENOMEM)
|
---|
252 |
|
---|
253 | typedef enum {
|
---|
254 | JSONSL_ERROR_SUCCESS = 0,
|
---|
255 | #define X(e) \
|
---|
256 | JSONSL_ERROR_##e,
|
---|
257 | JSONSL_XERR
|
---|
258 | #undef X
|
---|
259 | JSONSL_ERROR_GENERIC
|
---|
260 | } jsonsl_error_t;
|
---|
261 |
|
---|
262 |
|
---|
263 | /**
|
---|
264 | * A state is a single level of the stack.
|
---|
265 | * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
|
---|
266 | * will remain intact until the item is popped.
|
---|
267 | *
|
---|
268 | * As a result, it means a parent state object may be accessed from a child
|
---|
269 | * object, (the parents fields will all be valid). This allows a user to create
|
---|
270 | * an ad-hoc hierarchy on top of the JSON one.
|
---|
271 | *
|
---|
272 | */
|
---|
273 | struct jsonsl_state_st {
|
---|
274 | /**
|
---|
275 | * The JSON object type
|
---|
276 | */
|
---|
277 | unsigned type;
|
---|
278 |
|
---|
279 | /** If this element is special, then its extended type is here */
|
---|
280 | unsigned special_flags;
|
---|
281 |
|
---|
282 | /**
|
---|
283 | * The position (in terms of number of bytes since the first call to
|
---|
284 | * jsonsl_feed()) at which the state was first pushed. This includes
|
---|
285 | * opening tokens, if applicable.
|
---|
286 | *
|
---|
287 | * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
|
---|
288 | * be the position of the first quote.
|
---|
289 | *
|
---|
290 | * @see jsonsl_st::pos which contains the _current_ position and can be
|
---|
291 | * used during a POP callback to get the length of the element.
|
---|
292 | */
|
---|
293 | size_t pos_begin;
|
---|
294 |
|
---|
295 | /**FIXME: This is redundant as the same information can be derived from
|
---|
296 | * jsonsl_st::pos at pop-time */
|
---|
297 | size_t pos_cur;
|
---|
298 |
|
---|
299 | /**
|
---|
300 | * Level of recursion into nesting. This is mainly a convenience
|
---|
301 | * variable, as this can technically be deduced from the lexer's
|
---|
302 | * level parameter (though the logic is not that simple)
|
---|
303 | */
|
---|
304 | unsigned int level;
|
---|
305 |
|
---|
306 |
|
---|
307 | /**
|
---|
308 | * how many elements in the object/list.
|
---|
309 | * For objects (hashes), an element is either
|
---|
310 | * a key or a value. Thus for one complete pair,
|
---|
311 | * nelem will be 2.
|
---|
312 | *
|
---|
313 | * For special types, this will hold the sum of the digits.
|
---|
314 | * This only holds true for values which are simple signed/unsigned
|
---|
315 | * numbers. Otherwise a special flag is set, and extra handling is not
|
---|
316 | * performed.
|
---|
317 | */
|
---|
318 | uint64_t nelem;
|
---|
319 |
|
---|
320 |
|
---|
321 |
|
---|
322 | /*TODO: merge this and special_flags into a union */
|
---|
323 |
|
---|
324 |
|
---|
325 | /**
|
---|
326 | * Useful for an opening nest, this will prevent a callback from being
|
---|
327 | * invoked on this item or any of its children
|
---|
328 | */
|
---|
329 | int ignore_callback;
|
---|
330 |
|
---|
331 | /**
|
---|
332 | * Counter which is incremented each time an escape ('\') is encountered.
|
---|
333 | */
|
---|
334 | unsigned int nescapes;
|
---|
335 |
|
---|
336 | /**
|
---|
337 | * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
|
---|
338 | * the macro expansion happens here.
|
---|
339 | *
|
---|
340 | * You can use these fields to store hierarchical or 'tagging' information
|
---|
341 | * for specific objects.
|
---|
342 | *
|
---|
343 | * See the documentation above for the lifetime of the state object (i.e.
|
---|
344 | * if the private data points to allocated memory, it should be freed
|
---|
345 | * when the object is popped, as the state object will be re-used)
|
---|
346 | */
|
---|
347 | #ifndef JSONSL_STATE_GENERIC
|
---|
348 | JSONSL_STATE_USER_FIELDS
|
---|
349 | #else
|
---|
350 |
|
---|
351 | /**
|
---|
352 | * Otherwise, this is a simple void * pointer for anything you want
|
---|
353 | */
|
---|
354 | void *data;
|
---|
355 | #endif /* JSONSL_STATE_USER_FIELDS */
|
---|
356 | };
|
---|
357 |
|
---|
358 | /*
|
---|
359 | * So now we need some special structure for keeping the
|
---|
360 | * JPR info in sync. Preferably all in a single block
|
---|
361 | * of memory (there's no need for separate allocations).
|
---|
362 | * So we will define a 'table' with the following layout
|
---|
363 | *
|
---|
364 | * Level nPosbl JPR1_last JPR2_last JPR3_last
|
---|
365 | *
|
---|
366 | * 0 1 NOMATCH POSSIBLE POSSIBLE
|
---|
367 | * 1 0 NOMATCH NOMATCH COMPLETE
|
---|
368 | * [ table ends here because no further path is possible]
|
---|
369 | *
|
---|
370 | * Where the JPR..n corresponds to the number of JPRs
|
---|
371 | * requested, and nPosbl is a quick flag to determine
|
---|
372 | *
|
---|
373 | * the number of possibilities. In the future this might
|
---|
374 | * be made into a proper 'jump' table,
|
---|
375 | *
|
---|
376 | * Since we always mark JPRs from the higher levels descending
|
---|
377 | * into the lower ones, a prospective child match would first
|
---|
378 | * look at the parent table to check the possibilities, and then
|
---|
379 | * see which ones were possible..
|
---|
380 | *
|
---|
381 | * Thus, the size of this blob would be (and these are all ints here)
|
---|
382 | * nLevels * nJPR * 2.
|
---|
383 | *
|
---|
384 | * the 'Width' of the table would be nJPR*2, and the 'height' would be
|
---|
385 | * nlevels
|
---|
386 | */
|
---|
387 |
|
---|
388 | /**
|
---|
389 | * This is called when a stack change occurs.
|
---|
390 | *
|
---|
391 | * @param jsn The lexer
|
---|
392 | * @param action The type of action, this can be PUSH or POP
|
---|
393 | * @param state A pointer to the stack currently affected by the action
|
---|
394 | * @param at A pointer to the position of the input buffer which triggered
|
---|
395 | * this action.
|
---|
396 | */
|
---|
397 | typedef void (*jsonsl_stack_callback)(
|
---|
398 | jsonsl_t jsn,
|
---|
399 | jsonsl_action_t action,
|
---|
400 | struct jsonsl_state_st* state,
|
---|
401 | const jsonsl_char_t *at);
|
---|
402 |
|
---|
403 |
|
---|
404 | /**
|
---|
405 | * This is called when an error is encountered.
|
---|
406 | * Sometimes it's possible to 'erase' characters (by replacing them
|
---|
407 | * with whitespace). If you think you have corrected the error, you
|
---|
408 | * can return a true value, in which case the parser will backtrack
|
---|
409 | * and try again.
|
---|
410 | *
|
---|
411 | * @param jsn The lexer
|
---|
412 | * @param error The error which was thrown
|
---|
413 | * @param state the current state
|
---|
414 | * @param at a pointer to the position of the input buffer which triggered
|
---|
415 | * the error. Note that this is not const, this is because you have the
|
---|
416 | * possibility of modifying the character in an attempt to correct the
|
---|
417 | * error
|
---|
418 | *
|
---|
419 | * @return zero to bail, nonzero to try again (this only makes sense if
|
---|
420 | * the input buffer has been modified by this callback)
|
---|
421 | */
|
---|
422 | typedef int (*jsonsl_error_callback)(
|
---|
423 | jsonsl_t jsn,
|
---|
424 | jsonsl_error_t error,
|
---|
425 | struct jsonsl_state_st* state,
|
---|
426 | jsonsl_char_t *at);
|
---|
427 |
|
---|
428 | struct jsonsl_st {
|
---|
429 | /** Public, read-only */
|
---|
430 |
|
---|
431 | /** This is the current level of the stack */
|
---|
432 | unsigned int level;
|
---|
433 |
|
---|
434 | /** Flag set to indicate we should stop processing */
|
---|
435 | unsigned int stopfl;
|
---|
436 |
|
---|
437 | /**
|
---|
438 | * This is the current position, relative to the beginning
|
---|
439 | * of the stream.
|
---|
440 | */
|
---|
441 | size_t pos;
|
---|
442 |
|
---|
443 | /** This is the 'bytes' variable passed to feed() */
|
---|
444 | const jsonsl_char_t *base;
|
---|
445 |
|
---|
446 | /** Callback invoked for PUSH actions */
|
---|
447 | jsonsl_stack_callback action_callback_PUSH;
|
---|
448 |
|
---|
449 | /** Callback invoked for POP actions */
|
---|
450 | jsonsl_stack_callback action_callback_POP;
|
---|
451 |
|
---|
452 | /** Default callback for any action, if neither PUSH or POP callbacks are defined */
|
---|
453 | jsonsl_stack_callback action_callback;
|
---|
454 |
|
---|
455 | /** Do not invoke callbacks for objects deeper than this level */
|
---|
456 | unsigned int max_callback_level;
|
---|
457 |
|
---|
458 | /** The error callback. Invoked when an error happens. Should not be NULL */
|
---|
459 | jsonsl_error_callback error_callback;
|
---|
460 |
|
---|
461 | /* these are boolean flags you can modify. You will be called
|
---|
462 | * about notification for each of these types if the corresponding
|
---|
463 | * variable is true.
|
---|
464 | */
|
---|
465 |
|
---|
466 | /**
|
---|
467 | * @name Callback Booleans.
|
---|
468 | * These determine whether a callback is to be invoked for certain types of objects
|
---|
469 | * @{*/
|
---|
470 |
|
---|
471 | /** Boolean flag to enable or disable the invocation for events on this type*/
|
---|
472 | int call_SPECIAL;
|
---|
473 | int call_OBJECT;
|
---|
474 | int call_LIST;
|
---|
475 | int call_STRING;
|
---|
476 | int call_HKEY;
|
---|
477 | /*@}*/
|
---|
478 |
|
---|
479 | /**
|
---|
480 | * @name u-Escape handling
|
---|
481 | * Special handling for the \\u-f00d type sequences. These are meant
|
---|
482 | * to be translated back into the corresponding octet(s).
|
---|
483 | * A special callback (if set) is invoked with *at=='u'. An application
|
---|
484 | * may wish to temporarily suspend parsing and handle the 'u-' sequence
|
---|
485 | * internally (or not).
|
---|
486 | */
|
---|
487 |
|
---|
488 | /*@{*/
|
---|
489 |
|
---|
490 | /** Callback to be invoked for a u-escape */
|
---|
491 | jsonsl_stack_callback action_callback_UESCAPE;
|
---|
492 |
|
---|
493 | /** Boolean flag, whether to invoke the callback */
|
---|
494 | int call_UESCAPE;
|
---|
495 |
|
---|
496 | /** Boolean flag, whether we should return after encountering a u-escape:
|
---|
497 | * the callback is invoked and then we return if this is true
|
---|
498 | */
|
---|
499 | int return_UESCAPE;
|
---|
500 | /*@}*/
|
---|
501 |
|
---|
502 | struct {
|
---|
503 | int allow_trailing_comma;
|
---|
504 | } options;
|
---|
505 |
|
---|
506 | /** Put anything here */
|
---|
507 | void *data;
|
---|
508 |
|
---|
509 | /*@{*/
|
---|
510 | /** Private */
|
---|
511 | int in_escape;
|
---|
512 | char expecting;
|
---|
513 | char tok_last;
|
---|
514 | int can_insert;
|
---|
515 | unsigned int levels_max;
|
---|
516 |
|
---|
517 | #ifndef JSONSL_NO_JPR
|
---|
518 | size_t jpr_count;
|
---|
519 | jsonsl_jpr_t *jprs;
|
---|
520 |
|
---|
521 | /* Root pointer for JPR matching information */
|
---|
522 | size_t *jpr_root;
|
---|
523 | #endif /* JSONSL_NO_JPR */
|
---|
524 | /*@}*/
|
---|
525 |
|
---|
526 | /**
|
---|
527 | * This is the stack. Its upper bound is levels_max, or the
|
---|
528 | * nlevels argument passed to jsonsl_new. If you modify this structure,
|
---|
529 | * make sure that this member is last.
|
---|
530 | */
|
---|
531 | struct jsonsl_state_st stack[1];
|
---|
532 | };
|
---|
533 |
|
---|
534 |
|
---|
/**
 * Creates a new lexer object, with capacity for recursion up to nlevels
 *
 * @param jsn NOTE(review): undocumented in the original header; presumably
 * caller-provided storage for the lexer -- confirm against the implementation
 * (including how much room the trailing stack[] member needs).
 * @param nlevels maximum recursion depth
 *
 * @return the lexer handle. NOTE(review): failure behaviour is not visible
 * from this header -- confirm whether NULL can be returned.
 */
JSONSL_API
jsonsl_t jsonsl_new(struct jsonsl_st *jsn, int nlevels);
|
---|
542 |
|
---|
543 | /**
|
---|
544 | * Feeds data into the lexer.
|
---|
545 | *
|
---|
546 | * @param jsn the lexer object
|
---|
547 | * @param bytes new data to be fed
|
---|
548 | * @param nbytes size of new data
|
---|
549 | */
|
---|
550 | JSONSL_API
|
---|
551 | void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
|
---|
552 |
|
---|
553 | /**
|
---|
554 | * Resets the internal parser state. This does not free the parser
|
---|
555 | * but does clean it internally, so that the next time feed() is called,
|
---|
556 | * it will be treated as a new stream
|
---|
557 | *
|
---|
558 | * @param jsn the lexer
|
---|
559 | */
|
---|
560 | JSONSL_API
|
---|
561 | void jsonsl_reset(jsonsl_t jsn);
|
---|
562 |
|
---|
563 | /**
|
---|
564 | * Frees the lexer, cleaning any allocated memory taken
|
---|
565 | *
|
---|
566 | * @param jsn the lexer
|
---|
567 | */
|
---|
568 | JSONSL_API
|
---|
569 | void jsonsl_destroy(jsonsl_t jsn);
|
---|
570 |
|
---|
571 | /**
|
---|
572 | * Gets the 'parent' element, given the current one
|
---|
573 | *
|
---|
574 | * @param jsn the lexer
|
---|
575 | * @param cur the current nest, which should be a struct jsonsl_nest_st
|
---|
576 | */
|
---|
577 | static JSONSL_INLINE
|
---|
578 | struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
|
---|
579 | const struct jsonsl_state_st *state)
|
---|
580 | {
|
---|
581 | /* Don't complain about overriding array bounds */
|
---|
582 | if (state->level > 1) {
|
---|
583 | return jsn->stack + state->level - 1;
|
---|
584 | } else {
|
---|
585 | return NULL;
|
---|
586 | }
|
---|
587 | }
|
---|
588 |
|
---|
589 | /**
|
---|
590 | * Gets the state of the last fully consumed child of this parent. This is
|
---|
591 | * only valid in the parent's POP callback.
|
---|
592 | *
|
---|
593 | * @param the lexer
|
---|
594 | * @return A pointer to the child.
|
---|
595 | */
|
---|
596 | static JSONSL_INLINE
|
---|
597 | struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
|
---|
598 | const struct jsonsl_state_st *parent)
|
---|
599 | {
|
---|
600 | return jsn->stack + (parent->level + 1);
|
---|
601 | }
|
---|
602 |
|
---|
603 | /**Call to instruct the parser to stop parsing and return. This is valid
|
---|
604 | * only from within a callback */
|
---|
605 | static JSONSL_INLINE
|
---|
606 | void jsonsl_stop(jsonsl_t jsn)
|
---|
607 | {
|
---|
608 | jsn->stopfl = 1;
|
---|
609 | }
|
---|
610 |
|
---|
611 | /**
|
---|
612 | * This enables receiving callbacks on all events. Doesn't do
|
---|
613 | * anything special but helps avoid some boilerplate.
|
---|
614 | * This does not touch the UESCAPE callbacks or flags.
|
---|
615 | */
|
---|
616 | static JSONSL_INLINE
|
---|
617 | void jsonsl_enable_all_callbacks(jsonsl_t jsn)
|
---|
618 | {
|
---|
619 | jsn->call_HKEY = 1;
|
---|
620 | jsn->call_STRING = 1;
|
---|
621 | jsn->call_OBJECT = 1;
|
---|
622 | jsn->call_SPECIAL = 1;
|
---|
623 | jsn->call_LIST = 1;
|
---|
624 | }
|
---|
625 |
|
---|
/**
 * A macro which returns true if the current state object can
 * have children. This means a list type or an object type.
 *
 * @param state pointer to a struct jsonsl_state_st. The argument is
 * parenthesized in the expansion, so any pointer expression (e.g. &st or
 * base + i) may be passed. It is evaluated twice, so it must not have side
 * effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
    ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
|
---|
632 |
|
---|
633 | /**
|
---|
634 | * These two functions, dump a string representation
|
---|
635 | * of the error or type, respectively. They will never
|
---|
636 | * return NULL
|
---|
637 | */
|
---|
638 | JSONSL_API
|
---|
639 | const char* jsonsl_strerror(jsonsl_error_t err);
|
---|
640 | JSONSL_API
|
---|
641 | const char* jsonsl_strtype(jsonsl_type_t jt);
|
---|
642 |
|
---|
643 | /**
|
---|
644 | * Dumps global metrics to the screen. This is a noop unless
|
---|
645 | * jsonsl was compiled with JSONSL_USE_METRICS
|
---|
646 | */
|
---|
647 | JSONSL_API
|
---|
648 | void jsonsl_dump_global_metrics(void);
|
---|
649 |
|
---|
650 | /* This macro just here for editors to do code folding */
|
---|
651 | #ifndef JSONSL_NO_JPR
|
---|
652 |
|
---|
653 | /**
|
---|
654 | * @name JSON Pointer API
|
---|
655 | *
|
---|
656 | * JSONPointer API. This isn't really related to the lexer (at least not yet)
|
---|
657 | * JSONPointer provides an extremely simple specification for providing
|
---|
658 | * locations within JSON objects. We will extend it a bit and allow for
|
---|
659 | * providing 'wildcard' characters by which to be able to 'query' the stream.
|
---|
660 | *
|
---|
661 | * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
|
---|
662 | *
|
---|
663 | * Currently I'm implementing the 'single query' API which can only use a single
|
---|
664 | * query component. In the future I will integrate my yet-to-be-published
|
---|
665 | * Boyer-Moore-esque prefix searching implementation, in order to allow
|
---|
666 | * multiple paths to be merged into one for quick and efficient searching.
|
---|
667 | *
|
---|
668 | *
|
---|
669 | * JPR (as we'll refer to it within the source) can be used by splitting
|
---|
670 | * the components into multiple sections, and incrementally 'track' each
|
---|
671 | * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
|
---|
672 | * callback for an object, we will check to see whether the index matching
|
---|
673 | * the component corresponding to the current level contains a match
|
---|
674 | * for our path.
|
---|
675 | *
|
---|
676 | * In order to do this properly, a structure must be maintained within the
|
---|
677 | * parent indicating whether its children are possible matches. This flag
|
---|
678 | * will be 'inherited' by all children which may conform to the match
|
---|
679 | * specification, and discarded by all which do not (thereby eliminating
|
---|
680 | * their children from inheriting it).
|
---|
681 | *
|
---|
682 | * A successful match is a complete one. One can provide multiple paths with
|
---|
683 | * multiple levels of matches e.g.
|
---|
684 | * /foo/bar/baz/^/blah
|
---|
685 | *
|
---|
686 | * @{
|
---|
687 | */
|
---|
688 |
|
---|
689 | /** The wildcard character */
|
---|
690 | #ifndef JSONSL_PATH_WILDCARD_CHAR
|
---|
691 | #define JSONSL_PATH_WILDCARD_CHAR '^'
|
---|
692 | #endif /* WILDCARD_CHAR */
|
---|
693 |
|
---|
/**
 * Result codes for JPR matching, as (name, value) pairs:
 *  COMPLETE       the path matched in full
 *  POSSIBLE       the path may still match deeper in the tree
 *  NOMATCH        the path cannot match
 *  TYPE_MISMATCH  the component type does not fit the parent's type
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1) \
    X(TYPE_MISMATCH, -2)

typedef enum {

#define X(T,v) \
    JSONSL_MATCH_##T = v,
    JSONSL_XMATCH

#undef X
    /*
     * Explicit value: an implicit one would be TYPE_MISMATCH + 1 == -1 and
     * therefore indistinguishable from JSONSL_MATCH_NOMATCH. Kept negative,
     * as the implicit value was.
     */
    JSONSL_MATCH_UNKNOWN = -3
} jsonsl_jpr_match_t;
|
---|
709 |
|
---|
/**
 * Kind of a single JPR path component.
 */
typedef enum {
    /* Special */
    JSONSL_PATH_INVALID  = -1,
    JSONSL_PATH_NONE     = 0,

    /* Regular component kinds */
    JSONSL_PATH_STRING   = 1,
    JSONSL_PATH_WILDCARD = 2,
    JSONSL_PATH_NUMERIC  = 3,
    JSONSL_PATH_ROOT     = 4
} jsonsl_jpr_type_t;
|
---|
720 |
|
---|
721 | struct jsonsl_jpr_component_st {
|
---|
722 | /** The string the component points to */
|
---|
723 | char *pstr;
|
---|
724 | /** if this is a numeric type, the number is 'cached' here */
|
---|
725 | unsigned long idx;
|
---|
726 | /** The length of the string */
|
---|
727 | size_t len;
|
---|
728 | /** The type of component (NUMERIC or STRING) */
|
---|
729 | jsonsl_jpr_type_t ptype;
|
---|
730 |
|
---|
731 | /** Set this to true to enforce type checking between dict keys and array
|
---|
732 | * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
|
---|
733 | * that an array index is actually a child of a dictionary. */
|
---|
734 | short is_arridx;
|
---|
735 | };
|
---|
736 |
|
---|
737 | struct jsonsl_jpr_st {
|
---|
738 | /** Path components */
|
---|
739 | struct jsonsl_jpr_component_st *components;
|
---|
740 | size_t ncomponents;
|
---|
741 |
|
---|
742 | /** Base of allocated string for components */
|
---|
743 | char *basestr;
|
---|
744 |
|
---|
745 | /** The original match string. Useful for returning to the user */
|
---|
746 | char *orig;
|
---|
747 | size_t norig;
|
---|
748 | };
|
---|
749 |
|
---|
750 |
|
---|
751 |
|
---|
752 | /**
|
---|
753 | * Create a new JPR object.
|
---|
754 | *
|
---|
755 | * @param path the JSONPointer path specification.
|
---|
756 | * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
|
---|
757 | * then more details will be in this variable.
|
---|
758 | *
|
---|
759 | * @return a new jsonsl_jpr_t object, or NULL on error.
|
---|
760 | */
|
---|
761 | JSONSL_API
|
---|
762 | jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
|
---|
763 |
|
---|
764 | /**
|
---|
765 | * Destroy a JPR object
|
---|
766 | */
|
---|
767 | JSONSL_API
|
---|
768 | void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
|
---|
769 |
|
---|
770 | /**
|
---|
771 | * Match a JSON object against a type and specific level
|
---|
772 | *
|
---|
773 | * @param jpr the JPR object
|
---|
774 | * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
|
---|
775 | * @param parent_level the level of the parent
|
---|
776 | * @param key the 'key' of the child. If the parent is an array, this should be
|
---|
777 | * empty.
|
---|
778 | * @param nkey - the length of the key. If the parent is an array (T_LIST), then
|
---|
779 | * this should be the current index.
|
---|
780 | *
|
---|
781 | * NOTE: The key of the child means any kind of associative data related to the
|
---|
782 | * element. Thus: <<< { "foo" : [ >>,
|
---|
783 | * the opening array's key is "foo".
|
---|
784 | *
|
---|
785 | * @return a status constant. This indicates whether a match was excluded, possible,
|
---|
786 | * or successful.
|
---|
787 | */
|
---|
788 | JSONSL_API
|
---|
789 | jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
|
---|
790 | unsigned int parent_type,
|
---|
791 | unsigned int parent_level,
|
---|
792 | const char *key, size_t nkey);
|
---|
793 |
|
---|
794 |
|
---|
795 | /**
|
---|
796 | * Associate a set of JPR objects with a lexer instance.
|
---|
797 | * This should be called before the lexer has been fed any data (and
|
---|
798 | * behavior is undefined if you don't adhere to this).
|
---|
799 | *
|
---|
800 | * After using this function, you may subsequently call jsonsl_jpr_match_state() on
|
---|
801 | * given states (presumably from within the callbacks).
|
---|
802 | *
|
---|
803 | * Note that currently the first JPR is the quickest and comes
|
---|
804 | * pre-allocated with the state structure. Further JPR objects
|
---|
805 | * are chained.
|
---|
806 | *
|
---|
807 | * @param jsn The lexer
|
---|
808 | * @param jprs An array of jsonsl_jpr_t objects
|
---|
809 | * @param njprs The number of elements in the jprs array.
|
---|
810 | */
|
---|
811 | JSONSL_API
|
---|
812 | void jsonsl_jpr_match_state_init(jsonsl_t jsn,
|
---|
813 | jsonsl_jpr_t *jprs,
|
---|
814 | size_t njprs);
|
---|
815 |
|
---|
816 | /**
|
---|
817 | * This follows the same semantics as the normal match,
|
---|
818 | * except we infer parent and type information from the relevant state objects.
|
---|
819 | * The match status (for all possible JPR objects) is set in the *out parameter.
|
---|
820 | *
|
---|
821 | * If a match has succeeded, then its JPR object will be returned. In all other
|
---|
822 | * instances, NULL is returned.
|
---|
823 | *
|
---|
824 | * @param jsn The lexer
|
---|
825 | * @param state The jsonsl_state_st which is a candidate
|
---|
826 | * @param key The hash key (if applicable, can be NULL if parent is list)
|
---|
827 | * @param nkey Length of hash key (if applicable, can be zero if parent is list)
|
---|
828 | * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
|
---|
829 | * the match result
|
---|
830 | *
|
---|
831 | * @return If a match was completed in full, then the JPR object containing
|
---|
832 | * the matching path will be returned. Otherwise, the return is NULL (note, this
|
---|
833 | * does not mean matching has failed, it can still be part of the match: check
|
---|
834 | * the out parameter).
|
---|
835 | */
|
---|
836 | JSONSL_API
|
---|
837 | jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
|
---|
838 | struct jsonsl_state_st *state,
|
---|
839 | const char *key,
|
---|
840 | size_t nkey,
|
---|
841 | jsonsl_jpr_match_t *out);
|
---|
842 |
|
---|
843 |
|
---|
844 | /**
|
---|
845 | * Cleanup any memory allocated and any states set by
|
---|
846 | * jsonsl_jpr_match_state_init() and jsonsl_jpr_match_state()
|
---|
847 | * @param jsn The lexer
|
---|
848 | */
|
---|
849 | JSONSL_API
|
---|
850 | void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
|
---|
851 |
|
---|
852 | /**
|
---|
853 | * Return a string representation of a jsonsl_jpr_match_t match result, such as the one returned by jsonsl_jpr_match()
|
---|
854 | */
|
---|
855 | JSONSL_API
|
---|
856 | const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
|
---|
857 |
|
---|
858 | /* @}*/
|
---|
859 |
|
---|
860 | /**
|
---|
861 | * Utility function to convert escape sequences into their original form.
|
---|
862 | *
|
---|
863 | * The decoders I've sampled do not seem to specify a standard behavior of what
|
---|
864 | * to escape/unescape.
|
---|
865 | *
|
---|
866 | * RFC 4627 mandates only that the quote, backslash, and ASCII control
|
---|
867 | * characters (0x00-0x1f) be escaped. It is often common for applications
|
---|
868 | * to escape a '/' - however this may also be desired behavior. The JSON
|
---|
869 | * spec is not clear on this, and therefore jsonsl leaves it up to you.
|
---|
870 | *
|
---|
871 | * @param in The input string.
|
---|
872 | * @param out An allocated output (should be the same size as in)
|
---|
873 | * @param len the size of the buffer
|
---|
874 | * @param toEscape - A sparse array of characters to unescape. Characters
|
---|
875 | * which are not present in this array, e.g. toEscape['c'] == 0 will be
|
---|
876 | * ignored and passed to the output in their original form.
|
---|
877 | * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
|
---|
878 | * then this variable will have the SPECIALf_NONASCII flag on.
|
---|
879 | *
|
---|
880 | * @param err A pointer to an error variable. If an error occurs, it will be
|
---|
881 | * set in this variable
|
---|
882 | * @param errat If not null and an error occurs, this will be set to point
|
---|
883 | * to the position within the string at which the offending character was
|
---|
884 | * encountered.
|
---|
885 | *
|
---|
886 | * @return The effective size of the output buffer.
|
---|
887 | */
|
---|
888 | JSONSL_API
|
---|
889 | size_t jsonsl_util_unescape_ex(const char *in,
|
---|
890 | char *out,
|
---|
891 | size_t len,
|
---|
892 | const int toEscape[128],
|
---|
893 | unsigned *oflags,
|
---|
894 | jsonsl_error_t *err,
|
---|
895 | const char **errat);
|
---|
896 |
|
---|
897 | /**
|
---|
898 | * Convenience macro to avoid passing too many parameters: forwards to jsonsl_util_unescape_ex() with NULL for both oflags and errat.
|
---|
899 | */
|
---|
900 | #define jsonsl_util_unescape(in, out, len, toEscape, err) \
|
---|
901 | jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
|
---|
902 |
|
---|
903 | #endif /* JSONSL_NO_JPR */
|
---|
904 |
|
---|
905 | /**
|
---|
906 | * HERE BE CHARACTER TABLES! JSONSL_CHARTABLE_string_nopass expands to a comma-separated list of 0/1 flags, one per byte value, annotated from 0x00 through 0xfe. Entries set to 1 are 0x00-0x13, the double quote (0x22) and the backslash (0x5c); all others are 0. Presumably 1 marks bytes that may not pass through a string unescaped (confirm against the lexer). NOTE(review): 0x14-0x1f are 0 here even though RFC 4627 treats all of 0x00-0x1f as control characters; confirm this is intentional.
|
---|
907 | */
|
---|
908 | #define JSONSL_CHARTABLE_string_nopass \
|
---|
909 | /* 0x00 */ 1 /* <NUL> */, /* 0x00 */ \
|
---|
910 | /* 0x01 */ 1 /* <SOH> */, /* 0x01 */ \
|
---|
911 | /* 0x02 */ 1 /* <STX> */, /* 0x02 */ \
|
---|
912 | /* 0x03 */ 1 /* <ETX> */, /* 0x03 */ \
|
---|
913 | /* 0x04 */ 1 /* <EOT> */, /* 0x04 */ \
|
---|
914 | /* 0x05 */ 1 /* <ENQ> */, /* 0x05 */ \
|
---|
915 | /* 0x06 */ 1 /* <ACK> */, /* 0x06 */ \
|
---|
916 | /* 0x07 */ 1 /* <BEL> */, /* 0x07 */ \
|
---|
917 | /* 0x08 */ 1 /* <BS> */, /* 0x08 */ \
|
---|
918 | /* 0x09 */ 1 /* <HT> */, /* 0x09 */ \
|
---|
919 | /* 0x0a */ 1 /* <LF> */, /* 0x0a */ \
|
---|
920 | /* 0x0b */ 1 /* <VT> */, /* 0x0b */ \
|
---|
921 | /* 0x0c */ 1 /* <FF> */, /* 0x0c */ \
|
---|
922 | /* 0x0d */ 1 /* <CR> */, /* 0x0d */ \
|
---|
923 | /* 0x0e */ 1 /* <SO> */, /* 0x0e */ \
|
---|
924 | /* 0x0f */ 1 /* <SI> */, /* 0x0f */ \
|
---|
925 | /* 0x10 */ 1 /* <DLE> */, /* 0x10 */ \
|
---|
926 | /* 0x11 */ 1 /* <DC1> */, /* 0x11 */ \
|
---|
927 | /* 0x12 */ 1 /* <DC2> */, /* 0x12 */ \
|
---|
928 | /* 0x13 */ 1 /* <DC3> */, /* 0x13 */ \
|
---|
929 | /* 0x14 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x21 */ \
|
---|
930 | /* 0x22 */ 1 /* <"> */, /* 0x22 */ \
|
---|
931 | /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */ \
|
---|
932 | /* 0x43 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ \
|
---|
933 | /* 0x5c */ 1 /* <\> */, /* 0x5c */ \
|
---|
934 | /* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */ \
|
---|
935 | /* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */ \
|
---|
936 | /* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */ \
|
---|
937 | /* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */ \
|
---|
938 | /* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfc */ \
|
---|
939 | /* 0xfd */ 0,0 /* 0xfe */ \
|
---|
940 |
|
---|
941 |
|
---|
942 |
|
---|
943 | #ifdef __cplusplus
|
---|
944 | }
|
---|
945 | #endif /* __cplusplus */
|
---|
946 |
|
---|
947 | #endif /* JSONSL_H_ */
|
---|