[364] | 1 | /**
|
---|
| 2 | * JSON Simple/Stacked/Stateful Lexer.
|
---|
| 3 | * - Does not buffer data
|
---|
| 4 | * - Maintains state
|
---|
| 5 | * - Callback oriented
|
---|
| 6 | * - Lightweight and fast. One source file and one header file
|
---|
| 7 | *
|
---|
| 8 | * Copyright (C) 2012-2015 Mark Nunberg
|
---|
| 9 | * See included LICENSE file for license details.
|
---|
| 10 | */
|
---|
| 11 | /* copy from LICENSE file
|
---|
| 12 | Copyright (c) 2012-2015 M. Nunberg, mnunberg@haskalah.org
|
---|
| 13 |
|
---|
| 14 | Permission is hereby granted, free of charge, to any person obtaining
|
---|
| 15 | a copy of this software and associated documentation files (the
|
---|
| 16 | "Software"), to deal in the Software without restriction, including
|
---|
| 17 | without limitation the rights to use, copy, modify, merge, publish,
|
---|
| 18 | distribute, sublicense, and/or sell copies of the Software, and to
|
---|
| 19 | permit persons to whom the Software is furnished to do so, subject to
|
---|
| 20 | the following conditions:
|
---|
| 21 |
|
---|
| 22 | The above copyright notice and this permission notice shall be
|
---|
| 23 | included in all copies or substantial portions of the Software.
|
---|
| 24 |
|
---|
| 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
---|
| 26 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
---|
| 27 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
---|
| 28 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
---|
| 29 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
---|
| 30 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
---|
| 31 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
---|
| 32 | */
|
---|
| 33 | #ifndef JSONSL_H_
|
---|
| 34 | #define JSONSL_H_
|
---|
| 35 |
|
---|
| 36 | #include <stdio.h>
|
---|
| 37 | #include <stdlib.h>
|
---|
| 38 | #include <stddef.h>
|
---|
| 39 | #include <string.h>
|
---|
| 40 | #include <sys/types.h>
|
---|
| 41 | #include <wchar.h>
|
---|
| 42 |
|
---|
| 43 | #ifdef __cplusplus
|
---|
| 44 | extern "C" {
|
---|
| 45 | #endif /* __cplusplus */
|
---|
| 46 |
|
---|
/*
 * Character types consumed by the lexer.
 *
 * With JSONSL_USE_WCHAR defined the lexer works on wide characters,
 * otherwise on plain bytes. jsonsl_uchar_t is the companion type used where
 * an unsigned view of a character is wanted.
 */
#ifdef JSONSL_USE_WCHAR
/* typedef syntax is "typedef <existing type> <new name>;" */
typedef wchar_t jsonsl_char_t;
/*
 * C has no "unsigned wchar_t" (wchar_t is itself a typedef), so the wide
 * build uses wchar_t for both aliases.
 * NOTE(review): the signedness of wchar_t is implementation-defined; confirm
 * that no user of jsonsl_uchar_t relies on it being unsigned in wide builds.
 */
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
| 54 |
|
---|
/*
 * Proxies for the NaN / Infinity special flags: they alias the real flag
 * values when JSONSL_PARSE_NAN is defined and collapse to 0 otherwise.
 */
#if !defined(JSONSL_PARSE_NAN)
#define JSONSL__NAN_PROXY 0
#define JSONSL__INF_PROXY 0
#else
#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
#endif /* JSONSL_PARSE_NAN */
| 62 |
|
---|
/*
 * Fixed-width integer types (approach taken from http-parser.h, and possibly
 * others).
 *
 * <stdint.h> is used unless we are on Windows with a compiler that is
 * neither MinGW nor an MSVC with _MSC_VER >= 1600; in that case the types
 * are declared by hand from the __intN built-ins.
 */
#if !defined(_WIN32) || defined(__MINGW32__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
#include <stdint.h>
#else
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
/* Non-MSVC compilers and _MSC_VER < 1400 also get size_t / ssize_t here */
#if !defined(_MSC_VER) || _MSC_VER<1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif
#endif
| 80 |
|
---|
| 81 |
|
---|
/*
 * Selection of the per-state user data.
 *
 * When the embedder defines neither JSONSL_STATE_GENERIC nor
 * JSONSL_STATE_USER_FIELDS, the generic mode is chosen. In generic mode
 * JSONSL_STATE_USER_FIELDS is defined as empty.
 */
#if !(defined(JSONSL_STATE_GENERIC) || defined(JSONSL_STATE_USER_FIELDS))
#define JSONSL_STATE_GENERIC
#endif /* neither JSONSL_STATE_GENERIC nor JSONSL_STATE_USER_FIELDS */

#if defined(JSONSL_STATE_GENERIC)
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */
| 89 |
|
---|
/*
 * Extra members for the JPR component object. Expands to nothing unless the
 * embedder defines it before including this header.
 */
#if !defined(JSONSL_JPR_COMPONENT_USER_FIELDS)
#define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif /* JSONSL_JPR_COMPONENT_USER_FIELDS */
| 94 |
|
---|
#ifndef JSONSL_API
/**
 * Linkage decoration for the public functions.
 *
 * The export attribute is only applied when JSONSL_DLL is passed explicitly
 * (e.g. /DJSONSL_DLL), so that users who already build this as a static or
 * embedded library are not affected.
 */
#if !defined(_WIN32) || !defined(JSONSL_DLL)
#define JSONSL_API
#else
#define JSONSL_API __declspec(dllexport)
#endif /* _WIN32 && JSONSL_DLL */

#endif /* !JSONSL_API */
| 107 |
|
---|
/*
 * Inline keyword spelling for the current compiler. May be overridden by
 * defining JSONSL_INLINE before including this header.
 */
#if !defined(JSONSL_INLINE)
#ifdef _MSC_VER
#define JSONSL_INLINE __inline
#elif defined(__GNUC__)
#define JSONSL_INLINE __inline__
#else
#define JSONSL_INLINE inline
#endif /* _MSC_VER or __GNUC__ */
#endif /* JSONSL_INLINE */
| 117 |
|
---|
/* NOTE(review): presumably a nesting-depth limit; its use is not visible here */
#define JSONSL_MAX_LEVELS 512

/* Forward declarations; the public handles are pointers to these structs */
struct jsonsl_st;
struct jsonsl_jpr_st;

/** Handle to a lexer object */
typedef struct jsonsl_st *jsonsl_t;

/** Handle to a JPR (JSON pointer) object */
typedef struct jsonsl_jpr_st *jsonsl_jpr_t;
| 124 |
|
---|
/**
 * Flag bits OR'd into the type constants whose values are written in
 * "quotes", i.e. T_HKEY and T_STRING. AND a type against this flag to test
 * for such a type.
 */
#define JSONSL_Tf_STRINGY 0xffff00

/**
 * Constants for the JSON types.
 *
 * The values are chosen to aid speed: OBJECT and LIST are the character
 * literals of their opening tokens, and the others are characters that try
 * to be mnemonic for the type.
 *
 * Any new type must fit into the printable ASCII range (AND'ing it with 0x7f
 * should yield something meaningful).
 */
#define JSONSL_XTYPE \
    X(STRING, '"'|JSONSL_Tf_STRINGY) \
    X(HKEY, '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT, '{') \
    X(LIST, '[') \
    X(SPECIAL, '^') \
    X(UESCAPE, 'u')
typedef enum {
#define X(name, value) JSONSL_T_##name = value,
    JSONSL_XTYPE
#undef X
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0,
    JSONSL_T_UNKNOWN = '?'
} jsonsl_type_t;
| 159 |
|
---|
/**
 * Sub-type flags for T_SPECIAL values. They are bit flags rather than plain
 * enumerators because more than one of them can apply to a given object.
 */
#define JSONSL_XSPECIAL \
    X(NONE, 0) \
    X(SIGNED, 1<<0) \
    X(UNSIGNED, 1<<1) \
    X(TRUE, 1<<2) \
    X(FALSE, 1<<3) \
    X(NULL, 1<<4) \
    X(FLOAT, 1<<5) \
    X(EXPONENT, 1<<6) \
    X(NONASCII, 1<<7) \
    X(NAN, 1<<8) \
    X(INF, 1<<9)
typedef enum {
/* The name is only ever pasted with ##, so NULL / NAN are not macro-expanded */
#define X(name, bits) JSONSL_SPECIALf_##name = bits,
    JSONSL_XSPECIAL
#undef X

    /* Handy flags for checking */
    JSONSL_SPECIALf_UNKNOWN = 1 << 10,

    /** @private */
    JSONSL_SPECIALf_ZERO = (1 << 11) | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH = 1 << 12,
    /** @private */
    JSONSL_SPECIALf_POS_INF = JSONSL_SPECIALf_INF,
    JSONSL_SPECIALf_NEG_INF = JSONSL_SPECIALf_INF | JSONSL_SPECIALf_SIGNED,

    /** Type is numeric */
    JSONSL_SPECIALf_NUMERIC = JSONSL_SPECIALf_SIGNED | JSONSL_SPECIALf_UNSIGNED,

    /** Type is a boolean */
    JSONSL_SPECIALf_BOOLEAN = JSONSL_SPECIALf_TRUE | JSONSL_SPECIALf_FALSE,

    /** Type is an "extended" numeric type, i.e. numeric but not integral */
    JSONSL_SPECIALf_NUMNOINT = JSONSL_SPECIALf_FLOAT
                             | JSONSL_SPECIALf_EXPONENT
                             | JSONSL_SPECIALf_NAN
                             | JSONSL_SPECIALf_INF
} jsonsl_special_t;
| 206 |
|
---|
| 207 |
|
---|
/**
 * The kinds of stack (or other) events that trigger a callback.
 * Like the type constants, these values are mnemonic characters.
 */
#define JSONSL_XACTION \
    X(PUSH, '+') \
    X(POP, '-') \
    X(UESCAPE, 'U') \
    X(ERROR, '!')
typedef enum {
#define X(name, ch) JSONSL_ACTION_##name = ch,
    JSONSL_XACTION
#undef X
    JSONSL_ACTION_UNKNOWN = '?'
} jsonsl_action_t;
| 225 |
|
---|
| 226 |
|
---|
/**
 * Errors which may be reported while parsing JSON (and, at the end of the
 * list, while handling JPR paths).
 */
#define JSONSL_XERR \
/* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
/* A 'special' (numeric, true, false, null) was expected */ \
    X(SPECIAL_EXPECTED) \
/* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
/* Found a stray token */ \
    X(STRAY_TOKEN) \
/* A token was expected before this one */ \
    X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
/* Found a '\' outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
/* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in the middle of a string */ \
    X(FOUND_NULL_BYTE) \
/* Current level exceeds the limit specified in the constructor */ \
    X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
/* A key was expected, but something else was found instead */ \
    X(HKEY_EXPECTED) \
/* Illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
/* Found a \u-escape followed by fewer than 4 hex digits */ \
    X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
/* Trailing comma */ \
    X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
/* Value is missing for object */ \
    X(VALUE_EXPECTED) \
/* The following are for JPR stuff */ \
    \
/* Found a literal '%' followed by only a single valid hex digit */ \
    X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
/* Duplicate slash */ \
    X(JPR_DUPSLASH) \
/* No leading root */ \
    X(JPR_NOROOT) \
/* Allocation failure */ \
    X(ENOMEM) \
/* Invalid unicode codepoint detected (in case of escapes) */ \
    X(INVALID_CODEPOINT)

typedef enum {
    JSONSL_ERROR_SUCCESS = 0,
#define X(code) JSONSL_ERROR_##code,
    JSONSL_XERR
#undef X
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
| 292 |
|
---|
| 293 |
|
---|
| 294 | /**
|
---|
| 295 | * A state is a single level of the stack.
|
---|
| 296 | * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
|
---|
| 297 | * will remain in tact until the item is popped.
|
---|
| 298 | *
|
---|
| 299 | * As a result, it means a parent state object may be accessed from a child
|
---|
| 300 | * object, (the parents fields will all be valid). This allows a user to create
|
---|
| 301 | * an ad-hoc hierarchy on top of the JSON one.
|
---|
| 302 | *
|
---|
| 303 | */
|
---|
| 304 | struct jsonsl_state_st {
|
---|
| 305 | /**
|
---|
| 306 | * The JSON object type
|
---|
| 307 | */
|
---|
| 308 | unsigned type;
|
---|
| 309 |
|
---|
| 310 | /** If this element is special, then its extended type is here */
|
---|
| 311 | unsigned special_flags;
|
---|
| 312 |
|
---|
| 313 | /**
|
---|
| 314 | * The position (in terms of number of bytes since the first call to
|
---|
| 315 | * jsonsl_feed()) at which the state was first pushed. This includes
|
---|
| 316 | * opening tokens, if applicable.
|
---|
| 317 | *
|
---|
| 318 | * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
|
---|
| 319 | * be the position of the first quote.
|
---|
| 320 | *
|
---|
| 321 | * @see jsonsl_st::pos which contains the _current_ position and can be
|
---|
| 322 | * used during a POP callback to get the length of the element.
|
---|
| 323 | */
|
---|
| 324 | size_t pos_begin;
|
---|
| 325 |
|
---|
| 326 | /**FIXME: This is redundant as the same information can be derived from
|
---|
| 327 | * jsonsl_st::pos at pop-time */
|
---|
| 328 | size_t pos_cur;
|
---|
| 329 |
|
---|
| 330 | /**
|
---|
| 331 | * Level of recursion into nesting. This is mainly a convenience
|
---|
| 332 | * variable, as this can technically be deduced from the lexer's
|
---|
| 333 | * level parameter (though the logic is not that simple)
|
---|
| 334 | */
|
---|
| 335 | unsigned int level;
|
---|
| 336 |
|
---|
| 337 |
|
---|
| 338 | /**
|
---|
| 339 | * how many elements in the object/list.
|
---|
| 340 | * For objects (hashes), an element is either
|
---|
| 341 | * a key or a value. Thus for one complete pair,
|
---|
| 342 | * nelem will be 2.
|
---|
| 343 | *
|
---|
| 344 | * For special types, this will hold the sum of the digits.
|
---|
| 345 | * This only holds true for values which are simple signed/unsigned
|
---|
| 346 | * numbers. Otherwise a special flag is set, and extra handling is not
|
---|
| 347 | * performed.
|
---|
| 348 | */
|
---|
| 349 | uint64_t nelem;
|
---|
| 350 |
|
---|
| 351 |
|
---|
| 352 |
|
---|
| 353 | /*TODO: merge this and special_flags into a union */
|
---|
| 354 |
|
---|
| 355 |
|
---|
| 356 | /**
|
---|
| 357 | * Useful for an opening nest, this will prevent a callback from being
|
---|
| 358 | * invoked on this item or any of its children
|
---|
| 359 | */
|
---|
| 360 | int ignore_callback;
|
---|
| 361 |
|
---|
| 362 | /**
|
---|
| 363 | * Counter which is incremented each time an escape ('\') is encountered.
|
---|
| 364 | * This is used internally for non-string types and should only be
|
---|
| 365 | * inspected by the user if the state actually represents a string
|
---|
| 366 | * type.
|
---|
| 367 | */
|
---|
| 368 | unsigned int nescapes;
|
---|
| 369 |
|
---|
| 370 | /**
|
---|
| 371 | * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
|
---|
| 372 | * the macro expansion happens here.
|
---|
| 373 | *
|
---|
| 374 | * You can use these fields to store hierarchical or 'tagging' information
|
---|
| 375 | * for specific objects.
|
---|
| 376 | *
|
---|
| 377 | * See the documentation above for the lifetime of the state object (i.e.
|
---|
| 378 | * if the private data points to allocated memory, it should be freed
|
---|
| 379 | * when the object is popped, as the state object will be re-used)
|
---|
| 380 | */
|
---|
| 381 | #ifndef JSONSL_STATE_GENERIC
|
---|
| 382 | JSONSL_STATE_USER_FIELDS
|
---|
| 383 | #else
|
---|
| 384 |
|
---|
| 385 | /**
|
---|
| 386 | * Otherwise, this is a simple void * pointer for anything you want
|
---|
| 387 | */
|
---|
| 388 | void *data;
|
---|
| 389 | #endif /* JSONSL_STATE_USER_FIELDS */
|
---|
| 390 | };
|
---|
| 391 |
|
---|
/**
 * Number of elements in a list.
 * @param st The state. Must be of type JSONSL_T_LIST
 * @return the element count of the list
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)
| 397 |
|
---|
/**
 * Number of key-value pairs in an object. nelem counts keys and values
 * separately, hence the division by two.
 * @param st The state. Must be of type JSONSL_T_OBJECT
 * @return the number of key-value pairs in the object
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)
| 403 |
|
---|
/**
 * Numeric value of a state.
 * @param st The state. Must be of type JSONSL_T_SPECIAL, and its
 * special_flags must have the JSONSL_SPECIALf_NUMERIC flag set.
 * @return the numeric value of the state
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
| 411 |
|
---|
| 412 | /*
|
---|
| 413 | * So now we need some special structure for keeping the
|
---|
| 414 | * JPR info in sync. Preferably all in a single block
|
---|
| 415 | * of memory (there's no need for separate allocations).
|
---|
| 416 | * So we will define a 'table' with the following layout
|
---|
| 417 | *
|
---|
| 418 | * Level nPosbl JPR1_last JPR2_last JPR3_last
|
---|
| 419 | *
|
---|
| 420 | * 0 1 NOMATCH POSSIBLE POSSIBLE
|
---|
| 421 | * 1 0 NOMATCH NOMATCH COMPLETE
|
---|
| 422 | * [ table ends here because no further path is possible]
|
---|
| 423 | *
|
---|
| 424 | * Where the JPR..n corresponds to the number of JPRs
|
---|
| 425 | * requested, and nPosbl is a quick flag to determine
|
---|
| 426 | *
|
---|
| 427 | * the number of possibilities. In the future this might
|
---|
| 428 | * be made into a proper 'jump' table,
|
---|
| 429 | *
|
---|
| 430 | * Since we always mark JPRs from the higher levels descending
|
---|
| 431 | * into the lower ones, a prospective child match would first
|
---|
| 432 | * look at the parent table to check the possibilities, and then
|
---|
| 433 | * see which ones were possible..
|
---|
| 434 | *
|
---|
| 435 | * Thus, the size of this blob would be (and these are all ints here)
|
---|
| 436 | * nLevels * nJPR * 2.
|
---|
| 437 | *
|
---|
| 438 | * the 'Width' of the table would be nJPR*2, and the 'height' would be
|
---|
| 439 | * nlevels
|
---|
| 440 | */
|
---|
| 441 |
|
---|
| 442 | /**
|
---|
| 443 | * This is called when a stack change ocurs.
|
---|
| 444 | *
|
---|
| 445 | * @param jsn The lexer
|
---|
| 446 | * @param action The type of action, this can be PUSH or POP
|
---|
| 447 | * @param state A pointer to the stack currently affected by the action
|
---|
| 448 | * @param at A pointer to the position of the input buffer which triggered
|
---|
| 449 | * this action.
|
---|
| 450 | */
|
---|
| 451 | typedef void (*jsonsl_stack_callback)(
|
---|
| 452 | jsonsl_t jsn,
|
---|
| 453 | jsonsl_action_t action,
|
---|
| 454 | struct jsonsl_state_st* state,
|
---|
| 455 | const jsonsl_char_t *at);
|
---|
| 456 |
|
---|
| 457 |
|
---|
| 458 | /**
|
---|
| 459 | * This is called when an error is encountered.
|
---|
| 460 | * Sometimes it's possible to 'erase' characters (by replacing them
|
---|
| 461 | * with whitespace). If you think you have corrected the error, you
|
---|
| 462 | * can return a true value, in which case the parser will backtrack
|
---|
| 463 | * and try again.
|
---|
| 464 | *
|
---|
| 465 | * @param jsn The lexer
|
---|
| 466 | * @param error The error which was thrown
|
---|
| 467 | * @param state the current state
|
---|
| 468 | * @param a pointer to the position of the input buffer which triggered
|
---|
| 469 | * the error. Note that this is not const, this is because you have the
|
---|
| 470 | * possibility of modifying the character in an attempt to correct the
|
---|
| 471 | * error
|
---|
| 472 | *
|
---|
| 473 | * @return zero to bail, nonzero to try again (this only makes sense if
|
---|
| 474 | * the input buffer has been modified by this callback)
|
---|
| 475 | */
|
---|
| 476 | typedef int (*jsonsl_error_callback)(
|
---|
| 477 | jsonsl_t jsn,
|
---|
| 478 | jsonsl_error_t error,
|
---|
| 479 | struct jsonsl_state_st* state,
|
---|
| 480 | jsonsl_char_t *at);
|
---|
| 481 |
|
---|
| 482 | struct jsonsl_st {
|
---|
| 483 | /** Public, read-only */
|
---|
| 484 |
|
---|
| 485 | /** This is the current level of the stack */
|
---|
| 486 | unsigned int level;
|
---|
| 487 |
|
---|
| 488 | /** Flag set to indicate we should stop processing */
|
---|
| 489 | unsigned int stopfl;
|
---|
| 490 |
|
---|
| 491 | /**
|
---|
| 492 | * This is the current position, relative to the beginning
|
---|
| 493 | * of the stream.
|
---|
| 494 | */
|
---|
| 495 | size_t pos;
|
---|
| 496 |
|
---|
| 497 | /** This is the 'bytes' variable passed to feed() */
|
---|
| 498 | const jsonsl_char_t *base;
|
---|
| 499 |
|
---|
| 500 | /** Callback invoked for PUSH actions */
|
---|
| 501 | jsonsl_stack_callback action_callback_PUSH;
|
---|
| 502 |
|
---|
| 503 | /** Callback invoked for POP actions */
|
---|
| 504 | jsonsl_stack_callback action_callback_POP;
|
---|
| 505 |
|
---|
| 506 | /** Default callback for any action, if neither PUSH or POP callbacks are defined */
|
---|
| 507 | jsonsl_stack_callback action_callback;
|
---|
| 508 |
|
---|
| 509 | /**
|
---|
| 510 | * Do not invoke callbacks for objects deeper than this level.
|
---|
| 511 | * NOTE: This field establishes the lower bound for ignored callbacks,
|
---|
| 512 | * and is thus misnamed. `min_ignore_level` would actually make more
|
---|
| 513 | * sense, but we don't want to break API.
|
---|
| 514 | */
|
---|
| 515 | unsigned int max_callback_level;
|
---|
| 516 |
|
---|
| 517 | /** The error callback. Invoked when an error happens. Should not be NULL */
|
---|
| 518 | jsonsl_error_callback error_callback;
|
---|
| 519 |
|
---|
| 520 | /* these are boolean flags you can modify. You will be called
|
---|
| 521 | * about notification for each of these types if the corresponding
|
---|
| 522 | * variable is true.
|
---|
| 523 | */
|
---|
| 524 |
|
---|
| 525 | /**
|
---|
| 526 | * @name Callback Booleans.
|
---|
| 527 | * These determine whether a callback is to be invoked for certain types of objects
|
---|
| 528 | * @{*/
|
---|
| 529 |
|
---|
| 530 | /** Boolean flag to enable or disable the invokcation for events on this type*/
|
---|
| 531 | int call_SPECIAL;
|
---|
| 532 | int call_OBJECT;
|
---|
| 533 | int call_LIST;
|
---|
| 534 | int call_STRING;
|
---|
| 535 | int call_HKEY;
|
---|
| 536 | /*@}*/
|
---|
| 537 |
|
---|
| 538 | /**
|
---|
| 539 | * @name u-Escape handling
|
---|
| 540 | * Special handling for the \\u-f00d type sequences. These are meant
|
---|
| 541 | * to be translated back into the corresponding octet(s).
|
---|
| 542 | * A special callback (if set) is invoked with *at=='u'. An application
|
---|
| 543 | * may wish to temporarily suspend parsing and handle the 'u-' sequence
|
---|
| 544 | * internally (or not).
|
---|
| 545 | */
|
---|
| 546 |
|
---|
| 547 | /*@{*/
|
---|
| 548 |
|
---|
| 549 | /** Callback to be invoked for a u-escape */
|
---|
| 550 | jsonsl_stack_callback action_callback_UESCAPE;
|
---|
| 551 |
|
---|
| 552 | /** Boolean flag, whether to invoke the callback */
|
---|
| 553 | int call_UESCAPE;
|
---|
| 554 |
|
---|
| 555 | /** Boolean flag, whether we should return after encountering a u-escape:
|
---|
| 556 | * the callback is invoked and then we return if this is true
|
---|
| 557 | */
|
---|
| 558 | int return_UESCAPE;
|
---|
| 559 | /*@}*/
|
---|
| 560 |
|
---|
| 561 | struct {
|
---|
| 562 | int allow_trailing_comma;
|
---|
| 563 | } options;
|
---|
| 564 |
|
---|
| 565 | /** Put anything here */
|
---|
| 566 | void *data;
|
---|
| 567 |
|
---|
| 568 | /*@{*/
|
---|
| 569 | /** Private */
|
---|
| 570 | int in_escape;
|
---|
| 571 | char expecting;
|
---|
| 572 | char tok_last;
|
---|
| 573 | int can_insert;
|
---|
| 574 | unsigned int levels_max;
|
---|
| 575 |
|
---|
| 576 | #ifndef JSONSL_NO_JPR
|
---|
| 577 | size_t jpr_count;
|
---|
| 578 | jsonsl_jpr_t *jprs;
|
---|
| 579 |
|
---|
| 580 | /* Root pointer for JPR matching information */
|
---|
| 581 | size_t *jpr_root;
|
---|
| 582 | #endif /* JSONSL_NO_JPR */
|
---|
| 583 | /*@}*/
|
---|
| 584 |
|
---|
| 585 | /**
|
---|
| 586 | * This is the stack. Its upper bound is levels_max, or the
|
---|
| 587 | * nlevels argument passed to jsonsl_new. If you modify this structure,
|
---|
| 588 | * make sure that this member is last.
|
---|
| 589 | */
|
---|
| 590 | struct jsonsl_state_st stack[1];
|
---|
| 591 | };
|
---|
| 592 |
|
---|
| 593 |
|
---|
| 594 | /**
|
---|
| 595 | * Creates a new lexer object, with capacity for recursion up to nlevels
|
---|
| 596 | *
|
---|
| 597 | * @param nlevels maximum recursion depth
|
---|
| 598 | */
|
---|
| 599 | JSONSL_API
|
---|
| 600 | jsonsl_t jsonsl_new(int nlevels);
|
---|
| 601 |
|
---|
| 602 | /**
|
---|
| 603 | * Feeds data into the lexer.
|
---|
| 604 | *
|
---|
| 605 | * @param jsn the lexer object
|
---|
| 606 | * @param bytes new data to be fed
|
---|
| 607 | * @param nbytes size of new data
|
---|
| 608 | */
|
---|
| 609 | JSONSL_API
|
---|
| 610 | void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
|
---|
| 611 |
|
---|
| 612 | /**
|
---|
| 613 | * Resets the internal parser state. This does not free the parser
|
---|
| 614 | * but does clean it internally, so that the next time feed() is called,
|
---|
| 615 | * it will be treated as a new stream
|
---|
| 616 | *
|
---|
| 617 | * @param jsn the lexer
|
---|
| 618 | */
|
---|
| 619 | JSONSL_API
|
---|
| 620 | void jsonsl_reset(jsonsl_t jsn);
|
---|
| 621 |
|
---|
| 622 | /**
|
---|
| 623 | * Frees the lexer, cleaning any allocated memory taken
|
---|
| 624 | *
|
---|
| 625 | * @param jsn the lexer
|
---|
| 626 | */
|
---|
| 627 | JSONSL_API
|
---|
| 628 | void jsonsl_destroy(jsonsl_t jsn);
|
---|
| 629 |
|
---|
| 630 | /**
|
---|
| 631 | * Gets the 'parent' element, given the current one
|
---|
| 632 | *
|
---|
| 633 | * @param jsn the lexer
|
---|
| 634 | * @param cur the current nest, which should be a struct jsonsl_nest_st
|
---|
| 635 | */
|
---|
| 636 | static JSONSL_INLINE
|
---|
| 637 | struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
|
---|
| 638 | const struct jsonsl_state_st *state)
|
---|
| 639 | {
|
---|
| 640 | /* Don't complain about overriding array bounds */
|
---|
| 641 | if (state->level > 1) {
|
---|
| 642 | return jsn->stack + state->level - 1;
|
---|
| 643 | } else {
|
---|
| 644 | return NULL;
|
---|
| 645 | }
|
---|
| 646 | }
|
---|
| 647 |
|
---|
| 648 | /**
|
---|
| 649 | * Gets the state of the last fully consumed child of this parent. This is
|
---|
| 650 | * only valid in the parent's POP callback.
|
---|
| 651 | *
|
---|
| 652 | * @param the lexer
|
---|
| 653 | * @return A pointer to the child.
|
---|
| 654 | */
|
---|
| 655 | static JSONSL_INLINE
|
---|
| 656 | struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
|
---|
| 657 | const struct jsonsl_state_st *parent)
|
---|
| 658 | {
|
---|
| 659 | return jsn->stack + (parent->level + 1);
|
---|
| 660 | }
|
---|
| 661 |
|
---|
| 662 | /**Call to instruct the parser to stop parsing and return. This is valid
|
---|
| 663 | * only from within a callback */
|
---|
| 664 | static JSONSL_INLINE
|
---|
| 665 | void jsonsl_stop(jsonsl_t jsn)
|
---|
| 666 | {
|
---|
| 667 | jsn->stopfl = 1;
|
---|
| 668 | }
|
---|
| 669 |
|
---|
| 670 | /**
|
---|
| 671 | * This enables receiving callbacks on all events. Doesn't do
|
---|
| 672 | * anything special but helps avoid some boilerplate.
|
---|
| 673 | * This does not touch the UESCAPE callbacks or flags.
|
---|
| 674 | */
|
---|
| 675 | static JSONSL_INLINE
|
---|
| 676 | void jsonsl_enable_all_callbacks(jsonsl_t jsn)
|
---|
| 677 | {
|
---|
| 678 | jsn->call_HKEY = 1;
|
---|
| 679 | jsn->call_STRING = 1;
|
---|
| 680 | jsn->call_OBJECT = 1;
|
---|
| 681 | jsn->call_SPECIAL = 1;
|
---|
| 682 | jsn->call_LIST = 1;
|
---|
| 683 | }
|
---|
| 684 |
|
---|
/**
 * Evaluates to true if the given state object can have children, i.e. it is
 * a list type or an object type.
 *
 * @param state pointer to the state to test. It may be any pointer
 * expression; note that it can be evaluated twice, so it should be free of
 * side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
    ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
| 691 |
|
---|
| 692 | /**
|
---|
| 693 | * These two functions, dump a string representation
|
---|
| 694 | * of the error or type, respectively. They will never
|
---|
| 695 | * return NULL
|
---|
| 696 | */
|
---|
| 697 | JSONSL_API
|
---|
| 698 | const char* jsonsl_strerror(jsonsl_error_t err);
|
---|
| 699 | JSONSL_API
|
---|
| 700 | const char* jsonsl_strtype(jsonsl_type_t jt);
|
---|
| 701 |
|
---|
| 702 | /**
|
---|
| 703 | * Dumps global metrics to the screen. This is a noop unless
|
---|
| 704 | * jsonsl was compiled with JSONSL_USE_METRICS
|
---|
| 705 | */
|
---|
| 706 | JSONSL_API
|
---|
| 707 | void jsonsl_dump_global_metrics(void);
|
---|
| 708 |
|
---|
| 709 | /* This macro just here for editors to do code folding */
|
---|
| 710 | #ifndef JSONSL_NO_JPR
|
---|
| 711 |
|
---|
| 712 | /**
|
---|
| 713 | * @name JSON Pointer API
|
---|
| 714 | *
|
---|
| 715 | * JSONPointer API. This isn't really related to the lexer (at least not yet)
|
---|
| 716 | * JSONPointer provides an extremely simple specification for providing
|
---|
| 717 | * locations within JSON objects. We will extend it a bit and allow for
|
---|
| 718 | * providing 'wildcard' characters by which to be able to 'query' the stream.
|
---|
| 719 | *
|
---|
| 720 | * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
|
---|
| 721 | *
|
---|
| 722 | * Currently I'm implementing the 'single query' API which can only use a single
|
---|
| 723 | * query component. In the future I will integrate my yet-to-be-published
|
---|
| 724 | * Boyer-Moore-esque prefix searching implementation, in order to allow
|
---|
| 725 | * multiple paths to be merged into one for quick and efficient searching.
|
---|
| 726 | *
|
---|
| 727 | *
|
---|
| 728 | * JPR (as we'll refer to it within the source) can be used by splitting
|
---|
| 729 | * the components into multiple sections, and incrementally 'track' each
|
---|
| 730 | * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
|
---|
| 731 | * callback for an object, we will check to see whether the index matching
|
---|
| 732 | * the component corresponding to the current level contains a match
|
---|
| 733 | * for our path.
|
---|
| 734 | *
|
---|
| 735 | * In order to do this properly, a structure must be maintained within the
|
---|
| 736 | * parent indicating whether its children are possible matches. This flag
|
---|
| 737 | * will be 'inherited' by all children which may conform to the match
|
---|
| 738 | * specification, and discarded by all which do not (thereby eliminating
|
---|
| 739 | * their children from inheriting it).
|
---|
| 740 | *
|
---|
| 741 | * A successful match is a complete one. One can provide multiple paths with
|
---|
| 742 | * multiple levels of matches e.g.
|
---|
| 743 | * /foo/bar/baz/^/blah
|
---|
| 744 | *
|
---|
| 745 | * @{
|
---|
| 746 | */
|
---|
| 747 |
|
---|
/** The wildcard character used in paths; may be overridden by defining it
 * before including this header */
#if !defined(JSONSL_PATH_WILDCARD_CHAR)
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* JSONSL_PATH_WILDCARD_CHAR */
| 752 |
|
---|
/**
 * Result codes for JPR path matching. Non-matching results are negative.
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1) \
    X(TYPE_MISMATCH, -2)

typedef enum {

#define X(T,v) \
    JSONSL_MATCH_##T = v,
    JSONSL_XMATCH

#undef X
    /*
     * Needs an explicit value: left implicit it would be TYPE_MISMATCH + 1,
     * i.e. -1, which is the same value as JSONSL_MATCH_NOMATCH. -3 keeps it
     * negative while making it distinguishable from every other result.
     */
    JSONSL_MATCH_UNKNOWN = -3
} jsonsl_jpr_match_t;
| 768 | /** Kind of a JPR path component; stored in jsonsl_jpr_component_st::ptype */
|
---|
| 769 | typedef enum {
|
---|
| 770 | JSONSL_PATH_STRING = 1,
|
---|
| 771 | JSONSL_PATH_WILDCARD,
|
---|
| 772 | JSONSL_PATH_NUMERIC,
|
---|
| 773 | JSONSL_PATH_ROOT,
|
---|
| 774 |
|
---|
| 775 | /* Special values, numbered outside the range used by the kinds above */
|
---|
| 776 | JSONSL_PATH_INVALID = -1,
|
---|
| 777 | JSONSL_PATH_NONE = 0
|
---|
| 778 | } jsonsl_jpr_type_t;
|
---|
| 779 | /** A single path component of a JPR (an element of jsonsl_jpr_st::components) */
|
---|
| 780 | struct jsonsl_jpr_component_st {
|
---|
| 781 | /** The string the component points to */
|
---|
| 782 | char *pstr;
|
---|
| 783 | /** If this is a numeric type, the number is 'cached' here */
|
---|
| 784 | unsigned long idx;
|
---|
| 785 | /** The length of the string */
|
---|
| 786 | size_t len;
|
---|
| 787 | /** The type of component: a jsonsl_jpr_type_t value (e.g. NUMERIC or STRING) */
|
---|
| 788 | jsonsl_jpr_type_t ptype;
|
---|
| 789 |
|
---|
| 790 | /** Set this to true to enforce type checking between dict keys and array
|
---|
| 791 | * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
|
---|
| 792 | * that an array index is actually a child of a dictionary. */
|
---|
| 793 | short is_arridx;
|
---|
| 794 |
|
---|
| 795 | /* Extra fields (for more advanced searches; default is empty) */
|
---|
| 796 | JSONSL_JPR_COMPONENT_USER_FIELDS
|
---|
| 797 | };
|
---|
| 798 | /** JPR data: the path components, the expected match type, and the associated strings */
|
---|
| 799 | struct jsonsl_jpr_st {
|
---|
| 800 | /** Path components */
|
---|
| 801 | struct jsonsl_jpr_component_st *components;
|
---|
| 802 | size_t ncomponents; /* presumably the number of entries in components -- TODO confirm */
|
---|
| 803 |
|
---|
| 804 | /** Type of the match to be expected. If nonzero, will be compared against
|
---|
| 805 | * the actual type */
|
---|
| 806 | unsigned match_type;
|
---|
| 807 |
|
---|
| 808 | /** Base of allocated string for components */
|
---|
| 809 | char *basestr;
|
---|
| 810 |
|
---|
| 811 | /** The original match string. Useful for returning to the user */
|
---|
| 812 | char *orig;
|
---|
| 813 | size_t norig; /* presumably the length of orig -- TODO confirm */
|
---|
| 814 | };
|
---|
| 815 |
|
---|
| 816 | /**
|
---|
| 817 | * Create a new JPR object.
|
---|
| 818 | *
|
---|
| 819 | * @param path the JSONPointer path specification.
|
---|
| 820 | * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
|
---|
| 821 | * then more details will be in this variable.
|
---|
| 822 | *
|
---|
| 823 | * @return a new jsonsl_jpr_t object, or NULL on error.
|
---|
| 824 | */
|
---|
| 825 | JSONSL_API
|
---|
| 826 | jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
|
---|
| 827 |
|
---|
| 828 | /**
|
---|
| 829 | * Destroy a JPR object
|
---|
| 830 | */
|
---|
| 831 | JSONSL_API
|
---|
| 832 | void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
|
---|
| 833 |
|
---|
| 834 | /**
|
---|
| 835 | * Match a JSON object against a type and specific level
|
---|
| 836 | *
|
---|
| 837 | * @param jpr the JPR object
|
---|
| 838 | * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
|
---|
| 839 | * @param parent_level the level of the parent
|
---|
| 840 | * @param key the 'key' of the child. If the parent is an array, this should be
|
---|
| 841 | * empty.
|
---|
| 842 | * @param nkey - the length of the key. If the parent is an array (T_LIST), then
|
---|
| 843 | * this should be the current index.
|
---|
| 844 | *
|
---|
| 845 | * NOTE: The key of the child means any kind of associative data related to the
|
---|
| 846 | * element. Thus: <<< { "foo" : [ >>>,
|
---|
| 847 | * the opening array's key is "foo".
|
---|
| 848 | *
|
---|
| 849 | * @return a status constant. This indicates whether a match was excluded, possible,
|
---|
| 850 | * or successful.
|
---|
| 851 | */
|
---|
| 852 | JSONSL_API
|
---|
| 853 | jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
|
---|
| 854 | unsigned int parent_type,
|
---|
| 855 | unsigned int parent_level,
|
---|
| 856 | const char *key, size_t nkey);
|
---|
| 857 |
|
---|
| 858 | /**
|
---|
| 859 | * Alternate matching algorithm. This matching algorithm does not use
|
---|
| 860 | * JSONPointer but relies on a more structured searching mechanism. It
|
---|
| 861 | * assumes that there is a clear distinction between array indices and
|
---|
| 862 | * object keys. In this case, the jsonsl_jpr_component_st::ptype should
|
---|
| 863 | * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
|
---|
| 864 | * jsonsl_jpr_component_st::is_arridx field will be removed in a future
|
---|
| 865 | * version).
|
---|
| 866 | *
|
---|
| 867 | * @param jpr The path
|
---|
| 868 | * @param parent The parent structure. Can be NULL if this is the root object
|
---|
| 869 | * @param child The child structure. Should not be NULL
|
---|
| 870 | * @param key Object key, if an object
|
---|
| 871 | * @param nkey Length of object key
|
---|
| 872 | * @return Status constant if successful
|
---|
| 873 | *
|
---|
| 874 | * @note
|
---|
| 875 | * For successful matching, both the key and the path itself should be normalized
|
---|
| 876 | * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
|
---|
| 877 | * should currently be done in the application. Another version of this function
|
---|
| 878 | * may use a temporary buffer in such circumstances (allocated by the application).
|
---|
| 879 | *
|
---|
| 880 | * Since this function also checks the state of the child, it should only
|
---|
| 881 | * be called on PUSH callbacks, and not POP callbacks
|
---|
| 882 | */
|
---|
| 883 | JSONSL_API
|
---|
| 884 | jsonsl_jpr_match_t
|
---|
| 885 | jsonsl_path_match(jsonsl_jpr_t jpr,
|
---|
| 886 | const struct jsonsl_state_st *parent,
|
---|
| 887 | const struct jsonsl_state_st *child,
|
---|
| 888 | const char *key, size_t nkey);
|
---|
| 889 |
|
---|
| 890 |
|
---|
| 891 | /**
|
---|
| 892 | * Associate a set of JPR objects with a lexer instance.
|
---|
| 893 | * This should be called before the lexer has been fed any data (and
|
---|
| 894 | * behavior is undefined if you don't adhere to this).
|
---|
| 895 | *
|
---|
| 896 | * After using this function, you may subsequently call match_state() on
|
---|
| 897 | * given states (presumably from within the callbacks).
|
---|
| 898 | *
|
---|
| 899 | * Note that currently the first JPR is the quickest and comes
|
---|
| 900 | * pre-allocated with the state structure. Further JPR objects
|
---|
| 901 | * are chained.
|
---|
| 902 | *
|
---|
| 903 | * @param jsn The lexer
|
---|
| 904 | * @param jprs An array of jsonsl_jpr_t objects
|
---|
| 905 | * @param njprs How many elements in the jprs array.
|
---|
| 906 | */
|
---|
| 907 | JSONSL_API
|
---|
| 908 | void jsonsl_jpr_match_state_init(jsonsl_t jsn,
|
---|
| 909 | jsonsl_jpr_t *jprs,
|
---|
| 910 | size_t njprs);
|
---|
| 911 |
|
---|
| 912 | /**
|
---|
| 913 | * This follows the same semantics as the normal match,
|
---|
| 914 | * except we infer parent and type information from the relevant state objects.
|
---|
| 915 | * The match status (for all possible JPR objects) is set in the *out parameter.
|
---|
| 916 | *
|
---|
| 917 | * If a match has succeeded, then its JPR object will be returned. In all other
|
---|
| 918 | * instances, NULL is returned.
|
---|
| 919 | *
|
---|
| 920 | * @param jsn The lexer, with JPR objects associated via jsonsl_jpr_match_state_init()
|
---|
| 921 | * @param state The jsonsl_state_st which is a candidate
|
---|
| 922 | * @param key The hash key (if applicable, can be NULL if parent is list)
|
---|
| 923 | * @param nkey Length of hash key (if applicable, can be zero if parent is list)
|
---|
| 924 | * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
|
---|
| 925 | * the match result
|
---|
| 926 | *
|
---|
| 927 | * @return If a match was completed in full, then the JPR object containing
|
---|
| 928 | * the matching path will be returned. Otherwise, the return is NULL (note, this
|
---|
| 929 | * does not mean matching has failed, it can still be part of the match: check
|
---|
| 930 | * the out parameter).
|
---|
| 931 | */
|
---|
| 932 | JSONSL_API
|
---|
| 933 | jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
|
---|
| 934 | struct jsonsl_state_st *state,
|
---|
| 935 | const char *key,
|
---|
| 936 | size_t nkey,
|
---|
| 937 | jsonsl_jpr_match_t *out);
|
---|
| 938 |
|
---|
| 939 |
|
---|
| 940 | /**
|
---|
| 941 | * Cleanup any memory allocated and any states set by
|
---|
| 942 | * match_state_init() and match_state()
|
---|
| 943 | * @param jsn The lexer
|
---|
| 944 | */
|
---|
| 945 | JSONSL_API
|
---|
| 946 | void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
|
---|
| 947 |
|
---|
| 948 | /**
|
---|
| 949 | * Return a string representation of the match result returned by match()
|
---|
| 950 | */
|
---|
| 951 | JSONSL_API
|
---|
| 952 | const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
|
---|
| 953 |
|
---|
| 954 | /* @}*/
|
---|
| 955 |
|
---|
| 956 | /**
|
---|
| 957 | * Utility function to convert escape sequences into their original form.
|
---|
| 958 | *
|
---|
| 959 | * The decoders I've sampled do not seem to specify a standard behavior of what
|
---|
| 960 | * to escape/unescape.
|
---|
| 961 | *
|
---|
| 962 | * RFC 4627 mandates only that the quote, backslash, and ASCII control
|
---|
| 963 | * characters (0x00-0x1f) be escaped. It is often common for applications
|
---|
| 964 | * to escape a '/' - however this may also be desired behavior. The JSON
|
---|
| 965 | * spec is not clear on this, and therefore jsonsl leaves it up to you.
|
---|
| 966 | *
|
---|
| 967 | * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
|
---|
| 968 | * true when dealing with 'u-escapes' which can be expressed perfectly fine
|
---|
| 969 | * as utf8. One use case for normalization is JPR string comparison, in which
|
---|
| 970 | * case two effectively equivalent strings may not match because one is using
|
---|
| 971 | * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
|
---|
| 972 | * an empty `toEscape` table, enabling only the `u` index.
|
---|
| 973 | *
|
---|
| 974 | * @param in The input string.
|
---|
| 975 | * @param out An allocated output (should be the same size as in)
|
---|
| 976 | * @param len the size of the buffer
|
---|
| 977 | * @param toEscape - A sparse array of characters to unescape. Characters
|
---|
| 978 | * which are not present in this array, e.g. toEscape['c'] == 0 will be
|
---|
| 979 | * ignored and passed to the output in their original form.
|
---|
| 980 | * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
|
---|
| 981 | * then this variable will have the SPECIALf_NONASCII flag on.
|
---|
| 982 | *
|
---|
| 983 | * @param err A pointer to an error variable. If an error occurs, it will be
|
---|
| 984 | * set in this variable
|
---|
| 985 | * @param errat If not null and an error occurs, this will be set to point
|
---|
| 986 | * to the position within the string at which the offending character was
|
---|
| 987 | * encountered.
|
---|
| 988 | *
|
---|
| 989 | * @return The effective size of the output buffer.
|
---|
| 990 | *
|
---|
| 991 | * @note
|
---|
| 992 | * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
|
---|
| 993 | * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
|
---|
| 994 | * which while still correct in some sense was confusing for many (especially
|
---|
| 995 | * considering that the inputs were variations of char).
|
---|
| 996 | *
|
---|
| 997 | * @note
|
---|
| 998 | * The output buffer will never be larger than the input buffer, since
|
---|
| 999 | * standard escape sequences (i.e. '\t') occupy two bytes in the source
|
---|
| 1000 | * but only one byte (when unescaped) in the output. Likewise u-escapes
|
---|
| 1001 | * (i.e. \uXXXX) will occupy six bytes in the source, but at the most
|
---|
| 1002 | * two bytes when unescaped.
|
---|
| 1003 | */
|
---|
| 1004 | JSONSL_API
|
---|
| 1005 | size_t jsonsl_util_unescape_ex(const char *in,
|
---|
| 1006 | char *out,
|
---|
| 1007 | size_t len,
|
---|
| 1008 | const int toEscape[128],
|
---|
| 1009 | unsigned *oflags,
|
---|
| 1010 | jsonsl_error_t *err,
|
---|
| 1011 | const char **errat);
|
---|
| 1012 |
|
---|
| 1013 | /**
|
---|
| 1014 | * Convenience macro: calls jsonsl_util_unescape_ex() with NULL for both the oflags and errat arguments
|
---|
| 1015 | */
|
---|
| 1016 | #define jsonsl_util_unescape(in, out, len, toEscape, err) \
|
---|
| 1017 | jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
|
---|
| 1018 |
|
---|
| 1019 | #endif /* JSONSL_NO_JPR */
|
---|
| 1020 |
|
---|
| 1021 | #ifdef __cplusplus
|
---|
| 1022 | }
|
---|
| 1023 | #endif /* __cplusplus */
|
---|
| 1024 |
|
---|
| 1025 | #endif /* JSONSL_H_ */
|
---|