[352] | 1 | /***************************************************************************
|
---|
| 2 | * _ _ ____ _
|
---|
| 3 | * Project ___| | | | _ \| |
|
---|
| 4 | * / __| | | | |_) | |
|
---|
| 5 | * | (__| |_| | _ <| |___
|
---|
| 6 | * \___|\___/|_| \_\_____|
|
---|
| 7 | *
|
---|
| 8 | * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
|
---|
| 9 | *
|
---|
| 10 | * This software is licensed as described in the file COPYING, which
|
---|
| 11 | * you should have received as part of this distribution. The terms
|
---|
| 12 | * are also available at https://curl.haxx.se/docs/copyright.html.
|
---|
| 13 | *
|
---|
| 14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell
|
---|
| 15 | * copies of the Software, and permit persons to whom the Software is
|
---|
| 16 | * furnished to do so, under the terms of the COPYING file.
|
---|
| 17 | *
|
---|
| 18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
---|
| 19 | * KIND, either express or implied.
|
---|
| 20 | *
|
---|
| 21 | ***************************************************************************/
|
---|
| 22 | #include "tool_setup.h"
|
---|
| 23 |
|
---|
| 24 | #define ENABLE_CURLX_PRINTF
|
---|
| 25 | /* use our own printf() functions */
|
---|
| 26 | #include "curlx.h"
|
---|
| 27 | #include "tool_cfgable.h"
|
---|
| 28 | #include "tool_doswin.h"
|
---|
| 29 | #include "tool_urlglob.h"
|
---|
| 30 | #include "tool_vms.h"
|
---|
| 31 |
|
---|
| 32 | #include "memdebug.h" /* keep this as LAST include */
|
---|
| 33 |
|
---|
/*
 * GLOBERROR() stores an error message and a column number in the 'glob'
 * object that is in scope where the macro is used, and evaluates to 'code'
 * so it can be written as 'return GLOBERROR(...);'.
 *
 * Every argument and the expansion as a whole are parenthesized so the macro
 * behaves as one expression regardless of the surrounding context.
 */
#define GLOBERROR(string, column, code) \
  ((glob->error = (string)), (glob->pos = (column)), (code))
|
---|
| 36 |
|
---|
| 37 | void glob_cleanup(URLGlob* glob);
|
---|
| 38 |
|
---|
| 39 | static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
|
---|
| 40 | {
|
---|
| 41 | URLPattern *pat = &glob->pattern[glob->size];
|
---|
| 42 | pat->type = UPTSet;
|
---|
| 43 | pat->content.Set.size = 1;
|
---|
| 44 | pat->content.Set.ptr_s = 0;
|
---|
| 45 | pat->globindex = -1;
|
---|
| 46 |
|
---|
| 47 | pat->content.Set.elements = malloc(sizeof(char *));
|
---|
| 48 |
|
---|
| 49 | if(!pat->content.Set.elements)
|
---|
| 50 | return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
|
---|
| 51 |
|
---|
| 52 | pat->content.Set.elements[0] = malloc(len + 1);
|
---|
| 53 | if(!pat->content.Set.elements[0])
|
---|
| 54 | return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
|
---|
| 55 |
|
---|
| 56 | memcpy(pat->content.Set.elements[0], fixed, len);
|
---|
| 57 | pat->content.Set.elements[0][len] = 0;
|
---|
| 58 |
|
---|
| 59 | return CURLE_OK;
|
---|
| 60 | }
|
---|
| 61 |
|
---|
/*
 * multiply()
 *
 * Multiplies *amount by 'with' and stores the product back in *amount.
 *
 * Returns 0 on success. Returns 1, leaving *amount untouched, when the
 * product does not fit in an unsigned long. Multiplying by zero is a
 * success that sets *amount to 0.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long product;

  if(with == 0) {
    *amount = 0;
    return 0;
  }

  /* unsigned arithmetic wraps, so dividing back reveals an overflow */
  product = *amount * with;
  if(product / with != *amount)
    return 1; /* didn't fit, bail out */

  *amount = product;
  return 0;
}
|
---|
| 78 |
|
---|
| 79 | static CURLcode glob_set(URLGlob *glob, char **patternp,
|
---|
| 80 | size_t *posp, unsigned long *amount,
|
---|
| 81 | int globindex)
|
---|
| 82 | {
|
---|
| 83 | /* processes a set expression with the point behind the opening '{'
|
---|
| 84 | ','-separated elements are collected until the next closing '}'
|
---|
| 85 | */
|
---|
| 86 | URLPattern *pat;
|
---|
| 87 | bool done = FALSE;
|
---|
| 88 | char *buf = glob->glob_buffer;
|
---|
| 89 | char *pattern = *patternp;
|
---|
| 90 | char *opattern = pattern;
|
---|
| 91 | size_t opos = *posp-1;
|
---|
| 92 |
|
---|
| 93 | pat = &glob->pattern[glob->size];
|
---|
| 94 | /* patterns 0,1,2,... correspond to size=1,3,5,... */
|
---|
| 95 | pat->type = UPTSet;
|
---|
| 96 | pat->content.Set.size = 0;
|
---|
| 97 | pat->content.Set.ptr_s = 0;
|
---|
| 98 | pat->content.Set.elements = NULL;
|
---|
| 99 | pat->globindex = globindex;
|
---|
| 100 |
|
---|
| 101 | while(!done) {
|
---|
| 102 | switch (*pattern) {
|
---|
| 103 | case '\0': /* URL ended while set was still open */
|
---|
| 104 | return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
|
---|
| 105 |
|
---|
| 106 | case '{':
|
---|
| 107 | case '[': /* no nested expressions at this time */
|
---|
| 108 | return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
|
---|
| 109 |
|
---|
| 110 | case '}': /* set element completed */
|
---|
| 111 | if(opattern == pattern)
|
---|
| 112 | return GLOBERROR("empty string within braces", *posp,
|
---|
| 113 | CURLE_URL_MALFORMAT);
|
---|
| 114 |
|
---|
| 115 | /* add 1 to size since it'll be incremented below */
|
---|
| 116 | if(multiply(amount, pat->content.Set.size + 1))
|
---|
| 117 | return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
|
---|
| 118 |
|
---|
| 119 | /* fall-through */
|
---|
| 120 | case ',':
|
---|
| 121 |
|
---|
| 122 | *buf = '\0';
|
---|
| 123 | if(pat->content.Set.elements) {
|
---|
| 124 | char **new_arr = realloc(pat->content.Set.elements,
|
---|
| 125 | (pat->content.Set.size + 1) * sizeof(char *));
|
---|
| 126 | if(!new_arr)
|
---|
| 127 | return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
|
---|
| 128 |
|
---|
| 129 | pat->content.Set.elements = new_arr;
|
---|
| 130 | }
|
---|
| 131 | else
|
---|
| 132 | pat->content.Set.elements = malloc(sizeof(char *));
|
---|
| 133 |
|
---|
| 134 | if(!pat->content.Set.elements)
|
---|
| 135 | return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
|
---|
| 136 |
|
---|
| 137 | pat->content.Set.elements[pat->content.Set.size] =
|
---|
| 138 | strdup(glob->glob_buffer);
|
---|
| 139 | if(!pat->content.Set.elements[pat->content.Set.size])
|
---|
| 140 | return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
|
---|
| 141 | ++pat->content.Set.size;
|
---|
| 142 |
|
---|
| 143 | if(*pattern == '}') {
|
---|
| 144 | pattern++; /* pass the closing brace */
|
---|
| 145 | done = TRUE;
|
---|
| 146 | continue;
|
---|
| 147 | }
|
---|
| 148 |
|
---|
| 149 | buf = glob->glob_buffer;
|
---|
| 150 | ++pattern;
|
---|
| 151 | ++(*posp);
|
---|
| 152 | break;
|
---|
| 153 |
|
---|
| 154 | case ']': /* illegal closing bracket */
|
---|
| 155 | return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
|
---|
| 156 |
|
---|
| 157 | case '\\': /* escaped character, skip '\' */
|
---|
| 158 | if(pattern[1]) {
|
---|
| 159 | ++pattern;
|
---|
| 160 | ++(*posp);
|
---|
| 161 | }
|
---|
| 162 | /* intentional fallthrough */
|
---|
| 163 | default:
|
---|
| 164 | *buf++ = *pattern++; /* copy character to set element */
|
---|
| 165 | ++(*posp);
|
---|
| 166 | }
|
---|
| 167 | }
|
---|
| 168 |
|
---|
| 169 | *patternp = pattern; /* return with the new position */
|
---|
| 170 | return CURLE_OK;
|
---|
| 171 | }
|
---|
| 172 |
|
---|
| 173 | static CURLcode glob_range(URLGlob *glob, char **patternp,
|
---|
| 174 | size_t *posp, unsigned long *amount,
|
---|
| 175 | int globindex)
|
---|
| 176 | {
|
---|
| 177 | /* processes a range expression with the point behind the opening '['
|
---|
| 178 | - char range: e.g. "a-z]", "B-Q]"
|
---|
| 179 | - num range: e.g. "0-9]", "17-2000]"
|
---|
| 180 | - num range with leading zeros: e.g. "001-999]"
|
---|
| 181 | expression is checked for well-formedness and collected until the next ']'
|
---|
| 182 | */
|
---|
| 183 | URLPattern *pat;
|
---|
| 184 | int rc;
|
---|
| 185 | char *pattern = *patternp;
|
---|
| 186 | char *c;
|
---|
| 187 |
|
---|
| 188 | pat = &glob->pattern[glob->size];
|
---|
| 189 | pat->globindex = globindex;
|
---|
| 190 |
|
---|
| 191 | if(ISALPHA(*pattern)) {
|
---|
| 192 | /* character range detected */
|
---|
| 193 | char min_c;
|
---|
| 194 | char max_c;
|
---|
| 195 | char end_c;
|
---|
| 196 | unsigned long step = 1;
|
---|
| 197 |
|
---|
| 198 | pat->type = UPTCharRange;
|
---|
| 199 |
|
---|
| 200 | rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
|
---|
| 201 |
|
---|
| 202 | if(rc == 3) {
|
---|
| 203 | if(end_c == ':') {
|
---|
| 204 | char *endp;
|
---|
| 205 | errno = 0;
|
---|
| 206 | step = strtoul(&pattern[4], &endp, 10);
|
---|
| 207 | if(errno || &pattern[4] == endp || *endp != ']')
|
---|
| 208 | step = 0;
|
---|
| 209 | else
|
---|
| 210 | pattern = endp + 1;
|
---|
| 211 | }
|
---|
| 212 | else if(end_c != ']')
|
---|
| 213 | /* then this is wrong */
|
---|
| 214 | rc = 0;
|
---|
| 215 | else
|
---|
| 216 | /* end_c == ']' */
|
---|
| 217 | pattern += 4;
|
---|
| 218 | }
|
---|
| 219 |
|
---|
| 220 | *posp += (pattern - *patternp);
|
---|
| 221 |
|
---|
| 222 | if(rc != 3 || !step || step > (unsigned)INT_MAX ||
|
---|
| 223 | (min_c == max_c && step != 1) ||
|
---|
| 224 | (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
|
---|
| 225 | (max_c - min_c) > ('z' - 'a'))))
|
---|
| 226 | /* the pattern is not well-formed */
|
---|
| 227 | return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
|
---|
| 228 |
|
---|
| 229 | /* if there was a ":[num]" thing, use that as step or else use 1 */
|
---|
| 230 | pat->content.CharRange.step = (int)step;
|
---|
| 231 | pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
|
---|
| 232 | pat->content.CharRange.max_c = max_c;
|
---|
| 233 |
|
---|
| 234 | if(multiply(amount, ((pat->content.CharRange.max_c -
|
---|
| 235 | pat->content.CharRange.min_c) /
|
---|
| 236 | pat->content.CharRange.step + 1)))
|
---|
| 237 | return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
|
---|
| 238 | }
|
---|
| 239 | else if(ISDIGIT(*pattern)) {
|
---|
| 240 | /* numeric range detected */
|
---|
| 241 | unsigned long min_n;
|
---|
| 242 | unsigned long max_n = 0;
|
---|
| 243 | unsigned long step_n = 0;
|
---|
| 244 | char *endp;
|
---|
| 245 |
|
---|
| 246 | pat->type = UPTNumRange;
|
---|
| 247 | pat->content.NumRange.padlength = 0;
|
---|
| 248 |
|
---|
| 249 | if(*pattern == '0') {
|
---|
| 250 | /* leading zero specified, count them! */
|
---|
| 251 | c = pattern;
|
---|
| 252 | while(ISDIGIT(*c)) {
|
---|
| 253 | c++;
|
---|
| 254 | ++pat->content.NumRange.padlength; /* padding length is set for all
|
---|
| 255 | instances of this pattern */
|
---|
| 256 | }
|
---|
| 257 | }
|
---|
| 258 |
|
---|
| 259 | errno = 0;
|
---|
| 260 | min_n = strtoul(pattern, &endp, 10);
|
---|
| 261 | if(errno || (endp == pattern))
|
---|
| 262 | endp = NULL;
|
---|
| 263 | else {
|
---|
| 264 | if(*endp != '-')
|
---|
| 265 | endp = NULL;
|
---|
| 266 | else {
|
---|
| 267 | pattern = endp + 1;
|
---|
| 268 | while(*pattern && ISBLANK(*pattern))
|
---|
| 269 | pattern++;
|
---|
| 270 | if(!ISDIGIT(*pattern)) {
|
---|
| 271 | endp = NULL;
|
---|
| 272 | goto fail;
|
---|
| 273 | }
|
---|
| 274 | errno = 0;
|
---|
| 275 | max_n = strtoul(pattern, &endp, 10);
|
---|
| 276 | if(errno)
|
---|
| 277 | /* overflow */
|
---|
| 278 | endp = NULL;
|
---|
| 279 | else if(*endp == ':') {
|
---|
| 280 | pattern = endp + 1;
|
---|
| 281 | errno = 0;
|
---|
| 282 | step_n = strtoul(pattern, &endp, 10);
|
---|
| 283 | if(errno)
|
---|
| 284 | /* over/underflow situation */
|
---|
| 285 | endp = NULL;
|
---|
| 286 | }
|
---|
| 287 | else
|
---|
| 288 | step_n = 1;
|
---|
| 289 | if(endp && (*endp == ']')) {
|
---|
| 290 | pattern = endp + 1;
|
---|
| 291 | }
|
---|
| 292 | else
|
---|
| 293 | endp = NULL;
|
---|
| 294 | }
|
---|
| 295 | }
|
---|
| 296 |
|
---|
| 297 | fail:
|
---|
| 298 | *posp += (pattern - *patternp);
|
---|
| 299 |
|
---|
| 300 | if(!endp || !step_n ||
|
---|
| 301 | (min_n == max_n && step_n != 1) ||
|
---|
| 302 | (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
|
---|
| 303 | /* the pattern is not well-formed */
|
---|
| 304 | return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
|
---|
| 305 |
|
---|
| 306 | /* typecasting to ints are fine here since we make sure above that we
|
---|
| 307 | are within 31 bits */
|
---|
| 308 | pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
|
---|
| 309 | pat->content.NumRange.max_n = max_n;
|
---|
| 310 | pat->content.NumRange.step = step_n;
|
---|
| 311 |
|
---|
| 312 | if(multiply(amount, ((pat->content.NumRange.max_n -
|
---|
| 313 | pat->content.NumRange.min_n) /
|
---|
| 314 | pat->content.NumRange.step + 1)))
|
---|
| 315 | return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
|
---|
| 316 | }
|
---|
| 317 | else
|
---|
| 318 | return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
|
---|
| 319 |
|
---|
| 320 | *patternp = pattern;
|
---|
| 321 | return CURLE_OK;
|
---|
| 322 | }
|
---|
| 323 |
|
---|
| 324 | static bool peek_ipv6(const char *str, size_t *skip)
|
---|
| 325 | {
|
---|
| 326 | /*
|
---|
| 327 | * Scan for a potential IPv6 literal.
|
---|
| 328 | * - Valid globs contain a hyphen and <= 1 colon.
|
---|
| 329 | * - IPv6 literals contain no hyphens and >= 2 colons.
|
---|
| 330 | */
|
---|
| 331 | size_t i = 0;
|
---|
| 332 | size_t colons = 0;
|
---|
| 333 | if(str[i++] != '[') {
|
---|
| 334 | return FALSE;
|
---|
| 335 | }
|
---|
| 336 | for(;;) {
|
---|
| 337 | const char c = str[i++];
|
---|
| 338 | if(ISALNUM(c) || c == '.' || c == '%') {
|
---|
| 339 | /* ok */
|
---|
| 340 | }
|
---|
| 341 | else if(c == ':') {
|
---|
| 342 | colons++;
|
---|
| 343 | }
|
---|
| 344 | else if(c == ']') {
|
---|
| 345 | *skip = i;
|
---|
| 346 | return colons >= 2 ? TRUE : FALSE;
|
---|
| 347 | }
|
---|
| 348 | else {
|
---|
| 349 | return FALSE;
|
---|
| 350 | }
|
---|
| 351 | }
|
---|
| 352 | }
|
---|
| 353 |
|
---|
| 354 | static CURLcode glob_parse(URLGlob *glob, char *pattern,
|
---|
| 355 | size_t pos, unsigned long *amount)
|
---|
| 356 | {
|
---|
| 357 | /* processes a literal string component of a URL
|
---|
| 358 | special characters '{' and '[' branch to set/range processing functions
|
---|
| 359 | */
|
---|
| 360 | CURLcode res = CURLE_OK;
|
---|
| 361 | int globindex = 0; /* count "actual" globs */
|
---|
| 362 |
|
---|
| 363 | *amount = 1;
|
---|
| 364 |
|
---|
| 365 | while(*pattern && !res) {
|
---|
| 366 | char *buf = glob->glob_buffer;
|
---|
| 367 | size_t sublen = 0;
|
---|
| 368 | while(*pattern && *pattern != '{') {
|
---|
| 369 | if(*pattern == '[') {
|
---|
| 370 | /* skip over IPv6 literals and [] */
|
---|
| 371 | size_t skip = 0;
|
---|
| 372 | if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
|
---|
| 373 | skip = 2;
|
---|
| 374 | if(skip) {
|
---|
| 375 | memcpy(buf, pattern, skip);
|
---|
| 376 | buf += skip;
|
---|
| 377 | pattern += skip;
|
---|
| 378 | sublen += skip;
|
---|
| 379 | continue;
|
---|
| 380 | }
|
---|
| 381 | break;
|
---|
| 382 | }
|
---|
| 383 | if(*pattern == '}' || *pattern == ']')
|
---|
| 384 | return GLOBERROR("unmatched close brace/bracket", pos,
|
---|
| 385 | CURLE_URL_MALFORMAT);
|
---|
| 386 |
|
---|
| 387 | /* only allow \ to escape known "special letters" */
|
---|
| 388 | if(*pattern == '\\' &&
|
---|
| 389 | (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
|
---|
| 390 | *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
|
---|
| 391 |
|
---|
| 392 | /* escape character, skip '\' */
|
---|
| 393 | ++pattern;
|
---|
| 394 | ++pos;
|
---|
| 395 | }
|
---|
| 396 | *buf++ = *pattern++; /* copy character to literal */
|
---|
| 397 | ++pos;
|
---|
| 398 | sublen++;
|
---|
| 399 | }
|
---|
| 400 | if(sublen) {
|
---|
| 401 | /* we got a literal string, add it as a single-item list */
|
---|
| 402 | *buf = '\0';
|
---|
| 403 | res = glob_fixed(glob, glob->glob_buffer, sublen);
|
---|
| 404 | }
|
---|
| 405 | else {
|
---|
| 406 | switch (*pattern) {
|
---|
| 407 | case '\0': /* done */
|
---|
| 408 | break;
|
---|
| 409 |
|
---|
| 410 | case '{':
|
---|
| 411 | /* process set pattern */
|
---|
| 412 | pattern++;
|
---|
| 413 | pos++;
|
---|
| 414 | res = glob_set(glob, &pattern, &pos, amount, globindex++);
|
---|
| 415 | break;
|
---|
| 416 |
|
---|
| 417 | case '[':
|
---|
| 418 | /* process range pattern */
|
---|
| 419 | pattern++;
|
---|
| 420 | pos++;
|
---|
| 421 | res = glob_range(glob, &pattern, &pos, amount, globindex++);
|
---|
| 422 | break;
|
---|
| 423 | }
|
---|
| 424 | }
|
---|
| 425 |
|
---|
| 426 | if(++glob->size >= GLOB_PATTERN_NUM)
|
---|
| 427 | return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
|
---|
| 428 | }
|
---|
| 429 | return res;
|
---|
| 430 | }
|
---|
| 431 |
|
---|
| 432 | CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
|
---|
| 433 | FILE *error)
|
---|
| 434 | {
|
---|
| 435 | /*
|
---|
| 436 | * We can deal with any-size, just make a buffer with the same length
|
---|
| 437 | * as the specified URL!
|
---|
| 438 | */
|
---|
| 439 | URLGlob *glob_expand;
|
---|
| 440 | unsigned long amount = 0;
|
---|
| 441 | char *glob_buffer;
|
---|
| 442 | CURLcode res;
|
---|
| 443 |
|
---|
| 444 | *glob = NULL;
|
---|
| 445 |
|
---|
| 446 | glob_buffer = malloc(strlen(url) + 1);
|
---|
| 447 | if(!glob_buffer)
|
---|
| 448 | return CURLE_OUT_OF_MEMORY;
|
---|
| 449 | glob_buffer[0] = 0;
|
---|
| 450 |
|
---|
| 451 | glob_expand = calloc(1, sizeof(URLGlob));
|
---|
| 452 | if(!glob_expand) {
|
---|
| 453 | Curl_safefree(glob_buffer);
|
---|
| 454 | return CURLE_OUT_OF_MEMORY;
|
---|
| 455 | }
|
---|
| 456 | glob_expand->urllen = strlen(url);
|
---|
| 457 | glob_expand->glob_buffer = glob_buffer;
|
---|
| 458 |
|
---|
| 459 | res = glob_parse(glob_expand, url, 1, &amount);
|
---|
| 460 | if(!res)
|
---|
| 461 | *urlnum = amount;
|
---|
| 462 | else {
|
---|
| 463 | if(error && glob_expand->error) {
|
---|
| 464 | char text[128];
|
---|
| 465 | const char *t;
|
---|
| 466 | if(glob_expand->pos) {
|
---|
| 467 | snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error,
|
---|
| 468 | glob_expand->pos);
|
---|
| 469 | t = text;
|
---|
| 470 | }
|
---|
| 471 | else
|
---|
| 472 | t = glob_expand->error;
|
---|
| 473 |
|
---|
| 474 | /* send error description to the error-stream */
|
---|
| 475 | fprintf(error, "curl: (%d) [globbing] %s\n", res, t);
|
---|
| 476 | }
|
---|
| 477 | /* it failed, we cleanup */
|
---|
| 478 | glob_cleanup(glob_expand);
|
---|
| 479 | *urlnum = 1;
|
---|
| 480 | return res;
|
---|
| 481 | }
|
---|
| 482 |
|
---|
| 483 | *glob = glob_expand;
|
---|
| 484 | return CURLE_OK;
|
---|
| 485 | }
|
---|
| 486 |
|
---|
| 487 | void glob_cleanup(URLGlob* glob)
|
---|
| 488 | {
|
---|
| 489 | size_t i;
|
---|
| 490 | int elem;
|
---|
| 491 |
|
---|
| 492 | for(i = 0; i < glob->size; i++) {
|
---|
| 493 | if((glob->pattern[i].type == UPTSet) &&
|
---|
| 494 | (glob->pattern[i].content.Set.elements)) {
|
---|
| 495 | for(elem = glob->pattern[i].content.Set.size - 1;
|
---|
| 496 | elem >= 0;
|
---|
| 497 | --elem) {
|
---|
| 498 | Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
|
---|
| 499 | }
|
---|
| 500 | Curl_safefree(glob->pattern[i].content.Set.elements);
|
---|
| 501 | }
|
---|
| 502 | }
|
---|
| 503 | Curl_safefree(glob->glob_buffer);
|
---|
| 504 | Curl_safefree(glob);
|
---|
| 505 | }
|
---|
| 506 |
|
---|
| 507 | CURLcode glob_next_url(char **globbed, URLGlob *glob)
|
---|
| 508 | {
|
---|
| 509 | URLPattern *pat;
|
---|
| 510 | size_t i;
|
---|
| 511 | size_t len;
|
---|
| 512 | size_t buflen = glob->urllen + 1;
|
---|
| 513 | char *buf = glob->glob_buffer;
|
---|
| 514 |
|
---|
| 515 | *globbed = NULL;
|
---|
| 516 |
|
---|
| 517 | if(!glob->beenhere)
|
---|
| 518 | glob->beenhere = 1;
|
---|
| 519 | else {
|
---|
| 520 | bool carry = TRUE;
|
---|
| 521 |
|
---|
| 522 | /* implement a counter over the index ranges of all patterns, starting
|
---|
| 523 | with the rightmost pattern */
|
---|
| 524 | for(i = 0; carry && (i < glob->size); i++) {
|
---|
| 525 | carry = FALSE;
|
---|
| 526 | pat = &glob->pattern[glob->size - 1 - i];
|
---|
| 527 | switch(pat->type) {
|
---|
| 528 | case UPTSet:
|
---|
| 529 | if((pat->content.Set.elements) &&
|
---|
| 530 | (++pat->content.Set.ptr_s == pat->content.Set.size)) {
|
---|
| 531 | pat->content.Set.ptr_s = 0;
|
---|
| 532 | carry = TRUE;
|
---|
| 533 | }
|
---|
| 534 | break;
|
---|
| 535 | case UPTCharRange:
|
---|
| 536 | pat->content.CharRange.ptr_c =
|
---|
| 537 | (char)(pat->content.CharRange.step +
|
---|
| 538 | (int)((unsigned char)pat->content.CharRange.ptr_c));
|
---|
| 539 | if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
|
---|
| 540 | pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
|
---|
| 541 | carry = TRUE;
|
---|
| 542 | }
|
---|
| 543 | break;
|
---|
| 544 | case UPTNumRange:
|
---|
| 545 | pat->content.NumRange.ptr_n += pat->content.NumRange.step;
|
---|
| 546 | if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
|
---|
| 547 | pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
|
---|
| 548 | carry = TRUE;
|
---|
| 549 | }
|
---|
| 550 | break;
|
---|
| 551 | default:
|
---|
| 552 | printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
|
---|
| 553 | return CURLE_FAILED_INIT;
|
---|
| 554 | }
|
---|
| 555 | }
|
---|
| 556 | if(carry) { /* first pattern ptr has run into overflow, done! */
|
---|
| 557 | /* TODO: verify if this should actally return CURLE_OK. */
|
---|
| 558 | return CURLE_OK; /* CURLE_OK to match previous behavior */
|
---|
| 559 | }
|
---|
| 560 | }
|
---|
| 561 |
|
---|
| 562 | for(i = 0; i < glob->size; ++i) {
|
---|
| 563 | pat = &glob->pattern[i];
|
---|
| 564 | switch(pat->type) {
|
---|
| 565 | case UPTSet:
|
---|
| 566 | if(pat->content.Set.elements) {
|
---|
| 567 | snprintf(buf, buflen, "%s",
|
---|
| 568 | pat->content.Set.elements[pat->content.Set.ptr_s]);
|
---|
| 569 | len = strlen(buf);
|
---|
| 570 | buf += len;
|
---|
| 571 | buflen -= len;
|
---|
| 572 | }
|
---|
| 573 | break;
|
---|
| 574 | case UPTCharRange:
|
---|
| 575 | if(buflen) {
|
---|
| 576 | *buf++ = pat->content.CharRange.ptr_c;
|
---|
| 577 | *buf = '\0';
|
---|
| 578 | buflen--;
|
---|
| 579 | }
|
---|
| 580 | break;
|
---|
| 581 | case UPTNumRange:
|
---|
| 582 | snprintf(buf, buflen, "%0*ld",
|
---|
| 583 | pat->content.NumRange.padlength,
|
---|
| 584 | pat->content.NumRange.ptr_n);
|
---|
| 585 | len = strlen(buf);
|
---|
| 586 | buf += len;
|
---|
| 587 | buflen -= len;
|
---|
| 588 | break;
|
---|
| 589 | default:
|
---|
| 590 | printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
|
---|
| 591 | return CURLE_FAILED_INIT;
|
---|
| 592 | }
|
---|
| 593 | }
|
---|
| 594 |
|
---|
| 595 | *globbed = strdup(glob->glob_buffer);
|
---|
| 596 | if(!*globbed)
|
---|
| 597 | return CURLE_OUT_OF_MEMORY;
|
---|
| 598 |
|
---|
| 599 | return CURLE_OK;
|
---|
| 600 | }
|
---|
| 601 |
|
---|
| 602 | CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
|
---|
| 603 | {
|
---|
| 604 | char *target;
|
---|
| 605 | size_t allocsize;
|
---|
| 606 | char numbuf[18];
|
---|
| 607 | char *appendthis = NULL;
|
---|
| 608 | size_t appendlen = 0;
|
---|
| 609 | size_t stringlen = 0;
|
---|
| 610 |
|
---|
| 611 | *result = NULL;
|
---|
| 612 |
|
---|
| 613 | /* We cannot use the glob_buffer for storage here since the filename may
|
---|
| 614 | * be longer than the URL we use. We allocate a good start size, then
|
---|
| 615 | * we need to realloc in case of need.
|
---|
| 616 | */
|
---|
| 617 | allocsize = strlen(filename) + 1; /* make it at least one byte to store the
|
---|
| 618 | trailing zero */
|
---|
| 619 | target = malloc(allocsize);
|
---|
| 620 | if(!target)
|
---|
| 621 | return CURLE_OUT_OF_MEMORY;
|
---|
| 622 |
|
---|
| 623 | while(*filename) {
|
---|
| 624 | if(*filename == '#' && ISDIGIT(filename[1])) {
|
---|
| 625 | unsigned long i;
|
---|
| 626 | char *ptr = filename;
|
---|
| 627 | unsigned long num = strtoul(&filename[1], &filename, 10);
|
---|
| 628 | URLPattern *pat = NULL;
|
---|
| 629 |
|
---|
| 630 | if(num < glob->size) {
|
---|
| 631 | num--; /* make it zero based */
|
---|
| 632 | /* find the correct glob entry */
|
---|
| 633 | for(i = 0; i<glob->size; i++) {
|
---|
| 634 | if(glob->pattern[i].globindex == (int)num) {
|
---|
| 635 | pat = &glob->pattern[i];
|
---|
| 636 | break;
|
---|
| 637 | }
|
---|
| 638 | }
|
---|
| 639 | }
|
---|
| 640 |
|
---|
| 641 | if(pat) {
|
---|
| 642 | switch(pat->type) {
|
---|
| 643 | case UPTSet:
|
---|
| 644 | if(pat->content.Set.elements) {
|
---|
| 645 | appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
|
---|
| 646 | appendlen =
|
---|
| 647 | strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
|
---|
| 648 | }
|
---|
| 649 | break;
|
---|
| 650 | case UPTCharRange:
|
---|
| 651 | numbuf[0] = pat->content.CharRange.ptr_c;
|
---|
| 652 | numbuf[1] = 0;
|
---|
| 653 | appendthis = numbuf;
|
---|
| 654 | appendlen = 1;
|
---|
| 655 | break;
|
---|
| 656 | case UPTNumRange:
|
---|
| 657 | snprintf(numbuf, sizeof(numbuf), "%0*lu",
|
---|
| 658 | pat->content.NumRange.padlength,
|
---|
| 659 | pat->content.NumRange.ptr_n);
|
---|
| 660 | appendthis = numbuf;
|
---|
| 661 | appendlen = strlen(numbuf);
|
---|
| 662 | break;
|
---|
| 663 | default:
|
---|
| 664 | fprintf(stderr, "internal error: invalid pattern type (%d)\n",
|
---|
| 665 | (int)pat->type);
|
---|
| 666 | Curl_safefree(target);
|
---|
| 667 | return CURLE_FAILED_INIT;
|
---|
| 668 | }
|
---|
| 669 | }
|
---|
| 670 | else {
|
---|
| 671 | /* #[num] out of range, use the #[num] in the output */
|
---|
| 672 | filename = ptr;
|
---|
| 673 | appendthis = filename++;
|
---|
| 674 | appendlen = 1;
|
---|
| 675 | }
|
---|
| 676 | }
|
---|
| 677 | else {
|
---|
| 678 | appendthis = filename++;
|
---|
| 679 | appendlen = 1;
|
---|
| 680 | }
|
---|
| 681 | if(appendlen + stringlen >= allocsize) {
|
---|
| 682 | char *newstr;
|
---|
| 683 | /* we append a single byte to allow for the trailing byte to be appended
|
---|
| 684 | at the end of this function outside the while() loop */
|
---|
| 685 | allocsize = (appendlen + stringlen) * 2;
|
---|
| 686 | newstr = realloc(target, allocsize + 1);
|
---|
| 687 | if(!newstr) {
|
---|
| 688 | Curl_safefree(target);
|
---|
| 689 | return CURLE_OUT_OF_MEMORY;
|
---|
| 690 | }
|
---|
| 691 | target = newstr;
|
---|
| 692 | }
|
---|
| 693 | memcpy(&target[stringlen], appendthis, appendlen);
|
---|
| 694 | stringlen += appendlen;
|
---|
| 695 | }
|
---|
| 696 | target[stringlen]= '\0';
|
---|
| 697 |
|
---|
| 698 | #if defined(MSDOS) || defined(WIN32)
|
---|
| 699 | {
|
---|
| 700 | char *sanitized;
|
---|
| 701 | SANITIZEcode sc = sanitize_file_name(&sanitized, target,
|
---|
| 702 | (SANITIZE_ALLOW_PATH |
|
---|
| 703 | SANITIZE_ALLOW_RESERVED));
|
---|
| 704 | Curl_safefree(target);
|
---|
| 705 | if(sc)
|
---|
| 706 | return CURLE_URL_MALFORMAT;
|
---|
| 707 | target = sanitized;
|
---|
| 708 | }
|
---|
| 709 | #endif /* MSDOS || WIN32 */
|
---|
| 710 |
|
---|
| 711 | *result = target;
|
---|
| 712 | return CURLE_OK;
|
---|
| 713 | }
|
---|