[444] | 1 | /*
|
---|
| 2 | * An implementation of what I call the "Sea of Stars" algorithm for
|
---|
| 3 | * POSIX fnmatch(). The basic idea is that we factor the pattern into
|
---|
| 4 | * a head component (which we match first and can reject without ever
|
---|
| 5 | * measuring the length of the string), an optional tail component
|
---|
| 6 | * (which only exists if the pattern contains at least one star), and
|
---|
| 7 | * an optional "sea of stars", a set of star-separated components
|
---|
| 8 | * between the head and tail. After the head and tail matches have
|
---|
| 9 | * been removed from the input string, the components in the "sea of
|
---|
| 10 | * stars" are matched sequentially by searching for their first
|
---|
| 11 | * occurrence past the end of the previous match.
|
---|
| 12 | *
|
---|
| 13 | * - Rich Felker, April 2012
|
---|
| 14 | */
|
---|
| 15 |
|
---|
| 16 | #include <string.h>
|
---|
| 17 | #include <fnmatch.h>
|
---|
| 18 | #include <stdlib.h>
|
---|
| 19 | #include <wchar.h>
|
---|
| 20 | #include <wctype.h>
|
---|
| 21 | #include "locale_impl.h"
|
---|
| 22 |
|
---|
| 23 | #define END 0
|
---|
| 24 | #define UNMATCHABLE -2
|
---|
| 25 | #define BRACKET -3
|
---|
| 26 | #define QUESTION -4
|
---|
| 27 | #define STAR -5
|
---|
| 28 |
|
---|
| 29 | static int str_next(const char *str, size_t n, size_t *step)
|
---|
| 30 | {
|
---|
| 31 | if (!n) {
|
---|
| 32 | *step = 0;
|
---|
| 33 | return 0;
|
---|
| 34 | }
|
---|
| 35 | if (str[0] >= 128U) {
|
---|
| 36 | wchar_t wc;
|
---|
| 37 | int k = mbtowc(&wc, str, n);
|
---|
| 38 | if (k<0) {
|
---|
| 39 | *step = 1;
|
---|
| 40 | return -1;
|
---|
| 41 | }
|
---|
| 42 | *step = k;
|
---|
| 43 | return wc;
|
---|
| 44 | }
|
---|
| 45 | *step = 1;
|
---|
| 46 | return str[0];
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 | static int pat_next(const char *pat, size_t m, size_t *step, int flags)
|
---|
| 50 | {
|
---|
| 51 | int esc = 0;
|
---|
| 52 | if (!m || !*pat) {
|
---|
| 53 | *step = 0;
|
---|
| 54 | return END;
|
---|
| 55 | }
|
---|
| 56 | *step = 1;
|
---|
| 57 | if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
|
---|
| 58 | *step = 2;
|
---|
| 59 | pat++;
|
---|
| 60 | esc = 1;
|
---|
| 61 | goto escaped;
|
---|
| 62 | }
|
---|
| 63 | if (pat[0]=='[') {
|
---|
| 64 | size_t k = 1;
|
---|
| 65 | if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
|
---|
| 66 | if (k<m) if (pat[k] == ']') k++;
|
---|
| 67 | for (; k<m && pat[k] && pat[k]!=']'; k++) {
|
---|
| 68 | if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
|
---|
| 69 | int z = pat[k+1];
|
---|
| 70 | k+=2;
|
---|
| 71 | if (k<m && pat[k]) k++;
|
---|
| 72 | while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
|
---|
| 73 | if (k==m || !pat[k]) break;
|
---|
| 74 | }
|
---|
| 75 | }
|
---|
| 76 | if (k==m || !pat[k]) {
|
---|
| 77 | *step = 1;
|
---|
| 78 | return '[';
|
---|
| 79 | }
|
---|
| 80 | *step = k+1;
|
---|
| 81 | return BRACKET;
|
---|
| 82 | }
|
---|
| 83 | if (pat[0] == '*')
|
---|
| 84 | return STAR;
|
---|
| 85 | if (pat[0] == '?')
|
---|
| 86 | return QUESTION;
|
---|
| 87 | escaped:
|
---|
| 88 | if (pat[0] >= 128U) {
|
---|
| 89 | wchar_t wc;
|
---|
| 90 | int k = mbtowc(&wc, pat, m);
|
---|
| 91 | if (k<0) {
|
---|
| 92 | *step = 0;
|
---|
| 93 | return UNMATCHABLE;
|
---|
| 94 | }
|
---|
| 95 | *step = k + esc;
|
---|
| 96 | return wc;
|
---|
| 97 | }
|
---|
| 98 | return pat[0];
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | static int casefold(int k)
|
---|
| 102 | {
|
---|
| 103 | int c = towupper(k);
|
---|
| 104 | return c == k ? towlower(k) : c;
|
---|
| 105 | }
|
---|
| 106 |
|
---|
| 107 | static int match_bracket(const char *p, int k, int kfold)
|
---|
| 108 | {
|
---|
| 109 | wchar_t wc;
|
---|
| 110 | int inv = 0;
|
---|
| 111 | p++;
|
---|
| 112 | if (*p=='^' || *p=='!') {
|
---|
| 113 | inv = 1;
|
---|
| 114 | p++;
|
---|
| 115 | }
|
---|
| 116 | if (*p==']') {
|
---|
| 117 | if (k==']') return !inv;
|
---|
| 118 | p++;
|
---|
| 119 | } else if (*p=='-') {
|
---|
| 120 | if (k=='-') return !inv;
|
---|
| 121 | p++;
|
---|
| 122 | }
|
---|
| 123 | wc = p[-1];
|
---|
| 124 | for (; *p != ']'; p++) {
|
---|
| 125 | if (p[0]=='-' && p[1]!=']') {
|
---|
| 126 | wchar_t wc2;
|
---|
| 127 | int l = mbtowc(&wc2, p+1, 4);
|
---|
| 128 | if (l < 0) return 0;
|
---|
| 129 | if (wc <= wc2)
|
---|
| 130 | if ((unsigned)k-wc <= wc2-wc ||
|
---|
| 131 | (unsigned)kfold-wc <= wc2-wc)
|
---|
| 132 | return !inv;
|
---|
| 133 | p += l-1;
|
---|
| 134 | continue;
|
---|
| 135 | }
|
---|
| 136 | if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
|
---|
| 137 | const char *p0 = p+2;
|
---|
| 138 | int z = p[1];
|
---|
| 139 | p+=3;
|
---|
| 140 | while (p[-1]!=z || p[0]!=']') p++;
|
---|
| 141 | if (z == ':' && p-1-p0 < 16) {
|
---|
| 142 | char buf[16];
|
---|
| 143 | memcpy(buf, p0, p-1-p0);
|
---|
| 144 | buf[p-1-p0] = 0;
|
---|
| 145 | if (iswctype(k, wctype(buf)) ||
|
---|
| 146 | iswctype(kfold, wctype(buf)))
|
---|
| 147 | return !inv;
|
---|
| 148 | }
|
---|
| 149 | continue;
|
---|
| 150 | }
|
---|
| 151 | if (*p < 128U) {
|
---|
| 152 | wc = (unsigned char)*p;
|
---|
| 153 | } else {
|
---|
| 154 | int l = mbtowc(&wc, p, 4);
|
---|
| 155 | if (l < 0) return 0;
|
---|
| 156 | p += l-1;
|
---|
| 157 | }
|
---|
| 158 | if (wc==k || wc==kfold) return !inv;
|
---|
| 159 | }
|
---|
| 160 | return inv;
|
---|
| 161 | }
|
---|
| 162 |
|
---|
| 163 | static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
|
---|
| 164 | {
|
---|
| 165 | const char *p, *ptail, *endpat;
|
---|
| 166 | const char *s, *stail, *endstr;
|
---|
| 167 | size_t pinc, sinc, tailcnt=0;
|
---|
| 168 | int c, k, kfold;
|
---|
| 169 |
|
---|
| 170 | if (flags & FNM_PERIOD) {
|
---|
| 171 | if (*str == '.' && *pat != '.')
|
---|
| 172 | return FNM_NOMATCH;
|
---|
| 173 | }
|
---|
| 174 | for (;;) {
|
---|
| 175 | switch ((c = pat_next(pat, m, &pinc, flags))) {
|
---|
| 176 | case UNMATCHABLE:
|
---|
| 177 | return FNM_NOMATCH;
|
---|
| 178 | case STAR:
|
---|
| 179 | pat++;
|
---|
| 180 | m--;
|
---|
| 181 | break;
|
---|
| 182 | default:
|
---|
| 183 | k = str_next(str, n, &sinc);
|
---|
| 184 | if (k <= 0)
|
---|
| 185 | return (c==END) ? 0 : FNM_NOMATCH;
|
---|
| 186 | str += sinc;
|
---|
| 187 | n -= sinc;
|
---|
| 188 | kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
|
---|
| 189 | if (c == BRACKET) {
|
---|
| 190 | if (!match_bracket(pat, k, kfold))
|
---|
| 191 | return FNM_NOMATCH;
|
---|
| 192 | } else if (c != QUESTION && k != c && kfold != c) {
|
---|
| 193 | return FNM_NOMATCH;
|
---|
| 194 | }
|
---|
| 195 | pat+=pinc;
|
---|
| 196 | m-=pinc;
|
---|
| 197 | continue;
|
---|
| 198 | }
|
---|
| 199 | break;
|
---|
| 200 | }
|
---|
| 201 |
|
---|
| 202 | /* Compute real pat length if it was initially unknown/-1 */
|
---|
| 203 | m = strnlen(pat, m);
|
---|
| 204 | endpat = pat + m;
|
---|
| 205 |
|
---|
| 206 | /* Find the last * in pat and count chars needed after it */
|
---|
| 207 | for (p=ptail=pat; p<endpat; p+=pinc) {
|
---|
| 208 | switch (pat_next(p, endpat-p, &pinc, flags)) {
|
---|
| 209 | case UNMATCHABLE:
|
---|
| 210 | return FNM_NOMATCH;
|
---|
| 211 | case STAR:
|
---|
| 212 | tailcnt=0;
|
---|
| 213 | ptail = p+1;
|
---|
| 214 | break;
|
---|
| 215 | default:
|
---|
| 216 | tailcnt++;
|
---|
| 217 | break;
|
---|
| 218 | }
|
---|
| 219 | }
|
---|
| 220 |
|
---|
| 221 | /* Past this point we need not check for UNMATCHABLE in pat,
|
---|
| 222 | * because all of pat has already been parsed once. */
|
---|
| 223 |
|
---|
| 224 | /* Compute real str length if it was initially unknown/-1 */
|
---|
| 225 | n = strnlen(str, n);
|
---|
| 226 | endstr = str + n;
|
---|
| 227 | if (n < tailcnt) return FNM_NOMATCH;
|
---|
| 228 |
|
---|
| 229 | /* Find the final tailcnt chars of str, accounting for UTF-8.
|
---|
| 230 | * On illegal sequences we may get it wrong, but in that case
|
---|
| 231 | * we necessarily have a matching failure anyway. */
|
---|
| 232 | for (s=endstr; s>str && tailcnt; tailcnt--) {
|
---|
| 233 | if (s[-1] < 128U || MB_CUR_MAX==1) s--;
|
---|
| 234 | else while ((unsigned char)*--s-0x80U<0x40 && s>str);
|
---|
| 235 | }
|
---|
| 236 | if (tailcnt) return FNM_NOMATCH;
|
---|
| 237 | stail = s;
|
---|
| 238 |
|
---|
| 239 | /* Check that the pat and str tails match */
|
---|
| 240 | p = ptail;
|
---|
| 241 | for (;;) {
|
---|
| 242 | c = pat_next(p, endpat-p, &pinc, flags);
|
---|
| 243 | p += pinc;
|
---|
| 244 | if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
|
---|
| 245 | if (c != END) return FNM_NOMATCH;
|
---|
| 246 | break;
|
---|
| 247 | }
|
---|
| 248 | s += sinc;
|
---|
| 249 | kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
|
---|
| 250 | if (c == BRACKET) {
|
---|
| 251 | if (!match_bracket(p-pinc, k, kfold))
|
---|
| 252 | return FNM_NOMATCH;
|
---|
| 253 | } else if (c != QUESTION && k != c && kfold != c) {
|
---|
| 254 | return FNM_NOMATCH;
|
---|
| 255 | }
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 | /* We're all done with the tails now, so throw them out */
|
---|
| 259 | endstr = stail;
|
---|
| 260 | endpat = ptail;
|
---|
| 261 |
|
---|
| 262 | /* Match pattern components until there are none left */
|
---|
| 263 | while (pat<endpat) {
|
---|
| 264 | p = pat;
|
---|
| 265 | s = str;
|
---|
| 266 | for (;;) {
|
---|
| 267 | c = pat_next(p, endpat-p, &pinc, flags);
|
---|
| 268 | p += pinc;
|
---|
| 269 | /* Encountering * completes/commits a component */
|
---|
| 270 | if (c == STAR) {
|
---|
| 271 | pat = p;
|
---|
| 272 | str = s;
|
---|
| 273 | break;
|
---|
| 274 | }
|
---|
| 275 | k = str_next(s, endstr-s, &sinc);
|
---|
| 276 | if (!k)
|
---|
| 277 | return FNM_NOMATCH;
|
---|
| 278 | kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
|
---|
| 279 | if (c == BRACKET) {
|
---|
| 280 | if (!match_bracket(p-pinc, k, kfold))
|
---|
| 281 | break;
|
---|
| 282 | } else if (c != QUESTION && k != c && kfold != c) {
|
---|
| 283 | break;
|
---|
| 284 | }
|
---|
| 285 | s += sinc;
|
---|
| 286 | }
|
---|
| 287 | if (c == STAR) continue;
|
---|
| 288 | /* If we failed, advance str, by 1 char if it's a valid
|
---|
| 289 | * char, or past all invalid bytes otherwise. */
|
---|
| 290 | k = str_next(str, endstr-str, &sinc);
|
---|
| 291 | if (k > 0) str += sinc;
|
---|
| 292 | else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
|
---|
| 293 | }
|
---|
| 294 |
|
---|
| 295 | return 0;
|
---|
| 296 | }
|
---|
| 297 |
|
---|
| 298 | int fnmatch(const char *pat, const char *str, int flags)
|
---|
| 299 | {
|
---|
| 300 | const char *s, *p;
|
---|
| 301 | size_t inc;
|
---|
| 302 | int c;
|
---|
| 303 | if (flags & FNM_PATHNAME) for (;;) {
|
---|
| 304 | for (s=str; *s && *s!='/'; s++);
|
---|
| 305 | for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
|
---|
| 306 | if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
|
---|
| 307 | return FNM_NOMATCH;
|
---|
| 308 | if (fnmatch_internal(pat, p-pat, str, s-str, flags))
|
---|
| 309 | return FNM_NOMATCH;
|
---|
| 310 | if (!c) return 0;
|
---|
| 311 | str = s+1;
|
---|
| 312 | pat = p+inc;
|
---|
| 313 | } else if (flags & FNM_LEADING_DIR) {
|
---|
| 314 | for (s=str; *s; s++) {
|
---|
| 315 | if (*s != '/') continue;
|
---|
| 316 | if (!fnmatch_internal(pat, -1, str, s-str, flags))
|
---|
| 317 | return 0;
|
---|
| 318 | }
|
---|
| 319 | }
|
---|
| 320 | return fnmatch_internal(pat, -1, str, -1, flags);
|
---|
| 321 | }
|
---|