1 | /*
|
---|
2 | * The information in this document is subject to change
|
---|
3 | * without notice and should not be construed as a commitment
|
---|
4 | * by Digital Equipment Corporation or by DECUS.
|
---|
5 | *
|
---|
6 | * Neither Digital Equipment Corporation, DECUS, nor the authors
|
---|
7 | * assume any responsibility for the use or reliability of this
|
---|
8 | * document or the described software.
|
---|
9 | *
|
---|
10 | * Copyright (C) 1980, DECUS
|
---|
11 | *
|
---|
12 | * General permission to copy or modify, but not for profit, is
|
---|
13 | * hereby granted, provided that the above copyright notice is
|
---|
14 | * included and reference made to the fact that reproduction
|
---|
15 | * privileges were granted by DECUS.
|
---|
16 | */
|
---|
17 | #include <stdio.h>
|
---|
18 | #include <stdlib.h>
|
---|
19 | #include <ctype.h> // tolower()
|
---|
20 |
|
---|
/*
 * grep
 *
 * Runs on the Decus compiler or on vms.  On vms, define as:
 *    grep :== "$disk:[account]grep"      (native)
 *    grep :== "$disk:[account]grep grep" (Decus)
 * See below for more information.
 */
29 |
|
---|
/*
 * General help text: one output line per entry, closed by a null
 * pointer.  help() prints every entry followed by a newline.
 */
char *documentation[] = {
   "grep searches a file for a given pattern. Execute by",
   " grep [flags] regular_expression file_list\n",
   "Flags are single characters preceded by '-':",
   " -c Only a count of matching lines is printed",
   " -f Print file name for matching lines switch, see below",
   " -n Each line is preceded by its line number",
   " -v Only print non-matching lines\n",
   "The file_list is a list of files (wildcards are acceptable on RSX modes).",
   "\nThe file name is normally printed if there is a file given.",
   "The -f flag reverses this action (print name no file, not if more).\n",
   NULL
};
42 |
|
---|
/*
 * Help text describing the regular-expression syntax: one output line
 * per entry, closed by a null pointer.  Printed by help() after the
 * general documentation when grep is run with the single argument "?".
 */
char *patdoc[] = {
   "The regular_expression defines the pattern to search for. Upper- and",
   "lower-case are always ignored. Blank lines never match. The expression",
   "should be quoted to prevent file-name translation.",
   "x An ordinary character (not mentioned below) matches that character.",
   "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
   "'^' A circumflex at the beginning of an expression matches the",
   " beginning of a line.",
   "'$' A dollar-sign at the end of an expression matches the end of a line.",
   "'.' A period matches any character except \"new-line\".",
   "':a' A colon matches a class of characters described by the following",
   "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
   "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
   "': ' other control characters, such as new-line.",
   "'*' An expression followed by an asterisk matches zero or more",
   " occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
   " \"foo\", etc.",
   "'+' An expression followed by a plus sign matches one or more",
   " occurrences of that expression: \"fo+\" matches \"fo\", etc.",
   "'-' An expression followed by a minus sign optionally matches",
   " the expression.",
   "'[]' A string enclosed in square brackets matches any character in",
   " that string, but no others. If the first character in the",
   " string is a circumflex, the expression matches any character",
   " except \"new-line\" and the characters in the string. For",
   " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
   " matches \"abc\" but not \"axb\". A range of characters may be",
   " specified by two characters separated by \"-\". Note that,",
   " [a-z] matches alphabetics, while [z-a] never matches.",
   "The concatenation of regular expressions is a regular expression.",
   NULL
};
74 |
|
---|
/* Buffer sizes: lbuf[] holds one input line, pbuf[] the compiled pattern. */
enum {
   LMAX = 512,
   PMAX = 256
};

/*
 * Op codes stored in the compiled pattern buffer pbuf[].
 * compile() and cclass() emit them; pmatch() interprets them.
 */
enum {
   CHAR   = 1,    /* Literal character, followed by the character  */
   BOL    = 2,    /* '^'                                           */
   EOL    = 3,    /* '$'                                           */
   ANY    = 4,    /* '.'                                           */
   CLASS  = 5,    /* "[...]", followed by byte count and members   */
   NCLASS = 6,    /* "[^...]", same layout as CLASS                */
   STAR   = 7,    /* '*' applied to the element that follows       */
   PLUS   = 8,    /* '+' applied to the element that follows       */
   MINUS  = 9,    /* '-' applied to the element that follows       */
   ALPHA  = 10,   /* ":a"                                          */
   DIGIT  = 11,   /* ":d"                                          */
   NALPHA = 12,   /* ":n"                                          */
   PUNCT  = 13,   /* ": "                                          */
   RANGE  = 14,   /* Inside a class: followed by low and high char */
   ENDPAT = 15    /* End of a (sub)pattern                         */
};
93 |
|
---|
94 | int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
|
---|
95 |
|
---|
96 | char *pp, lbuf[LMAX], pbuf[PMAX];
|
---|
97 |
|
---|
98 | char *cclass();
|
---|
99 | char *pmatch();
|
---|
100 | void store(int);
|
---|
101 | void error(char *);
|
---|
102 | void badpat(char *, char *, char *);
|
---|
103 | int match(void);
|
---|
104 |
|
---|
105 |
|
---|
/*** Display a file name *******************************/
/*
 * Write the banner line "File <s>:" to standard output.
 */
void file(char *s)
{
   fprintf(stdout, "File %s:\n", s);
}
111 |
|
---|
/*** Report unopenable file ****************************/
/*
 * Tell the user, on standard error, that the file named `s` could not
 * be opened.  The program keeps running.
 */
void cant(char *s)
{
   const char *name = s;

   fprintf(stderr, "%s: cannot open\n", name);
}
117 |
|
---|
/*** Give good help ************************************/
/*
 * Print every string of the null-terminated table `hp` on standard
 * output, one per line.
 */
void help(char **hp)
{
   int   i;

   for (i = 0; hp[i] != 0; i++) {
      printf("%s\n", hp[i]);
   }
}
126 |
|
---|
/*** Display usage summary *****************************/
/*
 * Report the error text `s` together with a one-line usage summary on
 * standard error, then terminate the program with exit status 1.
 */
void usage(char *s)
{
   fprintf(stderr,
           "?GREP-E-%s\n"
           "Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n",
           s);
   exit(1);
}
135 |
|
---|
136 | /*** Compile the pattern into global pbuf[] ************/
|
---|
137 | void compile(char *source)
|
---|
138 | {
|
---|
139 | char *s; /* Source string pointer */
|
---|
140 | char *lp; /* Last pattern pointer */
|
---|
141 | int c; /* Current character */
|
---|
142 | int o; /* Temp */
|
---|
143 | char *spp; /* Save beginning of pattern */
|
---|
144 |
|
---|
145 | s = source;
|
---|
146 | if (debug)
|
---|
147 | printf("Pattern = \"%s\"\n", s);
|
---|
148 | pp = pbuf;
|
---|
149 | while (c = *s++) {
|
---|
150 | /*
|
---|
151 | * STAR, PLUS and MINUS are special.
|
---|
152 | */
|
---|
153 | if (c == '*' || c == '+' || c == '-') {
|
---|
154 | if (pp == pbuf ||
|
---|
155 | (o=pp[-1]) == BOL ||
|
---|
156 | o == EOL ||
|
---|
157 | o == STAR ||
|
---|
158 | o == PLUS ||
|
---|
159 | o == MINUS)
|
---|
160 | badpat("Illegal occurrence op.", source, s);
|
---|
161 | store(ENDPAT);
|
---|
162 | store(ENDPAT);
|
---|
163 | spp = pp; /* Save pattern end */
|
---|
164 | while (--pp > lp) /* Move pattern down */
|
---|
165 | *pp = pp[-1]; /* one byte */
|
---|
166 | *pp = (c == '*') ? STAR :
|
---|
167 | (c == '-') ? MINUS : PLUS;
|
---|
168 | pp = spp; /* Restore pattern end */
|
---|
169 | continue;
|
---|
170 | }
|
---|
171 | /*
|
---|
172 | * All the rest.
|
---|
173 | */
|
---|
174 | lp = pp; /* Remember start */
|
---|
175 | switch(c) {
|
---|
176 |
|
---|
177 | case '^':
|
---|
178 | store(BOL);
|
---|
179 | break;
|
---|
180 |
|
---|
181 | case '$':
|
---|
182 | store(EOL);
|
---|
183 | break;
|
---|
184 |
|
---|
185 | case '.':
|
---|
186 | store(ANY);
|
---|
187 | break;
|
---|
188 |
|
---|
189 | case '[':
|
---|
190 | s = cclass(source, s);
|
---|
191 | break;
|
---|
192 |
|
---|
193 | case ':':
|
---|
194 | if (*s) {
|
---|
195 | switch(tolower(c = *s++)) {
|
---|
196 |
|
---|
197 | case 'a':
|
---|
198 | case 'A':
|
---|
199 | store(ALPHA);
|
---|
200 | break;
|
---|
201 |
|
---|
202 | case 'd':
|
---|
203 | case 'D':
|
---|
204 | store(DIGIT);
|
---|
205 | break;
|
---|
206 |
|
---|
207 | case 'n':
|
---|
208 | case 'N':
|
---|
209 | store(NALPHA);
|
---|
210 | break;
|
---|
211 |
|
---|
212 | case ' ':
|
---|
213 | store(PUNCT);
|
---|
214 | break;
|
---|
215 |
|
---|
216 | default:
|
---|
217 | badpat("Unknown : type", source, s);
|
---|
218 |
|
---|
219 | }
|
---|
220 | break;
|
---|
221 | }
|
---|
222 | else badpat("No : type", source, s);
|
---|
223 |
|
---|
224 | case '\\':
|
---|
225 | if (*s)
|
---|
226 | c = *s++;
|
---|
227 |
|
---|
228 | default:
|
---|
229 | store(CHAR);
|
---|
230 | store(tolower(c));
|
---|
231 | }
|
---|
232 | }
|
---|
233 | store(ENDPAT);
|
---|
234 | store(0); /* Terminate string */
|
---|
235 | if (debug) {
|
---|
236 | for (lp = pbuf; lp < pp;) {
|
---|
237 | if ((c = (*lp++ & 0377)) < ' ')
|
---|
238 | printf("\\%o ", c);
|
---|
239 | else printf("%c ", c);
|
---|
240 | }
|
---|
241 | printf("\n");
|
---|
242 | }
|
---|
243 | }
|
---|
244 |
|
---|
245 | /*** Compile a class (within []) ***********************/
|
---|
246 | char *cclass(char *source, char *src)
|
---|
247 | /* char *source; // Pattern start -- for error msg. */
|
---|
248 | /* char *src; // Class start */
|
---|
249 | {
|
---|
250 | char *s; /* Source pointer */
|
---|
251 | char *cp; /* Pattern start */
|
---|
252 | int c; /* Current character */
|
---|
253 | int o; /* Temp */
|
---|
254 |
|
---|
255 | s = src;
|
---|
256 | o = CLASS;
|
---|
257 | if (*s == '^') {
|
---|
258 | ++s;
|
---|
259 | o = NCLASS;
|
---|
260 | }
|
---|
261 | store(o);
|
---|
262 | cp = pp;
|
---|
263 | store(0); /* Byte count */
|
---|
264 | while ((c = *s++) && c!=']') {
|
---|
265 | if (c == '\\') { /* Store quoted char */
|
---|
266 | if ((c = *s++) == '\0') /* Gotta get something */
|
---|
267 | badpat("Class terminates badly", source, s);
|
---|
268 | else store(tolower(c));
|
---|
269 | }
|
---|
270 | else if (c == '-' &&
|
---|
271 | (pp - cp) > 1 && *s != ']' && *s != '\0') {
|
---|
272 | c = pp[-1]; /* Range start */
|
---|
273 | pp[-1] = RANGE; /* Range signal */
|
---|
274 | store(c); /* Re-store start */
|
---|
275 | c = *s++; /* Get end char and*/
|
---|
276 | store(tolower(c)); /* Store it */
|
---|
277 | }
|
---|
278 | else {
|
---|
279 | store(tolower(c)); /* Store normal char */
|
---|
280 | }
|
---|
281 | }
|
---|
282 | if (c != ']')
|
---|
283 | badpat("Unterminated class", source, s);
|
---|
284 | if ((c = (pp - cp)) >= 256)
|
---|
285 | badpat("Class too large", source, s);
|
---|
286 | if (c == 0)
|
---|
287 | badpat("Empty class", source, s);
|
---|
288 | *cp = c;
|
---|
289 | return(s);
|
---|
290 | }
|
---|
291 |
|
---|
292 | /*** Store an entry in the pattern buffer **************/
|
---|
293 | void store(int op)
|
---|
294 | {
|
---|
295 | if (pp >= &pbuf[PMAX])
|
---|
296 | error("Pattern too complex\n");
|
---|
297 | *pp++ = op;
|
---|
298 | }
|
---|
299 |
|
---|
/*** Report a bad pattern specification ****************/
/*
 * Print a diagnostic for a malformed pattern on standard error and
 * terminate through error().
 *
 *   message - what is wrong
 *   source  - start of the pattern text
 *   stop    - one past the character at which compilation stopped;
 *             callers pass a pointer that has already been advanced
 *             beyond `source`, so stop[-1] is inside the pattern
 */
void badpat(char *message, char *source, char *stop)
{
   fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
   /* A pointer difference is a ptrdiff_t; convert it to match %ld. */
   fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
           (long)(stop - source), stop[-1]);
   error("?GREP-E-Bad pattern\n");
}
311 |
|
---|
312 | /*** Scan the file for the pattern in pbuf[] ***********/
|
---|
313 | void grep(FILE *fp, char *fn)
|
---|
314 | /* FILE *fp; // File to process */
|
---|
315 | /* char *fn; // File name (for -f option) */
|
---|
316 | {
|
---|
317 | int lno, count, m;
|
---|
318 |
|
---|
319 | lno = 0;
|
---|
320 | count = 0;
|
---|
321 | while (fgets(lbuf, LMAX, fp)) {
|
---|
322 | ++lno;
|
---|
323 | m = match();
|
---|
324 | if ((m && !vflag) || (!m && vflag)) {
|
---|
325 | ++count;
|
---|
326 | if (!cflag) {
|
---|
327 | if (fflag && fn) {
|
---|
328 | file(fn);
|
---|
329 | fn = 0;
|
---|
330 | }
|
---|
331 | if (nflag)
|
---|
332 | printf("%d\t", lno);
|
---|
333 | printf("%s\n", lbuf);
|
---|
334 | }
|
---|
335 | }
|
---|
336 | }
|
---|
337 | if (cflag) {
|
---|
338 | if (fflag && fn)
|
---|
339 | file(fn);
|
---|
340 | printf("%d\n", count);
|
---|
341 | }
|
---|
342 | }
|
---|
343 |
|
---|
344 | /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
|
---|
345 | int match()
|
---|
346 | {
|
---|
347 | char *l; /* Line pointer */
|
---|
348 |
|
---|
349 | for (l = lbuf; *l; ++l) {
|
---|
350 | if (pmatch(l, pbuf))
|
---|
351 | return(1);
|
---|
352 | }
|
---|
353 | return(0);
|
---|
354 | }
|
---|
355 |
|
---|
356 | /*** Match partial line with pattern *******************/
|
---|
357 | char *pmatch(char *line, char *pattern)
|
---|
358 | /* char *line; // (partial) line to match */
|
---|
359 | /* char *pattern; // (partial) pattern to match */
|
---|
360 | {
|
---|
361 | char *l; /* Current line pointer */
|
---|
362 | char *p; /* Current pattern pointer */
|
---|
363 | char c; /* Current character */
|
---|
364 | char *e; /* End for STAR and PLUS match */
|
---|
365 | int op; /* Pattern operation */
|
---|
366 | int n; /* Class counter */
|
---|
367 | char *are; /* Start of STAR match */
|
---|
368 |
|
---|
369 | l = line;
|
---|
370 | if (debug > 1)
|
---|
371 | printf("pmatch(\"%s\")\n", line);
|
---|
372 | p = pattern;
|
---|
373 | while ((op = *p++) != ENDPAT) {
|
---|
374 | if (debug > 1)
|
---|
375 | printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
|
---|
376 | l-line, *l, *l, op);
|
---|
377 | switch(op) {
|
---|
378 |
|
---|
379 | case CHAR:
|
---|
380 | if (tolower(*l++) != *p++)
|
---|
381 | return(0);
|
---|
382 | break;
|
---|
383 |
|
---|
384 | case BOL:
|
---|
385 | if (l != lbuf)
|
---|
386 | return(0);
|
---|
387 | break;
|
---|
388 |
|
---|
389 | case EOL:
|
---|
390 | if (*l != '\0')
|
---|
391 | return(0);
|
---|
392 | break;
|
---|
393 |
|
---|
394 | case ANY:
|
---|
395 | if (*l++ == '\0')
|
---|
396 | return(0);
|
---|
397 | break;
|
---|
398 |
|
---|
399 | case DIGIT:
|
---|
400 | if ((c = *l++) < '0' || (c > '9'))
|
---|
401 | return(0);
|
---|
402 | break;
|
---|
403 |
|
---|
404 | case ALPHA:
|
---|
405 | c = tolower(*l++);
|
---|
406 | if (c < 'a' || c > 'z')
|
---|
407 | return(0);
|
---|
408 | break;
|
---|
409 |
|
---|
410 | case NALPHA:
|
---|
411 | c = tolower(*l++);
|
---|
412 | if (c >= 'a' && c <= 'z')
|
---|
413 | break;
|
---|
414 | else if (c < '0' || c > '9')
|
---|
415 | return(0);
|
---|
416 | break;
|
---|
417 |
|
---|
418 | case PUNCT:
|
---|
419 | c = *l++;
|
---|
420 | if (c == 0 || c > ' ')
|
---|
421 | return(0);
|
---|
422 | break;
|
---|
423 |
|
---|
424 | case CLASS:
|
---|
425 | case NCLASS:
|
---|
426 | c = tolower(*l++);
|
---|
427 | n = *p++ & 0377;
|
---|
428 | do {
|
---|
429 | if (*p == RANGE) {
|
---|
430 | p += 3;
|
---|
431 | n -= 2;
|
---|
432 | if (c >= p[-2] && c <= p[-1])
|
---|
433 | break;
|
---|
434 | }
|
---|
435 | else if (c == *p++)
|
---|
436 | break;
|
---|
437 | } while (--n > 1);
|
---|
438 | if ((op == CLASS) == (n <= 1))
|
---|
439 | return(0);
|
---|
440 | if (op == CLASS)
|
---|
441 | p += n - 2;
|
---|
442 | break;
|
---|
443 |
|
---|
444 | case MINUS:
|
---|
445 | e = pmatch(l, p); /* Look for a match */
|
---|
446 | while (*p++ != ENDPAT); /* Skip over pattern */
|
---|
447 | if (e) /* Got a match? */
|
---|
448 | l = e; /* Yes, update string */
|
---|
449 | break; /* Always succeeds */
|
---|
450 |
|
---|
451 | case PLUS: /* One or more ... */
|
---|
452 | if ((l = pmatch(l, p)) == 0)
|
---|
453 | return(0); /* Gotta have a match */
|
---|
454 | case STAR: /* Zero or more ... */
|
---|
455 | are = l; /* Remember line start */
|
---|
456 | while (*l && (e = pmatch(l, p)))
|
---|
457 | l = e; /* Get longest match */
|
---|
458 | while (*p++ != ENDPAT); /* Skip over pattern */
|
---|
459 | while (l >= are) { /* Try to match rest */
|
---|
460 | if (e = pmatch(l, p))
|
---|
461 | return(e);
|
---|
462 | --l; /* Nope, try earlier */
|
---|
463 | }
|
---|
464 | return(0); /* Nothing else worked */
|
---|
465 |
|
---|
466 | default:
|
---|
467 | printf("Bad op code %d\n", op);
|
---|
468 | error("Cannot happen -- match\n");
|
---|
469 | }
|
---|
470 | }
|
---|
471 | return(l);
|
---|
472 | }
|
---|
473 |
|
---|
/*** Report an error ***********************************/
/*
 * Write the message `s` to standard error exactly as given (callers
 * supply the trailing newline) and terminate with exit status 1.
 */
void error(char *s)
{
   fputs(s, stderr);
   exit(1);
}
480 |
|
---|
481 | /*** Main program - parse arguments & grep *************/
|
---|
482 | int main(int argc, char **argv)
|
---|
483 | {
|
---|
484 | char *p;
|
---|
485 | int c, i;
|
---|
486 | int gotpattern;
|
---|
487 |
|
---|
488 | FILE *f;
|
---|
489 |
|
---|
490 | if (argc <= 1)
|
---|
491 | usage("No arguments");
|
---|
492 | if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
|
---|
493 | help(documentation);
|
---|
494 | help(patdoc);
|
---|
495 | return 0;
|
---|
496 | }
|
---|
497 | nfile = argc-1;
|
---|
498 | gotpattern = 0;
|
---|
499 | for (i=1; i < argc; ++i) {
|
---|
500 | p = argv[i];
|
---|
501 | if (*p == '-') {
|
---|
502 | ++p;
|
---|
503 | while (c = *p++) {
|
---|
504 | switch(tolower(c)) {
|
---|
505 |
|
---|
506 | case '?':
|
---|
507 | help(documentation);
|
---|
508 | break;
|
---|
509 |
|
---|
510 | case 'C':
|
---|
511 | case 'c':
|
---|
512 | ++cflag;
|
---|
513 | break;
|
---|
514 |
|
---|
515 | case 'D':
|
---|
516 | case 'd':
|
---|
517 | ++debug;
|
---|
518 | break;
|
---|
519 |
|
---|
520 | case 'F':
|
---|
521 | case 'f':
|
---|
522 | ++fflag;
|
---|
523 | break;
|
---|
524 |
|
---|
525 | case 'n':
|
---|
526 | case 'N':
|
---|
527 | ++nflag;
|
---|
528 | break;
|
---|
529 |
|
---|
530 | case 'v':
|
---|
531 | case 'V':
|
---|
532 | ++vflag;
|
---|
533 | break;
|
---|
534 |
|
---|
535 | default:
|
---|
536 | usage("Unknown flag");
|
---|
537 | }
|
---|
538 | }
|
---|
539 | argv[i] = 0;
|
---|
540 | --nfile;
|
---|
541 | } else if (!gotpattern) {
|
---|
542 | compile(p);
|
---|
543 | argv[i] = 0;
|
---|
544 | ++gotpattern;
|
---|
545 | --nfile;
|
---|
546 | }
|
---|
547 | }
|
---|
548 | if (!gotpattern)
|
---|
549 | usage("No pattern");
|
---|
550 | if (nfile == 0)
|
---|
551 | grep(stdin, 0);
|
---|
552 | else {
|
---|
553 | fflag = fflag ^ (nfile > 0);
|
---|
554 | for (i=1; i < argc; ++i) {
|
---|
555 | if (p = argv[i]) {
|
---|
556 | if ((f=fopen(p, "r")) == NULL)
|
---|
557 | cant(p);
|
---|
558 | else {
|
---|
559 | grep(f, p);
|
---|
560 | fclose(f);
|
---|
561 | }
|
---|
562 | }
|
---|
563 | }
|
---|
564 | }
|
---|
565 | return 0;
|
---|
566 | }
|
---|
567 |
|
---|
568 | /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/
|
---|