/*
   Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002.

   Sccsid @(#)awk.g.y 1.9 (gritter) 5/14/06>
 */
/* UNIX(R) Regular Expression Tools

   Copyright (C) 2001 Caldera International, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to:
       Free Software Foundation, Inc.
       59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* copyright "%c%" */

/* from RCS Header: awk.g.y 1.2 91/06/25 */

/*
 * yacc grammar for awk.
 *
 * Layout: C prologue, %union and token/type/precedence declarations,
 * then the grammar rules.  The semantic actions assemble the program
 * through the stat1..stat4 / op1..op4 / linkum / valtonode / makearr /
 * makedfa constructors, none of which are defined in this file
 * (presumably they are declared by "awk.h" -- confirm).
 */

%{
#include "awk.h"
/*
 * NOTE(review): this file had been through a tag stripper that removed
 * everything written as <...>.  The three system header names below were
 * restored on that basis: <inttypes.h> is needed for intptr_t in the
 * %union; <unistd.h> and <pfmt.h> are believed to match upstream but are
 * not provable from this file alone -- confirm before committing.
 */
#include <unistd.h>
#include <inttypes.h>
#include <pfmt.h>

int yywrap(void) { return(1); }

#ifndef DEBUG
# define PUTS(x)
#endif

Node *beginloc = 0, *endloc = 0;    /* accumulated BEGIN / END action lists */
int infunc = 0;                     /* = 1 if in arglist or body of func */
unsigned char *curfname = 0;        /* name of function being defined; set by setfname() */
Node *arglist = 0;                  /* list of args for current function */

/* Helpers defined after the second %% separator. */
static void setfname(Cell *);
static int constnode(Node *);
static unsigned char *strnode(Node *);
static Node *notnull(Node *);

extern const char illstat[];
extern int yylex(void);
%}

/* Semantic value: parse-tree node, symbol cell, integer code, or string. */
%union {
    Node            *p;
    Cell            *cp;
    intptr_t        i;
    unsigned char   *s;
}

/*
 * NOTE(review): the <tag> fields below were missing and have been
 * reconstructed from how each symbol's value is used in the actions:
 *   <p>  values passed where a Node * is built or consumed;
 *   <cp> values handed to setfname()/valtonode() as a Cell *;
 *   <s>  the regular-expression text handed to makedfa();
 *   <i>  operator and punctuation codes (passed as the op argument or
 *        cast with (Node *)).
 * Confirm against the upstream file.
 */
%token      FIRSTTOKEN  /* must be first */
%token  <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token  <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token  <i> ARRAY
%token  <i> MATCH NOTMATCH MATCHOP
%token  <i> FINAL DOT ALL CCL NCCL CHAR MCHAR OR STAR QUEST PLUS
%token  <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token  <i> ARG BLTIN BREAK CONTINUE DELETE DO EXIT FOR FUNC
%token  <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
%token  <i> ADD MINUS MULT DIVIDE MOD
%token  <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token  <i> PRINT PRINTF SPRINTF
%token  <p> ELSE INTEST CONDEXPR
%token  <i> POSTINCR PREINCR POSTDECR PREDECR
%token  <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD
%token  <s> REGEXPR

%type   <p> pas pattern ppattern plist pplist patlist prarg term re
%type   <p> pa_pat pa_stat pa_stats
%type   <s> reg_expr
%type   <p> simple_stmt opt_simple_stmt stmt stmtlist
%type   <p> var varname funcname varlist
%type   <p> for if while
%type   <i> pst opt_pst lbrace rparen comma nl opt_nl and bor
%type   <i> subop print

/* Operator precedence, lowest first. */
%right  ASGNOP
%right  '?'
%right  ':'
%left   BOR
%left   AND
%left   GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left   ARG BLTIN BREAK CALL CONTINUE DELETE DO EXIT FOR FIELD FUNC
%left   GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left   PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left   REGEXPR VAR VARNF IVAR WHILE '('
%left   CAT
%left   '+' '-'
%left   '*' '/' '%'
%left   NOT UMINUS
%right  POWER
%right  DECR INCR
%left   INDIRECT
%token      LASTTOKEN   /* must be last */

%%

/* Whole program: on success store the PROGRAM node in winner. */
program:
      pas
        { if (errorflag==0)
            winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
    | error
        { yyclearin; bracecheck(); vyyerror(":95:Bailing out"); }
    ;

/* The following helpers let newlines follow certain tokens. */
and:
      AND
    | and NL
    ;

bor:
      BOR
    | bor NL
    ;

comma:
      ','
    | comma NL
    ;

do:
      DO            { }
    | do NL
    ;

else:
      ELSE          { }
    | else NL
    ;

/* for (init; cond; step) stmt, the cond-less form, and for (var in array). */
for:
      FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
        { $$ = stat4(FOR, $3, notnull($5), $7, $9); }
    | FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt
        { $$ = stat4(FOR, $3, NIL, $6, $8); }
    | FOR '(' varname IN varname rparen stmt
        { $$ = stat3(IN, $3, makearr($5), $7); }
    ;

/* Name in a function definition; setfname() validates and records it. */
funcname:
      VAR           { setfname($1); }
    | CALL          { setfname($1); }
    ;

if:
      IF '(' pattern rparen     { $$ = notnull($3); }
    ;

lbrace:
      '{'
    | lbrace NL
    ;

nl:
      NL
    | nl NL
    ;

opt_nl:
      /* empty */   { $$ = 0; }
    | nl
    ;

opt_pst:
      /* empty */   { $$ = 0; }
    | pst
    ;

opt_simple_stmt:
      /* empty */   { $$ = 0; }
    | simple_stmt
    ;

/* Pattern-action statements making up the program body. */
pas:
      opt_pst                   { $$ = 0; }
    | opt_pst pa_stats opt_pst  { $$ = $2; }
    ;

pa_pat:
      pattern       { $$ = notnull($1); }
    ;

/*
 * One pattern-action item.  A pattern without an action gets a PRINT of
 * rectonode() as its action.  BEGIN/END bodies are appended to
 * beginloc/endloc, and function definitions are passed to defn(); those
 * three forms yield 0 rather than a node.
 */
pa_stat:
      pa_pat
        { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
    | pa_pat lbrace stmtlist '}'
        { $$ = stat2(PASTAT, $1, $3); }
    | pa_pat ',' pa_pat
        { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
    | pa_pat ',' pa_pat lbrace stmtlist '}'
        { $$ = pa2stat($1, $3, $5); }
    | lbrace stmtlist '}'
        { $$ = stat2(PASTAT, NIL, $2); }
    | XBEGIN lbrace stmtlist '}'
        { beginloc = linkum(beginloc, $3); $$ = 0; }
    | XEND lbrace stmtlist '}'
        { endloc = linkum(endloc, $3); $$ = 0; }
    /* The mid-rule action counts as $6, so the body (stmtlist) is $8. */
    | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
        { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
    ;

pa_stats:
      pa_stat
    | pa_stats opt_pst pa_stat  { $$ = linkum($1, $3); }
    ;

patlist:
      pattern
    | patlist comma pattern     { $$ = linkum($1, $3); }
    ;

/*
 * Expression form reached from prarg via pplist (unparenthesised print
 * arguments).  Compared with pattern it has no relational-operator,
 * NOT-pattern or '|' GETLINE alternatives, which leaves '>' and '|'
 * after a print argument list to the redirection forms in simple_stmt.
 */
ppattern:
      var ASGNOP ppattern
        { $$ = op2($2, $1, $3); }
    | ppattern '?' ppattern ':' ppattern %prec '?'
        { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    | ppattern bor ppattern %prec BOR
        { $$ = op2(BOR, notnull($1), notnull($3)); }
    | ppattern and ppattern %prec AND
        { $$ = op2(AND, notnull($1), notnull($3)); }
    | ppattern MATCHOP reg_expr
        { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    | ppattern MATCHOP ppattern
        /* constant right operand: build its dfa now; otherwise flag with (Node *)1 */
        { if (constnode($3))
            $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
          else
            $$ = op3($2, (Node *)1, $1, $3); }
    | ppattern IN varname
        { $$ = op2(INTEST, $1, makearr($3)); }
    | '(' plist ')' IN varname
        { $$ = op2(INTEST, $2, makearr($5)); }
    | ppattern term %prec CAT
        { $$ = op2(CAT, $1, $2); }
    | re
    | term
    ;

/* General expression. */
pattern:
      var ASGNOP pattern
        { $$ = op2($2, $1, $3); }
    | pattern '?' pattern ':' pattern %prec '?'
        { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    | pattern bor pattern %prec BOR
        { $$ = op2(BOR, notnull($1), notnull($3)); }
    | pattern and pattern %prec AND
        { $$ = op2(AND, notnull($1), notnull($3)); }
    | NOT pattern
        { $$ = op1(NOT, op2(NE,$2,valtonode(lookup("$zero&null",symtab),CCON))); }
    | pattern EQ pattern        { $$ = op2($2, $1, $3); }
    | pattern GE pattern        { $$ = op2($2, $1, $3); }
    | pattern GT pattern        { $$ = op2($2, $1, $3); }
    | pattern LE pattern        { $$ = op2($2, $1, $3); }
    | pattern LT pattern        { $$ = op2($2, $1, $3); }
    | pattern NE pattern        { $$ = op2($2, $1, $3); }
    | pattern MATCHOP reg_expr
        { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    | pattern MATCHOP pattern
        /* constant right operand: build its dfa now; otherwise flag with (Node *)1 */
        { if (constnode($3))
            $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
          else
            $$ = op3($2, (Node *)1, $1, $3); }
    | pattern IN varname
        { $$ = op2(INTEST, $1, makearr($3)); }
    | '(' plist ')' IN varname
        { $$ = op2(INTEST, $2, makearr($5)); }
    | pattern '|' GETLINE var
        { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
    | pattern '|' GETLINE
        { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
    | pattern term %prec CAT
        { $$ = op2(CAT, $1, $2); }
    | re
    | term
    ;

/* Comma-separated list of two or more patterns. */
plist:
      pattern comma pattern     { $$ = linkum($1, $3); }
    | plist comma pattern       { $$ = linkum($1, $3); }
    ;

pplist:
      ppattern
    | pplist comma ppattern     { $$ = linkum($1, $3); }
    ;

/* print/printf arguments; an empty list becomes rectonode(). */
prarg:
      /* empty */               { $$ = rectonode(); }
    | pplist
    | '(' plist ')'             { $$ = $2; }
    ;

print:
      PRINT
    | PRINTF
    ;

/* One or more newlines and/or semicolons. */
pst:
      NL
    | ';'
    | pst NL
    | pst ';'
    ;

rbrace:
      '}'           { }
    | rbrace NL
    ;

/* Bare /regexp/: a MATCH whose subject is rectonode(). */
re:
      reg_expr
        { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1,0)); }
    | NOT re
        {$$ = op1(NOT, notnull($2)); }
    ;

/* startreg() is called right after the opening '/', before REGEXPR is read. */
reg_expr:
      '/' {startreg();} REGEXPR '/'     { $$ = $3; }
    ;

rparen:
      ')'
    | rparen NL
    ;

simple_stmt:
      print prarg '|' term
        { $$ = stat3($1, $2, (Node *) $3, $4); }
    | print prarg APPEND term
        { $$ = stat3($1, $2, (Node *) $3, $4); }
    | print prarg GT term
        { $$ = stat3($1, $2, (Node *) $3, $4); }
    | print prarg
        { $$ = stat3($1, $2, NIL, NIL); }
    | DELETE varname '[' patlist ']'
        { $$ = stat2(DELETE, makearr($2), $4); }
    | DELETE varname
        { yyclearin; vyyerror(":96:You can only delete array[element]"); $$ = stat1(DELETE, $2); }
    | pattern
        { $$ = exptostat($1); }
    | error
        { yyclearin; vyyerror(illstat); }
    ;

/* Statement terminator. */
st:
      nl            { }
    | ';' opt_nl    { }
    ;

stmt:
      BREAK st
        { $$ = stat1(BREAK, NIL); }
    | CONTINUE st
        { $$ = stat1(CONTINUE, NIL); }
    | do stmt WHILE '(' pattern ')' st
        { $$ = stat2(DO, $2, notnull($5)); }
    | EXIT pattern st
        { $$ = stat1(EXIT, $2); }
    | EXIT st
        { $$ = stat1(EXIT, NIL); }
    | for
    | if stmt else stmt
        { $$ = stat3(IF, $1, $2, $4); }
    | if stmt
        { $$ = stat3(IF, $1, $2, NIL); }
    | lbrace stmtlist rbrace
        { $$ = $2; }
    | NEXT st
        { if (infunc)
            vyyerror(":97:Next is illegal inside a function");
          $$ = stat1(NEXT, NIL); }
    | RETURN pattern st
        { $$ = stat1(RETURN, $2); }
    | RETURN st
        { $$ = stat1(RETURN, NIL); }
    | simple_stmt st
    | while stmt
        { $$ = stat2(WHILE, $1, $2); }
    | ';' opt_nl
        { $$ = 0; }
    ;

stmtlist:
      stmt
    | stmtlist stmt     { $$ = linkum($1, $2); }
    ;

subop:
      SUB
    | GSUB
    ;

/* Arithmetic, built-in and function calls, getline forms, constants, variables. */
term:
      term '+' term             { $$ = op2(ADD, $1, $3); }
    | term '-' term             { $$ = op2(MINUS, $1, $3); }
    | term '*' term             { $$ = op2(MULT, $1, $3); }
    | term '/' term             { $$ = op2(DIVIDE, $1, $3); }
    | term '%' term             { $$ = op2(MOD, $1, $3); }
    | term POWER term           { $$ = op2(POWER, $1, $3); }
    | '-' term %prec UMINUS     { $$ = op1(UMINUS, $2); }
    | '+' term %prec UMINUS     { $$ = $2; }
    | NOT term %prec UMINUS     { $$ = op1(NOT, notnull($2)); }
    | BLTIN '(' ')'             { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
    | BLTIN '(' patlist ')'     { $$ = op2(BLTIN, (Node *) $1, $3); }
    | BLTIN                     { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
    | CALL '(' ')'              { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
    | CALL '(' patlist ')'      { $$ = op2(CALL, valtonode($1,CVAR), $3); }
    | DECR var                  { $$ = op1(PREDECR, $2); }
    | INCR var                  { $$ = op1(PREINCR, $2); }
    | var DECR                  { $$ = op1(POSTDECR, $1); }
    | var INCR                  { $$ = op1(POSTINCR, $1); }
    | GETLINE var LT term       { $$ = op3(GETLINE, $2, (Node *)$3, $4); }
    | GETLINE LT term           { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
    | GETLINE var               { $$ = op3(GETLINE, $2, NIL, NIL); }
    | GETLINE                   { $$ = op3(GETLINE, NIL, NIL, NIL); }
    | INDEX '(' pattern comma pattern ')'
        { $$ = op2(INDEX, $3, $5); }
    | INDEX '(' pattern comma reg_expr ')'
        { vyyerror(":98:Index() doesn't permit regular expressions");
          $$ = op2(INDEX, $3, (Node*)$5); }
    | '(' pattern ')'           { $$ = $2; }
    | MATCHFCN '(' pattern comma reg_expr ')'
        { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
    | MATCHFCN '(' pattern comma pattern ')'
        { if (constnode($5))
            $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
          else
            $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
    | NUMBER                    { $$ = valtonode($1, CCON); }
    | SPLIT '(' pattern comma varname comma pattern ')'     /* string */
        { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
    | SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
        { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
    | SPLIT '(' pattern comma varname ')'
        { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
    | SPRINTF '(' patlist ')'   { $$ = op1($1, $3); }
    | STRING                    { $$ = valtonode($1, CCON); }
    | subop '(' reg_expr comma pattern ')'
        { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
    | subop '(' pattern comma pattern ')'
        { if (constnode($3))
            $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
          else
            $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
    | subop '(' reg_expr comma pattern comma var ')'
        { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
    | subop '(' pattern comma pattern comma var ')'
        { if (constnode($3))
            $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
          else
            $$ = op4($1, (Node *)1, $3, $5, $7); }
    | SUBSTR '(' pattern comma pattern comma pattern ')'
        { $$ = op3(SUBSTR, $3, $5, $7); }
    | SUBSTR '(' pattern comma pattern ')'
        { $$ = op3(SUBSTR, $3, $5, NIL); }
    | var
    ;

/* Assignable references: plain name, array element, field, indirection. */
var:
      varname
    | varname '[' patlist ']'   { $$ = op2(ARRAY, makearr($1), $3); }
    | FIELD                     { $$ = valtonode($1, CFLD); }
    | IVAR                      { $$ = op1(INDIRECT, valtonode($1, CVAR)); }
    | INDIRECT term             { $$ = op1(INDIRECT, $2); }
    ;

/* Formal parameter list of a function definition; arglist tracks the list so far. */
varlist:
      /* nothing */
        { arglist = $$ = 0; }
    | VAR
        { arglist = $$ = valtonode($1,CVAR); }
    | varlist comma VAR
        { arglist = $$ = linkum($1,valtonode($3,CVAR)); }
    ;

/* A plain name: ordinary variable, ARG token, or VARNF token. */
varname:
      VAR
        { $$ = valtonode($1, CVAR); }
    | ARG
        { $$ = op1(ARG, (Node *) $1); }
    | VARNF
        { $$ = op1(VARNF, (Node *) $1); }
    ;

/* Head of a while loop; the value is the condition passed through notnull(). */
while:
      WHILE '(' pattern rparen
        { $$ = notnull($3); }
    ;

%%

/*
 * Record p as the function currently being defined.
 * Reports a syntax error if p is already an array or already a function;
 * curfname is set to p's name in every case.
 */
static void
setfname(Cell *p)
{
    if (isarr(p)) {
        vyyerror(":99:%s is an array, not a function", p->nval);
    } else if (isfunc(p)) {
        vyyerror(":100:You cannot define function %s more than once", p->nval);
    }
    curfname = p->nval;
}

/*
 * Return nonzero when p is an NVALUE node whose cell (narg[0]) has
 * csub == CCON, zero otherwise.
 */
static int
constnode(Node *p)
{
    Cell *val;

    if (p->ntype != NVALUE)
        return 0;
    val = (Cell *)(p->narg[0]);
    return val->csub == CCON;
}

/* Return the sval of the cell stored in p->narg[0]. */
static unsigned char *
strnode(Node *p)
{
    Cell *val = (Cell *)(p->narg[0]);

    return val->sval;
}

/*
 * Return n unchanged when its operator is a comparison (LE, LT, EQ, NE,
 * GT, GE) or a logical operator (BOR, AND, NOT); otherwise return a new
 * node comparing n for inequality with nullnode.
 */
static Node *
notnull(Node *n)
{
    switch (n->nobj) {
    case LE:
    case LT:
    case EQ:
    case NE:
    case GT:
    case GE:
    case BOR:
    case AND:
    case NOT:
        /* already one of the listed operators: leave as is */
        break;
    default:
        n = op2(NE, n, nullnode);
        break;
    }
    return n;
}