diff --git a/app/mk/inc/cabuild b/app/mk/inc/cabuild deleted file mode 100644 index b479206..0000000 --- a/app/mk/inc/cabuild +++ /dev/null @@ -1,12 +0,0 @@ -TARG = shr/obj/$OBJNAME.a -CFLAGS = $DEFS $INCS $CPPFLAGS -OFILES = ${CFILES:%.c=%.o} -all:V: $TARG -$TARG : $OFILES - mkdir -p shr/obj - $AR $ARFLAGS $TARG $OFILES -%.o : %.c - $CC -c -o $target $CFLAGS $stem.c -clean:V: - rm -f $TARG $OFILES -<$MKINCDIR/std/install diff --git a/app/mk/inc/cbuild b/app/mk/inc/cbuild deleted file mode 100644 index 16f80fd..0000000 --- a/app/mk/inc/cbuild +++ /dev/null @@ -1,8 +0,0 @@ -# Universal powerful simple C build file. -<$(MKINCDIR)/std/cbuild -<$(MKINCDIR)/std/install - -clean:V: - rm -f $TARG $TARG.strip $OFILES -uninstall:V: - rm -f $EXEDIR/$TARG diff --git a/app/mk/inc/config b/app/mk/inc/config deleted file mode 100644 index f16b179..0000000 --- a/app/mk/inc/config +++ /dev/null @@ -1,59 +0,0 @@ -# Main configuration file. - -MKINCDIR = $(MKINCDIR) - -EXEDIR = $(HOME)/exe -APPDIR = `goblin paths -fr $HOME/app` -SHRDIR = $(HOME)/shr -INCDIR = $SHRDIR/inc -OBJDIR = $SHRDIR/obj -MANDIR = $SHRDIR/man -INSTALLDIRS = $OBJDIR $MANDIR $EXEDIR $INCDIR $APPDIR - -USRDIR = /usr -USRINC = -I$USRDIR/include -USRLIB = -L$USRDIR/lib -X11 = $USRDIR/X11R6 -X11INC = -I$X11/include -X11LIB = -L$X11/lib -lX11 -XFTLIB = -lXft -FTINC = $USRINC/freetype2 -FTLIB = -lfreetype -FCINC = -FCLIB = -lfontconfig -MLIB = -lm -CLIB = -lc -UTILLIB = -lutil -XINLIB = -lXinerama -XINCPP = -DXINERAMA -XTLIB = -lXt -XILIB = -lXi -XEXTLIB = -lXext -DOTINC = -I. -SECINC = -Isec -PATH9 = lib/9 -SECINC9 = -I$PATH9/sec -CRYPTLIB = -lcrypt -XRLIB = -lXrandr -CURLIB = -lcurses -PNGLIB = -lpng -JPGLIB = -ljpeg -OBJ9 = $OBJDIR/9.a -INC9 = -I$INCDIR/9 -FRAMEOBJ = $OBJDIR/frame.a -FRAMEINC = -I$INCDIR/frame -XGOBJ = $OBJDIR/Xg.a -XGINC = -I$INCDIR/Xg -SLINC = -I$INCDIR/sl -XMULIB = -lXmu -GCC = cc -CC = tcc -LD = $CC -STRIP = strip -AR = ar -YACC = 9yacc -LEX = lex -PKG_CONFIG_CFLAGS = pkg-config --cflags -PkG_CONFIG_LIBS = pkg-config --libs - -<$(HOME)/env/mk/config diff --git a/app/mk/inc/cybuild b/app/mk/inc/cybuild deleted file mode 100644 index 12f7ae0..0000000 --- a/app/mk/inc/cybuild +++ /dev/null @@ -1,22 +0,0 @@ -# File to build programs with Yacc files. -INSTALLDIRS = $APPDIR $EXEDIR $SHRDIR -OFILES = ${CFILES:%.c=%.o} -YOFILES = ${YCFILES:%.c=%.o} -TARG = exe/$PROGNAME -CFLAGS = $CPPFLAGS $DEFS $INCS $CFLAGS -all :V: $TARG -strip :V: $TARG - $STRIP $TARG -$TARG : $OFILES $YOFILES - mkdir -p exe - $LD -o $target $LDFLAGS $OFILES $YOFILES $AFILES $LIBS -%.o : %.c - $CC -c -o $target $CFLAGS $stem.c -$CFILES $YCFILES :N: $HFILES $YHFILES -$HFILES :N: -$YHFILES $YCFILES : $YFILES - $YACC -d $YFILES -$YFILES :N: -clean: - rm -f $TARG $OFILES $YOFILES $YHFILES $YCFILES -<$MKINCDIR/std/install diff --git a/app/mk/inc/dirs b/app/mk/inc/dirs deleted file mode 100644 index 79c1786..0000000 --- a/app/mk/inc/dirs +++ /dev/null @@ -1,14 +0,0 @@ -% :V: %-$MKSHELL -%-sh :QV: - pwd=`pwd` - export pwd - for d in $DIRS ; do - echo "[ cd $d ; mk $stem]" - cd "$d" ; mk $MKFLAGS $stem ; cd "$pwd" - done -%-rc :QV: - pwd = `{pwd} - for(d in $DIRS){ - echo [ cd $d ';' mk $stem] - { builtin cd $d ; mk $MKFLAGS $stem ; builtin cd $pwd} - } diff --git a/app/mk/inc/gobuild b/app/mk/inc/gobuild deleted file mode 100644 index bcb3919..0000000 --- a/app/mk/inc/gobuild +++ /dev/null @@ -1,4 +0,0 @@ -# Compatible with pkg module for Golang. -<$MKINCDIR/std/gobuild -<$MKINCDIR/std/install - diff --git a/app/mk/inc/script b/app/mk/inc/script deleted file mode 100644 index 84ef683..0000000 --- a/app/mk/inc/script +++ /dev/null @@ -1,7 +0,0 @@ -TARG = exe -exe: - mkdir -p $target -all:VQ: - echo -n -<$MKINCDIR/std/install - diff --git a/app/mk/inc/std/cbuild b/app/mk/inc/std/cbuild deleted file mode 100644 index d9c6d50..0000000 --- a/app/mk/inc/std/cbuild +++ /dev/null @@ -1,15 +0,0 @@ -OFILES = ${CFILES:%.c=%.o} -TARG = exe/$PROGNAME -CFLAGS = $CPPFLAGS $DEFS $INCS $CFLAGS -all :V: $TARG -strip :V: $TARG.strip -$TARG.strip : $TARG - cp -f $TARG $target - $STRIP $target -$TARG : $OFILES - mkdir -p exe - $LD -o $target $LDFLAGS $OFILES $AFILES $LIBS -%.o : %.c $HFILES - $CC -c -o $target $CFLAGS $stem.c -run :V: $TARG - exec ./$TARG $MKFLAGS diff --git a/app/mk/inc/std/gobuild b/app/mk/inc/std/gobuild deleted file mode 100644 index daf200c..0000000 --- a/app/mk/inc/std/gobuild +++ /dev/null @@ -1,34 +0,0 @@ -all :V: build -build :VQ: build-$MKSHELL -build-sh :VQ: - mkdir -p exe - pwd=`pwd` - for name in `command cd src/cmd && goblin ls && command cd $pwd`; do - cd src/cmd/$name - echo Buliding "$name"... - if go build -o $pwd/exe/$name ; then - echo Done building "$name" - else - echo "Error(s) while building $name" - fi - if echo "$name" | goblin in $BUILD_WASM ; then - echo Bulding WASM for "'$name'"... - if GOARCH=wasm GOOS=js go build -o "$pwd/$STATIC/$name.wasm" ; then - echo Done building WASM for "$name" - else - echo "Error(s) while building WASM for '$name'" - fi - fi - command cd $pwd - done -clean:VQ: - pwd=`pwd` - names=`command cd src/cmd && goblin ls && command cd $pwd` - for name in $names ; do - echo Removing "'$name'..." - rm -f "$pwd/exe/$name" - if echo "$name" | goblin in $BUILD_WASM ; then - rm -f "$pwd/$STATIC/$name.wasm" - fi - done - diff --git a/app/mk/inc/std/install b/app/mk/inc/std/install deleted file mode 100644 index 9069c9d..0000000 --- a/app/mk/inc/std/install +++ /dev/null @@ -1,26 +0,0 @@ -install:V: install-$MKSHELL -install-sh:VQ: build - if test -d shr ; then - echo Installing shared files... - cp -rf shr $HOME/shr/.. - echo Done installing shared files - fi - if test -d app ; then - echo Installing application files... - echo "'$HOME'" - echo "'$APPDIR'" - echo `goblin paths app/* $APPDIR/$PKG_NAME/` - #goblin mkdir -p `goblin path $APPDIR/$PKG_NAME` && cp -rf `goblin paths app/* $APPDIR/$PKG_NAME/` - echo Done installing application files - fi - if test -d exe ; then - echo Installing executables... - goblin mkdir -p $EXEDIR - cp -rf exe/* $EXEDIR/ - files=`goblin basename $(ls exe)` - for i in $files ; do - chmod 0755 $EXEDIR/$i - done - echo Done installing executables - fi - diff --git a/app/mk/inc/std/usegcc b/app/mk/inc/std/usegcc deleted file mode 100644 index da1d646..0000000 --- a/app/mk/inc/std/usegcc +++ /dev/null @@ -1,2 +0,0 @@ -CC = $GCC -LD = $CC diff --git a/app/mk/inc/w3/run-dev b/app/mk/inc/w3/run-dev deleted file mode 100644 index 6cb415c..0000000 --- a/app/mk/inc/w3/run-dev +++ /dev/null @@ -1,14 +0,0 @@ -run-dev:V: - while true ; do - goblin echo -n '> ' - input=`goblin read` - case $input in - exit) - exit - ;; - esac - goblin ls -r 100 $WATCH_FILES \ - | entr -d -r sh -c \ - 'mk && ./exe/w3site' \ - || pkill w3site && ./exe/w3site - done diff --git a/check.anko b/check.anko deleted file mode 100644 index 5d91c5a..0000000 --- a/check.anko +++ /dev/null @@ -1,20 +0,0 @@ -var strings = import("strings") - -e = 5 -v = 53 -println(e + v) - -for v in Cmd("ls").Stdout().ShSplit() { - println("file:", v) -} - -for v in strings.Split("big dick and me", " ") { - println(v) -} - -if v < 55 { - println("it fucking works") -} - -Rcmd("goblin", "ls", "-r", "100", "src") || println("it works also") && Rcmd("goblin", "cat", "check.anko") - diff --git a/go.mod b/go.mod index b4edb6c..14bd2f6 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,5 @@ -module github.com/mojosa-software/goblin +module github.com/reklesio/tk go 1.18 -require ( - github.com/mojosa-software/gomtool v0.0.0-20230628111258-73d5a2f1940f - github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c -) +require github.com/reklesio/mtool v0.0.0-20231023113051-bbe64fae523e // indirect diff --git a/go.sum b/go.sum index 12e26ae..31c8b98 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ -github.com/mojosa-software/gomtool v0.0.0-20230626085847-176486ff01a2 h1:xbw1/w6ZB8xRmaTS0mQvfTETF8M2/tSBfHJIR+cJyNE= -github.com/mojosa-software/gomtool v0.0.0-20230626085847-176486ff01a2/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw= -github.com/mojosa-software/gomtool v0.0.0-20230628111258-73d5a2f1940f h1:lsvXiy5XeOGCiOvkzuX0jA11jJf3j998Xes0/gmk50A= -github.com/mojosa-software/gomtool v0.0.0-20230628111258-73d5a2f1940f/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw= github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c h1:y7RQZz/zJDARRJkn4szD8N2rK6K9NU1vUNPwahtW5zw= github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c/go.mod h1:LtBn7lQTgA/TMEL8Y+dGkD6XWHV2gxRPZXiqCZt3HRc= +github.com/reklesio v0.0.0-20230626085847-176486ff01a2 h1:xbw1/w6ZB8xRmaTS0mQvfTETF8M2/tSBfHJIR+cJyNE= +github.com/reklesio v0.0.0-20230626085847-176486ff01a2/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw= +github.com/reklesio v0.0.0-20230628111258-73d5a2f1940f h1:lsvXiy5XeOGCiOvkzuX0jA11jJf3j998Xes0/gmk50A= +github.com/reklesio v0.0.0-20230628111258-73d5a2f1940f/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw= +github.com/reklesio/mtool v0.0.0-20231023113051-bbe64fae523e h1:2ntFru8B2HDixWKy5EBU4QOcJGyHR4GhB8tWua4Leos= +github.com/reklesio/mtool v0.0.0-20231023113051-bbe64fae523e/go.mod h1:G6WEew5BI+7sorvUztT8wh7mr2jp2Vh5IFjkqWGVM34= diff --git a/src/input/read.go b/input/read.go similarity index 100% rename from src/input/read.go rename to input/read.go diff --git a/install.sh b/install.sh deleted file mode 100755 index 53c5d68..0000000 --- a/install.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -wd=`pwd` - -# cd $wd/src/cmd/goblin && go install && cd $wd -go install - -mkdir -p $HOME/app/goblin -cp -rf $wd/app/* $HOME/app/goblin - diff --git a/license.txt b/license.txt index a06c742..e943bcb 100644 --- a/license.txt +++ b/license.txt @@ -1,25 +1,21 @@ -Copyright (c) 2020 surdeus, aka Andrey Parhomenko +MIT License -Permission is hereby granted, free of charge, -to any person obtaining a copy of this software -and associated documentation files (the "Software"), -to deal in the Software without restriction, -including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons -to whom the Software is furnished to do so, -subject to the following conditions: +Copyright (c) 2023 surdeus -The above copyright notice and this permission notice -shall be included in all copies or substantial portions of the Software. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -THE SOFTWARE IS PROVIDED "AS IS", -WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/main.go b/main.go index 72ab7b3..e39ac76 100644 --- a/main.go +++ b/main.go @@ -1,42 +1,37 @@ package main import ( - "github.com/mojosa-software/gomtool/src/mtool" + "github.com/reklesio/mtool" - "github.com/mojosa-software/goblin/src/tool/awk" - "github.com/mojosa-software/goblin/src/tool/basename" - "github.com/mojosa-software/goblin/src/tool/cat" - "github.com/mojosa-software/goblin/src/tool/date" - "github.com/mojosa-software/goblin/src/tool/ec" - "github.com/mojosa-software/goblin/src/tool/echo" - "github.com/mojosa-software/goblin/src/tool/ftest" - "github.com/mojosa-software/goblin/src/tool/gfalse" - "github.com/mojosa-software/goblin/src/tool/grange" - "github.com/mojosa-software/goblin/src/tool/gtrue" - "github.com/mojosa-software/goblin/src/tool/in" - "github.com/mojosa-software/goblin/src/tool/ln" - "github.com/mojosa-software/goblin/src/tool/ls" - "github.com/mojosa-software/goblin/src/tool/mergelbl" - "github.com/mojosa-software/goblin/src/tool/mk" - "github.com/mojosa-software/goblin/src/tool/mkdir" - "github.com/mojosa-software/goblin/src/tool/noext" - "github.com/mojosa-software/goblin/src/tool/paths" - "github.com/mojosa-software/goblin/src/tool/quote" - "github.com/mojosa-software/goblin/src/tool/read" - "github.com/mojosa-software/goblin/src/tool/sort" - "github.com/mojosa-software/goblin/src/tool/tac" - "github.com/mojosa-software/goblin/src/tool/uniq" - "github.com/mojosa-software/goblin/src/tool/urlprs" - "github.com/mojosa-software/goblin/src/tool/useprog" - "github.com/mojosa-software/goblin/src/tool/wc" - "github.com/mojosa-software/goblin/src/tool/whoami" - "github.com/mojosa-software/goblin/src/tool/yes" - "github.com/mojosa-software/goblin/src/tool/script" + "github.com/reklesio/tk/tool/cat" + "github.com/reklesio/tk/tool/date" + "github.com/reklesio/tk/tool/ec" + "github.com/reklesio/tk/tool/echo" + "github.com/reklesio/tk/tool/ftest" + "github.com/reklesio/tk/tool/gfalse" + "github.com/reklesio/tk/tool/grange" + "github.com/reklesio/tk/tool/gtrue" + "github.com/reklesio/tk/tool/in" + "github.com/reklesio/tk/tool/ln" + "github.com/reklesio/tk/tool/ls" + "github.com/reklesio/tk/tool/mergelbl" + "github.com/reklesio/tk/tool/mkdir" + "github.com/reklesio/tk/tool/noext" + "github.com/reklesio/tk/tool/paths" + "github.com/reklesio/tk/tool/quote" + "github.com/reklesio/tk/tool/read" + "github.com/reklesio/tk/tool/sort" + "github.com/reklesio/tk/tool/tac" + "github.com/reklesio/tk/tool/uniq" + "github.com/reklesio/tk/tool/urlprs" + "github.com/reklesio/tk/tool/useprog" + "github.com/reklesio/tk/tool/wc" + "github.com/reklesio/tk/tool/whoami" + "github.com/reklesio/tk/tool/yes" ) func main() { tools := mtool.Tools{ - "basename": mtool.Tool{basename.Run, "get base name of file path", ""}, "cat": mtool.Tool{cat.Run, "print file data to the standard output", ""}, "mkdir": mtool.Tool{mkdir.Run, "make new directory", ""}, "echo": mtool.Tool{echo.Run, "print strings to the standard output", ""}, @@ -59,8 +54,6 @@ func main() { "range": mtool.Tool{grange.Run, "too lazy", ""}, "in": mtool.Tool{in.Run, "filter strings from stdin that aren not in arguments", ""}, "which": mtool.Tool{useprog.Run, "print the name or the path of the first existing program in arg list", ""}, - "mk": mtool.Tool{mk.Run, "file dependency system, simpler make", ""}, - "awk": mtool.Tool{awk.Run, "simple scripting language for working with string templates", ""}, "paths": mtool.Tool{ paths.Run, "convert UNIX slash separated paths into the OS compatible ones", @@ -76,12 +69,7 @@ func main() { "link files", "", }, - "script": mtool.Tool{ - script.Run, - "run embedded anko", - "", - }, } - mtool.Main("goblin", tools) + mtool.Main("tk", tools) } diff --git a/media/gopher.png b/media/gopher.png new file mode 100644 index 0000000..4eff3e7 Binary files /dev/null and b/media/gopher.png differ diff --git a/mkconfig b/mkconfig deleted file mode 100644 index b3be456..0000000 --- a/mkconfig +++ /dev/null @@ -1,5 +0,0 @@ -MKSHELL = sh -<$(MKINCDIR)/config -PKG_NAME = goblin -CC = cc - diff --git a/mkfile b/mkfile deleted file mode 100644 index 0c5baf7..0000000 --- a/mkfile +++ /dev/null @@ -1,3 +0,0 @@ -] [comment=] [header] -``` - -The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows: - -* `separator=`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format. -* `comment=`: consider lines starting with the given character to be comments and skip them, for example `comment=#` will ignore any lines starting with `#` (without preceding whitespace). The default is not to support comments. -* `header`: treat the first line of each input file as a header row providing the field names, and enable the `@"field"` syntax as well as the `FIELDS` array. This option is equivalent to the `-H` command line argument. If neither `header` or `-H` is specified, you can't use named fields. - - - -## CSV output configuration - -When in CSV output mode, the GoAWK `print` statement with one or more arguments ignores `OFS` and `ORS` and separates its arguments (fields) and records using CSV formatting. No header row is printed; if required, a header row can be printed in the `BEGIN` block manually. No other functionality is changed, for example, `printf` doesn't do anything different in CSV output mode. - -**NOTE:** The behaviour of `print` without arguments remains unchanged. This means you can print the input line (`$0`) without further quoting by using a bare `print` statement, but `print $0` will print the input line as a single CSV field, which is probably not what you want. See the [example](#example-convert-between-formats-all-fields) below. - -To enable CSV output mode when using the `goawk` program, use the `-o mode` command line argument. You can also enable CSV output mode by setting the `OUTPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows: - -``` -csv|tsv [separator=] -``` - -The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows: - -* `separator=`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format. - - -## Named field syntax - -If the `header` option or `-H` argument is given, CSV input mode parses the first row of each input file as a header row containing a list of field names. - -When the header option is enabled, you can use the GoAWK-specific "named field" operator (`@`) to access fields by name instead of by number (`$`). For example, given the header row `id,name,email`, for each record you can access the email address using `@"email"`, `$3`, or even `$-1` (first field from the right). Further usage examples are shown [below](#examples). - -Every time a header row is processed, the `FIELDS` special array is updated: it is a mapping of field number to field name, allowing you to loop over the field names dynamically. For example, given the header row `id,name,email`, GoAWK sets `FIELDS` using the equivalent of: - -``` -FIELDS[1] = "id" -FIELDS[2] = "name" -FIELDS[3] = "email" -``` - -Note that named field assignment such as `@"id" = 42` is not yet supported, but this feature may be added later. - - -## Go API - -When using GoAWK via the Go API, you can still use `INPUTMODE`, but it may be more convenient to use the `interp.Config` fields directly: `InputMode`, `CSVInput`, `OutputMode`, and `CSVOutput`. - -Here's a simple snippet showing the use of the `InputMode` and `CSVInput` fields to enable `#` as the comment character: - -``` -prog, err := parser.ParseProgram([]byte(src), nil) -if err != nil { ... } - -config := &interp.Config{ - InputMode: interp.CSVMode, - CSVInput: interp.CSVInputConfig{Comment: '#'}, -} -_, err = interp.ExecProgram(prog, config) -if err != nil { ... } -``` - -Note that `INPUTMODE` and `OUTPUTMODE` set using `Vars` or in the `BEGIN` block will override these settings. - -See the [full reference documentation](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk/interp#Config) for the `interp.Config` struct. - - -## Examples - -Below are some examples using the [testdata/csv/states.csv](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/testdata/csv/states.csv) file, which is a simple CSV file whose contents are as follows: - -``` -"State","Abbreviation" -"Alabama","AL" -"Alaska","AK" -"Arizona","AZ" -"Arkansas","AR" -"California","CA" -... -``` - -### Example: output a field by name - -To output a field by name (in this case the state's abbreviation): - -``` -$ goawk -i csv -H '{ print @"Abbreviation" }' testdata/csv/states.csv -AL -AK -AZ -... -``` - -### Example: match a field and count - -To count the number of states that have "New" in the name, and then print out what they are: - -``` -$ goawk -i csv -H '@"State" ~ /New/ { n++ } END { print n }' testdata/csv/states.csv -4 -$ goawk -i csv -H '@"State" ~ /New/ { print @"State" }' testdata/csv/states.csv -New Hampshire -New Jersey -New Mexico -New York -``` - -### Example: rename and reorder fields - -To rename and reorder the fields from `State`, `Abbreviation` to `abbr`, `name`. Note that the `print` statement in the `BEGIN` block prints the header row for the output: - -``` -$ goawk -i csv -H -o csv 'BEGIN { print "abbr", "name" } { print @"Abbreviation", @"State" }' testdata/csv/states.csv -abbr,name -AL,Alabama -AK,Alaska -... -``` - -### Example: convert between formats (explicit field list) - -To convert the file from CSV to TSV format (note how we're *not* using `-H`, so the header row is included): - -``` -$ goawk -i csv -o tsv '{ print $1, $2 }' testdata/csv/states.csv -State Abbreviation -Alabama AL -Alaska AK -... -``` - -### Example: convert between formats (all fields) - -If you want to convert between CSV and TSV format but don't know the number of fields, you can use a field assignment like `$1=$1` so that GoAWK reformats `$0` according to the output format (TSV in this case). This is similar to how in POSIX AWK a field assignment reformats `$0` according to the output field separator (`OFS`). Then `print` without arguments prints the raw value of `$0`: - -``` -$ goawk -i csv -o tsv '{ $1=$1; print }' testdata/csv/states.csv -State Abbreviation -Alabama AL -Alaska AK -... -``` - -**NOTE:** It's not correct to use `print $0` in this case, because that would print `$0` as a single TSV field, which you generally don't want: - -``` -$ goawk -i csv -o tsv '{ $1=$1; print $0 }' testdata/csv/states.csv # INCORRECT! -"State Abbreviation" -"Alabama AL" -"Alaska AK" -... -``` - -### Example: override separator - -To test overriding the separator character, we can use GoAWK to add a comment and convert the separator to `|` (pipe). We'll also add a comment line to test comment handling: - -``` -$ goawk -i csv -o 'csv separator=|' 'BEGIN { printf "# comment\n" } { $1=$1; print }' testdata/csv/states.csv -# comment -State|Abbreviation -Alabama|AL -Alaska|AK -... -``` - -### Example: skip comment lines - -We can process the "pipe-separated values" file generated above, skipping comment lines, and printing the first three state names (accessed by field number this time): - -``` -$ goawk -i 'csv header comment=# separator=|' 'NR<=3 { print $1 }' testdata/csv/states.psv -Alabama -Alaska -Arizona -``` - -### Example: use dynamic field names - -Similar to the `$` operator, you can also use `@` with dynamic values. For example, if there are fields named `address_1`, `address_2`, up through `address_5`, you could loop over them as follows: - -``` -$ cat testdata/csv/address5.csv -name,address_1,address_2,address_3,address_4,address_5 -Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates -$ goawk -i csv -H '{ for (i=1; i<=5; i++) print @("address_" i) }' testdata/csv/address5.csv -123 Way St -Apt 2B -Township -Cityville -United Plates -``` - -### Example: use the `FIELDS` array - -A somewhat contrived example showing use of the `FIELDS` array: - -``` -$ cat testdata/csv/fields.csv -id,name,email -1,Bob,b@bob.com -$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) print i, FIELDS[i] }' testdata/csv/fields.csv -1 id -2 name -3 email -``` - -### Example: create CSV file from array - -The following example shows how you might pull fields out of an integer-indexed array to produce a CSV file: - -``` -$ goawk -o csv 'BEGIN { print "id", "name"; names[1]="Bob"; names[2]="Jane"; for (i=1; i in names; i++) print i, names[i] }' -id,name -1,Bob -2,Jane -``` - -### Example: create CSV file by assigning fields - -This example shows the same result, but producing the CSV output by assigning individual fields and then using a bare `print` statement: - -``` -$ goawk -o csv 'BEGIN { print "id", "name"; $1=1; $2="Bob"; print; $1=2; $2="Jane"; print }' -id,name -1,Bob -2,Jane -``` - -### Example: different ways to specify CSV mode - -And finally, four equivalent examples showing different ways to specify the input mode, using `-i` or the `INPUTMODE` special variable (the same techniques work for `-o` and `OUTPUTMODE`): - -``` -$ goawk -i csv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv -NY -$ goawk -icsv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv -NY -$ goawk 'BEGIN { INPUTMODE="csv header" } @"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv -NY -$ goawk -v 'INPUTMODE=csv header' '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv -NY -``` - - -## Examples based on csvkit - -The [csvkit](https://csvkit.readthedocs.io/en/latest/index.html) suite is a set of tools that allow you to quickly analyze and extract fields from CSV files. Each csvkit tool allows you to do a specific task; GoAWK is more low-level and verbose, but also a more general tool ([`csvsql`](https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html#csvsql-and-sql2csv-ultimate-power) being the exception!). GoAWK also runs significantly faster than csvkit (the latter is written in Python). - -Below are a few snippets showing how you'd do some of the tasks in the csvkit documentation, but using GoAWK (the input file is [testdata/csv/nz-schools.csv](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/testdata/csv/nz-schools.csv)): - -### csvkit example: print column names - -``` -$ csvcut -n testdata/csv/nz-schools.csv - 1: School_Id - 2: Org_Name - 3: Decile - 4: Total - -# In GoAWK you have to loop through the fields, but you can print the data in -# any format you want (note the "exit" so it stops after the first row): -$ goawk -i csv '{ for (i=1; i<=NF; i++) printf "%3d: %s\n", i, $i; exit }' testdata/csv/nz-schools.csv - 1: School_Id - 2: Org_Name - 3: Decile - 4: Total - -# You could also use -H and the FIELDS array to do this: -$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) printf "%3d: %s\n", i, FIELDS[i]; exit }' testdata/csv/nz-schools.csv - 1: School_Id - 2: Org_Name - 3: Decile - 4: Total -``` - -### csvkit example: select a subset of columns - -``` -$ csvcut -c Org_Name,Total testdata/csv/nz-schools.csv -Org_Name,Total -Waipa Christian School,60 -Remarkables Primary School,494 -... - -# In GoAWK you need to print the field names explicitly in BEGIN: -$ goawk -i csv -H -o csv 'BEGIN { print "Org_Name", "Total" } { print @"Org_Name", @"Total" }' testdata/csv/nz-schools.csv -Org_Name,Total -Waipa Christian School,60 -Remarkables Primary School,494 -... - -# But you can also change the column names and reorder them: -$ goawk -i csv -H -o csv 'BEGIN { print "# Students", "School" } { print @"Total", @"Org_Name" }' testdata/csv/nz-schools.csv -# Students,School -60,Waipa Christian School -494,Remarkables Primary School -... -``` - -### csvkit example: generate statistics - -There's no equivalent of the `csvstat` tool in GoAWK, but you can calculate statistics yourself. For example, to calculate the total number of students in New Zealand schools, you can do the following (`csvstat` is giving a warning due to the single-column input): - -``` -$ csvcut -c Total testdata/csv/nz-schools.csv | csvstat --sum -/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter -802,516 - -$ goawk -i csv -H '{ sum += @"Total" } END { print sum }' testdata/csv/nz-schools.csv -802516 -``` - -To calculate the average (mean) decile level for boys' and girls' schools (sorry, boys!): - -``` -$ csvgrep -c Org_Name -m Boys testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean -/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter -6.45 -$ csvgrep -c Org_Name -m Girls testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean -/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter -8.889 - -$ goawk -i csv -H '/Boys/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv -6.45 -$ goawk -i csv -H '/Girls/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv -8.88889 -``` - - -## Performance - -The performance of GoAWK's CSV input and output mode is quite good, on a par with using the `encoding/csv` package from Go directly, and much faster than the `csv` module in Python. CSV input speed is significantly slower than `frawk`, though CSV output speed is significantly faster than `frawk`. - -Below are the results of some simple read and write [benchmarks](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/scripts/csvbench) using `goawk` and `frawk` as well as plain Python and Go. The output of the write benchmarks is a 1GB, 3.5 million row CSV file with 20 columns (including quoted columns); the input for the read benchmarks uses that same file. Times are in seconds, showing the best of three runs on a 64-bit Linux laptop with an SSD drive: - -Test | goawk | frawk | Python | Go ---------------- | ----- | ----- | ------ | ---- -Reading 1GB CSV | 3.18 | 1.01 | 13.4 | 3.22 -Writing 1GB CSV | 5.64 | 13.0 | 17.0 | 3.24 - - -## Future work - -* Consider adding a `printrow(a)` or similar function to make it easier to construct CSV rows from scratch. - - `a` would be an array such as: `a["name"] = "Bob"; a["age"] = 7` - - keys would be ordered by `OFIELDS` (eg: `OFIELDS[1] = "name"; OFIELDS[2] = "age"`) or by "smart name" if `OFIELDS` not set ("smart name" meaning numeric if `a` keys are numeric, string otherwise) - - `printrow(a)` could take an optional second `fields` array arg to use that instead of the global `OFIELDS` -* Consider allowing `-H` to accept an optional list of field names which could be used as headers in the absence of headers in the file itself (either `-H=name,age` or `-i 'csv header=name,age'`). -* Consider adding TrimLeadingSpace CSV input option. See: https://github.com/mojosa-software/goblin/src/tool/awk/issues/109 -* Consider supporting `@"id" = 42` named field assignment. - - -## Feedback - -Please [open an issue](https://github.com/mojosa-software/goblin/src/tool/awk/issues) if you have bug reports or feature requests for GoAWK's CSV support. diff --git a/src/tool/awk/goawk b/src/tool/awk/goawk deleted file mode 100755 index d9fbe96..0000000 Binary files a/src/tool/awk/goawk and /dev/null differ diff --git a/src/tool/awk/goawk.go b/src/tool/awk/goawk.go deleted file mode 100644 index a55ce95..0000000 --- a/src/tool/awk/goawk.go +++ /dev/null @@ -1,401 +0,0 @@ -// Package goawk is an implementation of AWK with CSV support -// -// You can use the command-line "goawk" command or run AWK from your -// Go programs using the "interp" package. The command-line program -// has the same interface as regular awk: -// -// goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...] -// -// The -F flag specifies the field separator (the default is to split -// on whitespace). The -v flag allows you to set a variable to a -// given value (multiple -v flags allowed). The -f flag allows you to -// read AWK source from a file instead of the 'prog' command-line -// argument. The rest of the arguments are input filenames (default -// is to read from stdin). -// -// A simple example (prints the sum of the numbers in the file's -// second column): -// -// $ echo 'foo 12 -// > bar 34 -// > baz 56' >file.txt -// $ goawk '{ sum += $2 } END { print sum }' file.txt -// 102 -// -// To use GoAWK in your Go programs, see README.md or the "interp" -// package docs. -package awk - -import ( - "bytes" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "runtime" - "runtime/pprof" - "strings" - "unicode/utf8" - - "github.com/mojosa-software/goblin/src/tool/awk/interp" - "github.com/mojosa-software/goblin/src/tool/awk/lexer" - "github.com/mojosa-software/goblin/src/tool/awk/parser" - - "github.com/mojosa-software/gomtool/src/mtool" -) - -const ( - version = "v1.19.0" - copyright = "GoAWK " + version + " - Copyright (c) 2022 Ben Hoyt" - shortUsage = "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]" - longUsage = `Standard AWK arguments: - -F separator field separator (default " ") - -f progfile load AWK source from progfile (multiple allowed) - -v var=value variable assignment (multiple allowed) - -Additional GoAWK arguments: - -cpuprofile file write CPU profile to file - -d print parsed syntax tree to stderr (debug mode) - -da print virtual machine assembly instructions to stderr - -dt print variable type information to stderr - -H parse header row and enable @"field" in CSV input mode - -h, --help show this help message - -i mode parse input into fields using CSV format (ignore FS and RS) - 'csv|tsv [separator=] [comment=] [header]' - -o mode use CSV output for print with args (ignore OFS and ORS) - 'csv|tsv [separator=]' - -version show GoAWK version and exit -` -) - -func Run(flags *mtool.Flags) { - // Parse command line arguments manually rather than using the - // "flag" package, so we can support flags with no space between - // flag and argument, like '-F:' (allowed by POSIX) - - // J's comment: nope, we will change it, lol. - var progFiles []string - var vars []string - fieldSep := " " - cpuprofile := "" - debug := false - debugAsm := false - debugTypes := false - memprofile := "" - inputMode := "" - outputMode := "" - header := false - - argv0 := flags.UtilName() - args := flags.AllArgs() - - var i int - for i = 0; i < len(args); i++ { - // Stop on explicit end of args or first arg not prefixed with "-" - arg := args[i] - if arg == "--" { - i++ - break - } - if arg == "-" || !strings.HasPrefix(arg, "-") { - break - } - - switch arg { - case "-F": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -F") - } - i++ - fieldSep = args[i] - case "-f": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -f") - } - i++ - progFiles = append(progFiles, args[i]) - case "-v": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -v") - } - i++ - vars = append(vars, args[i]) - case "-cpuprofile": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -cpuprofile") - } - i++ - cpuprofile = args[i] - case "-d": - debug = true - case "-da": - debugAsm = true - case "-dt": - debugTypes = true - case "-H": - header = true - case "-h", "--help": - fmt.Printf("%s\n\n%s\n\n%s", copyright, shortUsage, longUsage) - os.Exit(0) - case "-i": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -i") - } - i++ - inputMode = args[i] - case "-memprofile": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -memprofile") - } - i++ - memprofile = args[i] - case "-o": - if i+1 >= len(args) { - errorExitf("flag needs an argument: -o") - } - i++ - outputMode = args[i] - case "-version", "--version": - fmt.Println(version) - os.Exit(0) - default: - switch { - case strings.HasPrefix(arg, "-F"): - fieldSep = arg[2:] - case strings.HasPrefix(arg, "-f"): - progFiles = append(progFiles, arg[2:]) - case strings.HasPrefix(arg, "-i"): - inputMode = arg[2:] - case strings.HasPrefix(arg, "-o"): - outputMode = arg[2:] - case strings.HasPrefix(arg, "-v"): - vars = append(vars, arg[2:]) - case strings.HasPrefix(arg, "-cpuprofile="): - cpuprofile = arg[12:] - case strings.HasPrefix(arg, "-memprofile="): - memprofile = arg[12:] - default: - errorExitf("flag provided but not defined: %s", arg) - } - } - } - - // Any remaining args are program and input files - args = args[i:] - - var src []byte - var stdinBytes []byte // used if there's a parse error - if len(progFiles) > 0 { - // Read source: the concatenation of all source files specified - buf := &bytes.Buffer{} - progFiles = expandWildcardsOnWindows(progFiles) - for _, progFile := range progFiles { - if progFile == "-" { - b, err := ioutil.ReadAll(os.Stdin) - if err != nil { - errorExit(err) - } - stdinBytes = b - _, _ = buf.Write(b) - } else { - f, err := os.Open(progFile) - if err != nil { - errorExit(err) - } - _, err = buf.ReadFrom(f) - if err != nil { - _ = f.Close() - errorExit(err) - } - _ = f.Close() - } - // Append newline to file in case it doesn't end with one - _ = buf.WriteByte('\n') - } - src = buf.Bytes() - } else { - if len(args) < 1 { - errorExitf(shortUsage) - } - src = []byte(args[0]) - args = args[1:] - } - - // Parse source code and setup interpreter - parserConfig := &parser.ParserConfig{ - DebugTypes: debugTypes, - DebugWriter: os.Stderr, - } - prog, err := parser.ParseProgram(src, parserConfig) - if err != nil { - if err, ok := err.(*parser.ParseError); ok { - name, line := errorFileLine(progFiles, stdinBytes, err.Position.Line) - fmt.Fprintf(os.Stderr, "%s:%d:%d: %s\n", - name, line, err.Position.Column, err.Message) - showSourceLine(src, err.Position) - os.Exit(1) - } - errorExitf("%s", err) - } - - if debug { - fmt.Fprintln(os.Stderr, prog) - } - - if debugAsm { - err := prog.Disassemble(os.Stderr) - if err != nil { - errorExitf("could not disassemble program: %v", err) - } - } - - if header { - if inputMode == "" { - errorExitf("-H only allowed together with -i") - } - inputMode += " header" - } - - // Don't buffer output if stdout is a terminal (default output writer when - // Config.Output is nil is a buffered version of os.Stdout). - var stdout io.Writer - stdoutInfo, err := os.Stdout.Stat() - if err == nil && stdoutInfo.Mode()&os.ModeCharDevice != 0 { - stdout = os.Stdout - } - - config := &interp.Config{ - Argv0: filepath.Base(argv0), - Args: expandWildcardsOnWindows(args), - Vars: []string{ - "FS", fieldSep, - "INPUTMODE", inputMode, - "OUTPUTMODE", outputMode, - }, - Output: stdout, - } - for _, v := range vars { - equals := strings.IndexByte(v, '=') - if equals < 0 { - errorExitf("-v flag must be in format name=value") - } - name, value := v[:equals], v[equals+1:] - // Oddly, -v must interpret escapes (issue #129) - unescaped, err := lexer.Unescape(value) - if err == nil { - value = unescaped - } - config.Vars = append(config.Vars, name, value) - } - - if cpuprofile != "" { - f, err := os.Create(cpuprofile) - if err != nil { - errorExitf("could not create CPU profile: %v", err) - } - if err := pprof.StartCPUProfile(f); err != nil { - errorExitf("could not start CPU profile: %v", err) - } - } - - // Run the program! - status, err := interp.ExecProgram(prog, config) - if err != nil { - errorExit(err) - } - - if cpuprofile != "" { - pprof.StopCPUProfile() - } - if memprofile != "" { - f, err := os.Create(memprofile) - if err != nil { - errorExitf("could not create memory profile: %v", err) - } - runtime.GC() // get up-to-date statistics - if err := pprof.WriteHeapProfile(f); err != nil { - errorExitf("could not write memory profile: %v", err) - } - _ = f.Close() - } - - os.Exit(status) -} - -// Show source line and position of error, for example: -// -// BEGIN { x*; } -// ^ -func showSourceLine(src []byte, pos lexer.Position) { - lines := bytes.Split(src, []byte{'\n'}) - srcLine := string(lines[pos.Line-1]) - numTabs := strings.Count(srcLine[:pos.Column-1], "\t") - runeColumn := utf8.RuneCountInString(srcLine[:pos.Column-1]) - fmt.Fprintln(os.Stderr, strings.Replace(srcLine, "\t", " ", -1)) - fmt.Fprintln(os.Stderr, strings.Repeat(" ", runeColumn)+strings.Repeat(" ", numTabs)+"^") -} - -// Determine which filename and line number to display for the overall -// error line number. -func errorFileLine(progFiles []string, stdinBytes []byte, errorLine int) (string, int) { - if len(progFiles) == 0 { - return "", errorLine - } - startLine := 1 - for _, progFile := range progFiles { - var content []byte - if progFile == "-" { - progFile = "" - content = stdinBytes - } else { - b, err := ioutil.ReadFile(progFile) - if err != nil { - return "", errorLine - } - content = b - } - content = append(content, '\n') - - numLines := bytes.Count(content, []byte{'\n'}) - if errorLine >= startLine && errorLine < startLine+numLines { - return progFile, errorLine - startLine + 1 - } - startLine += numLines - } - return "", errorLine -} - -func errorExit(err error) { - pathErr, ok := err.(*os.PathError) - if ok && os.IsNotExist(err) { - errorExitf("file %q not found", pathErr.Path) - } - errorExitf("%s", err) -} - -func errorExitf(format string, args ...interface{}) { - fmt.Fprintf(os.Stderr, format+"\n", args...) - os.Exit(1) -} - -func expandWildcardsOnWindows(args []string) []string { - if runtime.GOOS != "windows" { - return args - } - return expandWildcards(args) -} - -// Originally from https://github.com/mattn/getwild (compatible LICENSE). -func expandWildcards(args []string) []string { - result := make([]string, 0, len(args)) - for _, arg := range args { - matches, err := filepath.Glob(arg) - if err == nil && len(matches) > 0 { - result = append(result, matches...) - } else { - result = append(result, arg) - } - } - return result -} diff --git a/src/tool/awk/goawk_test.go b/src/tool/awk/goawk_test.go deleted file mode 100644 index 7360d65..0000000 --- a/src/tool/awk/goawk_test.go +++ /dev/null @@ -1,749 +0,0 @@ -// GoAWK tests - -package awk_test - -import ( - "bufio" - "bytes" - "flag" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "runtime" - "sort" - "strings" - "sync" - "testing" - - "github.com/mojosa-software/goblin/src/tool/awk/interp" - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -var ( - goExe string - testsDir string - outputDir string - awkExe string - goAWKExe string - writeAWK bool - writeGoAWK bool -) - -func TestMain(m *testing.M) { - flag.StringVar(&goExe, "goexe", "go", "set to override Go executable used to build goawk") - flag.StringVar(&testsDir, "testsdir", "./testdata", "directory with one-true-awk tests") - flag.StringVar(&outputDir, "outputdir", "./testdata/output", "directory for test output") - flag.StringVar(&awkExe, "awk", "gawk", "awk executable name") - flag.StringVar(&goAWKExe, "goawk", "./goawk", "goawk executable name") - flag.BoolVar(&writeAWK, "writeawk", false, "write expected output") - flag.BoolVar(&writeGoAWK, "writegoawk", true, "write Go AWK output") - flag.Parse() - - cmd := exec.Command(goExe, "build", "-ldflags=-w") - stderr, err := cmd.CombinedOutput() - if err != nil { - fmt.Fprintf(os.Stderr, "error building goawk: %v\n%s\n", err, stderr) - os.Exit(1) - } - - os.Exit(m.Run()) -} - -func TestAWK(t *testing.T) { - inputByPrefix := map[string]string{ - "t": "test.data", - "p": "test.countries", - } - // These programs exit with non-zero status code - errorExits := map[string]bool{ - "t.exit": true, - "t.exit1": true, - "t.gsub4": true, - "t.split3": true, - } - // These programs have known different output - knownDifferent := map[string]bool{ - "t.printf2": true, // because awk is weird here (our behavior is like mawk) - } - // Can't really diff test rand() tests as we're using a totally - // different algorithm for random numbers - randTests := map[string]bool{ - "p.48b": true, - "t.randk": true, - } - // These tests use "for (x in a)", which iterates in an undefined - // order (according to the spec), so sort lines before comparing. - sortLines := map[string]bool{ - "p.43": true, - "t.in1": true, // because "sort" is locale-dependent - "t.in2": true, - "t.intest2": true, - } - dontRunOnWindows := map[string]bool{ - "p.50": true, // because this pipes to Unix sort "sort -t: +0 -1 +2nr" - } - - infos, err := ioutil.ReadDir(testsDir) - if err != nil { - t.Fatalf("couldn't read test files: %v", err) - } - for _, info := range infos { - if !strings.HasPrefix(info.Name(), "t.") && !strings.HasPrefix(info.Name(), "p.") { - continue - } - if runtime.GOOS == "windows" && dontRunOnWindows[info.Name()] { - continue - } - t.Run(info.Name(), func(t *testing.T) { - srcPath := filepath.Join(testsDir, info.Name()) - inputPath := filepath.Join(testsDir, inputByPrefix[info.Name()[:1]]) - outputPath := filepath.Join(outputDir, info.Name()) - - cmd := exec.Command(awkExe, "-f", srcPath, inputPath) - expected, err := cmd.Output() - if err != nil && !errorExits[info.Name()] { - t.Fatalf("error running %s: %v", awkExe, err) - } - expected = bytes.Replace(expected, []byte{0}, []byte("<00>"), -1) - expected = normalizeNewlines(expected) - if sortLines[info.Name()] { - expected = sortedLines(expected) - } - if writeAWK { - err := ioutil.WriteFile(outputPath, expected, 0644) - if err != nil { - t.Fatalf("error writing awk output: %v", err) - } - } - - prog, err := parseGoAWK(srcPath) - if err != nil { - t.Fatal(err) - } - output, err := interpGoAWK(prog, inputPath) - if err != nil && !errorExits[info.Name()] { - t.Fatal(err) - } - output = bytes.Replace(output, []byte{0}, []byte("<00>"), -1) - output = normalizeNewlines(output) - if randTests[info.Name()] || knownDifferent[info.Name()] { - // For tests that use rand(), run them to ensure they - // parse and interpret, but can't compare the output, - // so stop now - return - } - if sortLines[info.Name()] { - output = sortedLines(output) - } - if writeGoAWK { - err := ioutil.WriteFile(outputPath, output, 0644) - if err != nil { - t.Fatalf("error writing goawk output: %v", err) - } - } - if string(output) != string(expected) { - t.Fatalf("output differs, run: git diff %s", outputPath) - } - }) - } - - _ = os.Remove("tempbig") - _ = os.Remove("tempsmall") -} - -func parseGoAWK(srcPath string) (*parser.Program, error) { - src, err := ioutil.ReadFile(srcPath) - if err != nil { - return nil, err - } - prog, err := parser.ParseProgram(src, nil) - if err != nil { - return nil, err - } - return prog, nil -} - -func interpGoAWK(prog *parser.Program, inputPath string) ([]byte, error) { - outBuf := &bytes.Buffer{} - errBuf := &bytes.Buffer{} - config := &interp.Config{ - Output: outBuf, - Error: &concurrentWriter{w: errBuf}, - Args: []string{inputPath}, - } - _, err := interp.ExecProgram(prog, config) - result := outBuf.Bytes() - result = append(result, errBuf.Bytes()...) - return result, err -} - -func interpGoAWKStdin(prog *parser.Program, inputPath string) ([]byte, error) { - input, _ := ioutil.ReadFile(inputPath) - outBuf := &bytes.Buffer{} - errBuf := &bytes.Buffer{} - config := &interp.Config{ - Stdin: &concurrentReader{r: bytes.NewReader(input)}, - Output: outBuf, - Error: &concurrentWriter{w: errBuf}, - // srcdir is for "redfilnm.awk" - Vars: []string{"srcdir", filepath.Dir(inputPath)}, - } - _, err := interp.ExecProgram(prog, config) - result := outBuf.Bytes() - result = append(result, errBuf.Bytes()...) - return result, err -} - -// Wraps a Writer but makes Write calls safe for concurrent use. -type concurrentWriter struct { - w io.Writer - mu sync.Mutex -} - -func (w *concurrentWriter) Write(p []byte) (int, error) { - w.mu.Lock() - defer w.mu.Unlock() - return w.w.Write(p) -} - -// Wraps a Reader but makes Read calls safe for concurrent use. -type concurrentReader struct { - r io.Reader - mu sync.Mutex -} - -func (r *concurrentReader) Read(p []byte) (int, error) { - r.mu.Lock() - defer r.mu.Unlock() - return r.r.Read(p) -} - -func sortedLines(data []byte) []byte { - trimmed := strings.TrimSuffix(string(data), "\n") - lines := strings.Split(trimmed, "\n") - sort.Strings(lines) - return []byte(strings.Join(lines, "\n") + "\n") -} - -func TestGAWK(t *testing.T) { - skip := map[string]bool{ // TODO: fix these (at least the ones that are bugs) - "getline": true, // getline syntax issues (may be okay, see grammar notes at http://pubs.opengroup.org/onlinepubs/007904975/utilities/awk.html#tag_04_06_13_14) - "getline3": true, // getline syntax issues (similar to above) - - "gsubtst7": true, // something wrong with gsub or field split/join - "splitwht": true, // other awks handle split(s, a, " ") differently from split(s, a, / /) - "status-close": true, // hmmm, not sure what's up here - "sigpipe1": true, // probable race condition: sometimes fails, sometimes passes - - "parse1": true, // incorrect parsing of $$a++++ (see TODOs in interp_test.go too) - - "rscompat": true, // GoAWK allows multi-char RS by default - "rsstart2": true, // GoAWK ^ and $ anchors match beginning and end of line, not file (unlike Gawk) - - "hex2": true, // GoAWK allows hex numbers / floating point (per POSIX) - "strtod": true, // GoAWK allows hex numbers / floating point (per POSIX) - } - - dontRunOnWindows := map[string]bool{ - "delargv": true, // reads from /dev/null - "eofsplit": true, // reads from /etc/passwd - "getline5": true, // removes a file while it's open - "iobug1": true, // reads from /dev/null - } - - sortLines := map[string]bool{ - "arryref2": true, - "delargv": true, - "delarpm2": true, - "forref": true, - } - - gawkDir := filepath.Join(testsDir, "gawk") - infos, err := ioutil.ReadDir(gawkDir) - if err != nil { - t.Fatalf("couldn't read test files: %v", err) - } - for _, info := range infos { - if !strings.HasSuffix(info.Name(), ".awk") { - continue - } - testName := info.Name()[:len(info.Name())-4] - if skip[testName] { - continue - } - if runtime.GOOS == "windows" && dontRunOnWindows[testName] { - continue - } - t.Run(testName, func(t *testing.T) { - srcPath := filepath.Join(gawkDir, info.Name()) - inputPath := filepath.Join(gawkDir, testName+".in") - okPath := filepath.Join(gawkDir, testName+".ok") - - expected, err := ioutil.ReadFile(okPath) - if err != nil { - t.Fatal(err) - } - expected = normalizeNewlines(expected) - - prog, err := parseGoAWK(srcPath) - if err != nil { - if err.Error() != string(expected) { - t.Fatalf("parser error differs, got:\n%s\nexpected:\n%s", err.Error(), expected) - } - return - } - output, err := interpGoAWKStdin(prog, inputPath) - output = normalizeNewlines(output) - if err != nil { - errStr := string(output) + err.Error() - if errStr != string(expected) { - t.Fatalf("interp error differs, got:\n%s\nexpected:\n%s", errStr, expected) - } - return - } - - if sortLines[testName] { - output = sortedLines(output) - expected = sortedLines(expected) - } - - if string(output) != string(expected) { - t.Fatalf("output differs, got:\n%s\nexpected:\n%s", output, expected) - } - }) - } - - _ = os.Remove("seq") -} - -func TestCommandLine(t *testing.T) { - tests := []struct { - args []string - stdin string - output string - error string - }{ - // Load source from stdin - {[]string{"-f", "-"}, `BEGIN { print "b" }`, "b\n", ""}, - {[]string{"-f", "-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""}, - {[]string{"-f-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""}, - - // Program with no input - {[]string{`BEGIN { print "a" }`}, "", "a\n", ""}, - - // Read input from stdin - {[]string{`$0`}, "one\n\nthree", "one\nthree\n", ""}, - {[]string{`$0`, "-"}, "one\n\nthree", "one\nthree\n", ""}, - {[]string{`$0`, "-", "-"}, "one\n\nthree", "one\nthree\n", ""}, - {[]string{"-f", "testdata/t.0", "-"}, "one\ntwo\n", "one\ntwo\n", ""}, - {[]string{"{ print FILENAME }"}, "a", "-\n", ""}, - {[]string{"{ print FILENAME }", "-"}, "a", "-\n", ""}, - - // Read input from file(s) - {[]string{`$0`, "testdata/g.1"}, "", "ONE\n", ""}, - {[]string{`$0`, "testdata/g.1", "testdata/g.2"}, "", "ONE\nTWO\n", ""}, - {[]string{`{ print FILENAME ":" FNR "/" NR ": " $0 }`, "testdata/g.1", "testdata/g.4"}, "", - "testdata/g.1:1/1: ONE\ntestdata/g.4:1/2: FOUR a\ntestdata/g.4:2/3: FOUR b\n", ""}, - {[]string{`$0`, "testdata/g.1", "-", "testdata/g.2"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""}, - {[]string{`$0`, "testdata/g.1", "-", "testdata/g.2", "-"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""}, - {[]string{"-F", " ", "--", "$0", "testdata/g.1"}, "", "ONE\n", ""}, - {[]string{"{ print NR, FNR } END { print NR, FNR }", "-"}, "a\nb\nc\n", "1 1\n2 2\n3 3\n3 3\n", ""}, - // I've deleted the "-ftest" file for now as it was causing problems with "go install" zip files - // {[]string{"--", "$0", "-ftest"}, "", "used in tests; do not delete\n", ""}, // Issue #53 - // {[]string{"$0", "-ftest"}, "", "used in tests; do not delete\n", ""}, - - // Specifying field separator with -F - {[]string{`{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 3\n4 6\n", ""}, - {[]string{"-F", ",", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 2 3 \n4 5 6 \n", ""}, - {[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""}, - {[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""}, - {[]string{"-F,", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""}, - - // Assigning other variables with -v - {[]string{"-v", "OFS=.", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.3\n4.6\n", ""}, - {[]string{"-v", "OFS=.", "-v", "ORS=", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.34.6", ""}, - {[]string{"-v", "x=42", "-v", "y=foo", `BEGIN { print x, y }`}, "", "42 foo\n", ""}, - {[]string{"-v", "RS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""}, - {[]string{"-vRS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""}, - {[]string{"-v", `X=x\ty`, `BEGIN { printf X }`}, "", "x\ty", ""}, - - // ARGV/ARGC handling - {[]string{` - BEGIN { - for (i=1; i:1:1: unexpected char\n`\n^"}, - {[]string{"BEGIN {\n\tx*;\n}"}, "", "", ":2:4: expected expression instead of ;\n x*;\n ^"}, - {[]string{"BEGIN {\n\tx*\r\n}"}, "", "", ":2:4: expected expression instead of \n x*\n ^"}, - {[]string{"-f", "-"}, "\n ++", "", ":2:4: expected expression instead of \n ++\n ^"}, - {[]string{"-f", "testdata/parseerror/good.awk", "-f", "testdata/parseerror/bad.awk"}, - "", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of \nx*\n ^"}, - {[]string{"-f", "testdata/parseerror/bad.awk", "-f", "testdata/parseerror/good.awk"}, - "", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of \nx*\n ^"}, - {[]string{"-f", "testdata/parseerror/good.awk", "-f", "-", "-f", "testdata/parseerror/bad.awk"}, - "`", "", ":1:1: unexpected char\n`\n^"}, - } - for _, test := range tests { - testName := strings.Join(test.args, " ") - t.Run(testName, func(t *testing.T) { - runAWKs(t, test.args, test.stdin, test.output, test.error) - }) - } -} - -func TestDevStdout(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("/dev/stdout not presnt on Windows") - } - runAWKs(t, []string{`BEGIN { print "1"; print "2">"/dev/stdout" }`}, "", "1\n2\n", "") -} - -func runGoAWK(args []string, stdin string) (stdout, stderr string, err error) { - cmd := exec.Command(goAWKExe, args...) - if stdin != "" { - cmd.Stdin = strings.NewReader(stdin) - } - errBuf := &bytes.Buffer{} - cmd.Stderr = errBuf - output, err := cmd.Output() - stdout = string(normalizeNewlines(output)) - stderr = string(normalizeNewlines(errBuf.Bytes())) - return stdout, stderr, err -} - -func runAWKs(t *testing.T, testArgs []string, testStdin, testOutput, testError string) { - var args []string - if strings.Contains(awkExe, "gawk") { - args = append(args, "--posix") - } - args = append(args, testArgs...) - cmd := exec.Command(awkExe, testArgs...) - if testStdin != "" { - cmd.Stdin = strings.NewReader(testStdin) - } - errBuf := &bytes.Buffer{} - cmd.Stderr = errBuf - output, err := cmd.Output() - if err != nil { - if testError == "" { - t.Fatalf("expected no error, got AWK error: %v (%s)", err, errBuf.String()) - } - } else { - if testError != "" { - t.Fatalf("expected AWK error, got none") - } - } - stdout := string(normalizeNewlines(output)) - if stdout != testOutput { - t.Fatalf("expected AWK to give %q, got %q", testOutput, stdout) - } - - stdout, stderr, err := runGoAWK(testArgs, testStdin) - if err != nil { - stderr = strings.TrimSpace(stderr) - if stderr != testError { - t.Fatalf("expected GoAWK error %q, got %q", testError, stderr) - } - } else { - if testError != "" { - t.Fatalf("expected GoAWK error %q, got none", testError) - } - } - if stdout != testOutput { - t.Fatalf("expected GoAWK to give %q, got %q", testOutput, stdout) - } -} - -func TestWildcards(t *testing.T) { - if runtime.GOOS != "windows" { - // Wildcards shouldn't be expanded on non-Windows systems, and a file - // literally named "*.go" doesn't exist, so expect a failure. - _, stderr, err := runGoAWK([]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"}, "") - if err == nil { - t.Fatal("expected error using wildcards on non-Windows system") - } - expected := "file \"testdata/wildcards/*.txt\" not found\n" - if stderr != expected { - t.Fatalf("expected %q, got %q", expected, stderr) - } - return - } - - tests := []struct { - args []string - output string - }{ - { - []string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"}, - "testdata/wildcards/one.txt\ntestdata/wildcards/two.txt\n", - }, - { - []string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/one.txt"}, - "testdata/wildcards/one.txt\nbee\n", - }, - { - []string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/*.txt"}, - "testdata/wildcards/one.txt\nbee\ntestdata/wildcards/two.txt\nbee\n", - }, - } - - for _, test := range tests { - testName := strings.Join(test.args, " ") - t.Run(testName, func(t *testing.T) { - stdout, stderr, err := runGoAWK(test.args, "") - if err != nil { - t.Fatalf("expected no error, got %v (%q)", err, stderr) - } - stdout = strings.Replace(stdout, "\\", "/", -1) - if stdout != test.output { - t.Fatalf("expected %q, got %q", test.output, stdout) - } - }) - } -} - -func TestFILENAME(t *testing.T) { - origGoAWKExe := goAWKExe - goAWKExe = "../../" + goAWKExe - defer func() { goAWKExe = origGoAWKExe }() - - origDir, err := os.Getwd() - if err != nil { - t.Fatal(err) - } - err = os.Chdir("testdata/filename") - if err != nil { - t.Fatal(err) - } - defer os.Chdir(origDir) - - src := ` -BEGIN { FILENAME = "10"; print(FILENAME, FILENAME<2) } -BEGIN { FILENAME = 10; print(FILENAME, FILENAME<2) } -{ print(FILENAME, FILENAME<2) } -` - runAWKs(t, []string{src, "10", "10x"}, "", "10 1\n10 0\n10 0\n10x 1\n", "") -} - -func normalizeNewlines(b []byte) []byte { - return bytes.Replace(b, []byte("\r\n"), []byte{'\n'}, -1) -} - -func TestInputOutputMode(t *testing.T) { - tests := []struct { - args []string - input string - output string - error string - }{ - {[]string{"-icsv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""}, - {[]string{"-i", "csv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""}, - {[]string{"-icsv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""}, - {[]string{"-i", "csv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""}, - {[]string{"-icsv", "-H", "-ocsv", `{ print @"age", @"name" }`}, "name,age\n\"Bo,ba\",42\nJane,37", "42,\"Bo,ba\"\n37,Jane\n", ""}, - {[]string{"-o", "csv", `BEGIN { print "foo,bar", 3.14, "baz" }`}, "", "\"foo,bar\",3.14,baz\n", ""}, - {[]string{"-iabc", `{}`}, "", "", "invalid input mode \"abc\"\n"}, - {[]string{"-oxyz", `{}`}, "", "", "invalid output mode \"xyz\"\n"}, - {[]string{"-H", `{}`}, "", "", "-H only allowed together with -i\n"}, - } - - for _, test := range tests { - testName := strings.Join(test.args, " ") - t.Run(testName, func(t *testing.T) { - stdout, stderr, err := runGoAWK(test.args, test.input) - if err != nil { - if test.error == "" { - t.Fatalf("expected no error, got %v (%q)", err, stderr) - } else if stderr != test.error { - t.Fatalf("expected error message %q, got %q", test.error, stderr) - } - } - if stdout != test.output { - t.Fatalf("expected %q, got %q", test.output, stdout) - } - }) - } -} - -func TestMultipleCSVFiles(t *testing.T) { - // Ensure CSV handling works across multiple files with different headers (field names). - src := ` -{ - for (i=1; i in FIELDS; i++) { - if (i>1) - printf ","; - printf "%s", FIELDS[i] - } - printf " " -} -{ print @"name", @"age" } -` - stdout, stderr, err := runGoAWK([]string{"-i", "csv", "-H", src, "testdata/csv/1.csv", "testdata/csv/2.csv"}, "") - if err != nil { - t.Fatalf("expected no error, got %v (%q)", err, stderr) - } - expected := ` -name,age Bob 42 -name,age Jill 37 -age,email,name Sarah 25 -`[1:] - if stdout != expected { - t.Fatalf("expected %q, got %q", expected, stdout) - } -} - -func TestCSVDocExamples(t *testing.T) { - f, err := os.Open("csv.md") - if err != nil { - t.Fatalf("error opening examples file: %v", err) - } - defer f.Close() - - var ( - command string - output string - truncated bool - n = 1 - ) - runTest := func() { - t.Run(fmt.Sprintf("Example%d", n), func(t *testing.T) { - shell := "/bin/sh" - if runtime.GOOS == "windows" { - shell = "sh" - } - cmd := exec.Command(shell, "-c", command) - gotBytes, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("error running %q: %v\n%s", command, err, gotBytes) - } - got := string(gotBytes) - if truncated { - numLines := strings.Count(output, "\n") - got = strings.Join(strings.Split(got, "\n")[:numLines], "\n") + "\n" - } - got = string(normalizeNewlines([]byte(got))) - if got != output { - t.Fatalf("error running %q\ngot:\n%s\nexpected:\n%s", command, got, output) - } - }) - n++ - } - - scanner := bufio.NewScanner(f) - inTest := false - for scanner.Scan() { - line := scanner.Text() - if strings.HasPrefix(line, "$ goawk") { - if inTest { - runTest() - } - inTest = true - command = "./" + line[2:] - output = "" - truncated = false - } else if inTest { - switch line { - case "```", "": - runTest() - inTest = false - case "...": - truncated = true - runTest() - inTest = false - default: - output += line + "\n" - } - } - } - if scanner.Err() != nil { - t.Errorf("error reading input: %v", scanner.Err()) - } - if inTest { - t.Error("unexpectedly in test at end of file") - } -} - -func TestMandelbrot(t *testing.T) { - stdout, stderr, err := runGoAWK([]string{"-v", "width=80", "-v", "height=25", "-f", "testdata/tt.x1_mandelbrot"}, "") - if err != nil { - t.Fatalf("expected no error, got %v (%q)", err, stderr) - } - expected := ` -................................................................................ -......................................................--+-----.................. -....................................................-----+*+-++-................ -.................................................--------+* *+-----............. -..............................................--------+# #%*-------......... -.........................................------------++$ +-----------..... -...................................---------* # +* # *+++++%+--... -............................----------------++ @ *----.. -.......................-+----------------+$ %+----.. -..................-------*++%++**+++---++ #+--. -...............----------+* #*++* %*---. -.............-------+++++* # #----. -....------+-------++**@ @ ------. -....------+-------++**@ @ ------. -.............-------+++++* # #----. -...............----------+* #*++* %*---. -..................-------*++%++**+++---++ #+--. -.......................-+----------------+$ %+----.. -............................----------------++ @ *----.. -...................................---------* # +* # *+++++%+--... -.........................................------------++$ +-----------..... -..............................................--------+# #%*-------......... -.................................................--------+* *+-----............. -....................................................-----+*+-++-................ -......................................................--+-----.................. -`[1:] - if stdout != expected { - t.Fatalf("expected:\n%s\ngot:\n%s", expected, stdout) - } -} diff --git a/src/tool/awk/internal/ast/ast.go b/src/tool/awk/internal/ast/ast.go deleted file mode 100644 index 3ef56fe..0000000 --- a/src/tool/awk/internal/ast/ast.go +++ /dev/null @@ -1,600 +0,0 @@ -// GoAWK parser - abstract syntax tree structs - -package ast - -import ( - "fmt" - "strconv" - "strings" - - . "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// Program is an entire AWK program. -type Program struct { - Begin []Stmts - Actions []Action - End []Stmts - Functions []Function - Scalars map[string]int - Arrays map[string]int -} - -// String returns an indented, pretty-printed version of the parsed -// program. -func (p *Program) String() string { - parts := []string{} - for _, ss := range p.Begin { - parts = append(parts, "BEGIN {\n"+ss.String()+"}") - } - for _, a := range p.Actions { - parts = append(parts, a.String()) - } - for _, ss := range p.End { - parts = append(parts, "END {\n"+ss.String()+"}") - } - for _, function := range p.Functions { - parts = append(parts, function.String()) - } - return strings.Join(parts, "\n\n") -} - -// Stmts is a block containing multiple statements. -type Stmts []Stmt - -func (ss Stmts) String() string { - lines := []string{} - for _, s := range ss { - subLines := strings.Split(s.String(), "\n") - for _, sl := range subLines { - lines = append(lines, " "+sl+"\n") - } - } - return strings.Join(lines, "") -} - -// Action is pattern-action section of a program. -type Action struct { - Pattern []Expr - Stmts Stmts -} - -func (a *Action) String() string { - patterns := make([]string, len(a.Pattern)) - for i, p := range a.Pattern { - patterns[i] = p.String() - } - sep := "" - if len(patterns) > 0 && a.Stmts != nil { - sep = " " - } - stmtsStr := "" - if a.Stmts != nil { - stmtsStr = "{\n" + a.Stmts.String() + "}" - } - return strings.Join(patterns, ", ") + sep + stmtsStr -} - -// Expr is the abstract syntax tree for any AWK expression. -type Expr interface { - expr() - String() string -} - -// All these types implement the Expr interface. -func (e *FieldExpr) expr() {} -func (e *NamedFieldExpr) expr() {} -func (e *UnaryExpr) expr() {} -func (e *BinaryExpr) expr() {} -func (e *ArrayExpr) expr() {} -func (e *InExpr) expr() {} -func (e *CondExpr) expr() {} -func (e *NumExpr) expr() {} -func (e *StrExpr) expr() {} -func (e *RegExpr) expr() {} -func (e *VarExpr) expr() {} -func (e *IndexExpr) expr() {} -func (e *AssignExpr) expr() {} -func (e *AugAssignExpr) expr() {} -func (e *IncrExpr) expr() {} -func (e *CallExpr) expr() {} -func (e *UserCallExpr) expr() {} -func (e *MultiExpr) expr() {} -func (e *GetlineExpr) expr() {} - -// FieldExpr is an expression like $0. -type FieldExpr struct { - Index Expr -} - -func (e *FieldExpr) String() string { - return "$" + e.Index.String() -} - -// NamedFieldExpr is an expression like @"name". -type NamedFieldExpr struct { - Field Expr -} - -func (e *NamedFieldExpr) String() string { - return "@" + e.Field.String() -} - -// UnaryExpr is an expression like -1234. -type UnaryExpr struct { - Op Token - Value Expr -} - -func (e *UnaryExpr) String() string { - return e.Op.String() + e.Value.String() -} - -// BinaryExpr is an expression like 1 + 2. -type BinaryExpr struct { - Left Expr - Op Token - Right Expr -} - -func (e *BinaryExpr) String() string { - var opStr string - if e.Op == CONCAT { - opStr = " " - } else { - opStr = " " + e.Op.String() + " " - } - return "(" + e.Left.String() + opStr + e.Right.String() + ")" -} - -// ArrayExpr is an array reference. Not really a stand-alone -// expression, except as an argument to split() or a user function -// call. -type ArrayExpr struct { - Scope VarScope - Index int - Name string -} - -func (e *ArrayExpr) String() string { - return e.Name -} - -// InExpr is an expression like (index in array). -type InExpr struct { - Index []Expr - Array *ArrayExpr -} - -func (e *InExpr) String() string { - if len(e.Index) == 1 { - return "(" + e.Index[0].String() + " in " + e.Array.String() + ")" - } - indices := make([]string, len(e.Index)) - for i, index := range e.Index { - indices[i] = index.String() - } - return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")" -} - -// CondExpr is an expression like cond ? 1 : 0. -type CondExpr struct { - Cond Expr - True Expr - False Expr -} - -func (e *CondExpr) String() string { - return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")" -} - -// NumExpr is a literal number like 1234. -type NumExpr struct { - Value float64 -} - -func (e *NumExpr) String() string { - if e.Value == float64(int(e.Value)) { - return strconv.Itoa(int(e.Value)) - } else { - return fmt.Sprintf("%.6g", e.Value) - } -} - -// StrExpr is a literal string like "foo". -type StrExpr struct { - Value string -} - -func (e *StrExpr) String() string { - return strconv.Quote(e.Value) -} - -// RegExpr is a stand-alone regex expression, equivalent to: -// $0 ~ /regex/. -type RegExpr struct { - Regex string -} - -func (e *RegExpr) String() string { - escaped := strings.Replace(e.Regex, "/", `\/`, -1) - return "/" + escaped + "/" -} - -type VarScope int - -const ( - ScopeSpecial VarScope = iota - ScopeGlobal - ScopeLocal -) - -// VarExpr is a variable reference (special var, global, or local). -// Index is the resolved variable index used by the interpreter; Name -// is the original name used by String(). -type VarExpr struct { - Scope VarScope - Index int - Name string -} - -func (e *VarExpr) String() string { - return e.Name -} - -// IndexExpr is an expression like a[k] (rvalue or lvalue). -type IndexExpr struct { - Array *ArrayExpr - Index []Expr -} - -func (e *IndexExpr) String() string { - indices := make([]string, len(e.Index)) - for i, index := range e.Index { - indices[i] = index.String() - } - return e.Array.String() + "[" + strings.Join(indices, ", ") + "]" -} - -// AssignExpr is an expression like x = 1234. -type AssignExpr struct { - Left Expr // can be one of: var, array[x], $n - Right Expr -} - -func (e *AssignExpr) String() string { - return e.Left.String() + " = " + e.Right.String() -} - -// AugAssignExpr is an assignment expression like x += 5. -type AugAssignExpr struct { - Left Expr // can be one of: var, array[x], $n - Op Token - Right Expr -} - -func (e *AugAssignExpr) String() string { - return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String() -} - -// IncrExpr is an increment or decrement expression like x++ or --y. -type IncrExpr struct { - Expr Expr - Op Token - Pre bool -} - -func (e *IncrExpr) String() string { - if e.Pre { - return e.Op.String() + e.Expr.String() - } else { - return e.Expr.String() + e.Op.String() - } -} - -// CallExpr is a builtin function call like length($1). -type CallExpr struct { - Func Token - Args []Expr -} - -func (e *CallExpr) String() string { - args := make([]string, len(e.Args)) - for i, a := range e.Args { - args[i] = a.String() - } - return e.Func.String() + "(" + strings.Join(args, ", ") + ")" -} - -// UserCallExpr is a user-defined function call like my_func(1, 2, 3) -// -// Index is the resolved function index used by the interpreter; Name -// is the original name used by String(). -type UserCallExpr struct { - Native bool // false = AWK-defined function, true = native Go func - Index int - Name string - Args []Expr -} - -func (e *UserCallExpr) String() string { - args := make([]string, len(e.Args)) - for i, a := range e.Args { - args[i] = a.String() - } - return e.Name + "(" + strings.Join(args, ", ") + ")" -} - -// MultiExpr isn't an interpretable expression, but it's used as a -// pseudo-expression for print[f] parsing. -type MultiExpr struct { - Exprs []Expr -} - -func (e *MultiExpr) String() string { - exprs := make([]string, len(e.Exprs)) - for i, e := range e.Exprs { - exprs[i] = e.String() - } - return "(" + strings.Join(exprs, ", ") + ")" -} - -// GetlineExpr is an expression read from file or pipe input. -type GetlineExpr struct { - Command Expr - Target Expr - File Expr -} - -func (e *GetlineExpr) String() string { - s := "" - if e.Command != nil { - s += e.Command.String() + " |" - } - s += "getline" - if e.Target != nil { - s += " " + e.Target.String() - } - if e.File != nil { - s += " <" + e.File.String() - } - return s -} - -// IsLValue returns true if the given expression can be used as an -// lvalue (on the left-hand side of an assignment, in a ++ or -- -// operation, or as the third argument to sub or gsub). -func IsLValue(expr Expr) bool { - switch expr.(type) { - case *VarExpr, *IndexExpr, *FieldExpr: - return true - default: - return false - } -} - -// Stmt is the abstract syntax tree for any AWK statement. -type Stmt interface { - stmt() - String() string -} - -// All these types implement the Stmt interface. -func (s *PrintStmt) stmt() {} -func (s *PrintfStmt) stmt() {} -func (s *ExprStmt) stmt() {} -func (s *IfStmt) stmt() {} -func (s *ForStmt) stmt() {} -func (s *ForInStmt) stmt() {} -func (s *WhileStmt) stmt() {} -func (s *DoWhileStmt) stmt() {} -func (s *BreakStmt) stmt() {} -func (s *ContinueStmt) stmt() {} -func (s *NextStmt) stmt() {} -func (s *ExitStmt) stmt() {} -func (s *DeleteStmt) stmt() {} -func (s *ReturnStmt) stmt() {} -func (s *BlockStmt) stmt() {} - -// PrintStmt is a statement like print $1, $3. -type PrintStmt struct { - Args []Expr - Redirect Token - Dest Expr -} - -func (s *PrintStmt) String() string { - return printString("print", s.Args, s.Redirect, s.Dest) -} - -func printString(f string, args []Expr, redirect Token, dest Expr) string { - parts := make([]string, len(args)) - for i, a := range args { - parts[i] = a.String() - } - str := f + " " + strings.Join(parts, ", ") - if dest != nil { - str += " " + redirect.String() + dest.String() - } - return str -} - -// PrintfStmt is a statement like printf "%3d", 1234. -type PrintfStmt struct { - Args []Expr - Redirect Token - Dest Expr -} - -func (s *PrintfStmt) String() string { - return printString("printf", s.Args, s.Redirect, s.Dest) -} - -// ExprStmt is statement like a bare function call: my_func(x). -type ExprStmt struct { - Expr Expr -} - -func (s *ExprStmt) String() string { - return s.Expr.String() -} - -// IfStmt is an if or if-else statement. -type IfStmt struct { - Cond Expr - Body Stmts - Else Stmts -} - -func (s *IfStmt) String() string { - str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}" - if len(s.Else) > 0 { - str += " else {\n" + s.Else.String() + "}" - } - return str -} - -// ForStmt is a C-like for loop: for (i=0; i<10; i++) print i. -type ForStmt struct { - Pre Stmt - Cond Expr - Post Stmt - Body Stmts -} - -func (s *ForStmt) String() string { - preStr := "" - if s.Pre != nil { - preStr = s.Pre.String() - } - condStr := "" - if s.Cond != nil { - condStr = " " + trimParens(s.Cond.String()) - } - postStr := "" - if s.Post != nil { - postStr = " " + s.Post.String() - } - return "for (" + preStr + ";" + condStr + ";" + postStr + ") {\n" + s.Body.String() + "}" -} - -// ForInStmt is a for loop like for (k in a) print k, a[k]. -type ForInStmt struct { - Var *VarExpr - Array *ArrayExpr - Body Stmts -} - -func (s *ForInStmt) String() string { - return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}" -} - -// WhileStmt is a while loop. -type WhileStmt struct { - Cond Expr - Body Stmts -} - -func (s *WhileStmt) String() string { - return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}" -} - -// DoWhileStmt is a do-while loop. -type DoWhileStmt struct { - Body Stmts - Cond Expr -} - -func (s *DoWhileStmt) String() string { - return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")" -} - -// BreakStmt is a break statement. -type BreakStmt struct{} - -func (s *BreakStmt) String() string { - return "break" -} - -// ContinueStmt is a continue statement. -type ContinueStmt struct{} - -func (s *ContinueStmt) String() string { - return "continue" -} - -// NextStmt is a next statement. -type NextStmt struct{} - -func (s *NextStmt) String() string { - return "next" -} - -// ExitStmt is an exit statement. -type ExitStmt struct { - Status Expr -} - -func (s *ExitStmt) String() string { - var statusStr string - if s.Status != nil { - statusStr = " " + s.Status.String() - } - return "exit" + statusStr -} - -// DeleteStmt is a statement like delete a[k]. -type DeleteStmt struct { - Array *ArrayExpr - Index []Expr -} - -func (s *DeleteStmt) String() string { - indices := make([]string, len(s.Index)) - for i, index := range s.Index { - indices[i] = index.String() - } - return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]" -} - -// ReturnStmt is a return statement. -type ReturnStmt struct { - Value Expr -} - -func (s *ReturnStmt) String() string { - var valueStr string - if s.Value != nil { - valueStr = " " + s.Value.String() - } - return "return" + valueStr -} - -// BlockStmt is a stand-alone block like { print "x" }. -type BlockStmt struct { - Body Stmts -} - -func (s *BlockStmt) String() string { - return "{\n" + s.Body.String() + "}" -} - -// Function is the AST for a user-defined function. -type Function struct { - Name string - Params []string - Arrays []bool - Body Stmts -} - -func (f *Function) String() string { - return "function " + f.Name + "(" + strings.Join(f.Params, ", ") + ") {\n" + - f.Body.String() + "}" -} - -func trimParens(s string) string { - if strings.HasPrefix(s, "(") && strings.HasSuffix(s, ")") { - s = s[1 : len(s)-1] - } - return s -} diff --git a/src/tool/awk/internal/ast/specialvars.go b/src/tool/awk/internal/ast/specialvars.go deleted file mode 100644 index c8207e4..0000000 --- a/src/tool/awk/internal/ast/specialvars.go +++ /dev/null @@ -1,100 +0,0 @@ -// Special variable constants - -package ast - -import ( - "fmt" -) - -const ( - V_ILLEGAL = iota - V_ARGC - V_CONVFMT - V_FILENAME - V_FNR - V_FS - V_INPUTMODE - V_NF - V_NR - V_OFMT - V_OFS - V_ORS - V_OUTPUTMODE - V_RLENGTH - V_RS - V_RSTART - V_RT - V_SUBSEP - - V_LAST = V_SUBSEP -) - -var specialVars = map[string]int{ - "ARGC": V_ARGC, - "CONVFMT": V_CONVFMT, - "FILENAME": V_FILENAME, - "FNR": V_FNR, - "FS": V_FS, - "INPUTMODE": V_INPUTMODE, - "NF": V_NF, - "NR": V_NR, - "OFMT": V_OFMT, - "OFS": V_OFS, - "ORS": V_ORS, - "OUTPUTMODE": V_OUTPUTMODE, - "RLENGTH": V_RLENGTH, - "RS": V_RS, - "RSTART": V_RSTART, - "RT": V_RT, - "SUBSEP": V_SUBSEP, -} - -// SpecialVarIndex returns the "index" of the special variable, or 0 -// if it's not a special variable. -func SpecialVarIndex(name string) int { - return specialVars[name] -} - -// SpecialVarName returns the name of the special variable by index. -func SpecialVarName(index int) string { - switch index { - case V_ILLEGAL: - return "ILLEGAL" - case V_ARGC: - return "ARGC" - case V_CONVFMT: - return "CONVFMT" - case V_FILENAME: - return "FILENAME" - case V_FNR: - return "FNR" - case V_FS: - return "FS" - case V_INPUTMODE: - return "INPUTMODE" - case V_NF: - return "NF" - case V_NR: - return "NR" - case V_OFMT: - return "OFMT" - case V_OFS: - return "OFS" - case V_ORS: - return "ORS" - case V_OUTPUTMODE: - return "OUTPUTMODE" - case V_RLENGTH: - return "RLENGTH" - case V_RS: - return "RS" - case V_RSTART: - return "RSTART" - case V_RT: - return "RT" - case V_SUBSEP: - return "SUBSEP" - default: - return fmt.Sprintf("", index) - } -} diff --git a/src/tool/awk/internal/ast/specialvars_test.go b/src/tool/awk/internal/ast/specialvars_test.go deleted file mode 100644 index 0bc2440..0000000 --- a/src/tool/awk/internal/ast/specialvars_test.go +++ /dev/null @@ -1,46 +0,0 @@ -package ast - -import ( - "testing" -) - -func TestNameIndex(t *testing.T) { - tests := []struct { - name string - index int - }{ - {"ILLEGAL", V_ILLEGAL}, - {"ARGC", V_ARGC}, - {"CONVFMT", V_CONVFMT}, - {"FILENAME", V_FILENAME}, - {"FNR", V_FNR}, - {"FS", V_FS}, - {"INPUTMODE", V_INPUTMODE}, - {"NF", V_NF}, - {"NR", V_NR}, - {"OFMT", V_OFMT}, - {"OFS", V_OFS}, - {"ORS", V_ORS}, - {"OUTPUTMODE", V_OUTPUTMODE}, - {"RLENGTH", V_RLENGTH}, - {"RS", V_RS}, - {"RSTART", V_RSTART}, - {"RT", V_RT}, - {"SUBSEP", V_SUBSEP}, - {"", 42}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - name := SpecialVarName(test.index) - if name != test.name { - t.Errorf("got %q, want %q", name, test.name) - } - if test.index <= V_LAST { - index := SpecialVarIndex(test.name) - if index != test.index { - t.Errorf("got %d, want %d", index, test.index) - } - } - }) - } -} diff --git a/src/tool/awk/internal/compiler/compiler.go b/src/tool/awk/internal/compiler/compiler.go deleted file mode 100644 index 04ee7d3..0000000 --- a/src/tool/awk/internal/compiler/compiler.go +++ /dev/null @@ -1,1005 +0,0 @@ -// Package compiler compiles an AST to virtual machine instructions. -package compiler - -import ( - "fmt" - "math" - "regexp" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// Program holds an entire compiled program. -type Program struct { - Begin []Opcode - Actions []Action - End []Opcode - Functions []Function - Nums []float64 - Strs []string - Regexes []*regexp.Regexp - - // For disassembly - scalarNames []string - arrayNames []string - nativeFuncNames []string -} - -// Action holds a compiled pattern-action block. -type Action struct { - Pattern [][]Opcode - Body []Opcode -} - -// Function holds a compiled function. -type Function struct { - Name string - Params []string - Arrays []bool - NumScalars int - NumArrays int - Body []Opcode -} - -// compileError is the internal error type raised in the rare cases when -// compilation can't succeed, such as program too large (jump offsets greater -// than 2GB). Most actual problems are caught as parse time. -type compileError struct { - message string -} - -func (e *compileError) Error() string { - return e.message -} - -// Compile compiles an AST (parsed program) into virtual machine instructions. -func Compile(prog *ast.Program) (compiledProg *Program, err error) { - defer func() { - // The compiler uses panic with a *compileError to signal compile - // errors internally, and they're caught here. This avoids the - // need to check errors everywhere. - if r := recover(); r != nil { - // Convert to compileError or re-panic - err = r.(*compileError) - } - }() - - p := &Program{} - - // Reuse identical constants across entire program. - indexes := constantIndexes{ - nums: make(map[float64]int), - strs: make(map[string]int), - regexes: make(map[string]int), - } - - // Compile functions. For functions called before they're defined or - // recursive functions, we have to set most p.Functions data first, then - // compile Body afterward. - p.Functions = make([]Function, len(prog.Functions)) - for i, astFunc := range prog.Functions { - numArrays := 0 - for _, a := range astFunc.Arrays { - if a { - numArrays++ - } - } - compiledFunc := Function{ - Name: astFunc.Name, - Params: astFunc.Params, - Arrays: astFunc.Arrays, - NumScalars: len(astFunc.Arrays) - numArrays, - NumArrays: numArrays, - } - p.Functions[i] = compiledFunc - } - for i, astFunc := range prog.Functions { - c := &compiler{program: p, indexes: indexes} - c.stmts(astFunc.Body) - p.Functions[i].Body = c.finish() - } - - // Compile BEGIN blocks. - for _, stmts := range prog.Begin { - c := &compiler{program: p, indexes: indexes} - c.stmts(stmts) - p.Begin = append(p.Begin, c.finish()...) - } - - // Compile pattern-action blocks. - for _, action := range prog.Actions { - var pattern [][]Opcode - switch len(action.Pattern) { - case 0: - // Always considered a match - case 1: - c := &compiler{program: p, indexes: indexes} - c.expr(action.Pattern[0]) - pattern = [][]Opcode{c.finish()} - case 2: - c := &compiler{program: p, indexes: indexes} - c.expr(action.Pattern[0]) - pattern = append(pattern, c.finish()) - c = &compiler{program: p, indexes: indexes} - c.expr(action.Pattern[1]) - pattern = append(pattern, c.finish()) - } - var body []Opcode - if len(action.Stmts) > 0 { - c := &compiler{program: p, indexes: indexes} - c.stmts(action.Stmts) - body = c.finish() - } - p.Actions = append(p.Actions, Action{ - Pattern: pattern, - Body: body, - }) - } - - // Compile END blocks. - for _, stmts := range prog.End { - c := &compiler{program: p, indexes: indexes} - c.stmts(stmts) - p.End = append(p.End, c.finish()...) - } - - // These are only used for disassembly, but set them up here. - p.scalarNames = make([]string, len(prog.Scalars)) - for name, index := range prog.Scalars { - p.scalarNames[index] = name - } - p.arrayNames = make([]string, len(prog.Arrays)) - for name, index := range prog.Arrays { - p.arrayNames[index] = name - } - - return p, nil -} - -// So we can look up the indexes of constants that have been used before. -type constantIndexes struct { - nums map[float64]int - strs map[string]int - regexes map[string]int -} - -// Holds the compilation state. -type compiler struct { - program *Program - indexes constantIndexes - code []Opcode - breaks [][]int - continues [][]int -} - -func (c *compiler) add(ops ...Opcode) { - c.code = append(c.code, ops...) -} - -func (c *compiler) finish() []Opcode { - return c.code -} - -func (c *compiler) stmts(stmts []ast.Stmt) { - for _, stmt := range stmts { - c.stmt(stmt) - } -} - -func (c *compiler) stmt(stmt ast.Stmt) { - switch s := stmt.(type) { - case *ast.ExprStmt: - // Optimize assignment expressions to avoid the extra Dupe and Drop - switch expr := s.Expr.(type) { - case *ast.AssignExpr: - c.expr(expr.Right) - c.assign(expr.Left) - return - - case *ast.IncrExpr: - // Pre or post doesn't matter for an assignment expression - switch target := expr.Expr.(type) { - case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(IncrGlobal, incrAmount(expr.Op), opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(IncrLocal, incrAmount(expr.Op), opcodeInt(target.Index)) - default: // ScopeSpecial - c.add(IncrSpecial, incrAmount(expr.Op), opcodeInt(target.Index)) - } - case *ast.FieldExpr: - c.expr(target.Index) - c.add(IncrField, incrAmount(expr.Op)) - case *ast.IndexExpr: - c.index(target.Index) - switch target.Array.Scope { - case ast.ScopeGlobal: - c.add(IncrArrayGlobal, incrAmount(expr.Op), opcodeInt(target.Array.Index)) - default: // ScopeLocal - c.add(IncrArrayLocal, incrAmount(expr.Op), opcodeInt(target.Array.Index)) - } - } - return - - case *ast.AugAssignExpr: - c.expr(expr.Right) - - var augOp AugOp - switch expr.Op { - case lexer.ADD: - augOp = AugOpAdd - case lexer.SUB: - augOp = AugOpSub - case lexer.MUL: - augOp = AugOpMul - case lexer.DIV: - augOp = AugOpDiv - case lexer.POW: - augOp = AugOpPow - default: // MOD - augOp = AugOpMod - } - - switch target := expr.Left.(type) { - case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(AugAssignGlobal, Opcode(augOp), opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(AugAssignLocal, Opcode(augOp), opcodeInt(target.Index)) - default: // ScopeSpecial - c.add(AugAssignSpecial, Opcode(augOp), opcodeInt(target.Index)) - } - case *ast.FieldExpr: - c.expr(target.Index) - c.add(AugAssignField, Opcode(augOp)) - case *ast.IndexExpr: - c.index(target.Index) - switch target.Array.Scope { - case ast.ScopeGlobal: - c.add(AugAssignArrayGlobal, Opcode(augOp), opcodeInt(target.Array.Index)) - default: // ScopeLocal - c.add(AugAssignArrayLocal, Opcode(augOp), opcodeInt(target.Array.Index)) - } - } - return - } - - // Non-optimized ExprStmt: push value and then drop it - c.expr(s.Expr) - c.add(Drop) - - case *ast.PrintStmt: - if s.Redirect != lexer.ILLEGAL { - c.expr(s.Dest) // redirect destination - } - for _, a := range s.Args { - c.expr(a) - } - c.add(Print, opcodeInt(len(s.Args)), Opcode(s.Redirect)) - - case *ast.PrintfStmt: - if s.Redirect != lexer.ILLEGAL { - c.expr(s.Dest) // redirect destination - } - for _, a := range s.Args { - c.expr(a) - } - c.add(Printf, opcodeInt(len(s.Args)), Opcode(s.Redirect)) - - case *ast.IfStmt: - if len(s.Else) == 0 { - jumpOp := c.condition(s.Cond, true) - ifMark := c.jumpForward(jumpOp) - c.stmts(s.Body) - c.patchForward(ifMark) - } else { - jumpOp := c.condition(s.Cond, true) - ifMark := c.jumpForward(jumpOp) - c.stmts(s.Body) - elseMark := c.jumpForward(Jump) - c.patchForward(ifMark) - c.stmts(s.Else) - c.patchForward(elseMark) - } - - case *ast.ForStmt: - if s.Pre != nil { - c.stmt(s.Pre) - } - c.breaks = append(c.breaks, []int{}) - c.continues = append(c.continues, []int{}) - - // Optimization: include condition once before loop and at the end. - // This avoids one jump (a conditional jump at the top and an - // unconditional one at the end). This idea was stolen from an - // optimization CPython did recently in its "while" loop. - var mark int - if s.Cond != nil { - jumpOp := c.condition(s.Cond, true) - mark = c.jumpForward(jumpOp) - } - - loopStart := c.labelBackward() - c.stmts(s.Body) - c.patchContinues() - if s.Post != nil { - c.stmt(s.Post) - } - - if s.Cond != nil { - jumpOp := c.condition(s.Cond, false) - c.jumpBackward(loopStart, jumpOp) - c.patchForward(mark) - } else { - c.jumpBackward(loopStart, Jump) - } - - c.patchBreaks() - - case *ast.ForInStmt: - // ForIn is handled a bit differently from the other loops, because we - // want to use Go's "for range" construct directly in the interpreter. - // Otherwise we'd need to build a slice of all keys rather than - // iterating, or write our own hash table that has a more flexible - // iterator. - mark := c.jumpForward(ForIn, opcodeInt(int(s.Var.Scope)), opcodeInt(s.Var.Index), - Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) - - c.breaks = append(c.breaks, nil) // nil tells BreakStmt it's a for-in loop - c.continues = append(c.continues, []int{}) - - c.stmts(s.Body) - - c.patchForward(mark) - c.patchContinues() - c.breaks = c.breaks[:len(c.breaks)-1] - - case *ast.ReturnStmt: - if s.Value != nil { - c.expr(s.Value) - c.add(Return) - } else { - c.add(ReturnNull) - } - - case *ast.WhileStmt: - c.breaks = append(c.breaks, []int{}) - c.continues = append(c.continues, []int{}) - - // Optimization: include condition once before loop and at the end. - // See ForStmt for more details. - jumpOp := c.condition(s.Cond, true) - mark := c.jumpForward(jumpOp) - - loopStart := c.labelBackward() - c.stmts(s.Body) - c.patchContinues() - - jumpOp = c.condition(s.Cond, false) - c.jumpBackward(loopStart, jumpOp) - c.patchForward(mark) - - c.patchBreaks() - - case *ast.DoWhileStmt: - c.breaks = append(c.breaks, []int{}) - c.continues = append(c.continues, []int{}) - - loopStart := c.labelBackward() - c.stmts(s.Body) - c.patchContinues() - - jumpOp := c.condition(s.Cond, false) - c.jumpBackward(loopStart, jumpOp) - - c.patchBreaks() - - case *ast.BreakStmt: - i := len(c.breaks) - 1 - if c.breaks[i] == nil { - // Break in for-in loop is executed differently, use errBreak to exit - c.add(BreakForIn) - } else { - mark := c.jumpForward(Jump) - c.breaks[i] = append(c.breaks[i], mark) - } - - case *ast.ContinueStmt: - i := len(c.continues) - 1 - mark := c.jumpForward(Jump) - c.continues[i] = append(c.continues[i], mark) - - case *ast.NextStmt: - c.add(Next) - - case *ast.ExitStmt: - if s.Status != nil { - c.expr(s.Status) - } else { - c.expr(&ast.NumExpr{0}) - } - c.add(Exit) - - case *ast.DeleteStmt: - if len(s.Index) > 0 { - c.index(s.Index) - c.add(Delete, Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) - } else { - c.add(DeleteAll, Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) - } - - case *ast.BlockStmt: - c.stmts(s.Body) - - default: - // Should never happen - panic(fmt.Sprintf("unexpected stmt type: %T", stmt)) - } -} - -// Return the amount (+1 or -1) to add for an increment expression. -func incrAmount(op lexer.Token) Opcode { - if op == lexer.INCR { - return 1 - } else { - return -1 // DECR - } -} - -// Generate opcodes for an assignment. -func (c *compiler) assign(target ast.Expr) { - switch target := target.(type) { - case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(AssignGlobal, opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(AssignLocal, opcodeInt(target.Index)) - case ast.ScopeSpecial: - c.add(AssignSpecial, opcodeInt(target.Index)) - } - case *ast.FieldExpr: - c.expr(target.Index) - c.add(AssignField) - case *ast.IndexExpr: - c.index(target.Index) - switch target.Array.Scope { - case ast.ScopeGlobal: - c.add(AssignArrayGlobal, opcodeInt(target.Array.Index)) - case ast.ScopeLocal: - c.add(AssignArrayLocal, opcodeInt(target.Array.Index)) - } - } -} - -// Convert int to Opcode, raising a *compileError if it doesn't fit. -func opcodeInt(n int) Opcode { - if n > math.MaxInt32 || n < math.MinInt32 { - // Two billion should be enough for anybody. - panic(&compileError{message: fmt.Sprintf("program too large (constant index or jump offset %d doesn't fit in int32)", n)}) - } - return Opcode(n) -} - -// Patch jump addresses for break statements in a loop. -func (c *compiler) patchBreaks() { - breaks := c.breaks[len(c.breaks)-1] - for _, mark := range breaks { - c.patchForward(mark) - } - c.breaks = c.breaks[:len(c.breaks)-1] -} - -// Patch jump addresses for continue statements in a loop -func (c *compiler) patchContinues() { - continues := c.continues[len(c.continues)-1] - for _, mark := range continues { - c.patchForward(mark) - } - c.continues = c.continues[:len(c.continues)-1] -} - -// Generate a forward jump (patched later) and return a "mark". -func (c *compiler) jumpForward(jumpOp Opcode, args ...Opcode) int { - c.add(jumpOp) - c.add(args...) - c.add(0) - return len(c.code) -} - -// Patch a previously-generated forward jump. -func (c *compiler) patchForward(mark int) { - offset := len(c.code) - mark - c.code[mark-1] = opcodeInt(offset) -} - -// Return a "label" for a subsequent backward jump. -func (c *compiler) labelBackward() int { - return len(c.code) -} - -// Jump to a previously-created label. -func (c *compiler) jumpBackward(label int, jumpOp Opcode, args ...Opcode) { - offset := label - (len(c.code) + len(args) + 2) - c.add(jumpOp) - c.add(args...) - c.add(opcodeInt(offset)) -} - -// Generate opcodes for a boolean condition. -func (c *compiler) condition(expr ast.Expr, invert bool) Opcode { - jumpOp := func(normal, inverted Opcode) Opcode { - if invert { - return inverted - } - return normal - } - - switch cond := expr.(type) { - case *ast.BinaryExpr: - // Optimize binary comparison expressions like "x < 10" into just - // JumpLess instead of two instructions (Less and JumpTrue). - switch cond.Op { - case lexer.EQUALS: - c.expr(cond.Left) - c.expr(cond.Right) - return jumpOp(JumpEquals, JumpNotEquals) - - case lexer.NOT_EQUALS: - c.expr(cond.Left) - c.expr(cond.Right) - return jumpOp(JumpNotEquals, JumpEquals) - - case lexer.LESS: - c.expr(cond.Left) - c.expr(cond.Right) - return jumpOp(JumpLess, JumpGreaterOrEqual) - - case lexer.LTE: - c.expr(cond.Left) - c.expr(cond.Right) - return jumpOp(JumpLessOrEqual, JumpGreater) - - case lexer.GREATER: - c.expr(cond.Left) - c.expr(cond.Right) - return jumpOp(JumpGreater, JumpLessOrEqual) - - case lexer.GTE: - c.expr(cond.Left) - c.expr(cond.Right) - return jumpOp(JumpGreaterOrEqual, JumpLess) - } - } - - // Fall back to evaluating the expression normally, followed by JumpTrue - // or JumpFalse. - c.expr(expr) - return jumpOp(JumpTrue, JumpFalse) -} - -func (c *compiler) expr(expr ast.Expr) { - switch e := expr.(type) { - case *ast.NumExpr: - c.add(Num, opcodeInt(c.numIndex(e.Value))) - - case *ast.StrExpr: - c.add(Str, opcodeInt(c.strIndex(e.Value))) - - case *ast.FieldExpr: - switch index := e.Index.(type) { - case *ast.NumExpr: - if index.Value == float64(Opcode(index.Value)) { - // Optimize $i to FieldInt opcode with integer argument - c.add(FieldInt, opcodeInt(int(index.Value))) - return - } - } - c.expr(e.Index) - c.add(Field) - - case *ast.NamedFieldExpr: - switch index := e.Field.(type) { - case *ast.StrExpr: - c.add(FieldByNameStr, opcodeInt(c.strIndex(index.Value))) - return - } - c.expr(e.Field) - c.add(FieldByName) - - case *ast.VarExpr: - switch e.Scope { - case ast.ScopeGlobal: - c.add(Global, opcodeInt(e.Index)) - case ast.ScopeLocal: - c.add(Local, opcodeInt(e.Index)) - case ast.ScopeSpecial: - c.add(Special, opcodeInt(e.Index)) - } - - case *ast.RegExpr: - c.add(Regex, opcodeInt(c.regexIndex(e.Regex))) - - case *ast.BinaryExpr: - // && and || are special cases as they're short-circuit operators. - switch e.Op { - case lexer.AND: - c.expr(e.Left) - c.add(Dupe) - mark := c.jumpForward(JumpFalse) - c.add(Drop) - c.expr(e.Right) - c.patchForward(mark) - c.add(Boolean) - case lexer.OR: - c.expr(e.Left) - c.add(Dupe) - mark := c.jumpForward(JumpTrue) - c.add(Drop) - c.expr(e.Right) - c.patchForward(mark) - c.add(Boolean) - case lexer.CONCAT: - c.concatOp(e) - default: - // All other binary expressions - c.expr(e.Left) - c.expr(e.Right) - c.binaryOp(e.Op) - } - - case *ast.IncrExpr: - // Most IncrExpr (standalone) will be handled by the ExprStmt special case - op := Add - if e.Op == lexer.DECR { - op = Subtract - } - if e.Pre { - c.expr(e.Expr) - c.expr(&ast.NumExpr{1}) - c.add(op) - c.add(Dupe) - } else { - c.expr(e.Expr) - c.expr(&ast.NumExpr{0}) - c.add(Add) - c.add(Dupe) - c.expr(&ast.NumExpr{1}) - c.add(op) - } - c.assign(e.Expr) - - case *ast.AssignExpr: - // Most AssignExpr (standalone) will be handled by the ExprStmt special case - c.expr(e.Right) - c.add(Dupe) - c.assign(e.Left) - - case *ast.AugAssignExpr: - // Most AugAssignExpr (standalone) will be handled by the ExprStmt special case - c.expr(e.Right) - c.expr(e.Left) - c.add(Swap) - c.binaryOp(e.Op) - c.add(Dupe) - c.assign(e.Left) - - case *ast.CondExpr: - jump := c.condition(e.Cond, true) - ifMark := c.jumpForward(jump) - c.expr(e.True) - elseMark := c.jumpForward(Jump) - c.patchForward(ifMark) - c.expr(e.False) - c.patchForward(elseMark) - - case *ast.IndexExpr: - c.index(e.Index) - switch e.Array.Scope { - case ast.ScopeGlobal: - c.add(ArrayGlobal, opcodeInt(e.Array.Index)) - case ast.ScopeLocal: - c.add(ArrayLocal, opcodeInt(e.Array.Index)) - } - - case *ast.CallExpr: - // split and sub/gsub require special cases as they have lvalue arguments - switch e.Func { - case lexer.F_SPLIT: - c.expr(e.Args[0]) - arrayExpr := e.Args[1].(*ast.ArrayExpr) - if len(e.Args) > 2 { - c.expr(e.Args[2]) - c.add(CallSplitSep, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index)) - } else { - c.add(CallSplit, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index)) - } - return - case lexer.F_SUB, lexer.F_GSUB: - op := BuiltinSub - if e.Func == lexer.F_GSUB { - op = BuiltinGsub - } - var target ast.Expr = &ast.FieldExpr{&ast.NumExpr{0}} // default value and target is $0 - if len(e.Args) == 3 { - target = e.Args[2] - } - c.expr(e.Args[0]) - c.expr(e.Args[1]) - c.expr(target) - c.add(CallBuiltin, Opcode(op)) - c.assign(target) - return - } - - for _, arg := range e.Args { - c.expr(arg) - } - switch e.Func { - case lexer.F_ATAN2: - c.add(CallBuiltin, Opcode(BuiltinAtan2)) - case lexer.F_CLOSE: - c.add(CallBuiltin, Opcode(BuiltinClose)) - case lexer.F_COS: - c.add(CallBuiltin, Opcode(BuiltinCos)) - case lexer.F_EXP: - c.add(CallBuiltin, Opcode(BuiltinExp)) - case lexer.F_FFLUSH: - if len(e.Args) > 0 { - c.add(CallBuiltin, Opcode(BuiltinFflush)) - } else { - c.add(CallBuiltin, Opcode(BuiltinFflushAll)) - } - case lexer.F_INDEX: - c.add(CallBuiltin, Opcode(BuiltinIndex)) - case lexer.F_INT: - c.add(CallBuiltin, Opcode(BuiltinInt)) - case lexer.F_LENGTH: - if len(e.Args) > 0 { - c.add(CallBuiltin, Opcode(BuiltinLengthArg)) - } else { - c.add(CallBuiltin, Opcode(BuiltinLength)) - } - case lexer.F_LOG: - c.add(CallBuiltin, Opcode(BuiltinLog)) - case lexer.F_MATCH: - c.add(CallBuiltin, Opcode(BuiltinMatch)) - case lexer.F_RAND: - c.add(CallBuiltin, Opcode(BuiltinRand)) - case lexer.F_SIN: - c.add(CallBuiltin, Opcode(BuiltinSin)) - case lexer.F_SPRINTF: - c.add(CallSprintf, opcodeInt(len(e.Args))) - case lexer.F_SQRT: - c.add(CallBuiltin, Opcode(BuiltinSqrt)) - case lexer.F_SRAND: - if len(e.Args) > 0 { - c.add(CallBuiltin, Opcode(BuiltinSrandSeed)) - } else { - c.add(CallBuiltin, Opcode(BuiltinSrand)) - } - case lexer.F_SUBSTR: - if len(e.Args) > 2 { - c.add(CallBuiltin, Opcode(BuiltinSubstrLength)) - } else { - c.add(CallBuiltin, Opcode(BuiltinSubstr)) - } - case lexer.F_SYSTEM: - c.add(CallBuiltin, Opcode(BuiltinSystem)) - case lexer.F_TOLOWER: - c.add(CallBuiltin, Opcode(BuiltinTolower)) - case lexer.F_TOUPPER: - c.add(CallBuiltin, Opcode(BuiltinToupper)) - default: - panic(fmt.Sprintf("unexpected function: %s", e.Func)) - } - - case *ast.UnaryExpr: - c.expr(e.Value) - switch e.Op { - case lexer.SUB: - c.add(UnaryMinus) - case lexer.NOT: - c.add(Not) - default: // ADD - c.add(UnaryPlus) - } - - case *ast.InExpr: - c.index(e.Index) - switch e.Array.Scope { - case ast.ScopeGlobal: - c.add(InGlobal, opcodeInt(e.Array.Index)) - default: // ScopeLocal - c.add(InLocal, opcodeInt(e.Array.Index)) - } - - case *ast.UserCallExpr: - if e.Native { - for _, arg := range e.Args { - c.expr(arg) - } - c.add(CallNative, opcodeInt(e.Index), opcodeInt(len(e.Args))) - for len(c.program.nativeFuncNames) <= e.Index { - c.program.nativeFuncNames = append(c.program.nativeFuncNames, "") - } - c.program.nativeFuncNames[e.Index] = e.Name - } else { - f := c.program.Functions[e.Index] - var arrayOpcodes []Opcode - numScalarArgs := 0 - for i, arg := range e.Args { - if f.Arrays[i] { - a := arg.(*ast.VarExpr) - arrayOpcodes = append(arrayOpcodes, Opcode(a.Scope), opcodeInt(a.Index)) - } else { - c.expr(arg) - numScalarArgs++ - } - } - if numScalarArgs < f.NumScalars { - c.add(Nulls, opcodeInt(f.NumScalars-numScalarArgs)) - } - c.add(CallUser, opcodeInt(e.Index), opcodeInt(len(arrayOpcodes)/2)) - c.add(arrayOpcodes...) - } - - case *ast.GetlineExpr: - redirect := func() Opcode { - switch { - case e.Command != nil: - c.expr(e.Command) - return Opcode(lexer.PIPE) - case e.File != nil: - c.expr(e.File) - return Opcode(lexer.LESS) - default: - return Opcode(lexer.ILLEGAL) - } - } - switch target := e.Target.(type) { - case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(GetlineGlobal, redirect(), opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(GetlineLocal, redirect(), opcodeInt(target.Index)) - case ast.ScopeSpecial: - c.add(GetlineSpecial, redirect(), opcodeInt(target.Index)) - } - case *ast.FieldExpr: - c.expr(target.Index) - c.add(GetlineField, redirect()) - case *ast.IndexExpr: - c.index(target.Index) - c.add(GetlineArray, redirect(), Opcode(target.Array.Scope), opcodeInt(target.Array.Index)) - default: - c.add(Getline, redirect()) - } - - default: - // Should never happen - panic(fmt.Sprintf("unexpected expr type: %T", expr)) - } -} - -// Generate a Concat opcode or, if possible, compact multiple Concats into one -// ConcatMulti opcode. -func (c *compiler) concatOp(expr *ast.BinaryExpr) { - var values []ast.Expr - for { - values = append(values, expr.Right) - left, isBinary := expr.Left.(*ast.BinaryExpr) - if !isBinary || left.Op != lexer.CONCAT { - break - } - expr = left - } - values = append(values, expr.Left) - - // values are appended right to left - // but need to pushed left to right - - if len(values) == 2 { - c.expr(values[1]) - c.expr(values[0]) - c.add(Concat) - return - } - - for i := len(values) - 1; i >= 0; i-- { - c.expr(values[i]) - } - - c.add(ConcatMulti, opcodeInt(len(values))) -} - -// Add (or reuse) a number constant and returns its index. -func (c *compiler) numIndex(n float64) int { - if index, ok := c.indexes.nums[n]; ok { - return index // reuse existing constant - } - index := len(c.program.Nums) - c.program.Nums = append(c.program.Nums, n) - c.indexes.nums[n] = index - return index -} - -// Add (or reuse) a string constant and returns its index. -func (c *compiler) strIndex(s string) int { - if index, ok := c.indexes.strs[s]; ok { - return index // reuse existing constant - } - index := len(c.program.Strs) - c.program.Strs = append(c.program.Strs, s) - c.indexes.strs[s] = index - return index -} - -// Add (or reuse) a regex constant and returns its index. -func (c *compiler) regexIndex(r string) int { - if index, ok := c.indexes.regexes[r]; ok { - return index // reuse existing constant - } - index := len(c.program.Regexes) - c.program.Regexes = append(c.program.Regexes, regexp.MustCompile(AddRegexFlags(r))) - c.indexes.regexes[r] = index - return index -} - -// AddRegexFlags add the necessary flags to regex to make it work like other -// AWKs (exported so we can also use this in the interpreter). -func AddRegexFlags(regex string) string { - // "s" flag lets . match \n (multi-line matching like other AWKs) - return "(?s:" + regex + ")" -} - -func (c *compiler) binaryOp(op lexer.Token) { - var opcode Opcode - switch op { - case lexer.ADD: - opcode = Add - case lexer.SUB: - opcode = Subtract - case lexer.EQUALS: - opcode = Equals - case lexer.LESS: - opcode = Less - case lexer.LTE: - opcode = LessOrEqual - case lexer.MUL: - opcode = Multiply - case lexer.DIV: - opcode = Divide - case lexer.GREATER: - opcode = Greater - case lexer.GTE: - opcode = GreaterOrEqual - case lexer.NOT_EQUALS: - opcode = NotEquals - case lexer.MATCH: - opcode = Match - case lexer.NOT_MATCH: - opcode = NotMatch - case lexer.POW: - opcode = Power - case lexer.MOD: - opcode = Modulo - default: - panic(fmt.Sprintf("unexpected binary operation: %s", op)) - } - c.add(opcode) -} - -// Generate an array index, handling multi-indexes properly. -func (c *compiler) index(index []ast.Expr) { - for _, expr := range index { - c.expr(expr) - } - if len(index) > 1 { - c.add(IndexMulti, opcodeInt(len(index))) - } -} diff --git a/src/tool/awk/internal/compiler/disassembler.go b/src/tool/awk/internal/compiler/disassembler.go deleted file mode 100644 index 37c2162..0000000 --- a/src/tool/awk/internal/compiler/disassembler.go +++ /dev/null @@ -1,495 +0,0 @@ -// Disassembles compiled program to text assembly instructions - -package compiler - -import ( - "fmt" - "io" - "strings" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// Disassemble writes a human-readable form of the program's virtual machine -// instructions to writer. -func (p *Program) Disassemble(writer io.Writer) error { - if p.Begin != nil { - d := &disassembler{ - program: p, - writer: writer, - code: p.Begin, - nativeFuncNames: p.nativeFuncNames, - } - err := d.disassemble("BEGIN") - if err != nil { - return err - } - } - - for _, action := range p.Actions { - switch len(action.Pattern) { - case 0: - // Nothing to do here. - case 1: - d := &disassembler{ - program: p, - writer: writer, - code: action.Pattern[0], - nativeFuncNames: p.nativeFuncNames, - } - err := d.disassemble("pattern") - if err != nil { - return err - } - case 2: - d := &disassembler{ - program: p, - writer: writer, - code: action.Pattern[0], - nativeFuncNames: p.nativeFuncNames, - } - err := d.disassemble("start") - if err != nil { - return err - } - d = &disassembler{ - program: p, - writer: writer, - code: action.Pattern[1], - nativeFuncNames: p.nativeFuncNames, - } - err = d.disassemble("stop") - if err != nil { - return err - } - } - if len(action.Body) > 0 { - d := &disassembler{ - program: p, - writer: writer, - code: action.Body, - nativeFuncNames: p.nativeFuncNames, - } - err := d.disassemble("{ body }") - if err != nil { - return err - } - } - } - - if p.End != nil { - d := &disassembler{ - program: p, - writer: writer, - code: p.End, - nativeFuncNames: p.nativeFuncNames, - } - err := d.disassemble("END") - if err != nil { - return err - } - } - - for i, f := range p.Functions { - d := &disassembler{ - program: p, - writer: writer, - code: f.Body, - nativeFuncNames: p.nativeFuncNames, - funcIndex: i, - } - err := d.disassemble("function " + f.Name) - if err != nil { - return err - } - } - - return nil -} - -// Disassembles a single block of opcodes. -type disassembler struct { - program *Program - writer io.Writer - code []Opcode - nativeFuncNames []string - funcIndex int - ip int - opAddr int - err error -} - -func (d *disassembler) disassemble(prefix string) error { - if prefix != "" { - d.writef(" // %s\n", prefix) - } - - for d.ip < len(d.code) && d.err == nil { - d.opAddr = d.ip - op := d.fetch() - - switch op { - case Num: - index := d.fetch() - num := d.program.Nums[index] - if num == float64(int(num)) { - d.writeOpf("Num %d (%d)", int(num), index) - } else { - d.writeOpf("Num %.6g (%d)", num, index) - } - - case Str: - index := d.fetch() - d.writeOpf("Str %q (%d)", d.program.Strs[index], index) - - case FieldInt: - index := d.fetch() - d.writeOpf("FieldInt %d", index) - - case FieldByNameStr: - index := d.fetch() - d.writeOpf("FieldByNameStr %q (%d)", d.program.Strs[index], index) - - case Global: - index := d.fetch() - d.writeOpf("Global %s", d.program.scalarNames[index]) - - case Local: - index := int(d.fetch()) - d.writeOpf("Local %s", d.localName(index)) - - case Special: - index := d.fetch() - d.writeOpf("Special %s", ast.SpecialVarName(int(index))) - - case ArrayGlobal: - arrayIndex := d.fetch() - d.writeOpf("ArrayGlobal %s", d.program.arrayNames[arrayIndex]) - - case ArrayLocal: - arrayIndex := d.fetch() - d.writeOpf("ArrayLocal %s", d.localArrayName(int(arrayIndex))) - - case InGlobal: - arrayIndex := d.fetch() - d.writeOpf("InGlobal %s", d.program.arrayNames[arrayIndex]) - - case InLocal: - arrayIndex := int(d.fetch()) - d.writeOpf("InLocal %s", d.localArrayName(arrayIndex)) - - case AssignGlobal: - index := d.fetch() - d.writeOpf("AssignGlobal %s", d.program.scalarNames[index]) - - case AssignLocal: - index := int(d.fetch()) - d.writeOpf("AssignLocal %s", d.localName(index)) - - case AssignSpecial: - index := d.fetch() - d.writeOpf("AssignSpecial %s", ast.SpecialVarName(int(index))) - - case AssignArrayGlobal: - arrayIndex := d.fetch() - d.writeOpf("AssignArrayGlobal %s", d.program.arrayNames[arrayIndex]) - - case AssignArrayLocal: - arrayIndex := int(d.fetch()) - d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex)) - - case Delete: - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex)) - - case DeleteAll: - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex)) - - case IncrField: - amount := d.fetch() - d.writeOpf("IncrField %d", amount) - - case IncrGlobal: - amount := d.fetch() - index := d.fetch() - d.writeOpf("IncrGlobal %d %s", amount, d.program.scalarNames[index]) - - case IncrLocal: - amount := d.fetch() - index := int(d.fetch()) - d.writeOpf("IncrLocal %d %s", amount, d.localName(index)) - - case IncrSpecial: - amount := d.fetch() - index := d.fetch() - d.writeOpf("IncrSpecial %d %s", amount, ast.SpecialVarName(int(index))) - - case IncrArrayGlobal: - amount := d.fetch() - arrayIndex := d.fetch() - d.writeOpf("IncrArrayGlobal %d %s", amount, d.program.arrayNames[arrayIndex]) - - case IncrArrayLocal: - amount := d.fetch() - arrayIndex := int(d.fetch()) - d.writeOpf("IncrArrayLocal %d %s", amount, d.localArrayName(arrayIndex)) - - case AugAssignField: - operation := AugOp(d.fetch()) - d.writeOpf("AugAssignField %s", operation) - - case AugAssignGlobal: - operation := AugOp(d.fetch()) - index := d.fetch() - d.writeOpf("AugAssignGlobal %s %s", operation, d.program.scalarNames[index]) - - case AugAssignLocal: - operation := AugOp(d.fetch()) - index := int(d.fetch()) - d.writeOpf("AugAssignLocal %s %s", operation, d.localName(index)) - - case AugAssignSpecial: - operation := AugOp(d.fetch()) - index := d.fetch() - d.writeOpf("AugAssignSpecial %s %d", operation, ast.SpecialVarName(int(index))) - - case AugAssignArrayGlobal: - operation := AugOp(d.fetch()) - arrayIndex := d.fetch() - d.writeOpf("AugAssignArrayGlobal %s %s", operation, d.program.arrayNames[arrayIndex]) - - case AugAssignArrayLocal: - operation := AugOp(d.fetch()) - arrayIndex := int(d.fetch()) - d.writeOpf("AugAssignArrayLocal %s %s", operation, d.localArrayName(arrayIndex)) - - case Regex: - regexIndex := d.fetch() - d.writeOpf("Regex %q (%d)", d.program.Regexes[regexIndex], regexIndex) - - case IndexMulti: - num := d.fetch() - d.writeOpf("IndexMulti %d", num) - - case ConcatMulti: - num := d.fetch() - d.writeOpf("ConcatMulti %d", num) - - case Jump: - offset := d.fetch() - d.writeOpf("Jump 0x%04x", d.ip+int(offset)) - - case JumpFalse: - offset := d.fetch() - d.writeOpf("JumpFalse 0x%04x", d.ip+int(offset)) - - case JumpTrue: - offset := d.fetch() - d.writeOpf("JumpTrue 0x%04x", d.ip+int(offset)) - - case JumpEquals: - offset := d.fetch() - d.writeOpf("JumpEquals 0x%04x", d.ip+int(offset)) - - case JumpNotEquals: - offset := d.fetch() - d.writeOpf("JumpNotEquals 0x%04x", d.ip+int(offset)) - - case JumpLess: - offset := d.fetch() - d.writeOpf("JumpLess 0x%04x", d.ip+int(offset)) - - case JumpGreater: - offset := d.fetch() - d.writeOpf("JumpGreater 0x%04x", d.ip+int(offset)) - - case JumpLessOrEqual: - offset := d.fetch() - d.writeOpf("JumpLessOrEqual 0x%04x", d.ip+int(offset)) - - case JumpGreaterOrEqual: - offset := d.fetch() - d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset)) - - case ForIn: - varScope := ast.VarScope(d.fetch()) - varIndex := int(d.fetch()) - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - offset := d.fetch() - d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset)) - - case CallBuiltin: - builtinOp := BuiltinOp(d.fetch()) - d.writeOpf("CallBuiltin %s", builtinOp) - - case CallSplit: - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex)) - - case CallSplitSep: - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex)) - - case CallSprintf: - numArgs := d.fetch() - d.writeOpf("CallSprintf %d", numArgs) - - case CallUser: - funcIndex := d.fetch() - numArrayArgs := int(d.fetch()) - var arrayArgs []string - for i := 0; i < numArrayArgs; i++ { - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex)) - } - d.writeOpf("CallUser %s [%s]", d.program.Functions[funcIndex].Name, strings.Join(arrayArgs, ", ")) - - case CallNative: - funcIndex := d.fetch() - numArgs := d.fetch() - d.writeOpf("CallNative %s %d", d.nativeFuncNames[funcIndex], numArgs) - - case Nulls: - numNulls := d.fetch() - d.writeOpf("Nulls %d", numNulls) - - case Print: - numArgs := d.fetch() - redirect := lexer.Token(d.fetch()) - if redirect == lexer.ILLEGAL { - d.writeOpf("Print %d", numArgs) - } else { - d.writeOpf("Print %d %s", numArgs, redirect) - } - - case Printf: - numArgs := d.fetch() - redirect := lexer.Token(d.fetch()) - if redirect == lexer.ILLEGAL { - d.writeOpf("Printf %d", numArgs) - } else { - d.writeOpf("Printf %d %s", numArgs, redirect) - } - - case Getline: - redirect := lexer.Token(d.fetch()) - d.writeOpf("Getline %s", redirect) - - case GetlineField: - redirect := lexer.Token(d.fetch()) - d.writeOpf("GetlineField %s", redirect) - - case GetlineGlobal: - redirect := lexer.Token(d.fetch()) - index := d.fetch() - d.writeOpf("GetlineGlobal %s %s", redirect, d.program.scalarNames[index]) - - case GetlineLocal: - redirect := lexer.Token(d.fetch()) - index := int(d.fetch()) - d.writeOpf("GetlineLocal %s %s", redirect, d.localName(index)) - - case GetlineSpecial: - redirect := lexer.Token(d.fetch()) - index := d.fetch() - d.writeOpf("GetlineSpecial %s %s", redirect, ast.SpecialVarName(int(index))) - - case GetlineArray: - redirect := lexer.Token(d.fetch()) - arrayScope := ast.VarScope(d.fetch()) - arrayIndex := int(d.fetch()) - d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex)) - - default: - // Handles all other opcodes with no arguments - d.writeOpf("%s", op) - } - } - - d.writef("\n") - return d.err -} - -// Fetch the next opcode and increment the "instruction pointer". -func (d *disassembler) fetch() Opcode { - op := d.code[d.ip] - d.ip++ - return op -} - -// Write formatted string to the disassembly output. -func (d *disassembler) writef(format string, args ...interface{}) { - if d.err != nil { - return - } - _, d.err = fmt.Fprintf(d.writer, format, args...) -} - -// Write formatted opcode (with address and newline) to disassembly output. -func (d *disassembler) writeOpf(format string, args ...interface{}) { - if d.err != nil { - return - } - addrStr := fmt.Sprintf("%04x", d.opAddr) - _, d.err = fmt.Fprintf(d.writer, addrStr+" "+format+"\n", args...) -} - -// Return the scalar variable name described by scope and index. -func (d *disassembler) varName(scope ast.VarScope, index int) string { - switch scope { - case ast.ScopeGlobal: - return d.program.scalarNames[index] - case ast.ScopeLocal: - return d.localName(index) - default: // ScopeSpecial - return ast.SpecialVarName(index) - } -} - -// Return the local variable name with the given index. -func (d *disassembler) localName(index int) string { - f := d.program.Functions[d.funcIndex] - n := 0 - for i, p := range f.Params { - if f.Arrays[i] { - continue - } - if n == index { - return p - } - n++ - } - panic(fmt.Sprintf("unexpected local variable index %d", index)) -} - -// Return the array variable name describes by scope and index. -func (d *disassembler) arrayName(scope ast.VarScope, index int) string { - if scope == ast.ScopeLocal { - return d.localArrayName(index) - } - return d.program.arrayNames[index] -} - -// Return the local array name with the given index. -func (d *disassembler) localArrayName(index int) string { - f := d.program.Functions[d.funcIndex] - n := 0 - for i, p := range f.Params { - if !f.Arrays[i] { - continue - } - if n == index { - return p - } - n++ - } - panic(fmt.Sprintf("unexpected local array index %d", index)) -} diff --git a/src/tool/awk/internal/compiler/disassembler_test.go b/src/tool/awk/internal/compiler/disassembler_test.go deleted file mode 100644 index 297224d..0000000 --- a/src/tool/awk/internal/compiler/disassembler_test.go +++ /dev/null @@ -1,51 +0,0 @@ -package compiler - -import ( - "bytes" - "regexp" - "strings" - "testing" -) - -func TestDisassembler(t *testing.T) { - // Note: this doesn't really test the disassembly, just that each opcode - // disassembly includes the opcode name, to help catch silly typos. - for op := Nop; op < EndOpcode; op++ { - t.Run(op.String(), func(t *testing.T) { - p := Program{ - Begin: []Opcode{op, 0, 0, 0, 0, 0, 0, 0}, - Functions: []Function{ - { - Name: "f", - Params: []string{"a", "k"}, - Arrays: []bool{true, false}, - NumScalars: 1, - NumArrays: 1, - }, - }, - Nums: []float64{0}, - Strs: []string{""}, - Regexes: []*regexp.Regexp{regexp.MustCompile("")}, - scalarNames: []string{"s"}, - arrayNames: []string{"a"}, - nativeFuncNames: []string{"n"}, - } - var buf bytes.Buffer - err := p.Disassemble(&buf) - if err != nil { - t.Fatalf("error disassembling opcode %s: %v", op, err) - } - lines := strings.Split(buf.String(), "\n") - if strings.TrimSpace(lines[0]) != "// BEGIN" { - t.Fatalf("first line should be \"// BEGIN\", not %q", lines[0]) - } - fields := strings.Fields(lines[1]) - if fields[0] != "0000" { - t.Fatalf("address should be \"0000\", not %q", fields[0]) - } - if fields[1] != op.String() { - t.Fatalf("opcode name should be %q, not %q", op.String(), fields[1]) - } - }) - } -} diff --git a/src/tool/awk/internal/compiler/opcode_string.go b/src/tool/awk/internal/compiler/opcode_string.go deleted file mode 100644 index bfa2f0c..0000000 --- a/src/tool/awk/internal/compiler/opcode_string.go +++ /dev/null @@ -1,174 +0,0 @@ -// Code generated by "stringer -type=Opcode,AugOp,BuiltinOp"; DO NOT EDIT. - -package compiler - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[Nop-0] - _ = x[Num-1] - _ = x[Str-2] - _ = x[Dupe-3] - _ = x[Drop-4] - _ = x[Swap-5] - _ = x[Field-6] - _ = x[FieldInt-7] - _ = x[FieldByName-8] - _ = x[FieldByNameStr-9] - _ = x[Global-10] - _ = x[Local-11] - _ = x[Special-12] - _ = x[ArrayGlobal-13] - _ = x[ArrayLocal-14] - _ = x[InGlobal-15] - _ = x[InLocal-16] - _ = x[AssignField-17] - _ = x[AssignGlobal-18] - _ = x[AssignLocal-19] - _ = x[AssignSpecial-20] - _ = x[AssignArrayGlobal-21] - _ = x[AssignArrayLocal-22] - _ = x[Delete-23] - _ = x[DeleteAll-24] - _ = x[IncrField-25] - _ = x[IncrGlobal-26] - _ = x[IncrLocal-27] - _ = x[IncrSpecial-28] - _ = x[IncrArrayGlobal-29] - _ = x[IncrArrayLocal-30] - _ = x[AugAssignField-31] - _ = x[AugAssignGlobal-32] - _ = x[AugAssignLocal-33] - _ = x[AugAssignSpecial-34] - _ = x[AugAssignArrayGlobal-35] - _ = x[AugAssignArrayLocal-36] - _ = x[Regex-37] - _ = x[IndexMulti-38] - _ = x[ConcatMulti-39] - _ = x[Add-40] - _ = x[Subtract-41] - _ = x[Multiply-42] - _ = x[Divide-43] - _ = x[Power-44] - _ = x[Modulo-45] - _ = x[Equals-46] - _ = x[NotEquals-47] - _ = x[Less-48] - _ = x[Greater-49] - _ = x[LessOrEqual-50] - _ = x[GreaterOrEqual-51] - _ = x[Concat-52] - _ = x[Match-53] - _ = x[NotMatch-54] - _ = x[Not-55] - _ = x[UnaryMinus-56] - _ = x[UnaryPlus-57] - _ = x[Boolean-58] - _ = x[Jump-59] - _ = x[JumpFalse-60] - _ = x[JumpTrue-61] - _ = x[JumpEquals-62] - _ = x[JumpNotEquals-63] - _ = x[JumpLess-64] - _ = x[JumpGreater-65] - _ = x[JumpLessOrEqual-66] - _ = x[JumpGreaterOrEqual-67] - _ = x[Next-68] - _ = x[Exit-69] - _ = x[ForIn-70] - _ = x[BreakForIn-71] - _ = x[CallBuiltin-72] - _ = x[CallSplit-73] - _ = x[CallSplitSep-74] - _ = x[CallSprintf-75] - _ = x[CallUser-76] - _ = x[CallNative-77] - _ = x[Return-78] - _ = x[ReturnNull-79] - _ = x[Nulls-80] - _ = x[Print-81] - _ = x[Printf-82] - _ = x[Getline-83] - _ = x[GetlineField-84] - _ = x[GetlineGlobal-85] - _ = x[GetlineLocal-86] - _ = x[GetlineSpecial-87] - _ = x[GetlineArray-88] - _ = x[EndOpcode-89] -} - -const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode" - -var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826} - -func (i Opcode) String() string { - if i < 0 || i >= Opcode(len(_Opcode_index)-1) { - return "Opcode(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _Opcode_name[_Opcode_index[i]:_Opcode_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[AugOpAdd-0] - _ = x[AugOpSub-1] - _ = x[AugOpMul-2] - _ = x[AugOpDiv-3] - _ = x[AugOpPow-4] - _ = x[AugOpMod-5] -} - -const _AugOp_name = "AugOpAddAugOpSubAugOpMulAugOpDivAugOpPowAugOpMod" - -var _AugOp_index = [...]uint8{0, 8, 16, 24, 32, 40, 48} - -func (i AugOp) String() string { - if i < 0 || i >= AugOp(len(_AugOp_index)-1) { - return "AugOp(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _AugOp_name[_AugOp_index[i]:_AugOp_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[BuiltinAtan2-0] - _ = x[BuiltinClose-1] - _ = x[BuiltinCos-2] - _ = x[BuiltinExp-3] - _ = x[BuiltinFflush-4] - _ = x[BuiltinFflushAll-5] - _ = x[BuiltinGsub-6] - _ = x[BuiltinIndex-7] - _ = x[BuiltinInt-8] - _ = x[BuiltinLength-9] - _ = x[BuiltinLengthArg-10] - _ = x[BuiltinLog-11] - _ = x[BuiltinMatch-12] - _ = x[BuiltinRand-13] - _ = x[BuiltinSin-14] - _ = x[BuiltinSqrt-15] - _ = x[BuiltinSrand-16] - _ = x[BuiltinSrandSeed-17] - _ = x[BuiltinSub-18] - _ = x[BuiltinSubstr-19] - _ = x[BuiltinSubstrLength-20] - _ = x[BuiltinSystem-21] - _ = x[BuiltinTolower-22] - _ = x[BuiltinToupper-23] -} - -const _BuiltinOp_name = "BuiltinAtan2BuiltinCloseBuiltinCosBuiltinExpBuiltinFflushBuiltinFflushAllBuiltinGsubBuiltinIndexBuiltinIntBuiltinLengthBuiltinLengthArgBuiltinLogBuiltinMatchBuiltinRandBuiltinSinBuiltinSqrtBuiltinSrandBuiltinSrandSeedBuiltinSubBuiltinSubstrBuiltinSubstrLengthBuiltinSystemBuiltinTolowerBuiltinToupper" - -var _BuiltinOp_index = [...]uint16{0, 12, 24, 34, 44, 57, 73, 84, 96, 106, 119, 135, 145, 157, 168, 178, 189, 201, 217, 227, 240, 259, 272, 286, 300} - -func (i BuiltinOp) String() string { - if i < 0 || i >= BuiltinOp(len(_BuiltinOp_index)-1) { - return "BuiltinOp(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _BuiltinOp_name[_BuiltinOp_index[i]:_BuiltinOp_index[i+1]] -} diff --git a/src/tool/awk/internal/compiler/opcodes.go b/src/tool/awk/internal/compiler/opcodes.go deleted file mode 100644 index 36c4c93..0000000 --- a/src/tool/awk/internal/compiler/opcodes.go +++ /dev/null @@ -1,180 +0,0 @@ -package compiler - -//go:generate go run golang.org/x/tools/cmd/stringer@v0.1.8 -type=Opcode,AugOp,BuiltinOp - -// Opcode represents a single virtual machine instruction (or argument). The -// comments beside each opcode show any arguments that instruction consumes. -// -// Normally this is called "bytecode", but I've avoided that term here as each -// opcode is a 32-bit word, not an 8-bit byte. -// -// I tested various bit widths, and I believe 32 bit was the fastest, but also -// means we don't have to worry about jump offsets overflowing. That's tested -// in the compiler, but who's going to have an AWK program bigger than 2GB? -type Opcode int32 - -const ( - Nop Opcode = iota - - // Stack operations - Num // numIndex - Str // strIndex - Dupe - Drop - Swap - - // Fetch a field, variable, or array item - Field - FieldInt // index - FieldByName - FieldByNameStr // strIndex - Global // index - Local // index - Special // index - ArrayGlobal // arrayIndex - ArrayLocal // arrayIndex - InGlobal // arrayIndex - InLocal // arrayIndex - - // Assign a field, variable, or array item - AssignField - AssignGlobal // index - AssignLocal // index - AssignSpecial // index - AssignArrayGlobal // arrayIndex - AssignArrayLocal // arrayIndex - - // Delete statement - Delete // arrayScope arrayIndex - DeleteAll // arrayScope arrayIndex - - // Post-increment and post-decrement - IncrField // amount - IncrGlobal // amount index - IncrLocal // amount index - IncrSpecial // amount index - IncrArrayGlobal // amount arrayIndex - IncrArrayLocal // amount arrayIndex - - // Augmented assignment (also used for pre-increment and pre-decrement) - AugAssignField // augOp - AugAssignGlobal // augOp index - AugAssignLocal // augOp index - AugAssignSpecial // augOp index - AugAssignArrayGlobal // augOp arrayIndex - AugAssignArrayLocal // augOp arrayIndex - - // Stand-alone regex expression /foo/ - Regex // regexIndex - - // Multi-index concatenation - IndexMulti // num - - // Multi-value concatenation - ConcatMulti // num - - // Binary operators - Add - Subtract - Multiply - Divide - Power - Modulo - Equals - NotEquals - Less - Greater - LessOrEqual - GreaterOrEqual - Concat - Match - NotMatch - - // Unary operators - Not - UnaryMinus - UnaryPlus - Boolean - - // Control flow - Jump // offset - JumpFalse // offset - JumpTrue // offset - JumpEquals // offset - JumpNotEquals // offset - JumpLess // offset - JumpGreater // offset - JumpLessOrEqual // offset - JumpGreaterOrEqual // offset - Next - Exit - ForIn // varScope varIndex arrayScope arrayIndex offset - BreakForIn - - // Builtin functions - CallBuiltin // builtinOp - CallSplit // arrayScope arrayIndex - CallSplitSep // arrayScope arrayIndex - CallSprintf // numArgs - - // User and native functions - CallUser // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...] - CallNative // funcIndex numArgs - Return - ReturnNull - Nulls // numNulls - - // Print, printf, and getline - Print // numArgs redirect - Printf // numArgs redirect - Getline // redirect - GetlineField // redirect - GetlineGlobal // redirect index - GetlineLocal // redirect index - GetlineSpecial // redirect index - GetlineArray // redirect arrayScope arrayIndex - - EndOpcode -) - -// AugOp represents an augmented assignment operation. -type AugOp Opcode - -const ( - AugOpAdd AugOp = iota - AugOpSub - AugOpMul - AugOpDiv - AugOpPow - AugOpMod -) - -// BuiltinOp represents a builtin function call. -type BuiltinOp Opcode - -const ( - BuiltinAtan2 BuiltinOp = iota - BuiltinClose - BuiltinCos - BuiltinExp - BuiltinFflush - BuiltinFflushAll - BuiltinGsub - BuiltinIndex - BuiltinInt - BuiltinLength - BuiltinLengthArg - BuiltinLog - BuiltinMatch - BuiltinRand - BuiltinSin - BuiltinSqrt - BuiltinSrand - BuiltinSrandSeed - BuiltinSub - BuiltinSubstr - BuiltinSubstrLength - BuiltinSystem - BuiltinTolower - BuiltinToupper -) diff --git a/src/tool/awk/interp/csvreader_test.go b/src/tool/awk/interp/csvreader_test.go deleted file mode 100644 index cb76fc2..0000000 --- a/src/tool/awk/interp/csvreader_test.go +++ /dev/null @@ -1,392 +0,0 @@ -// Tests copied from encoding/csv to ensure we pass all the relevant cases. - -// These tests are a subset of those in encoding/csv used to test Reader. -// However, the §, ¶ and ∑ special characters (for error positions) have been -// removed, and some tests have been removed or tweaked slightly because we -// don't support all the encoding/csv features (FieldsPerRecord is not -// supported, LazyQuotes is always on, and TrimLeadingSpace is always off). - -package interp - -import ( - "bufio" - "encoding/csv" - "reflect" - "strings" - "testing" - "unicode/utf8" -) - -type readTest struct { - Name string - Input string - Output [][]string - Error string - - // These fields are copied into the CSVInputConfig - Comma rune - Comment rune -} - -var readTests = []readTest{{ - Name: "Simple", - Input: "a,b,c\n", - Output: [][]string{{"a", "b", "c"}}, -}, { - Name: "CRLF", - Input: "a,b\r\nc,d\r\n", - Output: [][]string{{"a", "b"}, {"c", "d"}}, -}, { - Name: "BareCR", - Input: "a,b\rc,d\r\n", - Output: [][]string{{"a", "b\rc", "d"}}, -}, { - Name: "RFC4180test", - Input: `#field1,field2,field3 -"aaa","bb -b","ccc" -"a,a","b""bb","ccc" -zzz,yyy,xxx -`, - Output: [][]string{ - {"#field1", "field2", "field3"}, - {"aaa", "bb\nb", "ccc"}, - {"a,a", `b"bb`, "ccc"}, - {"zzz", "yyy", "xxx"}, - }, -}, { - Name: "NoEOLTest", - Input: "a,b,c", - Output: [][]string{{"a", "b", "c"}}, -}, { - Name: "Semicolon", - Input: "a;b;c\n", - Output: [][]string{{"a", "b", "c"}}, - Comma: ';', -}, { - Name: "MultiLine", - Input: `"two -line","one line","three -line -field"`, - Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, -}, { - Name: "BlankLine", - Input: "a,b,c\n\nd,e,f\n\n", - Output: [][]string{ - {"a", "b", "c"}, - {"d", "e", "f"}, - }, -}, { - Name: "BlankLineFieldCount", - Input: "a,b,c\n\nd,e,f\n\n", - Output: [][]string{ - {"a", "b", "c"}, - {"d", "e", "f"}, - }, -}, { - Name: "LeadingSpace", - Input: " a, b, c\n", - Output: [][]string{{" a", " b", " c"}}, -}, { - Name: "Comment", - Input: "#1,2,3\na,b,c\n#comment", - Output: [][]string{{"a", "b", "c"}}, - Comment: '#', -}, { - Name: "NoComment", - Input: "#1,2,3\na,b,c", - Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, -}, { - Name: "LazyQuotes", - Input: `a "word","1"2",a","b`, - Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, -}, { - Name: "BareQuotes", - Input: `a "word","1"2",a"`, - Output: [][]string{{`a "word"`, `1"2`, `a"`}}, -}, { - Name: "BareDoubleQuotes", - Input: `a""b,c`, - Output: [][]string{{`a""b`, `c`}}, -}, { - Name: "TrimQuote", - Input: `"a"," b",c`, - Output: [][]string{{"a", " b", "c"}}, -}, { - Name: "FieldCount", - Input: "a,b,c\nd,e", - Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, -}, { - Name: "TrailingCommaEOF", - Input: "a,b,c,", - Output: [][]string{{"a", "b", "c", ""}}, -}, { - Name: "TrailingCommaEOL", - Input: "a,b,c,\n", - Output: [][]string{{"a", "b", "c", ""}}, -}, { - Name: "TrailingCommaSpaceEOF", - Input: "a,b,c, ", - Output: [][]string{{"a", "b", "c", " "}}, -}, { - Name: "TrailingCommaSpaceEOL", - Input: "a,b,c, \n", - Output: [][]string{{"a", "b", "c", " "}}, -}, { - Name: "TrailingCommaLine3", - Input: "a,b,c\nd,e,f\ng,hi,", - Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, -}, { - Name: "NotTrailingComma3", - Input: "a,b,c, \n", - Output: [][]string{{"a", "b", "c", " "}}, -}, { - Name: "CommaFieldTest", - Input: `x,y,z,w -x,y,z, -x,y,, -x,,, -,,, -"x","y","z","w" -"x","y","z","" -"x","y","","" -"x","","","" -"","","","" -`, - Output: [][]string{ - {"x", "y", "z", "w"}, - {"x", "y", "z", ""}, - {"x", "y", "", ""}, - {"x", "", "", ""}, - {"", "", "", ""}, - {"x", "y", "z", "w"}, - {"x", "y", "z", ""}, - {"x", "y", "", ""}, - {"x", "", "", ""}, - {"", "", "", ""}, - }, -}, { - Name: "TrailingCommaIneffective1", - Input: "a,b,\nc,d,e", - Output: [][]string{ - {"a", "b", ""}, - {"c", "d", "e"}, - }, -}, { - Name: "ReadAllReuseRecord", - Input: "a,b\nc,d", - Output: [][]string{ - {"a", "b"}, - {"c", "d"}, - }, -}, { - Name: "CRLFInQuotedField", // Issue 21201 - Input: "A,\"Hello\r\nHi\",B\r\n", - Output: [][]string{ - {"A", "Hello\nHi", "B"}, - }, -}, { - Name: "BinaryBlobField", // Issue 19410 - Input: "x09\x41\xb4\x1c,aktau", - Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, -}, { - Name: "TrailingCR", - Input: "field1,field2\r", - Output: [][]string{{"field1", "field2"}}, -}, { - Name: "QuotedTrailingCR", - Input: "\"field\"\r", - Output: [][]string{{"field"}}, -}, { - Name: "FieldCR", - Input: "field\rfield\r", - Output: [][]string{{"field\rfield"}}, -}, { - Name: "FieldCRCR", - Input: "field\r\rfield\r\r", - Output: [][]string{{"field\r\rfield\r"}}, -}, { - Name: "FieldCRCRLF", - Input: "field\r\r\nfield\r\r\n", - Output: [][]string{{"field\r"}, {"field\r"}}, -}, { - Name: "FieldCRCRLFCR", - Input: "field\r\r\n\rfield\r\r\n\r", - Output: [][]string{{"field\r"}, {"\rfield\r"}}, -}, { - Name: "FieldCRCRLFCRCR", - Input: "field\r\r\n\r\rfield\r\r\n\r\r", - Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, -}, { - Name: "MultiFieldCRCRLFCRCR", - Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", - Output: [][]string{ - {"field1", "field2\r"}, - {"\r\rfield1", "field2\r"}, - {"\r\r", ""}, - }, -}, { - Name: "NonASCIICommaAndComment", - Input: "a£b,c£ \td,e\n€ comment\n", - Output: [][]string{{"a", "b,c", " \td,e"}}, - Comma: '£', - Comment: '€', -}, { - Name: "NonASCIICommaAndCommentWithQuotes", - Input: "a€\" b,\"€ c\nλ comment\n", - Output: [][]string{{"a", " b,", " c"}}, - Comma: '€', - Comment: 'λ', -}, { - // λ and θ start with the same byte. - // This tests that the parser doesn't confuse such characters. - Name: "NonASCIICommaConfusion", - Input: "\"abθcd\"λefθgh", - Output: [][]string{{"abθcd", "efθgh"}}, - Comma: 'λ', - Comment: '€', -}, { - Name: "NonASCIICommentConfusion", - Input: "λ\nλ\nθ\nλ\n", - Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, - Comment: 'θ', -}, { - Name: "QuotedFieldMultipleLF", - Input: "\"\n\n\n\n\"", - Output: [][]string{{"\n\n\n\n"}}, -}, { - Name: "MultipleCRLF", - Input: "\r\n\r\n\r\n\r\n", -}, { - // The implementation may read each line in several chunks if it doesn't fit entirely - // in the read buffer, so we should test the code to handle that condition. - Name: "HugeLines", - Input: strings.Repeat("#ignore\n", 10000) + "" + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), - Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, - Comment: '#', -}, { - Name: "LazyQuoteWithTrailingCRLF", - Input: "\"foo\"bar\"\r\n", - Output: [][]string{{`foo"bar`}}, -}, { - Name: "DoubleQuoteWithTrailingCRLF", - Input: "\"foo\"\"bar\"\r\n", - Output: [][]string{{`foo"bar`}}, -}, { - Name: "EvenQuotes", - Input: `""""""""`, - Output: [][]string{{`"""`}}, -}, { - Name: "LazyOddQuotes", - Input: `"""""""`, - Output: [][]string{{`"""`}}, -}, { - Name: "BadComma1", - Comma: '\n', - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadComma2", - Comma: '\r', - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadComma3", - Comma: '"', - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadComma4", - Comma: utf8.RuneError, - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadComment1", - Comment: '\n', - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadComment2", - Comment: '\r', - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadComment3", - Comment: utf8.RuneError, - Error: "invalid CSV field separator or comment delimiter", -}, { - Name: "BadCommaComment", - Comma: 'X', - Comment: 'X', - Error: "invalid CSV field separator or comment delimiter", -}} - -func TestCSVReader(t *testing.T) { - for _, tt := range readTests { - t.Run(tt.Name, func(t *testing.T) { - inputConfig := CSVInputConfig{ - Separator: tt.Comma, - Comment: tt.Comment, - } - if inputConfig.Separator == 0 { - inputConfig.Separator = ',' - } - - var out [][]string - err := validateCSVInputConfig(CSVMode, inputConfig) - if err == nil { - var fields []string - splitter := csvSplitter{ - separator: inputConfig.Separator, - sepLen: utf8.RuneLen(inputConfig.Separator), - comment: inputConfig.Comment, - fields: &fields, - } - scanner := bufio.NewScanner(strings.NewReader(tt.Input)) - scanner.Split(splitter.scan) - scanner.Buffer(make([]byte, inputBufSize), maxRecordLength) - - for scanner.Scan() { - row := make([]string, len(fields)) - copy(row, fields) - out = append(out, row) - - // We don't explicitly check the returned token, but at - // least check it parses to the same row. - if strings.ContainsRune(tt.Input, '\r') { - // But FieldCRCRLF and similar tests don't round-trip - continue - } - token := scanner.Text() - reader := csv.NewReader(strings.NewReader(token)) - reader.Comma = inputConfig.Separator - reader.Comment = inputConfig.Comment - reader.FieldsPerRecord = -1 - reader.LazyQuotes = true - tokenRow, err := reader.Read() - if err != nil { - t.Fatalf("error reparsing token: %v", err) - } - if !reflect.DeepEqual(tokenRow, row) { - t.Fatalf("token mismatch:\ngot %q\nwant %q", tokenRow, row) - } - } - err = scanner.Err() - } - - if tt.Error != "" { - if err == nil { - t.Fatalf("error mismatch:\ngot nil\nwant %q", tt.Error) - } - if err.Error() != tt.Error { - t.Fatalf("error mismatch:\ngot %q\nwant %q", err.Error(), tt.Error) - } - if out != nil { - t.Fatalf("output mismatch:\ngot %q\nwant nil", out) - } - } else { - if err != nil { - t.Fatalf("error mismatch:\ngot %q\nwant nil", err.Error()) - } - if !reflect.DeepEqual(out, tt.Output) { - t.Fatalf("output mismatch:\ngot %q\nwant %q", out, tt.Output) - } - } - }) - } -} diff --git a/src/tool/awk/interp/example_test.go b/src/tool/awk/interp/example_test.go deleted file mode 100644 index b2f2fd6..0000000 --- a/src/tool/awk/interp/example_test.go +++ /dev/null @@ -1,177 +0,0 @@ -// Don't run these on Windows, because newline handling means they don't pass. - -//go:build !windows -// +build !windows - -package interp_test - -import ( - "fmt" - "strings" - - "github.com/mojosa-software/goblin/src/tool/awk/interp" - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -func Example() { - input := strings.NewReader("foo bar\n\nbaz buz") - err := interp.Exec("$0 { print $1 }", " ", input, nil) - if err != nil { - fmt.Println(err) - return - } - // Output: - // foo - // baz -} - -func Example_fieldsep() { - // Use ',' as the field separator - input := strings.NewReader("1,2\n3,4") - err := interp.Exec("{ print $1, $2 }", ",", input, nil) - if err != nil { - fmt.Println(err) - return - } - // Output: - // 1 2 - // 3 4 -} - -func Example_program() { - src := "{ print NR, tolower($0) }" - input := "A\naB\nAbC" - - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - fmt.Println(err) - return - } - config := &interp.Config{ - Stdin: strings.NewReader(input), - Vars: []string{"OFS", ":"}, - } - _, err = interp.ExecProgram(prog, config) - if err != nil { - fmt.Println(err) - return - } - // Output: - // 1:a - // 2:ab - // 3:abc -} - -func Example_funcs() { - src := `BEGIN { print sum(), sum(1), sum(2, 3, 4), repeat("xyz", 3) }` - - parserConfig := &parser.ParserConfig{ - Funcs: map[string]interface{}{ - "sum": func(args ...float64) float64 { - sum := 0.0 - for _, a := range args { - sum += a - } - return sum - }, - "repeat": strings.Repeat, - }, - } - prog, err := parser.ParseProgram([]byte(src), parserConfig) - if err != nil { - fmt.Println(err) - return - } - interpConfig := &interp.Config{ - Funcs: parserConfig.Funcs, - } - _, err = interp.ExecProgram(prog, interpConfig) - if err != nil { - fmt.Println(err) - return - } - // Output: - // 0 1 9 xyzxyzxyz -} - -func Example_new() { - // We'll execute this program multiple times on different inputs. - src := `{ print $1, x, $3; x++ }` - - // Parse the program and set up the interpreter. - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - fmt.Println(err) - return - } - interpreter, err := interp.New(prog) - if err != nil { - fmt.Println(err) - return - } - - // Run it once on one input. - _, err = interpreter.Execute(&interp.Config{ - Stdin: strings.NewReader("one two three"), - Environ: []string{}, // avoid calling os.Environ each time - }) - if err != nil { - fmt.Println(err) - return - } - - // Reset variables and run it again efficiently on a different input (this - // could be from a completely different data source). - interpreter.ResetVars() - _, err = interpreter.Execute(&interp.Config{ - Stdin: strings.NewReader("a b c\nd e f\n"), - Environ: []string{}, - }) - if err != nil { - fmt.Println(err) - return - } - - // Run it on another input, this time without resetting variables. - _, err = interpreter.Execute(&interp.Config{ - Stdin: strings.NewReader("x y z"), - Environ: []string{}, - }) - if err != nil { - fmt.Println(err) - return - } - - // Output: - // one three - // a c - // d 1 f - // x 2 z -} - -func Example_csv() { - src := `{ total += @"amount" } END { print total }` - input := `# comment -name,amount -Bob,17.50 -Jill,20 -"Boba Fett",100.00 -` - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - fmt.Println(err) - return - } - config := &interp.Config{ - Stdin: strings.NewReader(input), - InputMode: interp.CSVMode, - CSVInput: interp.CSVInputConfig{Comment: '#', Header: true}, - } - _, err = interp.ExecProgram(prog, config) - if err != nil { - fmt.Println(err) - return - } - // Output: - // 137.5 -} diff --git a/src/tool/awk/interp/functions.go b/src/tool/awk/interp/functions.go deleted file mode 100644 index c7f3dbf..0000000 --- a/src/tool/awk/interp/functions.go +++ /dev/null @@ -1,413 +0,0 @@ -// Call native Go functions; helpers for some builtin function calls. - -package interp - -import ( - "bytes" - "errors" - "fmt" - "reflect" - "sort" - "strconv" - "strings" - "unicode/utf8" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - . "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// Call native-defined function with given name and arguments, return -// its return value (or null value if it doesn't return anything). -func (p *interp) callNative(index int, args []value) (value, error) { - f := p.nativeFuncs[index] - minIn := len(f.in) // Minimum number of args we should pass - var variadicType reflect.Type - if f.isVariadic { - variadicType = f.in[len(f.in)-1].Elem() - minIn-- - } - - // Build list of args to pass to function - values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation - for i, a := range args { - var argType reflect.Type - if !f.isVariadic || i < len(f.in)-1 { - argType = f.in[i] - } else { - // Final arg(s) when calling a variadic are all of this type - argType = variadicType - } - values = append(values, p.toNative(a, argType)) - } - // Use zero value for any unspecified args - for i := len(args); i < minIn; i++ { - values = append(values, reflect.Zero(f.in[i])) - } - - // Call Go function, determine return value - outs := f.value.Call(values) - switch len(outs) { - case 0: - // No return value, return null value to AWK - return null(), nil - case 1: - // Single return value - return fromNative(outs[0]), nil - case 2: - // Two-valued return of (scalar, error) - if !outs[1].IsNil() { - return null(), outs[1].Interface().(error) - } - return fromNative(outs[0]), nil - default: - // Should never happen (checked at parse time) - panic(fmt.Sprintf("unexpected number of return values: %d", len(outs))) - } -} - -// Convert from an AWK value to a native Go value -func (p *interp) toNative(v value, typ reflect.Type) reflect.Value { - switch typ.Kind() { - case reflect.Bool: - return reflect.ValueOf(v.boolean()) - case reflect.Int: - return reflect.ValueOf(int(v.num())) - case reflect.Int8: - return reflect.ValueOf(int8(v.num())) - case reflect.Int16: - return reflect.ValueOf(int16(v.num())) - case reflect.Int32: - return reflect.ValueOf(int32(v.num())) - case reflect.Int64: - return reflect.ValueOf(int64(v.num())) - case reflect.Uint: - return reflect.ValueOf(uint(v.num())) - case reflect.Uint8: - return reflect.ValueOf(uint8(v.num())) - case reflect.Uint16: - return reflect.ValueOf(uint16(v.num())) - case reflect.Uint32: - return reflect.ValueOf(uint32(v.num())) - case reflect.Uint64: - return reflect.ValueOf(uint64(v.num())) - case reflect.Float32: - return reflect.ValueOf(float32(v.num())) - case reflect.Float64: - return reflect.ValueOf(v.num()) - case reflect.String: - return reflect.ValueOf(p.toString(v)) - case reflect.Slice: - if typ.Elem().Kind() != reflect.Uint8 { - // Shouldn't happen: prevented by checkNativeFunc - panic(fmt.Sprintf("unexpected argument slice: %s", typ.Elem().Kind())) - } - return reflect.ValueOf([]byte(p.toString(v))) - default: - // Shouldn't happen: prevented by checkNativeFunc - panic(fmt.Sprintf("unexpected argument type: %s", typ.Kind())) - } -} - -// Convert from a native Go value to an AWK value -func fromNative(v reflect.Value) value { - switch v.Kind() { - case reflect.Bool: - return boolean(v.Bool()) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return num(float64(v.Int())) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - return num(float64(v.Uint())) - case reflect.Float32, reflect.Float64: - return num(v.Float()) - case reflect.String: - return str(v.String()) - case reflect.Slice: - if b, ok := v.Interface().([]byte); ok { - return str(string(b)) - } - // Shouldn't happen: prevented by checkNativeFunc - panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind())) - default: - // Shouldn't happen: prevented by checkNativeFunc - panic(fmt.Sprintf("unexpected return type: %s", v.Kind())) - } -} - -// Used for caching native function type information on init -type nativeFunc struct { - isVariadic bool - in []reflect.Type - value reflect.Value -} - -// Check and initialize native functions -func (p *interp) initNativeFuncs(funcs map[string]interface{}) error { - for name, f := range funcs { - err := checkNativeFunc(name, f) - if err != nil { - return err - } - } - - // Sort functions by name, then use those indexes to build slice - // (this has to match how the parser sets the indexes). - names := make([]string, 0, len(funcs)) - for name := range funcs { - names = append(names, name) - } - sort.Strings(names) - p.nativeFuncs = make([]nativeFunc, len(names)) - for i, name := range names { - f := funcs[name] - typ := reflect.TypeOf(f) - in := make([]reflect.Type, typ.NumIn()) - for j := 0; j < len(in); j++ { - in[j] = typ.In(j) - } - p.nativeFuncs[i] = nativeFunc{ - isVariadic: typ.IsVariadic(), - in: in, - value: reflect.ValueOf(f), - } - } - return nil -} - -// Got this trick from the Go stdlib text/template source -var errorType = reflect.TypeOf((*error)(nil)).Elem() - -// Check that native function with given name is okay to call from -// AWK, return an *interp.Error if not. This checks that f is actually -// a function, and that its parameter and return types are good. -func checkNativeFunc(name string, f interface{}) error { - if KeywordToken(name) != ILLEGAL { - return newError("can't use keyword %q as native function name", name) - } - - typ := reflect.TypeOf(f) - if typ.Kind() != reflect.Func { - return newError("native function %q is not a function", name) - } - for i := 0; i < typ.NumIn(); i++ { - param := typ.In(i) - if typ.IsVariadic() && i == typ.NumIn()-1 { - param = param.Elem() - } - if !validNativeType(param) { - return newError("native function %q param %d is not int or string", name, i) - } - } - - switch typ.NumOut() { - case 0: - // No return value is fine - case 1: - // Single scalar return value is fine - if !validNativeType(typ.Out(0)) { - return newError("native function %q return value is not int or string", name) - } - case 2: - // Returning (scalar, error) is handled too - if !validNativeType(typ.Out(0)) { - return newError("native function %q first return value is not int or string", name) - } - if typ.Out(1) != errorType { - return newError("native function %q second return value is not an error", name) - } - default: - return newError("native function %q returns more than two values", name) - } - return nil -} - -// Return true if typ is a valid parameter or return type. -func validNativeType(typ reflect.Type) bool { - switch typ.Kind() { - case reflect.Bool: - return true - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return true - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - return true - case reflect.Float32, reflect.Float64: - return true - case reflect.String: - return true - case reflect.Slice: - // Only allow []byte (convert to string in AWK) - return typ.Elem().Kind() == reflect.Uint8 - default: - return false - } -} - -// Guts of the split() function -func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) { - var parts []string - if fs == " " { - parts = strings.Fields(s) - } else if s == "" { - // Leave parts 0 length on empty string - } else if utf8.RuneCountInString(fs) <= 1 { - parts = strings.Split(s, fs) - } else { - re, err := p.compileRegex(fs) - if err != nil { - return 0, err - } - parts = re.Split(s, -1) - } - array := make(map[string]value, len(parts)) - for i, part := range parts { - array[strconv.Itoa(i+1)] = numStr(part) - } - p.arrays[p.arrayIndex(scope, index)] = array - return len(array), nil -} - -// Guts of the sub() and gsub() functions -func (p *interp) sub(regex, repl, in string, global bool) (out string, num int, err error) { - re, err := p.compileRegex(regex) - if err != nil { - return "", 0, err - } - count := 0 - out = re.ReplaceAllStringFunc(in, func(s string) string { - // Only do the first replacement for sub(), or all for gsub() - if !global && count > 0 { - return s - } - count++ - // Handle & (ampersand) properly in replacement string - r := make([]byte, 0, 64) // Up to 64 byte replacement won't require heap allocation - for i := 0; i < len(repl); i++ { - switch repl[i] { - case '&': - r = append(r, s...) - case '\\': - i++ - if i < len(repl) { - switch repl[i] { - case '&': - r = append(r, '&') - case '\\': - r = append(r, '\\') - default: - r = append(r, '\\', repl[i]) - } - } else { - r = append(r, '\\') - } - default: - r = append(r, repl[i]) - } - } - return string(r) - }) - return out, count, nil -} - -type cachedFormat struct { - format string - types []byte -} - -// Parse given sprintf format string into Go format string, along with -// type conversion specifiers. Output is memoized in a simple cache -// for performance. -func (p *interp) parseFmtTypes(s string) (format string, types []byte, err error) { - if item, ok := p.formatCache[s]; ok { - return item.format, item.types, nil - } - - out := []byte(s) - for i := 0; i < len(s); i++ { - if s[i] == '%' { - i++ - if i >= len(s) { - return "", nil, errors.New("expected type specifier after %") - } - if s[i] == '%' { - continue - } - for i < len(s) && bytes.IndexByte([]byte(" .-+*#0123456789"), s[i]) >= 0 { - if s[i] == '*' { - types = append(types, 'd') - } - i++ - } - if i >= len(s) { - return "", nil, errors.New("expected type specifier after %") - } - var t byte - switch s[i] { - case 's': - t = 's' - case 'd', 'i', 'o', 'x', 'X': - t = 'd' - case 'f', 'e', 'E', 'g', 'G': - t = 'f' - case 'u': - t = 'u' - out[i] = 'd' - case 'c': - t = 'c' - out[i] = 's' - default: - return "", nil, fmt.Errorf("invalid format type %q", s[i]) - } - types = append(types, t) - } - } - - // Dumb, non-LRU cache: just cache the first N formats - format = string(out) - if len(p.formatCache) < maxCachedFormats { - p.formatCache[s] = cachedFormat{format, types} - } - return format, types, nil -} - -// Guts of sprintf() function (also used by "printf" statement) -func (p *interp) sprintf(format string, args []value) (string, error) { - format, types, err := p.parseFmtTypes(format) - if err != nil { - return "", newError("format error: %s", err) - } - if len(types) > len(args) { - return "", newError("format error: got %d args, expected %d", len(args), len(types)) - } - converted := make([]interface{}, 0, 7) // up to 7 args won't require heap allocation - for i, t := range types { - a := args[i] - var v interface{} - switch t { - case 's': - v = p.toString(a) - case 'd': - v = int(a.num()) - case 'f': - v = a.num() - case 'u': - v = uint(a.num()) - case 'c': - var c []byte - n, isStr := a.isTrueStr() - if isStr { - s := p.toString(a) - if len(s) > 0 { - c = []byte{s[0]} - } else { - c = []byte{0} - } - } else { - // Follow the behaviour of awk and mawk, where %c - // operates on bytes (0-255), not Unicode codepoints - c = []byte{byte(n)} - } - v = c - } - converted = append(converted, v) - } - return fmt.Sprintf(format, converted...), nil -} diff --git a/src/tool/awk/interp/fuzz_test.go b/src/tool/awk/interp/fuzz_test.go deleted file mode 100644 index fc4ee4c..0000000 --- a/src/tool/awk/interp/fuzz_test.go +++ /dev/null @@ -1,107 +0,0 @@ -// Fuzz tests for use with the Go 1.18 fuzzer. - -//go:build go1.18 -// +build go1.18 - -package interp_test - -import ( - "context" - "fmt" - "io/ioutil" - "strings" - "testing" - "time" - - "github.com/mojosa-software/goblin/src/tool/awk/interp" - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -func isFuzzTest(test interpTest) bool { - return test.err == "" && test.awkErr == "" && !strings.Contains(test.src, "!fuzz") -} - -func FuzzSource(f *testing.F) { - for _, test := range interpTests { - if isFuzzTest(test) { - f.Add(test.src) - } - } - - f.Fuzz(func(t *testing.T, src string) { - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - return - } - interpreter, err := interp.New(prog) - if err != nil { - f.Fatalf("interp.New error: %v", err) - } - config := interp.Config{ - Stdin: strings.NewReader("foo bar\nbazz\n"), - Output: ioutil.Discard, - Error: ioutil.Discard, - NoExec: true, - NoFileWrites: true, - NoFileReads: true, - Environ: []string{}, - } - ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) - defer cancel() - _, _ = interpreter.ExecuteContext(ctx, &config) - }) -} - -func FuzzInput(f *testing.F) { - f.Add("") - added := make(map[string]bool) - for _, test := range interpTests { - if test.in != "" && !added[test.in] { - f.Add(test.in) - added[test.in] = true - } - } - - prog, err := parser.ParseProgram([]byte(`{ print $0, $3, $1, $10 }`), nil) - if err != nil { - f.Fatalf("parse error: %v", err) - } - - interpreter, err := interp.New(prog) - if err != nil { - f.Fatalf("interp.New error: %v", err) - } - - var vars = [][]string{ - {"FS", " ", "RS", "\n"}, - {"FS", ",", "RS", "\n"}, - {"FS", "\t", "RS", "\n"}, - {"FS", "@+", "RS", "\n"}, - {"FS", "\n", "RS", ""}, - {"FS", " ", "RS", "X+"}, - } - - f.Fuzz(func(t *testing.T, in string) { - for _, v := range vars { - t.Run(fmt.Sprintf("Vars=%q", v), func(t *testing.T) { - interpreter.ResetVars() - config := interp.Config{ - Stdin: strings.NewReader(in), - Output: ioutil.Discard, - Error: ioutil.Discard, - Vars: v, - NoExec: true, - NoFileWrites: true, - NoFileReads: true, - Environ: []string{}, - } - ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) - defer cancel() - _, err := interpreter.ExecuteContext(ctx, &config) - if err != nil { - t.Fatalf("execute error: %v", err) - } - }) - } - }) -} diff --git a/src/tool/awk/interp/fuzz_unexported_test.go b/src/tool/awk/interp/fuzz_unexported_test.go deleted file mode 100644 index abd3a75..0000000 --- a/src/tool/awk/interp/fuzz_unexported_test.go +++ /dev/null @@ -1,75 +0,0 @@ -// Fuzz tests for unexported functions for use with the Go 1.18 fuzzer. - -//go:build go1.18 -// +build go1.18 - -package interp - -import ( - "math" - "strconv" - "strings" - "testing" -) - -func FuzzParseFloatPrefix(f *testing.F) { - f.Add("") - f.Add("foo") - f.Add("The quick.") - f.Add("0") - f.Add("9") - f.Add("1.3e4") - f.Add("1.3E0") - f.Add("1.3e+5") - f.Add("1.3e-5") - f.Add("1E1000") - f.Add(" 1234 ") - f.Add("1234xyz") - f.Add("-1234567890") - f.Add("0x0") - f.Add("0X10") - f.Add("0x1234567890") - f.Add("0xabcdef") - f.Add("0xABCDEF") - f.Add("-0xa") - f.Add("+0XA") - f.Add("0xf.f") - f.Add("0xf.fp10") - f.Add("0xf.fp-10") - f.Add("0x.f") - f.Add("0xf.") - f.Add("0x.") - f.Add("nan") - f.Add("+nan") - f.Add("-nan") - f.Add("NAN") - f.Add("inf") - f.Add("+inf") - f.Add("-inf") - f.Add("INF") - - f.Fuzz(func(t *testing.T, in string) { - nPrefix := parseFloatPrefix(in) - if nPrefix != 0 { - for i := 1; i <= len(in); i++ { - n, _ := parseFloatHelper(in[:i]) - if n == nPrefix || math.IsNaN(n) && math.IsNaN(nPrefix) { - return - } - } - t.Fatalf("no ParseFloat match: %q", in) - } - }) -} - -func parseFloatHelper(s string) (float64, error) { - s = strings.TrimSpace(s) - s = strings.ToLower(s) - if s == "+nan" || s == "-nan" { - return math.NaN(), nil - } - if strings.Contains(s, "0x") && strings.IndexAny(s, "pP") < 0 { - s += "p0" - } - return strconv.ParseFloat(s, 64) -} diff --git a/src/tool/awk/interp/interp.go b/src/tool/awk/interp/interp.go deleted file mode 100644 index 36c1b2e..0000000 --- a/src/tool/awk/interp/interp.go +++ /dev/null @@ -1,1095 +0,0 @@ -// Package interp is the GoAWK interpreter. -// -// For basic usage, use the Exec function. For more complicated use -// cases and configuration options, first use the parser package to -// parse the AWK source, and then use ExecProgram to execute it with -// a specific configuration. -// -// If you need to re-run the same parsed program repeatedly on different -// inputs or with different variables, use New to instantiate an Interpreter -// and then call the Interpreter.Execute method as many times as you need. -package interp - -import ( - "bufio" - "bytes" - "context" - "errors" - "fmt" - "io" - "io/ioutil" - "math" - "math/rand" - "os" - "os/exec" - "regexp" - "runtime" - "strconv" - "strings" - "unicode/utf8" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - "github.com/mojosa-software/goblin/src/tool/awk/internal/compiler" - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -var ( - errExit = errors.New("exit") - errBreak = errors.New("break") - errNext = errors.New("next") - - errCSVSeparator = errors.New("invalid CSV field separator or comment delimiter") - - crlfNewline = runtime.GOOS == "windows" - varRegex = regexp.MustCompile(`^([_a-zA-Z][_a-zA-Z0-9]*)=(.*)`) - - defaultShellCommand = getDefaultShellCommand() -) - -// Error (actually *Error) is returned by Exec and Eval functions on -// interpreter error, for example FS being set to an invalid regex. -type Error struct { - message string -} - -func (e *Error) Error() string { - return e.message -} - -func newError(format string, args ...interface{}) error { - return &Error{fmt.Sprintf(format, args...)} -} - -type returnValue struct { - Value value -} - -func (r returnValue) Error() string { - return "" -} - -type interp struct { - // Input/output - output io.Writer - errorOutput io.Writer - scanner *bufio.Scanner - scanners map[string]*bufio.Scanner - stdin io.Reader - filenameIndex int - hadFiles bool - input io.Reader - inputBuffer []byte - inputStreams map[string]io.ReadCloser - outputStreams map[string]io.WriteCloser - commands map[string]*exec.Cmd - noExec bool - noFileWrites bool - noFileReads bool - shellCommand []string - csvOutput *bufio.Writer - - // Scalars, arrays, and function state - globals []value - stack []value - sp int - frame []value - arrays []map[string]value - localArrays [][]int - callDepth int - nativeFuncs []nativeFunc - - // File, line, and field handling - filename value - line string - lineIsTrueStr bool - lineNum int - fileLineNum int - fields []string - fieldsIsTrueStr []bool - numFields int - haveFields bool - fieldNames []string - fieldIndexes map[string]int - reparseCSV bool - - // Built-in variables - argc int - convertFormat string - outputFormat string - fieldSep string - fieldSepRegex *regexp.Regexp - recordSep string - recordSepRegex *regexp.Regexp - recordTerminator string - outputFieldSep string - outputRecordSep string - subscriptSep string - matchLength int - matchStart int - inputMode IOMode - csvInputConfig CSVInputConfig - outputMode IOMode - csvOutputConfig CSVOutputConfig - - // Parsed program, compiled functions and constants - program *parser.Program - functions []compiler.Function - nums []float64 - strs []string - regexes []*regexp.Regexp - - // Context support (for Interpreter.ExecuteContext) - checkCtx bool - ctx context.Context - ctxDone <-chan struct{} - ctxOps int - - // Misc pieces of state - random *rand.Rand - randSeed float64 - exitStatus int - regexCache map[string]*regexp.Regexp - formatCache map[string]cachedFormat - csvJoinFieldsBuf bytes.Buffer -} - -// Various const configuration. Could make these part of Config if -// we wanted to, but no need for now. -const ( - maxCachedRegexes = 100 - maxCachedFormats = 100 - maxRecordLength = 10 * 1024 * 1024 // 10MB seems like plenty - maxFieldIndex = 1000000 - maxCallDepth = 1000 - initialStackSize = 100 - outputBufSize = 64 * 1024 - inputBufSize = 64 * 1024 -) - -// Config defines the interpreter configuration for ExecProgram. -type Config struct { - // Standard input reader (defaults to os.Stdin) - Stdin io.Reader - - // Writer for normal output (defaults to a buffered version of os.Stdout). - // If you need to write to stdout but want control over the buffer size or - // allocation, wrap os.Stdout yourself and set Output to that. - Output io.Writer - - // Writer for non-fatal error messages (defaults to os.Stderr) - Error io.Writer - - // The name of the executable (accessible via ARGV[0]) - Argv0 string - - // Input arguments (usually filenames): empty slice means read - // only from Stdin, and a filename of "-" means read from Stdin - // instead of a real file. - Args []string - - // List of name-value pairs for variables to set before executing - // the program (useful for setting FS and other built-in - // variables, for example []string{"FS", ",", "OFS", ","}). - Vars []string - - // Map of named Go functions to allow calling from AWK. You need - // to pass this same map to the parser.ParseProgram config. - // - // Functions can have any number of parameters, and variadic - // functions are supported. Functions can have no return values, - // one return value, or two return values (result, error). In the - // two-value case, if the function returns a non-nil error, - // program execution will stop and ExecProgram will return that - // error. - // - // Apart from the error return value, the types supported are - // bool, integer and floating point types (excluding complex), - // and string types (string or []byte). - // - // It's not an error to call a Go function from AWK with fewer - // arguments than it has parameters in Go. In this case, the zero - // value will be used for any additional parameters. However, it - // is a parse error to call a non-variadic function from AWK with - // more arguments than it has parameters in Go. - // - // Functions defined with the "function" keyword in AWK code - // take precedence over functions in Funcs. - Funcs map[string]interface{} - - // Set one or more of these to true to prevent unsafe behaviours, - // useful when executing untrusted scripts: - // - // * NoExec prevents system calls via system() or pipe operator - // * NoFileWrites prevents writing to files via '>' or '>>' - // * NoFileReads prevents reading from files via getline or the - // filenames in Args - NoExec bool - NoFileWrites bool - NoFileReads bool - - // Exec args used to run system shell. Typically, this will - // be {"/bin/sh", "-c"} - ShellCommand []string - - // List of name-value pairs to be assigned to the ENVIRON special - // array, for example []string{"USER", "bob", "HOME", "/home/bob"}. - // If nil (the default), values from os.Environ() are used. - // - // If the script doesn't need environment variables, set Environ to a - // non-nil empty slice, []string{}. - Environ []string - - // Mode for parsing input fields and record: default is to use normal FS - // and RS behaviour. If set to CSVMode or TSVMode, FS and RS are ignored, - // and input records are parsed as comma-separated values or tab-separated - // values, respectively. Parsing is done as per RFC 4180 and the - // "encoding/csv" package, but FieldsPerRecord is not supported, - // LazyQuotes is always on, and TrimLeadingSpace is always off. - // - // You can also enable CSV or TSV input mode by setting INPUTMODE to "csv" - // or "tsv" in Vars or in the BEGIN block (those override this setting). - // - // For further documentation about GoAWK's CSV support, see the full docs: - // https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/csv.md - InputMode IOMode - - // Additional options if InputMode is CSVMode or TSVMode. The zero value - // is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode. - // - // You can also specify these options by setting INPUTMODE in the BEGIN - // block, for example, to use '|' as the field separator, '#' as the - // comment character, and enable header row parsing: - // - // BEGIN { INPUTMODE="csv separator=| comment=# header" } - CSVInput CSVInputConfig - - // Mode for print output: default is to use normal OFS and ORS - // behaviour. If set to CSVMode or TSVMode, the "print" statement with one - // or more arguments outputs fields using CSV or TSV formatting, - // respectively. Output is written as per RFC 4180 and the "encoding/csv" - // package. - // - // You can also enable CSV or TSV output mode by setting OUTPUTMODE to - // "csv" or "tsv" in Vars or in the BEGIN block (those override this - // setting). - OutputMode IOMode - - // Additional options if OutputMode is CSVMode or TSVMode. The zero value - // is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode. - // - // You can also specify these options by setting OUTPUTMODE in the BEGIN - // block, for example, to use '|' as the output field separator: - // - // BEGIN { OUTPUTMODE="csv separator=|" } - CSVOutput CSVOutputConfig -} - -// IOMode specifies the input parsing or print output mode. -type IOMode int - -const ( - // DefaultMode uses normal AWK field and record separators: FS and RS for - // input, OFS and ORS for print output. - DefaultMode IOMode = 0 - - // CSVMode uses comma-separated value mode for input or output. - CSVMode IOMode = 1 - - // TSVMode uses tab-separated value mode for input or output. - TSVMode IOMode = 2 -) - -// CSVInputConfig holds additional configuration for when InputMode is CSVMode -// or TSVMode. -type CSVInputConfig struct { - // Input field separator character. If this is zero, it defaults to ',' - // when InputMode is CSVMode and '\t' when InputMode is TSVMode. - Separator rune - - // If nonzero, specifies that lines beginning with this character (and no - // leading whitespace) should be ignored as comments. - Comment rune - - // If true, parse the first row in each input file as a header row (that - // is, a list of field names), and enable the @"field" syntax to get a - // field by name as well as the FIELDS special array. - Header bool -} - -// CSVOutputConfig holds additional configuration for when OutputMode is -// CSVMode or TSVMode. -type CSVOutputConfig struct { - // Output field separator character. If this is zero, it defaults to ',' - // when OutputMode is CSVMode and '\t' when OutputMode is TSVMode. - Separator rune -} - -// ExecProgram executes the parsed program using the given interpreter -// config, returning the exit status code of the program. Error is nil -// on successful execution of the program, even if the program returns -// a non-zero status code. -// -// As of GoAWK version v1.16.0, a nil config is valid and will use the -// defaults (zero values). However, it may be simpler to use Exec in that -// case. -func ExecProgram(program *parser.Program, config *Config) (int, error) { - p := newInterp(program) - err := p.setExecuteConfig(config) - if err != nil { - return 0, err - } - return p.executeAll() -} - -func newInterp(program *parser.Program) *interp { - p := &interp{ - program: program, - functions: program.Compiled.Functions, - nums: program.Compiled.Nums, - strs: program.Compiled.Strs, - regexes: program.Compiled.Regexes, - } - - // Allocate memory for variables and virtual machine stack - p.globals = make([]value, len(program.Scalars)) - p.stack = make([]value, initialStackSize) - p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize) - for i := 0; i < len(program.Arrays); i++ { - p.arrays[i] = make(map[string]value) - } - - // Initialize defaults - p.regexCache = make(map[string]*regexp.Regexp, 10) - p.formatCache = make(map[string]cachedFormat, 10) - p.randSeed = 1.0 - seed := math.Float64bits(p.randSeed) - p.random = rand.New(rand.NewSource(int64(seed))) - p.convertFormat = "%.6g" - p.outputFormat = "%.6g" - p.fieldSep = " " - p.recordSep = "\n" - p.outputFieldSep = " " - p.outputRecordSep = "\n" - p.subscriptSep = "\x1c" - - p.inputStreams = make(map[string]io.ReadCloser) - p.outputStreams = make(map[string]io.WriteCloser) - p.commands = make(map[string]*exec.Cmd) - p.scanners = make(map[string]*bufio.Scanner) - - return p -} - -func (p *interp) setExecuteConfig(config *Config) error { - if config == nil { - config = &Config{} - } - if len(config.Vars)%2 != 0 { - return newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars)) - } - if len(config.Environ)%2 != 0 { - return newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ)) - } - - // Set up I/O mode config (Vars will override) - p.inputMode = config.InputMode - p.csvInputConfig = config.CSVInput - switch p.inputMode { - case CSVMode: - if p.csvInputConfig.Separator == 0 { - p.csvInputConfig.Separator = ',' - } - case TSVMode: - if p.csvInputConfig.Separator == 0 { - p.csvInputConfig.Separator = '\t' - } - case DefaultMode: - if p.csvInputConfig != (CSVInputConfig{}) { - return newError("input mode configuration not valid in default input mode") - } - } - p.outputMode = config.OutputMode - p.csvOutputConfig = config.CSVOutput - switch p.outputMode { - case CSVMode: - if p.csvOutputConfig.Separator == 0 { - p.csvOutputConfig.Separator = ',' - } - case TSVMode: - if p.csvOutputConfig.Separator == 0 { - p.csvOutputConfig.Separator = '\t' - } - case DefaultMode: - if p.csvOutputConfig != (CSVOutputConfig{}) { - return newError("output mode configuration not valid in default output mode") - } - } - - // Set up ARGV and other variables from config - argvIndex := p.program.Arrays["ARGV"] - p.setArrayValue(ast.ScopeGlobal, argvIndex, "0", str(config.Argv0)) - p.argc = len(config.Args) + 1 - for i, arg := range config.Args { - p.setArrayValue(ast.ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg)) - } - p.filenameIndex = 1 - p.hadFiles = false - for i := 0; i < len(config.Vars); i += 2 { - err := p.setVarByName(config.Vars[i], config.Vars[i+1]) - if err != nil { - return err - } - } - - // After Vars has been handled, validate CSV configuration. - err := validateCSVInputConfig(p.inputMode, p.csvInputConfig) - if err != nil { - return err - } - err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig) - if err != nil { - return err - } - - // Set up ENVIRON from config or environment variables - environIndex := p.program.Arrays["ENVIRON"] - if config.Environ != nil { - for i := 0; i < len(config.Environ); i += 2 { - p.setArrayValue(ast.ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1])) - } - } else { - for _, kv := range os.Environ() { - eq := strings.IndexByte(kv, '=') - if eq >= 0 { - p.setArrayValue(ast.ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:])) - } - } - } - - // Set up system shell command - if len(config.ShellCommand) != 0 { - p.shellCommand = config.ShellCommand - } else { - p.shellCommand = defaultShellCommand - } - - // Set up I/O structures - p.noExec = config.NoExec - p.noFileWrites = config.NoFileWrites - p.noFileReads = config.NoFileReads - p.stdin = config.Stdin - if p.stdin == nil { - p.stdin = os.Stdin - } - p.output = config.Output - if p.output == nil { - p.output = bufio.NewWriterSize(os.Stdout, outputBufSize) - } - p.errorOutput = config.Error - if p.errorOutput == nil { - p.errorOutput = os.Stderr - } - - // Initialize native Go functions - if p.nativeFuncs == nil { - err := p.initNativeFuncs(config.Funcs) - if err != nil { - return err - } - } - - return nil -} - -func validateCSVInputConfig(mode IOMode, config CSVInputConfig) error { - if mode != CSVMode && mode != TSVMode { - return nil - } - if config.Separator == config.Comment || !validCSVSeparator(config.Separator) || - (config.Comment != 0 && !validCSVSeparator(config.Comment)) { - return errCSVSeparator - } - return nil -} - -func validateCSVOutputConfig(mode IOMode, config CSVOutputConfig) error { - if mode != CSVMode && mode != TSVMode { - return nil - } - if !validCSVSeparator(config.Separator) { - return errCSVSeparator - } - return nil -} - -func validCSVSeparator(r rune) bool { - return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError -} - -func (p *interp) executeAll() (int, error) { - defer p.closeAll() - - // Execute the program: BEGIN, then pattern/actions, then END - err := p.execute(p.program.Compiled.Begin) - if err != nil && err != errExit { - if p.checkCtx { - ctxErr := p.checkContextNow() - if ctxErr != nil { - return 0, ctxErr - } - } - return 0, err - } - if p.program.Actions == nil && p.program.End == nil { - return p.exitStatus, nil // only BEGIN specified, don't process input - } - if err != errExit { - err = p.execActions(p.program.Compiled.Actions) - if err != nil && err != errExit { - if p.checkCtx { - ctxErr := p.checkContextNow() - if ctxErr != nil { - return 0, ctxErr - } - } - return 0, err - } - } - err = p.execute(p.program.Compiled.End) - if err != nil && err != errExit { - if p.checkCtx { - ctxErr := p.checkContextNow() - if ctxErr != nil { - return 0, ctxErr - } - } - return 0, err - } - return p.exitStatus, nil -} - -// Exec provides a simple way to parse and execute an AWK program -// with the given field separator. Exec reads input from the given -// reader (nil means use os.Stdin) and writes output to stdout (nil -// means use a buffered version of os.Stdout). -func Exec(source, fieldSep string, input io.Reader, output io.Writer) error { - prog, err := parser.ParseProgram([]byte(source), nil) - if err != nil { - return err - } - config := &Config{ - Stdin: input, - Output: output, - Error: ioutil.Discard, - Vars: []string{"FS", fieldSep}, - } - _, err = ExecProgram(prog, config) - return err -} - -// Execute pattern-action blocks (may be multiple) -func (p *interp) execActions(actions []compiler.Action) error { - var inRange []bool -lineLoop: - for { - // Read and setup next line of input - line, err := p.nextLine() - if err == io.EOF { - break - } - if err != nil { - return err - } - p.setLine(line, false) - p.reparseCSV = false - - // Execute all the pattern-action blocks for each line - for i, action := range actions { - // First determine whether the pattern matches - matched := false - switch len(action.Pattern) { - case 0: - // No pattern is equivalent to pattern evaluating to true - matched = true - case 1: - // Single boolean pattern - err := p.execute(action.Pattern[0]) - if err != nil { - return err - } - matched = p.pop().boolean() - case 2: - // Range pattern (matches between start and stop lines) - if inRange == nil { - inRange = make([]bool, len(actions)) - } - if !inRange[i] { - err := p.execute(action.Pattern[0]) - if err != nil { - return err - } - inRange[i] = p.pop().boolean() - } - matched = inRange[i] - if inRange[i] { - err := p.execute(action.Pattern[1]) - if err != nil { - return err - } - inRange[i] = !p.pop().boolean() - } - } - if !matched { - continue - } - - // No action is equivalent to { print $0 } - if len(action.Body) == 0 { - err := p.printLine(p.output, p.line) - if err != nil { - return err - } - continue - } - - // Execute the body statements - err := p.execute(action.Body) - if err == errNext { - // "next" statement skips straight to next line - continue lineLoop - } - if err != nil { - return err - } - } - } - return nil -} - -// Get a special variable by index -func (p *interp) getSpecial(index int) value { - switch index { - case ast.V_NF: - p.ensureFields() - return num(float64(p.numFields)) - case ast.V_NR: - return num(float64(p.lineNum)) - case ast.V_RLENGTH: - return num(float64(p.matchLength)) - case ast.V_RSTART: - return num(float64(p.matchStart)) - case ast.V_FNR: - return num(float64(p.fileLineNum)) - case ast.V_ARGC: - return num(float64(p.argc)) - case ast.V_CONVFMT: - return str(p.convertFormat) - case ast.V_FILENAME: - return p.filename - case ast.V_FS: - return str(p.fieldSep) - case ast.V_OFMT: - return str(p.outputFormat) - case ast.V_OFS: - return str(p.outputFieldSep) - case ast.V_ORS: - return str(p.outputRecordSep) - case ast.V_RS: - return str(p.recordSep) - case ast.V_RT: - return str(p.recordTerminator) - case ast.V_SUBSEP: - return str(p.subscriptSep) - case ast.V_INPUTMODE: - return str(inputModeString(p.inputMode, p.csvInputConfig)) - case ast.V_OUTPUTMODE: - return str(outputModeString(p.outputMode, p.csvOutputConfig)) - default: - panic(fmt.Sprintf("unexpected special variable index: %d", index)) - } -} - -// Set a variable by name (specials and globals only) -func (p *interp) setVarByName(name, value string) error { - index := ast.SpecialVarIndex(name) - if index > 0 { - return p.setSpecial(index, numStr(value)) - } - index, ok := p.program.Scalars[name] - if ok { - p.globals[index] = numStr(value) - return nil - } - // Ignore variables that aren't defined in program - return nil -} - -// Set special variable by index to given value -func (p *interp) setSpecial(index int, v value) error { - switch index { - case ast.V_NF: - numFields := int(v.num()) - if numFields < 0 { - return newError("NF set to negative value: %d", numFields) - } - if numFields > maxFieldIndex { - return newError("NF set too large: %d", numFields) - } - p.ensureFields() - p.numFields = numFields - if p.numFields < len(p.fields) { - p.fields = p.fields[:p.numFields] - p.fieldsIsTrueStr = p.fieldsIsTrueStr[:p.numFields] - } - for i := len(p.fields); i < p.numFields; i++ { - p.fields = append(p.fields, "") - p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false) - } - p.line = p.joinFields(p.fields) - p.lineIsTrueStr = true - case ast.V_NR: - p.lineNum = int(v.num()) - case ast.V_RLENGTH: - p.matchLength = int(v.num()) - case ast.V_RSTART: - p.matchStart = int(v.num()) - case ast.V_FNR: - p.fileLineNum = int(v.num()) - case ast.V_ARGC: - p.argc = int(v.num()) - case ast.V_CONVFMT: - p.convertFormat = p.toString(v) - case ast.V_FILENAME: - p.filename = v - case ast.V_FS: - p.fieldSep = p.toString(v) - if utf8.RuneCountInString(p.fieldSep) > 1 { // compare to interp.ensureFields - re, err := regexp.Compile(compiler.AddRegexFlags(p.fieldSep)) - if err != nil { - return newError("invalid regex %q: %s", p.fieldSep, err) - } - p.fieldSepRegex = re - } - case ast.V_OFMT: - p.outputFormat = p.toString(v) - case ast.V_OFS: - p.outputFieldSep = p.toString(v) - case ast.V_ORS: - p.outputRecordSep = p.toString(v) - case ast.V_RS: - p.recordSep = p.toString(v) - switch { // compare to interp.newScanner - case len(p.recordSep) <= 1: - // Simple cases use specialized splitters, not regex - case utf8.RuneCountInString(p.recordSep) == 1: - // Multi-byte unicode char falls back to regex splitter - sep := regexp.QuoteMeta(p.recordSep) // not strictly necessary as no multi-byte chars are regex meta chars - p.recordSepRegex = regexp.MustCompile(sep) - default: - re, err := regexp.Compile(compiler.AddRegexFlags(p.recordSep)) - if err != nil { - return newError("invalid regex %q: %s", p.recordSep, err) - } - p.recordSepRegex = re - } - case ast.V_RT: - p.recordTerminator = p.toString(v) - case ast.V_SUBSEP: - p.subscriptSep = p.toString(v) - case ast.V_INPUTMODE: - var err error - p.inputMode, p.csvInputConfig, err = parseInputMode(p.toString(v)) - if err != nil { - return err - } - err = validateCSVInputConfig(p.inputMode, p.csvInputConfig) - if err != nil { - return err - } - case ast.V_OUTPUTMODE: - var err error - p.outputMode, p.csvOutputConfig, err = parseOutputMode(p.toString(v)) - if err != nil { - return err - } - err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig) - if err != nil { - return err - } - default: - panic(fmt.Sprintf("unexpected special variable index: %d", index)) - } - return nil -} - -// Determine the index of given array into the p.arrays slice. Global -// arrays are just at p.arrays[index], local arrays have to be looked -// up indirectly. -func (p *interp) arrayIndex(scope ast.VarScope, index int) int { - if scope == ast.ScopeGlobal { - return index - } else { - return p.localArrays[len(p.localArrays)-1][index] - } -} - -// Return array with given scope and index. -func (p *interp) array(scope ast.VarScope, index int) map[string]value { - return p.arrays[p.arrayIndex(scope, index)] -} - -// Return local array with given index. -func (p *interp) localArray(index int) map[string]value { - return p.arrays[p.localArrays[len(p.localArrays)-1][index]] -} - -// Set a value in given array by key (index) -func (p *interp) setArrayValue(scope ast.VarScope, arrayIndex int, index string, v value) { - array := p.array(scope, arrayIndex) - array[index] = v -} - -// Get the value of given numbered field, equivalent to "$index" -func (p *interp) getField(index int) value { - if index == 0 { - if p.lineIsTrueStr { - return str(p.line) - } else { - return numStr(p.line) - } - } - p.ensureFields() - if index < 1 { - index = len(p.fields) + 1 + index - if index < 1 { - return str("") - } - } - if index > len(p.fields) { - return str("") - } - if p.fieldsIsTrueStr[index-1] { - return str(p.fields[index-1]) - } else { - return numStr(p.fields[index-1]) - } -} - -// Get the value of a field by name (for CSV/TSV mode), as in @"name". -func (p *interp) getFieldByName(name string) (value, error) { - if p.fieldIndexes == nil { - // Lazily create map of field names to indexes. - if p.fieldNames == nil { - return null(), newError(`@ only supported if header parsing enabled; use -H or add "header" to INPUTMODE`) - } - p.fieldIndexes = make(map[string]int, len(p.fieldNames)) - for i, n := range p.fieldNames { - p.fieldIndexes[n] = i + 1 - } - } - index := p.fieldIndexes[name] - if index == 0 { - return str(""), nil - } - return p.getField(index), nil -} - -// Sets a single field, equivalent to "$index = value" -func (p *interp) setField(index int, value string) error { - if index == 0 { - p.setLine(value, true) - return nil - } - if index > maxFieldIndex { - return newError("field index too large: %d", index) - } - // If there aren't enough fields, add empty string fields in between - p.ensureFields() - if index < 1 { - index = len(p.fields) + 1 + index - if index < 1 { - return nil - } - } - for i := len(p.fields); i < index; i++ { - p.fields = append(p.fields, "") - p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, true) - } - p.fields[index-1] = value - p.fieldsIsTrueStr[index-1] = true - p.numFields = len(p.fields) - p.line = p.joinFields(p.fields) - p.lineIsTrueStr = true - return nil -} - -func (p *interp) joinFields(fields []string) string { - switch p.outputMode { - case CSVMode, TSVMode: - p.csvJoinFieldsBuf.Reset() - _ = p.writeCSV(&p.csvJoinFieldsBuf, fields) - line := p.csvJoinFieldsBuf.Bytes() - line = line[:len(line)-lenNewline(line)] - return string(line) - default: - return strings.Join(fields, p.outputFieldSep) - } -} - -// Convert value to string using current CONVFMT -func (p *interp) toString(v value) string { - return v.str(p.convertFormat) -} - -// Compile regex string (or fetch from regex cache) -func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) { - if re, ok := p.regexCache[regex]; ok { - return re, nil - } - re, err := regexp.Compile(compiler.AddRegexFlags(regex)) - if err != nil { - return nil, newError("invalid regex %q: %s", regex, err) - } - // Dumb, non-LRU cache: just cache the first N regexes - if len(p.regexCache) < maxCachedRegexes { - p.regexCache[regex] = re - } - return re, nil -} - -func getDefaultShellCommand() []string { - executable := "/bin/sh" - if runtime.GOOS == "windows" { - executable = "sh" - } - return []string{executable, "-c"} -} - -func inputModeString(mode IOMode, csvConfig CSVInputConfig) string { - var s string - var defaultSep rune - switch mode { - case CSVMode: - s = "csv" - defaultSep = ',' - case TSVMode: - s = "tsv" - defaultSep = '\t' - case DefaultMode: - return "" - } - if csvConfig.Separator != defaultSep { - s += " separator=" + string([]rune{csvConfig.Separator}) - } - if csvConfig.Comment != 0 { - s += " comment=" + string([]rune{csvConfig.Comment}) - } - if csvConfig.Header { - s += " header" - } - return s -} - -func parseInputMode(s string) (mode IOMode, csvConfig CSVInputConfig, err error) { - fields := strings.Fields(s) - if len(fields) == 0 { - return DefaultMode, CSVInputConfig{}, nil - } - switch fields[0] { - case "csv": - mode = CSVMode - csvConfig.Separator = ',' - case "tsv": - mode = TSVMode - csvConfig.Separator = '\t' - default: - return DefaultMode, CSVInputConfig{}, newError("invalid input mode %q", fields[0]) - } - for _, field := range fields[1:] { - key := field - val := "" - equals := strings.IndexByte(field, '=') - if equals >= 0 { - key = field[:equals] - val = field[equals+1:] - } - switch key { - case "separator": - r, n := utf8.DecodeRuneInString(val) - if n == 0 || n < len(val) { - return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV separator %q", val) - } - csvConfig.Separator = r - case "comment": - r, n := utf8.DecodeRuneInString(val) - if n == 0 || n < len(val) { - return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV comment character %q", val) - } - csvConfig.Comment = r - case "header": - if val != "" && val != "true" && val != "false" { - return DefaultMode, CSVInputConfig{}, newError("invalid header value %q", val) - } - csvConfig.Header = val == "" || val == "true" - default: - return DefaultMode, CSVInputConfig{}, newError("invalid input mode key %q", key) - } - } - return mode, csvConfig, nil -} - -func outputModeString(mode IOMode, csvConfig CSVOutputConfig) string { - var s string - var defaultSep rune - switch mode { - case CSVMode: - s = "csv" - defaultSep = ',' - case TSVMode: - s = "tsv" - defaultSep = '\t' - case DefaultMode: - return "" - } - if csvConfig.Separator != defaultSep { - s += " separator=" + string([]rune{csvConfig.Separator}) - } - return s -} - -func parseOutputMode(s string) (mode IOMode, csvConfig CSVOutputConfig, err error) { - fields := strings.Fields(s) - if len(fields) == 0 { - return DefaultMode, CSVOutputConfig{}, nil - } - switch fields[0] { - case "csv": - mode = CSVMode - csvConfig.Separator = ',' - case "tsv": - mode = TSVMode - csvConfig.Separator = '\t' - default: - return DefaultMode, CSVOutputConfig{}, newError("invalid output mode %q", fields[0]) - } - for _, field := range fields[1:] { - key := field - val := "" - equals := strings.IndexByte(field, '=') - if equals >= 0 { - key = field[:equals] - val = field[equals+1:] - } - switch key { - case "separator": - r, n := utf8.DecodeRuneInString(val) - if n == 0 || n < len(val) { - return DefaultMode, CSVOutputConfig{}, newError("invalid CSV/TSV separator %q", val) - } - csvConfig.Separator = r - default: - return DefaultMode, CSVOutputConfig{}, newError("invalid output mode key %q", key) - } - } - return mode, csvConfig, nil -} diff --git a/src/tool/awk/interp/interp_test.go b/src/tool/awk/interp/interp_test.go deleted file mode 100644 index 97768a2..0000000 --- a/src/tool/awk/interp/interp_test.go +++ /dev/null @@ -1,2609 +0,0 @@ -// Tests for GoAWK interpreter. -package interp_test - -import ( - "bytes" - "encoding/csv" - "errors" - "flag" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "reflect" - "runtime" - "strconv" - "strings" - "sync" - "testing" - - "github.com/mojosa-software/goblin/src/tool/awk/interp" - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -var ( - awkExe string -) - -func TestMain(m *testing.M) { - flag.StringVar(&awkExe, "awk", "gawk", "awk executable name") - flag.Parse() - os.Exit(m.Run()) -} - -type interpTest struct { - src string // if this includes "!awk" or "!gawk" those interpreters won't be run - in string - out string - err string // error from GoAWK must equal this - awkErr string // error from awk/gawk must contain this -} - -// Note: a lot of these are really parser tests too. -var interpTests = []interpTest{ - // BEGIN and END work correctly - {`BEGIN { print "b" }`, "", "b\n", "", ""}, - {`BEGIN { print "b" }`, "foo", "b\n", "", ""}, - {`END { print "e" }`, "", "e\n", "", ""}, - {`END { print "e" }`, "foo", "e\n", "", ""}, - {`BEGIN { print "b"} END { print "e" }`, "", "b\ne\n", "", ""}, - {`BEGIN { print "b"} END { print "e" }`, "foo", "b\ne\n", "", ""}, - {`BEGIN { print "b"} $0 { print NR } END { print "e" }`, "foo", "b\n1\ne\n", "", ""}, - {`BEGIN { printf "x" }; BEGIN { printf "y" }`, "", "xy", "", ""}, - - // Patterns - {`$0`, "foo\n\nbar", "foo\nbar\n", "", ""}, - {`{ print $0 }`, "foo\n\nbar", "foo\n\nbar\n", "", ""}, - {`$1=="foo"`, "foo\n\nbar", "foo\n", "", ""}, - {`$1==42`, "foo\n42\nbar", "42\n", "", ""}, - {`$1=="42"`, "foo\n42\nbar", "42\n", "", ""}, - {`/foo/`, "foo\nx\nfood\nxfooz\nbar", "foo\nfood\nxfooz\n", "", ""}, - {`/foo/ { print NR } /foo/`, "foo\nx\nfood\n", "1\nfoo\n3\nfood\n", "", ""}, - {`NR==2, NR==4`, "1\n2\n3\n4\n5\n6\n", "2\n3\n4\n", "", ""}, - {` -NR==2, NR==4 { print $0 } -NR==3, NR==5 { print NR } -`, "a\nb\nc\nd\ne\nf\ng", "b\nc\n3\nd\n4\n5\n", "", ""}, - - // print and printf statements - {`BEGIN { print "x", "y" }`, "", "x y\n", "", ""}, - {`BEGIN { print OFS; OFS = ","; print "x", "y" }`, "", " \nx,y\n", "", ""}, - {`BEGIN { print ORS; ORS = "."; print "x", "y" }`, "", "\n\nx y.", "", ""}, - {`BEGIN { print ORS; ORS = ""; print "x", "y" }`, "", "\n\nx y", "", ""}, - {`{ print; print }`, "foo", "foo\nfoo\n", "", ""}, - {`BEGIN { print; print }`, "", "\n\n", "", ""}, - {`BEGIN { printf "%% %d %x %c %f %s", 42, 42, 42, 42, 42 }`, "", "% 42 2a * 42.000000 42", "", ""}, - {`BEGIN { printf "%3d", 42 }`, "", " 42", "", ""}, - {`BEGIN { printf "%3s", "x" }`, "", " x", "", ""}, - {`BEGIN { printf "%.1g", 42 } # !windows-gawk`, "", "4e+01", "", ""}, // for some reason gawk gives "4e+001" on Windows - {`BEGIN { printf "%d", 12, 34 }`, "", "12", "", ""}, - {`BEGIN { printf "%d" }`, "", "", "format error: got 0 args, expected 1", "not enough arg"}, - // Our %c handling is mostly like awk's, except for multiples - // 256, where awk is weird, and we're like mawk - {`BEGIN { printf "%c", 0 }`, "", "\x00", "", ""}, - {`BEGIN { printf "%c", 127 }`, "", "\x7f", "", ""}, - {`BEGIN { printf "%c", 128 } # !gawk`, "", "\x80", "", ""}, - {`BEGIN { printf "%c", 255 } # !gawk`, "", "\xff", "", ""}, - {`BEGIN { printf "%c", 256 } # !awk !gawk`, "", "\x00", "", ""}, - {`BEGIN { printf "%c", "xyz" }`, "", "x", "", ""}, - {`BEGIN { printf "%c", "" } # !awk`, "", "\x00", "", ""}, - {`BEGIN { printf } # !awk !posix - doesn't error on this`, "", "", "parse error at 1:16: expected printf args, got none", "printf: no arguments"}, - {`BEGIN { printf("%%%dd", 4) }`, "", "%4d", "", ""}, - - // if and loop statements - {`BEGIN { if (1) print "t"; }`, "", "t\n", "", ""}, - {`BEGIN { if (0) print "t"; }`, "", "", "", ""}, - {`BEGIN { if (1) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (0) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (1==1) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (1==2) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (1!=1) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (1!=2) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (1>2) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (2>1) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (1>2) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (2>1) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (1>=2) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (2>=1) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (1<2) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (2<1) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if (1<=2) print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if (2<=1) print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("a"=="a") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("a"=="b") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("a"!="a") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("a"!="b") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("a">"b") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("b">"a") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("a">"b") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("b">"a") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("a">="b") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("b">="a") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("a"<"b") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("b"<"a") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { if ("a"<="b") print "t"; else print "f" }`, "", "t\n", "", ""}, - {`BEGIN { if ("b"<="a") print "t"; else print "f" }`, "", "f\n", "", ""}, - {`BEGIN { for (;;) { print "x"; break } }`, "", "x\n", "", ""}, - {`BEGIN { for (;;) { printf "%d ", i; i++; if (i>2) break; } }`, "", "0 1 2 ", "", ""}, - {`BEGIN { for (i=5; ; ) { printf "%d ", i; i++; if (i>8) break; } }`, "", "5 6 7 8 ", "", ""}, - {`BEGIN { for (i=5; ; i++) { printf "%d ", i; if (i>8) break; } }`, "", "5 6 7 8 9 ", "", ""}, - {`BEGIN { for (i=5; i<8; i++) { printf "%d ", i } }`, "", "5 6 7 ", "", ""}, - {`BEGIN { for (i=3; i>0; i--) { printf "%d ", i } }`, "", "3 2 1 ", "", ""}, - {`BEGIN { for (i=3; i>=0; i--) { printf "%d ", i } }`, "", "3 2 1 0 ", "", ""}, - {`BEGIN { for (i=0; i<10; i++) { if (i < 5) continue; printf "%d ", i } }`, "", "5 6 7 8 9 ", "", ""}, - {`BEGIN { for (i=0; i<100; i++) s+=i; print s }`, "", "4950\n", "", ""}, - {`BEGIN { a[1]=1; a[2]=1; for (k in a) { s++; break } print s }`, "", "1\n", "", ""}, - {`BEGIN { a[1]=1; a[2]=1; a[3]=1; for (k in a) { if (k==2) continue; s++ } print s }`, "", "2\n", "", ""}, - {`function alen(a, k, n) { n=0; for (k in a) n++; return n } BEGIN { a[1]=1; a[2]=1; print alen(a) }`, "", "2\n", "", ""}, - {`BEGIN { a["x"]=1; for (SUBSEP in a) print SUBSEP, a[SUBSEP] }`, "", "x 1\n", "", ""}, - {`BEGIN { while (i<3) { i++; s++; break } print s }`, "", "1\n", "", ""}, - {`BEGIN { while (i<3) { i++; if (i==2) continue; s++ } print s }`, "", "2\n", "", ""}, - {`BEGIN { do { i++; s++; break } while (i<3); print s }`, "", "1\n", "", ""}, - {`BEGIN { do { i++; if (i==2) continue; s++ } while (i<3); print s }`, "", "2\n", "", ""}, - {`BEGIN { a["x"] = 3; a["y"] = 4; for (k in a) x += a[k]; print x }`, "", "7\n", "", ""}, - {`BEGIN { while (i < 5) { print i; i++ } }`, "", "\n1\n2\n3\n4\n", "", ""}, - {`BEGIN { do { print i; i++ } while (i < 5) }`, "", "\n1\n2\n3\n4\n", "", ""}, - {`BEGIN { for (i=0; i<10; i++); printf "x" }`, "", "x", "", ""}, - {`BEGIN { s="x"; while (s=="x") { print s; s="y" } }`, "", "x\n", "", ""}, - {`BEGIN { s="x"; while (s!="") { print s; s="" } }`, "", "x\n", "", ""}, - {`BEGIN { s="x"; while (s) { print s; s="" } }`, "", "x\n", "", ""}, - // regression tests for break and continue with nested loops - {` -BEGIN { - for (i = 0; i < 1; i++) { - for (j = 0; j < 1; j++) { - print i, j - } - break - } -} -`, "", "0 0\n", "", ""}, - {` -BEGIN { - for (i = 0; i < 1; i++) { - for (j = 0; j < 1; j++) { - print i, j - } - continue - } -} -`, "", "0 0\n", "", ""}, - - // next statement - {`{ if (NR==2) next; print }`, "a\nb\nc", "a\nc\n", "", ""}, - {`{ if (NR==2) f(); print } function f() { next }`, "a\nb\nc", "a\nc\n", "", ""}, - {`BEGIN { next }`, "", "", "parse error at 1:9: next can't be inside BEGIN or END", "BEGIN"}, - {`END { next }`, "", "", "parse error at 1:7: next can't be inside BEGIN or END", "END"}, - - // Arrays, "in", and delete - {`BEGIN { a["x"] = 3; print "x" in a, "y" in a }`, "", "1 0\n", "", ""}, - {`BEGIN { a["x"] = 3; a["y"] = 4; delete a["x"]; for (k in a) print k, a[k] }`, "", "y 4\n", "", ""}, - {`BEGIN { a["x"] = 3; a["y"] = 4; for (k in a) delete a[k]; for (k in a) print k, a[k] }`, "", "", "", ""}, - {`BEGIN { a["x"]; "y" in a; for (k in a) print k, a[k] }`, "", "x \n", "", ""}, - {`BEGIN { a[] }`, "", "", "parse error at 1:11: expected expression instead of ]", "syntax error"}, - {`BEGIN { delete a[] }`, "", "", "parse error at 1:18: expected expression instead of ]", "syntax error"}, - {`BEGIN { a["x"] = 3; a["y"] = 4; delete a; for (k in a) print k, a[k] }`, "", "", "", ""}, - {`function f(a) { print "x" in a, "y" in a } BEGIN { b["x"] = 3; f(b) }`, "", "1 0\n", "", ""}, - - // Unary expressions: ! + - - {`BEGIN { print !42, !1, !0, !!42, !!1, !!0 }`, "", "0 0 1 1 1 0\n", "", ""}, - {`BEGIN { print !42, !1, !0, !!42, !!1, !!0 }`, "", "0 0 1 1 1 0\n", "", ""}, - {`BEGIN { print +4, +"3", +0, +-3, -3, - -4, -"3" }`, "", "4 3 0 -3 -3 4 -3\n", "", ""}, - {`BEGIN { $0="0"; print !$0 }`, "", "0\n", "", ""}, - {`BEGIN { $0="1"; print !$0 }`, "", "0\n", "", ""}, - {`{ print !$0 }`, "0\n", "1\n", "", ""}, - {`{ print !$0 }`, "1\n", "0\n", "", ""}, - {`!seen[$0]++`, "1\n2\n3\n2\n3\n3\n", "1\n2\n3\n", "", ""}, - {`!seen[$0]--`, "1\n2\n3\n2\n3\n3\n", "1\n2\n3\n", "", ""}, - - // Comparison expressions: == != < <= > >= - {`BEGIN { print (1==1, 1==0, "1"==1, "1"==1.0) }`, "", "1 0 1 1\n", "", ""}, - {`{ print ($0=="1", $0==1) }`, "1\n1.0\n+1", "1 1\n0 1\n0 1\n", "", ""}, - {`{ print ($1=="1", $1==1) }`, "1\n1.0\n+1", "1 1\n0 1\n0 1\n", "", ""}, - {`BEGIN { print (1!=1, 1!=0, "1"!=1, "1"!=1.0) }`, "", "0 1 0 0\n", "", ""}, - {`{ print ($0!="1", $0!=1) }`, "1\n1.0\n+1", "0 0\n1 0\n1 0\n", "", ""}, - {`{ print ($1!="1", $1!=1) }`, "1\n1.0\n+1", "0 0\n1 0\n1 0\n", "", ""}, - {`BEGIN { print (0<1, 1<1, 2<1, "12"<"2") }`, "", "1 0 0 1\n", "", ""}, - {`{ print ($1<2) }`, "1\n1.0\n+1", "1\n1\n1\n", "", ""}, - {`BEGIN { print (0<=1, 1<=1, 2<=1, "12"<="2") }`, "", "1 1 0 1\n", "", ""}, - {`{ print ($1<=2) }`, "1\n1.0\n+1", "1\n1\n1\n", "", ""}, - {`BEGIN { print (0>1, 1>1, 2>1, "12">"2") }`, "", "0 0 1 0\n", "", ""}, - {`{ print ($1>2) }`, "1\n1.0\n+1", "0\n0\n0\n", "", ""}, - {`BEGIN { print (0>=1, 1>=1, 2>=1, "12">="2") }`, "", "0 1 1 0\n", "", ""}, - {`{ print ($1>=2) }`, "1\n1.0\n+1", "0\n0\n0\n", "", ""}, - {`{ print($0<2) }`, "10", "0\n", "", ""}, - {`{ print($1<2) }`, "10", "0\n", "", ""}, - {`{ print($1<2) }`, "10x", "1\n", "", ""}, - {`BEGIN { $0="10"; print($0<2) }`, "", "1\n", "", ""}, - {`BEGIN { $1="10"; print($1<2) }`, "", "1\n", "", ""}, - {`BEGIN { $1="10x"; print($1<2) }`, "", "1\n", "", ""}, - - // Short-circuit && and || operators - {` -function t() { print "t"; return 2 } -function f() { print "f"; return 0 } -BEGIN { - print f() && f() - print f() && t() - print t() && f() - print t() && t() -} -`, "", "f\n0\nf\n0\nt\nf\n0\nt\nt\n1\n", "", ""}, - {` -function t() { print "t"; return 2 } -function f() { print "f"; return 0 } -BEGIN { - print f() || f() - print f() || t() - print t() || f() - print t() || t() -} -`, "", "f\nf\n0\nf\nt\n1\nt\n1\nt\n1\n", "", ""}, - {`BEGIN { print 0&&0, 0&&2, 2&&0, 2&&2 }`, "", "0 0 0 1\n", "", ""}, - {`BEGIN { print 0||0, 0||2, 2||0, 2||2 }`, "", "0 1 1 1\n", "", ""}, - - // Other binary expressions: + - * ^ ** / % CONCAT ~ !~ - {`BEGIN { print 1+2, 1+2+3, 1+-2, -1+2, "1"+"2", 3+.14 }`, "", "3 6 -1 1 3 3.14\n", "", ""}, - {`BEGIN { print 1-2, 1-2-3, 1-+2, -1-2, "1"-"2", 3-.14 }`, "", "-1 -4 -1 -3 -1 2.86\n", "", ""}, - {`BEGIN { print 2*3, 2*3*4, 2*-3, -2*3, "2"*"3", 3*.14 }`, "", "6 24 -6 -6 6 0.42\n", "", ""}, - {`BEGIN { print 2/3, 2/3/4, 2/-3, -2/3, "2"/"3", 3/.14 }`, "", "0.666667 0.166667 -0.666667 -0.666667 0.666667 21.4286\n", "", ""}, - {`BEGIN { print 2%3, 2%3%4, 2%-3, -2%3, "2"%"3", 3%.14 }`, "", "2 2 2 -2 2 0.06\n", "", ""}, - {`BEGIN { print 2^3, 2^3^3, 2^-3, -2^3, "2"^"3", 3^.14 }`, "", "8 134217728 0.125 -8 8 1.16626\n", "", ""}, - {`BEGIN { print 2**3, 2**3**3, 2**-3, -2**3, "2"**"3", 3**.14 } # !posix`, "", "8 134217728 0.125 -8 8 1.16626\n", "", ""}, - {`BEGIN { print 1 2, "x" "yz", 1+2 3+4 }`, "", "12 xyz 37\n", "", ""}, - {`BEGIN { print "food"~/oo/, "food"~/[oO]+d/, "food"~"f", "food"~"F", "food"~0 }`, "", "1 1 1 0 0\n", "", ""}, - {`BEGIN { print "food"!~/oo/, "food"!~/[oO]+d/, "food"!~"f", "food"!~"F", "food"!~0 }`, "", "0 0 0 1 1\n", "", ""}, - {`BEGIN { print 1+2*3/4^5%6 7, (1+2)*3/4^5%6 "7" }`, "", "1.005867 0.008789067\n", "", ""}, - {`BEGIN { print 1/0 }`, "", "", "division by zero", "division by zero"}, - {`BEGIN { print 1%0 }`, "", "", "division by zero in mod", "division by zero"}, - {`BEGIN { x /= 0 }`, "", "", "division by zero", "division by zero"}, - {`BEGIN { x %= 0 }`, "", "", "division by zero in mod", "division by zero"}, - - // Number, string, and regex expressions - {`BEGIN { print 1, 1., .1, 1e0, -1, 1e }`, "", "1 1 0.1 1 -1 1\n", "", ""}, - {`BEGIN { print '\"' '\'' 'xy' "z" "'" '\"' }`, "", "\"'xyz'\"\n", "", "syntax error"}, // Check support for single-quoted strings - {`BEGIN { print "0\n1\t2\r3\a4\b5\f6\v7\x408\xf" } # !posix`, "", "0\n1\t2\r3\a4\b5\f6\v7@8\x0f\n", "", ""}, - {`{ print /foo/ }`, "food\nfoo\nxfooz\nbar\n", "1\n1\n1\n0\n", "", ""}, - {`/[a-/`, "foo", "", "parse error at 1:1: error parsing regexp: missing closing ]: `[a-`", "terminated"}, - {`/=foo/`, "=foo", "=foo\n", "", ""}, - {`BEGIN { RS="x" } /^a.*c$/`, "a\nb\nc", "a\nb\nc\n", "", ""}, - {`BEGIN { print "-12"+0, "+12"+0, " \t\r\n7foo"+0, ".5"+0, "5."+0, "+."+0 }`, "", "-12 12 7 0.5 5 0\n", "", ""}, - {`BEGIN { print "1e3"+0, "1.2e-1"+0, "1e+1"+0, "1e"+0, "1e+"+0 }`, "", "1000 0.12 10 1 1\n", "", ""}, - {`BEGIN { print -(11102200000000000000000000000000000000 1040000) } # !gawk - gawk supports big numbers`, - "", "-inf\n", "", ""}, - {`BEGIN { print atan2(0, 8020020000000e20G-0)}`, "", "0\n", "", ""}, - {`BEGIN { print 1e1000, -1e1000 } # !gawk`, "", "inf -inf\n", "", ""}, - {`BEGIN { printf "\x0.\x00.\x0A\x10\xff\xFF\x41" } # !awk !posix`, "", "\x00.\x00.\n\x10\xff\xffA", "", ""}, - {`BEGIN { printf "\x1.\x01.\x0A\x10\xff\xFF\x41" } # !posix`, "", "\x01.\x01.\n\x10\xff\xffA", "", ""}, - {`BEGIN { printf "\0\78\7\77\777\0 \141 " } # !awk`, "", "\x00\a8\a?\xff\x00 a ", "", ""}, - {`BEGIN { printf "\1\78\7\77\777\1 \141 " }`, "", "\x01\a8\a?\xff\x01 a ", "", ""}, - - // Unusual number/exponent handling - {`BEGIN { e="x"; E="X"; print 1e, 1E }`, "", "1x 1X\n", "", ""}, - {`BEGIN { e="x"; E="X"; print 1e1e, 1E1E }`, "", "10x 10X\n", "", ""}, - {`BEGIN { a=2; print 1e+a, 1E+a, 1e+1, 1E+1 }`, "", "12 12 10 10\n", "", ""}, - {`BEGIN { a=2; print 1e-a, 1E-a, 1e-1, 1E-1 }`, "", "1-2 1-2 0.1 0.1\n", "", ""}, - {`BEGIN { print 1e+ }`, "", "", "parse error at 1:19: expected expression instead of }", "syntax error"}, - {`BEGIN { print 1e- }`, "", "", "parse error at 1:19: expected expression instead of }", "syntax error"}, - - // Conditional ?: expression - {`{ print /x/?"t":"f" }`, "x\ny\nxx\nz\n", "t\nf\nt\nf\n", "", ""}, - {`BEGIN { print 1?2?3:4:5, 1?0?3:4:5, 0?2?3:4:5 }`, "", "3 4 5\n", "", ""}, - {`BEGIN { $0="0"; print ($0?1:0) }`, "", "1\n", "", ""}, - {`{ print $0?1:0 }`, "0\n", "0\n", "", ""}, - {`{ print $0?1:0 }`, "1\n", "1\n", "", ""}, - {`BEGIN { $0="1"; print ($0?1:0) }`, "", "1\n", "", ""}, - {`BEGIN { print 0?1:0, 1?1:0, ""?1:0, "0"?1:0, "1"?1:0, x?1:0 }`, "", "0 1 0 1 1 0\n", "", ""}, - - // Built-in variables - {`BEGIN { print ARGC; ARGC=42; print ARGC } # !gawk`, "", "1\n42\n", "", ""}, // ARGC is properly tested in goawk_test.go - {` -BEGIN { - print CONVFMT, 1.2345678 "" - CONVFMT = "%.3g" - print CONVFMT, 1.234567 "" -}`, "", "%.6g 1.23457\n%.3g 1.23\n", "", ""}, - {`BEGIN { FILENAME = "foo"; print FILENAME }`, "", "foo\n", "", ""}, - {`BEGIN { FILENAME = "123.0"; print (FILENAME==123) }`, "", "0\n", "", ""}, - // Other FILENAME behaviour is tested in goawk_test.go - {`BEGIN { FNR = 123; print FNR }`, "", "123\n", "", ""}, - {`{ print FNR, $0 }`, "a\nb\nc", "1 a\n2 b\n3 c\n", "", ""}, - {`{ print NR, FNR } END { print NR, FNR }`, "a\nb\nc\n", "1 1\n2 2\n3 3\n3 3\n", "", ""}, - // Other FNR behaviour is tested in goawk_test.go - {`BEGIN { print "|" FS "|"; FS="," } { print $1, $2 }`, "a b\na,b\nx,,y", "| |\na b \na b\nx \n", "", ""}, - {`BEGIN { print "|" FS "|"; FS="\\." } { print $1, $2 }`, "a b\na.b\nx..y", "| |\na b \na b\nx \n", "", ""}, - // ASCII unit and record separator - {`BEGIN { FS="\x1f"; RS="\x1e"; OFS="," } { print $1, $2, $3 } # !posix`, - "id\x1fname\x1fage\x1e1\x1fBob \"Billy\" Smith\x1f42\x1e2\x1fJane\nBrown\x1f37", - "id,name,age\n1,Bob \"Billy\" Smith,42\n2,Jane\nBrown,37\n", "", ""}, - // Unicode unit and record separator (skip on Windows under gawk due to Unicode command line issues) - {`BEGIN { FS="␟"; RS="␞"; OFS="," } { print $1, $2, $3 } # !windows-gawk !posix`, - "id␟name␟age␞1␟Bob \"Billy\" Smith␟42␞2␟Jane\nBrown␟37", - "id,name,age\n1,Bob \"Billy\" Smith,42\n2,Jane\nBrown,37\n", "", ""}, - {`BEGIN { FS="\\" } { print $1, $2 }`, "a\\b", "a b\n", "", ""}, - {`BEGIN { RS="x"; FS=",.*," } { for (i=1; i<=NF; i++) print $i }`, "one,\n,two", "one\ntwo\n", "", ""}, - {`BEGIN { FS="x"; RS=",.*," } { print } # !posix`, "one,\n,two", "one\ntwo\n", "", ""}, - {`{ print NF }`, "\na\nc d\ne f g", "0\n1\n2\n3\n", "", ""}, - {`BEGIN { NR = 123; print NR }`, "", "123\n", "", ""}, - {`{ print NR, $0 }`, "a\nb\nc", "1 a\n2 b\n3 c\n", "", ""}, - {` -BEGIN { - print OFMT, 1.2345678 - OFMT = "%.3g" - print OFMT, 1.234567 -}`, "", "%.6g 1.23457\n%.3g 1.23\n", "", ""}, - // OFS and ORS are tested above - {`BEGIN { print RSTART, RLENGTH; RSTART=5; RLENGTH=42; print RSTART, RLENGTH; } `, "", - "0 0\n5 42\n", "", ""}, - {`BEGIN { print RS }`, "", "\n\n", "", ""}, - {`BEGIN { print RS; RS="|"; print RS } { print }`, "a b|c d|", "\n\n|\na b\nc d\n", "", ""}, - {`BEGIN { RS=""; FS="\n" } { printf "%d (%d):\n", NR, NF; for (i=1; i<=NF; i++) print $i }`, - "a\n\nb\nc", - "1 (1):\na\n2 (2):\nb\nc\n", "", ""}, - {`BEGIN { RS=""; FS="\n" } { printf "%d (%d):\n", NR, NF; for (i=1; i<=NF; i++) print $i }`, - "1\n2\n\na\nb", - "1 (2):\n1\n2\n2 (2):\na\nb\n", "", ""}, - {`BEGIN { RS=""; FS="\n" } { printf "%d (%d):\n", NR, NF; for (i=1; i<=NF; i++) print $i }`, - "a b\nc d\n\ne f\n\n\n \n\n\ng h\n\n\n", - "1 (2):\na b\nc d\n2 (1):\ne f\n3 (1):\n \n4 (1):\ng h\n", "", ""}, - {`BEGIN { RS=""; FS="\n" } { printf "%d (%d):\n", NR, NF; for (i=1; i<=NF; i++) print $i }`, - "\n\na b\n\nc d\n", - "1 (1):\na b\n2 (1):\nc d\n", "", ""}, - {`BEGIN { RS=""; FS="\n" } { printf "%d (%d):\n", NR, NF; for (i=1; i<=NF; i++) print $i } # !awk !gawk - they don't handle CR LF with RS==""`, - "\r\n\r\na b\r\n\r\nc d\r\n", - "1 (1):\na b\n2 (1):\nc d\n", "", ""}, - {`BEGIN { RS=""; FS="X" } { printf "%d (%d):\n", NR, NF; for (i=1; i<=NF; i++) printf "%s|", $i }`, - "aXb\ncXd\n\neXf\n\n\n \n\n\ngXh\n\n\n", - "1 (4):\na|b|c|d|2 (2):\ne|f|3 (1):\n |4 (2):\ng|h|", "", ""}, - {`BEGIN { RS = "" } { print "got", $0 }`, - "\n\n\n\n", "", "", ""}, - {`BEGIN { RS="\n" } { print }`, "a\n\nb\nc", "a\n\nb\nc\n", "", ""}, - {`BEGIN { RS="ö" } { print } # !windows-gawk`, "1ötwoöthree", "1\ntwo\nthree\n", "", ""}, - {`BEGIN { RS="\\.+" } { print } # !posix`, "1.two..three...4.", "1\ntwo\nthree\n4\n", "", ""}, - {`BEGIN { RS = "\n|( *[[:upper:]]+ *)" } { print "Record =", $0,"and RT = [" RT "]" } # !posix`, // from https://www.gnu.org/software/gawk/manual/html_node/gawk-split-records.html - "record 1 AAAA record 2 BBBB record 3\n", - `Record = record 1 and RT = [ AAAA ] -Record = record 2 and RT = [ BBBB ] -Record = record 3 and RT = [ -] -`, "", ""}, - {`BEGIN { RS = "\n|( *[[:upper:]]+ *)" } { print "Record =", $0,"and RT = [" RT "]" } # !posix`, - "record 1 AAAA record 2 BBBB record 3", - `Record = record 1 and RT = [ AAAA ] -Record = record 2 and RT = [ BBBB ] -Record = record 3 and RT = [] -`, "", ""}, - {`BEGIN { RS=".." } { print $0 RT } # !posix`, "foo bar bazz", "fo\no \nba\nr \nba\nzz\n", "", ""}, - {`BEGIN { RT="foo"; print RT }`, "", "foo\n", "", ""}, - {` -BEGIN { - print SUBSEP - a[1, 2] = "onetwo" - print a[1, 2] - for (k in a) { - print k, a[k] - } - delete a[1, 2] - SUBSEP = "|" - print SUBSEP - a[1, 2] = "onetwo" - print a[1, 2] - for (k in a) { - print k, a[k] - } -}`, "", "\x1c\nonetwo\n1\x1c2 onetwo\n|\nonetwo\n1|2 onetwo\n", "", ""}, - - // Field expressions and assignment (and interaction with NF) - {`{ print NF; NF=1; $2="two"; print $0, NF }`, "\n", "0\n two 2\n", "", ""}, - {`{ print NF; NF=2; $2="two"; print $0, NF}`, "\n", "0\n two 2\n", "", ""}, - {`{ print NF; NF=3; $2="two"; print $0, NF}`, "a b c\n", "3\na two c 3\n", "", ""}, - {`{ print; print $1, $3, $NF }`, "a b c d e", "a b c d e\na c e\n", "", ""}, - {`{ print $1,$3; $2="x"; print; print $2 }`, "a b c", "a c\na x c\nx\n", "", ""}, - {`{ print; $0="x y z"; print; print $1, $3 }`, "a b c", "a b c\nx y z\nx z\n", "", ""}, - {`{ print $1^2 }`, "10", "100\n", "", ""}, - {`{ print $-1 }`, "a\nb c\nd e f\n", "a\nc\nf\n", "", "field -1"}, - {`{ print $-2 }`, "a\nb c\nd e f\n", "\nb\ne\n", "", "field -2"}, - {`{ print $-3 }`, "a\nb c\nd e f\n", "\n\nd\n", "", "field -3"}, - {`{ $-1="x"; print }`, "a\nb c\nd e f\n", "x\nb x\nd e x\n", "", "field -1"}, - {`{ $-2="y"; print }`, "a\nb c\nd e f\n", "a\ny c\nd y f\n", "", "field -2"}, - {`{ $-3="z"; print }`, "a\nb c\nd e f\n", "a\nb c\nz e f\n", "", "field -3"}, - {`{ NF=-1; } # !awk - awk allows setting negative NF`, - "x", "", "NF set to negative value: -1", "negative value"}, - {`{ NF=1234567; }`, "x", "", "NF set too large: 1234567", ""}, - {`BEGIN { $1234567=1 }`, "", "", "field index too large: 1234567", ""}, - {`0 in FS # !awk - doesn't flag this as an error`, "x", "", - `parse error at 1:6: can't use scalar "FS" as array`, "array"}, - // TODO: I think this is happening because we parse this as ($($0))++ rather than ($($0++)) - // {`{ $$0++; print $0 }`, "2 3 4", "3\n", "", ""}, - // {`BEGIN { $0="3 4 5 6 7 8 9"; a=3; print $$a++++; print }`, "", "7\n3 4 6 6 8 8 9\n", "", ""}, - - // Lots of NF tests with different combinations of NF, $, and number - // of input fields. Some of these cause segmentation faults on awk - // (but work fine on gawk and mawk). - {`{ NF=1; $1="x"; print $0; print NF }`, "a", "x\n1\n", "", ""}, - {`{ NF=1; $1="x"; print $0; print NF }`, "a b", "x\n1\n", "", ""}, - {`{ NF=1; $1="x"; print $0; print NF }`, "a b c", "x\n1\n", "", ""}, - {`{ NF=1; $2="x"; print $0; print NF }`, "a", "a x\n2\n", "", ""}, - {`{ NF=1; $2="x"; print $0; print NF }`, "a b", "a x\n2\n", "", ""}, - {`{ NF=1; $2="x"; print $0; print NF }`, "a b c", "a x\n2\n", "", ""}, - {`{ NF=1; $3="x"; print $0; print NF }`, "a", "a x\n3\n", "", ""}, - {`{ NF=1; $3="x"; print $0; print NF } # !awk - awk differs from gawk (but gawk seems right)`, - "a b", "a x\n3\n", "", ""}, - {`{ NF=1; $3="x"; print $0; print NF } # !awk - awk differs from gawk (but gawk seems right)`, - "a b c", "a x\n3\n", "", ""}, - {`{ NF=2; $1="x"; print $0; print NF }`, "a", "x \n2\n", "", ""}, - {`{ NF=2; $1="x"; print $0; print NF }`, "a b", "x b\n2\n", "", ""}, - {`{ NF=2; $1="x"; print $0; print NF }`, "a b c", "x b\n2\n", "", ""}, - {`{ NF=2; $2="x"; print $0; print NF }`, "a", "a x\n2\n", "", ""}, - {`{ NF=2; $2="x"; print $0; print NF }`, "a b", "a x\n2\n", "", ""}, - {`{ NF=2; $2="x"; print $0; print NF }`, "a b c", "a x\n2\n", "", ""}, - {`{ NF=2; $3="x"; print $0; print NF }`, "a", "a x\n3\n", "", ""}, - {`{ NF=2; $3="x"; print $0; print NF }`, "a b", "a b x\n3\n", "", ""}, - {`{ NF=2; $3="x"; print $0; print NF }`, "a b c", "a b x\n3\n", "", ""}, - {`{ NF=3; $1="x"; print $0; print NF } # !awk - segmentation fault`, - "a", "x \n3\n", "", ""}, - {`{ NF=3; $1="x"; print $0; print NF } # !awk - segmentation fault`, - "a b", "x b \n3\n", "", ""}, - {`{ NF=3; $1="x"; print $0; print NF }`, "a b c", "x b c\n3\n", "", ""}, - {`{ NF=3; $2="x"; print $0; print NF } # !awk - segmentation fault`, - "a", "a x \n3\n", "", ""}, - {`{ NF=3; $2="x"; print $0; print NF } # !awk - segmentation fault`, - "a b", "a x \n3\n", "", ""}, - {`{ NF=3; $2="x"; print $0; print NF }`, "a b c", "a x c\n3\n", "", ""}, - {`{ NF=3; $3="x"; print $0; print NF }`, "a", "a x\n3\n", "", ""}, - {`{ NF=3; $3="x"; print $0; print NF }`, "a b", "a b x\n3\n", "", ""}, - {`{ NF=3; $3="x"; print $0; print NF }`, "a b c", "a b x\n3\n", "", ""}, - - // Assignment expressions and vars - {`BEGIN { print x; x = 4; print x; }`, "", "\n4\n", "", ""}, - {`BEGIN { a["foo"]=1; b[2]="x"; k="foo"; print a[k], b["2"] }`, "", "1 x\n", "", ""}, - {`BEGIN { s+=5; print s; s-=2; print s; s-=s; print s }`, "", "5\n3\n0\n", "", ""}, - {`BEGIN { x=2; x*=x; print x; x*=3; print x }`, "", "4\n12\n", "", ""}, - {`BEGIN { x=6; x/=3; print x; x/=x; print x; x/=.6; print x }`, "", "2\n1\n1.66667\n", "", ""}, - {`BEGIN { x=12; x%=5; print x }`, "", "2\n", "", ""}, - {`BEGIN { x=2; x^=5; print x; x^=0.5; print x }`, "", "32\n5.65685\n", "", ""}, - {`BEGIN { x=2; x**=5; print x; x**=0.5; print x } # !posix`, "", "32\n5.65685\n", "", ""}, - {`{ $2+=10; print; $3/=2; print }`, "1 2 3", "1 12 3\n1 12 1.5\n", "", ""}, - {`BEGIN { a[2] += 1; a["2"] *= 3; print a[2] }`, "", "3\n", "", ""}, - {`function inc(x, n) { x += n; return x } BEGIN { print inc(3, 2) }`, "", "5\n", "", ""}, - {`function inca(a, k, n) { a[k] += n } BEGIN { b["x"]=7; inca(b, "x", 2); print b["x"] }`, "", "9\n", "", ""}, - {`BEGIN { NF += 3; print NF }`, "", "3\n", "", ""}, - {`BEGIN { x=1; x += x+=3; print x }`, "", "8\n", "", ""}, - - // Incr/decr expressions - {`BEGIN { print x++; print x }`, "", "0\n1\n", "", ""}, - {`BEGIN { print x; print x++; print ++x; print x }`, "", "\n0\n2\n2\n", "", ""}, - {`BEGIN { print x; print x--; print --x; print x }`, "", "\n0\n-2\n-2\n", "", ""}, - {`BEGIN { s++; s++; print s }`, "", "2\n", "", ""}, - {`BEGIN { y=" "; --x[y = y y]; print length(y) }`, "", "2\n", "", ""}, - {`BEGIN { x[y++]++; print y }`, "", "1\n", "", ""}, - {`BEGIN { x[y++] += 3; print y }`, "", "1\n", "", ""}, - {`BEGIN { $(y++)++; print y }`, "", "1\n", "", ""}, - {`BEGIN { print "s" ++n; print "s" --n }`, "", "s1\ns0\n", "", ""}, - {`function inc(x) { x++; return x } BEGIN { print inc(3) }`, "", "4\n", "", ""}, - {`function inca(a, k) { a[k]++ } BEGIN { b["x"]=7; inca(b, "x"); print b["x"] }`, "", "8\n", "", ""}, - {`BEGIN { NF++; print NF }`, "", "1\n", "", ""}, - - // Builtin functions - {`BEGIN { print sin(0), sin(0.5), sin(1), sin(-1) }`, "", "0 0.479426 0.841471 -0.841471\n", "", ""}, - {`BEGIN { print cos(0), cos(0.5), cos(1), cos(-1) }`, "", "1 0.877583 0.540302 0.540302\n", "", ""}, - {`BEGIN { print exp(0), exp(0.5), exp(1), exp(-1) }`, "", "1 1.64872 2.71828 0.367879\n", "", ""}, - {`BEGIN { print log(0), log(0.5), log(1) }`, "", "-inf -0.693147 0\n", "", ""}, - {`BEGIN { print log(-1) } # !gawk - gawk prints warning for this as well`, - "", "nan\n", "", ""}, - {`BEGIN { print sqrt(0), sqrt(2), sqrt(4) }`, "", "0 1.41421 2\n", "", ""}, - {`BEGIN { print int(3.5), int("1.9"), int(4), int(-3.6), int("x"), int("") }`, "", "3 1 4 -3 0 0\n", "", ""}, - {`BEGIN { print match("food", "foo"), RSTART, RLENGTH }`, "", "1 1 3\n", "", ""}, - {`BEGIN { print match("x food y", "fo"), RSTART, RLENGTH }`, "", "3 3 2\n", "", ""}, - {`BEGIN { print match("x food y", "fox"), RSTART, RLENGTH }`, "", "0 0 -1\n", "", ""}, - {`BEGIN { print match("x food y", /[fod]+/), RSTART, RLENGTH }`, "", "3 3 4\n", "", ""}, - {`BEGIN { print match("a\nb\nc", /^a.*c$/), RSTART, RLENGTH }`, "", "1 1 5\n", "", ""}, - {`{ print length, length(), length("buzz"), length("") }`, "foo bar", "7 7 4 0\n", "", ""}, - {`BEGIN { print index("foo", "f"), index("foo0", 0), index("foo", "o"), index("foo", "x") }`, "", "1 4 2 0\n", "", ""}, - {`BEGIN { print atan2(1, 0.5), atan2(-1, 0) }`, "", "1.10715 -1.5708\n", "", ""}, - {`BEGIN { print sprintf("%3d", 42) }`, "", " 42\n", "", ""}, - {`BEGIN { print sprintf("%d", 12, 34) }`, "", "12\n", "", ""}, - {`BEGIN { print sprintf("%d") }`, "", "", "format error: got 0 args, expected 1", "not enough arg"}, - {`BEGIN { print sprintf("%d", 12, 34) }`, "", "12\n", "", ""}, - {`BEGIN { print sprintf("% 5d", 42) }`, "", " 42\n", "", ""}, - {`BEGIN { print sprintf("%*s %.*s", 5, "abc", 5, "abcdefghi") }`, "", " abc abcde\n", "", ""}, - {`BEGIN { print substr("food", 1) }`, "", "food\n", "", ""}, - {`BEGIN { print substr("food", 1, 2) }`, "", "fo\n", "", ""}, - {`BEGIN { print substr("food", 1, 4) }`, "", "food\n", "", ""}, - {`BEGIN { print substr("food", 1, 8) }`, "", "food\n", "", ""}, - {`BEGIN { print substr("food", 2) }`, "", "ood\n", "", ""}, - {`BEGIN { print substr("food", 2, 2) }`, "", "oo\n", "", ""}, - {`BEGIN { print substr("food", 2, 3) }`, "", "ood\n", "", ""}, - {`BEGIN { print substr("food", 2, 8) }`, "", "ood\n", "", ""}, - {`BEGIN { print substr("food", 0, 8) }`, "", "food\n", "", ""}, - {`BEGIN { print substr("food", -1, 8) }`, "", "food\n", "", ""}, - {`BEGIN { print substr("food", 5) }`, "", "\n", "", ""}, - {`BEGIN { print substr("food", -1) }`, "", "food\n", "", ""}, - {`BEGIN { print substr("food", 5, 8) }`, "", "\n", "", ""}, - {`BEGIN { print substr("food", 2, -3), substr("fööd", 2, -3) }`, "", " \n", "", ""}, - {`BEGIN { n = split("", a); for (i=1; i<=n; i++) print a[i] }`, "", "", "", ""}, - {`BEGIN { n = split("", a, "."); for (i=1; i<=n; i++) print a[i] }`, "", "", "", ""}, - {`BEGIN { n = split("ab c d ", a); for (i=1; i<=n; i++) print a[i] }`, "", "ab\nc\nd\n", "", ""}, - {`BEGIN { n = split("ab,c,d,", a, ","); for (i=1; i<=n; i++) print a[i] }`, "", "ab\nc\nd\n\n", "", ""}, - {`BEGIN { n = split("ab,c.d,", a, /[,.]/); for (i=1; i<=n; i++) print a[i] }`, "", "ab\nc\nd\n\n", "", ""}, - {`BEGIN { n = split("1 2", a); print (n, a[1], a[2], a[1]==1, a[2]==2) }`, "", "2 1 2 1 1\n", "", ""}, - {`BEGIN { x = "1.2.3"; print sub(/\./, ",", x); print x }`, "", "1\n1,2.3\n", "", ""}, - {`BEGIN { x = "1.2.3"; print sub(/\./, ",\\", x); print x }`, "", "1\n1,\\2.3\n", "", ""}, - {`{ print sub(/\./, ","); print $0 }`, "1.2.3", "1\n1,2.3\n", "", ""}, - {`BEGIN { x = "1.2.3"; print gsub(/\./, ",", x); print x }`, "", "2\n1,2,3\n", "", ""}, - {`{ print gsub(/\./, ","); print $0 }`, "1.2.3", "2\n1,2,3\n", "", ""}, - {`{ print gsub(/[0-9]/, "(&)"); print $0 }`, "0123x. 42y", "6\n(0)(1)(2)(3)x. (4)(2)y\n", "", ""}, - {`{ print gsub(/[0-9]+/, "(&)"); print $0 }`, "0123x. 42y", "2\n(0123)x. (42)y\n", "", ""}, - {`{ print gsub(/[0-9]/, "\\&"); print $0 }`, "0123x. 42y", "6\n&&&&x. &&y\n", "", ""}, - {`{ print gsub(/[0-9]/, "\\z"); print $0 }`, "0123x. 42y", "6\n\\z\\z\\z\\zx. \\z\\zy\n", "", ""}, - {`{ print gsub("0", "x\\\\y"); print $0 } # !awk !gawk -- our behaviour is per POSIX spec (gawk -P and mawk)`, - "0", "1\nx\\y\n", "", ""}, - {`sub("", "\\e", FS) # !awk !gawk`, "foo bar\nbaz buz\n", "", - "invalid regex \"\\\\e \": error parsing regexp: invalid escape sequence: `\\e`", ""}, - {`BEGIN { print tolower("Foo BaR") }`, "", "foo bar\n", "", ""}, - {`BEGIN { print toupper("Foo BaR") }`, "", "FOO BAR\n", "", ""}, - {` -BEGIN { - srand() - srand(1) - a = rand(); b = rand(); c = rand() - srand(1) - x = rand(); y = rand(); z = rand() - print (a==b, b==c, x==y, y==z) - print (a==x, b==y, c==z) -} -`, "", "0 0 0 0\n1 1 1\n", "", ""}, - {` -BEGIN { - for (i = 0; i < 1000; i++) { - if (rand() < 0.5) n++ - } - print (n>400) -} -`, "", "1\n", "", ""}, - {`BEGIN { print system("echo foo"); print system("echo bar") } # !fuzz`, - "", "foo\n0\nbar\n0\n", "", ""}, - {`BEGIN { print system(">&2 echo error") } # !fuzz`, - "", "error\n0\n", "", ""}, - {`BEGIN { print system("exit 42") } # !fuzz !posix`, "", "42\n", "", ""}, - {`BEGIN { system("cat") }`, "foo\nbar", "foo\nbar", "", ""}, - - // Test bytes/unicode handling (GoAWK currently has char==byte, unlike Gawk). - {`BEGIN { print match("food", "foo"), RSTART, RLENGTH } !gawk`, "", "1 1 3\n", "", ""}, - {`BEGIN { print match("x food y", "fo"), RSTART, RLENGTH } !gawk`, "", "3 3 2\n", "", ""}, - {`BEGIN { print match("x food y", "fox"), RSTART, RLENGTH } !gawk`, "", "0 0 -1\n", "", ""}, - {`BEGIN { print match("x food y", /[fod]+/), RSTART, RLENGTH } !gawk`, "", "3 3 4\n", "", ""}, - {`BEGIN { print match("絵 fööd y", /[föd]+/), RSTART, RLENGTH } !gawk`, "", "5 5 6\n", "", ""}, - {`{ print length, length(), length("buzz"), length("") } # !gawk`, "foo bar", "7 7 4 0\n", "", ""}, - {`BEGIN { print length("a"), length("絵") } # !gawk`, "", "1 3\n", "", ""}, - {`BEGIN { print index("foo", "f"), index("foo0", 0), index("foo", "o"), index("foo", "x") } # !gawk`, "", "1 4 2 0\n", "", ""}, - {`BEGIN { print index("föö", "f"), index("föö0", 0), index("föö", "ö"), index("föö", "x") } # !gawk`, "", "1 6 2 0\n", "", ""}, - {`BEGIN { print substr("food", 1), substr("fööd", 1) } # !gawk`, "", "food fööd\n", "", ""}, - {`BEGIN { print substr("food", 1, 2), substr("fööd", 1, 2) } # !gawk`, "", "fo f\xc3\n", "", ""}, - {`BEGIN { print substr("food", 1, 4), substr("fööd", 1, 4) } # !gawk`, "", "food fö\xc3\n", "", ""}, - {`BEGIN { print substr("food", 1, 8), substr("fööd", 1, 8) } # !gawk`, "", "food fööd\n", "", ""}, - {`BEGIN { print substr("food", 2), substr("fööd", 2) } # !gawk`, "", "ood ööd\n", "", ""}, - {`BEGIN { print substr("food", 2, 2), substr("fööd", 2, 2) } # !gawk`, "", "oo ö\n", "", ""}, - {`BEGIN { print substr("food", 2, 3), substr("fööd", 2, 3) } # !gawk`, "", "ood ö\xc3\n", "", ""}, - {`BEGIN { print substr("food", 2, 8), substr("fööd", 2, 8) } # !gawk`, "", "ood ööd\n", "", ""}, - {`BEGIN { print substr("food", 0, 8), substr("fööd", 0, 8) } # !gawk`, "", "food fööd\n", "", ""}, - {`BEGIN { print substr("food", -1, 8), substr("fööd", -1, 8) } # !gawk`, "", "food fööd\n", "", ""}, - {`BEGIN { print substr("food", 5, 8), substr("fööd", 5, 8) } # !gawk`, "", " \xb6d\n", "", ""}, - {`BEGIN { print substr("food", 2, -3), substr("fööd", 2, -3) } # !gawk`, "", " \n", "", ""}, - - // Conditional expressions parse and work correctly - {`BEGIN { print 0?"t":"f" }`, "", "f\n", "", ""}, - {`BEGIN { print 1?"t":"f" }`, "", "t\n", "", ""}, - {`BEGIN { print (1+2)?"t":"f" }`, "", "t\n", "", ""}, - {`BEGIN { print (1+2?"t":"f") }`, "", "t\n", "", ""}, - {`BEGIN { print(1 ? x="t" : "f"); print x; }`, "", "t\nt\n", "", ""}, - - // Locals vs globals, array params, and recursion - {` -function f(loc) { - glob += 1 - loc += 1 - loc = loc * 2 - print glob, loc -} -BEGIN { - glob = 1 - loc = 42 - f(3) - print loc - f(4) - print loc -} -`, "", "2 8\n42\n3 10\n42\n", "", ""}, - {` -function set(a, x, v) { a[x] = v } -function get(a, x) { return a[x] } -function get2(x, a) { return a[x] } -function get3(x, a, b) { b[0]; return a[x] } -BEGIN { - a["x"] = 1 - set(b, "y", 2) - for (k in a) print k, a[k] - print "---" - for (k in b) print k, b[k] - print "---" - print get(a, "x"), get(b, "y") - print get2("x", a), get2("y", b) - print get3("x", a), get2("y", b) -} -`, "", "x 1\n---\ny 2\n---\n1 2\n1 2\n1 2\n", "", ""}, - {` -function fib(n) { - return n < 3 ? 1 : fib(n-2) + fib(n-1) -} -BEGIN { - for (i = 1; i <= 7; i++) { - printf "%d ", fib(i) - } -} -`, "", "1 1 2 3 5 8 13 ", "", ""}, - {` -function f(a, x) { return a[x] } -function g(b, y) { f(b, y) } -BEGIN { c[1]=2; print f(c, 1); print g(c, 1) } -`, "", "2\n\n", "", ""}, - {` -function g(b, y) { return f(b, y) } -function f(a, x) { return a[x] } -BEGIN { c[1]=2; print f(c, 1); print g(c, 1) } -`, "", "2\n2\n", "", ""}, - {` -function h(b, y) { g(b, y) } -function g(b, y) { f(b, y) } -function f(a, x) { return a[x] } -BEGIN { c[1]=2; print f(c, 1); print g(c, 1) } -`, "", "2\n\n", "", ""}, - {` -function h(b, y) { return g(b, y) } -function g(b, y) { return f(b, y) } -function f(a, x) { return a[x] } -BEGIN { c[1]=2; print f(c, 1); print g(c, 1); print h(c, 1) } -`, "", "2\n2\n2\n", "", ""}, - {` -function get(a, x) { return a[x] } -BEGIN { a[1]=2; print get(a, x); print get(1, 2); } -# !awk - awk doesn't detect this -`, "", "", `parse error at 3:40: can't pass scalar 1 as array param`, "attempt to use scalar"}, - {` -function early() { - print "x" - return - print "y" -} -BEGIN { early() } -`, "", "x\n", "", ""}, - {`BEGIN { return }`, "", "", "parse error at 1:9: return must be inside a function", "return"}, - {`function f() { printf "x" }; BEGIN { f() } `, "", "x", "", ""}, - {`BEGIN { arr[0]; f(arr) } function f(a) { printf "x" }`, "", "x", "", ""}, - {`function f(x) { 0 in _; f(_) } BEGIN { f() } # !awk !gawk`, "", "", `calling "f" exceeded maximum call depth of 1000`, ""}, - {`BEGIN { for (i=0; i<1001; i++) f(); print x } function f() { x++ }`, "", "1001\n", "", ""}, - {` -function bar(y) { return y[1] } -function foo() { return bar(x) } -BEGIN { x[1] = 42; print foo() } -`, "", "42\n", "", ""}, - {` -function f1(x) { } -function f2(x, y) { return x[y] } -BEGIN { a[1]=2; f1(a); print f2(a, 1) } -`, "", "2\n", "", ""}, - {`BEGIN { arr[0]; f(arr) } function f(a) { print "x" }`, "", "x\n", "", ""}, - {`function add(a, b) { return a+b } BEGIN { print add(1, 2), add(1), add() }`, "", "3 1 0\n", "", ""}, - - // Type checking / resolver tests - {`BEGIN { a[x]; a=42 }`, "", "", `parse error at 1:15: can't use array "a" as scalar`, "array"}, - {`BEGIN { s=42; s[x] }`, "", "", `parse error at 1:15: can't use scalar "s" as array`, "array"}, - {`function get(a, k) { return a[k] } BEGIN { a = 42; print get(a, 1); } # !awk - doesn't error in awk`, - "", "", `parse error at 1:59: can't pass scalar "a" as array param`, "attempt to use scalar parameter `a' as an array"}, - {`function get(a, k) { return a+k } BEGIN { a[42]; print get(a, 1); }`, - "", "", `parse error at 1:56: can't pass array "a" as scalar param`, "array"}, - {`{ f(z) } function f(x) { print NR }`, "abc", "1\n", "", ""}, - {`function f() { f() } BEGIN { f() } # !awk !gawk`, "", "", `calling "f" exceeded maximum call depth of 1000`, ""}, - {`function f(x) { 0 in x } BEGIN { f(FS) } # !awk`, "", "", `parse error at 1:35: can't pass scalar "FS" as array param`, "attempt to use scalar parameter `x' as an array"}, - {` -function foo(x) { print "foo", x } -function bar(foo) { print "bar", foo } -BEGIN { foo(5); bar(10) } -# !posix -`, "", "foo 5\nbar 10\n", "", ""}, - {` -function foo(foo) { print "foo", foo } -function bar(foo) { print "bar", foo } -BEGIN { foo(5); bar(10) } -`, "", "", `parse error at 2:14: can't use function name as parameter name`, "function name"}, - {`function foo() { print foo } BEGIN { foo() }`, - "", "", `parse error at 1:46: global var "foo" can't also be a function`, "function"}, - {`function f(x) { print x, x(); } BEGIN { f() }`, "", "", `parse error at 1:27: can't call local variable "x" as function`, "function"}, - - // Redirected I/O - {`BEGIN { getline x; print x }`, "foo", "foo\n", "", ""}, - {`function f(x) { getline x; print x } BEGIN { f(); print x }`, "foo", "foo\n\n", "", ""}, - {`BEGIN { getline SUBSEP; print SUBSEP }`, "foo", "foo\n", "", ""}, - {`BEGIN { getline a[1]; print a[1] }`, "foo", "foo\n", "", ""}, - {`BEGIN { getline $1; print $1 }`, "foo", "foo\n", "", ""}, - {`BEGIN { "echo foo" | getline a[1]; print a[1] }`, "", "foo\n", "", ""}, - {`BEGIN { "echo foo" | getline $1; print $1 }`, "", "foo\n", "", ""}, - {`BEGIN { print "foo" |"sort"; print "bar" |"sort" } # !fuzz`, "", "bar\nfoo\n", "", ""}, - {`BEGIN { print "foo" |">&2 echo error" } # !gawk !fuzz`, "", "error\n", "", ""}, - {`BEGIN { "cat" | getline; print } # !fuzz`, "bar", "bar\n", "", ""}, - {`BEGIN { print getline x < "/no/such/file" } # !fuzz`, "", "-1\n", "", ""}, - {`BEGIN { print getline "z"; print $0 }`, "foo", "1z\nfoo\n", "", ""}, - {`BEGIN { print getline x+1; print x }`, "foo", "2\nfoo\n", "", ""}, - {`BEGIN { print getline (x+1); print $0 }`, "foo", "11\nfoo\n", "", ""}, - {`BEGIN { print getline foo(); print $0 } function foo() { print "z" }`, "foo", "z\n1\nfoo\n", "", ""}, - // TODO: these forms don't yet work under GoAWK - //{`BEGIN { print("echo foo" | getline x+1); print x }`, "", "2\nfoo\n", "", ""}, - //{`BEGIN { print("echo foo" | getline $0+1); print }`, "", "2\nfoo\n", "", ""}, - //{`BEGIN { print("echo foo" | getline ($0+1)); print }`, "", "11\nfoo\n", "", ""}, - //{`BEGIN { print("echo foo" | getline foo()); print } function foo() { print "z" }`, "", "z\n1\nfoo\n", "", ""}, - {`BEGIN { - print "foo" >"out" - print close("out") - print "bar" >"out" - print close("out") - getline <"out" - print $0 - print close("out") - print close("out") -}`, "", "0\n0\nbar\n0\n-1\n", "", ""}, - {`BEGIN { - print "foo" >"out" - print "bar" >"out" - print close("out") - getline <"out" - print $0 - print close("out") - getline <"out" - print $0 - print close("out") - print close("out") -}`, "", "0\nfoo\n0\nfoo\n0\n-1\n", "", ""}, - {`BEGIN { print close("nothing") }`, "", "-1\n", "", ""}, - {`BEGIN { - print "foo">"out" - close("out") - print "bar">>"out" - close("out") - getline <"out" - print $0 - getline <"out" - print $0 -}`, "", "foo\nbar\n", "", ""}, - - // Ensure data returned by getline (in various forms) is treated as numeric string - {`BEGIN { getline; print($0==0) }`, "0.0", "1\n", "", ""}, - {`BEGIN { getline x; print(x==0) }`, "0.0", "1\n", "", ""}, - {`BEGIN { "echo 0.0" | getline; print($0==0) }`, "", "1\n", "", ""}, - {`BEGIN { "echo 0.0" | getline x; print(x==0) }`, "", "1\n", "", ""}, - - // Redirected I/O errors (we give explicit errors, awk and gawk don't) - {`BEGIN { print >"out"; getline <"out" } # !awk !gawk`, "", "", "can't read from writer stream", ""}, - {`BEGIN { print |"out"; getline <"out" } # !awk !gawk`, "", "", "can't read from writer stream", ""}, - {`BEGIN { print >"out"; close("out"); getline <"out"; print >"out" } # !awk !gawk`, "", "", "can't write to reader stream", ""}, - {`BEGIN { print >"out"; close("out"); getline <"out"; print |"out" } # !awk !gawk`, "", "", "can't write to reader stream", ""}, - - // Redirecting to or from a filename of "-" means write to stdout or read from stdin - {`BEGIN { print getline x < "-"; print x }`, "a\nb\n", "1\na\n", "", ""}, - {`{ print $0; print getline x <"-"; print x }`, "one\ntwo\n", "one\n0\n\ntwo\n0\n\n", "", ""}, - {`BEGIN { print "x" >"-"; print "y" >"-" }`, "", "x\ny\n", "", ""}, - - // fflush() function - tests parsing and some edge cases, but not - // actual flushing behavior (that's partially tested in TestFlushes). - {`BEGIN { print fflush(); print fflush("") }`, "", "0\n0\n", "", ""}, - {`BEGIN { print "x"; print fflush(); print "y"; print fflush("") }`, "", "x\n0\ny\n0\n", "", ""}, - {`BEGIN { print "x" >"out"; print fflush("out"); print "y"; print fflush("") } # !fuzz`, "", "0\ny\n0\n", "", ""}, - {`BEGIN { print fflush("x") } # !gawk`, "", "error flushing \"x\": not an output file or pipe\n-1\n", "", ""}, - {`BEGIN { "cat" | getline; print fflush("cat") } # !gawk !fuzz`, "", "error flushing \"cat\": not an output file or pipe\n-1\n", "", ""}, - - // Greater than operator requires parentheses in print statement, - // otherwise it's a redirection directive - {`BEGIN { print "x" > "out" } # !fuzz`, "", "", "", ""}, - {`BEGIN { printf "x" > "out" } # !fuzz`, "", "", "", ""}, - {`BEGIN { print("x" > "out") }`, "", "1\n", "", ""}, - {`BEGIN { printf("x" > "out") }`, "", "1", "", ""}, - - // Grammar should allow blocks wherever statements are allowed - {`BEGIN { if (1) printf "x"; else printf "y" }`, "", "x", "", ""}, - {`BEGIN { printf "x"; { printf "y"; printf "z" } }`, "", "xyz", "", ""}, - - // Backslash line continuation - {"BEGIN { print 1,\\\n 2 }", "", "1 2\n", "", ""}, - {"BEGIN { print 1,\\\r\n 2 }", "", "1 2\n", "", ""}, - - // Ensure syntax errors result in errors - {`{ $1 = substr($1, 1, 3) print $1 }`, "", "", "parse error at 1:25: expected ; or newline between statements", "syntax error"}, - {`BEGIN { f() }`, "", "", `parse error at 1:9: undefined function "f"`, "defined"}, - {`function f() {} function f() {} BEGIN { }`, "", "", `parse error at 1:26: function "f" already defined`, "define"}, - {`BEGIN { print (1,2),(3,4) }`, "", "", "parse error at 1:15: unexpected comma-separated expression", "syntax"}, - {`BEGIN { print (1,2,(3,4),(5,6)) }`, "", "", "parse error at 1:20: unexpected comma-separated expression", "syntax"}, - {"BEGIN { print 1,\\2 }", "", "1 2\n", `parse error at 1:18: expected \n after \ line continuation`, "backslash not last character on line"}, - {`BEGIN { print . }`, "", "", "parse error at 1:16: expected digits", "syntax"}, - {`BEGIN { print "foo }`, "", "", "parse error at 1:21: didn't find end quote in string", "unterminated string"}, - {"BEGIN { print \"foo\n\"}", "", "", "parse error at 1:19: can't have newline in string", "unterminated string"}, - {`/foo`, "", "", "parse error at 1:5: didn't find end slash in regex", "unterminated regexp"}, - {"/foo\n", "", "", "parse error at 1:5: can't have newline in regex", "unterminated regexp"}, - {`BEGIN { print "\x" } # !gawk`, "", "", "parse error at 1:18: 1 or 2 hex digits expected", ""}, - {`BEGIN { print 1&*2 }`, "", "", "parse error at 1:17: unexpected char after '&'", "syntax"}, - {"BEGIN { ` }", "", "", "parse error at 1:9: unexpected char", "syntax"}, - - // Hex floating point and other number conversions - {`{ print $1+0 } # +posix`, ` -0x0 -0X10 -0x1234567890 -0xabcdef -0xABCDEF --0xa -+0XA -0xf.f -0xf.fp10 -0xf.fp-10 -0x.f -0xf. -0x. -`[1:], ` -0 -16 -78187493520 -11259375 -11259375 --10 -10 -15.9375 -16320 -0.015564 -0.9375 -15 -0 -`[1:], "", ""}, - {`BEGIN { print int("0x22"), int("-0xa"), int("0xffz"), int("022"), int("-022") } # +posix`, "", - "34 -10 255 22 -22\n", "", ""}, - {`{ print $1, $2+0 } # !gawk`, ` -1 nan -2 NAN -3 nanny -4 +nan -5 -nan -6 na -7 +na -8 inf -9 INF -10 infamous -11 infinity -12 +inf -13 -inf -14 in -15 +in -`[1:], ` -1 nan -2 nan -3 nan -4 nan -5 nan -6 0 -7 0 -8 inf -9 inf -10 inf -11 inf -12 inf -13 -inf -14 0 -15 0 -`[1:], "", ""}, - {`{ printf "%s < %s == %d\n", $1, $2, $1<$2 } # +posix`, ` -10 2 -0x10 0x2 -+nan +nan --0x10 +0x2 --0x10.0p0 +0x2.0p0 -`[1:], ` -10 < 2 == 0 -0x10 < 0x2 == 0 -+nan < +nan == 0 --0x10 < +0x2 == 1 --0x10.0p0 < +0x2.0p0 == 1 -`[1:], "", ""}, - {`{ print !$1 } # +posix`, "0x0\n0x0.0p0\n0x1\n0x0.01\n", "1\n1\n0\n0\n", "", ""}, - {`{ print $1<$2 }`, "1_0 2", "1\n", "", ""}, -} - -func TestInterp(t *testing.T) { - // Ensure very long lines work (> 64KB) - longLine := strings.Repeat("x", 70000) - tests := append(interpTests, - interpTest{`{ print length() }`, longLine, fmt.Sprintf("%d\n", len(longLine)), "", ""}, - ) - - for _, test := range tests { - testName := test.src - if len(testName) > 70 { - testName = testName[:70] - } - - // Run it through external awk program first - if awkExe != "" { - runAWK := func(t *testing.T, posix bool) { - if strings.Contains(test.src, "!"+awkExe) { - t.Skipf("skipping under %s", awkExe) - } - if strings.Contains(test.src, "!"+runtime.GOOS+"-"+awkExe) { - t.Skipf("skipping on %s under %s", runtime.GOOS, awkExe) - } - if posix && strings.Contains(test.src, "!posix") { - t.Skipf("skipping in --posix mode") - } - if !posix && strings.Contains(test.src, "+posix") { - t.Skip("skipping in non-posix mode") - } - - var args []string - if posix { - args = append(args, "--posix") - } - args = append(args, test.src, "-") - cmd := exec.Command(awkExe, args...) - if test.in != "" { - cmd.Stdin = strings.NewReader(test.in) - } - out, err := cmd.CombinedOutput() - if err != nil { - if test.awkErr != "" { - if strings.Contains(string(out), test.awkErr) { - return - } - t.Fatalf("expected error %q, got:\n%s", test.awkErr, out) - } else { - t.Fatalf("error running %s: %v:\n%s", awkExe, err, out) - } - } - if test.awkErr != "" { - t.Fatalf(`expected error %q, got ""`, test.awkErr) - } - normalized := normalizeNewlines(string(out)) - if normalized != test.out { - t.Fatalf("expected/got:\n%q\n%q", test.out, normalized) - } - } - t.Run("awk_"+testName, func(t *testing.T) { - runAWK(t, false) - }) - if strings.Contains(awkExe, "gawk") { - t.Run("awkposix_"+testName, func(t *testing.T) { - runAWK(t, true) - }) - } - } - - // Then test it in GoAWK - t.Run(testName, func(t *testing.T) { - testGoAWK(t, test.src, test.in, test.out, test.err, nil, nil) - }) - } - _ = os.Remove("out") -} - -// Version of bytes.Buffer that's safe for concurrent writes. This -// makes certain tests that write to Output and Error at once (due -// to os/exec) work correctly. -type concurrentBuffer struct { - buffer bytes.Buffer - mutex sync.Mutex -} - -func (b *concurrentBuffer) Write(data []byte) (int, error) { - b.mutex.Lock() - defer b.mutex.Unlock() - return b.buffer.Write(data) -} - -func (b *concurrentBuffer) String() string { - b.mutex.Lock() - defer b.mutex.Unlock() - return b.buffer.String() -} - -func testGoAWK( - t *testing.T, src, in, out, errStr string, - funcs map[string]interface{}, configure func(config *interp.Config), -) { - parserConfig := &parser.ParserConfig{ - Funcs: funcs, - } - prog, err := parser.ParseProgram([]byte(src), parserConfig) - if err != nil { - if errStr != "" { - if err.Error() == errStr { - return - } - t.Fatalf("expected error %q, got %q", errStr, err.Error()) - } - t.Fatal(err) - } - - // Test that disassembler at least doesn't panic or return an error. - err = prog.Disassemble(ioutil.Discard) - if err != nil { - t.Fatalf("disassembler returned an error: %v", err) - } - - outBuf := &concurrentBuffer{} - config := &interp.Config{ - Stdin: strings.NewReader(in), - Output: outBuf, - Error: outBuf, - Vars: []string{"_var", "42"}, - Funcs: funcs, - } - if configure != nil { - configure(config) - } - status, err := interp.ExecProgram(prog, config) - if err != nil { - if errStr != "" { - if err.Error() == errStr { - return - } - t.Fatalf("expected error %q, got %q", errStr, err.Error()) - } - t.Fatal(err) - } - if errStr != "" { - t.Fatalf(`expected error %q, got ""`, errStr) - } - normalized := normalizeNewlines(outBuf.String()) - if normalized != out { - t.Fatalf("expected/got:\n%q\n%q", out, normalized) - } - if status != 0 { - t.Fatalf("expected status 0, got %d", status) - } -} - -func TestNative(t *testing.T) { - tests := []struct { - src string - in string - out string - err string - funcs map[string]interface{} - }{ - {`BEGIN { print foo() }`, "", "", `parse error at 1:15: undefined function "foo"`, - nil}, - {`BEGIN { print foo() }`, "", "\n", "", - map[string]interface{}{ - "foo": func() {}, - }}, - {`BEGIN { print foo() }`, "", "FOO\n", "", - map[string]interface{}{ - "foo": func() string { return "FOO" }, - }}, - {`BEGIN { print foo() }`, "", "BYTES\n", "", - map[string]interface{}{ - "foo": func() []byte { return []byte("BYTES") }, - }}, - {`BEGIN { print repeat("xy", 5) }`, "", "xyxyxyxyxy\n", "", - map[string]interface{}{ - "repeat": strings.Repeat, - }}, - {`BEGIN { print repeat("xy", 5) }`, "", "xyxyxyxyxy\n", "", - map[string]interface{}{ - "repeat": strings.Repeat, - }}, - {` -BEGIN { - print r0() - print r1(), r1(5) - print r2(), r2(5) -}`, "", "\n0 25\n0 25\n", "", - map[string]interface{}{ - "r0": func() {}, - "r1": func(n int) int { return n * n }, - "r2": func(n int) (int, error) { - return n * n, nil - }, - }}, - {` -BEGIN { - print r2() -}`, "", "", "NATIVE ERROR", - map[string]interface{}{ - "r2": func(n int) (int, error) { - return n * n, fmt.Errorf("NATIVE ERROR") - }, - }}, - {` -BEGIN { - print - print bool(), bool(0), bool(1), bool(""), bool("0"), bool("x") - print i(), i(42), i(-5), i(3.75), i(-3.75) - print i8(), i8(42), i8(-5.6), i8(127), i8(-128) - print i16(), i16(42), i16(-5.6), i16(32767), i16(-32768) - print i32(), i32(42), i32(-5.6), i32(2147483647), i32(-2147483648) - print i64(), i64(42), i64(-5.6), i64(2147483647000), i64(-2147483647000) - print u(), u(42), u(0), u(1) - print u8(), u8(42), u8(-5.6), u8(127), u8(128), u8(255) - print u16(), u16(42), u16(-1), u16(65535) - print u32(), u32(42), u32(-1), u32(4294967295) - print u64(), u64(42), u64(1), u64(4294967296), u64(2147483647000) - print s() "." s("") "." s("Foo bar") "." s(1234) - print b() "." b("") "." b("Foo bar") "." b(1234) -}`, "", ` -0 0 1 0 1 1 -0 42 -5 3 -3 -0 42 -5 127 -128 -0 42 -5 32767 -32768 -0 42 -5 2147483647 -2147483648 -0 42 -5 2147483647000 -2147483647000 -0 42 0 1 -0 42 251 127 128 255 -0 42 65535 65535 -0 42 4294967295 4294967295 -0 42 1 4294967296 2147483647000 -..Foo bar.1234 -..Foo bar.1234 -`, "", - map[string]interface{}{ - "bool": func(b bool) bool { return b }, - "i": func(n int) int { return n }, - "i8": func(n int8) int8 { return n }, - "i16": func(n int16) int16 { return n }, - "i32": func(n int32) int32 { return n }, - "i64": func(n int64) int64 { return n }, - "u": func(n uint) uint { return n }, - "u8": func(n uint8) uint8 { return n }, - "u16": func(n uint16) uint16 { return n }, - "u32": func(n uint32) uint32 { return n }, - "u64": func(n uint64) uint64 { return n }, - "b": func(b []byte) []byte { return b }, - "s": func(s string) string { return s }, - }}, - {` -BEGIN { - print - print sum(), sum(1), sum(2, 3), sum(4, 5, 6, 7, 8) - print fmt_ints() - print fmt_ints("%5d") - print fmt_ints("%5d", 123) - print fmt_ints("%d %d", 123, 456) - print fmt_ints("%d %d %d", 123, 456, 789) -}`, "", ` -0 1 5 30 - -%!d(MISSING) - 123 -123 456 -123 456 789 -`, "", - map[string]interface{}{ - "sum": func(args ...int) int { - sum := 0 - for _, a := range args { - sum += a - } - return sum - }, - "fmt_ints": func(s string, args ...int) string { - fmtArgs := make([]interface{}, len(args)) - for i, a := range args { - fmtArgs[i] = a - } - return fmt.Sprintf(s, fmtArgs...) - }, - }}, - {`BEGIN { 0 }`, "", "", `native function "f" is not a function`, - map[string]interface{}{ - "f": 0, - }}, - {`BEGIN { 1 }`, "", "", `native function "g" param 0 is not int or string`, - map[string]interface{}{ - "g": func(s complex64) {}, - }}, - {`BEGIN { 2 }`, "", "", `native function "g" param 2 is not int or string`, - map[string]interface{}{ - "g": func(x, y int, s []int, t string) {}, - }}, - {`BEGIN { 3 }`, "", "", `native function "h" param 0 is not int or string`, - map[string]interface{}{ - "h": func(a ...map[string]int) {}, - }}, - {`BEGIN { 4 }`, "", "", `native function "h" param 1 is not int or string`, - map[string]interface{}{ - "h": func(x int, a ...complex64) {}, - }}, - {`BEGIN { 5 }`, "", "", `native function "r" return value is not int or string`, - map[string]interface{}{ - "r": func() map[string]int { return nil }, - }}, - {`BEGIN { 6 }`, "", "", `native function "r" first return value is not int or string`, - map[string]interface{}{ - "r": func() (map[string]int, error) { return nil, nil }, - }}, - {`BEGIN { 7 }`, "", "", `native function "r" second return value is not an error`, - map[string]interface{}{ - "r": func() (int, int) { return 0, 0 }, - }}, - {`BEGIN { 8 }`, "", "", `native function "r" returns more than two values`, - map[string]interface{}{ - "r": func() (int, error, int) { return 0, nil, 0 }, - }}, - {`BEGIN { print f(), f(1, 2) }`, "", "", `parse error at 1:20: "f" called with more arguments than declared`, - map[string]interface{}{ - "f": func(n int) {}, - }}, - {`BEGIN { print split("x y", a) }`, "", "", `can't use keyword "split" as native function name`, - map[string]interface{}{ - "split": func() {}, - }}, - {` -function foo(n) { return n * 2 } -BEGIN { print foo(42) } -`, "", "84\n", "", map[string]interface{}{ - "foo": func(n int) int { return n / 2 }, - }}, - {`BEGIN { x=3; print foo(x) }`, "", "9\n", ``, - map[string]interface{}{ - "foo": func(n int) int { return n * n }, - }}, - {` -function bar(n) { return foo(n) } -BEGIN { x=4; y=5; print foo(x), bar(y) } -`, "", "16 25\n", ``, - map[string]interface{}{ - "foo": func(n int) int { return n * n }, - }}, - {`BEGIN { a["x"]=1; print foo(a) }`, "", "", - `parse error at 1:25: can't pass array "a" to native function`, - map[string]interface{}{ - "foo": func(n int) int { return n * n }, - }}, - {`BEGIN { x["x"]=1; print f(x) } function f(a) { return foo(a) }`, "", "", - `parse error at 1:56: can't pass array "a" to native function`, - map[string]interface{}{ - "foo": func(n int) int { return n * n }, - }}, - {`function f(a) { return foo(a) } BEGIN { x["x"]=1; print f(x) }`, "", "", - `parse error at 1:24: can't pass array "a" to native function`, - map[string]interface{}{ - "foo": func(n int) int { return n * n }, - }}, - {`BEGIN { x["x"]=1; print f(x["x"]) } function f(a) { return foo(a) }`, "", "1\n", "", - map[string]interface{}{ - "foo": func(n int) int { return n * n }, - }}, - {`BEGIN { print add(1, add(2, 3)) }`, "", "6\n", "", - map[string]interface{}{ - "add": func(a, b float64) float64 { return a + b }, - }}, - {`BEGIN { print add(1, add(2, 3)) }`, "", "6\n", "", - map[string]interface{}{ - "add": func(a, b float32) float32 { return a + b }, - }}, - {`BEGIN { print foo(x) }`, "", "0\n", "", - map[string]interface{}{ - "foo": func(i int) int { return i }, - }}, - {`BEGIN { print foo(_var) }`, "", "42\n", "", - map[string]interface{}{ - "foo": func(i int) int { return i }, - }}, - {`function foo(y) { return y/2 } BEGIN { print foo(_var) }`, "", "21\n", "", - map[string]interface{}{ - "foo": func(i int) int { return i }, - }}, - } - for _, test := range tests { - testName := test.src - if len(testName) > 70 { - testName = testName[:70] - } - t.Run(testName, func(t *testing.T) { - testGoAWK(t, test.src, test.in, test.out, test.err, test.funcs, nil) - }) - } -} - -func TestSafeMode(t *testing.T) { - tests := []struct { - src string - in string - out string - err string - args []string - }{ - {`BEGIN { print "hi" >"out" }`, "", "", "can't write to file due to NoFileWrites", nil}, - {`BEGIN { print "hi" >>"out" }`, "", "", "can't write to file due to NoFileWrites", nil}, - {`BEGIN { print "hi" |"sort" }`, "", "", "can't write to pipe due to NoExec", nil}, - {`BEGIN { getline <"in" }`, "", "", "can't read from file due to NoFileReads", nil}, - {`$0 # no files`, "1\n2\n", "1\n2\n", "", nil}, - {`$0 # files`, "1\n2\n", "1\n2\n", "can't read from file due to NoFileReads", []string{"f1"}}, - {`BEGIN { "echo foo" |getline }`, "", "", "can't read from pipe due to NoExec", nil}, - {`BEGIN { system("echo foo") }`, "", "", "can't call system() due to NoExec", nil}, - } - for _, test := range tests { - testName := test.src - if len(testName) > 70 { - testName = testName[:70] - } - t.Run(testName, func(t *testing.T) { - testGoAWK(t, test.src, test.in, test.out, test.err, nil, func(config *interp.Config) { - config.Args = test.args - config.NoExec = true - config.NoFileWrites = true - config.NoFileReads = true - }) - }) - } -} - -func TestConfigVarsCorrect(t *testing.T) { - prog, err := parser.ParseProgram([]byte(`BEGIN { print x }`), nil) - if err != nil { - t.Fatalf("error parsing: %v", err) - } - config := &interp.Config{ - Stdin: strings.NewReader(""), - Output: &bytes.Buffer{}, - Error: ioutil.Discard, - Vars: []string{"FS"}, - } - _, err = interp.ExecProgram(prog, config) - expected := "length of config.Vars must be a multiple of 2, not 1" - if err == nil || err.Error() != expected { - t.Fatalf("expected error %q, got: %v", expected, err) - } -} - -func TestShellCommand(t *testing.T) { - testGoAWK(t, `BEGIN { system("echo hello world") }`, "", "hello world\n", "", nil, nil) - - if runtime.GOOS == "windows" { - testGoAWK(t, `BEGIN { system("echo hello world") }`, "", "hello world\n", "", nil, - func(config *interp.Config) { - config.ShellCommand = []string{"cmd.exe", "/c"} - }) - } else { - testGoAWK(t, `BEGIN { system("world") }`, "", "hello world\n", "", nil, - func(config *interp.Config) { - config.ShellCommand = []string{"/bin/echo", "hello"} - }) - testGoAWK(t, `BEGIN { "world" | getline; print }`, "", "hello world\n", "", nil, - func(config *interp.Config) { - config.ShellCommand = []string{"/bin/echo", "hello"} - }) - testGoAWK(t, `BEGIN { print "hello world" | "-" }`, "", "hello world\n", "", nil, - func(config *interp.Config) { - config.ShellCommand = []string{"/bin/cat"} - }) - testGoAWK(t, `BEGIN { print system("echo hi") }`, "", "exec: \"foobar3982\": executable file not found in $PATH\n-1\n", "", nil, - func(config *interp.Config) { - config.ShellCommand = []string{"foobar3982"} - }) - } -} - -func TestSystemCommandNotFound(t *testing.T) { - prog, err := parser.ParseProgram([]byte(`BEGIN { print system("foobar3982") }`), nil) - if err != nil { - t.Fatalf("error parsing: %v", err) - } - outBuf := &concurrentBuffer{} - config := &interp.Config{ - Output: outBuf, - Error: outBuf, - } - _, err = interp.ExecProgram(prog, config) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - got := outBuf.buffer.String() - if !strings.Contains(got, "foobar3982") || !strings.Contains(got, "not found") { - t.Fatalf(`expected output to contain "foobar3982" and "not found", got %q`, got) - } -} - -type mockFlusher struct { - bytes.Buffer - flushes []string -} - -func (f *mockFlusher) Flush() error { - f.flushes = append(f.flushes, normalizeNewlines(f.String())) - return nil -} - -func TestFlushes(t *testing.T) { - src := ` -BEGIN { - print fflush() - print "x" - print "y" - print fflush() - print "z" - print fflush("") -}` - f := &mockFlusher{} - testGoAWK(t, src, "", "", "", nil, func(config *interp.Config) { - config.Output = f - }) - // The last one is from GoAWK itself flushing output after finishing - expected := []string{"", "0\nx\ny\n", "0\nx\ny\n0\nz\n", "0\nx\ny\n0\nz\n0\n"} - if !reflect.DeepEqual(f.flushes, expected) { - t.Fatalf("expected flushes %q, got %q", expected, f.flushes) - } - - // Ensure output is flushed before getline reads from stdin - src = ` -BEGIN { - printf "Prompt: " - getline x - print x -}` - f = &mockFlusher{} - testGoAWK(t, src, "42\n", "", "", nil, func(config *interp.Config) { - config.Output = f - }) - expected = []string{"Prompt: ", "Prompt: 42\n"} - if !reflect.DeepEqual(f.flushes, expected) { - t.Fatalf("expected flushes %q, got %q", expected, f.flushes) - } - - // Ensure output is flushed before system() - src = ` -BEGIN { - print "one" - system("echo .") - print "two" -}` - f = &mockFlusher{} - testGoAWK(t, src, "", "", "", nil, func(config *interp.Config) { - config.Output = f - }) - expected = []string{"one\n", "one\n.\ntwo\n"} - if !reflect.DeepEqual(f.flushes, expected) { - t.Fatalf("expected flushes %q, got %q", expected, f.flushes) - } -} - -type errorFlusher struct { - bytes.Buffer -} - -func (f *errorFlusher) Flush() error { - return errors.New("that's not good, hackers") -} - -func TestFlushError(t *testing.T) { - f := &errorFlusher{} - testGoAWK(t, `BEGIN { fflush() }`, "", "", "", nil, func(config *interp.Config) { - config.Output = f - config.Error = f - }) - expected := "error flushing \"stdout\": that's not good, hackers\n" - if f.String() != expected { - t.Fatalf("expected/got:\n%q\n%q", expected, f.String()) - } -} - -func TestEnviron(t *testing.T) { - os.Setenv("GOAWK_TEN", "10") // to test that ENVIRON[x] is numeric string - src := ` -BEGIN { - n = 0 - for (k in ENVIRON) - n++ - print(n, ENVIRON["USER"], ENVIRON["GOAWK_TEN"] < 2) -}` - expected := fmt.Sprintf("%d %s 0\n", len(os.Environ()), os.Getenv("USER")) - testGoAWK(t, src, "", expected, "", nil, nil) - - expected = "2 bob 0\n" - testGoAWK(t, src, "", expected, "", nil, func(config *interp.Config) { - config.Environ = []string{"USER", "bob", "GOAWK_TEN", "10"} - }) - - expected = "0 1\n" - testGoAWK(t, src, "", expected, "", nil, func(config *interp.Config) { - config.Environ = []string{} - }) - - testGoAWK(t, src, "", "", "length of config.Environ must be a multiple of 2, not 3", nil, func(config *interp.Config) { - config.Environ = []string{"b", "a", "d"} - }) -} - -func TestExit(t *testing.T) { - tests := []struct { - src string - out string - status int - }{ - {`BEGIN { print "x"; exit; print "y" } { print "a" } END { print "z" }`, "x\nz\n", 0}, - {`BEGIN { print "x"; exit 1+2; print "y" } { print "a" } END { print "z" }`, "x\nz\n", 3}, - {`{ print "x"; exit; print "y" } END { print "z" }`, "x\nz\n", 0}, - {`{ print "x"; exit 1+2; print "y" } END { print "z" }`, "x\nz\n", 3}, - {`END { print "x"; exit; print "y" }`, "x\n", 0}, - {`END { print "x"; exit 1+2; print "y" }`, "x\n", 3}, - } - for _, test := range tests { - t.Run(test.src, func(t *testing.T) { - prog, err := parser.ParseProgram([]byte(test.src), nil) - if err != nil { - t.Fatalf("error parsing: %v", err) - } - outBuf := &bytes.Buffer{} - config := &interp.Config{ - Stdin: strings.NewReader("line\n"), - Output: outBuf, - } - status, err := interp.ExecProgram(prog, config) - if err != nil { - t.Fatalf("error interpreting: %v", err) - } - normalized := normalizeNewlines(outBuf.String()) - if normalized != test.out { - t.Fatalf("expected/got:\n%q\n%q", test.out, normalized) - } - if status != test.status { - t.Fatalf("expected status %d, got %d", test.status, status) - } - }) - } -} - -type csvTest struct { - src string - in string - out string - err string - configure func(config *interp.Config) -} - -var csvTests = []csvTest{ - // INPUTMODE combinations - {`BEGIN { INPUTMODE="" } { print $1, $3 }`, "name,email\nBob C. Smith,bob@smith.com\nJane X. Brown,jane@brown.com", "name,email \nBob Smith,bob@smith.com\nJane Brown,jane@brown.com\n", "", nil}, - {`BEGIN { INPUTMODE="csv header" } { print $1, $3 }`, "name,email,age\nBob\tSmith,bob@smith.com,42\n\nJane,jane@brown.com,37\n# not a comment", "Bob\tSmith 42\nJane 37\n# not a comment \n", "", nil}, - {`BEGIN { INPUTMODE="csv separator=|" } { print $1, $3 }`, "Bob,Smith|bob@smith.com|42\nJane|jane@brown.com|37", "Bob,Smith 42\nJane 37\n", "", nil}, - {`BEGIN { INPUTMODE="csv comment=#" } { print $1, $3 }`, "# this is a comment\nBob\tSmith,bob@smith.com,42\nJane,jane@brown.com,37", "Bob\tSmith 42\nJane 37\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } { print $1, $3 }`, "name,email,age\nBob,bob@smith.com,42\nJane,jane@brown.com,37", "name age\nBob 42\nJane 37\n", "", nil}, - {`BEGIN { INPUTMODE="csv header" } { print @"age", @"name" }`, "name,email,age\nBob,bob@smith.com,42\nJane,jane@brown.com,37", "42 Bob\n37 Jane\n", "", nil}, - {`BEGIN { INPUTMODE="csv header" } { x="name"; print @"age", @x }`, "name,age\nBob,42", "42 Bob\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } { print @"age", @"name" }`, "name,email,age\nBob,bob@smith.com,42\nJane,jane@brown.com,37", "", `@ only supported if header parsing enabled; use -H or add "header" to INPUTMODE`, nil}, - {`BEGIN { INPUTMODE="tsv header" } { print $1, $3 }`, "name\temail\tage\nBob,Smith\tbob@smith.com\t42\nJane\tjane@brown.com\t37", "Bob,Smith 42\nJane 37\n", "", nil}, - - // OUTPUTMODE combinations - {`BEGIN { OUTPUTMODE="csv" } { print $2, $1 }`, "a\"b c\nd e", "c,\"a\"\"b\"\ne,d\n", "", nil}, - {`BEGIN { OUTPUTMODE="tsv" } { print $2, $1 }`, "a\"b c\nd e", "c\t\"a\"\"b\"\ne\td\n", "", nil}, - {`BEGIN { OUTPUTMODE="csv separator=|" } { print $2, $1 }`, "a\"b c\nd e", "c|\"a\"\"b\"\ne|d\n", "", nil}, - - // Both input and output in CSV (or TSV) mode - {`BEGIN { INPUTMODE="csv header"; OUTPUTMODE="csv"; print "age", "name" } { print $2, $1 }`, "name,age\nBob,42\n\"J B\",37\n\"A\"\"B\",7", "age,name\n42,Bob\n37,J B\n7,\"A\"\"B\"\n", "", nil}, - {`BEGIN { INPUTMODE="csv"; OUTPUTMODE="tsv"; } { $1=$1; print }`, "name,age\nBob,42\n\"J B\",37\n\"A\"\"B\",7", "name\tage\nBob\t42\nJ B\t37\n\"A\"\"B\"\t7\n", "", nil}, - - // Configure via interp.Config struct - {`{ print $2, $1 }`, "name,age\nBob,42", "age name\n42 Bob\n", "", func(config *interp.Config) { - config.InputMode = interp.CSVMode - }}, - {`{ print $2, $1 }`, "name\tage\nBob\t42", "age name\n42 Bob\n", "", func(config *interp.Config) { - config.InputMode = interp.TSVMode - }}, - {`{ print $2, $1 }`, "# comment\nBob;42", "42 Bob\n", "", func(config *interp.Config) { - config.InputMode = interp.CSVMode - config.CSVInput.Separator = ';' - config.CSVInput.Comment = '#' - }}, - {`{ print $1, $2 }`, "", "", "input mode configuration not valid in default input mode", func(config *interp.Config) { - config.CSVInput.Separator = ';' - }}, - {`{ print $2, $1 }`, "Bob,42\nJane,37", "42\tBob\n37\tJane\n", "", func(config *interp.Config) { - config.InputMode = interp.CSVMode - config.OutputMode = interp.TSVMode - }}, - {`BEGIN { INPUTMODE="tsv header"; OUTPUTMODE="csv" } { print @"age", @"name" }`, "name\tage\nBob\t42", "42,Bob\n", "", func(config *interp.Config) { - config.InputMode = interp.CSVMode // will be overridden by BEGIN - config.OutputMode = interp.TSVMode - }}, - {`{ print @"age", @"name" }`, "name\tage\nBob\t42", "42,Bob\n", "", func(config *interp.Config) { - config.InputMode = interp.CSVMode // will be overridden by Vars - config.OutputMode = interp.TSVMode - config.Vars = []string{"INPUTMODE", "tsv header", "OUTPUTMODE", "csv"} - }}, - {`{ print $2, $1 }`, "Bob 42", "42,Bob\n", "", func(config *interp.Config) { - config.OutputMode = interp.CSVMode - }}, - {`{ print $2, $1 }`, "Bob 42", "42\tBob\n", "", func(config *interp.Config) { - config.OutputMode = interp.TSVMode - }}, - {`{ print $2, $1 }`, "Bob 42", "42;Bob\n", "", func(config *interp.Config) { - config.OutputMode = interp.CSVMode - config.CSVOutput.Separator = ';' - }}, - {`{ print $1, $2 }`, "", "", "output mode configuration not valid in default output mode", func(config *interp.Config) { - config.CSVOutput.Separator = ';' - }}, - - // $0 still works as expected in CSV mode - {`BEGIN { INPUTMODE="csv header" } { print }`, "name,age\nBob,42\nJane,37", "Bob,42\nJane,37\n", "", nil}, - {`BEGIN { INPUTMODE="csv header" } { print $0 }`, "name,age\nBob,42\nJane,37", "Bob,42\nJane,37\n", "", nil}, - {`BEGIN { INPUTMODE="csv header" } { print $0; $0=NR; print $0 }`, "name,age\nBob,42\nJane,37", "Bob,42\n1\nJane,37\n2\n", "", nil}, - {`BEGIN { INPUTMODE="csv header comment=#" } { print $0 } END { for (i=1; i in FIELDS; i++) print i, FIELDS[i] }`, - "# comment\n\nname,age\n# comment\n\nBob,42\n# comment\nJane,37\n\nFoo,5", - "Bob,42\nJane,37\nFoo,5\n1 name\n2 age\n", "", nil}, - - // CSV filters - {`BEGIN { INPUTMODE="csv header" } /foo/ { print $2 }`, "id,type\n1,food\n2,bar\n3,foo\n", "food\nfoo\n", "", nil}, - {`BEGIN { INPUTMODE="csv header" } $1==2 { print $2 }`, "id,type\n1,food\n2,bar\n3,foo\n", "bar\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } { s += $-1 } END { print s }`, "a,1\nb,2\nc,3\n", "6\n", "", nil}, - - // Updating fields - {`BEGIN { INPUTMODE="csv" } { $1 = $1 $1; print $1, $2 }`, "a,1\nb,2", "aa 1\nbb 2\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } { $1 = $1 $1; print }`, "a,1\nb,2", "aa 1\nbb 2\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } { $0 = "X,3"; print $1, $2 }`, "a,1\nb,2", "X 3\nX 3\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } { $0 = "X,3"; print }`, "a,1\nb,2", "X,3\nX,3\n", "", nil}, - {`BEGIN { INPUTMODE=OUTPUTMODE="csv" } { $1 = $1 $1; print $1, $2 }`, "a,1\nb,2", "aa,1\nbb,2\n", "", nil}, - {`BEGIN { INPUTMODE=OUTPUTMODE="csv" } { $1 = $1 $1; print }`, "a,1\nb,2", "aa,1\nbb,2\n", "", nil}, - {`BEGIN { INPUTMODE=OUTPUTMODE="csv" } { $0 = "X,3"; print $1, $2 }`, "a,1\nb,2", "X,3\nX,3\n", "", nil}, - {`BEGIN { INPUTMODE=OUTPUTMODE="csv" } { $0 = "X,3"; print }`, "a,1\nb,2", "X,3\nX,3\n", "", nil}, - {`BEGIN { OUTPUTMODE="csv"; $0 = "a b c"; printf "%s|%s %s %s\n", $0, $1, $2, $3; NF=2; printf "%s|%s %s\n", $0, $1, $2 }`, "", "a b c|a b c\na,b|a b\n", "", nil}, - {`BEGIN { OUTPUTMODE="csv"; $0 = "a b c"; printf "%s|%s %s %s\n", $0, $1, $2, $3; NF=4; printf "%s|%s %s %s %s\n", $0, $1, $2, $3, $4 }`, "", "a b c|a b c\na,b,c,|a b c \n", "", nil}, - - // FIELDS array - {`BEGIN { INPUTMODE="csv header" } NR==1 { for (i=1; i in FIELDS; i++) print i, FIELDS[i] }`, "name,email,age\na,b,c", "1 name\n2 email\n3 age\n", "", nil}, - {`BEGIN { INPUTMODE="csv" } NR==1 { for (i=1; i in FIELDS; i++) print FIELDS[i] }`, "name,email,age\na,b,c", "", "", nil}, - - // Parsing and formatting of INPUTMODE and OUTPUTMODE special variables - {`BEGIN { INPUTMODE="csv separator=,"; print INPUTMODE }`, "", "csv\n", "", nil}, - {`BEGIN { INPUTMODE="csv header=true comment=# separator=|"; print INPUTMODE }`, "", "csv separator=| comment=# header\n", "", nil}, - {`BEGIN { OUTPUTMODE="csv separator=,"; printf "%s", OUTPUTMODE }`, "", "csv", "", nil}, - {`BEGIN { OUTPUTMODE="csv separator=|"; printf "%s", OUTPUTMODE }`, "", "csv separator=|", "", nil}, - - // Ignores UTF-8 byte order mark (BOM) at start of CSV file - {`BEGIN { INPUTMODE="csv" } { print $1=="foo" }`, "\ufefffoo,bar\n\ufefffoo,bar", "1\n0\n", "", nil}, - - // Error handling when parsing INPUTMODE and OUTPUTMODE - {`BEGIN { INPUTMODE="xyz" }`, "", "", `invalid input mode "xyz"`, nil}, - {`BEGIN { INPUTMODE="csv separator=foo" }`, "", "", `invalid CSV/TSV separator "foo"`, nil}, - {`BEGIN { INPUTMODE="csv comment=bar" }`, "", "", `invalid CSV/TSV comment character "bar"`, nil}, - {`BEGIN { INPUTMODE="csv header=x" }`, "", "", `invalid header value "x"`, nil}, - {`BEGIN { INPUTMODE="csv foo=bar" }`, "", "", `invalid input mode key "foo"`, nil}, - {`BEGIN { OUTPUTMODE="xyz" }`, "", "", `invalid output mode "xyz"`, nil}, - {`BEGIN { OUTPUTMODE="csv separator=foo" }`, "", "", `invalid CSV/TSV separator "foo"`, nil}, - {`BEGIN { OUTPUTMODE="csv foo=bar" }`, "", "", `invalid output mode key "foo"`, nil}, - - // Other errors - {`BEGIN { @"x" = "y" }`, "", "", "parse error at 1:14: assigning @ expression not supported", nil}, - {`BEGIN { x="a"; @x = "y" }`, "", "", "parse error at 1:19: assigning @ expression not supported", nil}, - {`BEGIN { @"x" += "y" }`, "", "", "parse error at 1:14: assigning @ expression not supported", nil}, - {`BEGIN { x="a"; @x += "y" }`, "", "", "parse error at 1:19: assigning @ expression not supported", nil}, -} - -func TestCSV(t *testing.T) { - for _, test := range csvTests { - testName := test.src - if len(testName) > 70 { - testName = testName[:70] - } - t.Run(testName, func(t *testing.T) { - testGoAWK(t, test.src, test.in, test.out, test.err, nil, test.configure) - }) - } -} - -func TestCSVMultiRead(t *testing.T) { - tests := []struct { - name string - src string - reads []string - out string - }{{ - name: "UnquotedHeader", - src: `BEGIN { INPUTMODE="csv header"; OFS="|" } { print $0, $1, $2 }`, - reads: []string{"name,age\n", "Bob", ",42\n", "", "Jill,", "37", ""}, - out: "Bob,42|Bob|42\nJill,37|Jill|37\n", - }, { - name: "QuotedHeader", - src: `BEGIN { INPUTMODE="csv header"; OFS="|" } { print $0, $1, $2 }`, - reads: []string{"name,age\n", "\"Bo", "b\"", ",42\n", "\"Ji\n", "ll\",", "37"}, - out: "\"Bob\",42|Bob|42\n\"Ji\nll\",37|Ji\nll|37\n", - }, { - name: "UnquotedNewline", - src: `BEGIN { INPUTMODE="csv header"; OFS="|" } { print $0, $1, $2 }`, - reads: []string{"name,age\n", "Bob", ",42\n", "Jill,", "37", "\n"}, - out: "Bob,42|Bob|42\nJill,37|Jill|37\n", - }, { - name: "QuotedNewline", - src: `BEGIN { INPUTMODE="csv header"; OFS="|" } { print $0, $1, $2 }`, - reads: []string{"name,age\n", "\"Bo", "b\"", ",42\n", "\"Ji\n", "ll\",", "37\n"}, - out: "\"Bob\",42|Bob|42\n\"Ji\nll\",37|Ji\nll|37\n", - }, { - name: "UnquotedNoHeader", - src: `BEGIN { INPUTMODE="csv"; OFS="|" } { print $0, $1, $2 }`, - reads: []string{"Bob", ",42\n", "", "Jill,", "37", ""}, - out: "Bob,42|Bob|42\nJill,37|Jill|37\n", - }, { - name: "QuotedNoHeader", - src: `BEGIN { INPUTMODE="csv"; OFS="|" } { print $0, $1, $2 }`, - reads: []string{"\"Bo", "b\"", ",42\n", "\"Ji\n", "ll\",", "37\n"}, - out: "\"Bob\",42|Bob|42\n\"Ji\nll\",37|Ji\nll|37\n", - }, { - name: "QuotedCRLF", - src: `BEGIN { INPUTMODE="csv" } { printf "%s|%s|%s", $0, $1, $2 }`, - reads: []string{"\"Ji\r\n", "ll\",", "37"}, - out: "\"Ji\nll\",37|Ji\nll|37", - }} - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - prog, err := parser.ParseProgram([]byte(test.src), nil) - if err != nil { - t.Fatalf("error parsing program: %v", err) - } - outBuf := &concurrentBuffer{} - config := &interp.Config{ - Stdin: &sliceReader{reads: test.reads}, - Output: outBuf, - Error: outBuf, - } - status, err := interp.ExecProgram(prog, config) - if err != nil { - t.Fatalf("error executing program: %v", err) - } - out := outBuf.String() - if runtime.GOOS == "windows" { - out = normalizeNewlines(out) - } - if out != test.out { - t.Fatalf("expected/got:\n%q\n%q", test.out, out) - } - if status != 0 { - t.Fatalf("expected status 0, got %d", status) - } - }) - } -} - -type sliceReader struct { - reads []string -} - -func (r *sliceReader) Read(buf []byte) (int, error) { - if len(r.reads) == 0 { - return 0, io.EOF - } - n := copy(buf, r.reads[0]) - if n < len(r.reads[0]) { - r.reads[0] = r.reads[0][:len(buf)] - } else { - r.reads = r.reads[1:] - } - return n, nil -} - -func benchmarkProgram(b *testing.B, funcs map[string]interface{}, - input, expected, srcFormat string, args ...interface{}, -) { - b.StopTimer() - src := fmt.Sprintf(srcFormat, args...) - parserConfig := &parser.ParserConfig{ - Funcs: funcs, - } - prog, err := parser.ParseProgram([]byte(src), parserConfig) - if err != nil { - b.Fatalf("error parsing %s: %v", b.Name(), err) - } - outBuf := &bytes.Buffer{} - config := &interp.Config{ - Stdin: strings.NewReader(input), - Output: outBuf, - Error: ioutil.Discard, - Funcs: funcs, - } - b.StartTimer() - _, err = interp.ExecProgram(prog, config) - b.StopTimer() - if err != nil { - b.Fatalf("error interpreting %s: %v", b.Name(), err) - } - if expected != "" { - expected += "\n" - } - outStr := strings.Replace(outBuf.String(), "\r\n", "\n", -1) - if outStr != expected { - b.Fatalf("expected/got:\n%q\n%q", expected, outStr) - } -} - -func BenchmarkGlobalVars(b *testing.B) { - benchmarkProgram(b, nil, "", "a 1", ` -BEGIN { - for (i = 0; i < %d; i++) { - x = 1; y = "a"; t = x; x = y; y = t - x = 1; y = "a"; t = x; x = y; y = t - x = 1; y = "a"; t = x; x = y; y = t - x = 1; y = "a"; t = x; x = y; y = t - x = 1; y = "a"; t = x; x = y; y = t - } - print x, y -} -`, b.N) -} - -func BenchmarkLocalVars(b *testing.B) { - benchmarkProgram(b, nil, "", "b 2", ` -function f(i, x, y, t) { - for (i = 0; i < %d; i++) { - x = 2; y = "b"; t = x; x = y; y = t - x = 2; y = "b"; t = x; x = y; y = t - x = 2; y = "b"; t = x; x = y; y = t - x = 2; y = "b"; t = x; x = y; y = t - x = 2; y = "b"; t = x; x = y; y = t - } - print x, y -} - -BEGIN { - f() -} -`, b.N) -} - -func BenchmarkIncrDecr(b *testing.B) { - benchmarkProgram(b, nil, "", "0 10", ` -BEGIN { - for (i = 0; i < %d; i++) { - x++; x++; x++; x++; x++; x++; x++; x++; x++; x++ - y = x - x--; x--; x--; x--; x--; x--; x--; x--; x--; x-- - } - print x, y -} -`, b.N) -} - -func BenchmarkSimpleBuiltins(b *testing.B) { - benchmarkProgram(b, nil, "", "", ` -BEGIN { - for (i = 0; i < %d; i++) { - sin(0); cos(0); exp(0); log(1); sqrt(2); int("x"); - sin(0); cos(0); exp(0); log(1); sqrt(2); int("x"); - sin(0); cos(0); exp(0); log(1); sqrt(2); int("x"); - sin(0); cos(0); exp(0); log(1); sqrt(2); int("x"); - sin(0); cos(0); exp(0); log(1); sqrt(2); int("x"); - } -} -`, b.N) -} - -func BenchmarkBuiltinMatch(b *testing.B) { - benchmarkProgram(b, nil, "", "21", ` -BEGIN { - s = "The quick brown fox jumps over the lazy dog" - for (i = 0; i < %d; i++) { - match(s, /j[a-z]+p/); match(s, /j[a-z]+p/) - match(s, /j[a-z]+p/); match(s, /j[a-z]+p/) - match(s, /j[a-z]+p/); match(s, /j[a-z]+p/) - match(s, /j[a-z]+p/); match(s, /j[a-z]+p/) - match(s, /j[a-z]+p/); x = match(s, /j[a-z]+p/) - } - print x -} -`, b.N) -} - -func BenchmarkBuiltinLength(b *testing.B) { - benchmarkProgram(b, nil, "", "134", ` -BEGIN { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog." - for (i = 0; i < %d; i++) { - length(s); length(s); length(s); length(s); length(s); - length(s); length(s); length(s); length(s); length(s); - length(s); length(s); length(s); length(s); length(s); - length(s); length(s); length(s); length(s); length(s); - length(s); length(s); length(s); length(s); x = length(s); - } - print x -} -`, b.N) -} - -func BenchmarkBuiltinIndex(b *testing.B) { - benchmarkProgram(b, nil, "", "134", ` -BEGIN { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog!?!" - for (i = 0; i < %d; i++) { - index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!") - index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!") - index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!") - index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!") - index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); index(s, "!?!"); x = index(s, "!?!") - } - print x -} -`, b.N) -} - -func BenchmarkBuiltinSubstr(b *testing.B) { - benchmarkProgram(b, nil, "", " brown fox", ` -BEGIN { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog!?!" - for (i = 0; i < %d; i++) { - substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10) - substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10) - substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10) - substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10) - substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); substr(s, 100, 10); x = substr(s, 100, 10) - } - print x -} -`, b.N) -} - -func BenchmarkBuiltinSplitSpace(b *testing.B) { - benchmarkProgram(b, nil, "", "27", ` -BEGIN { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog!?!" - for (i = 0; i < %d; i++) { - split(s, a, " "); split(s, a, " "); split(s, a, " ") - split(s, a, " "); split(s, a, " "); split(s, a, " ") - split(s, a, " "); split(s, a, " "); split(s, a, " ") - split(s, a, " "); split(s, a, " "); split(s, a, " ") - split(s, a, " "); split(s, a, " "); split(s, a, " ") - } - for (k in a) n++ - print n -} -`, b.N) -} - -func BenchmarkBuiltinSplitRegex(b *testing.B) { - benchmarkProgram(b, nil, "", "22", ` -BEGIN { - s = "a fox ab fax abc fix a fox ab fax abc fix a fox ab fax abc fix a fox ab fax abc fix a fox ab fax abc fix a fox ab fax abc fix a fox ab fax abc fix" - for (i = 0; i < %d; i++) { - split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x") - split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x") - split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x") - split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x") - split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x"); split(s, a, "f[a-z]x") - } - for (k in a) n++ - print n -} -`, b.N) -} - -func BenchmarkBuiltinSub(b *testing.B) { - benchmarkProgram(b, nil, "", "1 164", ` -BEGIN { - for (i = 0; i < %d; i++) { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog." - sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s) - sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s) - sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s) - sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s) - sub(/f[a-z]x/, "foxes", s); sub(/f[a-z]x/, "foxes", s); x = sub(/f[a-z]x/, "foxes", s) - } - print x, length(s) -} -`, b.N) -} - -func BenchmarkBuiltinSubAmpersand(b *testing.B) { - benchmarkProgram(b, nil, "", "1 164", ` -BEGIN { - for (i = 0; i < %d; i++) { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog." - sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s) - sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s) - sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s) - sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s) - sub(/f[a-z]x/, "&es", s); sub(/f[a-z]x/, "&es", s); x = sub(/f[a-z]x/, "&es", s) - } - print x, length(s) -} -`, b.N) -} - -func BenchmarkBuiltinGsub(b *testing.B) { - benchmarkProgram(b, nil, "", "3 224", ` -BEGIN { - for (i = 0; i < %d; i++) { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog." - gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s) - gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s) - gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s) - gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s) - gsub(/f[a-z]x/, "foxes", s); gsub(/f[a-z]x/, "foxes", s); x = gsub(/f[a-z]x/, "foxes", s) - } - print x, length(s) -} -`, b.N) -} - -func BenchmarkBuiltinGsubAmpersand(b *testing.B) { - benchmarkProgram(b, nil, "", "3 224", ` -BEGIN { - for (i = 0; i < %d; i++) { - s = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog." - gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s) - gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s) - gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s) - gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s) - gsub(/f[a-z]x/, "&es", s); gsub(/f[a-z]x/, "&es", s); x = gsub(/f[a-z]x/, "&es", s) - } - print x, length(s) -} -`, b.N) -} - -func BenchmarkBuiltinSprintf(b *testing.B) { - benchmarkProgram(b, nil, "", "A 123 foo 3.14", ` -BEGIN { - x = "foo" - y = 3.14159 - for (i = 0; i < %d; i++) { - sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y) - sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y) - sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y) - sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y) - sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y); s = sprintf("%%c %%d %%5s %%.3g", 65, 123, x, y) - } - print s -} -`, b.N) -} - -func BenchmarkRecursiveFunc(b *testing.B) { - benchmarkProgram(b, nil, "", "55", ` -function fib(n) { - if (n <= 2) { - return 1 - } - return fib(n-1) + fib(n-2) -} - -BEGIN { - for (i = 0; i < %d; i++) { - res = fib(10) - } - print res -} -`, b.N) -} - -func BenchmarkFuncCall(b *testing.B) { - benchmarkProgram(b, nil, "", "75", ` -function add(a, b) { - return a + b -} - -BEGIN { - for (i = 0; i < %d; i++) { - sum = add(0, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - } - print sum -} -`, b.N) -} - -func BenchmarkNativeFunc(b *testing.B) { - funcs := map[string]interface{}{ - "add": func(a, b float64) float64 { return a + b }, - } - benchmarkProgram(b, funcs, "", "75", ` -BEGIN { - for (i = 0; i < %d; i++) { - sum = add(0, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - sum = add(sum, add(1, add(2, add(3, add(4, 5))))) - } - print sum -} -`, b.N) -} - -func BenchmarkForLoop(b *testing.B) { - benchmarkProgram(b, nil, "", "", ` -BEGIN { - for (i = 0; i < %d; i++) { - for (j = 0; j < 100; j++); - } -} -`, b.N) -} - -func BenchmarkForInLoop(b *testing.B) { - benchmarkProgram(b, nil, "", "", ` -BEGIN { - for (j = 0; j < 100; j++) { - a[j] = j - } - for (i = 0; i < %d; i++) { - for (k in a); - } -} -`, b.N) -} - -func BenchmarkIfStatement(b *testing.B) { - benchmarkProgram(b, nil, "", "0", ` -BEGIN { - c = 1 - d = 0 - for (i = 0; i < %d; i++) { - if (c) { x = 1 } else { x = 0 } - if (c) { x = 1 } else { x = 0 } - if (c) { x = 1 } else { x = 0 } - if (d) { x = 1 } else { x = 0 } - if (d) { x = 1 } else { x = 0 } - if (d) { x = 1 } else { x = 0 } - } - print x -} -`, b.N) -} - -func BenchmarkCondExpr(b *testing.B) { - benchmarkProgram(b, nil, "", "0", ` -BEGIN { - c = 1 - d = 0 - for (i = 0; i < %d; i++) { - x = c ? 1 : 0 - x = c ? 1 : 0 - x = c ? 1 : 0 - x = d ? 1 : 0 - x = d ? 1 : 0 - x = d ? 1 : 0 - } - print x -} -`, b.N) -} - -func BenchmarkSimplePattern(b *testing.B) { - b.StopTimer() - inputLines := []string{} - expectedLines := []string{} - for i := 0; i < b.N; i++ { - if i != 0 && i%2 == 0 { - line := fmt.Sprintf("%d", i) - inputLines = append(inputLines, line) - expectedLines = append(expectedLines, line) - } else { - inputLines = append(inputLines, "") - } - } - input := strings.Join(inputLines, "\n") - expected := strings.Join(expectedLines, "\n") - benchmarkProgram(b, nil, input, expected, "$0") -} - -func BenchmarkGetField(b *testing.B) { - b.StopTimer() - inputLines := []string{} - expectedLines := []string{} - for i := 1; i < b.N+1; i++ { - inputLines = append(inputLines, fmt.Sprintf("%d %d %d", i, i*2, i*3)) - expectedLines = append(expectedLines, fmt.Sprintf("%d %d", i, i*3)) - } - input := strings.Join(inputLines, "\n") - expected := strings.Join(expectedLines, "\n") - benchmarkProgram(b, nil, input, expected, "{ print $1, $3 }") -} - -func BenchmarkSetField(b *testing.B) { - benchmarkProgram(b, nil, "1 2 3", "one 2 three", ` -{ - for (i = 0; i < %d; i++) { - $1 = "one"; $3 = "three" - $1 = "one"; $3 = "three" - $1 = "one"; $3 = "three" - $1 = "one"; $3 = "three" - $1 = "one"; $3 = "three" - } -} -END { - print $0 -} -`, b.N) -} - -func BenchmarkRegexMatch(b *testing.B) { - benchmarkProgram(b, nil, "", "1", ` -BEGIN { - s = "The quick brown fox jumps over the lazy dog" - for (i = 0; i < %d; i++) { - x = s ~ /j[a-z]+p/ - x = s ~ /j[a-z]+p/ - x = s ~ /j[a-z]+p/ - x = s ~ /j[a-z]+p/ - x = s ~ /j[a-z]+p/ - } - print x -} -`, b.N) -} - -func BenchmarkBinaryOperators(b *testing.B) { - benchmarkProgram(b, nil, "", "5.0293", ` -BEGIN { - for (i = 0; i < %d; i++) { - res = (1+2*3/4^5) + (1+2*3/4^5) + (1+2*3/4^5) + (1+2*3/4^5) + (1+2*3/4^5) - } - print res -} -`, b.N) -} - -func BenchmarkConcatTwo(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "20", ` -BEGIN { - x = "0123456789" - for (i = 0; i < %d; i++) { - y = x x - } - print length(y) -} -`, b.N) -} - -func BenchmarkConcatSmall(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "100", ` -BEGIN { - x = "0123456789" - for (i = 0; i < %d; i++) { - y = x x x x x x x x x x - } - print length(y) -} -`, b.N) -} - -func BenchmarkConcatLarge(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "1000000", ` -BEGIN { - x = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" - for (i = 0; i < %d; i++) { - y = x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x \ - x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x - z = y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y \ - y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y - } - print length(z) -} -`, b.N) -} - -func BenchmarkComparisons(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "1", ` -BEGIN { - for (i = 0; i < %d; i++) { - x = ((((((1 < 2) <= 3) > 4) >= 5) == 6) != 7) - x = ((((((1 < 2) <= 3) > 4) >= 5) == 6) != 7) - x = ((((((1 < 2) <= 3) > 4) >= 5) == 6) != 7) - } - print x -} -`, b.N) -} - -func BenchmarkArrayOperations(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "243", ` -BEGIN { - for (i = 0; i < %d; i++) { - a[0] = 1 - a[0] = a[0] + a[0] + a[0] - a[0] = a[0] + a[0] + a[0] - a[0] = a[0] + a[0] + a[0] - a[0] = a[0] + a[0] + a[0] - a[0] = a[0] + a[0] + a[0] - } - print a[0] -} -`, b.N) -} - -func BenchmarkAssign(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "0 1 2 3 4", ` -BEGIN { - for (i = 0; i < %d; i++) { - v=0; w=1; x=2; y=3; z=4 - v=0; w=1; x=2; y=3; z=4 - v=0; w=1; x=2; y=3; z=4 - v=0; w=1; x=2; y=3; z=4 - v=0; w=1; x=2; y=3; z=4 - } - print v, w, x, y, z -} -`, b.N) -} - -func BenchmarkAugAssign(b *testing.B) { - b.StopTimer() - benchmarkProgram(b, nil, "", "5 -9 729 32 3.0536 2", ` -BEGIN { - for (i = 0; i < %d; i++) { - a = 0; b = 1; c = 3; d = 1024; e = 2; f = 14 - a += 1; b -= 2; c *= 3; d /= 2; e ^= 1.1; f %%= 6 - a += 1; b -= 2; c *= 3; d /= 2; e ^= 1.1; f %%= 6 - a += 1; b -= 2; c *= 3; d /= 2; e ^= 1.1; f %%= 6 - a += 1; b -= 2; c *= 3; d /= 2; e ^= 1.1; f %%= 6 - a += 1; b -= 2; c *= 3; d /= 2; e ^= 1.1; f %%= 6 - } - print a, b, c, d, e, f -} -`, b.N) -} - -func BenchmarkPrint(b *testing.B) { - b.StopTimer() - src := fmt.Sprintf(` -BEGIN { - for (i = 0; i < %d; i++) { - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - print i, "foo", i, "bar" - } -} -`, b.N) - - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - b.Fatalf("parse error: %v", err) - } - b.StartTimer() - _, err = interp.ExecProgram(prog, &interp.Config{ - Output: ioutil.Discard, - Environ: []string{}, - }) - b.StopTimer() - if err != nil { - b.Fatalf("execute error: %v", err) - } -} - -func BenchmarkPrintf(b *testing.B) { - b.StopTimer() - src := fmt.Sprintf(` -BEGIN { - for (i = 0; i < %d; i++) { - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - printf "%%d %%s %%d %%s\n", i, "foo", i, "bar" - } -} -`, b.N) - - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - b.Fatalf("parse error: %v", err) - } - b.StartTimer() - _, err = interp.ExecProgram(prog, &interp.Config{ - Output: ioutil.Discard, - Environ: []string{}, - }) - b.StopTimer() - if err != nil { - b.Fatalf("execute error: %v", err) - } -} - -func BenchmarkRepeatExecProgram(b *testing.B) { - prog, err := parser.ParseProgram([]byte(`BEGIN {}`), nil) - if err != nil { - b.Fatalf("parse error: %v", err) - } - config := interp.Config{ - Output: ioutil.Discard, - Environ: []string{}, - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err := interp.ExecProgram(prog, &config) - if err != nil { - b.Fatalf("execute error: %v", err) - } - } -} - -func BenchmarkRepeatNew(b *testing.B) { - prog, err := parser.ParseProgram([]byte(`BEGIN {}`), nil) - if err != nil { - b.Fatalf("parse error: %v", err) - } - p, err := interp.New(prog) - if err != nil { - b.Fatalf("interp.New error: %v", err) - } - config := interp.Config{ - Output: ioutil.Discard, - Environ: []string{}, - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err := p.Execute(&config) - if err != nil { - b.Fatalf("execute error: %v", err) - } - } -} - -func BenchmarkRepeatIOExecProgram(b *testing.B) { - prog, err := parser.ParseProgram([]byte(`{ for (i=1; i<=NF; i++) print $i }`), nil) - if err != nil { - b.Fatalf("parse error: %v", err) - } - inputStr := "foo bar\nbazz\n" - input := strings.NewReader(inputStr) - var output bytes.Buffer - config := interp.Config{ - Stdin: input, - Output: &output, - Environ: []string{}, - } - expected := "foo\nbar\nbazz\n" - b.ResetTimer() - for i := 0; i < b.N; i++ { - input.Reset(inputStr) - output.Reset() - _, err := interp.ExecProgram(prog, &config) - if err != nil { - b.Fatalf("execute error: %v", err) - } - if output.String() != expected { - b.Fatalf("expected/got:\n%q\n%q", expected, output.String()) - } - } -} - -func BenchmarkRepeatIONew(b *testing.B) { - prog, err := parser.ParseProgram([]byte(`{ for (i=1; i<=NF; i++) print $i }`), nil) - if err != nil { - b.Fatalf("parse error: %v", err) - } - p, err := interp.New(prog) - if err != nil { - b.Fatalf("interp.New error: %v", err) - } - inputStr := "foo bar\nbazz\n" - input := strings.NewReader(inputStr) - var output bytes.Buffer - config := interp.Config{ - Stdin: input, - Output: &output, - Environ: []string{}, - } - expected := "foo\nbar\nbazz\n" - b.ResetTimer() - for i := 0; i < b.N; i++ { - input.Reset(inputStr) - output.Reset() - _, err := p.Execute(&config) - if err != nil { - b.Fatalf("execute error: %v", err) - } - if output.String() != expected { - b.Fatalf("expected/got:\n%q\n%q", expected, output.String()) - } - } -} - -func BenchmarkCSVInputGoAWK(b *testing.B) { - b.StopTimer() - s := 0 - var inputLines []string - for i := 0; i < b.N; i++ { - s += i - inputLines = append(inputLines, fmt.Sprintf(`%d,foo,Bob Smith,"foo,bar,baz",email@example.com`, i)) - } - input := strings.Join(inputLines, "\n") - expected := fmt.Sprintf("%d", s) - src := `BEGIN { INPUTMODE="csv" } { s += $1 } END { print s }` - benchmarkProgram(b, nil, input, expected, src) -} - -func BenchmarkCSVInputReader(b *testing.B) { - b.StopTimer() - s := 0 - var inputLines []string - for i := 0; i < b.N; i++ { - s += i - inputLines = append(inputLines, fmt.Sprintf(`%d,foo,Bob Smith,"foo,bar,baz",email@example.com`, i)) - } - input := strings.Join(inputLines, "\n") - reader := csv.NewReader(strings.NewReader(input)) - total := 0 - b.StartTimer() - for { - record, err := reader.Read() - if err == io.EOF { - break - } - if err != nil { - b.Fatalf("read error: %v", err) - } - v, _ := strconv.Atoi(record[0]) - total += v - } - if s != total { - b.Fatalf("expected %d, got %d", s, total) - } -} - -func BenchmarkCSVOutputGoAWK(b *testing.B) { - b.StopTimer() - var expectedLines []string - for i := 0; i < b.N; i++ { - expectedLines = append(expectedLines, fmt.Sprintf(`%d,foo,Bob Smith,"foo,bar,baz",email@example.com`, i)) - } - expected := strings.Join(expectedLines, "\n") - benchmarkProgram(b, nil, "", expected, ` -BEGIN { - OUTPUTMODE = "csv"; - for (i=0; i<%d; i++) - print i, "foo", "Bob Smith", "foo,bar,baz", "email@example.com" -} -`, b.N) -} - -func BenchmarkCSVOutputWriter(b *testing.B) { - b.StopTimer() - var expectedLines []string - for i := 0; i < b.N; i++ { - expectedLines = append(expectedLines, fmt.Sprintf(`%d,foo,Bob Smith,"foo,bar,baz",email@example.com`, i)) - } - expected := strings.Join(expectedLines, "\n") + "\n" - var buf bytes.Buffer - writer := csv.NewWriter(&buf) - b.StartTimer() - for i := 0; i < b.N; i++ { - err := writer.Write([]string{strconv.Itoa(i), "foo", "Bob Smith", "foo,bar,baz", "email@example.com"}) - if err != nil { - b.Fatalf("write error: %v", err) - } - } - writer.Flush() - b.StopTimer() - output := buf.String() - if output != expected { - b.Fatalf("expected/got:\n%q\n%q\n", expected, output) - } -} - -func normalizeNewlines(s string) string { - return strings.Replace(s, "\r\n", "\n", -1) -} diff --git a/src/tool/awk/interp/io.go b/src/tool/awk/interp/io.go deleted file mode 100644 index a46ef66..0000000 --- a/src/tool/awk/interp/io.go +++ /dev/null @@ -1,899 +0,0 @@ -// Input/output handling for GoAWK interpreter - -package interp - -import ( - "bufio" - "bytes" - "encoding/csv" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "regexp" - "runtime" - "strconv" - "strings" - "unicode/utf8" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - . "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// Print a line of output followed by a newline -func (p *interp) printLine(writer io.Writer, line string) error { - err := writeOutput(writer, line) - if err != nil { - return err - } - return writeOutput(writer, p.outputRecordSep) -} - -// Print given arguments followed by a newline (for "print" statement). -func (p *interp) printArgs(writer io.Writer, args []value) error { - switch p.outputMode { - case CSVMode, TSVMode: - fields := make([]string, 0, 7) // up to 7 args won't require a heap allocation - for _, arg := range args { - fields = append(fields, arg.str(p.outputFormat)) - } - err := p.writeCSV(writer, fields) - if err != nil { - return err - } - default: - // Print OFS-separated args followed by ORS (usually newline). - for i, arg := range args { - if i > 0 { - err := writeOutput(writer, p.outputFieldSep) - if err != nil { - return err - } - } - err := writeOutput(writer, arg.str(p.outputFormat)) - if err != nil { - return err - } - } - err := writeOutput(writer, p.outputRecordSep) - if err != nil { - return err - } - } - return nil -} - -func (p *interp) writeCSV(output io.Writer, fields []string) error { - // If output is already a *bufio.Writer (the common case), csv.NewWriter - // will use it directly. This is not explicitly documented, but - // csv.NewWriter calls bufio.NewWriter which calls bufio.NewWriterSize - // with a 4KB buffer, and bufio.NewWriterSize is documented as returning - // the underlying bufio.Writer if it's passed a large enough one. - var flush func() error - _, isBuffered := output.(*bufio.Writer) - if !isBuffered { - // Otherwise create a new buffered writer and flush after writing. - if p.csvOutput == nil { - p.csvOutput = bufio.NewWriterSize(output, 4096) - } else { - p.csvOutput.Reset(output) - } - output = p.csvOutput - flush = p.csvOutput.Flush - } - - // Given the above, creating a new one of these is cheap. - writer := csv.NewWriter(output) - writer.Comma = p.csvOutputConfig.Separator - writer.UseCRLF = runtime.GOOS == "windows" - err := writer.Write(fields) - if err != nil { - return err - } - if flush != nil { - return flush() - } - return nil -} - -// Implement a buffered version of WriteCloser so output is buffered -// when redirecting to a file (eg: print >"out") -type bufferedWriteCloser struct { - *bufio.Writer - io.Closer -} - -func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser { - writer := bufio.NewWriterSize(w, outputBufSize) - return &bufferedWriteCloser{writer, w} -} - -func (wc *bufferedWriteCloser) Close() error { - err := wc.Writer.Flush() - if err != nil { - return err - } - return wc.Closer.Close() -} - -// Determine the output stream for given redirect token and -// destination (file or pipe name) -func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) { - name := p.toString(destValue) - if _, ok := p.inputStreams[name]; ok { - return nil, newError("can't write to reader stream") - } - if w, ok := p.outputStreams[name]; ok { - return w, nil - } - - switch redirect { - case GREATER, APPEND: - if name == "-" { - // filename of "-" means write to stdout, eg: print "x" >"-" - return p.output, nil - } - // Write or append to file - if p.noFileWrites { - return nil, newError("can't write to file due to NoFileWrites") - } - p.flushOutputAndError() // ensure synchronization - flags := os.O_CREATE | os.O_WRONLY - if redirect == GREATER { - flags |= os.O_TRUNC - } else { - flags |= os.O_APPEND - } - w, err := os.OpenFile(name, flags, 0644) - if err != nil { - return nil, newError("output redirection error: %s", err) - } - buffered := newBufferedWriteCloser(w) - p.outputStreams[name] = buffered - return buffered, nil - - case PIPE: - // Pipe to command - if p.noExec { - return nil, newError("can't write to pipe due to NoExec") - } - cmd := p.execShell(name) - w, err := cmd.StdinPipe() - if err != nil { - return nil, newError("error connecting to stdin pipe: %v", err) - } - cmd.Stdout = p.output - cmd.Stderr = p.errorOutput - p.flushOutputAndError() // ensure synchronization - err = cmd.Start() - if err != nil { - p.printErrorf("%s\n", err) - return ioutil.Discard, nil - } - p.commands[name] = cmd - buffered := newBufferedWriteCloser(w) - p.outputStreams[name] = buffered - return buffered, nil - - default: - // Should never happen - panic(fmt.Sprintf("unexpected redirect type %s", redirect)) - } -} - -// Executes code using configured system shell -func (p *interp) execShell(code string) *exec.Cmd { - executable := p.shellCommand[0] - args := p.shellCommand[1:] - args = append(args, code) - if p.checkCtx { - return exec.CommandContext(p.ctx, executable, args...) - } else { - return exec.Command(executable, args...) - } -} - -// Get input Scanner to use for "getline" based on file name -func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) { - if _, ok := p.outputStreams[name]; ok { - return nil, newError("can't read from writer stream") - } - if _, ok := p.inputStreams[name]; ok { - return p.scanners[name], nil - } - if name == "-" { - // filename of "-" means read from stdin, eg: getline <"-" - if scanner, ok := p.scanners["-"]; ok { - return scanner, nil - } - scanner := p.newScanner(p.stdin, make([]byte, inputBufSize)) - p.scanners[name] = scanner - return scanner, nil - } - if p.noFileReads { - return nil, newError("can't read from file due to NoFileReads") - } - r, err := os.Open(name) - if err != nil { - return nil, err // *os.PathError is handled by caller (getline returns -1) - } - scanner := p.newScanner(r, make([]byte, inputBufSize)) - p.scanners[name] = scanner - p.inputStreams[name] = r - return scanner, nil -} - -// Get input Scanner to use for "getline" based on pipe name -func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) { - if _, ok := p.outputStreams[name]; ok { - return nil, newError("can't read from writer stream") - } - if _, ok := p.inputStreams[name]; ok { - return p.scanners[name], nil - } - if p.noExec { - return nil, newError("can't read from pipe due to NoExec") - } - cmd := p.execShell(name) - cmd.Stdin = p.stdin - cmd.Stderr = p.errorOutput - r, err := cmd.StdoutPipe() - if err != nil { - return nil, newError("error connecting to stdout pipe: %v", err) - } - p.flushOutputAndError() // ensure synchronization - err = cmd.Start() - if err != nil { - p.printErrorf("%s\n", err) - return bufio.NewScanner(strings.NewReader("")), nil - } - scanner := p.newScanner(r, make([]byte, inputBufSize)) - p.commands[name] = cmd - p.inputStreams[name] = r - p.scanners[name] = scanner - return scanner, nil -} - -// Create a new buffered Scanner for reading input records -func (p *interp) newScanner(input io.Reader, buffer []byte) *bufio.Scanner { - scanner := bufio.NewScanner(input) - switch { - case p.inputMode == CSVMode || p.inputMode == TSVMode: - splitter := csvSplitter{ - separator: p.csvInputConfig.Separator, - sepLen: utf8.RuneLen(p.csvInputConfig.Separator), - comment: p.csvInputConfig.Comment, - header: p.csvInputConfig.Header, - fields: &p.fields, - setFieldNames: p.setFieldNames, - } - scanner.Split(splitter.scan) - case p.recordSep == "\n": - // Scanner default is to split on newlines - case p.recordSep == "": - // Empty string for RS means split on \n\n (blank lines) - splitter := blankLineSplitter{terminator: &p.recordTerminator} - scanner.Split(splitter.scan) - case len(p.recordSep) == 1: - splitter := byteSplitter{sep: p.recordSep[0]} - scanner.Split(splitter.scan) - case utf8.RuneCountInString(p.recordSep) >= 1: - // Multi-byte and single char but multi-byte RS use regex - splitter := regexSplitter{re: p.recordSepRegex, terminator: &p.recordTerminator} - scanner.Split(splitter.scan) - } - scanner.Buffer(buffer, maxRecordLength) - return scanner -} - -// setFieldNames is called by csvSplitter.scan on the first row (if the -// "header" option is specified). -func (p *interp) setFieldNames(names []string) { - p.fieldNames = names - p.fieldIndexes = nil // clear name-to-index cache - - // Populate FIELDS array (mapping of field indexes to field names). - fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"]) - for k := range fieldsArray { - delete(fieldsArray, k) - } - for i, name := range names { - fieldsArray[strconv.Itoa(i+1)] = str(name) - } -} - -// Copied from bufio/scan.go in the stdlib: I guess it's a bit more -// efficient than bytes.TrimSuffix(data, []byte("\r")) -func dropCR(data []byte) []byte { - if len(data) > 0 && data[len(data)-1] == '\r' { - return data[:len(data)-1] - } - return data -} - -func dropLF(data []byte) []byte { - if len(data) > 0 && data[len(data)-1] == '\n' { - return data[:len(data)-1] - } - return data -} - -type blankLineSplitter struct { - terminator *string -} - -func (s blankLineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - - // Skip newlines at beginning of data - i := 0 - for i < len(data) && (data[i] == '\n' || data[i] == '\r') { - i++ - } - if i >= len(data) { - // At end of data after newlines, skip entire data block - return i, nil, nil - } - start := i - - // Try to find two consecutive newlines (or \n\r\n for Windows) - for ; i < len(data); i++ { - if data[i] != '\n' { - continue - } - end := i - if i+1 < len(data) && data[i+1] == '\n' { - i += 2 - for i < len(data) && (data[i] == '\n' || data[i] == '\r') { - i++ // Skip newlines at end of record - } - *s.terminator = string(data[end:i]) - return i, dropCR(data[start:end]), nil - } - if i+2 < len(data) && data[i+1] == '\r' && data[i+2] == '\n' { - i += 3 - for i < len(data) && (data[i] == '\n' || data[i] == '\r') { - i++ // Skip newlines at end of record - } - *s.terminator = string(data[end:i]) - return i, dropCR(data[start:end]), nil - } - } - - // If we're at EOF, we have one final record; return it - if atEOF { - token = dropCR(dropLF(data[start:])) - *s.terminator = string(data[len(token):]) - return len(data), token, nil - } - - // Request more data - return 0, nil, nil -} - -// Splitter that splits records on the given separator byte -type byteSplitter struct { - sep byte -} - -func (s byteSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := bytes.IndexByte(data, s.sep); i >= 0 { - // We have a full sep-terminated record - return i + 1, data[:i], nil - } - // If at EOF, we have a final, non-terminated record; return it - if atEOF { - return len(data), data, nil - } - // Request more data - return 0, nil, nil -} - -// Splitter that splits records on the given regular expression -type regexSplitter struct { - re *regexp.Regexp - terminator *string -} - -func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - loc := s.re.FindIndex(data) - // Note: for a regex such as "()", loc[0]==loc[1]. Gawk behavior for this - // case is to match the entire input. - if loc != nil && loc[0] != loc[1] { - *s.terminator = string(data[loc[0]:loc[1]]) // set RT special variable - return loc[1], data[:loc[0]], nil - } - // If at EOF, we have a final, non-terminated record; return it - if atEOF { - *s.terminator = "" - return len(data), data, nil - } - // Request more data - return 0, nil, nil -} - -// Splitter that splits records in CSV or TSV format. -type csvSplitter struct { - separator rune - sepLen int - comment rune - header bool - - recordBuffer []byte - fieldIndexes []int - noBOMCheck bool - - fields *[]string - setFieldNames func(names []string) - rowNum int -} - -// The structure of this code is taken from the stdlib encoding/csv Reader -// code, which is licensed under a compatible BSD-style license. -// -// We don't support all encoding/csv features: FieldsPerRecord is not -// supported, LazyQuotes is always on, and TrimLeadingSpace is always off. -func (s *csvSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { - // Some CSV files are saved with a UTF-8 BOM at the start; skip it. - if !s.noBOMCheck && len(data) >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF { - data = data[3:] - advance = 3 - s.noBOMCheck = true - } - - origData := data - if atEOF && len(data) == 0 { - // No more data, tell Scanner to stop. - return 0, nil, nil - } - - readLine := func() []byte { - newline := bytes.IndexByte(data, '\n') - var line []byte - switch { - case newline >= 0: - // Process a single line (including newline). - line = data[:newline+1] - data = data[newline+1:] - case atEOF: - // If at EOF, we have a final record without a newline. - line = data - data = data[len(data):] - default: - // Need more data - return nil - } - - // For backwards compatibility, drop trailing \r before EOF. - if len(line) > 0 && atEOF && line[len(line)-1] == '\r' { - line = line[:len(line)-1] - advance++ - } - - return line - } - - // Read line (automatically skipping past empty lines and any comments). - skip := 0 - var line []byte - for { - line = readLine() - if len(line) == 0 { - return 0, nil, nil // Request more data - } - if s.comment != 0 && nextRune(line) == s.comment { - advance += len(line) - skip += len(line) - continue // Skip comment lines - } - if len(line) == lenNewline(line) { - advance += len(line) - skip += len(line) - continue // Skip empty lines - } - break - } - - // Parse each field in the record. - const quoteLen = len(`"`) - tokenHasCR := false - s.recordBuffer = s.recordBuffer[:0] - s.fieldIndexes = s.fieldIndexes[:0] -parseField: - for { - if len(line) == 0 || line[0] != '"' { - // Non-quoted string field - i := bytes.IndexRune(line, s.separator) - field := line - if i >= 0 { - advance += i + s.sepLen - field = field[:i] - } else { - advance += len(field) - field = field[:len(field)-lenNewline(field)] - } - s.recordBuffer = append(s.recordBuffer, field...) - s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) - if i >= 0 { - line = line[i+s.sepLen:] - continue parseField - } - break parseField - } else { - // Quoted string field - line = line[quoteLen:] - advance += quoteLen - for { - i := bytes.IndexByte(line, '"') - if i >= 0 { - // Hit next quote. - s.recordBuffer = append(s.recordBuffer, line[:i]...) - line = line[i+quoteLen:] - advance += i + quoteLen - switch rn := nextRune(line); { - case rn == '"': - // `""` sequence (append quote). - s.recordBuffer = append(s.recordBuffer, '"') - line = line[quoteLen:] - advance += quoteLen - case rn == s.separator: - // `",` sequence (end of field). - line = line[s.sepLen:] - s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) - advance += s.sepLen - continue parseField - case lenNewline(line) == len(line): - // `"\n` sequence (end of line). - s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) - advance += len(line) - break parseField - default: - // `"` sequence (bare quote). - s.recordBuffer = append(s.recordBuffer, '"') - } - } else if len(line) > 0 { - // Hit end of line (copy all data so far). - advance += len(line) - newlineLen := lenNewline(line) - if newlineLen == 2 { - tokenHasCR = true - s.recordBuffer = append(s.recordBuffer, line[:len(line)-2]...) - s.recordBuffer = append(s.recordBuffer, '\n') - } else { - s.recordBuffer = append(s.recordBuffer, line...) - } - line = readLine() - if line == nil { - return 0, nil, nil // Request more data - } - } else { - // Abrupt end of file. - s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) - advance += len(line) - break parseField - } - } - } - } - - // Create a single string and create slices out of it. - // This pins the memory of the fields together, but allocates once. - strBuf := string(s.recordBuffer) // Convert to string once to batch allocations - fields := make([]string, len(s.fieldIndexes)) - preIdx := 0 - for i, idx := range s.fieldIndexes { - fields[i] = strBuf[preIdx:idx] - preIdx = idx - } - - s.noBOMCheck = true - - if s.rowNum == 0 && s.header { - // Set header field names and advance, but don't return a line (token). - s.rowNum++ - s.setFieldNames(fields) - return advance, nil, nil - } - - // Normal row, set fields and return a line (token). - s.rowNum++ - *s.fields = fields - token = origData[skip:advance] - token = token[:len(token)-lenNewline(token)] - if tokenHasCR { - token = bytes.ReplaceAll(token, []byte{'\r'}, nil) - } - return advance, token, nil -} - -// lenNewline reports the number of bytes for the trailing \n. -func lenNewline(b []byte) int { - if len(b) > 0 && b[len(b)-1] == '\n' { - if len(b) > 1 && b[len(b)-2] == '\r' { - return 2 - } - return 1 - } - return 0 -} - -// nextRune returns the next rune in b or utf8.RuneError. -func nextRune(b []byte) rune { - r, _ := utf8.DecodeRune(b) - return r -} - -// Setup for a new input file with given name (empty string if stdin) -func (p *interp) setFile(filename string) { - p.filename = numStr(filename) - p.fileLineNum = 0 - p.hadFiles = true -} - -// Setup for a new input line (but don't parse it into fields till we -// need to) -func (p *interp) setLine(line string, isTrueStr bool) { - p.line = line - p.lineIsTrueStr = isTrueStr - p.haveFields = false - p.reparseCSV = true -} - -// Ensure that the current line is parsed into fields, splitting it -// into fields if it hasn't been already -func (p *interp) ensureFields() { - if p.haveFields { - return - } - p.haveFields = true - - switch { - case p.inputMode == CSVMode || p.inputMode == TSVMode: - if p.reparseCSV { - scanner := bufio.NewScanner(strings.NewReader(p.line)) - scanner.Buffer(nil, maxRecordLength) - splitter := csvSplitter{ - separator: p.csvInputConfig.Separator, - sepLen: utf8.RuneLen(p.csvInputConfig.Separator), - comment: p.csvInputConfig.Comment, - fields: &p.fields, - } - scanner.Split(splitter.scan) - if !scanner.Scan() { - p.fields = nil - } - } else { - // Normally fields have already been parsed by csvSplitter - } - case p.fieldSep == " ": - // FS space (default) means split fields on any whitespace - p.fields = strings.Fields(p.line) - case p.line == "": - p.fields = nil - case utf8.RuneCountInString(p.fieldSep) <= 1: - // 1-char FS is handled as plain split (not regex) - p.fields = strings.Split(p.line, p.fieldSep) - default: - // Split on FS as a regex - p.fields = p.fieldSepRegex.Split(p.line, -1) - } - - // Special case for when RS=="" and FS is single character, - // split on newline in addition to FS. See more here: - // https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html - if p.inputMode == DefaultMode && p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 { - fields := make([]string, 0, len(p.fields)) - for _, field := range p.fields { - lines := strings.Split(field, "\n") - for _, line := range lines { - trimmed := strings.TrimSuffix(line, "\r") - fields = append(fields, trimmed) - } - } - p.fields = fields - } - - p.fieldsIsTrueStr = p.fieldsIsTrueStr[:0] // avoid allocation most of the time - for range p.fields { - p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false) - } - p.numFields = len(p.fields) -} - -// Fetch next line (record) of input from current input file, opening -// next input file if done with previous one -func (p *interp) nextLine() (string, error) { - for { - if p.scanner == nil { - if prevInput, ok := p.input.(io.Closer); ok && p.input != p.stdin { - // Previous input is file, close it - _ = prevInput.Close() - } - if p.filenameIndex >= p.argc && !p.hadFiles { - // Moved past number of ARGV args and haven't seen - // any files yet, use stdin - p.input = p.stdin - p.setFile("-") - } else { - if p.filenameIndex >= p.argc { - // Done with ARGV args, all done with input - return "", io.EOF - } - // Fetch next filename from ARGV. Can't use - // getArrayValue() here as it would set the value if - // not present - index := strconv.Itoa(p.filenameIndex) - argvIndex := p.program.Arrays["ARGV"] - argvArray := p.array(ast.ScopeGlobal, argvIndex) - filename := p.toString(argvArray[index]) - p.filenameIndex++ - - // Is it actually a var=value assignment? - matches := varRegex.FindStringSubmatch(filename) - if len(matches) >= 3 { - // Yep, set variable to value and keep going - name, val := matches[1], matches[2] - // Oddly, var=value args must interpret escapes (issue #129) - unescaped, err := Unescape(val) - if err == nil { - val = unescaped - } - err = p.setVarByName(name, val) - if err != nil { - return "", err - } - continue - } else if filename == "" { - // ARGV arg is empty string, skip - p.input = nil - continue - } else if filename == "-" { - // ARGV arg is "-" meaning stdin - p.input = p.stdin - p.setFile("-") - } else { - // A regular file name, open it - if p.noFileReads { - return "", newError("can't read from file due to NoFileReads") - } - input, err := os.Open(filename) - if err != nil { - return "", err - } - p.input = input - p.setFile(filename) - } - } - if p.inputBuffer == nil { // reuse buffer from last input file - p.inputBuffer = make([]byte, inputBufSize) - } - p.scanner = p.newScanner(p.input, p.inputBuffer) - } - p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars - if p.scanner.Scan() { - // We scanned some input, break and return it - break - } - err := p.scanner.Err() - if err != nil { - return "", fmt.Errorf("error reading from input: %s", err) - } - // Signal loop to move onto next file - p.scanner = nil - } - - // Got a line (record) of input, return it - p.lineNum++ - p.fileLineNum++ - return p.scanner.Text(), nil -} - -// Write output string to given writer, producing correct line endings -// on Windows (CR LF). -func writeOutput(w io.Writer, s string) error { - if crlfNewline { - // First normalize to \n, then convert all newlines to \r\n - // (on Windows). NOTE: creating two new strings is almost - // certainly slow; would be better to create a custom Writer. - s = strings.Replace(s, "\r\n", "\n", -1) - s = strings.Replace(s, "\n", "\r\n", -1) - } - _, err := io.WriteString(w, s) - return err -} - -// Close all streams, commands, and so on (after program execution). -func (p *interp) closeAll() { - if prevInput, ok := p.input.(io.Closer); ok { - _ = prevInput.Close() - } - for _, r := range p.inputStreams { - _ = r.Close() - } - for _, w := range p.outputStreams { - _ = w.Close() - } - for _, cmd := range p.commands { - _ = cmd.Wait() - } - if f, ok := p.output.(flusher); ok { - _ = f.Flush() - } - if f, ok := p.errorOutput.(flusher); ok { - _ = f.Flush() - } -} - -// Flush all output streams as well as standard output. Report whether all -// streams were flushed successfully (logging error(s) if not). -func (p *interp) flushAll() bool { - allGood := true - for name, writer := range p.outputStreams { - allGood = allGood && p.flushWriter(name, writer) - } - if _, ok := p.output.(flusher); ok { - // User-provided output may or may not be flushable - allGood = allGood && p.flushWriter("stdout", p.output) - } - return allGood -} - -// Flush a single, named output stream, and report whether it was flushed -// successfully (logging an error if not). -func (p *interp) flushStream(name string) bool { - writer := p.outputStreams[name] - if writer == nil { - p.printErrorf("error flushing %q: not an output file or pipe\n", name) - return false - } - return p.flushWriter(name, writer) -} - -type flusher interface { - Flush() error -} - -// Flush given output writer, and report whether it was flushed successfully -// (logging an error if not). -func (p *interp) flushWriter(name string, writer io.Writer) bool { - flusher, ok := writer.(flusher) - if !ok { - return true // not a flusher, don't error - } - err := flusher.Flush() - if err != nil { - p.printErrorf("error flushing %q: %v\n", name, err) - return false - } - return true -} - -// Flush output and error streams. -func (p *interp) flushOutputAndError() { - if flusher, ok := p.output.(flusher); ok { - _ = flusher.Flush() - } - if flusher, ok := p.errorOutput.(flusher); ok { - _ = flusher.Flush() - } -} - -// Print a message to the error output stream, flushing as necessary. -func (p *interp) printErrorf(format string, args ...interface{}) { - if flusher, ok := p.output.(flusher); ok { - _ = flusher.Flush() // ensure synchronization - } - fmt.Fprintf(p.errorOutput, format, args...) - if flusher, ok := p.errorOutput.(flusher); ok { - _ = flusher.Flush() - } -} diff --git a/src/tool/awk/interp/newexecute.go b/src/tool/awk/interp/newexecute.go deleted file mode 100644 index 3c4f269..0000000 --- a/src/tool/awk/interp/newexecute.go +++ /dev/null @@ -1,176 +0,0 @@ -// The New...Execute API (allows you to efficiently execute the same program repeatedly). - -package interp - -import ( - "context" - "math" - - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -const checkContextOps = 1000 // for efficiency, only check context every N instructions - -// Interpreter is an interpreter for a specific program, allowing you to -// efficiently execute the same program over and over with different inputs. -// Use New to create an Interpreter. -// -// Most programs won't need reusable execution, and should use the simpler -// Exec or ExecProgram functions instead. -type Interpreter struct { - interp *interp -} - -// New creates a reusable interpreter for the given program. -// -// Most programs won't need reusable execution, and should use the simpler -// Exec or ExecProgram functions instead. -func New(program *parser.Program) (*Interpreter, error) { - p := newInterp(program) - return &Interpreter{interp: p}, nil -} - -// Execute runs this program with the given execution configuration (input, -// output, and variables) and returns the exit status code of the program. A -// nil config is valid and will use the defaults (zero values). -// -// Internal memory allocations are reused, so calling Execute on the same -// Interpreter instance is significantly more efficient than calling -// ExecProgram multiple times. -// -// I/O state is reset between each run, but variables and the random number -// generator seed are not; use ResetVars and ResetRand to reset those. -// -// It's best to set config.Environ to a non-nil slice, otherwise Execute will -// call the relatively inefficient os.Environ each time. Set config.Environ to -// []string{} if the script doesn't need environment variables, or call -// os.Environ once and set config.Environ to that value each execution. -// -// Note that config.Funcs must be the same value provided to -// parser.ParseProgram, and must not change between calls to Execute. -func (p *Interpreter) Execute(config *Config) (int, error) { - p.interp.resetCore() - p.interp.checkCtx = false - - err := p.interp.setExecuteConfig(config) - if err != nil { - return 0, err - } - - return p.interp.executeAll() -} - -func (p *interp) resetCore() { - p.scanner = nil - for k := range p.scanners { - delete(p.scanners, k) - } - p.input = nil - for k := range p.inputStreams { - delete(p.inputStreams, k) - } - for k := range p.outputStreams { - delete(p.outputStreams, k) - } - for k := range p.commands { - delete(p.commands, k) - } - - p.sp = 0 - p.localArrays = p.localArrays[:0] - p.callDepth = 0 - - p.filename = null() - p.line = "" - p.lineIsTrueStr = false - p.lineNum = 0 - p.fileLineNum = 0 - p.fields = nil - p.fieldsIsTrueStr = nil - p.numFields = 0 - p.haveFields = false - - p.exitStatus = 0 -} - -func (p *interp) resetVars() { - // Reset global scalars - for i := range p.globals { - p.globals[i] = null() - } - - // Reset global arrays - for _, array := range p.arrays { - for k := range array { - delete(array, k) - } - } - - // Reset special variables - p.convertFormat = "%.6g" - p.outputFormat = "%.6g" - p.fieldSep = " " - p.fieldSepRegex = nil - p.recordSep = "\n" - p.recordSepRegex = nil - p.recordTerminator = "" - p.outputFieldSep = " " - p.outputRecordSep = "\n" - p.subscriptSep = "\x1c" - p.matchLength = 0 - p.matchStart = 0 -} - -// ResetVars resets this interpreter's variables, setting scalar variables to -// null, clearing arrays, and resetting special variables such as FS and RS to -// their defaults. -func (p *Interpreter) ResetVars() { - p.interp.resetVars() -} - -// ResetRand resets this interpreter's random number generator seed, so that -// rand() produces the same sequence it would have after calling New. This is -// a relatively CPU-intensive operation. -func (p *Interpreter) ResetRand() { - p.interp.randSeed = 1.0 - p.interp.random.Seed(int64(math.Float64bits(p.interp.randSeed))) -} - -// ExecuteContext is like Execute, but takes a context to allow the caller to -// set an execution timeout or cancel the execution. For efficiency, the -// context is only tested every 1000 virtual machine instructions. -// -// Context handling is not preemptive: currently long-running operations like -// system() won't be interrupted. -func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error) { - p.interp.resetCore() - p.interp.checkCtx = ctx != context.Background() && ctx != context.TODO() - p.interp.ctx = ctx - p.interp.ctxDone = ctx.Done() - p.interp.ctxOps = 0 - - err := p.interp.setExecuteConfig(config) - if err != nil { - return 0, err - } - - return p.interp.executeAll() -} - -func (p *interp) checkContext() error { - p.ctxOps++ - if p.ctxOps < checkContextOps { - return nil - } - p.ctxOps = 0 - return p.checkContextNow() -} - -func (p *interp) checkContextNow() error { - select { - case <-p.ctxDone: - return p.ctx.Err() - default: - return nil - } -} diff --git a/src/tool/awk/interp/newexecute_test.go b/src/tool/awk/interp/newexecute_test.go deleted file mode 100644 index 347a1e8..0000000 --- a/src/tool/awk/interp/newexecute_test.go +++ /dev/null @@ -1,163 +0,0 @@ -// Tests for the New...Execute API. - -package interp_test - -import ( - "bytes" - "context" - "errors" - "strings" - "testing" - "time" - - "github.com/mojosa-software/goblin/src/tool/awk/interp" - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -// This definitely doesn't test that everything was reset, but it's a good start. -func TestNewExecute(t *testing.T) { - source := `{ print NR, OFMT, x, y, a["k"], $1, $3; OFMT="%g"; x++; y++; a["k"]++ }` - interpreter := newInterp(t, source) - - // First execution. - var output bytes.Buffer - status, err := interpreter.Execute(&interp.Config{ - Stdin: strings.NewReader("one two three\nfour five six\n"), - Output: &output, - }) - if err != nil { - t.Fatalf("error executing: %v", err) - } - if status != 0 { - t.Fatalf("expected status 0, got %d", status) - } - normalized := normalizeNewlines(output.String()) - expected := "1 %.6g one three\n2 %g 1 1 1 four six\n" - if normalized != expected { - t.Fatalf("expected %q, got %q", expected, normalized) - } - - // Second execution, with ResetVars. - output.Reset() - interpreter.ResetVars() - status, err = interpreter.Execute(&interp.Config{ - Stdin: strings.NewReader("ONE TWO THREE\nFOUR FIVE SIX\n"), - Output: &output, - Vars: []string{"x", "10"}, - }) - if err != nil { - t.Fatalf("error executing: %v", err) - } - if status != 0 { - t.Fatalf("expected status 0, got %d", status) - } - normalized = normalizeNewlines(output.String()) - expected = "1 %.6g 10 ONE THREE\n2 %g 11 1 1 FOUR SIX\n" - if normalized != expected { - t.Fatalf("expected %q, got %q", expected, normalized) - } - - // Third execution, without ResetVars. - output.Reset() - status, err = interpreter.Execute(&interp.Config{ - Stdin: strings.NewReader("1 2 3\n4 5 6\n"), - Output: &output, - Vars: []string{"x", "100"}, - }) - if err != nil { - t.Fatalf("error executing: %v", err) - } - if status != 0 { - t.Fatalf("expected status 0, got %d", status) - } - normalized = normalizeNewlines(output.String()) - expected = "1 %g 100 2 2 1 3\n2 %g 101 3 3 4 6\n" - if normalized != expected { - t.Fatalf("expected %q, got %q", expected, normalized) - } -} - -func TestResetRand(t *testing.T) { - source := `BEGIN { print rand(), rand(), rand() }` - interpreter := newInterp(t, source) - var output bytes.Buffer - - _, err := interpreter.Execute(&interp.Config{Output: &output}) - if err != nil { - t.Fatalf("error executing: %v", err) - } - original := output.String() - - output.Reset() - _, err = interpreter.Execute(&interp.Config{Output: &output}) - if err != nil { - t.Fatalf("error executing: %v", err) - } - noResetRand := output.String() - if original == noResetRand { - t.Fatalf("expected different random numbers, got %q both times", original) - } - - output.Reset() - interpreter.ResetRand() - _, err = interpreter.Execute(&interp.Config{Output: &output}) - if err != nil { - t.Fatalf("error executing: %v", err) - } - withResetRand := output.String() - if original != withResetRand { - t.Fatalf("expected same random numbers (%q) as original (%q)", withResetRand, original) - } -} - -func TestExecuteContextNoError(t *testing.T) { - interpreter := newInterp(t, `BEGIN {}`) - _, err := interpreter.ExecuteContext(context.Background(), nil) - if err != nil { - t.Fatalf("execute error: %v", err) - } -} - -func TestExecuteContextTimeout(t *testing.T) { - interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond) - defer cancel() - _, err := interpreter.ExecuteContext(ctx, nil) - if !errors.Is(err, context.DeadlineExceeded) { - t.Fatalf("expected DeadlineExceeded error, got: %v", err) - } -} - -func TestExecuteContextCancel(t *testing.T) { - interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s - ctx, cancel := context.WithCancel(context.Background()) - cancel() // cancel it right away - _, err := interpreter.ExecuteContext(ctx, nil) - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected Canceled error, got: %v", err) - } -} - -func TestExecuteContextSystemTimeout(t *testing.T) { - t.Skip("TODO: skipping for now due to #122") - interpreter := newInterp(t, `BEGIN { print system("sleep 4") }`) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond) - defer cancel() - _, err := interpreter.ExecuteContext(ctx, nil) - if !errors.Is(err, context.DeadlineExceeded) { - t.Fatalf("expected DeadlineExceeded error, got: %v", err) - } -} - -func newInterp(t *testing.T, src string) *interp.Interpreter { - t.Helper() - prog, err := parser.ParseProgram([]byte(src), nil) - if err != nil { - t.Fatalf("parse error: %v", err) - } - interpreter, err := interp.New(prog) - if err != nil { - t.Fatalf("interp.New error: %v", err) - } - return interpreter -} diff --git a/src/tool/awk/interp/value.go b/src/tool/awk/interp/value.go deleted file mode 100644 index 7ae95d6..0000000 --- a/src/tool/awk/interp/value.go +++ /dev/null @@ -1,294 +0,0 @@ -// GoAWK interpreter value type (not exported). - -package interp - -import ( - "fmt" - "math" - "strconv" - "strings" -) - -type valueType uint8 - -const ( - typeNull valueType = iota - typeStr - typeNum - typeNumStr -) - -// An AWK value (these are passed around by value) -type value struct { - typ valueType // Type of value - s string // String value (for typeStr and typeNumStr) - n float64 // Numeric value (for typeNum) -} - -// Create a new null value -func null() value { - return value{} -} - -// Create a new number value -func num(n float64) value { - return value{typ: typeNum, n: n} -} - -// Create a new string value -func str(s string) value { - return value{typ: typeStr, s: s} -} - -// Create a new value to represent a "numeric string" from an input field -func numStr(s string) value { - return value{typ: typeNumStr, s: s} -} - -// Create a numeric value from a Go bool -func boolean(b bool) value { - if b { - return num(1) - } - return num(0) -} - -// String returns a string representation of v for debugging. -func (v value) String() string { - switch v.typ { - case typeStr: - return fmt.Sprintf("str(%q)", v.s) - case typeNum: - return fmt.Sprintf("num(%s)", v.str("%.6g")) - case typeNumStr: - return fmt.Sprintf("numStr(%q)", v.s) - default: - return "null()" - } -} - -// Return true if value is a "true string" (a string or a "numeric string" -// from an input field that can't be converted to a number). If false, -// also return the (possibly converted) number. -func (v value) isTrueStr() (float64, bool) { - switch v.typ { - case typeStr: - return 0, true - case typeNumStr: - f, err := parseFloat(v.s) - if err != nil { - return 0, true - } - return f, false - default: // typeNum, typeNull - return v.n, false - } -} - -// Return Go bool value of AWK value. For numbers or numeric strings, -// zero is false and everything else is true. For strings, empty -// string is false and everything else is true. -func (v value) boolean() bool { - switch v.typ { - case typeStr: - return v.s != "" - case typeNumStr: - f, err := parseFloat(v.s) - if err != nil { - return v.s != "" - } - return f != 0 - default: // typeNum, typeNull - return v.n != 0 - } -} - -// Like strconv.ParseFloat, but allow hex floating point without exponent, and -// allow "+nan" and "-nan" (though they both return math.NaN()). Also disallow -// underscore digit separators. -func parseFloat(s string) (float64, error) { - s = strings.TrimSpace(s) - if len(s) > 1 && (s[0] == '+' || s[0] == '-') { - if len(s) == 4 && hasNaNPrefix(s[1:]) { - // ParseFloat doesn't handle "nan" with sign prefix, so handle it here. - return math.NaN(), nil - } - if len(s) > 3 && hasHexPrefix(s[1:]) && strings.IndexByte(s, 'p') < 0 { - s += "p0" - } - } else if len(s) > 2 && hasHexPrefix(s) && strings.IndexByte(s, 'p') < 0 { - s += "p0" - } - n, err := strconv.ParseFloat(s, 64) - if err == nil && strings.IndexByte(s, '_') >= 0 { - // Underscore separators aren't supported by AWK. - return 0, strconv.ErrSyntax - } - return n, err -} - -// Return value's string value, or convert to a string using given -// format if a number value. Integers are a special case and don't -// use floatFormat. -func (v value) str(floatFormat string) string { - if v.typ == typeNum { - switch { - case math.IsNaN(v.n): - return "nan" - case math.IsInf(v.n, 0): - if v.n < 0 { - return "-inf" - } else { - return "inf" - } - case v.n == float64(int(v.n)): - return strconv.Itoa(int(v.n)) - default: - if floatFormat == "%.6g" { - return strconv.FormatFloat(v.n, 'g', 6, 64) - } - return fmt.Sprintf(floatFormat, v.n) - } - } - // For typeStr and typeNumStr we already have the string, for - // typeNull v.s == "". - return v.s -} - -// Return value's number value, converting from string if necessary -func (v value) num() float64 { - switch v.typ { - case typeStr, typeNumStr: - // Ensure string starts with a float and convert it - return parseFloatPrefix(v.s) - default: // typeNum, typeNull - return v.n - } -} - -var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} - -// Like strconv.ParseFloat, but parses at the start of string and -// allows things like "1.5foo" -func parseFloatPrefix(s string) float64 { - // Skip whitespace at start - i := 0 - for i < len(s) && asciiSpace[s[i]] != 0 { - i++ - } - start := i - - // Parse optional sign and check for NaN and Inf. - if i < len(s) && (s[i] == '+' || s[i] == '-') { - i++ - } - if i+3 <= len(s) { - if hasNaNPrefix(s[i:]) { - return math.NaN() - } - if hasInfPrefix(s[i:]) { - if s[start] == '-' { - return math.Inf(-1) - } - return math.Inf(1) - } - } - - // Parse mantissa: initial digit(s), optional '.', then more digits - if i+2 < len(s) && hasHexPrefix(s[i:]) { - return parseHexFloatPrefix(s, start, i+2) - } - gotDigit := false - for i < len(s) && isDigit(s[i]) { - gotDigit = true - i++ - } - if i < len(s) && s[i] == '.' { - i++ - } - for i < len(s) && isDigit(s[i]) { - gotDigit = true - i++ - } - if !gotDigit { - return 0 - } - - // Parse exponent ("1e" and similar are allowed, but ParseFloat - // rejects them) - end := i - if i < len(s) && (s[i] == 'e' || s[i] == 'E') { - i++ - if i < len(s) && (s[i] == '+' || s[i] == '-') { - i++ - } - for i < len(s) && isDigit(s[i]) { - i++ - end = i - } - } - - floatStr := s[start:end] - f, _ := strconv.ParseFloat(floatStr, 64) - return f // Returns infinity in case of "value out of range" error -} - -func hasHexPrefix(s string) bool { - return s[0] == '0' && (s[1] == 'x' || s[1] == 'X') -} - -func hasNaNPrefix(s string) bool { - return (s[0] == 'n' || s[0] == 'N') && (s[1] == 'a' || s[1] == 'A') && (s[2] == 'n' || s[2] == 'N') -} - -func hasInfPrefix(s string) bool { - return (s[0] == 'i' || s[0] == 'I') && (s[1] == 'n' || s[1] == 'N') && (s[2] == 'f' || s[2] == 'F') -} - -// Helper used by parseFloatPrefix to handle hexadecimal floating point. -func parseHexFloatPrefix(s string, start, i int) float64 { - gotDigit := false - for i < len(s) && isHexDigit(s[i]) { - gotDigit = true - i++ - } - if i < len(s) && s[i] == '.' { - i++ - } - for i < len(s) && isHexDigit(s[i]) { - gotDigit = true - i++ - } - if !gotDigit { - return 0 - } - - gotExponent := false - end := i - if i < len(s) && (s[i] == 'p' || s[i] == 'P') { - i++ - if i < len(s) && (s[i] == '+' || s[i] == '-') { - i++ - } - for i < len(s) && isDigit(s[i]) { - gotExponent = true - i++ - end = i - } - } - - floatStr := s[start:end] - if !gotExponent { - floatStr += "p0" // AWK allows "0x12", ParseFloat requires "0x12p0" - } - f, _ := strconv.ParseFloat(floatStr, 64) - return f // Returns infinity in case of "value out of range" error -} - -func isDigit(c byte) bool { - return c >= '0' && c <= '9' -} - -func isHexDigit(c byte) bool { - return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' -} diff --git a/src/tool/awk/interp/vm.go b/src/tool/awk/interp/vm.go deleted file mode 100644 index 0b80253..0000000 --- a/src/tool/awk/interp/vm.go +++ /dev/null @@ -1,1259 +0,0 @@ -// Virtual machine: interpret GoAWK compiled opcodes - -package interp - -import ( - "io" - "math" - "os" - "os/exec" - "strings" - "time" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - "github.com/mojosa-software/goblin/src/tool/awk/internal/compiler" - "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// Execute a block of virtual machine instructions. -// -// A big switch seems to be the best way of doing this for now. I also tried -// an array of functions (https://github.com/mojosa-software/goblin/src/tool/awk/commit/8e04b069b621ff9b9456de57a35ff2fe335cf201) -// and it was ever so slightly faster, but the code was harder to work with -// and it won't be improved when Go gets faster switches via jump tables -// (https://go-review.googlesource.com/c/go/+/357330/). -// -// Additionally, I've made this version faster since the above test by -// reducing the number of opcodes (replacing a couple dozen Call* opcodes with -// a single CallBuiltin -- that probably pushed it below a switch binary tree -// branch threshold). -func (p *interp) execute(code []compiler.Opcode) error { - for ip := 0; ip < len(code); { - op := code[ip] - ip++ - - if p.checkCtx { - err := p.checkContext() - if err != nil { - return err - } - } - - switch op { - case compiler.Num: - index := code[ip] - ip++ - p.push(num(p.nums[index])) - - case compiler.Str: - index := code[ip] - ip++ - p.push(str(p.strs[index])) - - case compiler.Dupe: - v := p.peekTop() - p.push(v) - - case compiler.Drop: - p.pop() - - case compiler.Swap: - l, r := p.peekTwo() - p.replaceTwo(r, l) - - case compiler.Field: - index := p.peekTop() - v := p.getField(int(index.num())) - p.replaceTop(v) - - case compiler.FieldInt: - index := code[ip] - ip++ - v := p.getField(int(index)) - p.push(v) - - case compiler.FieldByName: - fieldName := p.peekTop() - field, err := p.getFieldByName(p.toString(fieldName)) - if err != nil { - return err - } - p.replaceTop(field) - - case compiler.FieldByNameStr: - index := code[ip] - fieldName := p.strs[index] - ip++ - field, err := p.getFieldByName(fieldName) - if err != nil { - return err - } - p.push(field) - - case compiler.Global: - index := code[ip] - ip++ - p.push(p.globals[index]) - - case compiler.Local: - index := code[ip] - ip++ - p.push(p.frame[index]) - - case compiler.Special: - index := code[ip] - ip++ - p.push(p.getSpecial(int(index))) - - case compiler.ArrayGlobal: - arrayIndex := code[ip] - ip++ - array := p.arrays[arrayIndex] - index := p.toString(p.peekTop()) - v := arrayGet(array, index) - p.replaceTop(v) - - case compiler.ArrayLocal: - arrayIndex := code[ip] - ip++ - array := p.localArray(int(arrayIndex)) - index := p.toString(p.peekTop()) - v := arrayGet(array, index) - p.replaceTop(v) - - case compiler.InGlobal: - arrayIndex := code[ip] - ip++ - array := p.arrays[arrayIndex] - index := p.toString(p.peekTop()) - _, ok := array[index] - p.replaceTop(boolean(ok)) - - case compiler.InLocal: - arrayIndex := code[ip] - ip++ - array := p.localArray(int(arrayIndex)) - index := p.toString(p.peekTop()) - _, ok := array[index] - p.replaceTop(boolean(ok)) - - case compiler.AssignField: - right, index := p.popTwo() - err := p.setField(int(index.num()), p.toString(right)) - if err != nil { - return err - } - - case compiler.AssignGlobal: - index := code[ip] - ip++ - p.globals[index] = p.pop() - - case compiler.AssignLocal: - index := code[ip] - ip++ - p.frame[index] = p.pop() - - case compiler.AssignSpecial: - index := code[ip] - ip++ - err := p.setSpecial(int(index), p.pop()) - if err != nil { - return err - } - - case compiler.AssignArrayGlobal: - arrayIndex := code[ip] - ip++ - array := p.arrays[arrayIndex] - v, index := p.popTwo() - array[p.toString(index)] = v - - case compiler.AssignArrayLocal: - arrayIndex := code[ip] - ip++ - array := p.localArray(int(arrayIndex)) - v, index := p.popTwo() - array[p.toString(index)] = v - - case compiler.Delete: - arrayScope := code[ip] - arrayIndex := code[ip+1] - ip += 2 - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) - index := p.toString(p.pop()) - delete(array, index) - - case compiler.DeleteAll: - arrayScope := code[ip] - arrayIndex := code[ip+1] - ip += 2 - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) - for k := range array { - delete(array, k) - } - - case compiler.IncrField: - amount := code[ip] - ip++ - index := int(p.pop().num()) - v := p.getField(index) - err := p.setField(index, p.toString(num(v.num()+float64(amount)))) - if err != nil { - return err - } - - case compiler.IncrGlobal: - amount := code[ip] - index := code[ip+1] - ip += 2 - p.globals[index] = num(p.globals[index].num() + float64(amount)) - - case compiler.IncrLocal: - amount := code[ip] - index := code[ip+1] - ip += 2 - p.frame[index] = num(p.frame[index].num() + float64(amount)) - - case compiler.IncrSpecial: - amount := code[ip] - index := int(code[ip+1]) - ip += 2 - v := p.getSpecial(index) - err := p.setSpecial(index, num(v.num()+float64(amount))) - if err != nil { - return err - } - - case compiler.IncrArrayGlobal: - amount := code[ip] - arrayIndex := code[ip+1] - ip += 2 - array := p.arrays[arrayIndex] - index := p.toString(p.pop()) - array[index] = num(array[index].num() + float64(amount)) - - case compiler.IncrArrayLocal: - amount := code[ip] - arrayIndex := code[ip+1] - ip += 2 - array := p.localArray(int(arrayIndex)) - index := p.toString(p.pop()) - array[index] = num(array[index].num() + float64(amount)) - - case compiler.AugAssignField: - operation := compiler.AugOp(code[ip]) - ip++ - right, indexVal := p.popTwo() - index := int(indexVal.num()) - field := p.getField(index) - v, err := p.augAssignOp(operation, field, right) - if err != nil { - return err - } - err = p.setField(index, p.toString(v)) - if err != nil { - return err - } - - case compiler.AugAssignGlobal: - operation := compiler.AugOp(code[ip]) - index := code[ip+1] - ip += 2 - v, err := p.augAssignOp(operation, p.globals[index], p.pop()) - if err != nil { - return err - } - p.globals[index] = v - - case compiler.AugAssignLocal: - operation := compiler.AugOp(code[ip]) - index := code[ip+1] - ip += 2 - v, err := p.augAssignOp(operation, p.frame[index], p.pop()) - if err != nil { - return err - } - p.frame[index] = v - - case compiler.AugAssignSpecial: - operation := compiler.AugOp(code[ip]) - index := int(code[ip+1]) - ip += 2 - v, err := p.augAssignOp(operation, p.getSpecial(index), p.pop()) - if err != nil { - return err - } - err = p.setSpecial(index, v) - if err != nil { - return err - } - - case compiler.AugAssignArrayGlobal: - operation := compiler.AugOp(code[ip]) - arrayIndex := code[ip+1] - ip += 2 - array := p.arrays[arrayIndex] - index := p.toString(p.pop()) - v, err := p.augAssignOp(operation, array[index], p.pop()) - if err != nil { - return err - } - array[index] = v - - case compiler.AugAssignArrayLocal: - operation := compiler.AugOp(code[ip]) - arrayIndex := code[ip+1] - ip += 2 - array := p.localArray(int(arrayIndex)) - right, indexVal := p.popTwo() - index := p.toString(indexVal) - v, err := p.augAssignOp(operation, array[index], right) - if err != nil { - return err - } - array[index] = v - - case compiler.Regex: - // Stand-alone /regex/ is equivalent to: $0 ~ /regex/ - index := code[ip] - ip++ - re := p.regexes[index] - p.push(boolean(re.MatchString(p.line))) - - case compiler.IndexMulti: - numValues := int(code[ip]) - ip++ - values := p.popSlice(numValues) - indices := make([]string, 0, 3) // up to 3-dimensional indices won't require heap allocation - for _, v := range values { - indices = append(indices, p.toString(v)) - } - p.push(str(strings.Join(indices, p.subscriptSep))) - - case compiler.Add: - l, r := p.peekPop() - p.replaceTop(num(l.num() + r.num())) - - case compiler.Subtract: - l, r := p.peekPop() - p.replaceTop(num(l.num() - r.num())) - - case compiler.Multiply: - l, r := p.peekPop() - p.replaceTop(num(l.num() * r.num())) - - case compiler.Divide: - l, r := p.peekPop() - rf := r.num() - if rf == 0.0 { - return newError("division by zero") - } - p.replaceTop(num(l.num() / rf)) - - case compiler.Power: - l, r := p.peekPop() - p.replaceTop(num(math.Pow(l.num(), r.num()))) - - case compiler.Modulo: - l, r := p.peekPop() - rf := r.num() - if rf == 0.0 { - return newError("division by zero in mod") - } - p.replaceTop(num(math.Mod(l.num(), rf))) - - case compiler.Equals: - l, r := p.peekPop() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - p.replaceTop(boolean(p.toString(l) == p.toString(r))) - } else { - p.replaceTop(boolean(ln == rn)) - } - - case compiler.NotEquals: - l, r := p.peekPop() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - p.replaceTop(boolean(p.toString(l) != p.toString(r))) - } else { - p.replaceTop(boolean(ln != rn)) - } - - case compiler.Less: - l, r := p.peekPop() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - p.replaceTop(boolean(p.toString(l) < p.toString(r))) - } else { - p.replaceTop(boolean(ln < rn)) - } - - case compiler.Greater: - l, r := p.peekPop() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - p.replaceTop(boolean(p.toString(l) > p.toString(r))) - } else { - p.replaceTop(boolean(ln > rn)) - } - - case compiler.LessOrEqual: - l, r := p.peekPop() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - p.replaceTop(boolean(p.toString(l) <= p.toString(r))) - } else { - p.replaceTop(boolean(ln <= rn)) - } - - case compiler.GreaterOrEqual: - l, r := p.peekPop() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - p.replaceTop(boolean(p.toString(l) >= p.toString(r))) - } else { - p.replaceTop(boolean(ln >= rn)) - } - - case compiler.Concat: - l, r := p.peekPop() - p.replaceTop(str(p.toString(l) + p.toString(r))) - - case compiler.ConcatMulti: - numValues := int(code[ip]) - ip++ - values := p.popSlice(numValues) - var sb strings.Builder - - for _, v := range values { - sb.WriteString(p.toString(v)) - } - p.push(str(sb.String())) - - case compiler.Match: - l, r := p.peekPop() - re, err := p.compileRegex(p.toString(r)) - if err != nil { - return err - } - matched := re.MatchString(p.toString(l)) - p.replaceTop(boolean(matched)) - - case compiler.NotMatch: - l, r := p.peekPop() - re, err := p.compileRegex(p.toString(r)) - if err != nil { - return err - } - matched := re.MatchString(p.toString(l)) - p.replaceTop(boolean(!matched)) - - case compiler.Not: - p.replaceTop(boolean(!p.peekTop().boolean())) - - case compiler.UnaryMinus: - p.replaceTop(num(-p.peekTop().num())) - - case compiler.UnaryPlus: - p.replaceTop(num(p.peekTop().num())) - - case compiler.Boolean: - p.replaceTop(boolean(p.peekTop().boolean())) - - case compiler.Jump: - offset := code[ip] - ip += 1 + int(offset) - - case compiler.JumpFalse: - offset := code[ip] - ip++ - v := p.pop() - if !v.boolean() { - ip += int(offset) - } - - case compiler.JumpTrue: - offset := code[ip] - ip++ - v := p.pop() - if v.boolean() { - ip += int(offset) - } - - case compiler.JumpEquals: - offset := code[ip] - ip++ - l, r := p.popTwo() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - var b bool - if lIsStr || rIsStr { - b = p.toString(l) == p.toString(r) - } else { - b = ln == rn - } - if b { - ip += int(offset) - } - - case compiler.JumpNotEquals: - offset := code[ip] - ip++ - l, r := p.popTwo() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - var b bool - if lIsStr || rIsStr { - b = p.toString(l) != p.toString(r) - } else { - b = ln != rn - } - if b { - ip += int(offset) - } - - case compiler.JumpLess: - offset := code[ip] - ip++ - l, r := p.popTwo() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - var b bool - if lIsStr || rIsStr { - b = p.toString(l) < p.toString(r) - } else { - b = ln < rn - } - if b { - ip += int(offset) - } - - case compiler.JumpGreater: - offset := code[ip] - ip++ - l, r := p.popTwo() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - var b bool - if lIsStr || rIsStr { - b = p.toString(l) > p.toString(r) - } else { - b = ln > rn - } - if b { - ip += int(offset) - } - - case compiler.JumpLessOrEqual: - offset := code[ip] - ip++ - l, r := p.popTwo() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - var b bool - if lIsStr || rIsStr { - b = p.toString(l) <= p.toString(r) - } else { - b = ln <= rn - } - if b { - ip += int(offset) - } - - case compiler.JumpGreaterOrEqual: - offset := code[ip] - ip++ - l, r := p.popTwo() - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - var b bool - if lIsStr || rIsStr { - b = p.toString(l) >= p.toString(r) - } else { - b = ln >= rn - } - if b { - ip += int(offset) - } - - case compiler.Next: - return errNext - - case compiler.Exit: - p.exitStatus = int(p.pop().num()) - // Return special errExit value "caught" by top-level executor - return errExit - - case compiler.ForIn: - varScope := code[ip] - varIndex := code[ip+1] - arrayScope := code[ip+2] - arrayIndex := code[ip+3] - offset := code[ip+4] - ip += 5 - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) - loopCode := code[ip : ip+int(offset)] - for index := range array { - switch ast.VarScope(varScope) { - case ast.ScopeGlobal: - p.globals[varIndex] = str(index) - case ast.ScopeLocal: - p.frame[varIndex] = str(index) - default: // ScopeSpecial - err := p.setSpecial(int(varIndex), str(index)) - if err != nil { - return err - } - } - err := p.execute(loopCode) - if err == errBreak { - break - } - if err != nil { - return err - } - } - ip += int(offset) - - case compiler.BreakForIn: - return errBreak - - case compiler.CallBuiltin: - builtinOp := compiler.BuiltinOp(code[ip]) - ip++ - err := p.callBuiltin(builtinOp) - if err != nil { - return err - } - - case compiler.CallSplit: - arrayScope := code[ip] - arrayIndex := code[ip+1] - ip += 2 - s := p.toString(p.peekTop()) - n, err := p.split(s, ast.VarScope(arrayScope), int(arrayIndex), p.fieldSep) - if err != nil { - return err - } - p.replaceTop(num(float64(n))) - - case compiler.CallSplitSep: - arrayScope := code[ip] - arrayIndex := code[ip+1] - ip += 2 - s, fieldSep := p.peekPop() - n, err := p.split(p.toString(s), ast.VarScope(arrayScope), int(arrayIndex), p.toString(fieldSep)) - if err != nil { - return err - } - p.replaceTop(num(float64(n))) - - case compiler.CallSprintf: - numArgs := code[ip] - ip++ - args := p.popSlice(int(numArgs)) - s, err := p.sprintf(p.toString(args[0]), args[1:]) - if err != nil { - return err - } - p.push(str(s)) - - case compiler.CallUser: - funcIndex := code[ip] - numArrayArgs := int(code[ip+1]) - ip += 2 - - f := p.program.Compiled.Functions[funcIndex] - if p.callDepth >= maxCallDepth { - return newError("calling %q exceeded maximum call depth of %d", f.Name, maxCallDepth) - } - - // Set up frame for scalar arguments - oldFrame := p.frame - p.frame = p.peekSlice(f.NumScalars) - - // Handle array arguments - var arrays []int - for j := 0; j < numArrayArgs; j++ { - arrayScope := ast.VarScope(code[ip]) - arrayIndex := int(code[ip+1]) - ip += 2 - arrays = append(arrays, p.arrayIndex(arrayScope, arrayIndex)) - } - oldArraysLen := len(p.arrays) - for j := numArrayArgs; j < f.NumArrays; j++ { - arrays = append(arrays, len(p.arrays)) - p.arrays = append(p.arrays, make(map[string]value)) - } - p.localArrays = append(p.localArrays, arrays) - - // Execute the function! - p.callDepth++ - err := p.execute(f.Body) - p.callDepth-- - - // Pop the locals off the stack - p.popSlice(f.NumScalars) - p.frame = oldFrame - p.localArrays = p.localArrays[:len(p.localArrays)-1] - p.arrays = p.arrays[:oldArraysLen] - - if r, ok := err.(returnValue); ok { - p.push(r.Value) - } else if err != nil { - return err - } else { - p.push(null()) - } - - case compiler.CallNative: - funcIndex := int(code[ip]) - numArgs := int(code[ip+1]) - ip += 2 - - args := p.popSlice(numArgs) - r, err := p.callNative(funcIndex, args) - if err != nil { - return err - } - p.push(r) - - case compiler.Return: - v := p.pop() - return returnValue{v} - - case compiler.ReturnNull: - return returnValue{null()} - - case compiler.Nulls: - numNulls := int(code[ip]) - ip++ - p.pushNulls(numNulls) - - case compiler.Print: - numArgs := code[ip] - redirect := lexer.Token(code[ip+1]) - ip += 2 - - args := p.popSlice(int(numArgs)) - - // Determine what output stream to write to. - output := p.output - if redirect != lexer.ILLEGAL { - var err error - dest := p.pop() - output, err = p.getOutputStream(redirect, dest) - if err != nil { - return err - } - } - - if numArgs > 0 { - err := p.printArgs(output, args) - if err != nil { - return err - } - } else { - // "print" with no arguments prints the raw value of $0, - // regardless of output mode. - err := p.printLine(output, p.line) - if err != nil { - return err - } - } - - case compiler.Printf: - numArgs := code[ip] - redirect := lexer.Token(code[ip+1]) - ip += 2 - - args := p.popSlice(int(numArgs)) - s, err := p.sprintf(p.toString(args[0]), args[1:]) - if err != nil { - return err - } - - output := p.output - if redirect != lexer.ILLEGAL { - dest := p.pop() - output, err = p.getOutputStream(redirect, dest) - if err != nil { - return err - } - } - err = writeOutput(output, s) - if err != nil { - return err - } - - case compiler.Getline: - redirect := lexer.Token(code[ip]) - ip++ - - ret, line, err := p.getline(redirect) - if err != nil { - return err - } - if ret == 1 { - p.setLine(line, false) - } - p.push(num(ret)) - - case compiler.GetlineField: - redirect := lexer.Token(code[ip]) - ip++ - - ret, line, err := p.getline(redirect) - if err != nil { - return err - } - if ret == 1 { - err := p.setField(0, line) - if err != nil { - return err - } - } - p.push(num(ret)) - - case compiler.GetlineGlobal: - redirect := lexer.Token(code[ip]) - index := code[ip+1] - ip += 2 - - ret, line, err := p.getline(redirect) - if err != nil { - return err - } - if ret == 1 { - p.globals[index] = numStr(line) - } - p.push(num(ret)) - - case compiler.GetlineLocal: - redirect := lexer.Token(code[ip]) - index := code[ip+1] - ip += 2 - - ret, line, err := p.getline(redirect) - if err != nil { - return err - } - if ret == 1 { - p.frame[index] = numStr(line) - } - p.push(num(ret)) - - case compiler.GetlineSpecial: - redirect := lexer.Token(code[ip]) - index := code[ip+1] - ip += 2 - - ret, line, err := p.getline(redirect) - if err != nil { - return err - } - if ret == 1 { - err := p.setSpecial(int(index), numStr(line)) - if err != nil { - return err - } - } - p.push(num(ret)) - - case compiler.GetlineArray: - redirect := lexer.Token(code[ip]) - arrayScope := code[ip+1] - arrayIndex := code[ip+2] - ip += 3 - - ret, line, err := p.getline(redirect) - if err != nil { - return err - } - index := p.toString(p.peekTop()) - if ret == 1 { - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) - array[index] = numStr(line) - } - p.replaceTop(num(ret)) - } - } - - return nil -} - -func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { - switch builtinOp { - case compiler.BuiltinAtan2: - y, x := p.peekPop() - p.replaceTop(num(math.Atan2(y.num(), x.num()))) - - case compiler.BuiltinClose: - name := p.toString(p.peekTop()) - var c io.Closer = p.inputStreams[name] - if c != nil { - // Close input stream - delete(p.inputStreams, name) - err := c.Close() - if err != nil { - p.replaceTop(num(-1)) - } else { - p.replaceTop(num(0)) - } - } else { - c = p.outputStreams[name] - if c != nil { - // Close output stream - delete(p.outputStreams, name) - err := c.Close() - if err != nil { - p.replaceTop(num(-1)) - } else { - p.replaceTop(num(0)) - } - } else { - // Nothing to close - p.replaceTop(num(-1)) - } - } - - case compiler.BuiltinCos: - p.replaceTop(num(math.Cos(p.peekTop().num()))) - - case compiler.BuiltinExp: - p.replaceTop(num(math.Exp(p.peekTop().num()))) - - case compiler.BuiltinFflush: - name := p.toString(p.peekTop()) - var ok bool - if name != "" { - // Flush a single, named output stream - ok = p.flushStream(name) - } else { - // fflush() or fflush("") flushes all output streams - ok = p.flushAll() - } - if !ok { - p.replaceTop(num(-1)) - } else { - p.replaceTop(num(0)) - } - - case compiler.BuiltinFflushAll: - ok := p.flushAll() - if !ok { - p.push(num(-1)) - } else { - p.push(num(0)) - } - - case compiler.BuiltinGsub: - regex, repl, in := p.peekPeekPop() - out, n, err := p.sub(p.toString(regex), p.toString(repl), p.toString(in), true) - if err != nil { - return err - } - p.replaceTwo(num(float64(n)), str(out)) - - case compiler.BuiltinIndex: - sValue, substr := p.peekPop() - s := p.toString(sValue) - index := strings.Index(s, p.toString(substr)) - p.replaceTop(num(float64(index + 1))) - - case compiler.BuiltinInt: - p.replaceTop(num(float64(int(p.peekTop().num())))) - - case compiler.BuiltinLength: - p.push(num(float64(len(p.line)))) - - case compiler.BuiltinLengthArg: - s := p.toString(p.peekTop()) - p.replaceTop(num(float64(len(s)))) - - case compiler.BuiltinLog: - p.replaceTop(num(math.Log(p.peekTop().num()))) - - case compiler.BuiltinMatch: - sValue, regex := p.peekPop() - s := p.toString(sValue) - re, err := p.compileRegex(p.toString(regex)) - if err != nil { - return err - } - loc := re.FindStringIndex(s) - if loc == nil { - p.matchStart = 0 - p.matchLength = -1 - p.replaceTop(num(0)) - } else { - p.matchStart = loc[0] + 1 - p.matchLength = loc[1] - loc[0] - p.replaceTop(num(float64(p.matchStart))) - } - - case compiler.BuiltinRand: - p.push(num(p.random.Float64())) - - case compiler.BuiltinSin: - p.replaceTop(num(math.Sin(p.peekTop().num()))) - - case compiler.BuiltinSqrt: - p.replaceTop(num(math.Sqrt(p.peekTop().num()))) - - case compiler.BuiltinSrand: - prevSeed := p.randSeed - p.random.Seed(time.Now().UnixNano()) - p.push(num(prevSeed)) - - case compiler.BuiltinSrandSeed: - prevSeed := p.randSeed - p.randSeed = p.peekTop().num() - p.random.Seed(int64(math.Float64bits(p.randSeed))) - p.replaceTop(num(prevSeed)) - - case compiler.BuiltinSub: - regex, repl, in := p.peekPeekPop() - out, n, err := p.sub(p.toString(regex), p.toString(repl), p.toString(in), false) - if err != nil { - return err - } - p.replaceTwo(num(float64(n)), str(out)) - - case compiler.BuiltinSubstr: - sValue, posValue := p.peekPop() - pos := int(posValue.num()) - s := p.toString(sValue) - if pos > len(s) { - pos = len(s) + 1 - } - if pos < 1 { - pos = 1 - } - length := len(s) - pos + 1 - p.replaceTop(str(s[pos-1 : pos-1+length])) - - case compiler.BuiltinSubstrLength: - posValue, lengthValue := p.popTwo() - length := int(lengthValue.num()) - pos := int(posValue.num()) - s := p.toString(p.peekTop()) - if pos > len(s) { - pos = len(s) + 1 - } - if pos < 1 { - pos = 1 - } - maxLength := len(s) - pos + 1 - if length < 0 { - length = 0 - } - if length > maxLength { - length = maxLength - } - p.replaceTop(str(s[pos-1 : pos-1+length])) - - case compiler.BuiltinSystem: - if p.noExec { - return newError("can't call system() due to NoExec") - } - cmdline := p.toString(p.peekTop()) - cmd := p.execShell(cmdline) - cmd.Stdin = p.stdin - cmd.Stdout = p.output - cmd.Stderr = p.errorOutput - _ = p.flushAll() // ensure synchronization - err := cmd.Run() - ret := 0.0 - if err != nil { - if p.checkCtx && p.ctx.Err() != nil { - return p.ctx.Err() - } - if exitErr, ok := err.(*exec.ExitError); ok { - ret = float64(exitErr.ProcessState.ExitCode()) - } else { - p.printErrorf("%v\n", err) - ret = -1 - } - } - p.replaceTop(num(ret)) - - case compiler.BuiltinTolower: - p.replaceTop(str(strings.ToLower(p.toString(p.peekTop())))) - - case compiler.BuiltinToupper: - p.replaceTop(str(strings.ToUpper(p.toString(p.peekTop())))) - } - - return nil -} - -// Fetch the value at the given index from array. This handles the strange -// POSIX behavior of creating a null entry for non-existent array elements. -// Per the POSIX spec, "Any other reference to a nonexistent array element -// [apart from "in" expressions] shall automatically create it." -func arrayGet(array map[string]value, index string) value { - v, ok := array[index] - if !ok { - array[index] = v - } - return v -} - -// Stack operations follow. These should be inlined. Instead of just push and -// pop, for efficiency we have custom operations for when we're replacing the -// top of stack without changing the stack pointer. Primarily this avoids the -// check for append in push. -func (p *interp) push(v value) { - sp := p.sp - if sp >= len(p.stack) { - p.stack = append(p.stack, null()) - } - p.stack[sp] = v - sp++ - p.sp = sp -} - -func (p *interp) pushNulls(num int) { - sp := p.sp - for p.sp+num-1 >= len(p.stack) { - p.stack = append(p.stack, null()) - } - for i := 0; i < num; i++ { - p.stack[sp] = null() - sp++ - } - p.sp = sp -} - -func (p *interp) pop() value { - p.sp-- - return p.stack[p.sp] -} - -func (p *interp) popTwo() (value, value) { - p.sp -= 2 - return p.stack[p.sp], p.stack[p.sp+1] -} - -func (p *interp) peekTop() value { - return p.stack[p.sp-1] -} - -func (p *interp) peekTwo() (value, value) { - return p.stack[p.sp-2], p.stack[p.sp-1] -} - -func (p *interp) peekPop() (value, value) { - p.sp-- - return p.stack[p.sp-1], p.stack[p.sp] -} - -func (p *interp) peekPeekPop() (value, value, value) { - p.sp-- - return p.stack[p.sp-2], p.stack[p.sp-1], p.stack[p.sp] -} - -func (p *interp) replaceTop(v value) { - p.stack[p.sp-1] = v -} - -func (p *interp) replaceTwo(l, r value) { - p.stack[p.sp-2] = l - p.stack[p.sp-1] = r -} - -func (p *interp) popSlice(n int) []value { - p.sp -= n - return p.stack[p.sp : p.sp+n] -} - -func (p *interp) peekSlice(n int) []value { - return p.stack[p.sp-n:] -} - -// Helper for getline operations. This performs the (possibly redirected) read -// of a line, and returns the result. If the result is 1 (success in AWK), the -// caller will set the target to the returned string. -func (p *interp) getline(redirect lexer.Token) (float64, string, error) { - switch redirect { - case lexer.PIPE: // redirect from command - name := p.toString(p.pop()) - scanner, err := p.getInputScannerPipe(name) - if err != nil { - return 0, "", err - } - if !scanner.Scan() { - if err := scanner.Err(); err != nil { - return -1, "", nil - } - return 0, "", nil - } - return 1, scanner.Text(), nil - - case lexer.LESS: // redirect from file - name := p.toString(p.pop()) - scanner, err := p.getInputScannerFile(name) - if err != nil { - if _, ok := err.(*os.PathError); ok { - // File not found is not a hard error, getline just returns -1. - // See: https://github.com/mojosa-software/goblin/src/tool/awk/issues/41 - return -1, "", nil - } - return 0, "", err - } - if !scanner.Scan() { - if err := scanner.Err(); err != nil { - return -1, "", nil - } - return 0, "", nil - } - return 1, scanner.Text(), nil - - default: // no redirect - p.flushOutputAndError() // Flush output in case they've written a prompt - var err error - line, err := p.nextLine() - if err == io.EOF { - return 0, "", nil - } - if err != nil { - return -1, "", nil - } - return 1, line, nil - } -} - -// Perform augmented assignment operation. -func (p *interp) augAssignOp(op compiler.AugOp, l, r value) (value, error) { - switch op { - case compiler.AugOpAdd: - return num(l.num() + r.num()), nil - case compiler.AugOpSub: - return num(l.num() - r.num()), nil - case compiler.AugOpMul: - return num(l.num() * r.num()), nil - case compiler.AugOpDiv: - rf := r.num() - if rf == 0.0 { - return null(), newError("division by zero") - } - return num(l.num() / rf), nil - case compiler.AugOpPow: - return num(math.Pow(l.num(), r.num())), nil - default: // AugOpMod - rf := r.num() - if rf == 0.0 { - return null(), newError("division by zero in mod") - } - return num(math.Mod(l.num(), rf)), nil - } -} diff --git a/src/tool/awk/lexer/lexer.go b/src/tool/awk/lexer/lexer.go deleted file mode 100644 index 05cf33f..0000000 --- a/src/tool/awk/lexer/lexer.go +++ /dev/null @@ -1,499 +0,0 @@ -// Package lexer is an AWK lexer (tokenizer). -// -// The lexer turns a string of AWK source code into a stream of -// tokens for parsing. -// -// To tokenize some source, create a new lexer with NewLexer(src) and -// then call Scan() until the token type is EOF or ILLEGAL. -package lexer - -import ( - "errors" -) - -// Lexer tokenizes a byte string of AWK source code. Use NewLexer to -// actually create a lexer, and Scan() or ScanRegex() to get tokens. -type Lexer struct { - src []byte - offset int - ch byte - pos Position - nextPos Position - hadSpace bool - lastTok Token -} - -// Position stores the source line and column where a token starts. -type Position struct { - // Line number of the token (starts at 1). - Line int - // Column on the line (starts at 1). Note that this is the byte - // offset into the line, not rune offset. - Column int -} - -// NewLexer creates a new lexer that will tokenize the given source -// code. See the module-level example for a working example. -func NewLexer(src []byte) *Lexer { - l := &Lexer{src: src} - l.nextPos.Line = 1 - l.nextPos.Column = 1 - l.next() - return l -} - -// HadSpace returns true if the previously-scanned token had -// whitespace before it. Used by the parser because when calling a -// user-defined function the grammar doesn't allow a space between -// the function name and the left parenthesis. -func (l *Lexer) HadSpace() bool { - return l.hadSpace -} - -// Scan scans the next token and returns its position (line/column), -// token value (one of the uppercase token constants), and the -// string value of the token. For most tokens, the token value is -// empty. For NAME, NUMBER, STRING, and REGEX tokens, it's the -// token's value. For an ILLEGAL token, it's the error message. -func (l *Lexer) Scan() (Position, Token, string) { - pos, tok, val := l.scan() - l.lastTok = tok - return pos, tok, val -} - -// Does the real work of scanning. Scan() wraps this to more easily -// set lastTok. -func (l *Lexer) scan() (Position, Token, string) { - // Skip whitespace (except newline, which is a token) - l.hadSpace = false - for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\\' { - l.hadSpace = true - if l.ch == '\\' { - l.next() - if l.ch == '\r' { - l.next() - } - if l.ch != '\n' { - return l.pos, ILLEGAL, "expected \\n after \\ line continuation" - } - } - l.next() - } - if l.ch == '#' { - // Skip comment till end of line - l.next() - for l.ch != '\n' && l.ch != 0 { - l.next() - } - } - if l.ch == 0 { - // l.next() reached end of input - return l.pos, EOF, "" - } - - pos := l.pos - tok := ILLEGAL - val := "" - - ch := l.ch - l.next() - - // Names: keywords and functions - if isNameStart(ch) { - start := l.offset - 2 - for isNameStart(l.ch) || isDigit(l.ch) { - l.next() - } - name := string(l.src[start : l.offset-1]) - tok := KeywordToken(name) - if tok == ILLEGAL { - tok = NAME - val = name - } - return pos, tok, val - } - - // These are ordered by my guess at frequency of use. Should run - // through a corpus of real AWK programs to determine actual - // frequency. - switch ch { - case '$': - tok = DOLLAR - case '@': - tok = AT - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.': - // Avoid make/append and use l.offset directly for performance - start := l.offset - 2 - gotDigit := false - if ch != '.' { - gotDigit = true - for isDigit(l.ch) { - l.next() - } - if l.ch == '.' { - l.next() - } - } - for isDigit(l.ch) { - gotDigit = true - l.next() - } - if !gotDigit { - return l.pos, ILLEGAL, "expected digits" - } - if l.ch == 'e' || l.ch == 'E' { - l.next() - gotSign := false - if l.ch == '+' || l.ch == '-' { - gotSign = true - l.next() - } - gotDigit = false - for isDigit(l.ch) { - l.next() - gotDigit = true - } - // Per awk/gawk, "1e" is allowed and parsed as "1 e" (with "e" - // considered a variable). "1e+" is parsed as "1e + ...". - if !gotDigit { - if gotSign { - l.unread() // unread the '+' or '-' - } - l.unread() // unread the 'e' or 'E' - } - } - tok = NUMBER - val = string(l.src[start : l.offset-1]) - case '{': - tok = LBRACE - case '}': - tok = RBRACE - case '=': - tok = l.choice('=', ASSIGN, EQUALS) - case '<': - tok = l.choice('=', LESS, LTE) - case '>': - switch l.ch { - case '=': - l.next() - tok = GTE - case '>': - l.next() - tok = APPEND - default: - tok = GREATER - } - case '"', '\'': - // Note: POSIX awk spec doesn't allow single-quoted strings, - // but this helps with quoting, especially on Windows - // where the shell quote character is " (double quote). - s, err := parseString(ch, func() byte { return l.ch }, l.next) - if err != nil { - return l.pos, ILLEGAL, err.Error() - } - if l.ch != ch { - return l.pos, ILLEGAL, "didn't find end quote in string" - } - l.next() - tok = STRING - val = s - case '(': - tok = LPAREN - case ')': - tok = RPAREN - case ',': - tok = COMMA - case ';': - tok = SEMICOLON - case '+': - switch l.ch { - case '+': - l.next() - tok = INCR - case '=': - l.next() - tok = ADD_ASSIGN - default: - tok = ADD - } - case '-': - switch l.ch { - case '-': - l.next() - tok = DECR - case '=': - l.next() - tok = SUB_ASSIGN - default: - tok = SUB - } - case '*': - switch l.ch { - case '*': - l.next() - tok = l.choice('=', POW, POW_ASSIGN) - case '=': - l.next() - tok = MUL_ASSIGN - default: - tok = MUL - } - case '/': - tok = l.choice('=', DIV, DIV_ASSIGN) - case '%': - tok = l.choice('=', MOD, MOD_ASSIGN) - case '[': - tok = LBRACKET - case ']': - tok = RBRACKET - case '\n': - tok = NEWLINE - case '^': - tok = l.choice('=', POW, POW_ASSIGN) - case '!': - switch l.ch { - case '=': - l.next() - tok = NOT_EQUALS - case '~': - l.next() - tok = NOT_MATCH - default: - tok = NOT - } - case '~': - tok = MATCH - case '?': - tok = QUESTION - case ':': - tok = COLON - case '&': - tok = l.choice('&', ILLEGAL, AND) - if tok == ILLEGAL { - return l.pos, ILLEGAL, "unexpected char after '&'" - } - case '|': - tok = l.choice('|', PIPE, OR) - default: - tok = ILLEGAL - val = "unexpected char" - } - return pos, tok, val -} - -// ScanRegex parses an AWK regular expression in /slash/ syntax. The -// AWK grammar has somewhat special handling of regex tokens, so the -// parser can only call this after a DIV or DIV_ASSIGN token has just -// been scanned. -func (l *Lexer) ScanRegex() (Position, Token, string) { - pos, tok, val := l.scanRegex() - l.lastTok = tok - return pos, tok, val -} - -// Does the real work of scanning a regex. ScanRegex() wraps this to -// more easily set lastTok. -func (l *Lexer) scanRegex() (Position, Token, string) { - pos := l.pos - chars := make([]byte, 0, 32) // most won't require heap allocation - switch l.lastTok { - case DIV: - // Regex after '/' (the usual case) - pos.Column -= 1 - case DIV_ASSIGN: - // Regex after '/=' (happens when regex starts with '=') - pos.Column -= 2 - chars = append(chars, '=') - default: - panic("ScanRegex should only be called after DIV or DIV_ASSIGN token") - } - for l.ch != '/' { - c := l.ch - if c == 0 { - return l.pos, ILLEGAL, "didn't find end slash in regex" - } - if c == '\r' || c == '\n' { - return l.pos, ILLEGAL, "can't have newline in regex" - } - if c == '\\' { - l.next() - if l.ch != '/' { - chars = append(chars, '\\') - } - c = l.ch - } - chars = append(chars, c) - l.next() - } - l.next() - return pos, REGEX, string(chars) -} - -// Load the next character into l.ch (or 0 on end of input) and update -// line and column position. -func (l *Lexer) next() { - l.pos = l.nextPos - if l.offset >= len(l.src) { - // For last character, move offset 1 past the end as it - // simplifies offset calculations in NAME and NUMBER - if l.ch != 0 { - l.ch = 0 - l.offset++ - l.nextPos.Column++ - } - return - } - ch := l.src[l.offset] - if ch == '\n' { - l.nextPos.Line++ - l.nextPos.Column = 1 - } else if ch != '\r' { - l.nextPos.Column++ - } - l.ch = ch - l.offset++ -} - -// Un-read the character just scanned (doesn't handle line boundaries). -func (l *Lexer) unread() { - l.offset-- - l.pos.Column-- - l.nextPos.Column-- - l.ch = l.src[l.offset-1] -} - -func isNameStart(ch byte) bool { - return ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') -} - -func isDigit(ch byte) bool { - return ch >= '0' && ch <= '9' -} - -// Return the hex digit 0-15 corresponding to the given ASCII byte, -// or -1 if it's not a valid hex digit. -func hexDigit(ch byte) int { - switch { - case isDigit(ch): - return int(ch - '0') - case ch >= 'a' && ch <= 'f': - return int(ch - 'a' + 10) - case ch >= 'A' && ch <= 'F': - return int(ch - 'A' + 10) - default: - return -1 - } -} - -func (l *Lexer) choice(ch byte, one, two Token) Token { - if l.ch == ch { - l.next() - return two - } - return one -} - -// PeekByte returns the next unscanned byte; used when parsing -// "getline lvalue" expressions. Returns 0 at end of input. -func (l *Lexer) PeekByte() byte { - return l.ch -} - -// Unescape unescapes the backslash escapes in s (which shouldn't include the -// surrounding quotes) and returns the unquoted string. It's intended for use -// when unescaping command line var=value assignments, as required by the -// POSIX AWK spec. -func Unescape(s string) (string, error) { - i := 0 - ch := func() byte { - if i >= len(s) { - return 0 - } - return s[i] - } - next := func() { - i++ - } - return parseString(0, ch, next) -} - -// Parses a string ending with given quote character (not parsed). The ch -// function returns the current character (or 0 at the end); the next function -// moves forward one character. -func parseString(quote byte, ch func() byte, next func()) (string, error) { - chars := make([]byte, 0, 32) // most strings won't require heap allocation - for { - c := ch() - if c == quote || c == 0 { - break - } - if c == '\r' || c == '\n' { - return "", errors.New("can't have newline in string") - } - if c != '\\' { - // Normal, non-escaped character - chars = append(chars, c) - next() - continue - } - // Escape sequence, skip over \ and process - next() - switch ch() { - case 'n': - c = '\n' - next() - case 't': - c = '\t' - next() - case 'r': - c = '\r' - next() - case 'a': - c = '\a' - next() - case 'b': - c = '\b' - next() - case 'f': - c = '\f' - next() - case 'v': - c = '\v' - next() - case 'x': - // Hex byte of one of two hex digits - next() - digit := hexDigit(ch()) - if digit < 0 { - return "", errors.New("1 or 2 hex digits expected") - } - c = byte(digit) - next() - digit = hexDigit(ch()) - if digit >= 0 { - c = c*16 + byte(digit) - next() - } - case '0', '1', '2', '3', '4', '5', '6', '7': - // Octal byte of 1-3 octal digits - c = ch() - '0' - next() - for i := 0; i < 2 && ch() >= '0' && ch() <= '7'; i++ { - c = c*8 + ch() - '0' - next() - } - default: - // Any other escape character is just the char - // itself, eg: "\z" is just "z". - c = ch() - if c == 0 { - // Expect backslash right at the end of the string, which is - // interpreted as a literal backslash (only for Unescape). - c = '\\' - } - next() - } - chars = append(chars, c) - } - return string(chars), nil -} diff --git a/src/tool/awk/lexer/lexer_test.go b/src/tool/awk/lexer/lexer_test.go deleted file mode 100644 index 4b27cef..0000000 --- a/src/tool/awk/lexer/lexer_test.go +++ /dev/null @@ -1,393 +0,0 @@ -// Test GoAWK Lexer - -package lexer_test - -import ( - "fmt" - "strconv" - "strings" - "testing" - - . "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -func TestLexer(t *testing.T) { - tests := []struct { - input string - output string - }{ - // Comments, whitespace, line continuations - {"+# foo \n- #foo", `1:1 + "", 1:8 "", 2:1 - ""`}, - {"+\\\n-", `1:1 + "", 2:1 - ""`}, - {"+\\\r\n-", `1:1 + "", 2:1 - ""`}, - {"+\\-", `1:1 + "", 1:3 "expected \\n after \\ line continuation", 1:3 - ""`}, - - // Names and keywords - {"x", `1:1 name "x"`}, - {"x y0", `1:1 name "x", 1:3 name "y0"`}, - {"x 0y", `1:1 name "x", 1:3 number "0", 1:4 name "y"`}, - {"sub SUB", `1:1 sub "", 1:5 name "SUB"`}, - - // String tokens - {`"foo"`, `1:1 string "foo"`}, - {`"a\t\r\n\z\'\"\a\b\f\vb"`, `1:1 string "a\t\r\nz'\"\a\b\f\vb"`}, - {`"x`, `1:3 "didn't find end quote in string"`}, - {`"foo\"`, `1:7 "didn't find end quote in string"`}, - {"\"x\n\"", `1:3 "can't have newline in string", 1:3 "", 2:2 "didn't find end quote in string"`}, - {`'foo'`, `1:1 string "foo"`}, - {`'a\t\r\n\z\'\"b'`, `1:1 string "a\t\r\nz'\"b"`}, - {`'x`, `1:3 "didn't find end quote in string"`}, - {"'x\n'", `1:3 "can't have newline in string", 1:3 "", 2:2 "didn't find end quote in string"`}, - {`"\x0.\x00.\x0A\x10\xff\xFF\x41"`, `1:1 string "\x00.\x00.\n\x10\xff\xffA"`}, - {`"\xg"`, `1:4 "1 or 2 hex digits expected", 1:4 name "g", 1:6 "didn't find end quote in string"`}, - {`"\0\78\7\77\777\0 \141 "`, `1:1 string "\x00\a8\a?\xff\x00 a "`}, - - // Number tokens - {"0", `1:1 number "0"`}, - {"9", `1:1 number "9"`}, - {" 0 ", `1:2 number "0"`}, - {"\n 1", `1:1 "", 2:3 number "1"`}, - {"1234", `1:1 number "1234"`}, - {".5", `1:1 number ".5"`}, - {".5e1", `1:1 number ".5e1"`}, - {"5e+1", `1:1 number "5e+1"`}, - {"5e-1", `1:1 number "5e-1"`}, - {"0.", `1:1 number "0."`}, - {"42e", `1:1 number "42", 1:3 name "e"`}, - {"4.2e", `1:1 number "4.2", 1:4 name "e"`}, - {"1.e3", `1:1 number "1.e3"`}, - {"1.e3", `1:1 number "1.e3"`}, - {"1e3foo", `1:1 number "1e3", 1:4 name "foo"`}, - {"1e3+", `1:1 number "1e3", 1:4 + ""`}, - {"1e3.4", `1:1 number "1e3", 1:4 number ".4"`}, - {"1e-", `1:1 number "1", 1:2 name "e", 1:3 - ""`}, - {"1e+", `1:1 number "1", 1:2 name "e", 1:3 + ""`}, - {"42`", `1:1 number "42", 1:3 "unexpected char"`}, - {"0..", `1:1 number "0.", 1:4 "expected digits"`}, - {".", `1:2 "expected digits"`}, - - // Misc errors - {"&=", `1:2 "unexpected char after '&'", 1:2 = ""`}, - } - for _, test := range tests { - t.Run(test.input, func(t *testing.T) { - l := NewLexer([]byte(test.input)) - strs := []string{} - for { - pos, tok, val := l.Scan() - if tok == EOF { - break - } - if tok == NUMBER { - // Ensure ParseFloat() works, as that's what our - // parser uses to convert - trimmed := strings.TrimRight(val, "eE") - _, err := strconv.ParseFloat(trimmed, 64) - if err != nil { - t.Fatalf("couldn't parse float: %q", val) - } - } - strs = append(strs, fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val)) - } - output := strings.Join(strs, ", ") - if output != test.output { - t.Errorf("expected %q, got %q", test.output, output) - } - }) - } -} - -func TestRegex(t *testing.T) { - tests := []struct { - input string - output string - }{ - {`/foo/`, `1:1 regex "foo"`}, - {`/=foo/`, `1:1 regex "=foo"`}, - {`/a\/b/`, `1:1 regex "a/b"`}, - {`/a\/\zb/`, `1:1 regex "a/\\zb"`}, - {`/a`, `1:3 "didn't find end slash in regex"`}, - {"/a\n", `1:3 "can't have newline in regex"`}, - } - for _, test := range tests { - t.Run(test.input, func(t *testing.T) { - l := NewLexer([]byte(test.input)) - l.Scan() // Scan first token (probably DIV) - pos, tok, val := l.ScanRegex() - output := fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val) - if output != test.output { - t.Errorf("expected %q, got %q", test.output, output) - } - }) - } -} - -func TestScanRegexInvalid(t *testing.T) { - defer func() { - r := recover() - if message, ok := r.(string); ok { - expected := "ScanRegex should only be called after DIV or DIV_ASSIGN token" - if message != expected { - t.Fatalf("expected %q, got %q", expected, message) - } - } else { - t.Fatalf("expected panic of string type") - } - }() - l := NewLexer([]byte("foo/")) - l.Scan() // Scan first token (NAME foo) - l.ScanRegex() -} - -func TestHadSpace(t *testing.T) { - tests := []struct { - input string - tokens []Token - spaces []bool - }{ - {`foo(x)`, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, false, false, false}}, - {`foo (x) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, true, false, false}}, - {` foo ( x ) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{true, true, true, true}}, - } - for _, test := range tests { - t.Run(test.input, func(t *testing.T) { - l := NewLexer([]byte(test.input)) - for i := 0; ; i++ { - _, tok, _ := l.Scan() - if tok == EOF { - break - } - if tok != test.tokens[i] { - t.Errorf("expected %s for token %d, got %s", test.tokens[i], i, tok) - } - if l.HadSpace() != test.spaces[i] { - t.Errorf("expected %v for space %d, got %v", test.spaces[i], i, l.HadSpace()) - } - } - }) - } -} - -func TestPeekByte(t *testing.T) { - l := NewLexer([]byte("foo()")) - b := l.PeekByte() - if b != 'f' { - t.Errorf("expected 'f', got %q", b) - } - _, tok, _ := l.Scan() - if tok != NAME { - t.Errorf("expected name, got %s", tok) - } - b = l.PeekByte() - if b != '(' { - t.Errorf("expected '(', got %q", b) - } - _, tok, _ = l.Scan() - if tok != LPAREN { - t.Errorf("expected (, got %s", tok) - } - _, tok, _ = l.Scan() - if tok != RPAREN { - t.Errorf("expected ), got %s", tok) - } - b = l.PeekByte() - if b != 0 { - t.Errorf("expected 0, got %q", b) - } -} - -func TestKeywordToken(t *testing.T) { - tests := []struct { - name string - tok Token - }{ - {"print", PRINT}, - {"split", F_SPLIT}, - {"BEGIN", BEGIN}, - {"foo", ILLEGAL}, - {"GoAWK", ILLEGAL}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - tok := KeywordToken(test.name) - if tok != test.tok { - t.Errorf("expected %v, got %v", test.tok, tok) - } - }) - } -} - -func TestAllTokens(t *testing.T) { - input := "# comment line\n" + - "+ += && = : , -- /\n/= $ @ == >= > >> ++ { [ < ( #\n" + - "<= ~ % %= * *= !~ ! != | || ^ ^= ** **= ? } ] ) ; - -= " + - "BEGIN break continue delete do else END exit " + - "for function getline if in next print printf return while " + - "atan2 close cos exp fflush gsub index int length log match rand " + - "sin split sprintf sqrt srand sub substr system tolower toupper " + - "x \"str\\n\" 1234\n" + - "` ." - - strs := make([]string, 0, LAST+1) - seen := make([]bool, LAST+1) - l := NewLexer([]byte(input)) - for { - _, tok, _ := l.Scan() - strs = append(strs, tok.String()) - seen[int(tok)] = true - if tok == EOF { - break - } - } - output := strings.Join(strs, " ") - - expected := " " + - "+ += && = : , -- / /= $ @ == >= > >> ++ { [ < ( " + - "<= ~ % %= * *= !~ ! != | || ^ ^= ^ ^= ? } ] ) ; - -= " + - "BEGIN break continue delete do else END exit " + - "for function getline if in next print printf return while " + - "atan2 close cos exp fflush gsub index int length log match rand " + - "sin split sprintf sqrt srand sub substr system tolower toupper " + - "name string number " + - " EOF" - if output != expected { - t.Errorf("expected %q, got %q", expected, output) - } - - for i, s := range seen { - if !s && Token(i) != CONCAT && Token(i) != REGEX { - t.Errorf("token %s (%d) not seen", Token(i), i) - } - } - - l = NewLexer([]byte(`/foo/`)) - _, tok1, _ := l.Scan() - _, tok2, val := l.ScanRegex() - if tok1 != DIV || tok2 != REGEX || val != "foo" { - t.Errorf(`expected / regex "foo", got %s %s %q`, tok1, tok2, val) - } - - l = NewLexer([]byte(`/=foo/`)) - _, tok1, _ = l.Scan() - _, tok2, val = l.ScanRegex() - if tok1 != DIV_ASSIGN || tok2 != REGEX || val != "=foo" { - t.Errorf(`expected /= regex "=foo", got %s %s %q`, tok1, tok2, val) - } -} - -func TestUnescape(t *testing.T) { - tests := []struct { - input string - output string - error string - }{ - {``, "", ""}, - {`foo bar`, "foo bar", ""}, - {`foo\tbar`, "foo\tbar", ""}, - {"foo\nbar", "", "can't have newline in string"}, - {`foo"`, "foo\"", ""}, - {`O'Connor`, "O'Connor", ""}, - {`foo\`, "foo\\", ""}, - // Other cases tested in TestLexer string handling. - } - for _, test := range tests { - t.Run(test.input, func(t *testing.T) { - got, err := Unescape(test.input) - if err != nil { - if err.Error() != test.error { - t.Fatalf("expected error %q, got %q", test.error, err) - } - } else { - if test.error != "" { - t.Fatalf("expected error %q, got %q", test.error, "") - } - if got != test.output { - t.Fatalf("expected %q, got %q", test.output, got) - } - } - }) - } -} - -func benchmarkLexer(b *testing.B, repeat int, source string) { - fullSource := []byte(strings.Repeat(source+"\n", repeat)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - l := NewLexer(fullSource) - for { - _, tok, _ := l.Scan() - if tok == EOF || tok == ILLEGAL { - break - } - } - } -} - -func BenchmarkProgram(b *testing.B) { - benchmarkLexer(b, 5, `{ print $1, ($3+$4)*$5 }`) -} - -func BenchmarkNames(b *testing.B) { - benchmarkLexer(b, 5, `x y i foobar abcdefghij0123456789 _`) -} - -func BenchmarkKeywords(b *testing.B) { - benchmarkLexer(b, 5, `BEGIN END print sub if length`) -} - -func BenchmarkSimpleTokens(b *testing.B) { - benchmarkLexer(b, 5, "\n : , { [ ( } ] ) ~ ? ; $") -} - -func BenchmarkChoiceTokens(b *testing.B) { - benchmarkLexer(b, 5, `/ /= % %= + ++ += * ** **= *= = == ^ ^= ! != !~ < <= > >= >> && | ||`) -} - -func BenchmarkNumbers(b *testing.B) { - benchmarkLexer(b, 5, `0 1 .5 1234 1234567890 1234.56789e-50`) -} - -func BenchmarkStrings(b *testing.B) { - benchmarkLexer(b, 5, `"x" "y" "xyz" "foo" "foo bar baz" "foo\tbar\rbaz\n"`) -} - -func BenchmarkRegex(b *testing.B) { - source := `/x/ /./ /foo/ /bar/ /=equals=/ /\/\/\/\//` - fullSource := []byte(strings.Repeat(source+" ", 5)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - l := NewLexer(fullSource) - for { - _, tok, _ := l.Scan() - if tok == EOF { - break - } - if tok != DIV && tok != DIV_ASSIGN { - b.Fatalf("expected / or /=, got %s", tok) - } - _, tok, _ = l.ScanRegex() - if tok != REGEX { - b.Fatalf("expected regex, got %s", tok) - } - } - } -} - -func Example() { - lexer := NewLexer([]byte(`$0 { print $1 }`)) - for { - pos, tok, val := lexer.Scan() - if tok == EOF { - break - } - fmt.Printf("%d:%d %s %q\n", pos.Line, pos.Column, tok, val) - } - // Output: - // 1:1 $ "" - // 1:2 number "0" - // 1:4 { "" - // 1:6 print "" - // 1:12 $ "" - // 1:13 number "1" - // 1:15 } "" -} diff --git a/src/tool/awk/lexer/token.go b/src/tool/awk/lexer/token.go deleted file mode 100644 index b3be569..0000000 --- a/src/tool/awk/lexer/token.go +++ /dev/null @@ -1,263 +0,0 @@ -// Lexer tokens - -package lexer - -// Token is the type of a single token. -type Token int - -const ( - ILLEGAL Token = iota - EOF - NEWLINE - CONCAT // Not really a token, but used as an operator - - // Symbols - - ADD - ADD_ASSIGN - AND - APPEND - ASSIGN - AT - COLON - COMMA - DECR - DIV - DIV_ASSIGN - DOLLAR - EQUALS - GTE - GREATER - INCR - LBRACE - LBRACKET - LESS - LPAREN - LTE - MATCH - MOD - MOD_ASSIGN - MUL - MUL_ASSIGN - NOT_MATCH - NOT - NOT_EQUALS - OR - PIPE - POW - POW_ASSIGN - QUESTION - RBRACE - RBRACKET - RPAREN - SEMICOLON - SUB - SUB_ASSIGN - - // Keywords - - BEGIN - BREAK - CONTINUE - DELETE - DO - ELSE - END - EXIT - FOR - FUNCTION - GETLINE - IF - IN - NEXT - PRINT - PRINTF - RETURN - WHILE - - // Built-in functions - - F_ATAN2 - F_CLOSE - F_COS - F_EXP - F_FFLUSH - F_GSUB - F_INDEX - F_INT - F_LENGTH - F_LOG - F_MATCH - F_RAND - F_SIN - F_SPLIT - F_SPRINTF - F_SQRT - F_SRAND - F_SUB - F_SUBSTR - F_SYSTEM - F_TOLOWER - F_TOUPPER - - // Literals and names (variables and arrays) - - NAME - NUMBER - STRING - REGEX - - LAST = REGEX - FIRST_FUNC = F_ATAN2 - LAST_FUNC = F_TOUPPER -) - -var keywordTokens = map[string]Token{ - "BEGIN": BEGIN, - "break": BREAK, - "continue": CONTINUE, - "delete": DELETE, - "do": DO, - "else": ELSE, - "END": END, - "exit": EXIT, - "for": FOR, - "function": FUNCTION, - "getline": GETLINE, - "if": IF, - "in": IN, - "next": NEXT, - "print": PRINT, - "printf": PRINTF, - "return": RETURN, - "while": WHILE, - - "atan2": F_ATAN2, - "close": F_CLOSE, - "cos": F_COS, - "exp": F_EXP, - "fflush": F_FFLUSH, - "gsub": F_GSUB, - "index": F_INDEX, - "int": F_INT, - "length": F_LENGTH, - "log": F_LOG, - "match": F_MATCH, - "rand": F_RAND, - "sin": F_SIN, - "split": F_SPLIT, - "sprintf": F_SPRINTF, - "sqrt": F_SQRT, - "srand": F_SRAND, - "sub": F_SUB, - "substr": F_SUBSTR, - "system": F_SYSTEM, - "tolower": F_TOLOWER, - "toupper": F_TOUPPER, -} - -// KeywordToken returns the token associated with the given keyword -// string, or ILLEGAL if given name is not a keyword. -func KeywordToken(name string) Token { - return keywordTokens[name] -} - -var tokenNames = map[Token]string{ - ILLEGAL: "", - EOF: "EOF", - NEWLINE: "", - CONCAT: "", - - ADD: "+", - ADD_ASSIGN: "+=", - AND: "&&", - APPEND: ">>", - ASSIGN: "=", - AT: "@", - COLON: ":", - COMMA: ",", - DECR: "--", - DIV: "/", - DIV_ASSIGN: "/=", - DOLLAR: "$", - EQUALS: "==", - GTE: ">=", - GREATER: ">", - INCR: "++", - LBRACE: "{", - LBRACKET: "[", - LESS: "<", - LPAREN: "(", - LTE: "<=", - MATCH: "~", - MOD: "%", - MOD_ASSIGN: "%=", - MUL: "*", - MUL_ASSIGN: "*=", - NOT_MATCH: "!~", - NOT: "!", - NOT_EQUALS: "!=", - OR: "||", - PIPE: "|", - POW: "^", - POW_ASSIGN: "^=", - QUESTION: "?", - RBRACE: "}", - RBRACKET: "]", - RPAREN: ")", - SEMICOLON: ";", - SUB: "-", - SUB_ASSIGN: "-=", - - BEGIN: "BEGIN", - BREAK: "break", - CONTINUE: "continue", - DELETE: "delete", - DO: "do", - ELSE: "else", - END: "END", - EXIT: "exit", - FOR: "for", - FUNCTION: "function", - GETLINE: "getline", - IF: "if", - IN: "in", - NEXT: "next", - PRINT: "print", - PRINTF: "printf", - RETURN: "return", - WHILE: "while", - - F_ATAN2: "atan2", - F_CLOSE: "close", - F_COS: "cos", - F_EXP: "exp", - F_FFLUSH: "fflush", - F_GSUB: "gsub", - F_INDEX: "index", - F_INT: "int", - F_LENGTH: "length", - F_LOG: "log", - F_MATCH: "match", - F_RAND: "rand", - F_SIN: "sin", - F_SPLIT: "split", - F_SPRINTF: "sprintf", - F_SQRT: "sqrt", - F_SRAND: "srand", - F_SUB: "sub", - F_SUBSTR: "substr", - F_SYSTEM: "system", - F_TOLOWER: "tolower", - F_TOUPPER: "toupper", - - NAME: "name", - NUMBER: "number", - STRING: "string", - REGEX: "regex", -} - -// String returns the string name of this token. -func (t Token) String() string { - return tokenNames[t] -} diff --git a/src/tool/awk/license.txt b/src/tool/awk/license.txt deleted file mode 100644 index e39bc70..0000000 --- a/src/tool/awk/license.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 Ben Hoyt - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/src/tool/awk/parser/parser.go b/src/tool/awk/parser/parser.go deleted file mode 100644 index ea534e6..0000000 --- a/src/tool/awk/parser/parser.go +++ /dev/null @@ -1,1048 +0,0 @@ -// Package parser is an AWK parser and abstract syntax tree. -// -// Use the ParseProgram function to parse an AWK program, and then give the -// result to interp.Exec, interp.ExecProgram, or interp.New to execute it. -package parser - -import ( - "fmt" - "io" - "regexp" - "strconv" - "strings" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - "github.com/mojosa-software/goblin/src/tool/awk/internal/compiler" - . "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -// ParseError (actually *ParseError) is the type of error returned by -// ParseProgram. -type ParseError struct { - // Source line/column position where the error occurred. - Position Position - // Error message. - Message string -} - -// Error returns a formatted version of the error, including the line -// and column numbers. -func (e *ParseError) Error() string { - return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) -} - -// ParserConfig lets you specify configuration for the parsing -// process (for example printing type information for debugging). -type ParserConfig struct { - // Enable printing of type information - DebugTypes bool - - // io.Writer to print type information on (for example, os.Stderr) - DebugWriter io.Writer - - // Map of named Go functions to allow calling from AWK. See docs - // on interp.Config.Funcs for details. - Funcs map[string]interface{} -} - -// ParseProgram parses an entire AWK program, returning the *Program -// abstract syntax tree or a *ParseError on error. "config" describes -// the parser configuration (and is allowed to be nil). -func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { - defer func() { - // The parser uses panic with a *ParseError to signal parsing - // errors internally, and they're caught here. This - // significantly simplifies the recursive descent calls as - // we don't have to check errors everywhere. - if r := recover(); r != nil { - // Convert to ParseError or re-panic - err = r.(*ParseError) - } - }() - lexer := NewLexer(src) - p := parser{lexer: lexer} - if config != nil { - p.debugTypes = config.DebugTypes - p.debugWriter = config.DebugWriter - p.nativeFuncs = config.Funcs - } - p.initResolve() - p.next() // initialize p.tok - - // Parse into abstract syntax tree - prog = p.program() - - // Compile to virtual machine code - prog.Compiled, err = compiler.Compile(prog.toAST()) - return prog, err -} - -// Program is the parsed and compiled representation of an entire AWK program. -type Program struct { - // These fields aren't intended to be used or modified directly, - // but are exported for the interpreter (Program itself needs to - // be exported in package "parser", otherwise these could live in - // "internal/ast".) - Begin []ast.Stmts - Actions []ast.Action - End []ast.Stmts - Functions []ast.Function - Scalars map[string]int - Arrays map[string]int - Compiled *compiler.Program -} - -// String returns an indented, pretty-printed version of the parsed -// program. -func (p *Program) String() string { - return p.toAST().String() -} - -// Disassemble writes a human-readable form of the program's virtual machine -// instructions to writer. -func (p *Program) Disassemble(writer io.Writer) error { - return p.Compiled.Disassemble(writer) -} - -// toAST converts the *Program to an *ast.Program. -func (p *Program) toAST() *ast.Program { - return &ast.Program{ - Begin: p.Begin, - Actions: p.Actions, - End: p.End, - Functions: p.Functions, - Scalars: p.Scalars, - Arrays: p.Arrays, - } -} - -// Parser state -type parser struct { - // Lexer instance and current token values - lexer *Lexer - pos Position // position of last token (tok) - tok Token // last lexed token - prevTok Token // previously lexed token - val string // string value of last token (or "") - - // Parsing state - inAction bool // true if parsing an action (false in BEGIN or END) - funcName string // function name if parsing a func, else "" - loopDepth int // current loop depth (0 if not in any loops) - - // Variable tracking and resolving - locals map[string]bool // current function's locals (for determining scope) - varTypes map[string]map[string]typeInfo // map of func name to var name to type - varRefs []varRef // all variable references (usually scalars) - arrayRefs []arrayRef // all array references - multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions - - // Function tracking - functions map[string]int // map of function name to index - userCalls []userCall // record calls so we can resolve them later - nativeFuncs map[string]interface{} - - // Configuration and debugging - debugTypes bool // show variable types for debugging - debugWriter io.Writer // where the debug output goes -} - -// Parse an entire AWK program. -func (p *parser) program() *Program { - prog := &Program{} - p.optionalNewlines() - for p.tok != EOF { - switch p.tok { - case BEGIN: - p.next() - prog.Begin = append(prog.Begin, p.stmtsBrace()) - case END: - p.next() - prog.End = append(prog.End, p.stmtsBrace()) - case FUNCTION: - function := p.function() - p.addFunction(function.Name, len(prog.Functions)) - prog.Functions = append(prog.Functions, function) - default: - p.inAction = true - // Allow empty pattern, normal pattern, or range pattern - pattern := []ast.Expr{} - if !p.matches(LBRACE, EOF) { - pattern = append(pattern, p.expr()) - } - if !p.matches(LBRACE, EOF, NEWLINE) { - p.commaNewlines() - pattern = append(pattern, p.expr()) - } - // Or an empty action (equivalent to { print $0 }) - action := ast.Action{pattern, nil} - if p.tok == LBRACE { - action.Stmts = p.stmtsBrace() - } - prog.Actions = append(prog.Actions, action) - p.inAction = false - } - p.optionalNewlines() - } - - p.resolveUserCalls(prog) - p.resolveVars(prog) - p.checkMultiExprs() - - return prog -} - -// Parse a list of statements. -func (p *parser) stmts() ast.Stmts { - switch p.tok { - case SEMICOLON: - // This is so things like this parse correctly: - // BEGIN { for (i=0; i<10; i++); print "x" } - p.next() - return nil - case LBRACE: - return p.stmtsBrace() - default: - return []ast.Stmt{p.stmt()} - } -} - -// Parse a list of statements surrounded in {...} braces. -func (p *parser) stmtsBrace() ast.Stmts { - p.expect(LBRACE) - p.optionalNewlines() - ss := []ast.Stmt{} - for p.tok != RBRACE && p.tok != EOF { - ss = append(ss, p.stmt()) - } - p.expect(RBRACE) - if p.tok == SEMICOLON { - p.next() - } - return ss -} - -// Parse a "simple" statement (eg: allowed in a for loop init clause). -func (p *parser) simpleStmt() ast.Stmt { - switch p.tok { - case PRINT, PRINTF: - op := p.tok - p.next() - args := p.exprList(p.printExpr) - if len(args) == 1 { - // This allows parens around all the print args - if m, ok := args[0].(*ast.MultiExpr); ok { - args = m.Exprs - p.useMultiExpr(m) - } - } - redirect := ILLEGAL - var dest ast.Expr - if p.matches(GREATER, APPEND, PIPE) { - redirect = p.tok - p.next() - dest = p.expr() - } - if op == PRINT { - return &ast.PrintStmt{args, redirect, dest} - } else { - if len(args) == 0 { - panic(p.errorf("expected printf args, got none")) - } - return &ast.PrintfStmt{args, redirect, dest} - } - case DELETE: - p.next() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - var index []ast.Expr - if p.tok == LBRACKET { - p.next() - index = p.exprList(p.expr) - if len(index) == 0 { - panic(p.errorf("expected expression instead of ]")) - } - p.expect(RBRACKET) - } - return &ast.DeleteStmt{ref, index} - case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN: - panic(p.errorf("expected print/printf, delete, or expression")) - default: - return &ast.ExprStmt{p.expr()} - } -} - -// Parse any top-level statement. -func (p *parser) stmt() ast.Stmt { - for p.matches(SEMICOLON, NEWLINE) { - p.next() - } - var s ast.Stmt - switch p.tok { - case IF: - p.next() - p.expect(LPAREN) - cond := p.expr() - p.expect(RPAREN) - p.optionalNewlines() - body := p.stmts() - p.optionalNewlines() - var elseBody ast.Stmts - if p.tok == ELSE { - p.next() - p.optionalNewlines() - elseBody = p.stmts() - } - s = &ast.IfStmt{cond, body, elseBody} - case FOR: - // Parse for statement, either "for in" or C-like for loop. - // - // FOR LPAREN NAME IN NAME RPAREN NEWLINE* stmts | - // FOR LPAREN [simpleStmt] SEMICOLON NEWLINE* - // [expr] SEMICOLON NEWLINE* - // [simpleStmt] RPAREN NEWLINE* stmts - // - p.next() - p.expect(LPAREN) - var pre ast.Stmt - if p.tok != SEMICOLON { - pre = p.simpleStmt() - } - if pre != nil && p.tok == RPAREN { - // Match: for (var in array) body - p.next() - p.optionalNewlines() - exprStmt, ok := pre.(*ast.ExprStmt) - if !ok { - panic(p.errorf("expected 'for (var in array) ...'")) - } - inExpr, ok := (exprStmt.Expr).(*ast.InExpr) - if !ok { - panic(p.errorf("expected 'for (var in array) ...'")) - } - if len(inExpr.Index) != 1 { - panic(p.errorf("expected 'for (var in array) ...'")) - } - varExpr, ok := (inExpr.Index[0]).(*ast.VarExpr) - if !ok { - panic(p.errorf("expected 'for (var in array) ...'")) - } - body := p.loopStmts() - s = &ast.ForInStmt{varExpr, inExpr.Array, body} - } else { - // Match: for ([pre]; [cond]; [post]) body - p.expect(SEMICOLON) - p.optionalNewlines() - var cond ast.Expr - if p.tok != SEMICOLON { - cond = p.expr() - } - p.expect(SEMICOLON) - p.optionalNewlines() - var post ast.Stmt - if p.tok != RPAREN { - post = p.simpleStmt() - } - p.expect(RPAREN) - p.optionalNewlines() - body := p.loopStmts() - s = &ast.ForStmt{pre, cond, post, body} - } - case WHILE: - p.next() - p.expect(LPAREN) - cond := p.expr() - p.expect(RPAREN) - p.optionalNewlines() - body := p.loopStmts() - s = &ast.WhileStmt{cond, body} - case DO: - p.next() - p.optionalNewlines() - body := p.loopStmts() - p.expect(WHILE) - p.expect(LPAREN) - cond := p.expr() - p.expect(RPAREN) - s = &ast.DoWhileStmt{body, cond} - case BREAK: - if p.loopDepth == 0 { - panic(p.errorf("break must be inside a loop body")) - } - p.next() - s = &ast.BreakStmt{} - case CONTINUE: - if p.loopDepth == 0 { - panic(p.errorf("continue must be inside a loop body")) - } - p.next() - s = &ast.ContinueStmt{} - case NEXT: - if !p.inAction && p.funcName == "" { - panic(p.errorf("next can't be inside BEGIN or END")) - } - p.next() - s = &ast.NextStmt{} - case EXIT: - p.next() - var status ast.Expr - if !p.matches(NEWLINE, SEMICOLON, RBRACE) { - status = p.expr() - } - s = &ast.ExitStmt{status} - case RETURN: - if p.funcName == "" { - panic(p.errorf("return must be inside a function")) - } - p.next() - var value ast.Expr - if !p.matches(NEWLINE, SEMICOLON, RBRACE) { - value = p.expr() - } - s = &ast.ReturnStmt{value} - case LBRACE: - body := p.stmtsBrace() - s = &ast.BlockStmt{body} - default: - s = p.simpleStmt() - } - - // Ensure statements are separated by ; or newline - if !p.matches(NEWLINE, SEMICOLON, RBRACE) && p.prevTok != NEWLINE && p.prevTok != SEMICOLON && p.prevTok != RBRACE { - panic(p.errorf("expected ; or newline between statements")) - } - for p.matches(NEWLINE, SEMICOLON) { - p.next() - } - return s -} - -// Same as stmts(), but tracks that we're in a loop (as break and -// continue can only occur inside a loop). -func (p *parser) loopStmts() ast.Stmts { - p.loopDepth++ - ss := p.stmts() - p.loopDepth-- - return ss -} - -// Parse a function definition and body. As it goes, this resolves -// the local variable indexes and tracks which parameters are array -// parameters. -func (p *parser) function() ast.Function { - if p.funcName != "" { - // Should never actually get here (FUNCTION token is only - // handled at the top level), but just in case. - panic(p.errorf("can't nest functions")) - } - p.next() - name := p.val - if _, ok := p.functions[name]; ok { - panic(p.errorf("function %q already defined", name)) - } - p.expect(NAME) - p.expect(LPAREN) - first := true - params := make([]string, 0, 7) // pre-allocate some to reduce allocations - p.locals = make(map[string]bool, 7) - for p.tok != RPAREN { - if !first { - p.commaNewlines() - } - first = false - param := p.val - if param == name { - panic(p.errorf("can't use function name as parameter name")) - } - if p.locals[param] { - panic(p.errorf("duplicate parameter name %q", param)) - } - p.expect(NAME) - params = append(params, param) - p.locals[param] = true - } - p.expect(RPAREN) - p.optionalNewlines() - - // Parse the body - p.startFunction(name, params) - body := p.stmtsBrace() - p.stopFunction() - p.locals = nil - - return ast.Function{name, params, nil, body} -} - -// Parse expressions separated by commas: args to print[f] or user -// function call, or multi-dimensional index. -func (p *parser) exprList(parse func() ast.Expr) []ast.Expr { - exprs := []ast.Expr{} - first := true - for !p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) { - if !first { - p.commaNewlines() - } - first = false - exprs = append(exprs, parse()) - } - return exprs -} - -// Here's where things get slightly interesting: only certain -// expression types are allowed in print/printf statements, -// presumably so `print a, b > "file"` is a file redirect instead of -// a greater-than comparison. So we kind of have two ways to recurse -// down here: expr(), which parses all expressions, and printExpr(), -// which skips PIPE GETLINE and GREATER expressions. - -// Parse a single expression. -func (p *parser) expr() ast.Expr { return p.getLine() } -func (p *parser) printExpr() ast.Expr { return p._assign(p.printCond) } - -// Parse an "expr | getline [lvalue]" expression: -// -// assign [PIPE GETLINE [lvalue]] -func (p *parser) getLine() ast.Expr { - expr := p._assign(p.cond) - if p.tok == PIPE { - p.next() - p.expect(GETLINE) - target := p.optionalLValue() - return &ast.GetlineExpr{expr, target, nil} - } - return expr -} - -// Parse an = assignment expression: -// -// lvalue [assign_op assign] -// -// An lvalue is a variable name, an array[expr] index expression, or -// an $expr field expression. -func (p *parser) _assign(higher func() ast.Expr) ast.Expr { - expr := higher() - _, isNamedField := expr.(*ast.NamedFieldExpr) - if (isNamedField || ast.IsLValue(expr)) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, - MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) { - if isNamedField { - panic(p.errorf("assigning @ expression not supported")) - } - op := p.tok - p.next() - right := p._assign(higher) - switch op { - case ASSIGN: - return &ast.AssignExpr{expr, right} - case ADD_ASSIGN: - op = ADD - case DIV_ASSIGN: - op = DIV - case MOD_ASSIGN: - op = MOD - case MUL_ASSIGN: - op = MUL - case POW_ASSIGN: - op = POW - case SUB_ASSIGN: - op = SUB - } - return &ast.AugAssignExpr{expr, op, right} - } - return expr -} - -// Parse a ?: conditional expression: -// -// or [QUESTION NEWLINE* cond COLON NEWLINE* cond] -func (p *parser) cond() ast.Expr { return p._cond(p.or) } -func (p *parser) printCond() ast.Expr { return p._cond(p.printOr) } - -func (p *parser) _cond(higher func() ast.Expr) ast.Expr { - expr := higher() - if p.tok == QUESTION { - p.next() - p.optionalNewlines() - t := p.expr() - p.expect(COLON) - p.optionalNewlines() - f := p.expr() - return &ast.CondExpr{expr, t, f} - } - return expr -} - -// Parse an || or expression: -// -// and [OR NEWLINE* and] [OR NEWLINE* and] ... -func (p *parser) or() ast.Expr { return p.binaryLeft(p.and, true, OR) } -func (p *parser) printOr() ast.Expr { return p.binaryLeft(p.printAnd, true, OR) } - -// Parse an && and expression: -// -// in [AND NEWLINE* in] [AND NEWLINE* in] ... -func (p *parser) and() ast.Expr { return p.binaryLeft(p.in, true, AND) } -func (p *parser) printAnd() ast.Expr { return p.binaryLeft(p.printIn, true, AND) } - -// Parse an "in" expression: -// -// match [IN NAME] [IN NAME] ... -func (p *parser) in() ast.Expr { return p._in(p.match) } -func (p *parser) printIn() ast.Expr { return p._in(p.printMatch) } - -func (p *parser) _in(higher func() ast.Expr) ast.Expr { - expr := higher() - for p.tok == IN { - p.next() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - expr = &ast.InExpr{[]ast.Expr{expr}, ref} - } - return expr -} - -// Parse a ~ match expression: -// -// compare [MATCH|NOT_MATCH compare] -func (p *parser) match() ast.Expr { return p._match(p.compare) } -func (p *parser) printMatch() ast.Expr { return p._match(p.printCompare) } - -func (p *parser) _match(higher func() ast.Expr) ast.Expr { - expr := higher() - if p.matches(MATCH, NOT_MATCH) { - op := p.tok - p.next() - right := p.regexStr(higher) // Not match() as these aren't associative - return &ast.BinaryExpr{expr, op, right} - } - return expr -} - -// Parse a comparison expression: -// -// concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat] -func (p *parser) compare() ast.Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) } -func (p *parser) printCompare() ast.Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) } - -func (p *parser) _compare(ops ...Token) ast.Expr { - expr := p.concat() - if p.matches(ops...) { - op := p.tok - p.next() - right := p.concat() // Not compare() as these aren't associative - return &ast.BinaryExpr{expr, op, right} - } - return expr -} - -func (p *parser) concat() ast.Expr { - expr := p.add() - for p.matches(DOLLAR, AT, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) || - (p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC) { - right := p.add() - expr = &ast.BinaryExpr{expr, CONCAT, right} - } - return expr -} - -func (p *parser) add() ast.Expr { - return p.binaryLeft(p.mul, false, ADD, SUB) -} - -func (p *parser) mul() ast.Expr { - return p.binaryLeft(p.pow, false, MUL, DIV, MOD) -} - -func (p *parser) pow() ast.Expr { - // Note that pow (expr ^ expr) is right-associative - expr := p.preIncr() - if p.tok == POW { - p.next() - right := p.pow() - return &ast.BinaryExpr{expr, POW, right} - } - return expr -} - -func (p *parser) preIncr() ast.Expr { - if p.tok == INCR || p.tok == DECR { - op := p.tok - p.next() - exprPos := p.pos - expr := p.preIncr() - if !ast.IsLValue(expr) { - panic(p.posErrorf(exprPos, "expected lvalue after ++ or --")) - } - return &ast.IncrExpr{expr, op, true} - } - return p.postIncr() -} - -func (p *parser) postIncr() ast.Expr { - expr := p.primary() - if (p.tok == INCR || p.tok == DECR) && ast.IsLValue(expr) { - op := p.tok - p.next() - return &ast.IncrExpr{expr, op, false} - } - return expr -} - -func (p *parser) primary() ast.Expr { - switch p.tok { - case NUMBER: - // AWK allows forms like "1.5e", but ParseFloat doesn't - s := strings.TrimRight(p.val, "eE") - n, _ := strconv.ParseFloat(s, 64) - p.next() - return &ast.NumExpr{n} - case STRING: - s := p.val - p.next() - return &ast.StrExpr{s} - case DIV, DIV_ASSIGN: - // If we get to DIV or DIV_ASSIGN as a primary expression, - // it's actually a regex. - regex := p.nextRegex() - return &ast.RegExpr{regex} - case DOLLAR: - p.next() - return &ast.FieldExpr{p.primary()} - case AT: - p.next() - return &ast.NamedFieldExpr{p.primary()} - case NOT, ADD, SUB: - op := p.tok - p.next() - return &ast.UnaryExpr{op, p.pow()} - case NAME: - name := p.val - namePos := p.pos - p.next() - if p.tok == LBRACKET { - // a[x] or a[x, y] array index expression - p.next() - index := p.exprList(p.expr) - if len(index) == 0 { - panic(p.errorf("expected expression instead of ]")) - } - p.expect(RBRACKET) - return &ast.IndexExpr{p.arrayRef(name, namePos), index} - } else if p.tok == LPAREN && !p.lexer.HadSpace() { - if p.locals[name] { - panic(p.errorf("can't call local variable %q as function", name)) - } - // Grammar requires no space between function name and - // left paren for user function calls, hence the funky - // lexer.HadSpace() method. - return p.userCall(name, namePos) - } - return p.varRef(name, namePos) - case LPAREN: - parenPos := p.pos - p.next() - exprs := p.exprList(p.expr) - switch len(exprs) { - case 0: - panic(p.errorf("expected expression, not %s", p.tok)) - case 1: - p.expect(RPAREN) - return exprs[0] - default: - // Multi-dimensional array "in" requires parens around index - p.expect(RPAREN) - if p.tok == IN { - p.next() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - return &ast.InExpr{exprs, ref} - } - // MultiExpr is used as a pseudo-expression for print[f] parsing. - return p.multiExpr(exprs, parenPos) - } - case GETLINE: - p.next() - target := p.optionalLValue() - var file ast.Expr - if p.tok == LESS { - p.next() - file = p.primary() - } - return &ast.GetlineExpr{nil, target, file} - // Below is the parsing of all the builtin function calls. We - // could unify these but several of them have special handling - // (array/lvalue/regex params, optional arguments, and so on). - // Doing it this way means we can check more at parse time. - case F_SUB, F_GSUB: - op := p.tok - p.next() - p.expect(LPAREN) - regex := p.regexStr(p.expr) - p.commaNewlines() - repl := p.expr() - args := []ast.Expr{regex, repl} - if p.tok == COMMA { - p.commaNewlines() - inPos := p.pos - in := p.expr() - if !ast.IsLValue(in) { - panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) - } - args = append(args, in) - } - p.expect(RPAREN) - return &ast.CallExpr{op, args} - case F_SPLIT: - p.next() - p.expect(LPAREN) - str := p.expr() - p.commaNewlines() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - args := []ast.Expr{str, ref} - if p.tok == COMMA { - p.commaNewlines() - args = append(args, p.regexStr(p.expr)) - } - p.expect(RPAREN) - return &ast.CallExpr{F_SPLIT, args} - case F_MATCH: - p.next() - p.expect(LPAREN) - str := p.expr() - p.commaNewlines() - regex := p.regexStr(p.expr) - p.expect(RPAREN) - return &ast.CallExpr{F_MATCH, []ast.Expr{str, regex}} - case F_RAND: - p.next() - p.expect(LPAREN) - p.expect(RPAREN) - return &ast.CallExpr{F_RAND, nil} - case F_SRAND: - p.next() - p.expect(LPAREN) - var args []ast.Expr - if p.tok != RPAREN { - args = append(args, p.expr()) - } - p.expect(RPAREN) - return &ast.CallExpr{F_SRAND, args} - case F_LENGTH: - p.next() - var args []ast.Expr - // AWK quirk: "length" is allowed to be called without parens - if p.tok == LPAREN { - p.next() - if p.tok != RPAREN { - args = append(args, p.expr()) - } - p.expect(RPAREN) - } - return &ast.CallExpr{F_LENGTH, args} - case F_SUBSTR: - p.next() - p.expect(LPAREN) - str := p.expr() - p.commaNewlines() - start := p.expr() - args := []ast.Expr{str, start} - if p.tok == COMMA { - p.commaNewlines() - args = append(args, p.expr()) - } - p.expect(RPAREN) - return &ast.CallExpr{F_SUBSTR, args} - case F_SPRINTF: - p.next() - p.expect(LPAREN) - args := []ast.Expr{p.expr()} - for p.tok == COMMA { - p.commaNewlines() - args = append(args, p.expr()) - } - p.expect(RPAREN) - return &ast.CallExpr{F_SPRINTF, args} - case F_FFLUSH: - p.next() - p.expect(LPAREN) - var args []ast.Expr - if p.tok != RPAREN { - args = append(args, p.expr()) - } - p.expect(RPAREN) - return &ast.CallExpr{F_FFLUSH, args} - case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE: - // Simple 1-argument functions - op := p.tok - p.next() - p.expect(LPAREN) - arg := p.expr() - p.expect(RPAREN) - return &ast.CallExpr{op, []ast.Expr{arg}} - case F_ATAN2, F_INDEX: - // Simple 2-argument functions - op := p.tok - p.next() - p.expect(LPAREN) - arg1 := p.expr() - p.commaNewlines() - arg2 := p.expr() - p.expect(RPAREN) - return &ast.CallExpr{op, []ast.Expr{arg1, arg2}} - default: - panic(p.errorf("expected expression instead of %s", p.tok)) - } -} - -// Parse an optional lvalue -func (p *parser) optionalLValue() ast.Expr { - switch p.tok { - case NAME: - if p.lexer.PeekByte() == '(' { - // User function call, e.g., foo() not lvalue. - return nil - } - name := p.val - namePos := p.pos - p.next() - if p.tok == LBRACKET { - // a[x] or a[x, y] array index expression - p.next() - index := p.exprList(p.expr) - if len(index) == 0 { - panic(p.errorf("expected expression instead of ]")) - } - p.expect(RBRACKET) - return &ast.IndexExpr{p.arrayRef(name, namePos), index} - } - return p.varRef(name, namePos) - case DOLLAR: - p.next() - return &ast.FieldExpr{p.primary()} - default: - return nil - } -} - -// Parse /.../ regex or generic expression: -// -// REGEX | expr -func (p *parser) regexStr(parse func() ast.Expr) ast.Expr { - if p.matches(DIV, DIV_ASSIGN) { - regex := p.nextRegex() - return &ast.StrExpr{regex} - } - return parse() -} - -// Parse left-associative binary operator. Allow newlines after -// operator if allowNewline is true. -// -// parse [op parse] [op parse] ... -func (p *parser) binaryLeft(higher func() ast.Expr, allowNewline bool, ops ...Token) ast.Expr { - expr := higher() - for p.matches(ops...) { - op := p.tok - p.next() - if allowNewline { - p.optionalNewlines() - } - right := higher() - expr = &ast.BinaryExpr{expr, op, right} - } - return expr -} - -// Parse comma followed by optional newlines: -// -// COMMA NEWLINE* -func (p *parser) commaNewlines() { - p.expect(COMMA) - p.optionalNewlines() -} - -// Parse zero or more optional newlines: -// -// [NEWLINE] [NEWLINE] ... -func (p *parser) optionalNewlines() { - for p.tok == NEWLINE { - p.next() - } -} - -// Parse next token into p.tok (and set p.pos and p.val). -func (p *parser) next() { - p.prevTok = p.tok - p.pos, p.tok, p.val = p.lexer.Scan() - if p.tok == ILLEGAL { - panic(p.errorf("%s", p.val)) - } -} - -// Parse next regex and return it (must only be called after DIV or -// DIV_ASSIGN token). -func (p *parser) nextRegex() string { - p.pos, p.tok, p.val = p.lexer.ScanRegex() - if p.tok == ILLEGAL { - panic(p.errorf("%s", p.val)) - } - regex := p.val - _, err := regexp.Compile(regex) - if err != nil { - panic(p.errorf("%v", err)) - } - p.next() - return regex -} - -// Ensure current token is tok, and parse next token into p.tok. -func (p *parser) expect(tok Token) { - if p.tok != tok { - panic(p.errorf("expected %s instead of %s", tok, p.tok)) - } - p.next() -} - -// Return true iff current token matches one of the given operators, -// but don't parse next token. -func (p *parser) matches(operators ...Token) bool { - for _, operator := range operators { - if p.tok == operator { - return true - } - } - return false -} - -// Format given string and args with Sprintf and return *ParseError -// with that message and the current position. -func (p *parser) errorf(format string, args ...interface{}) error { - return p.posErrorf(p.pos, format, args...) -} - -// Like errorf, but with an explicit position. -func (p *parser) posErrorf(pos Position, format string, args ...interface{}) error { - message := fmt.Sprintf(format, args...) - return &ParseError{pos, message} -} - -// Parse call to a user-defined function (and record call site for -// resolving later). -func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { - p.expect(LPAREN) - args := []ast.Expr{} - i := 0 - for !p.matches(NEWLINE, RPAREN) { - if i > 0 { - p.commaNewlines() - } - arg := p.expr() - p.processUserCallArg(name, arg, i) - args = append(args, arg) - i++ - } - p.expect(RPAREN) - call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later - p.recordUserCall(call, pos) - return call -} diff --git a/src/tool/awk/parser/parser_test.go b/src/tool/awk/parser/parser_test.go deleted file mode 100644 index 1cda9be..0000000 --- a/src/tool/awk/parser/parser_test.go +++ /dev/null @@ -1,242 +0,0 @@ -// Test parser package - -package parser_test - -import ( - "bytes" - "fmt" - "strings" - "testing" - - "github.com/mojosa-software/goblin/src/tool/awk/parser" -) - -// NOTE: apart from TestParseAndString, the parser doesn't have -// extensive tests of its own; the idea is to test the parser in the -// interp tests. - -func TestParseAndString(t *testing.T) { - // This program should have one of every AST element to ensure - // we can parse and String()ify each. - source := strings.TrimSpace(` -BEGIN { - print "begin one" -} - -BEGIN { - print "begin two" -} - -{ - print "empty pattern" -} - -$0 { - print "normal pattern" - print 1, 2, 3 - printf "%.3f", 3.14159 - print "x" >"file" - print "x" >>"append" - print "y" |"prog" - delete a[k] - if (c) { - get(a, k) - } - if (1 + 2) { - get(a, k) - } else { - set(a, k, v) - } - for (i = 0; i < 10; i++) { - print i - continue - } - for (k in a) { - break - } - while (0) { - print "x" - } - do { - print "y" - exit status - } while (x) - next - "cmd" |getline - "cmd" |getline x - "cmd" |getline a[1] - "cmd" |getline $1 - getline - getline x - (getline x + 1) - getline $1 - getline a[1] - getline <"file" - getline x <"file" - (getline x <"file" "x") - getline $1 <"file" - getline a[1] <"file" - x = 0 - y = z = 0 - b += 1 - c -= 2 - d *= 3 - e /= 4 - g ^= 5 - h %= 6 - (x ? "t" : "f") - ((b && c) || d) - (k in a) - ((x, y, z) in a) - (s ~ "foo") - (b < 1) - (c <= 2) - (d > 3) - (e >= 4) - (g == 5) - (h != 6) - ((x y) z) - ((b + c) + d) - ((b * c) * d) - ((b - c) - d) - ((b / c) / d) - (b ^ (c ^ d)) - x++ - x-- - ++y - --y - 1234 - 1.5 - "This is a string" - if (/a.b/) { - print "match" - } - $1 - $(1 + 2) - !x - +x - -x - var - a[key] - a[x, y, z] - f() - set(a, k, v) - sub(regex, repl) - sub(regex, repl, s) - gsub(regex, repl) - gsub(regex, repl, s) - split(s, a) - split(s, a, regex) - match(s, regex) - rand() - srand() - srand(1) - length() - length($1) - sprintf("") - sprintf("%.3f", 3.14159) - sprintf("%.3f %d", 3.14159, 42) - cos(1) - sin(1) - exp(1) - log(1) - sqrt(1) - int("42") - tolower("FOO") - toupper("foo") - system("ls") - close("file") - atan2(x, y) - index(haystack, needle) - { - print "block statement" - f() - } -} - -(NR == 1), (NR == 2) { - print "range pattern" -} - -($1 == "foo") - -END { - print "end one" -} - -END { - print "end two" -} - -function f() { -} - -function get(a, k) { - return a[k] -} - -function set(a, k, v) { - a[k] = v - return -} -`) - prog, err := parser.ParseProgram([]byte(source), nil) - if err != nil { - t.Fatalf("error parsing program: %v", err) - } - progStr := prog.String() - if progStr != source { - t.Fatalf("expected first, got second:\n%s\n----------\n%s", source, progStr) - } -} - -func TestResolveLargeCallGraph(t *testing.T) { - const numCalls = 10000 - - var buf bytes.Buffer - var i int - for i = 0; i < numCalls; i++ { - fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1) - } - fmt.Fprintf(&buf, "function f%d(a) { return a }\n", i) - fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n") - _, err := parser.ParseProgram(buf.Bytes(), nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - buf.Reset() - fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n") - fmt.Fprintf(&buf, "function f%d(a) { return a }\n", numCalls) - for i = numCalls - 1; i >= 0; i-- { - fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1) - } - _, err = parser.ParseProgram(buf.Bytes(), nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } -} - -func Example_valid() { - prog, err := parser.ParseProgram([]byte("$0 { print $1 }"), nil) - if err != nil { - fmt.Println(err) - } else { - fmt.Println(prog) - } - // Output: - // $0 { - // print $1 - // } -} - -func Example_error() { - prog, err := parser.ParseProgram([]byte("{ for if }"), nil) - if err != nil { - fmt.Println(err) - } else { - fmt.Println(prog) - } - // Output: - // parse error at 1:7: expected ( instead of if -} diff --git a/src/tool/awk/parser/resolve.go b/src/tool/awk/parser/resolve.go deleted file mode 100644 index eb540f6..0000000 --- a/src/tool/awk/parser/resolve.go +++ /dev/null @@ -1,462 +0,0 @@ -// Resolve function calls and variable types - -package parser - -import ( - "fmt" - "reflect" - "sort" - - "github.com/mojosa-software/goblin/src/tool/awk/internal/ast" - . "github.com/mojosa-software/goblin/src/tool/awk/lexer" -) - -type varType int - -const ( - typeUnknown varType = iota - typeScalar - typeArray -) - -func (t varType) String() string { - switch t { - case typeScalar: - return "Scalar" - case typeArray: - return "Array" - default: - return "Unknown" - } -} - -// typeInfo records type information for a single variable -type typeInfo struct { - typ varType - ref *ast.VarExpr - scope ast.VarScope - index int - callName string - argIndex int -} - -// Used by printVarTypes when debugTypes is turned on -func (t typeInfo) String() string { - var scope string - switch t.scope { - case ast.ScopeGlobal: - scope = "Global" - case ast.ScopeLocal: - scope = "Local" - default: - scope = "Special" - } - return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d", - t.typ, t.ref, scope, t.index, t.callName, t.argIndex) -} - -// A single variable reference (normally scalar) -type varRef struct { - funcName string - ref *ast.VarExpr - isArg bool - pos Position -} - -// A single array reference -type arrayRef struct { - funcName string - ref *ast.ArrayExpr - pos Position -} - -// Initialize the resolver -func (p *parser) initResolve() { - p.varTypes = make(map[string]map[string]typeInfo) - p.varTypes[""] = make(map[string]typeInfo) // globals - p.functions = make(map[string]int) - p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present - p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays - p.arrayRef("FIELDS", Position{1, 1}) - p.multiExprs = make(map[*ast.MultiExpr]Position, 3) -} - -// Signal the start of a function -func (p *parser) startFunction(name string, params []string) { - p.funcName = name - p.varTypes[name] = make(map[string]typeInfo) -} - -// Signal the end of a function -func (p *parser) stopFunction() { - p.funcName = "" -} - -// Add function by name with given index -func (p *parser) addFunction(name string, index int) { - p.functions[name] = index -} - -// Records a call to a user function (for resolving indexes later) -type userCall struct { - call *ast.UserCallExpr - pos Position - inFunc string -} - -// Record a user call site -func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) { - p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName}) -} - -// After parsing, resolve all user calls to their indexes. Also -// ensures functions called have actually been defined, and that -// they're not being called with too many arguments. -func (p *parser) resolveUserCalls(prog *Program) { - // Number the native funcs (order by name to get consistent order) - nativeNames := make([]string, 0, len(p.nativeFuncs)) - for name := range p.nativeFuncs { - nativeNames = append(nativeNames, name) - } - sort.Strings(nativeNames) - nativeIndexes := make(map[string]int, len(nativeNames)) - for i, name := range nativeNames { - nativeIndexes[name] = i - } - - for _, c := range p.userCalls { - // AWK-defined functions take precedence over native Go funcs - index, ok := p.functions[c.call.Name] - if !ok { - f, haveNative := p.nativeFuncs[c.call.Name] - if !haveNative { - panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name)) - } - typ := reflect.TypeOf(f) - if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { - panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) - } - c.call.Native = true - c.call.Index = nativeIndexes[c.call.Name] - continue - } - function := prog.Functions[index] - if len(c.call.Args) > len(function.Params) { - panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) - } - c.call.Index = index - } -} - -// For arguments that are variable references, we don't know the -// type based on context, so mark the types for these as unknown. -func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) { - if varExpr, ok := arg.(*ast.VarExpr); ok { - scope, varFuncName := p.getScope(varExpr.Name) - ref := p.varTypes[varFuncName][varExpr.Name].ref - if ref == varExpr { - // Only applies if this is the first reference to this - // variable (otherwise we know the type already) - p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index} - } - // Mark the last related varRef (the most recent one) as a - // call argument for later error handling - p.varRefs[len(p.varRefs)-1].isArg = true - } -} - -// Determine scope of given variable reference (and funcName if it's -// a local, otherwise empty string) -func (p *parser) getScope(name string) (ast.VarScope, string) { - switch { - case p.locals[name]: - return ast.ScopeLocal, p.funcName - case ast.SpecialVarIndex(name) > 0: - return ast.ScopeSpecial, "" - default: - return ast.ScopeGlobal, "" - } -} - -// Record a variable (scalar) reference and return the *VarExpr (but -// VarExpr.Index won't be set till later) -func (p *parser) varRef(name string, pos Position) *ast.VarExpr { - scope, funcName := p.getScope(name) - expr := &ast.VarExpr{scope, 0, name} - p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos}) - info := p.varTypes[funcName][name] - if info.typ == typeUnknown { - p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0} - } - return expr -} - -// Record an array reference and return the *ArrayExpr (but -// ArrayExpr.Index won't be set till later) -func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { - scope, funcName := p.getScope(name) - if scope == ast.ScopeSpecial { - panic(p.errorf("can't use scalar %q as array", name)) - } - expr := &ast.ArrayExpr{scope, 0, name} - p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos}) - info := p.varTypes[funcName][name] - if info.typ == typeUnknown { - p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0} - } - return expr -} - -// Print variable type information (for debugging) on p.debugWriter -func (p *parser) printVarTypes(prog *Program) { - fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars) - fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays) - funcNames := []string{} - for funcName := range p.varTypes { - funcNames = append(funcNames, funcName) - } - sort.Strings(funcNames) - for _, funcName := range funcNames { - if funcName != "" { - fmt.Fprintf(p.debugWriter, "function %s\n", funcName) - } else { - fmt.Fprintf(p.debugWriter, "globals\n") - } - varNames := []string{} - for name := range p.varTypes[funcName] { - varNames = append(varNames, name) - } - sort.Strings(varNames) - for _, name := range varNames { - info := p.varTypes[funcName][name] - fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info) - } - } -} - -// Resolve unknown variables types and generate variable indexes and -// name-to-index mappings for interpreter -func (p *parser) resolveVars(prog *Program) { - // First go through all unknown types and try to determine the - // type from the parameter type in that function definition. - // Iterate through functions in topological order, for example - // if f() calls g(), process g first, then f. - callGraph := make(map[string]map[string]struct{}) - for _, call := range p.userCalls { - if _, ok := callGraph[call.inFunc]; !ok { - callGraph[call.inFunc] = make(map[string]struct{}) - } - callGraph[call.inFunc][call.call.Name] = struct{}{} - } - sortedFuncs := topoSort(callGraph) - for _, funcName := range sortedFuncs { - infos := p.varTypes[funcName] - for name, info := range infos { - if info.scope == ast.ScopeSpecial || info.typ != typeUnknown { - // It's a special var or type is already known - continue - } - funcIndex, ok := p.functions[info.callName] - if !ok { - // Function being called is a native function - continue - } - // Determine var type based on type of this parameter - // in the called function (if we know that) - paramName := prog.Functions[funcIndex].Params[info.argIndex] - typ := p.varTypes[info.callName][paramName].typ - if typ != typeUnknown { - if p.debugTypes { - fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n", - funcName, name, typ) - } - info.typ = typ - p.varTypes[funcName][name] = info - } - } - } - - // Resolve global variables (iteration order is undefined, so - // assign indexes basically randomly) - prog.Scalars = make(map[string]int) - prog.Arrays = make(map[string]int) - for name, info := range p.varTypes[""] { - _, isFunc := p.functions[name] - if isFunc { - // Global var can't also be the name of a function - panic(p.errorf("global var %q can't also be a function", name)) - } - var index int - if info.scope == ast.ScopeSpecial { - index = ast.SpecialVarIndex(name) - } else if info.typ == typeArray { - index = len(prog.Arrays) - prog.Arrays[name] = index - } else { - index = len(prog.Scalars) - prog.Scalars[name] = index - } - info.index = index - p.varTypes[""][name] = info - } - - // Fill in unknown parameter types that are being called with arrays, - // for example, as in the following code: - // - // BEGIN { arr[0]; f(arr) } - // function f(a) { } - for _, c := range p.userCalls { - if c.call.Native { - continue - } - function := prog.Functions[c.call.Index] - for i, arg := range c.call.Args { - varExpr, ok := arg.(*ast.VarExpr) - if !ok { - continue - } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - argType := p.varTypes[funcName][varExpr.Name] - paramType := p.varTypes[function.Name][function.Params[i]] - if argType.typ == typeArray && paramType.typ == typeUnknown { - paramType.typ = argType.typ - p.varTypes[function.Name][function.Params[i]] = paramType - } - } - } - - // Resolve local variables (assign indexes in order of params). - // Also patch up Function.Arrays (tells interpreter which args - // are arrays). - for funcName, infos := range p.varTypes { - if funcName == "" { - continue - } - scalarIndex := 0 - arrayIndex := 0 - functionIndex := p.functions[funcName] - function := prog.Functions[functionIndex] - arrays := make([]bool, len(function.Params)) - for i, name := range function.Params { - info := infos[name] - var index int - if info.typ == typeArray { - index = arrayIndex - arrayIndex++ - arrays[i] = true - } else { - // typeScalar or typeUnknown: variables may still be - // of unknown type if they've never been referenced -- - // default to scalar in that case - index = scalarIndex - scalarIndex++ - } - info.index = index - p.varTypes[funcName][name] = info - } - prog.Functions[functionIndex].Arrays = arrays - } - - // Check that variables passed to functions are the correct type - for _, c := range p.userCalls { - // Check native function calls - if c.call.Native { - for _, arg := range c.call.Args { - varExpr, ok := arg.(*ast.VarExpr) - if !ok { - // Non-variable expression, must be scalar - continue - } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - info := p.varTypes[funcName][varExpr.Name] - if info.typ == typeArray { - panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) - } - } - continue - } - - // Check AWK function calls - function := prog.Functions[c.call.Index] - for i, arg := range c.call.Args { - varExpr, ok := arg.(*ast.VarExpr) - if !ok { - if function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) - } - continue - } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - info := p.varTypes[funcName][varExpr.Name] - if info.typ == typeArray && !function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) - } - if info.typ != typeArray && function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) - } - } - } - - if p.debugTypes { - p.printVarTypes(prog) - } - - // Patch up variable indexes (interpreter uses an index instead - // of name for more efficient lookups) - for _, varRef := range p.varRefs { - info := p.varTypes[varRef.funcName][varRef.ref.Name] - if info.typ == typeArray && !varRef.isArg { - panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) - } - varRef.ref.Index = info.index - } - for _, arrayRef := range p.arrayRefs { - info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name] - if info.typ == typeScalar { - panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) - } - arrayRef.ref.Index = info.index - } -} - -// If name refers to a local (in function inFunc), return that -// function's name, otherwise return "" (meaning global). -func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string { - if inFunc == "" { - return "" - } - for _, param := range prog.Functions[p.functions[inFunc]].Params { - if name == param { - return inFunc - } - } - return "" -} - -// Record a "multi expression" (comma-separated pseudo-expression -// used to allow commas around print/printf arguments). -func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { - expr := &ast.MultiExpr{exprs} - p.multiExprs[expr] = pos - return expr -} - -// Mark the multi expression as used (by a print/printf statement). -func (p *parser) useMultiExpr(expr *ast.MultiExpr) { - delete(p.multiExprs, expr) -} - -// Check that there are no unused multi expressions (syntax error). -func (p *parser) checkMultiExprs() { - if len(p.multiExprs) == 0 { - return - } - // Show error on first comma-separated expression - min := Position{1000000000, 1000000000} - for _, pos := range p.multiExprs { - if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) { - min = pos - } - } - panic(p.posErrorf(min, "unexpected comma-separated expression")) -} diff --git a/src/tool/awk/parser/toposort.go b/src/tool/awk/parser/toposort.go deleted file mode 100644 index 90b71fa..0000000 --- a/src/tool/awk/parser/toposort.go +++ /dev/null @@ -1,72 +0,0 @@ -// Topological sorting - -package parser - -/* -This algorithm is taken from: -https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search - -L ← Empty list that will contain the sorted nodes -while exists nodes without a permanent mark do - select an unmarked node n - visit(n) - -function visit(node n) - if n has a permanent mark then - return - if n has a temporary mark then - stop (not a DAG) - - mark n with a temporary mark - - for each node m with an edge from n to m do - visit(m) - - remove temporary mark from n - mark n with a permanent mark - add n to head of L -*/ - -// Perform a topological sort on the given graph. -func topoSort(graph map[string]map[string]struct{}) []string { - if len(graph) == 0 { - return nil - } - - unmarked := make(map[string]struct{}) - for node := range graph { - unmarked[node] = struct{}{} - } - permMarks := make(map[string]struct{}) - tempMarks := make(map[string]struct{}) - var sorted []string - - var visit func(string) - visit = func(n string) { - if _, ok := permMarks[n]; ok { - return - } - if _, ok := tempMarks[n]; ok { - return - } - tempMarks[n] = struct{}{} - for m := range graph[n] { - visit(m) - } - delete(tempMarks, n) - permMarks[n] = struct{}{} - delete(unmarked, n) - sorted = append(sorted, n) - return - } - - for len(unmarked) > 0 { - var n string - for n = range unmarked { - break - } - visit(n) - } - - return sorted -} diff --git a/src/tool/awk/parser/toposort_test.go b/src/tool/awk/parser/toposort_test.go deleted file mode 100644 index d8d4c4c..0000000 --- a/src/tool/awk/parser/toposort_test.go +++ /dev/null @@ -1,100 +0,0 @@ -package parser - -import ( - "strconv" - "testing" -) - -func TestTopoSortEmpty(t *testing.T) { - sorted := topoSort(nil) - if len(sorted) != 0 { - t.Fatalf("expected empty slice, got %v", sorted) - } -} - -func TestTopoSortSimple(t *testing.T) { - sorted := topoSort(map[string]map[string]struct{}{ - "a": {"b": struct{}{}}, - "b": {"c": struct{}{}}, - }) - if len(sorted) != 3 { - t.Fatalf("expected 3 items, got %d", len(sorted)) - } - assertBefore(t, sorted, "c", "b") - assertBefore(t, sorted, "b", "a") -} - -func TestTopoSortComplex(t *testing.T) { - sorted := topoSort(map[string]map[string]struct{}{ - "a": {"b": struct{}{}, "c": struct{}{}}, - "c": {"d": struct{}{}}, - "f": {"g": struct{}{}, "h": struct{}{}}, - "g": {}, - "h": {}, - }) - if len(sorted) != 7 { - t.Fatalf("expected 7 items, got %d", len(sorted)) - } - assertBefore(t, sorted, "g", "f") - assertBefore(t, sorted, "h", "f") - assertBefore(t, sorted, "d", "c") - assertBefore(t, sorted, "c", "a") - assertBefore(t, sorted, "b", "a") -} - -func assertBefore(t *testing.T, sorted []string, x, y string) { - xi := strIndex(sorted, x) - if xi < 0 { - t.Fatalf("expected %q to be in result", x) - } - yi := strIndex(sorted, y) - if yi < 0 { - t.Fatalf("expected %q to be in result", y) - } - if xi >= yi { - t.Fatalf("expected %q to come before %q, got indexes %d and %d", x, y, xi, yi) - } -} - -func strIndex(slice []string, s string) int { - for i, item := range slice { - if s == item { - return i - } - } - return -1 -} - -func TestTopoSortCycle(t *testing.T) { - sorted := topoSort(map[string]map[string]struct{}{ - "a": {"b": struct{}{}, "c": struct{}{}}, - "c": {"a": struct{}{}}, - }) - if len(sorted) != 3 { - t.Fatalf("expected 3 items, got %d", len(sorted)) - } - assertBefore(t, sorted, "b", "a") - c := strIndex(sorted, "a") - if c < 0 { - t.Fatalf("expected %q to be in result", c) - } -} - -func TestTopoSortLarge(t *testing.T) { - const num = 1000 - graph := make(map[string]map[string]struct{}) - for i := 0; i < num; i++ { - graph[strconv.Itoa(i)] = map[string]struct{}{strconv.Itoa(i + 1): {}} - } - graph[strconv.Itoa(num)] = map[string]struct{}{} - sorted := topoSort(graph) - if len(sorted) != num+1 { - t.Fatalf("expected %d items, got %d", num+1, len(sorted)) - } - for i := 0; i < num+1; i++ { - expected := num - i - if sorted[i] != strconv.Itoa(expected) { - t.Fatalf("expected %d to be at index %d, got %s", num-1, i, sorted[i]) - } - } -} diff --git a/src/tool/awk/readme.md b/src/tool/awk/readme.md deleted file mode 100644 index 1fee6ca..0000000 --- a/src/tool/awk/readme.md +++ /dev/null @@ -1,125 +0,0 @@ - -# GoAWK: an AWK interpreter with CSV support - -[![Documentation](https://pkg.go.dev/badge/github.com/mojosa-software/goblin/src/tool/awk)](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk) -[![GitHub Actions Build](https://github.com/mojosa-software/goblin/src/tool/awk/workflows/Go/badge.svg)](https://github.com/mojosa-software/goblin/src/tool/awk/actions?query=workflow%3AGo) - - -AWK is a fascinating text-processing language, and somehow after reading the delightfully-terse [*The AWK Programming Language*](https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf) I was inspired to write an interpreter for it in Go. So here it is, feature-complete and tested against "the one true AWK" and GNU AWK test suites. - -GoAWK is a POSIX-compatible version of AWK, and additionally has a CSV mode for reading and writing CSV and TSV files. This feature was sponsored by the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/). Read the [CSV documentation](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/csv.md). - -You can also read one of the articles I've written about GoAWK: - -* The original article about [how GoAWK works and performs](https://benhoyt.com/writings/goawk/) -* How I converted the tree-walking interpreter to a [bytecode compiler and virtual machine](https://benhoyt.com/writings/goawk-compiler-vm/) -* A description of why and how I added [CSV support](https://benhoyt.com/writings/goawk-csv/) - - -## Basic usage - -To use the command-line version, simply use `go install` to install it, and then run it using `goawk` (assuming `~/go/bin` is in your `PATH`): - -```shell -$ go install github.com/mojosa-software/goblin/src/tool/awk@latest - -$ goawk 'BEGIN { print "foo", 42 }' -foo 42 - -$ echo 1 2 3 | goawk '{ print $1 + $3 }' -4 - -# Or use GoAWK's CSV and @"named-field" support: -$ echo -e 'name,amount\nBob,17.50\nJill,20\n"Boba Fett",100.00' | \ - goawk -i csv -H '{ total += @"amount" } END { print total }' -137.5 -``` - -On Windows, `"` is the shell quoting character, so use `"` around the entire AWK program on the command line, and use `'` around AWK strings -- this is a non-POSIX extension to make GoAWK easier to use on Windows: - -```powershell -C:\> goawk "BEGIN { print 'foo', 42 }" -foo 42 -``` - -To use it in your Go programs, you can call `interp.Exec()` directly for simple needs: - -```go -input := strings.NewReader("foo bar\n\nbaz buz") -err := interp.Exec("$0 { print $1 }", " ", input, nil) -if err != nil { - fmt.Println(err) - return -} -// Output: -// foo -// baz -``` - -Or you can use the `parser` module and then `interp.ExecProgram()` to control execution, set variables, and so on: - -```go -src := "{ print NR, tolower($0) }" -input := "A\naB\nAbC" - -prog, err := parser.ParseProgram([]byte(src), nil) -if err != nil { - fmt.Println(err) - return -} -config := &interp.Config{ - Stdin: strings.NewReader(input), - Vars: []string{"OFS", ":"}, -} -_, err = interp.ExecProgram(prog, config) -if err != nil { - fmt.Println(err) - return -} -// Output: -// 1:a -// 2:ab -// 3:abc -``` - -If you need to repeat execution of the same program on different inputs, you can call [`interp.New`](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk/interp#New) once, and then call the returned object's `Execute` method as many times as you need. - -Read the [package documentation](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk) for more details. - - -## Differences from AWK - -The intention is for GoAWK to conform to `awk`'s behavior and to the [POSIX AWK spec](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html), but this section describes some areas where it's different. - -Additional features GoAWK has over AWK: - -* It has proper support for CSV and TSV files ([read the documentation](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/csv.md)). -* It supports negative field indexes to access fields from the right, for example, `$-1` refers to the last field. -* It's embeddable in your Go programs! You can even call custom Go functions from your AWK scripts. -* Most AWK scripts are faster than `awk` and on a par with `gawk`, though usually slower than `mawk`. (See [recent benchmarks](https://benhoyt.com/writings/goawk-compiler-vm/#virtual-machine-results).) -* The parser supports `'single-quoted strings'` in addition to `"double-quoted strings"`, primarily to make Windows one-liners easier (the Windows `cmd.exe` shell uses `"` as the quote character). - -Things AWK has over GoAWK: - -* Scripts that use regular expressions are slower than other implementations (unfortunately Go's `regexp` package is relatively slow). -* AWK is written by Alfred Aho, Peter Weinberger, and Brian Kernighan. - - -## Stability - -This project has a good suite of tests, which include my own intepreter tests, the original AWK test suite, and the relevant tests from the Gawk test suite. I've used it a bunch personally, and it's used in the [Benthos](https://github.com/benthosdev/benthos) stream processor as well as by the software team at the library of the University of Antwerp. However, to `err == human`, so please use GoAWK at your own risk. I intend not to change the Go API in a breaking way in any v1.x.y version. - - -## AWKGo - -The GoAWK repository also includes the creatively-named AWKGo, an AWK-to-Go compiler. This is experimental and is not subject to the stability requirements of GoAWK itself. You can [read more about AWKGo](https://benhoyt.com/writings/awkgo/) or browse the code on the [`awkgo` branch](https://github.com/mojosa-software/goblin/src/tool/awk/tree/awkgo/awkgo). - - -## License - -GoAWK is licensed under an open source [MIT license](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/LICENSE.txt). - - -## The end - -Have fun, and please [contact me](https://benhoyt.com/) if you're using GoAWK or have any feedback! diff --git a/src/tool/awk/scripts/benchmark.sh b/src/tool/awk/scripts/benchmark.sh deleted file mode 100755 index 2a87014..0000000 --- a/src/tool/awk/scripts/benchmark.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -go test ./interp -bench=. -count=5 > benchmarks_new.txt diff --git a/src/tool/awk/scripts/benchmark_awks.py b/src/tool/awk/scripts/benchmark_awks.py deleted file mode 100755 index 7fbd919..0000000 --- a/src/tool/awk/scripts/benchmark_awks.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python3 -# Benchmark GoAWK against other AWK versions - -from __future__ import print_function - -import glob -import os.path -import shutil -import subprocess -import sys -import time - -AWKS = [ - './goawk', - './orig', # GoAWK without perf improvements - 'original-awk', - 'gawk', - 'mawk', -] -NORM_INDEX = AWKS.index('original-awk') -TESTS_TO_MEAN = None # By default, calculate the mean of all tests -if False: - # Only get the mean of these tests because these are the only ones - # we show in the GoAWK article. - TESTS_TO_MEAN = [ - 'tt.01_print', - 'tt.02_print_NR_NF', - 'tt.02a_print_length', - 'tt.03_sum_length', - 'tt.03a_sum_field', - 'tt.04_printf_fields', - 'tt.05_concat_fields', - 'tt.06_count_lengths', - 'tt.07_even_fields', - 'tt.big_complex_program', - 'tt.x1_mandelbrot', - 'tt.x2_sum_loop', - ] -NUM_RUNS = 6 -MIN_TIME = 0.5 -PROGRAM_GLOB = 'testdata/tt.*' - -if len(sys.argv) > 1: - PROGRAM_GLOB = 'testdata/' + sys.argv[1] - - -def repeat_file(input_file, repeated_file, n): - with open(input_file, 'rb') as fin, open(repeated_file, 'wb') as fout: - for i in range(n): - fin.seek(0) - shutil.copyfileobj(fin, fout) - - -print('Test ', end='') -for awk in AWKS: - display_awk = os.path.basename(awk) - display_awk = display_awk.replace('original-awk', 'awk') - print('| {:>8} '.format(display_awk), end='') -print() -print('-'*28 + ' | --------'*len(AWKS)) - -repeats_created = [] -products = [1] * len(AWKS) -num_products = 0 -programs = sorted(glob.glob(PROGRAM_GLOB)) -for program in programs: - # First do a test run with GoAWK to see roughly how long it takes - cmdline = '{} -f {} testdata/foo.td >tt.out'.format(AWKS[0], program) - start = time.time() - status = subprocess.call(cmdline, shell=True) - elapsed = time.time() - start - - # If test run took less than MIN_TIME seconds, scale/repeat input - # file accordingly - input_file = 'testdata/foo.td' - if elapsed < MIN_TIME: - multiplier = int(round(MIN_TIME / elapsed)) - repeated_file = '{}.{}'.format(input_file, multiplier) - if not os.path.exists(repeated_file): - repeat_file(input_file, repeated_file, multiplier) - repeats_created.append(repeated_file) - input_file = repeated_file - - # Record time taken to run this test, running each NUM_RUMS times - # and taking the minimum elapsed time - awk_times = [] - for awk in AWKS: - cmdline = '{} -f {} {} >tt.out'.format(awk, program, input_file) - times = [] - for i in range(NUM_RUNS): - start = time.time() - status = subprocess.call(cmdline, shell=True) - elapsed = time.time() - start - times.append(elapsed) - if status != 0: - print('ERROR status {} from cmd: {}'.format(status, cmdline), file=sys.stderr) - min_time = min(sorted(times)[1:]) - awk_times.append(min_time) - - # Normalize to One True AWK time = 1.0 - norm_time = awk_times[NORM_INDEX] - speeds = [norm_time/t for t in awk_times] - test_name = program.split('/')[1] - if TESTS_TO_MEAN is None or test_name in TESTS_TO_MEAN: - num_products += 1 - for i in range(len(AWKS)): - products[i] *= speeds[i] - - display_name = test_name.split('_')[0] + ' (' + ' '.join(test_name.split('_')[1:]) + ')' - print('{:28}'.format(display_name), end='') - for i, awk in enumerate(AWKS): - print(' | {:8.2f}'.format(speeds[i]), end='') - print() - -print('-'*28 + ' | --------'*len(AWKS)) -print('**Geo mean** ', end='') -for i, awk in enumerate(AWKS): - print(' | **{:.2f}**'.format(products[i] ** (1.0/num_products)), end='') -print() - -# Delete temporary files created -os.remove('tt.out') -for repeated_file in repeats_created: - os.remove(repeated_file) diff --git a/src/tool/awk/scripts/benchstat.sh b/src/tool/awk/scripts/benchstat.sh deleted file mode 100755 index 9b76b78..0000000 --- a/src/tool/awk/scripts/benchstat.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -~/go/bin/benchstat -sort=delta -geomean benchmarks_old.txt benchmarks_new.txt diff --git a/src/tool/awk/scripts/csvbench/count.py b/src/tool/awk/scripts/csvbench/count.py deleted file mode 100644 index bfc43c8..0000000 --- a/src/tool/awk/scripts/csvbench/count.py +++ /dev/null @@ -1,9 +0,0 @@ -import csv -import sys - -lines, fields = 0, 0 -for row in csv.reader(sys.stdin): - lines += 1 - fields += len(row) - -print(lines, fields) diff --git a/src/tool/awk/scripts/csvbench/count/main.go b/src/tool/awk/scripts/csvbench/count/main.go deleted file mode 100644 index ba859c9..0000000 --- a/src/tool/awk/scripts/csvbench/count/main.go +++ /dev/null @@ -1,27 +0,0 @@ -package main - -import ( - "bufio" - "encoding/csv" - "fmt" - "io" - "log" - "os" -) - -func main() { - reader := csv.NewReader(bufio.NewReader(os.Stdin)) - lines, fields := 0, 0 - for { - row, err := reader.Read() - if err == io.EOF { - break - } - if err != nil { - log.Fatal(err) - } - lines++ - fields += len(row) - } - fmt.Println(lines, fields) -} diff --git a/src/tool/awk/scripts/csvbench/csvbench.sh b/src/tool/awk/scripts/csvbench/csvbench.sh deleted file mode 100755 index 1c5a02b..0000000 --- a/src/tool/awk/scripts/csvbench/csvbench.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/sh - -set -e - -echo ===== Writing 1GB - goawk -time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null -time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null -time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null - -echo ===== Writing 1GB - frawk -time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null -time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null -time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null - -echo ===== Writing 1GB - Python -time python3 write.py >/dev/null -time python3 write.py >/dev/null -time python3 write.py >/dev/null - -echo ===== Writing 1GB - Go -go build -o bin/write ./write -time ./bin/write >/dev/null -time ./bin/write >/dev/null -time ./bin/write >/dev/null - - -./bin/write >count.csv - -echo ===== Reading 1GB - goawk -time goawk -i csv '{ w+=NF } END { print NR, w }'