Rebranding.
This commit is contained in:
parent
ab3fb9e759
commit
d1b3d13464
1161 changed files with 87 additions and 136633 deletions
|
@ -1,12 +0,0 @@
|
||||||
TARG = shr/obj/$OBJNAME.a
|
|
||||||
CFLAGS = $DEFS $INCS $CPPFLAGS
|
|
||||||
OFILES = ${CFILES:%.c=%.o}
|
|
||||||
all:V: $TARG
|
|
||||||
$TARG : $OFILES
|
|
||||||
mkdir -p shr/obj
|
|
||||||
$AR $ARFLAGS $TARG $OFILES
|
|
||||||
%.o : %.c
|
|
||||||
$CC -c -o $target $CFLAGS $stem.c
|
|
||||||
clean:V:
|
|
||||||
rm -f $TARG $OFILES
|
|
||||||
<$MKINCDIR/std/install
|
|
|
@ -1,8 +0,0 @@
|
||||||
# Universal powerful simple C build file.
|
|
||||||
<$(MKINCDIR)/std/cbuild
|
|
||||||
<$(MKINCDIR)/std/install
|
|
||||||
|
|
||||||
clean:V:
|
|
||||||
rm -f $TARG $TARG.strip $OFILES
|
|
||||||
uninstall:V:
|
|
||||||
rm -f $EXEDIR/$TARG
|
|
|
@ -1,59 +0,0 @@
|
||||||
# Main configuration file.
|
|
||||||
|
|
||||||
MKINCDIR = $(MKINCDIR)
|
|
||||||
|
|
||||||
EXEDIR = $(HOME)/exe
|
|
||||||
APPDIR = `goblin paths -fr $HOME/app`
|
|
||||||
SHRDIR = $(HOME)/shr
|
|
||||||
INCDIR = $SHRDIR/inc
|
|
||||||
OBJDIR = $SHRDIR/obj
|
|
||||||
MANDIR = $SHRDIR/man
|
|
||||||
INSTALLDIRS = $OBJDIR $MANDIR $EXEDIR $INCDIR $APPDIR
|
|
||||||
|
|
||||||
USRDIR = /usr
|
|
||||||
USRINC = -I$USRDIR/include
|
|
||||||
USRLIB = -L$USRDIR/lib
|
|
||||||
X11 = $USRDIR/X11R6
|
|
||||||
X11INC = -I$X11/include
|
|
||||||
X11LIB = -L$X11/lib -lX11
|
|
||||||
XFTLIB = -lXft
|
|
||||||
FTINC = $USRINC/freetype2
|
|
||||||
FTLIB = -lfreetype
|
|
||||||
FCINC =
|
|
||||||
FCLIB = -lfontconfig
|
|
||||||
MLIB = -lm
|
|
||||||
CLIB = -lc
|
|
||||||
UTILLIB = -lutil
|
|
||||||
XINLIB = -lXinerama
|
|
||||||
XINCPP = -DXINERAMA
|
|
||||||
XTLIB = -lXt
|
|
||||||
XILIB = -lXi
|
|
||||||
XEXTLIB = -lXext
|
|
||||||
DOTINC = -I.
|
|
||||||
SECINC = -Isec
|
|
||||||
PATH9 = lib/9
|
|
||||||
SECINC9 = -I$PATH9/sec
|
|
||||||
CRYPTLIB = -lcrypt
|
|
||||||
XRLIB = -lXrandr
|
|
||||||
CURLIB = -lcurses
|
|
||||||
PNGLIB = -lpng
|
|
||||||
JPGLIB = -ljpeg
|
|
||||||
OBJ9 = $OBJDIR/9.a
|
|
||||||
INC9 = -I$INCDIR/9
|
|
||||||
FRAMEOBJ = $OBJDIR/frame.a
|
|
||||||
FRAMEINC = -I$INCDIR/frame
|
|
||||||
XGOBJ = $OBJDIR/Xg.a
|
|
||||||
XGINC = -I$INCDIR/Xg
|
|
||||||
SLINC = -I$INCDIR/sl
|
|
||||||
XMULIB = -lXmu
|
|
||||||
GCC = cc
|
|
||||||
CC = tcc
|
|
||||||
LD = $CC
|
|
||||||
STRIP = strip
|
|
||||||
AR = ar
|
|
||||||
YACC = 9yacc
|
|
||||||
LEX = lex
|
|
||||||
PKG_CONFIG_CFLAGS = pkg-config --cflags
|
|
||||||
PkG_CONFIG_LIBS = pkg-config --libs
|
|
||||||
|
|
||||||
<$(HOME)/env/mk/config
|
|
|
@ -1,22 +0,0 @@
|
||||||
# File to build programs with Yacc files.
|
|
||||||
INSTALLDIRS = $APPDIR $EXEDIR $SHRDIR
|
|
||||||
OFILES = ${CFILES:%.c=%.o}
|
|
||||||
YOFILES = ${YCFILES:%.c=%.o}
|
|
||||||
TARG = exe/$PROGNAME
|
|
||||||
CFLAGS = $CPPFLAGS $DEFS $INCS $CFLAGS
|
|
||||||
all :V: $TARG
|
|
||||||
strip :V: $TARG
|
|
||||||
$STRIP $TARG
|
|
||||||
$TARG : $OFILES $YOFILES
|
|
||||||
mkdir -p exe
|
|
||||||
$LD -o $target $LDFLAGS $OFILES $YOFILES $AFILES $LIBS
|
|
||||||
%.o : %.c
|
|
||||||
$CC -c -o $target $CFLAGS $stem.c
|
|
||||||
$CFILES $YCFILES :N: $HFILES $YHFILES
|
|
||||||
$HFILES :N:
|
|
||||||
$YHFILES $YCFILES : $YFILES
|
|
||||||
$YACC -d $YFILES
|
|
||||||
$YFILES :N:
|
|
||||||
clean:
|
|
||||||
rm -f $TARG $OFILES $YOFILES $YHFILES $YCFILES
|
|
||||||
<$MKINCDIR/std/install
|
|
|
@ -1,14 +0,0 @@
|
||||||
% :V: %-$MKSHELL
|
|
||||||
%-sh :QV:
|
|
||||||
pwd=`pwd`
|
|
||||||
export pwd
|
|
||||||
for d in $DIRS ; do
|
|
||||||
echo "[ cd $d ; mk $stem]"
|
|
||||||
cd "$d" ; mk $MKFLAGS $stem ; cd "$pwd"
|
|
||||||
done
|
|
||||||
%-rc :QV:
|
|
||||||
pwd = `{pwd}
|
|
||||||
for(d in $DIRS){
|
|
||||||
echo [ cd $d ';' mk $stem]
|
|
||||||
{ builtin cd $d ; mk $MKFLAGS $stem ; builtin cd $pwd}
|
|
||||||
}
|
|
|
@ -1,4 +0,0 @@
|
||||||
# Compatible with pkg module for Golang.
|
|
||||||
<$MKINCDIR/std/gobuild
|
|
||||||
<$MKINCDIR/std/install
|
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
TARG = exe
|
|
||||||
exe:
|
|
||||||
mkdir -p $target
|
|
||||||
all:VQ:
|
|
||||||
echo -n
|
|
||||||
<$MKINCDIR/std/install
|
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
OFILES = ${CFILES:%.c=%.o}
|
|
||||||
TARG = exe/$PROGNAME
|
|
||||||
CFLAGS = $CPPFLAGS $DEFS $INCS $CFLAGS
|
|
||||||
all :V: $TARG
|
|
||||||
strip :V: $TARG.strip
|
|
||||||
$TARG.strip : $TARG
|
|
||||||
cp -f $TARG $target
|
|
||||||
$STRIP $target
|
|
||||||
$TARG : $OFILES
|
|
||||||
mkdir -p exe
|
|
||||||
$LD -o $target $LDFLAGS $OFILES $AFILES $LIBS
|
|
||||||
%.o : %.c $HFILES
|
|
||||||
$CC -c -o $target $CFLAGS $stem.c
|
|
||||||
run :V: $TARG
|
|
||||||
exec ./$TARG $MKFLAGS
|
|
|
@ -1,34 +0,0 @@
|
||||||
all :V: build
|
|
||||||
build :VQ: build-$MKSHELL
|
|
||||||
build-sh :VQ:
|
|
||||||
mkdir -p exe
|
|
||||||
pwd=`pwd`
|
|
||||||
for name in `command cd src/cmd && goblin ls && command cd $pwd`; do
|
|
||||||
cd src/cmd/$name
|
|
||||||
echo Buliding "$name"...
|
|
||||||
if go build -o $pwd/exe/$name ; then
|
|
||||||
echo Done building "$name"
|
|
||||||
else
|
|
||||||
echo "Error(s) while building $name"
|
|
||||||
fi
|
|
||||||
if echo "$name" | goblin in $BUILD_WASM ; then
|
|
||||||
echo Bulding WASM for "'$name'"...
|
|
||||||
if GOARCH=wasm GOOS=js go build -o "$pwd/$STATIC/$name.wasm" ; then
|
|
||||||
echo Done building WASM for "$name"
|
|
||||||
else
|
|
||||||
echo "Error(s) while building WASM for '$name'"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
command cd $pwd
|
|
||||||
done
|
|
||||||
clean:VQ:
|
|
||||||
pwd=`pwd`
|
|
||||||
names=`command cd src/cmd && goblin ls && command cd $pwd`
|
|
||||||
for name in $names ; do
|
|
||||||
echo Removing "'$name'..."
|
|
||||||
rm -f "$pwd/exe/$name"
|
|
||||||
if echo "$name" | goblin in $BUILD_WASM ; then
|
|
||||||
rm -f "$pwd/$STATIC/$name.wasm"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
install:V: install-$MKSHELL
|
|
||||||
install-sh:VQ: build
|
|
||||||
if test -d shr ; then
|
|
||||||
echo Installing shared files...
|
|
||||||
cp -rf shr $HOME/shr/..
|
|
||||||
echo Done installing shared files
|
|
||||||
fi
|
|
||||||
if test -d app ; then
|
|
||||||
echo Installing application files...
|
|
||||||
echo "'$HOME'"
|
|
||||||
echo "'$APPDIR'"
|
|
||||||
echo `goblin paths app/* $APPDIR/$PKG_NAME/`
|
|
||||||
#goblin mkdir -p `goblin path $APPDIR/$PKG_NAME` && cp -rf `goblin paths app/* $APPDIR/$PKG_NAME/`
|
|
||||||
echo Done installing application files
|
|
||||||
fi
|
|
||||||
if test -d exe ; then
|
|
||||||
echo Installing executables...
|
|
||||||
goblin mkdir -p $EXEDIR
|
|
||||||
cp -rf exe/* $EXEDIR/
|
|
||||||
files=`goblin basename $(ls exe)`
|
|
||||||
for i in $files ; do
|
|
||||||
chmod 0755 $EXEDIR/$i
|
|
||||||
done
|
|
||||||
echo Done installing executables
|
|
||||||
fi
|
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
CC = $GCC
|
|
||||||
LD = $CC
|
|
|
@ -1,14 +0,0 @@
|
||||||
run-dev:V:
|
|
||||||
while true ; do
|
|
||||||
goblin echo -n '> '
|
|
||||||
input=`goblin read`
|
|
||||||
case $input in
|
|
||||||
exit)
|
|
||||||
exit
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
goblin ls -r 100 $WATCH_FILES \
|
|
||||||
| entr -d -r sh -c \
|
|
||||||
'mk && ./exe/w3site' \
|
|
||||||
|| pkill w3site && ./exe/w3site
|
|
||||||
done
|
|
20
check.anko
20
check.anko
|
@ -1,20 +0,0 @@
|
||||||
var strings = import("strings")
|
|
||||||
|
|
||||||
e = 5
|
|
||||||
v = 53
|
|
||||||
println(e + v)
|
|
||||||
|
|
||||||
for v in Cmd("ls").Stdout().ShSplit() {
|
|
||||||
println("file:", v)
|
|
||||||
}
|
|
||||||
|
|
||||||
for v in strings.Split("big dick and me", " ") {
|
|
||||||
println(v)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v < 55 {
|
|
||||||
println("it fucking works")
|
|
||||||
}
|
|
||||||
|
|
||||||
Rcmd("goblin", "ls", "-r", "100", "src") || println("it works also") && Rcmd("goblin", "cat", "check.anko")
|
|
||||||
|
|
7
go.mod
7
go.mod
|
@ -1,8 +1,5 @@
|
||||||
module github.com/mojosa-software/goblin
|
module github.com/reklesio/tk
|
||||||
|
|
||||||
go 1.18
|
go 1.18
|
||||||
|
|
||||||
require (
|
require github.com/reklesio/mtool v0.0.0-20231023113051-bbe64fae523e // indirect
|
||||||
github.com/mojosa-software/gomtool v0.0.0-20230628111258-73d5a2f1940f
|
|
||||||
github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c
|
|
||||||
)
|
|
||||||
|
|
10
go.sum
10
go.sum
|
@ -1,6 +1,8 @@
|
||||||
github.com/mojosa-software/gomtool v0.0.0-20230626085847-176486ff01a2 h1:xbw1/w6ZB8xRmaTS0mQvfTETF8M2/tSBfHJIR+cJyNE=
|
|
||||||
github.com/mojosa-software/gomtool v0.0.0-20230626085847-176486ff01a2/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw=
|
|
||||||
github.com/mojosa-software/gomtool v0.0.0-20230628111258-73d5a2f1940f h1:lsvXiy5XeOGCiOvkzuX0jA11jJf3j998Xes0/gmk50A=
|
|
||||||
github.com/mojosa-software/gomtool v0.0.0-20230628111258-73d5a2f1940f/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw=
|
|
||||||
github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c h1:y7RQZz/zJDARRJkn4szD8N2rK6K9NU1vUNPwahtW5zw=
|
github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c h1:y7RQZz/zJDARRJkn4szD8N2rK6K9NU1vUNPwahtW5zw=
|
||||||
github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c/go.mod h1:LtBn7lQTgA/TMEL8Y+dGkD6XWHV2gxRPZXiqCZt3HRc=
|
github.com/mojosa-software/goscript v0.0.0-20230626091305-86a004b7769c/go.mod h1:LtBn7lQTgA/TMEL8Y+dGkD6XWHV2gxRPZXiqCZt3HRc=
|
||||||
|
github.com/reklesio v0.0.0-20230626085847-176486ff01a2 h1:xbw1/w6ZB8xRmaTS0mQvfTETF8M2/tSBfHJIR+cJyNE=
|
||||||
|
github.com/reklesio v0.0.0-20230626085847-176486ff01a2/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw=
|
||||||
|
github.com/reklesio v0.0.0-20230628111258-73d5a2f1940f h1:lsvXiy5XeOGCiOvkzuX0jA11jJf3j998Xes0/gmk50A=
|
||||||
|
github.com/reklesio v0.0.0-20230628111258-73d5a2f1940f/go.mod h1:cJ6/4rcQ/s22RTLuLtypFh7gubwG4OLSph3NHX3haAw=
|
||||||
|
github.com/reklesio/mtool v0.0.0-20231023113051-bbe64fae523e h1:2ntFru8B2HDixWKy5EBU4QOcJGyHR4GhB8tWua4Leos=
|
||||||
|
github.com/reklesio/mtool v0.0.0-20231023113051-bbe64fae523e/go.mod h1:G6WEew5BI+7sorvUztT8wh7mr2jp2Vh5IFjkqWGVM34=
|
||||||
|
|
10
install.sh
10
install.sh
|
@ -1,10 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
wd=`pwd`
|
|
||||||
|
|
||||||
# cd $wd/src/cmd/goblin && go install && cd $wd
|
|
||||||
go install
|
|
||||||
|
|
||||||
mkdir -p $HOME/app/goblin
|
|
||||||
cp -rf $wd/app/* $HOME/app/goblin
|
|
||||||
|
|
38
license.txt
38
license.txt
|
@ -1,25 +1,21 @@
|
||||||
Copyright (c) 2020 surdeus, aka Andrey Parhomenko
|
MIT License
|
||||||
|
|
||||||
Permission is hereby granted, free of charge,
|
Copyright (c) 2023 surdeus
|
||||||
to any person obtaining a copy of this software
|
|
||||||
and associated documentation files (the "Software"),
|
|
||||||
to deal in the Software without restriction,
|
|
||||||
including without limitation the rights to use,
|
|
||||||
copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
and/or sell copies of the Software, and to permit persons
|
|
||||||
to whom the Software is furnished to do so,
|
|
||||||
subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
shall be included in all copies or substantial portions of the Software.
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS",
|
The above copyright notice and this permission notice shall be included in all
|
||||||
WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
copies or substantial portions of the Software.
|
||||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
||||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
||||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
||||||
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
66
main.go
66
main.go
|
@ -1,42 +1,37 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/mojosa-software/gomtool/src/mtool"
|
"github.com/reklesio/mtool"
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk"
|
"github.com/reklesio/tk/tool/cat"
|
||||||
"github.com/mojosa-software/goblin/src/tool/basename"
|
"github.com/reklesio/tk/tool/date"
|
||||||
"github.com/mojosa-software/goblin/src/tool/cat"
|
"github.com/reklesio/tk/tool/ec"
|
||||||
"github.com/mojosa-software/goblin/src/tool/date"
|
"github.com/reklesio/tk/tool/echo"
|
||||||
"github.com/mojosa-software/goblin/src/tool/ec"
|
"github.com/reklesio/tk/tool/ftest"
|
||||||
"github.com/mojosa-software/goblin/src/tool/echo"
|
"github.com/reklesio/tk/tool/gfalse"
|
||||||
"github.com/mojosa-software/goblin/src/tool/ftest"
|
"github.com/reklesio/tk/tool/grange"
|
||||||
"github.com/mojosa-software/goblin/src/tool/gfalse"
|
"github.com/reklesio/tk/tool/gtrue"
|
||||||
"github.com/mojosa-software/goblin/src/tool/grange"
|
"github.com/reklesio/tk/tool/in"
|
||||||
"github.com/mojosa-software/goblin/src/tool/gtrue"
|
"github.com/reklesio/tk/tool/ln"
|
||||||
"github.com/mojosa-software/goblin/src/tool/in"
|
"github.com/reklesio/tk/tool/ls"
|
||||||
"github.com/mojosa-software/goblin/src/tool/ln"
|
"github.com/reklesio/tk/tool/mergelbl"
|
||||||
"github.com/mojosa-software/goblin/src/tool/ls"
|
"github.com/reklesio/tk/tool/mkdir"
|
||||||
"github.com/mojosa-software/goblin/src/tool/mergelbl"
|
"github.com/reklesio/tk/tool/noext"
|
||||||
"github.com/mojosa-software/goblin/src/tool/mk"
|
"github.com/reklesio/tk/tool/paths"
|
||||||
"github.com/mojosa-software/goblin/src/tool/mkdir"
|
"github.com/reklesio/tk/tool/quote"
|
||||||
"github.com/mojosa-software/goblin/src/tool/noext"
|
"github.com/reklesio/tk/tool/read"
|
||||||
"github.com/mojosa-software/goblin/src/tool/paths"
|
"github.com/reklesio/tk/tool/sort"
|
||||||
"github.com/mojosa-software/goblin/src/tool/quote"
|
"github.com/reklesio/tk/tool/tac"
|
||||||
"github.com/mojosa-software/goblin/src/tool/read"
|
"github.com/reklesio/tk/tool/uniq"
|
||||||
"github.com/mojosa-software/goblin/src/tool/sort"
|
"github.com/reklesio/tk/tool/urlprs"
|
||||||
"github.com/mojosa-software/goblin/src/tool/tac"
|
"github.com/reklesio/tk/tool/useprog"
|
||||||
"github.com/mojosa-software/goblin/src/tool/uniq"
|
"github.com/reklesio/tk/tool/wc"
|
||||||
"github.com/mojosa-software/goblin/src/tool/urlprs"
|
"github.com/reklesio/tk/tool/whoami"
|
||||||
"github.com/mojosa-software/goblin/src/tool/useprog"
|
"github.com/reklesio/tk/tool/yes"
|
||||||
"github.com/mojosa-software/goblin/src/tool/wc"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/whoami"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/yes"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/script"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
tools := mtool.Tools{
|
tools := mtool.Tools{
|
||||||
"basename": mtool.Tool{basename.Run, "get base name of file path", ""},
|
|
||||||
"cat": mtool.Tool{cat.Run, "print file data to the standard output", ""},
|
"cat": mtool.Tool{cat.Run, "print file data to the standard output", ""},
|
||||||
"mkdir": mtool.Tool{mkdir.Run, "make new directory", ""},
|
"mkdir": mtool.Tool{mkdir.Run, "make new directory", ""},
|
||||||
"echo": mtool.Tool{echo.Run, "print strings to the standard output", ""},
|
"echo": mtool.Tool{echo.Run, "print strings to the standard output", ""},
|
||||||
|
@ -59,8 +54,6 @@ func main() {
|
||||||
"range": mtool.Tool{grange.Run, "too lazy", ""},
|
"range": mtool.Tool{grange.Run, "too lazy", ""},
|
||||||
"in": mtool.Tool{in.Run, "filter strings from stdin that aren not in arguments", ""},
|
"in": mtool.Tool{in.Run, "filter strings from stdin that aren not in arguments", ""},
|
||||||
"which": mtool.Tool{useprog.Run, "print the name or the path of the first existing program in arg list", ""},
|
"which": mtool.Tool{useprog.Run, "print the name or the path of the first existing program in arg list", ""},
|
||||||
"mk": mtool.Tool{mk.Run, "file dependency system, simpler make", ""},
|
|
||||||
"awk": mtool.Tool{awk.Run, "simple scripting language for working with string templates", ""},
|
|
||||||
"paths": mtool.Tool{
|
"paths": mtool.Tool{
|
||||||
paths.Run,
|
paths.Run,
|
||||||
"convert UNIX slash separated paths into the OS compatible ones",
|
"convert UNIX slash separated paths into the OS compatible ones",
|
||||||
|
@ -76,12 +69,7 @@ func main() {
|
||||||
"link files",
|
"link files",
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
"script": mtool.Tool{
|
|
||||||
script.Run,
|
|
||||||
"run embedded anko",
|
|
||||||
"",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mtool.Main("goblin", tools)
|
mtool.Main("tk", tools)
|
||||||
}
|
}
|
||||||
|
|
BIN
media/gopher.png
Normal file
BIN
media/gopher.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 71 KiB |
5
mkconfig
5
mkconfig
|
@ -1,5 +0,0 @@
|
||||||
MKSHELL = sh
|
|
||||||
<$(MKINCDIR)/config
|
|
||||||
PKG_NAME = goblin
|
|
||||||
CC = cc
|
|
||||||
|
|
3
mkfile
3
mkfile
|
@ -1,3 +0,0 @@
|
||||||
<mkconfig
|
|
||||||
<$MKINCDIR/gobuild
|
|
||||||
|
|
17
readme.md
17
readme.md
|
@ -1,17 +1,6 @@
|
||||||
# goblin
|
# tk
|
||||||
|
|
||||||
![](https://raw.githubusercontent.com/mojosa-software/goblin/master/media/goblin.jpg)
|
![Insert here a gopher, please](https://raw.githubusercontent.com/reklesio/tk/master/media/gopher.png)
|
||||||
|
|
||||||
|
Gopher ToolKit: a BusyBox-like set of programs that is not POSIX-compatible.
|
||||||
GO Base utils LINked.
|
|
||||||
|
|
||||||
Not POSIX compatible.
|
|
||||||
|
|
||||||
Since Golang is so good at static files it makes sense to
|
|
||||||
put many programs into one, so now it is gonna include many
|
|
||||||
suckless and cat-v like stuff, including:
|
|
||||||
|
|
||||||
* mk
|
|
||||||
* awk
|
|
||||||
* sed
|
|
||||||
|
|
||||||
|
|
|
@ -1,387 +0,0 @@
|
||||||
|
|
||||||
# GoAWK's CSV and TSV file support
|
|
||||||
|
|
||||||
[CSV](https://en.wikipedia.org/wiki/Comma-separated_values) and [TSV](https://en.wikipedia.org/wiki/Tab-separated_values) files are often used in data processing today, but unfortunately you can't properly process them using POSIX AWK. You can change the field separator to `,` or tab (for example `awk -F,` or `awk '-F\t'`) but that doesn't handle quoted or multi-line fields.
|
|
||||||
|
|
||||||
There are other workarounds, such as [Gawk's FPAT feature](https://www.gnu.org/software/gawk/manual/html_node/Splitting-By-Content.html), various [CSV extensions](http://mcollado.z15.es/xgawk/) for Gawk, or Adam Gordon Bell's [csvquote](https://github.com/adamgordonbell/csvquote) tool. There's also [frawk](https://github.com/ezrosent/frawk), which is an amazing tool that natively supports CSV, but unfortunately it deviates quite a bit from POSIX-compatible AWK.
|
|
||||||
|
|
||||||
Since version v1.17.0, GoAWK has included CSV support, which allows you to read and write CSV and TSV files, including proper handling of quoted and multi-line fields as per [RFC 4180](https://rfc-editor.org/rfc/rfc4180.html). In addition, GoAWK supports a "named field" construct that allows you to access CSV fields by name as well as number, for example `@"Address"` rather than `$5`.
|
|
||||||
|
|
||||||
**Many thanks to the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/), who sponsored this feature in May 2022.** Thanks also to [Eli Rosenthal](https://github.com/ezrosent), whose frawk tool inspired aspects of the design (including the `-i` and `-o` command line arguments).
|
|
||||||
|
|
||||||
Links to sections:
|
|
||||||
|
|
||||||
* [CSV input configuration](#csv-input-configuration)
|
|
||||||
* [CSV output configuration](#csv-output-configuration)
|
|
||||||
* [Named field syntax](#named-field-syntax)
|
|
||||||
* [Go API](#go-api)
|
|
||||||
* [Examples](#examples)
|
|
||||||
* [Examples based on csvkit](#examples-based-on-csvkit)
|
|
||||||
* [Performance](#performance)
|
|
||||||
* [Future work](#future-work)
|
|
||||||
|
|
||||||
|
|
||||||
## CSV input configuration
|
|
||||||
|
|
||||||
When in CSV input mode, GoAWK ignores the regular field and record separators (`FS` and `RS`), instead parsing input into records and fields using the CSV or TSV format. Fields can be accessed using the standard AWK numbered field syntax (for example, `$1` or `$5`), or using the GoAWK-specific [named field syntax](#named-field-syntax).
|
|
||||||
|
|
||||||
To enable CSV input mode when using the `goawk` program, use the `-i mode` command line argument. You can also enable CSV input mode by setting the `INPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows:
|
|
||||||
|
|
||||||
```
|
|
||||||
csv|tsv [separator=<char>] [comment=<char>] [header]
|
|
||||||
```
|
|
||||||
|
|
||||||
The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows:
|
|
||||||
|
|
||||||
* `separator=<char>`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format.
|
|
||||||
* `comment=<char>`: consider lines starting with the given character to be comments and skip them, for example `comment=#` will ignore any lines starting with `#` (without preceding whitespace). The default is not to support comments.
|
|
||||||
* `header`: treat the first line of each input file as a header row providing the field names, and enable the `@"field"` syntax as well as the `FIELDS` array. This option is equivalent to the `-H` command line argument. If neither `header` nor `-H` is specified, you can't use named fields.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## CSV output configuration
|
|
||||||
|
|
||||||
When in CSV output mode, the GoAWK `print` statement with one or more arguments ignores `OFS` and `ORS` and separates its arguments (fields) and records using CSV formatting. No header row is printed; if required, a header row can be printed in the `BEGIN` block manually. No other functionality is changed, for example, `printf` doesn't do anything different in CSV output mode.
|
|
||||||
|
|
||||||
**NOTE:** The behaviour of `print` without arguments remains unchanged. This means you can print the input line (`$0`) without further quoting by using a bare `print` statement, but `print $0` will print the input line as a single CSV field, which is probably not what you want. See the [example](#example-convert-between-formats-all-fields) below.
|
|
||||||
|
|
||||||
To enable CSV output mode when using the `goawk` program, use the `-o mode` command line argument. You can also enable CSV output mode by setting the `OUTPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows:
|
|
||||||
|
|
||||||
```
|
|
||||||
csv|tsv [separator=<char>]
|
|
||||||
```
|
|
||||||
|
|
||||||
The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows:
|
|
||||||
|
|
||||||
* `separator=<char>`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format.
|
|
||||||
|
|
||||||
|
|
||||||
## Named field syntax
|
|
||||||
|
|
||||||
If the `header` option or `-H` argument is given, CSV input mode parses the first row of each input file as a header row containing a list of field names.
|
|
||||||
|
|
||||||
When the header option is enabled, you can use the GoAWK-specific "named field" operator (`@`) to access fields by name instead of by number (`$`). For example, given the header row `id,name,email`, for each record you can access the email address using `@"email"`, `$3`, or even `$-1` (first field from the right). Further usage examples are shown [below](#examples).
|
|
||||||
|
|
||||||
Every time a header row is processed, the `FIELDS` special array is updated: it is a mapping of field number to field name, allowing you to loop over the field names dynamically. For example, given the header row `id,name,email`, GoAWK sets `FIELDS` using the equivalent of:
|
|
||||||
|
|
||||||
```
|
|
||||||
FIELDS[1] = "id"
|
|
||||||
FIELDS[2] = "name"
|
|
||||||
FIELDS[3] = "email"
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that named field assignment such as `@"id" = 42` is not yet supported, but this feature may be added later.
|
|
||||||
|
|
||||||
|
|
||||||
## Go API
|
|
||||||
|
|
||||||
When using GoAWK via the Go API, you can still use `INPUTMODE`, but it may be more convenient to use the `interp.Config` fields directly: `InputMode`, `CSVInput`, `OutputMode`, and `CSVOutput`.
|
|
||||||
|
|
||||||
Here's a simple snippet showing the use of the `InputMode` and `CSVInput` fields to enable `#` as the comment character:
|
|
||||||
|
|
||||||
```
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil { ... }
|
|
||||||
|
|
||||||
config := &interp.Config{
|
|
||||||
InputMode: interp.CSVMode,
|
|
||||||
CSVInput: interp.CSVInputConfig{Comment: '#'},
|
|
||||||
}
|
|
||||||
_, err = interp.ExecProgram(prog, config)
|
|
||||||
if err != nil { ... }
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that `INPUTMODE` and `OUTPUTMODE` set using `Vars` or in the `BEGIN` block will override these settings.
|
|
||||||
|
|
||||||
See the [full reference documentation](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk/interp#Config) for the `interp.Config` struct.
|
|
||||||
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
Below are some examples using the [testdata/csv/states.csv](https://github.com/mojosa-software/goblin/blob/master/src/tool/awk/testdata/csv/states.csv) file, which is a simple CSV file whose contents are as follows:
|
|
||||||
|
|
||||||
```
|
|
||||||
"State","Abbreviation"
|
|
||||||
"Alabama","AL"
|
|
||||||
"Alaska","AK"
|
|
||||||
"Arizona","AZ"
|
|
||||||
"Arkansas","AR"
|
|
||||||
"California","CA"
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: output a field by name
|
|
||||||
|
|
||||||
To output a field by name (in this case the state's abbreviation):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -H '{ print @"Abbreviation" }' testdata/csv/states.csv
|
|
||||||
AL
|
|
||||||
AK
|
|
||||||
AZ
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: match a field and count
|
|
||||||
|
|
||||||
To count the number of states that have "New" in the name, and then print out what they are:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -H '@"State" ~ /New/ { n++ } END { print n }' testdata/csv/states.csv
|
|
||||||
4
|
|
||||||
$ goawk -i csv -H '@"State" ~ /New/ { print @"State" }' testdata/csv/states.csv
|
|
||||||
New Hampshire
|
|
||||||
New Jersey
|
|
||||||
New Mexico
|
|
||||||
New York
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: rename and reorder fields
|
|
||||||
|
|
||||||
To rename and reorder the fields from `State`, `Abbreviation` to `abbr`, `name` (note that the `print` statement in the `BEGIN` block prints the header row for the output):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -H -o csv 'BEGIN { print "abbr", "name" } { print @"Abbreviation", @"State" }' testdata/csv/states.csv
|
|
||||||
abbr,name
|
|
||||||
AL,Alabama
|
|
||||||
AK,Alaska
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: convert between formats (explicit field list)
|
|
||||||
|
|
||||||
To convert the file from CSV to TSV format (note how we're *not* using `-H`, so the header row is included):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -o tsv '{ print $1, $2 }' testdata/csv/states.csv
|
|
||||||
State Abbreviation
|
|
||||||
Alabama AL
|
|
||||||
Alaska AK
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: convert between formats (all fields)
|
|
||||||
|
|
||||||
If you want to convert between CSV and TSV format but don't know the number of fields, you can use a field assignment like `$1=$1` so that GoAWK reformats `$0` according to the output format (TSV in this case). This is similar to how in POSIX AWK a field assignment reformats `$0` according to the output field separator (`OFS`). Then `print` without arguments prints the raw value of `$0`:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -o tsv '{ $1=$1; print }' testdata/csv/states.csv
|
|
||||||
State Abbreviation
|
|
||||||
Alabama AL
|
|
||||||
Alaska AK
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
**NOTE:** It's not correct to use `print $0` in this case, because that would print `$0` as a single TSV field, which you generally don't want:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -o tsv '{ $1=$1; print $0 }' testdata/csv/states.csv # INCORRECT!
|
|
||||||
"State Abbreviation"
|
|
||||||
"Alabama AL"
|
|
||||||
"Alaska AK"
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: override separator
|
|
||||||
|
|
||||||
To test overriding the separator character, we can use GoAWK to convert the separator to `|` (pipe). We'll also add a comment line to test comment handling:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -o 'csv separator=|' 'BEGIN { printf "# comment\n" } { $1=$1; print }' testdata/csv/states.csv
|
|
||||||
# comment
|
|
||||||
State|Abbreviation
|
|
||||||
Alabama|AL
|
|
||||||
Alaska|AK
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: skip comment lines
|
|
||||||
|
|
||||||
We can process the "pipe-separated values" file generated above, skipping comment lines, and printing the first three state names (accessed by field number this time):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i 'csv header comment=# separator=|' 'NR<=3 { print $1 }' testdata/csv/states.psv
|
|
||||||
Alabama
|
|
||||||
Alaska
|
|
||||||
Arizona
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: use dynamic field names
|
|
||||||
|
|
||||||
Similar to the `$` operator, you can also use `@` with dynamic values. For example, if there are fields named `address_1`, `address_2`, up through `address_5`, you could loop over them as follows:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ cat testdata/csv/address5.csv
|
|
||||||
name,address_1,address_2,address_3,address_4,address_5
|
|
||||||
Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates
|
|
||||||
$ goawk -i csv -H '{ for (i=1; i<=5; i++) print @("address_" i) }' testdata/csv/address5.csv
|
|
||||||
123 Way St
|
|
||||||
Apt 2B
|
|
||||||
Township
|
|
||||||
Cityville
|
|
||||||
United Plates
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: use the `FIELDS` array
|
|
||||||
|
|
||||||
A somewhat contrived example showing use of the `FIELDS` array:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ cat testdata/csv/fields.csv
|
|
||||||
id,name,email
|
|
||||||
1,Bob,b@bob.com
|
|
||||||
$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) print i, FIELDS[i] }' testdata/csv/fields.csv
|
|
||||||
1 id
|
|
||||||
2 name
|
|
||||||
3 email
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: create CSV file from array
|
|
||||||
|
|
||||||
The following example shows how you might pull fields out of an integer-indexed array to produce a CSV file:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -o csv 'BEGIN { print "id", "name"; names[1]="Bob"; names[2]="Jane"; for (i=1; i in names; i++) print i, names[i] }'
|
|
||||||
id,name
|
|
||||||
1,Bob
|
|
||||||
2,Jane
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: create CSV file by assigning fields
|
|
||||||
|
|
||||||
This example shows the same result, but producing the CSV output by assigning individual fields and then using a bare `print` statement:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -o csv 'BEGIN { print "id", "name"; $1=1; $2="Bob"; print; $1=2; $2="Jane"; print }'
|
|
||||||
id,name
|
|
||||||
1,Bob
|
|
||||||
2,Jane
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example: different ways to specify CSV mode
|
|
||||||
|
|
||||||
And finally, four equivalent examples showing different ways to specify the input mode, using `-i` or the `INPUTMODE` special variable (the same techniques work for `-o` and `OUTPUTMODE`):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ goawk -i csv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
|
||||||
NY
|
|
||||||
$ goawk -icsv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
|
||||||
NY
|
|
||||||
$ goawk 'BEGIN { INPUTMODE="csv header" } @"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
|
||||||
NY
|
|
||||||
$ goawk -v 'INPUTMODE=csv header' '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
|
||||||
NY
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Examples based on csvkit
|
|
||||||
|
|
||||||
The [csvkit](https://csvkit.readthedocs.io/en/latest/index.html) suite is a set of tools that allow you to quickly analyze and extract fields from CSV files. Each csvkit tool allows you to do a specific task; GoAWK is more low-level and verbose, but also a more general tool ([`csvsql`](https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html#csvsql-and-sql2csv-ultimate-power) being the exception!). GoAWK also runs significantly faster than csvkit (the latter is written in Python).
|
|
||||||
|
|
||||||
Below are a few snippets showing how you'd do some of the tasks in the csvkit documentation, but using GoAWK (the input file is [testdata/csv/nz-schools.csv](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/testdata/csv/nz-schools.csv)):
|
|
||||||
|
|
||||||
### csvkit example: print column names
|
|
||||||
|
|
||||||
```
|
|
||||||
$ csvcut -n testdata/csv/nz-schools.csv
|
|
||||||
1: School_Id
|
|
||||||
2: Org_Name
|
|
||||||
3: Decile
|
|
||||||
4: Total
|
|
||||||
|
|
||||||
# In GoAWK you have to loop through the fields, but you can print the data in
|
|
||||||
# any format you want (note the "exit" so it stops after the first row):
|
|
||||||
$ goawk -i csv '{ for (i=1; i<=NF; i++) printf "%3d: %s\n", i, $i; exit }' testdata/csv/nz-schools.csv
|
|
||||||
1: School_Id
|
|
||||||
2: Org_Name
|
|
||||||
3: Decile
|
|
||||||
4: Total
|
|
||||||
|
|
||||||
# You could also use -H and the FIELDS array to do this:
|
|
||||||
$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) printf "%3d: %s\n", i, FIELDS[i]; exit }' testdata/csv/nz-schools.csv
|
|
||||||
1: School_Id
|
|
||||||
2: Org_Name
|
|
||||||
3: Decile
|
|
||||||
4: Total
|
|
||||||
```
|
|
||||||
|
|
||||||
### csvkit example: select a subset of columns
|
|
||||||
|
|
||||||
```
|
|
||||||
$ csvcut -c Org_Name,Total testdata/csv/nz-schools.csv
|
|
||||||
Org_Name,Total
|
|
||||||
Waipa Christian School,60
|
|
||||||
Remarkables Primary School,494
|
|
||||||
...
|
|
||||||
|
|
||||||
# In GoAWK you need to print the field names explicitly in BEGIN:
|
|
||||||
$ goawk -i csv -H -o csv 'BEGIN { print "Org_Name", "Total" } { print @"Org_Name", @"Total" }' testdata/csv/nz-schools.csv
|
|
||||||
Org_Name,Total
|
|
||||||
Waipa Christian School,60
|
|
||||||
Remarkables Primary School,494
|
|
||||||
...
|
|
||||||
|
|
||||||
# But you can also change the column names and reorder them:
|
|
||||||
$ goawk -i csv -H -o csv 'BEGIN { print "# Students", "School" } { print @"Total", @"Org_Name" }' testdata/csv/nz-schools.csv
|
|
||||||
# Students,School
|
|
||||||
60,Waipa Christian School
|
|
||||||
494,Remarkables Primary School
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### csvkit example: generate statistics
|
|
||||||
|
|
||||||
There's no equivalent of the `csvstat` tool in GoAWK, but you can calculate statistics yourself. For example, to calculate the total number of students in New Zealand schools, you can do the following (`csvstat` prints a warning because its input has only a single column):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ csvcut -c Total testdata/csv/nz-schools.csv | csvstat --sum
|
|
||||||
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
|
||||||
802,516
|
|
||||||
|
|
||||||
$ goawk -i csv -H '{ sum += @"Total" } END { print sum }' testdata/csv/nz-schools.csv
|
|
||||||
802516
|
|
||||||
```
|
|
||||||
|
|
||||||
To calculate the average (mean) decile level for boys' and girls' schools (sorry, boys!):
|
|
||||||
|
|
||||||
```
|
|
||||||
$ csvgrep -c Org_Name -m Boys testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean
|
|
||||||
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
|
||||||
6.45
|
|
||||||
$ csvgrep -c Org_Name -m Girls testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean
|
|
||||||
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
|
||||||
8.889
|
|
||||||
|
|
||||||
$ goawk -i csv -H '/Boys/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv
|
|
||||||
6.45
|
|
||||||
$ goawk -i csv -H '/Girls/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv
|
|
||||||
8.88889
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Performance
|
|
||||||
|
|
||||||
The performance of GoAWK's CSV input and output mode is quite good, on a par with using the `encoding/csv` package from Go directly, and much faster than the `csv` module in Python. CSV input speed is significantly slower than `frawk`, though CSV output speed is significantly faster than `frawk`.
|
|
||||||
|
|
||||||
Below are the results of some simple read and write [benchmarks](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/scripts/csvbench) using `goawk` and `frawk` as well as plain Python and Go. The output of the write benchmarks is a 1GB, 3.5 million row CSV file with 20 columns (including quoted columns); the input for the read benchmarks uses that same file. Times are in seconds, showing the best of three runs on a 64-bit Linux laptop with an SSD drive:
|
|
||||||
|
|
||||||
Test | goawk | frawk | Python | Go
|
|
||||||
--------------- | ----- | ----- | ------ | ----
|
|
||||||
Reading 1GB CSV | 3.18 | 1.01 | 13.4 | 3.22
|
|
||||||
Writing 1GB CSV | 5.64 | 13.0 | 17.0 | 3.24
|
|
||||||
|
|
||||||
|
|
||||||
## Future work
|
|
||||||
|
|
||||||
* Consider adding a `printrow(a)` or similar function to make it easier to construct CSV rows from scratch.
|
|
||||||
- `a` would be an array such as: `a["name"] = "Bob"; a["age"] = 7`
|
|
||||||
- keys would be ordered by `OFIELDS` (e.g., `OFIELDS[1] = "name"; OFIELDS[2] = "age"`) or by "smart name" if `OFIELDS` is not set ("smart name" meaning numeric order if the keys of `a` are numeric, string order otherwise)
|
|
||||||
- `printrow(a)` could take an optional second `fields` array arg to use that instead of the global `OFIELDS`
|
|
||||||
* Consider allowing `-H` to accept an optional list of field names which could be used as headers in the absence of headers in the file itself (either `-H=name,age` or `-i 'csv header=name,age'`).
|
|
||||||
* Consider adding TrimLeadingSpace CSV input option. See: https://github.com/mojosa-software/goblin/src/tool/awk/issues/109
|
|
||||||
* Consider supporting `@"id" = 42` named field assignment.
|
|
||||||
|
|
||||||
|
|
||||||
## Feedback
|
|
||||||
|
|
||||||
Please [open an issue](https://github.com/mojosa-software/goblin/src/tool/awk/issues) if you have bug reports or feature requests for GoAWK's CSV support.
|
|
Binary file not shown.
|
@ -1,401 +0,0 @@
|
||||||
// Package awk is an implementation of AWK with CSV support (GoAWK).
|
|
||||||
//
|
|
||||||
// You can use the command-line "goawk" command or run AWK from your
|
|
||||||
// Go programs using the "interp" package. The command-line program
|
|
||||||
// has the same interface as regular awk:
|
|
||||||
//
|
|
||||||
// goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]
|
|
||||||
//
|
|
||||||
// The -F flag specifies the field separator (the default is to split
|
|
||||||
// on whitespace). The -v flag allows you to set a variable to a
|
|
||||||
// given value (multiple -v flags allowed). The -f flag allows you to
|
|
||||||
// read AWK source from a file instead of the 'prog' command-line
|
|
||||||
// argument. The rest of the arguments are input filenames (default
|
|
||||||
// is to read from stdin).
|
|
||||||
//
|
|
||||||
// A simple example (prints the sum of the numbers in the file's
|
|
||||||
// second column):
|
|
||||||
//
|
|
||||||
// $ echo 'foo 12
|
|
||||||
// > bar 34
|
|
||||||
// > baz 56' >file.txt
|
|
||||||
// $ goawk '{ sum += $2 } END { print sum }' file.txt
|
|
||||||
// 102
|
|
||||||
//
|
|
||||||
// To use GoAWK in your Go programs, see README.md or the "interp"
|
|
||||||
// package docs.
|
|
||||||
package awk
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"runtime"
|
|
||||||
"runtime/pprof"
|
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/interp"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/gomtool/src/mtool"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
version = "v1.19.0"
|
|
||||||
copyright = "GoAWK " + version + " - Copyright (c) 2022 Ben Hoyt"
|
|
||||||
shortUsage = "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"
|
|
||||||
longUsage = `Standard AWK arguments:
|
|
||||||
-F separator field separator (default " ")
|
|
||||||
-f progfile load AWK source from progfile (multiple allowed)
|
|
||||||
-v var=value variable assignment (multiple allowed)
|
|
||||||
|
|
||||||
Additional GoAWK arguments:
|
|
||||||
-cpuprofile file write CPU profile to file
|
|
||||||
-d print parsed syntax tree to stderr (debug mode)
|
|
||||||
-da print virtual machine assembly instructions to stderr
|
|
||||||
-dt print variable type information to stderr
|
|
||||||
-H parse header row and enable @"field" in CSV input mode
|
|
||||||
-h, --help show this help message
|
|
||||||
-i mode parse input into fields using CSV format (ignore FS and RS)
|
|
||||||
'csv|tsv [separator=<char>] [comment=<char>] [header]'
|
|
||||||
-o mode use CSV output for print with args (ignore OFS and ORS)
|
|
||||||
'csv|tsv [separator=<char>]'
|
|
||||||
-version show GoAWK version and exit
|
|
||||||
`
|
|
||||||
)
|
|
||||||
|
|
||||||
func Run(flags *mtool.Flags) {
|
|
||||||
// Parse command line arguments manually rather than using the
|
|
||||||
// "flag" package, so we can support flags with no space between
|
|
||||||
// flag and argument, like '-F:' (allowed by POSIX)
|
|
||||||
|
|
||||||
// J's comment: nope, we will change it, lol.
|
|
||||||
var progFiles []string
|
|
||||||
var vars []string
|
|
||||||
fieldSep := " "
|
|
||||||
cpuprofile := ""
|
|
||||||
debug := false
|
|
||||||
debugAsm := false
|
|
||||||
debugTypes := false
|
|
||||||
memprofile := ""
|
|
||||||
inputMode := ""
|
|
||||||
outputMode := ""
|
|
||||||
header := false
|
|
||||||
|
|
||||||
argv0 := flags.UtilName()
|
|
||||||
args := flags.AllArgs()
|
|
||||||
|
|
||||||
var i int
|
|
||||||
for i = 0; i < len(args); i++ {
|
|
||||||
// Stop on explicit end of args or first arg not prefixed with "-"
|
|
||||||
arg := args[i]
|
|
||||||
if arg == "--" {
|
|
||||||
i++
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if arg == "-" || !strings.HasPrefix(arg, "-") {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
switch arg {
|
|
||||||
case "-F":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -F")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
fieldSep = args[i]
|
|
||||||
case "-f":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -f")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
progFiles = append(progFiles, args[i])
|
|
||||||
case "-v":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -v")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
vars = append(vars, args[i])
|
|
||||||
case "-cpuprofile":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -cpuprofile")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
cpuprofile = args[i]
|
|
||||||
case "-d":
|
|
||||||
debug = true
|
|
||||||
case "-da":
|
|
||||||
debugAsm = true
|
|
||||||
case "-dt":
|
|
||||||
debugTypes = true
|
|
||||||
case "-H":
|
|
||||||
header = true
|
|
||||||
case "-h", "--help":
|
|
||||||
fmt.Printf("%s\n\n%s\n\n%s", copyright, shortUsage, longUsage)
|
|
||||||
os.Exit(0)
|
|
||||||
case "-i":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -i")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
inputMode = args[i]
|
|
||||||
case "-memprofile":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -memprofile")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
memprofile = args[i]
|
|
||||||
case "-o":
|
|
||||||
if i+1 >= len(args) {
|
|
||||||
errorExitf("flag needs an argument: -o")
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
outputMode = args[i]
|
|
||||||
case "-version", "--version":
|
|
||||||
fmt.Println(version)
|
|
||||||
os.Exit(0)
|
|
||||||
default:
|
|
||||||
switch {
|
|
||||||
case strings.HasPrefix(arg, "-F"):
|
|
||||||
fieldSep = arg[2:]
|
|
||||||
case strings.HasPrefix(arg, "-f"):
|
|
||||||
progFiles = append(progFiles, arg[2:])
|
|
||||||
case strings.HasPrefix(arg, "-i"):
|
|
||||||
inputMode = arg[2:]
|
|
||||||
case strings.HasPrefix(arg, "-o"):
|
|
||||||
outputMode = arg[2:]
|
|
||||||
case strings.HasPrefix(arg, "-v"):
|
|
||||||
vars = append(vars, arg[2:])
|
|
||||||
case strings.HasPrefix(arg, "-cpuprofile="):
|
|
||||||
cpuprofile = arg[12:]
|
|
||||||
case strings.HasPrefix(arg, "-memprofile="):
|
|
||||||
memprofile = arg[12:]
|
|
||||||
default:
|
|
||||||
errorExitf("flag provided but not defined: %s", arg)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Any remaining args are program and input files
|
|
||||||
args = args[i:]
|
|
||||||
|
|
||||||
var src []byte
|
|
||||||
var stdinBytes []byte // used if there's a parse error
|
|
||||||
if len(progFiles) > 0 {
|
|
||||||
// Read source: the concatenation of all source files specified
|
|
||||||
buf := &bytes.Buffer{}
|
|
||||||
progFiles = expandWildcardsOnWindows(progFiles)
|
|
||||||
for _, progFile := range progFiles {
|
|
||||||
if progFile == "-" {
|
|
||||||
b, err := ioutil.ReadAll(os.Stdin)
|
|
||||||
if err != nil {
|
|
||||||
errorExit(err)
|
|
||||||
}
|
|
||||||
stdinBytes = b
|
|
||||||
_, _ = buf.Write(b)
|
|
||||||
} else {
|
|
||||||
f, err := os.Open(progFile)
|
|
||||||
if err != nil {
|
|
||||||
errorExit(err)
|
|
||||||
}
|
|
||||||
_, err = buf.ReadFrom(f)
|
|
||||||
if err != nil {
|
|
||||||
_ = f.Close()
|
|
||||||
errorExit(err)
|
|
||||||
}
|
|
||||||
_ = f.Close()
|
|
||||||
}
|
|
||||||
// Append newline to file in case it doesn't end with one
|
|
||||||
_ = buf.WriteByte('\n')
|
|
||||||
}
|
|
||||||
src = buf.Bytes()
|
|
||||||
} else {
|
|
||||||
if len(args) < 1 {
|
|
||||||
errorExitf(shortUsage)
|
|
||||||
}
|
|
||||||
src = []byte(args[0])
|
|
||||||
args = args[1:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse source code and setup interpreter
|
|
||||||
parserConfig := &parser.ParserConfig{
|
|
||||||
DebugTypes: debugTypes,
|
|
||||||
DebugWriter: os.Stderr,
|
|
||||||
}
|
|
||||||
prog, err := parser.ParseProgram(src, parserConfig)
|
|
||||||
if err != nil {
|
|
||||||
if err, ok := err.(*parser.ParseError); ok {
|
|
||||||
name, line := errorFileLine(progFiles, stdinBytes, err.Position.Line)
|
|
||||||
fmt.Fprintf(os.Stderr, "%s:%d:%d: %s\n",
|
|
||||||
name, line, err.Position.Column, err.Message)
|
|
||||||
showSourceLine(src, err.Position)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
errorExitf("%s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if debug {
|
|
||||||
fmt.Fprintln(os.Stderr, prog)
|
|
||||||
}
|
|
||||||
|
|
||||||
if debugAsm {
|
|
||||||
err := prog.Disassemble(os.Stderr)
|
|
||||||
if err != nil {
|
|
||||||
errorExitf("could not disassemble program: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if header {
|
|
||||||
if inputMode == "" {
|
|
||||||
errorExitf("-H only allowed together with -i")
|
|
||||||
}
|
|
||||||
inputMode += " header"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't buffer output if stdout is a terminal (default output writer when
|
|
||||||
// Config.Output is nil is a buffered version of os.Stdout).
|
|
||||||
var stdout io.Writer
|
|
||||||
stdoutInfo, err := os.Stdout.Stat()
|
|
||||||
if err == nil && stdoutInfo.Mode()&os.ModeCharDevice != 0 {
|
|
||||||
stdout = os.Stdout
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &interp.Config{
|
|
||||||
Argv0: filepath.Base(argv0),
|
|
||||||
Args: expandWildcardsOnWindows(args),
|
|
||||||
Vars: []string{
|
|
||||||
"FS", fieldSep,
|
|
||||||
"INPUTMODE", inputMode,
|
|
||||||
"OUTPUTMODE", outputMode,
|
|
||||||
},
|
|
||||||
Output: stdout,
|
|
||||||
}
|
|
||||||
for _, v := range vars {
|
|
||||||
equals := strings.IndexByte(v, '=')
|
|
||||||
if equals < 0 {
|
|
||||||
errorExitf("-v flag must be in format name=value")
|
|
||||||
}
|
|
||||||
name, value := v[:equals], v[equals+1:]
|
|
||||||
// Oddly, -v must interpret escapes (issue #129)
|
|
||||||
unescaped, err := lexer.Unescape(value)
|
|
||||||
if err == nil {
|
|
||||||
value = unescaped
|
|
||||||
}
|
|
||||||
config.Vars = append(config.Vars, name, value)
|
|
||||||
}
|
|
||||||
|
|
||||||
if cpuprofile != "" {
|
|
||||||
f, err := os.Create(cpuprofile)
|
|
||||||
if err != nil {
|
|
||||||
errorExitf("could not create CPU profile: %v", err)
|
|
||||||
}
|
|
||||||
if err := pprof.StartCPUProfile(f); err != nil {
|
|
||||||
errorExitf("could not start CPU profile: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run the program!
|
|
||||||
status, err := interp.ExecProgram(prog, config)
|
|
||||||
if err != nil {
|
|
||||||
errorExit(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if cpuprofile != "" {
|
|
||||||
pprof.StopCPUProfile()
|
|
||||||
}
|
|
||||||
if memprofile != "" {
|
|
||||||
f, err := os.Create(memprofile)
|
|
||||||
if err != nil {
|
|
||||||
errorExitf("could not create memory profile: %v", err)
|
|
||||||
}
|
|
||||||
runtime.GC() // get up-to-date statistics
|
|
||||||
if err := pprof.WriteHeapProfile(f); err != nil {
|
|
||||||
errorExitf("could not write memory profile: %v", err)
|
|
||||||
}
|
|
||||||
_ = f.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
os.Exit(status)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Show source line and position of error, for example:
|
|
||||||
//
|
|
||||||
// BEGIN { x*; }
|
|
||||||
// ^
|
|
||||||
func showSourceLine(src []byte, pos lexer.Position) {
|
|
||||||
lines := bytes.Split(src, []byte{'\n'})
|
|
||||||
srcLine := string(lines[pos.Line-1])
|
|
||||||
numTabs := strings.Count(srcLine[:pos.Column-1], "\t")
|
|
||||||
runeColumn := utf8.RuneCountInString(srcLine[:pos.Column-1])
|
|
||||||
fmt.Fprintln(os.Stderr, strings.Replace(srcLine, "\t", " ", -1))
|
|
||||||
fmt.Fprintln(os.Stderr, strings.Repeat(" ", runeColumn)+strings.Repeat(" ", numTabs)+"^")
|
|
||||||
}
|
|
||||||
|
|
||||||
// errorFileLine maps errorLine, a line number in the concatenated program
// source, to the name of the program file it falls in and the line number
// within that file.
//
// With no program files the source came from the command line, so
// "<cmdline>" and errorLine are returned unchanged. A progFile of "-" stands
// for standard input: it is reported as "<stdin>" and its content is taken
// from stdinBytes. If a file cannot be read, or errorLine lies beyond all
// files, "<unknown>" and the original errorLine are returned.
func errorFileLine(progFiles []string, stdinBytes []byte, errorLine int) (string, int) {
	if len(progFiles) == 0 {
		return "<cmdline>", errorLine
	}

	firstLine := 1 // overall line number at which the current file begins
	for _, name := range progFiles {
		var data []byte
		switch name {
		case "-":
			name, data = "<stdin>", stdinBytes
		default:
			fileData, err := ioutil.ReadFile(name)
			if err != nil {
				return "<unknown>", errorLine
			}
			data = fileData
		}

		// Every file is counted as if one extra newline followed it, hence
		// the +1 on top of the newlines actually present.
		lineCount := bytes.Count(data, []byte{'\n'}) + 1
		nextFirstLine := firstLine + lineCount
		if firstLine <= errorLine && errorLine < nextFirstLine {
			return name, errorLine - firstLine + 1
		}
		firstLine = nextFirstLine
	}
	return "<unknown>", errorLine
}
|
|
||||||
|
|
||||||
func errorExit(err error) {
|
|
||||||
pathErr, ok := err.(*os.PathError)
|
|
||||||
if ok && os.IsNotExist(err) {
|
|
||||||
errorExitf("file %q not found", pathErr.Path)
|
|
||||||
}
|
|
||||||
errorExitf("%s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// errorExitf formats a message in the style of fmt.Printf, writes it to
// stderr followed by a newline, and exits the process with status 1.
func errorExitf(format string, args ...interface{}) {
	message := fmt.Sprintf(format+"\n", args...)
	fmt.Fprint(os.Stderr, message)
	os.Exit(1)
}
|
|
||||||
|
|
||||||
func expandWildcardsOnWindows(args []string) []string {
|
|
||||||
if runtime.GOOS != "windows" {
|
|
||||||
return args
|
|
||||||
}
|
|
||||||
return expandWildcards(args)
|
|
||||||
}
|
|
||||||
|
|
||||||
// expandWildcards replaces each argument that is a glob pattern matching at
// least one path with the matching paths. Arguments that match nothing, or
// that are malformed patterns, are passed through unchanged.
//
// Originally from https://github.com/mattn/getwild (compatible LICENSE).
func expandWildcards(args []string) []string {
	expanded := make([]string, 0, len(args))
	for _, pattern := range args {
		paths, globErr := filepath.Glob(pattern)
		if globErr != nil || len(paths) == 0 {
			// Not a usable pattern: keep the argument literally.
			expanded = append(expanded, pattern)
			continue
		}
		expanded = append(expanded, paths...)
	}
	return expanded
}
|
|
|
@ -1,749 +0,0 @@
|
||||||
// GoAWK tests
|
|
||||||
|
|
||||||
package awk_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"flag"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
|
||||||
"runtime"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/interp"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
goExe string
|
|
||||||
testsDir string
|
|
||||||
outputDir string
|
|
||||||
awkExe string
|
|
||||||
goAWKExe string
|
|
||||||
writeAWK bool
|
|
||||||
writeGoAWK bool
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
|
||||||
flag.StringVar(&goExe, "goexe", "go", "set to override Go executable used to build goawk")
|
|
||||||
flag.StringVar(&testsDir, "testsdir", "./testdata", "directory with one-true-awk tests")
|
|
||||||
flag.StringVar(&outputDir, "outputdir", "./testdata/output", "directory for test output")
|
|
||||||
flag.StringVar(&awkExe, "awk", "gawk", "awk executable name")
|
|
||||||
flag.StringVar(&goAWKExe, "goawk", "./goawk", "goawk executable name")
|
|
||||||
flag.BoolVar(&writeAWK, "writeawk", false, "write expected output")
|
|
||||||
flag.BoolVar(&writeGoAWK, "writegoawk", true, "write Go AWK output")
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
cmd := exec.Command(goExe, "build", "-ldflags=-w")
|
|
||||||
stderr, err := cmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error building goawk: %v\n%s\n", err, stderr)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
os.Exit(m.Run())
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAWK(t *testing.T) {
|
|
||||||
inputByPrefix := map[string]string{
|
|
||||||
"t": "test.data",
|
|
||||||
"p": "test.countries",
|
|
||||||
}
|
|
||||||
// These programs exit with non-zero status code
|
|
||||||
errorExits := map[string]bool{
|
|
||||||
"t.exit": true,
|
|
||||||
"t.exit1": true,
|
|
||||||
"t.gsub4": true,
|
|
||||||
"t.split3": true,
|
|
||||||
}
|
|
||||||
// These programs have known different output
|
|
||||||
knownDifferent := map[string]bool{
|
|
||||||
"t.printf2": true, // because awk is weird here (our behavior is like mawk)
|
|
||||||
}
|
|
||||||
// Can't really diff test rand() tests as we're using a totally
|
|
||||||
// different algorithm for random numbers
|
|
||||||
randTests := map[string]bool{
|
|
||||||
"p.48b": true,
|
|
||||||
"t.randk": true,
|
|
||||||
}
|
|
||||||
// These tests use "for (x in a)", which iterates in an undefined
|
|
||||||
// order (according to the spec), so sort lines before comparing.
|
|
||||||
sortLines := map[string]bool{
|
|
||||||
"p.43": true,
|
|
||||||
"t.in1": true, // because "sort" is locale-dependent
|
|
||||||
"t.in2": true,
|
|
||||||
"t.intest2": true,
|
|
||||||
}
|
|
||||||
dontRunOnWindows := map[string]bool{
|
|
||||||
"p.50": true, // because this pipes to Unix sort "sort -t: +0 -1 +2nr"
|
|
||||||
}
|
|
||||||
|
|
||||||
infos, err := ioutil.ReadDir(testsDir)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("couldn't read test files: %v", err)
|
|
||||||
}
|
|
||||||
for _, info := range infos {
|
|
||||||
if !strings.HasPrefix(info.Name(), "t.") && !strings.HasPrefix(info.Name(), "p.") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if runtime.GOOS == "windows" && dontRunOnWindows[info.Name()] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
t.Run(info.Name(), func(t *testing.T) {
|
|
||||||
srcPath := filepath.Join(testsDir, info.Name())
|
|
||||||
inputPath := filepath.Join(testsDir, inputByPrefix[info.Name()[:1]])
|
|
||||||
outputPath := filepath.Join(outputDir, info.Name())
|
|
||||||
|
|
||||||
cmd := exec.Command(awkExe, "-f", srcPath, inputPath)
|
|
||||||
expected, err := cmd.Output()
|
|
||||||
if err != nil && !errorExits[info.Name()] {
|
|
||||||
t.Fatalf("error running %s: %v", awkExe, err)
|
|
||||||
}
|
|
||||||
expected = bytes.Replace(expected, []byte{0}, []byte("<00>"), -1)
|
|
||||||
expected = normalizeNewlines(expected)
|
|
||||||
if sortLines[info.Name()] {
|
|
||||||
expected = sortedLines(expected)
|
|
||||||
}
|
|
||||||
if writeAWK {
|
|
||||||
err := ioutil.WriteFile(outputPath, expected, 0644)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error writing awk output: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
prog, err := parseGoAWK(srcPath)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
output, err := interpGoAWK(prog, inputPath)
|
|
||||||
if err != nil && !errorExits[info.Name()] {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
output = bytes.Replace(output, []byte{0}, []byte("<00>"), -1)
|
|
||||||
output = normalizeNewlines(output)
|
|
||||||
if randTests[info.Name()] || knownDifferent[info.Name()] {
|
|
||||||
// For tests that use rand(), run them to ensure they
|
|
||||||
// parse and interpret, but can't compare the output,
|
|
||||||
// so stop now
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if sortLines[info.Name()] {
|
|
||||||
output = sortedLines(output)
|
|
||||||
}
|
|
||||||
if writeGoAWK {
|
|
||||||
err := ioutil.WriteFile(outputPath, output, 0644)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error writing goawk output: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if string(output) != string(expected) {
|
|
||||||
t.Fatalf("output differs, run: git diff %s", outputPath)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
_ = os.Remove("tempbig")
|
|
||||||
_ = os.Remove("tempsmall")
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseGoAWK(srcPath string) (*parser.Program, error) {
|
|
||||||
src, err := ioutil.ReadFile(srcPath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
prog, err := parser.ParseProgram(src, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return prog, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func interpGoAWK(prog *parser.Program, inputPath string) ([]byte, error) {
|
|
||||||
outBuf := &bytes.Buffer{}
|
|
||||||
errBuf := &bytes.Buffer{}
|
|
||||||
config := &interp.Config{
|
|
||||||
Output: outBuf,
|
|
||||||
Error: &concurrentWriter{w: errBuf},
|
|
||||||
Args: []string{inputPath},
|
|
||||||
}
|
|
||||||
_, err := interp.ExecProgram(prog, config)
|
|
||||||
result := outBuf.Bytes()
|
|
||||||
result = append(result, errBuf.Bytes()...)
|
|
||||||
return result, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func interpGoAWKStdin(prog *parser.Program, inputPath string) ([]byte, error) {
|
|
||||||
input, _ := ioutil.ReadFile(inputPath)
|
|
||||||
outBuf := &bytes.Buffer{}
|
|
||||||
errBuf := &bytes.Buffer{}
|
|
||||||
config := &interp.Config{
|
|
||||||
Stdin: &concurrentReader{r: bytes.NewReader(input)},
|
|
||||||
Output: outBuf,
|
|
||||||
Error: &concurrentWriter{w: errBuf},
|
|
||||||
// srcdir is for "redfilnm.awk"
|
|
||||||
Vars: []string{"srcdir", filepath.Dir(inputPath)},
|
|
||||||
}
|
|
||||||
_, err := interp.ExecProgram(prog, config)
|
|
||||||
result := outBuf.Bytes()
|
|
||||||
result = append(result, errBuf.Bytes()...)
|
|
||||||
return result, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wraps a Writer but makes Write calls safe for concurrent use.
|
|
||||||
type concurrentWriter struct {
|
|
||||||
w io.Writer
|
|
||||||
mu sync.Mutex
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *concurrentWriter) Write(p []byte) (int, error) {
|
|
||||||
w.mu.Lock()
|
|
||||||
defer w.mu.Unlock()
|
|
||||||
return w.w.Write(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wraps a Reader but makes Read calls safe for concurrent use.
|
|
||||||
type concurrentReader struct {
|
|
||||||
r io.Reader
|
|
||||||
mu sync.Mutex
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *concurrentReader) Read(p []byte) (int, error) {
|
|
||||||
r.mu.Lock()
|
|
||||||
defer r.mu.Unlock()
|
|
||||||
return r.r.Read(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// sortedLines returns the lines of data sorted in increasing byte-wise string
// order. One trailing newline, if present, is dropped before splitting, and
// the result always ends with exactly one newline after the last line.
func sortedLines(data []byte) []byte {
	body := strings.TrimSuffix(string(data), "\n")
	sorted := strings.Split(body, "\n")
	sort.Sort(sort.StringSlice(sorted))
	joined := strings.Join(sorted, "\n")
	return append([]byte(joined), '\n')
}
|
|
||||||
|
|
||||||
func TestGAWK(t *testing.T) {
|
|
||||||
skip := map[string]bool{ // TODO: fix these (at least the ones that are bugs)
|
|
||||||
"getline": true, // getline syntax issues (may be okay, see grammar notes at http://pubs.opengroup.org/onlinepubs/007904975/utilities/awk.html#tag_04_06_13_14)
|
|
||||||
"getline3": true, // getline syntax issues (similar to above)
|
|
||||||
|
|
||||||
"gsubtst7": true, // something wrong with gsub or field split/join
|
|
||||||
"splitwht": true, // other awks handle split(s, a, " ") differently from split(s, a, / /)
|
|
||||||
"status-close": true, // hmmm, not sure what's up here
|
|
||||||
"sigpipe1": true, // probable race condition: sometimes fails, sometimes passes
|
|
||||||
|
|
||||||
"parse1": true, // incorrect parsing of $$a++++ (see TODOs in interp_test.go too)
|
|
||||||
|
|
||||||
"rscompat": true, // GoAWK allows multi-char RS by default
|
|
||||||
"rsstart2": true, // GoAWK ^ and $ anchors match beginning and end of line, not file (unlike Gawk)
|
|
||||||
|
|
||||||
"hex2": true, // GoAWK allows hex numbers / floating point (per POSIX)
|
|
||||||
"strtod": true, // GoAWK allows hex numbers / floating point (per POSIX)
|
|
||||||
}
|
|
||||||
|
|
||||||
dontRunOnWindows := map[string]bool{
|
|
||||||
"delargv": true, // reads from /dev/null
|
|
||||||
"eofsplit": true, // reads from /etc/passwd
|
|
||||||
"getline5": true, // removes a file while it's open
|
|
||||||
"iobug1": true, // reads from /dev/null
|
|
||||||
}
|
|
||||||
|
|
||||||
sortLines := map[string]bool{
|
|
||||||
"arryref2": true,
|
|
||||||
"delargv": true,
|
|
||||||
"delarpm2": true,
|
|
||||||
"forref": true,
|
|
||||||
}
|
|
||||||
|
|
||||||
gawkDir := filepath.Join(testsDir, "gawk")
|
|
||||||
infos, err := ioutil.ReadDir(gawkDir)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("couldn't read test files: %v", err)
|
|
||||||
}
|
|
||||||
for _, info := range infos {
|
|
||||||
if !strings.HasSuffix(info.Name(), ".awk") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
testName := info.Name()[:len(info.Name())-4]
|
|
||||||
if skip[testName] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if runtime.GOOS == "windows" && dontRunOnWindows[testName] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
t.Run(testName, func(t *testing.T) {
|
|
||||||
srcPath := filepath.Join(gawkDir, info.Name())
|
|
||||||
inputPath := filepath.Join(gawkDir, testName+".in")
|
|
||||||
okPath := filepath.Join(gawkDir, testName+".ok")
|
|
||||||
|
|
||||||
expected, err := ioutil.ReadFile(okPath)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
expected = normalizeNewlines(expected)
|
|
||||||
|
|
||||||
prog, err := parseGoAWK(srcPath)
|
|
||||||
if err != nil {
|
|
||||||
if err.Error() != string(expected) {
|
|
||||||
t.Fatalf("parser error differs, got:\n%s\nexpected:\n%s", err.Error(), expected)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
output, err := interpGoAWKStdin(prog, inputPath)
|
|
||||||
output = normalizeNewlines(output)
|
|
||||||
if err != nil {
|
|
||||||
errStr := string(output) + err.Error()
|
|
||||||
if errStr != string(expected) {
|
|
||||||
t.Fatalf("interp error differs, got:\n%s\nexpected:\n%s", errStr, expected)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if sortLines[testName] {
|
|
||||||
output = sortedLines(output)
|
|
||||||
expected = sortedLines(expected)
|
|
||||||
}
|
|
||||||
|
|
||||||
if string(output) != string(expected) {
|
|
||||||
t.Fatalf("output differs, got:\n%s\nexpected:\n%s", output, expected)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
_ = os.Remove("seq")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCommandLine(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
args []string
|
|
||||||
stdin string
|
|
||||||
output string
|
|
||||||
error string
|
|
||||||
}{
|
|
||||||
// Load source from stdin
|
|
||||||
{[]string{"-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
|
||||||
{[]string{"-f", "-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
|
||||||
{[]string{"-f-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
|
||||||
|
|
||||||
// Program with no input
|
|
||||||
{[]string{`BEGIN { print "a" }`}, "", "a\n", ""},
|
|
||||||
|
|
||||||
// Read input from stdin
|
|
||||||
{[]string{`$0`}, "one\n\nthree", "one\nthree\n", ""},
|
|
||||||
{[]string{`$0`, "-"}, "one\n\nthree", "one\nthree\n", ""},
|
|
||||||
{[]string{`$0`, "-", "-"}, "one\n\nthree", "one\nthree\n", ""},
|
|
||||||
{[]string{"-f", "testdata/t.0", "-"}, "one\ntwo\n", "one\ntwo\n", ""},
|
|
||||||
{[]string{"{ print FILENAME }"}, "a", "-\n", ""},
|
|
||||||
{[]string{"{ print FILENAME }", "-"}, "a", "-\n", ""},
|
|
||||||
|
|
||||||
// Read input from file(s)
|
|
||||||
{[]string{`$0`, "testdata/g.1"}, "", "ONE\n", ""},
|
|
||||||
{[]string{`$0`, "testdata/g.1", "testdata/g.2"}, "", "ONE\nTWO\n", ""},
|
|
||||||
{[]string{`{ print FILENAME ":" FNR "/" NR ": " $0 }`, "testdata/g.1", "testdata/g.4"}, "",
|
|
||||||
"testdata/g.1:1/1: ONE\ntestdata/g.4:1/2: FOUR a\ntestdata/g.4:2/3: FOUR b\n", ""},
|
|
||||||
{[]string{`$0`, "testdata/g.1", "-", "testdata/g.2"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""},
|
|
||||||
{[]string{`$0`, "testdata/g.1", "-", "testdata/g.2", "-"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""},
|
|
||||||
{[]string{"-F", " ", "--", "$0", "testdata/g.1"}, "", "ONE\n", ""},
|
|
||||||
{[]string{"{ print NR, FNR } END { print NR, FNR }", "-"}, "a\nb\nc\n", "1 1\n2 2\n3 3\n3 3\n", ""},
|
|
||||||
// I've deleted the "-ftest" file for now as it was causing problems with "go install" zip files
|
|
||||||
// {[]string{"--", "$0", "-ftest"}, "", "used in tests; do not delete\n", ""}, // Issue #53
|
|
||||||
// {[]string{"$0", "-ftest"}, "", "used in tests; do not delete\n", ""},
|
|
||||||
|
|
||||||
// Specifying field separator with -F
|
|
||||||
{[]string{`{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 3\n4 6\n", ""},
|
|
||||||
{[]string{"-F", ",", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 2 3 \n4 5 6 \n", ""},
|
|
||||||
{[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
|
||||||
{[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
|
||||||
{[]string{"-F,", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
|
||||||
|
|
||||||
// Assigning other variables with -v
|
|
||||||
{[]string{"-v", "OFS=.", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.3\n4.6\n", ""},
|
|
||||||
{[]string{"-v", "OFS=.", "-v", "ORS=", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.34.6", ""},
|
|
||||||
{[]string{"-v", "x=42", "-v", "y=foo", `BEGIN { print x, y }`}, "", "42 foo\n", ""},
|
|
||||||
{[]string{"-v", "RS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""},
|
|
||||||
{[]string{"-vRS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""},
|
|
||||||
{[]string{"-v", `X=x\ty`, `BEGIN { printf X }`}, "", "x\ty", ""},
|
|
||||||
|
|
||||||
// ARGV/ARGC handling
|
|
||||||
{[]string{`
|
|
||||||
BEGIN {
|
|
||||||
for (i=1; i<ARGC; i++) {
|
|
||||||
print i, ARGV[i]
|
|
||||||
}
|
|
||||||
}`, "a", "b"}, "", "1 a\n2 b\n", ""},
|
|
||||||
{[]string{`
|
|
||||||
BEGIN {
|
|
||||||
for (i=1; i<ARGC; i++) {
|
|
||||||
print i, ARGV[i]
|
|
||||||
delete ARGV[i]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$0`, "a", "b"}, "c\nd", "1 a\n2 b\nc\nd\n", ""},
|
|
||||||
{[]string{`
|
|
||||||
BEGIN {
|
|
||||||
ARGV[1] = ""
|
|
||||||
}
|
|
||||||
$0`, "testdata/g.1", "-", "testdata/g.2"}, "c\nd", "c\nd\nTWO\n", ""},
|
|
||||||
{[]string{`
|
|
||||||
BEGIN {
|
|
||||||
ARGC = 3
|
|
||||||
}
|
|
||||||
$0`, "testdata/g.1", "-", "testdata/g.2"}, "c\nd", "ONE\nc\nd\n", ""},
|
|
||||||
{[]string{"-v", "A=1", "-f", "testdata/g.3", "B=2", "testdata/test.countries"}, "",
|
|
||||||
"A=1, B=0\n\tARGV[1] = B=2\n\tARGV[2] = testdata/test.countries\nA=1, B=2\n", ""},
|
|
||||||
{[]string{`END { print (x==42) }`, "x=42.0"}, "", "1\n", ""},
|
|
||||||
{[]string{`END { printf X }`, `X=a\tb`}, "", "a\tb", ""},
|
|
||||||
{[]string{"-v", "x=42.0", `BEGIN { print (x==42) }`}, "", "1\n", ""},
|
|
||||||
{[]string{`BEGIN { print(ARGV[1]<2, ARGV[2]<2); ARGV[1]="10"; ARGV[2]="10x"; print(ARGV[1]<2, ARGV[2]<2) }`,
|
|
||||||
"10", "10x"}, "", "0 1\n1 1\n", ""},
|
|
||||||
|
|
||||||
// Error handling
|
|
||||||
{[]string{}, "", "", "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"},
|
|
||||||
{[]string{"-F"}, "", "", "flag needs an argument: -F"},
|
|
||||||
{[]string{"-f"}, "", "", "flag needs an argument: -f"},
|
|
||||||
{[]string{"-v"}, "", "", "flag needs an argument: -v"},
|
|
||||||
{[]string{"-z"}, "", "", "flag provided but not defined: -z"},
|
|
||||||
{[]string{"{ print }", "notexist"}, "", "", `file "notexist" not found`},
|
|
||||||
{[]string{"BEGIN { print 1/0 }"}, "", "", "division by zero"},
|
|
||||||
{[]string{"-v", "foo", "BEGIN {}"}, "", "", "-v flag must be in format name=value"},
|
|
||||||
{[]string{"--", "{ print $1 }", "-file"}, "", "", `file "-file" not found`},
|
|
||||||
{[]string{"{ print $1 }", "-file"}, "", "", `file "-file" not found`},
|
|
||||||
|
|
||||||
// Output synchronization
|
|
||||||
{[]string{`BEGIN { print "1"; print "2"|"cat" }`}, "", "1\n2\n", ""},
|
|
||||||
{[]string{`BEGIN { print "1"; "echo 2" | getline x; print x }`}, "", "1\n2\n", ""},
|
|
||||||
|
|
||||||
// Parse error formatting
|
|
||||||
{[]string{"`"}, "", "", "<cmdline>:1:1: unexpected char\n`\n^"},
|
|
||||||
{[]string{"BEGIN {\n\tx*;\n}"}, "", "", "<cmdline>:2:4: expected expression instead of ;\n x*;\n ^"},
|
|
||||||
{[]string{"BEGIN {\n\tx*\r\n}"}, "", "", "<cmdline>:2:4: expected expression instead of <newline>\n x*\n ^"},
|
|
||||||
{[]string{"-f", "-"}, "\n ++", "", "<stdin>:2:4: expected expression instead of <newline>\n ++\n ^"},
|
|
||||||
{[]string{"-f", "testdata/parseerror/good.awk", "-f", "testdata/parseerror/bad.awk"},
|
|
||||||
"", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of <newline>\nx*\n ^"},
|
|
||||||
{[]string{"-f", "testdata/parseerror/bad.awk", "-f", "testdata/parseerror/good.awk"},
|
|
||||||
"", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of <newline>\nx*\n ^"},
|
|
||||||
{[]string{"-f", "testdata/parseerror/good.awk", "-f", "-", "-f", "testdata/parseerror/bad.awk"},
|
|
||||||
"`", "", "<stdin>:1:1: unexpected char\n`\n^"},
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
testName := strings.Join(test.args, " ")
|
|
||||||
t.Run(testName, func(t *testing.T) {
|
|
||||||
runAWKs(t, test.args, test.stdin, test.output, test.error)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDevStdout(t *testing.T) {
|
|
||||||
if runtime.GOOS == "windows" {
|
|
||||||
t.Skip("/dev/stdout not presnt on Windows")
|
|
||||||
}
|
|
||||||
runAWKs(t, []string{`BEGIN { print "1"; print "2">"/dev/stdout" }`}, "", "1\n2\n", "")
|
|
||||||
}
|
|
||||||
|
|
||||||
func runGoAWK(args []string, stdin string) (stdout, stderr string, err error) {
|
|
||||||
cmd := exec.Command(goAWKExe, args...)
|
|
||||||
if stdin != "" {
|
|
||||||
cmd.Stdin = strings.NewReader(stdin)
|
|
||||||
}
|
|
||||||
errBuf := &bytes.Buffer{}
|
|
||||||
cmd.Stderr = errBuf
|
|
||||||
output, err := cmd.Output()
|
|
||||||
stdout = string(normalizeNewlines(output))
|
|
||||||
stderr = string(normalizeNewlines(errBuf.Bytes()))
|
|
||||||
return stdout, stderr, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func runAWKs(t *testing.T, testArgs []string, testStdin, testOutput, testError string) {
|
|
||||||
var args []string
|
|
||||||
if strings.Contains(awkExe, "gawk") {
|
|
||||||
args = append(args, "--posix")
|
|
||||||
}
|
|
||||||
args = append(args, testArgs...)
|
|
||||||
cmd := exec.Command(awkExe, testArgs...)
|
|
||||||
if testStdin != "" {
|
|
||||||
cmd.Stdin = strings.NewReader(testStdin)
|
|
||||||
}
|
|
||||||
errBuf := &bytes.Buffer{}
|
|
||||||
cmd.Stderr = errBuf
|
|
||||||
output, err := cmd.Output()
|
|
||||||
if err != nil {
|
|
||||||
if testError == "" {
|
|
||||||
t.Fatalf("expected no error, got AWK error: %v (%s)", err, errBuf.String())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if testError != "" {
|
|
||||||
t.Fatalf("expected AWK error, got none")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stdout := string(normalizeNewlines(output))
|
|
||||||
if stdout != testOutput {
|
|
||||||
t.Fatalf("expected AWK to give %q, got %q", testOutput, stdout)
|
|
||||||
}
|
|
||||||
|
|
||||||
stdout, stderr, err := runGoAWK(testArgs, testStdin)
|
|
||||||
if err != nil {
|
|
||||||
stderr = strings.TrimSpace(stderr)
|
|
||||||
if stderr != testError {
|
|
||||||
t.Fatalf("expected GoAWK error %q, got %q", testError, stderr)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if testError != "" {
|
|
||||||
t.Fatalf("expected GoAWK error %q, got none", testError)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if stdout != testOutput {
|
|
||||||
t.Fatalf("expected GoAWK to give %q, got %q", testOutput, stdout)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestWildcards(t *testing.T) {
|
|
||||||
if runtime.GOOS != "windows" {
|
|
||||||
// Wildcards shouldn't be expanded on non-Windows systems, and a file
|
|
||||||
// literally named "*.go" doesn't exist, so expect a failure.
|
|
||||||
_, stderr, err := runGoAWK([]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"}, "")
|
|
||||||
if err == nil {
|
|
||||||
t.Fatal("expected error using wildcards on non-Windows system")
|
|
||||||
}
|
|
||||||
expected := "file \"testdata/wildcards/*.txt\" not found\n"
|
|
||||||
if stderr != expected {
|
|
||||||
t.Fatalf("expected %q, got %q", expected, stderr)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
tests := []struct {
|
|
||||||
args []string
|
|
||||||
output string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
[]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"},
|
|
||||||
"testdata/wildcards/one.txt\ntestdata/wildcards/two.txt\n",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
[]string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/one.txt"},
|
|
||||||
"testdata/wildcards/one.txt\nbee\n",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
[]string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/*.txt"},
|
|
||||||
"testdata/wildcards/one.txt\nbee\ntestdata/wildcards/two.txt\nbee\n",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
testName := strings.Join(test.args, " ")
|
|
||||||
t.Run(testName, func(t *testing.T) {
|
|
||||||
stdout, stderr, err := runGoAWK(test.args, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
|
||||||
}
|
|
||||||
stdout = strings.Replace(stdout, "\\", "/", -1)
|
|
||||||
if stdout != test.output {
|
|
||||||
t.Fatalf("expected %q, got %q", test.output, stdout)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFILENAME(t *testing.T) {
|
|
||||||
origGoAWKExe := goAWKExe
|
|
||||||
goAWKExe = "../../" + goAWKExe
|
|
||||||
defer func() { goAWKExe = origGoAWKExe }()
|
|
||||||
|
|
||||||
origDir, err := os.Getwd()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
err = os.Chdir("testdata/filename")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
defer os.Chdir(origDir)
|
|
||||||
|
|
||||||
src := `
|
|
||||||
BEGIN { FILENAME = "10"; print(FILENAME, FILENAME<2) }
|
|
||||||
BEGIN { FILENAME = 10; print(FILENAME, FILENAME<2) }
|
|
||||||
{ print(FILENAME, FILENAME<2) }
|
|
||||||
`
|
|
||||||
runAWKs(t, []string{src, "10", "10x"}, "", "10 1\n10 0\n10 0\n10x 1\n", "")
|
|
||||||
}
|
|
||||||
|
|
||||||
// normalizeNewlines returns a copy of b in which every CRLF pair has been
// replaced by a single LF. Lone CR bytes are left untouched.
func normalizeNewlines(b []byte) []byte {
	crlf := []byte("\r\n")
	lf := []byte{'\n'}
	return bytes.Replace(b, crlf, lf, -1)
}
|
|
||||||
|
|
||||||
func TestInputOutputMode(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
args []string
|
|
||||||
input string
|
|
||||||
output string
|
|
||||||
error string
|
|
||||||
}{
|
|
||||||
{[]string{"-icsv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
|
||||||
{[]string{"-i", "csv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
|
||||||
{[]string{"-icsv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
|
||||||
{[]string{"-i", "csv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
|
||||||
{[]string{"-icsv", "-H", "-ocsv", `{ print @"age", @"name" }`}, "name,age\n\"Bo,ba\",42\nJane,37", "42,\"Bo,ba\"\n37,Jane\n", ""},
|
|
||||||
{[]string{"-o", "csv", `BEGIN { print "foo,bar", 3.14, "baz" }`}, "", "\"foo,bar\",3.14,baz\n", ""},
|
|
||||||
{[]string{"-iabc", `{}`}, "", "", "invalid input mode \"abc\"\n"},
|
|
||||||
{[]string{"-oxyz", `{}`}, "", "", "invalid output mode \"xyz\"\n"},
|
|
||||||
{[]string{"-H", `{}`}, "", "", "-H only allowed together with -i\n"},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
testName := strings.Join(test.args, " ")
|
|
||||||
t.Run(testName, func(t *testing.T) {
|
|
||||||
stdout, stderr, err := runGoAWK(test.args, test.input)
|
|
||||||
if err != nil {
|
|
||||||
if test.error == "" {
|
|
||||||
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
|
||||||
} else if stderr != test.error {
|
|
||||||
t.Fatalf("expected error message %q, got %q", test.error, stderr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if stdout != test.output {
|
|
||||||
t.Fatalf("expected %q, got %q", test.output, stdout)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMultipleCSVFiles(t *testing.T) {
|
|
||||||
// Ensure CSV handling works across multiple files with different headers (field names).
|
|
||||||
src := `
|
|
||||||
{
|
|
||||||
for (i=1; i in FIELDS; i++) {
|
|
||||||
if (i>1)
|
|
||||||
printf ",";
|
|
||||||
printf "%s", FIELDS[i]
|
|
||||||
}
|
|
||||||
printf " "
|
|
||||||
}
|
|
||||||
{ print @"name", @"age" }
|
|
||||||
`
|
|
||||||
stdout, stderr, err := runGoAWK([]string{"-i", "csv", "-H", src, "testdata/csv/1.csv", "testdata/csv/2.csv"}, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
|
||||||
}
|
|
||||||
expected := `
|
|
||||||
name,age Bob 42
|
|
||||||
name,age Jill 37
|
|
||||||
age,email,name Sarah 25
|
|
||||||
`[1:]
|
|
||||||
if stdout != expected {
|
|
||||||
t.Fatalf("expected %q, got %q", expected, stdout)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCSVDocExamples(t *testing.T) {
|
|
||||||
f, err := os.Open("csv.md")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error opening examples file: %v", err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
var (
|
|
||||||
command string
|
|
||||||
output string
|
|
||||||
truncated bool
|
|
||||||
n = 1
|
|
||||||
)
|
|
||||||
runTest := func() {
|
|
||||||
t.Run(fmt.Sprintf("Example%d", n), func(t *testing.T) {
|
|
||||||
shell := "/bin/sh"
|
|
||||||
if runtime.GOOS == "windows" {
|
|
||||||
shell = "sh"
|
|
||||||
}
|
|
||||||
cmd := exec.Command(shell, "-c", command)
|
|
||||||
gotBytes, err := cmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error running %q: %v\n%s", command, err, gotBytes)
|
|
||||||
}
|
|
||||||
got := string(gotBytes)
|
|
||||||
if truncated {
|
|
||||||
numLines := strings.Count(output, "\n")
|
|
||||||
got = strings.Join(strings.Split(got, "\n")[:numLines], "\n") + "\n"
|
|
||||||
}
|
|
||||||
got = string(normalizeNewlines([]byte(got)))
|
|
||||||
if got != output {
|
|
||||||
t.Fatalf("error running %q\ngot:\n%s\nexpected:\n%s", command, got, output)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
n++
|
|
||||||
}
|
|
||||||
|
|
||||||
scanner := bufio.NewScanner(f)
|
|
||||||
inTest := false
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := scanner.Text()
|
|
||||||
if strings.HasPrefix(line, "$ goawk") {
|
|
||||||
if inTest {
|
|
||||||
runTest()
|
|
||||||
}
|
|
||||||
inTest = true
|
|
||||||
command = "./" + line[2:]
|
|
||||||
output = ""
|
|
||||||
truncated = false
|
|
||||||
} else if inTest {
|
|
||||||
switch line {
|
|
||||||
case "```", "":
|
|
||||||
runTest()
|
|
||||||
inTest = false
|
|
||||||
case "...":
|
|
||||||
truncated = true
|
|
||||||
runTest()
|
|
||||||
inTest = false
|
|
||||||
default:
|
|
||||||
output += line + "\n"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if scanner.Err() != nil {
|
|
||||||
t.Errorf("error reading input: %v", scanner.Err())
|
|
||||||
}
|
|
||||||
if inTest {
|
|
||||||
t.Error("unexpectedly in test at end of file")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMandelbrot(t *testing.T) {
|
|
||||||
stdout, stderr, err := runGoAWK([]string{"-v", "width=80", "-v", "height=25", "-f", "testdata/tt.x1_mandelbrot"}, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
|
||||||
}
|
|
||||||
expected := `
|
|
||||||
................................................................................
|
|
||||||
......................................................--+-----..................
|
|
||||||
....................................................-----+*+-++-................
|
|
||||||
.................................................--------+* *+-----.............
|
|
||||||
..............................................--------+# #%*-------.........
|
|
||||||
.........................................------------++$ +-----------.....
|
|
||||||
...................................---------* # +* # *+++++%+--...
|
|
||||||
............................----------------++ @ *----..
|
|
||||||
.......................-+----------------+$ %+----..
|
|
||||||
..................-------*++%++**+++---++ #+--.
|
|
||||||
...............----------+* #*++* %*---.
|
|
||||||
.............-------+++++* # #----.
|
|
||||||
....------+-------++**@ @ ------.
|
|
||||||
....------+-------++**@ @ ------.
|
|
||||||
.............-------+++++* # #----.
|
|
||||||
...............----------+* #*++* %*---.
|
|
||||||
..................-------*++%++**+++---++ #+--.
|
|
||||||
.......................-+----------------+$ %+----..
|
|
||||||
............................----------------++ @ *----..
|
|
||||||
...................................---------* # +* # *+++++%+--...
|
|
||||||
.........................................------------++$ +-----------.....
|
|
||||||
..............................................--------+# #%*-------.........
|
|
||||||
.................................................--------+* *+-----.............
|
|
||||||
....................................................-----+*+-++-................
|
|
||||||
......................................................--+-----..................
|
|
||||||
`[1:]
|
|
||||||
if stdout != expected {
|
|
||||||
t.Fatalf("expected:\n%s\ngot:\n%s", expected, stdout)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,600 +0,0 @@
|
||||||
// GoAWK parser - abstract syntax tree structs
|
|
||||||
|
|
||||||
package ast
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
. "github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Program is an entire AWK program.
|
|
||||||
type Program struct {
|
|
||||||
Begin []Stmts
|
|
||||||
Actions []Action
|
|
||||||
End []Stmts
|
|
||||||
Functions []Function
|
|
||||||
Scalars map[string]int
|
|
||||||
Arrays map[string]int
|
|
||||||
}
|
|
||||||
|
|
||||||
// String returns an indented, pretty-printed version of the parsed
|
|
||||||
// program.
|
|
||||||
func (p *Program) String() string {
|
|
||||||
parts := []string{}
|
|
||||||
for _, ss := range p.Begin {
|
|
||||||
parts = append(parts, "BEGIN {\n"+ss.String()+"}")
|
|
||||||
}
|
|
||||||
for _, a := range p.Actions {
|
|
||||||
parts = append(parts, a.String())
|
|
||||||
}
|
|
||||||
for _, ss := range p.End {
|
|
||||||
parts = append(parts, "END {\n"+ss.String()+"}")
|
|
||||||
}
|
|
||||||
for _, function := range p.Functions {
|
|
||||||
parts = append(parts, function.String())
|
|
||||||
}
|
|
||||||
return strings.Join(parts, "\n\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stmts is a block containing multiple statements.
|
|
||||||
type Stmts []Stmt
|
|
||||||
|
|
||||||
func (ss Stmts) String() string {
|
|
||||||
lines := []string{}
|
|
||||||
for _, s := range ss {
|
|
||||||
subLines := strings.Split(s.String(), "\n")
|
|
||||||
for _, sl := range subLines {
|
|
||||||
lines = append(lines, " "+sl+"\n")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return strings.Join(lines, "")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Action is pattern-action section of a program.
|
|
||||||
type Action struct {
|
|
||||||
Pattern []Expr
|
|
||||||
Stmts Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *Action) String() string {
|
|
||||||
patterns := make([]string, len(a.Pattern))
|
|
||||||
for i, p := range a.Pattern {
|
|
||||||
patterns[i] = p.String()
|
|
||||||
}
|
|
||||||
sep := ""
|
|
||||||
if len(patterns) > 0 && a.Stmts != nil {
|
|
||||||
sep = " "
|
|
||||||
}
|
|
||||||
stmtsStr := ""
|
|
||||||
if a.Stmts != nil {
|
|
||||||
stmtsStr = "{\n" + a.Stmts.String() + "}"
|
|
||||||
}
|
|
||||||
return strings.Join(patterns, ", ") + sep + stmtsStr
|
|
||||||
}
|
|
||||||
|
|
||||||
// Expr is the abstract syntax tree for any AWK expression. Besides the
// String method used for pretty-printing, it carries an unexported marker
// method so that only types in this package can implement it.
type Expr interface {
	fmt.Stringer
	expr()
}
|
|
||||||
|
|
||||||
// All these types implement the Expr interface.
|
|
||||||
func (e *FieldExpr) expr() {}
|
|
||||||
func (e *NamedFieldExpr) expr() {}
|
|
||||||
func (e *UnaryExpr) expr() {}
|
|
||||||
func (e *BinaryExpr) expr() {}
|
|
||||||
func (e *ArrayExpr) expr() {}
|
|
||||||
func (e *InExpr) expr() {}
|
|
||||||
func (e *CondExpr) expr() {}
|
|
||||||
func (e *NumExpr) expr() {}
|
|
||||||
func (e *StrExpr) expr() {}
|
|
||||||
func (e *RegExpr) expr() {}
|
|
||||||
func (e *VarExpr) expr() {}
|
|
||||||
func (e *IndexExpr) expr() {}
|
|
||||||
func (e *AssignExpr) expr() {}
|
|
||||||
func (e *AugAssignExpr) expr() {}
|
|
||||||
func (e *IncrExpr) expr() {}
|
|
||||||
func (e *CallExpr) expr() {}
|
|
||||||
func (e *UserCallExpr) expr() {}
|
|
||||||
func (e *MultiExpr) expr() {}
|
|
||||||
func (e *GetlineExpr) expr() {}
|
|
||||||
|
|
||||||
// FieldExpr is an expression like $0.
|
|
||||||
type FieldExpr struct {
|
|
||||||
Index Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *FieldExpr) String() string {
|
|
||||||
return "$" + e.Index.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// NamedFieldExpr is an expression like @"name".
|
|
||||||
type NamedFieldExpr struct {
|
|
||||||
Field Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *NamedFieldExpr) String() string {
|
|
||||||
return "@" + e.Field.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// UnaryExpr is an expression like -1234.
|
|
||||||
type UnaryExpr struct {
|
|
||||||
Op Token
|
|
||||||
Value Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *UnaryExpr) String() string {
|
|
||||||
return e.Op.String() + e.Value.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// BinaryExpr is an expression like 1 + 2.
|
|
||||||
type BinaryExpr struct {
|
|
||||||
Left Expr
|
|
||||||
Op Token
|
|
||||||
Right Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *BinaryExpr) String() string {
|
|
||||||
var opStr string
|
|
||||||
if e.Op == CONCAT {
|
|
||||||
opStr = " "
|
|
||||||
} else {
|
|
||||||
opStr = " " + e.Op.String() + " "
|
|
||||||
}
|
|
||||||
return "(" + e.Left.String() + opStr + e.Right.String() + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// ArrayExpr is an array reference. Not really a stand-alone
|
|
||||||
// expression, except as an argument to split() or a user function
|
|
||||||
// call.
|
|
||||||
type ArrayExpr struct {
|
|
||||||
Scope VarScope
|
|
||||||
Index int
|
|
||||||
Name string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *ArrayExpr) String() string {
|
|
||||||
return e.Name
|
|
||||||
}
|
|
||||||
|
|
||||||
// InExpr is an expression like (index in array).
|
|
||||||
type InExpr struct {
|
|
||||||
Index []Expr
|
|
||||||
Array *ArrayExpr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *InExpr) String() string {
|
|
||||||
if len(e.Index) == 1 {
|
|
||||||
return "(" + e.Index[0].String() + " in " + e.Array.String() + ")"
|
|
||||||
}
|
|
||||||
indices := make([]string, len(e.Index))
|
|
||||||
for i, index := range e.Index {
|
|
||||||
indices[i] = index.String()
|
|
||||||
}
|
|
||||||
return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// CondExpr is an expression like cond ? 1 : 0.
|
|
||||||
type CondExpr struct {
|
|
||||||
Cond Expr
|
|
||||||
True Expr
|
|
||||||
False Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *CondExpr) String() string {
|
|
||||||
return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// NumExpr is a literal number like 1234.
|
|
||||||
type NumExpr struct {
|
|
||||||
Value float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *NumExpr) String() string {
|
|
||||||
if e.Value == float64(int(e.Value)) {
|
|
||||||
return strconv.Itoa(int(e.Value))
|
|
||||||
} else {
|
|
||||||
return fmt.Sprintf("%.6g", e.Value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StrExpr is a literal string like "foo".
type StrExpr struct {
	Value string
}

// String returns Value as a double-quoted, escaped literal produced by
// strconv.Quote.
func (s *StrExpr) String() string {
	quoted := strconv.Quote(s.Value)
	return quoted
}
|
|
||||||
|
|
||||||
// RegExpr is a stand-alone regex expression, equivalent to:
// $0 ~ /regex/.
type RegExpr struct {
	Regex string
}

// String renders the regex between slashes, with every "/" inside the
// pattern escaped as "\/".
func (re *RegExpr) String() string {
	body := strings.Replace(re.Regex, "/", `\/`, -1)
	return "/" + body + "/"
}
|
|
||||||
|
|
||||||
// VarScope identifies the kind of variable a reference resolves to:
// special variable, global, or local.
type VarScope int

// The scope values, numbered consecutively from zero.
const (
	ScopeSpecial VarScope = 0
	ScopeGlobal  VarScope = 1
	ScopeLocal   VarScope = 2
)
|
|
||||||
|
|
||||||
// VarExpr is a variable reference (special var, global, or local).
|
|
||||||
// Index is the resolved variable index used by the interpreter; Name
|
|
||||||
// is the original name used by String().
|
|
||||||
type VarExpr struct {
|
|
||||||
Scope VarScope
|
|
||||||
Index int
|
|
||||||
Name string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *VarExpr) String() string {
|
|
||||||
return e.Name
|
|
||||||
}
|
|
||||||
|
|
||||||
// IndexExpr is an expression like a[k] (rvalue or lvalue).
|
|
||||||
type IndexExpr struct {
|
|
||||||
Array *ArrayExpr
|
|
||||||
Index []Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *IndexExpr) String() string {
|
|
||||||
indices := make([]string, len(e.Index))
|
|
||||||
for i, index := range e.Index {
|
|
||||||
indices[i] = index.String()
|
|
||||||
}
|
|
||||||
return e.Array.String() + "[" + strings.Join(indices, ", ") + "]"
|
|
||||||
}
|
|
||||||
|
|
||||||
// AssignExpr is an expression like x = 1234.
|
|
||||||
type AssignExpr struct {
|
|
||||||
Left Expr // can be one of: var, array[x], $n
|
|
||||||
Right Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *AssignExpr) String() string {
|
|
||||||
return e.Left.String() + " = " + e.Right.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// AugAssignExpr is an assignment expression like x += 5.
|
|
||||||
type AugAssignExpr struct {
|
|
||||||
Left Expr // can be one of: var, array[x], $n
|
|
||||||
Op Token
|
|
||||||
Right Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *AugAssignExpr) String() string {
|
|
||||||
return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// IncrExpr is an increment or decrement expression like x++ or --y.
|
|
||||||
type IncrExpr struct {
|
|
||||||
Expr Expr
|
|
||||||
Op Token
|
|
||||||
Pre bool
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *IncrExpr) String() string {
|
|
||||||
if e.Pre {
|
|
||||||
return e.Op.String() + e.Expr.String()
|
|
||||||
} else {
|
|
||||||
return e.Expr.String() + e.Op.String()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// CallExpr is a builtin function call like length($1).
|
|
||||||
type CallExpr struct {
|
|
||||||
Func Token
|
|
||||||
Args []Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *CallExpr) String() string {
|
|
||||||
args := make([]string, len(e.Args))
|
|
||||||
for i, a := range e.Args {
|
|
||||||
args[i] = a.String()
|
|
||||||
}
|
|
||||||
return e.Func.String() + "(" + strings.Join(args, ", ") + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// UserCallExpr is a user-defined function call like my_func(1, 2, 3)
|
|
||||||
//
|
|
||||||
// Index is the resolved function index used by the interpreter; Name
|
|
||||||
// is the original name used by String().
|
|
||||||
type UserCallExpr struct {
|
|
||||||
Native bool // false = AWK-defined function, true = native Go func
|
|
||||||
Index int
|
|
||||||
Name string
|
|
||||||
Args []Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *UserCallExpr) String() string {
|
|
||||||
args := make([]string, len(e.Args))
|
|
||||||
for i, a := range e.Args {
|
|
||||||
args[i] = a.String()
|
|
||||||
}
|
|
||||||
return e.Name + "(" + strings.Join(args, ", ") + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// MultiExpr isn't an interpretable expression, but it's used as a
|
|
||||||
// pseudo-expression for print[f] parsing.
|
|
||||||
type MultiExpr struct {
|
|
||||||
Exprs []Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *MultiExpr) String() string {
|
|
||||||
exprs := make([]string, len(e.Exprs))
|
|
||||||
for i, e := range e.Exprs {
|
|
||||||
exprs[i] = e.String()
|
|
||||||
}
|
|
||||||
return "(" + strings.Join(exprs, ", ") + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetlineExpr is an expression read from file or pipe input.
|
|
||||||
type GetlineExpr struct {
|
|
||||||
Command Expr
|
|
||||||
Target Expr
|
|
||||||
File Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *GetlineExpr) String() string {
|
|
||||||
s := ""
|
|
||||||
if e.Command != nil {
|
|
||||||
s += e.Command.String() + " |"
|
|
||||||
}
|
|
||||||
s += "getline"
|
|
||||||
if e.Target != nil {
|
|
||||||
s += " " + e.Target.String()
|
|
||||||
}
|
|
||||||
if e.File != nil {
|
|
||||||
s += " <" + e.File.String()
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsLValue returns true if the given expression can be used as an
|
|
||||||
// lvalue (on the left-hand side of an assignment, in a ++ or --
|
|
||||||
// operation, or as the third argument to sub or gsub).
|
|
||||||
func IsLValue(expr Expr) bool {
|
|
||||||
switch expr.(type) {
|
|
||||||
case *VarExpr, *IndexExpr, *FieldExpr:
|
|
||||||
return true
|
|
||||||
default:
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stmt is the abstract syntax tree for any AWK statement.
|
|
||||||
type Stmt interface {
|
|
||||||
stmt()
|
|
||||||
String() string
|
|
||||||
}
|
|
||||||
|
|
||||||
// All these types implement the Stmt interface.
|
|
||||||
func (s *PrintStmt) stmt() {}
|
|
||||||
func (s *PrintfStmt) stmt() {}
|
|
||||||
func (s *ExprStmt) stmt() {}
|
|
||||||
func (s *IfStmt) stmt() {}
|
|
||||||
func (s *ForStmt) stmt() {}
|
|
||||||
func (s *ForInStmt) stmt() {}
|
|
||||||
func (s *WhileStmt) stmt() {}
|
|
||||||
func (s *DoWhileStmt) stmt() {}
|
|
||||||
func (s *BreakStmt) stmt() {}
|
|
||||||
func (s *ContinueStmt) stmt() {}
|
|
||||||
func (s *NextStmt) stmt() {}
|
|
||||||
func (s *ExitStmt) stmt() {}
|
|
||||||
func (s *DeleteStmt) stmt() {}
|
|
||||||
func (s *ReturnStmt) stmt() {}
|
|
||||||
func (s *BlockStmt) stmt() {}
|
|
||||||
|
|
||||||
// PrintStmt is a statement like print $1, $3.
|
|
||||||
type PrintStmt struct {
|
|
||||||
Args []Expr
|
|
||||||
Redirect Token
|
|
||||||
Dest Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *PrintStmt) String() string {
|
|
||||||
return printString("print", s.Args, s.Redirect, s.Dest)
|
|
||||||
}
|
|
||||||
|
|
||||||
func printString(f string, args []Expr, redirect Token, dest Expr) string {
|
|
||||||
parts := make([]string, len(args))
|
|
||||||
for i, a := range args {
|
|
||||||
parts[i] = a.String()
|
|
||||||
}
|
|
||||||
str := f + " " + strings.Join(parts, ", ")
|
|
||||||
if dest != nil {
|
|
||||||
str += " " + redirect.String() + dest.String()
|
|
||||||
}
|
|
||||||
return str
|
|
||||||
}
|
|
||||||
|
|
||||||
// PrintfStmt is a statement like printf "%3d", 1234.
|
|
||||||
type PrintfStmt struct {
|
|
||||||
Args []Expr
|
|
||||||
Redirect Token
|
|
||||||
Dest Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *PrintfStmt) String() string {
|
|
||||||
return printString("printf", s.Args, s.Redirect, s.Dest)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExprStmt is statement like a bare function call: my_func(x).
|
|
||||||
type ExprStmt struct {
|
|
||||||
Expr Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ExprStmt) String() string {
|
|
||||||
return s.Expr.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// IfStmt is an if or if-else statement.
|
|
||||||
type IfStmt struct {
|
|
||||||
Cond Expr
|
|
||||||
Body Stmts
|
|
||||||
Else Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *IfStmt) String() string {
|
|
||||||
str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
|
|
||||||
if len(s.Else) > 0 {
|
|
||||||
str += " else {\n" + s.Else.String() + "}"
|
|
||||||
}
|
|
||||||
return str
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForStmt is a C-like for loop: for (i=0; i<10; i++) print i.
|
|
||||||
type ForStmt struct {
|
|
||||||
Pre Stmt
|
|
||||||
Cond Expr
|
|
||||||
Post Stmt
|
|
||||||
Body Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ForStmt) String() string {
|
|
||||||
preStr := ""
|
|
||||||
if s.Pre != nil {
|
|
||||||
preStr = s.Pre.String()
|
|
||||||
}
|
|
||||||
condStr := ""
|
|
||||||
if s.Cond != nil {
|
|
||||||
condStr = " " + trimParens(s.Cond.String())
|
|
||||||
}
|
|
||||||
postStr := ""
|
|
||||||
if s.Post != nil {
|
|
||||||
postStr = " " + s.Post.String()
|
|
||||||
}
|
|
||||||
return "for (" + preStr + ";" + condStr + ";" + postStr + ") {\n" + s.Body.String() + "}"
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForInStmt is a for loop like for (k in a) print k, a[k].
|
|
||||||
type ForInStmt struct {
|
|
||||||
Var *VarExpr
|
|
||||||
Array *ArrayExpr
|
|
||||||
Body Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ForInStmt) String() string {
|
|
||||||
return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}"
|
|
||||||
}
|
|
||||||
|
|
||||||
// WhileStmt is a while loop.
|
|
||||||
type WhileStmt struct {
|
|
||||||
Cond Expr
|
|
||||||
Body Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *WhileStmt) String() string {
|
|
||||||
return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
|
|
||||||
}
|
|
||||||
|
|
||||||
// DoWhileStmt is a do-while loop.
|
|
||||||
type DoWhileStmt struct {
|
|
||||||
Body Stmts
|
|
||||||
Cond Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *DoWhileStmt) String() string {
|
|
||||||
return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
// BreakStmt is a break statement.
type BreakStmt struct{}

// String returns the keyword "break".
func (*BreakStmt) String() string { return "break" }

// ContinueStmt is a continue statement.
type ContinueStmt struct{}

// String returns the keyword "continue".
func (*ContinueStmt) String() string { return "continue" }

// NextStmt is a next statement.
type NextStmt struct{}

// String returns the keyword "next".
func (*NextStmt) String() string { return "next" }
|
|
||||||
|
|
||||||
// ExitStmt is an exit statement.
|
|
||||||
type ExitStmt struct {
|
|
||||||
Status Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ExitStmt) String() string {
|
|
||||||
var statusStr string
|
|
||||||
if s.Status != nil {
|
|
||||||
statusStr = " " + s.Status.String()
|
|
||||||
}
|
|
||||||
return "exit" + statusStr
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteStmt is a statement like delete a[k].
|
|
||||||
type DeleteStmt struct {
|
|
||||||
Array *ArrayExpr
|
|
||||||
Index []Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *DeleteStmt) String() string {
|
|
||||||
indices := make([]string, len(s.Index))
|
|
||||||
for i, index := range s.Index {
|
|
||||||
indices[i] = index.String()
|
|
||||||
}
|
|
||||||
return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]"
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReturnStmt is a return statement.
|
|
||||||
type ReturnStmt struct {
|
|
||||||
Value Expr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReturnStmt) String() string {
|
|
||||||
var valueStr string
|
|
||||||
if s.Value != nil {
|
|
||||||
valueStr = " " + s.Value.String()
|
|
||||||
}
|
|
||||||
return "return" + valueStr
|
|
||||||
}
|
|
||||||
|
|
||||||
// BlockStmt is a stand-alone block like { print "x" }.
|
|
||||||
type BlockStmt struct {
|
|
||||||
Body Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *BlockStmt) String() string {
|
|
||||||
return "{\n" + s.Body.String() + "}"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Function is the AST for a user-defined function.
|
|
||||||
type Function struct {
|
|
||||||
Name string
|
|
||||||
Params []string
|
|
||||||
Arrays []bool
|
|
||||||
Body Stmts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Function) String() string {
|
|
||||||
return "function " + f.Name + "(" + strings.Join(f.Params, ", ") + ") {\n" +
|
|
||||||
f.Body.String() + "}"
|
|
||||||
}
|
|
||||||
|
|
||||||
// trimParens strips one leading "(" and one trailing ")" from s when s
// both starts with "(" and ends with ")"; any other string is returned
// unchanged. It does not check that the two parentheses match each other.
func trimParens(s string) string {
	if !strings.HasPrefix(s, "(") || !strings.HasSuffix(s, ")") {
		return s
	}
	return s[1 : len(s)-1]
}
|
|
|
@ -1,100 +0,0 @@
|
||||||
// Special variable constants
|
|
||||||
|
|
||||||
package ast
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Indexes of the special variables. V_ILLEGAL is the zero value, which is
// what SpecialVarIndex reports for a name that is not a special variable.
const (
	V_ILLEGAL = iota
	V_ARGC
	V_CONVFMT
	V_FILENAME
	V_FNR
	V_FS
	V_INPUTMODE
	V_NF
	V_NR
	V_OFMT
	V_OFS
	V_ORS
	V_OUTPUTMODE
	V_RLENGTH
	V_RS
	V_RSTART
	V_RT
	V_SUBSEP

	// V_LAST is the highest special variable index.
	V_LAST = V_SUBSEP
)

// specialVars maps each special variable's name to its index.
var specialVars = map[string]int{
	"ARGC":       V_ARGC,
	"CONVFMT":    V_CONVFMT,
	"FILENAME":   V_FILENAME,
	"FNR":        V_FNR,
	"FS":         V_FS,
	"INPUTMODE":  V_INPUTMODE,
	"NF":         V_NF,
	"NR":         V_NR,
	"OFMT":       V_OFMT,
	"OFS":        V_OFS,
	"ORS":        V_ORS,
	"OUTPUTMODE": V_OUTPUTMODE,
	"RLENGTH":    V_RLENGTH,
	"RS":         V_RS,
	"RSTART":     V_RSTART,
	"RT":         V_RT,
	"SUBSEP":     V_SUBSEP,
}

// SpecialVarIndex returns the "index" of the special variable called name,
// or 0 (V_ILLEGAL) when name is not a special variable.
func SpecialVarIndex(name string) int {
	index, known := specialVars[name]
	if !known {
		return V_ILLEGAL
	}
	return index
}
|
|
||||||
|
|
||||||
// SpecialVarName returns the name of the special variable by index.
|
|
||||||
func SpecialVarName(index int) string {
|
|
||||||
switch index {
|
|
||||||
case V_ILLEGAL:
|
|
||||||
return "ILLEGAL"
|
|
||||||
case V_ARGC:
|
|
||||||
return "ARGC"
|
|
||||||
case V_CONVFMT:
|
|
||||||
return "CONVFMT"
|
|
||||||
case V_FILENAME:
|
|
||||||
return "FILENAME"
|
|
||||||
case V_FNR:
|
|
||||||
return "FNR"
|
|
||||||
case V_FS:
|
|
||||||
return "FS"
|
|
||||||
case V_INPUTMODE:
|
|
||||||
return "INPUTMODE"
|
|
||||||
case V_NF:
|
|
||||||
return "NF"
|
|
||||||
case V_NR:
|
|
||||||
return "NR"
|
|
||||||
case V_OFMT:
|
|
||||||
return "OFMT"
|
|
||||||
case V_OFS:
|
|
||||||
return "OFS"
|
|
||||||
case V_ORS:
|
|
||||||
return "ORS"
|
|
||||||
case V_OUTPUTMODE:
|
|
||||||
return "OUTPUTMODE"
|
|
||||||
case V_RLENGTH:
|
|
||||||
return "RLENGTH"
|
|
||||||
case V_RS:
|
|
||||||
return "RS"
|
|
||||||
case V_RSTART:
|
|
||||||
return "RSTART"
|
|
||||||
case V_RT:
|
|
||||||
return "RT"
|
|
||||||
case V_SUBSEP:
|
|
||||||
return "SUBSEP"
|
|
||||||
default:
|
|
||||||
return fmt.Sprintf("<unknown special var %d>", index)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,46 +0,0 @@
|
||||||
package ast
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNameIndex(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
index int
|
|
||||||
}{
|
|
||||||
{"ILLEGAL", V_ILLEGAL},
|
|
||||||
{"ARGC", V_ARGC},
|
|
||||||
{"CONVFMT", V_CONVFMT},
|
|
||||||
{"FILENAME", V_FILENAME},
|
|
||||||
{"FNR", V_FNR},
|
|
||||||
{"FS", V_FS},
|
|
||||||
{"INPUTMODE", V_INPUTMODE},
|
|
||||||
{"NF", V_NF},
|
|
||||||
{"NR", V_NR},
|
|
||||||
{"OFMT", V_OFMT},
|
|
||||||
{"OFS", V_OFS},
|
|
||||||
{"ORS", V_ORS},
|
|
||||||
{"OUTPUTMODE", V_OUTPUTMODE},
|
|
||||||
{"RLENGTH", V_RLENGTH},
|
|
||||||
{"RS", V_RS},
|
|
||||||
{"RSTART", V_RSTART},
|
|
||||||
{"RT", V_RT},
|
|
||||||
{"SUBSEP", V_SUBSEP},
|
|
||||||
{"<unknown special var 42>", 42},
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.name, func(t *testing.T) {
|
|
||||||
name := SpecialVarName(test.index)
|
|
||||||
if name != test.name {
|
|
||||||
t.Errorf("got %q, want %q", name, test.name)
|
|
||||||
}
|
|
||||||
if test.index <= V_LAST {
|
|
||||||
index := SpecialVarIndex(test.name)
|
|
||||||
if index != test.index {
|
|
||||||
t.Errorf("got %d, want %d", index, test.index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,495 +0,0 @@
|
||||||
// Disassembles compiled program to text assembly instructions
|
|
||||||
|
|
||||||
package compiler
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/internal/ast"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Disassemble writes a human-readable form of the program's virtual machine
|
|
||||||
// instructions to writer.
|
|
||||||
func (p *Program) Disassemble(writer io.Writer) error {
|
|
||||||
if p.Begin != nil {
|
|
||||||
d := &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: p.Begin,
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
}
|
|
||||||
err := d.disassemble("BEGIN")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, action := range p.Actions {
|
|
||||||
switch len(action.Pattern) {
|
|
||||||
case 0:
|
|
||||||
// Nothing to do here.
|
|
||||||
case 1:
|
|
||||||
d := &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: action.Pattern[0],
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
}
|
|
||||||
err := d.disassemble("pattern")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
case 2:
|
|
||||||
d := &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: action.Pattern[0],
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
}
|
|
||||||
err := d.disassemble("start")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
d = &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: action.Pattern[1],
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
}
|
|
||||||
err = d.disassemble("stop")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(action.Body) > 0 {
|
|
||||||
d := &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: action.Body,
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
}
|
|
||||||
err := d.disassemble("{ body }")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if p.End != nil {
|
|
||||||
d := &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: p.End,
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
}
|
|
||||||
err := d.disassemble("END")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, f := range p.Functions {
|
|
||||||
d := &disassembler{
|
|
||||||
program: p,
|
|
||||||
writer: writer,
|
|
||||||
code: f.Body,
|
|
||||||
nativeFuncNames: p.nativeFuncNames,
|
|
||||||
funcIndex: i,
|
|
||||||
}
|
|
||||||
err := d.disassemble("function " + f.Name)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Disassembles a single block of opcodes.
|
|
||||||
type disassembler struct {
|
|
||||||
program *Program
|
|
||||||
writer io.Writer
|
|
||||||
code []Opcode
|
|
||||||
nativeFuncNames []string
|
|
||||||
funcIndex int
|
|
||||||
ip int
|
|
||||||
opAddr int
|
|
||||||
err error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *disassembler) disassemble(prefix string) error {
|
|
||||||
if prefix != "" {
|
|
||||||
d.writef(" // %s\n", prefix)
|
|
||||||
}
|
|
||||||
|
|
||||||
for d.ip < len(d.code) && d.err == nil {
|
|
||||||
d.opAddr = d.ip
|
|
||||||
op := d.fetch()
|
|
||||||
|
|
||||||
switch op {
|
|
||||||
case Num:
|
|
||||||
index := d.fetch()
|
|
||||||
num := d.program.Nums[index]
|
|
||||||
if num == float64(int(num)) {
|
|
||||||
d.writeOpf("Num %d (%d)", int(num), index)
|
|
||||||
} else {
|
|
||||||
d.writeOpf("Num %.6g (%d)", num, index)
|
|
||||||
}
|
|
||||||
|
|
||||||
case Str:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("Str %q (%d)", d.program.Strs[index], index)
|
|
||||||
|
|
||||||
case FieldInt:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("FieldInt %d", index)
|
|
||||||
|
|
||||||
case FieldByNameStr:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("FieldByNameStr %q (%d)", d.program.Strs[index], index)
|
|
||||||
|
|
||||||
case Global:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("Global %s", d.program.scalarNames[index])
|
|
||||||
|
|
||||||
case Local:
|
|
||||||
index := int(d.fetch())
|
|
||||||
d.writeOpf("Local %s", d.localName(index))
|
|
||||||
|
|
||||||
case Special:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("Special %s", ast.SpecialVarName(int(index)))
|
|
||||||
|
|
||||||
case ArrayGlobal:
|
|
||||||
arrayIndex := d.fetch()
|
|
||||||
d.writeOpf("ArrayGlobal %s", d.program.arrayNames[arrayIndex])
|
|
||||||
|
|
||||||
case ArrayLocal:
|
|
||||||
arrayIndex := d.fetch()
|
|
||||||
d.writeOpf("ArrayLocal %s", d.localArrayName(int(arrayIndex)))
|
|
||||||
|
|
||||||
case InGlobal:
|
|
||||||
arrayIndex := d.fetch()
|
|
||||||
d.writeOpf("InGlobal %s", d.program.arrayNames[arrayIndex])
|
|
||||||
|
|
||||||
case InLocal:
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("InLocal %s", d.localArrayName(arrayIndex))
|
|
||||||
|
|
||||||
case AssignGlobal:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("AssignGlobal %s", d.program.scalarNames[index])
|
|
||||||
|
|
||||||
case AssignLocal:
|
|
||||||
index := int(d.fetch())
|
|
||||||
d.writeOpf("AssignLocal %s", d.localName(index))
|
|
||||||
|
|
||||||
case AssignSpecial:
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("AssignSpecial %s", ast.SpecialVarName(int(index)))
|
|
||||||
|
|
||||||
case AssignArrayGlobal:
|
|
||||||
arrayIndex := d.fetch()
|
|
||||||
d.writeOpf("AssignArrayGlobal %s", d.program.arrayNames[arrayIndex])
|
|
||||||
|
|
||||||
case AssignArrayLocal:
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex))
|
|
||||||
|
|
||||||
case Delete:
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex))
|
|
||||||
|
|
||||||
case DeleteAll:
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex))
|
|
||||||
|
|
||||||
case IncrField:
|
|
||||||
amount := d.fetch()
|
|
||||||
d.writeOpf("IncrField %d", amount)
|
|
||||||
|
|
||||||
case IncrGlobal:
|
|
||||||
amount := d.fetch()
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("IncrGlobal %d %s", amount, d.program.scalarNames[index])
|
|
||||||
|
|
||||||
case IncrLocal:
|
|
||||||
amount := d.fetch()
|
|
||||||
index := int(d.fetch())
|
|
||||||
d.writeOpf("IncrLocal %d %s", amount, d.localName(index))
|
|
||||||
|
|
||||||
case IncrSpecial:
|
|
||||||
amount := d.fetch()
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("IncrSpecial %d %s", amount, ast.SpecialVarName(int(index)))
|
|
||||||
|
|
||||||
case IncrArrayGlobal:
|
|
||||||
amount := d.fetch()
|
|
||||||
arrayIndex := d.fetch()
|
|
||||||
d.writeOpf("IncrArrayGlobal %d %s", amount, d.program.arrayNames[arrayIndex])
|
|
||||||
|
|
||||||
case IncrArrayLocal:
|
|
||||||
amount := d.fetch()
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("IncrArrayLocal %d %s", amount, d.localArrayName(arrayIndex))
|
|
||||||
|
|
||||||
case AugAssignField:
|
|
||||||
operation := AugOp(d.fetch())
|
|
||||||
d.writeOpf("AugAssignField %s", operation)
|
|
||||||
|
|
||||||
case AugAssignGlobal:
|
|
||||||
operation := AugOp(d.fetch())
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("AugAssignGlobal %s %s", operation, d.program.scalarNames[index])
|
|
||||||
|
|
||||||
case AugAssignLocal:
|
|
||||||
operation := AugOp(d.fetch())
|
|
||||||
index := int(d.fetch())
|
|
||||||
d.writeOpf("AugAssignLocal %s %s", operation, d.localName(index))
|
|
||||||
|
|
||||||
case AugAssignSpecial:
|
|
||||||
operation := AugOp(d.fetch())
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("AugAssignSpecial %s %d", operation, ast.SpecialVarName(int(index)))
|
|
||||||
|
|
||||||
case AugAssignArrayGlobal:
|
|
||||||
operation := AugOp(d.fetch())
|
|
||||||
arrayIndex := d.fetch()
|
|
||||||
d.writeOpf("AugAssignArrayGlobal %s %s", operation, d.program.arrayNames[arrayIndex])
|
|
||||||
|
|
||||||
case AugAssignArrayLocal:
|
|
||||||
operation := AugOp(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("AugAssignArrayLocal %s %s", operation, d.localArrayName(arrayIndex))
|
|
||||||
|
|
||||||
case Regex:
|
|
||||||
regexIndex := d.fetch()
|
|
||||||
d.writeOpf("Regex %q (%d)", d.program.Regexes[regexIndex], regexIndex)
|
|
||||||
|
|
||||||
case IndexMulti:
|
|
||||||
num := d.fetch()
|
|
||||||
d.writeOpf("IndexMulti %d", num)
|
|
||||||
|
|
||||||
case ConcatMulti:
|
|
||||||
num := d.fetch()
|
|
||||||
d.writeOpf("ConcatMulti %d", num)
|
|
||||||
|
|
||||||
case Jump:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("Jump 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpFalse:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpFalse 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpTrue:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpTrue 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpEquals:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpEquals 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpNotEquals:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpNotEquals 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpLess:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpLess 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpGreater:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpGreater 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpLessOrEqual:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpLessOrEqual 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case JumpGreaterOrEqual:
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset))
|
|
||||||
|
|
||||||
case ForIn:
|
|
||||||
varScope := ast.VarScope(d.fetch())
|
|
||||||
varIndex := int(d.fetch())
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
offset := d.fetch()
|
|
||||||
d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset))
|
|
||||||
|
|
||||||
case CallBuiltin:
|
|
||||||
builtinOp := BuiltinOp(d.fetch())
|
|
||||||
d.writeOpf("CallBuiltin %s", builtinOp)
|
|
||||||
|
|
||||||
case CallSplit:
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex))
|
|
||||||
|
|
||||||
case CallSplitSep:
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex))
|
|
||||||
|
|
||||||
case CallSprintf:
|
|
||||||
numArgs := d.fetch()
|
|
||||||
d.writeOpf("CallSprintf %d", numArgs)
|
|
||||||
|
|
||||||
case CallUser:
|
|
||||||
funcIndex := d.fetch()
|
|
||||||
numArrayArgs := int(d.fetch())
|
|
||||||
var arrayArgs []string
|
|
||||||
for i := 0; i < numArrayArgs; i++ {
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex))
|
|
||||||
}
|
|
||||||
d.writeOpf("CallUser %s [%s]", d.program.Functions[funcIndex].Name, strings.Join(arrayArgs, ", "))
|
|
||||||
|
|
||||||
case CallNative:
|
|
||||||
funcIndex := d.fetch()
|
|
||||||
numArgs := d.fetch()
|
|
||||||
d.writeOpf("CallNative %s %d", d.nativeFuncNames[funcIndex], numArgs)
|
|
||||||
|
|
||||||
case Nulls:
|
|
||||||
numNulls := d.fetch()
|
|
||||||
d.writeOpf("Nulls %d", numNulls)
|
|
||||||
|
|
||||||
case Print:
|
|
||||||
numArgs := d.fetch()
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
if redirect == lexer.ILLEGAL {
|
|
||||||
d.writeOpf("Print %d", numArgs)
|
|
||||||
} else {
|
|
||||||
d.writeOpf("Print %d %s", numArgs, redirect)
|
|
||||||
}
|
|
||||||
|
|
||||||
case Printf:
|
|
||||||
numArgs := d.fetch()
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
if redirect == lexer.ILLEGAL {
|
|
||||||
d.writeOpf("Printf %d", numArgs)
|
|
||||||
} else {
|
|
||||||
d.writeOpf("Printf %d %s", numArgs, redirect)
|
|
||||||
}
|
|
||||||
|
|
||||||
case Getline:
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
d.writeOpf("Getline %s", redirect)
|
|
||||||
|
|
||||||
case GetlineField:
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
d.writeOpf("GetlineField %s", redirect)
|
|
||||||
|
|
||||||
case GetlineGlobal:
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("GetlineGlobal %s %s", redirect, d.program.scalarNames[index])
|
|
||||||
|
|
||||||
case GetlineLocal:
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
index := int(d.fetch())
|
|
||||||
d.writeOpf("GetlineLocal %s %s", redirect, d.localName(index))
|
|
||||||
|
|
||||||
case GetlineSpecial:
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
index := d.fetch()
|
|
||||||
d.writeOpf("GetlineSpecial %s %s", redirect, ast.SpecialVarName(int(index)))
|
|
||||||
|
|
||||||
case GetlineArray:
|
|
||||||
redirect := lexer.Token(d.fetch())
|
|
||||||
arrayScope := ast.VarScope(d.fetch())
|
|
||||||
arrayIndex := int(d.fetch())
|
|
||||||
d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex))
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Handles all other opcodes with no arguments
|
|
||||||
d.writeOpf("%s", op)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
d.writef("\n")
|
|
||||||
return d.err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch the next opcode and increment the "instruction pointer".
|
|
||||||
func (d *disassembler) fetch() Opcode {
|
|
||||||
op := d.code[d.ip]
|
|
||||||
d.ip++
|
|
||||||
return op
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write formatted string to the disassembly output.
|
|
||||||
func (d *disassembler) writef(format string, args ...interface{}) {
|
|
||||||
if d.err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
_, d.err = fmt.Fprintf(d.writer, format, args...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write formatted opcode (with address and newline) to disassembly output.
|
|
||||||
func (d *disassembler) writeOpf(format string, args ...interface{}) {
|
|
||||||
if d.err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
addrStr := fmt.Sprintf("%04x", d.opAddr)
|
|
||||||
_, d.err = fmt.Fprintf(d.writer, addrStr+" "+format+"\n", args...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the scalar variable name described by scope and index.
|
|
||||||
func (d *disassembler) varName(scope ast.VarScope, index int) string {
|
|
||||||
switch scope {
|
|
||||||
case ast.ScopeGlobal:
|
|
||||||
return d.program.scalarNames[index]
|
|
||||||
case ast.ScopeLocal:
|
|
||||||
return d.localName(index)
|
|
||||||
default: // ScopeSpecial
|
|
||||||
return ast.SpecialVarName(index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the local variable name with the given index.
|
|
||||||
func (d *disassembler) localName(index int) string {
|
|
||||||
f := d.program.Functions[d.funcIndex]
|
|
||||||
n := 0
|
|
||||||
for i, p := range f.Params {
|
|
||||||
if f.Arrays[i] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if n == index {
|
|
||||||
return p
|
|
||||||
}
|
|
||||||
n++
|
|
||||||
}
|
|
||||||
panic(fmt.Sprintf("unexpected local variable index %d", index))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the array variable name describes by scope and index.
|
|
||||||
func (d *disassembler) arrayName(scope ast.VarScope, index int) string {
|
|
||||||
if scope == ast.ScopeLocal {
|
|
||||||
return d.localArrayName(index)
|
|
||||||
}
|
|
||||||
return d.program.arrayNames[index]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the local array name with the given index.
|
|
||||||
func (d *disassembler) localArrayName(index int) string {
|
|
||||||
f := d.program.Functions[d.funcIndex]
|
|
||||||
n := 0
|
|
||||||
for i, p := range f.Params {
|
|
||||||
if !f.Arrays[i] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if n == index {
|
|
||||||
return p
|
|
||||||
}
|
|
||||||
n++
|
|
||||||
}
|
|
||||||
panic(fmt.Sprintf("unexpected local array index %d", index))
|
|
||||||
}
|
|
|
@ -1,51 +0,0 @@
|
||||||
package compiler
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"regexp"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestDisassembler(t *testing.T) {
|
|
||||||
// Note: this doesn't really test the disassembly, just that each opcode
|
|
||||||
// disassembly includes the opcode name, to help catch silly typos.
|
|
||||||
for op := Nop; op < EndOpcode; op++ {
|
|
||||||
t.Run(op.String(), func(t *testing.T) {
|
|
||||||
p := Program{
|
|
||||||
Begin: []Opcode{op, 0, 0, 0, 0, 0, 0, 0},
|
|
||||||
Functions: []Function{
|
|
||||||
{
|
|
||||||
Name: "f",
|
|
||||||
Params: []string{"a", "k"},
|
|
||||||
Arrays: []bool{true, false},
|
|
||||||
NumScalars: 1,
|
|
||||||
NumArrays: 1,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Nums: []float64{0},
|
|
||||||
Strs: []string{""},
|
|
||||||
Regexes: []*regexp.Regexp{regexp.MustCompile("")},
|
|
||||||
scalarNames: []string{"s"},
|
|
||||||
arrayNames: []string{"a"},
|
|
||||||
nativeFuncNames: []string{"n"},
|
|
||||||
}
|
|
||||||
var buf bytes.Buffer
|
|
||||||
err := p.Disassemble(&buf)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error disassembling opcode %s: %v", op, err)
|
|
||||||
}
|
|
||||||
lines := strings.Split(buf.String(), "\n")
|
|
||||||
if strings.TrimSpace(lines[0]) != "// BEGIN" {
|
|
||||||
t.Fatalf("first line should be \"// BEGIN\", not %q", lines[0])
|
|
||||||
}
|
|
||||||
fields := strings.Fields(lines[1])
|
|
||||||
if fields[0] != "0000" {
|
|
||||||
t.Fatalf("address should be \"0000\", not %q", fields[0])
|
|
||||||
}
|
|
||||||
if fields[1] != op.String() {
|
|
||||||
t.Fatalf("opcode name should be %q, not %q", op.String(), fields[1])
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,174 +0,0 @@
|
||||||
// Code generated by "stringer -type=Opcode,AugOp,BuiltinOp"; DO NOT EDIT.
|
|
||||||
|
|
||||||
package compiler
|
|
||||||
|
|
||||||
import "strconv"
|
|
||||||
|
|
||||||
func _() {
|
|
||||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
|
||||||
// Re-run the stringer command to generate them again.
|
|
||||||
var x [1]struct{}
|
|
||||||
_ = x[Nop-0]
|
|
||||||
_ = x[Num-1]
|
|
||||||
_ = x[Str-2]
|
|
||||||
_ = x[Dupe-3]
|
|
||||||
_ = x[Drop-4]
|
|
||||||
_ = x[Swap-5]
|
|
||||||
_ = x[Field-6]
|
|
||||||
_ = x[FieldInt-7]
|
|
||||||
_ = x[FieldByName-8]
|
|
||||||
_ = x[FieldByNameStr-9]
|
|
||||||
_ = x[Global-10]
|
|
||||||
_ = x[Local-11]
|
|
||||||
_ = x[Special-12]
|
|
||||||
_ = x[ArrayGlobal-13]
|
|
||||||
_ = x[ArrayLocal-14]
|
|
||||||
_ = x[InGlobal-15]
|
|
||||||
_ = x[InLocal-16]
|
|
||||||
_ = x[AssignField-17]
|
|
||||||
_ = x[AssignGlobal-18]
|
|
||||||
_ = x[AssignLocal-19]
|
|
||||||
_ = x[AssignSpecial-20]
|
|
||||||
_ = x[AssignArrayGlobal-21]
|
|
||||||
_ = x[AssignArrayLocal-22]
|
|
||||||
_ = x[Delete-23]
|
|
||||||
_ = x[DeleteAll-24]
|
|
||||||
_ = x[IncrField-25]
|
|
||||||
_ = x[IncrGlobal-26]
|
|
||||||
_ = x[IncrLocal-27]
|
|
||||||
_ = x[IncrSpecial-28]
|
|
||||||
_ = x[IncrArrayGlobal-29]
|
|
||||||
_ = x[IncrArrayLocal-30]
|
|
||||||
_ = x[AugAssignField-31]
|
|
||||||
_ = x[AugAssignGlobal-32]
|
|
||||||
_ = x[AugAssignLocal-33]
|
|
||||||
_ = x[AugAssignSpecial-34]
|
|
||||||
_ = x[AugAssignArrayGlobal-35]
|
|
||||||
_ = x[AugAssignArrayLocal-36]
|
|
||||||
_ = x[Regex-37]
|
|
||||||
_ = x[IndexMulti-38]
|
|
||||||
_ = x[ConcatMulti-39]
|
|
||||||
_ = x[Add-40]
|
|
||||||
_ = x[Subtract-41]
|
|
||||||
_ = x[Multiply-42]
|
|
||||||
_ = x[Divide-43]
|
|
||||||
_ = x[Power-44]
|
|
||||||
_ = x[Modulo-45]
|
|
||||||
_ = x[Equals-46]
|
|
||||||
_ = x[NotEquals-47]
|
|
||||||
_ = x[Less-48]
|
|
||||||
_ = x[Greater-49]
|
|
||||||
_ = x[LessOrEqual-50]
|
|
||||||
_ = x[GreaterOrEqual-51]
|
|
||||||
_ = x[Concat-52]
|
|
||||||
_ = x[Match-53]
|
|
||||||
_ = x[NotMatch-54]
|
|
||||||
_ = x[Not-55]
|
|
||||||
_ = x[UnaryMinus-56]
|
|
||||||
_ = x[UnaryPlus-57]
|
|
||||||
_ = x[Boolean-58]
|
|
||||||
_ = x[Jump-59]
|
|
||||||
_ = x[JumpFalse-60]
|
|
||||||
_ = x[JumpTrue-61]
|
|
||||||
_ = x[JumpEquals-62]
|
|
||||||
_ = x[JumpNotEquals-63]
|
|
||||||
_ = x[JumpLess-64]
|
|
||||||
_ = x[JumpGreater-65]
|
|
||||||
_ = x[JumpLessOrEqual-66]
|
|
||||||
_ = x[JumpGreaterOrEqual-67]
|
|
||||||
_ = x[Next-68]
|
|
||||||
_ = x[Exit-69]
|
|
||||||
_ = x[ForIn-70]
|
|
||||||
_ = x[BreakForIn-71]
|
|
||||||
_ = x[CallBuiltin-72]
|
|
||||||
_ = x[CallSplit-73]
|
|
||||||
_ = x[CallSplitSep-74]
|
|
||||||
_ = x[CallSprintf-75]
|
|
||||||
_ = x[CallUser-76]
|
|
||||||
_ = x[CallNative-77]
|
|
||||||
_ = x[Return-78]
|
|
||||||
_ = x[ReturnNull-79]
|
|
||||||
_ = x[Nulls-80]
|
|
||||||
_ = x[Print-81]
|
|
||||||
_ = x[Printf-82]
|
|
||||||
_ = x[Getline-83]
|
|
||||||
_ = x[GetlineField-84]
|
|
||||||
_ = x[GetlineGlobal-85]
|
|
||||||
_ = x[GetlineLocal-86]
|
|
||||||
_ = x[GetlineSpecial-87]
|
|
||||||
_ = x[GetlineArray-88]
|
|
||||||
_ = x[EndOpcode-89]
|
|
||||||
}
|
|
||||||
|
|
||||||
const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode"
|
|
||||||
|
|
||||||
var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826}
|
|
||||||
|
|
||||||
func (i Opcode) String() string {
|
|
||||||
if i < 0 || i >= Opcode(len(_Opcode_index)-1) {
|
|
||||||
return "Opcode(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
||||||
}
|
|
||||||
return _Opcode_name[_Opcode_index[i]:_Opcode_index[i+1]]
|
|
||||||
}
|
|
||||||
func _() {
|
|
||||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
|
||||||
// Re-run the stringer command to generate them again.
|
|
||||||
var x [1]struct{}
|
|
||||||
_ = x[AugOpAdd-0]
|
|
||||||
_ = x[AugOpSub-1]
|
|
||||||
_ = x[AugOpMul-2]
|
|
||||||
_ = x[AugOpDiv-3]
|
|
||||||
_ = x[AugOpPow-4]
|
|
||||||
_ = x[AugOpMod-5]
|
|
||||||
}
|
|
||||||
|
|
||||||
const _AugOp_name = "AugOpAddAugOpSubAugOpMulAugOpDivAugOpPowAugOpMod"
|
|
||||||
|
|
||||||
var _AugOp_index = [...]uint8{0, 8, 16, 24, 32, 40, 48}
|
|
||||||
|
|
||||||
func (i AugOp) String() string {
|
|
||||||
if i < 0 || i >= AugOp(len(_AugOp_index)-1) {
|
|
||||||
return "AugOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
||||||
}
|
|
||||||
return _AugOp_name[_AugOp_index[i]:_AugOp_index[i+1]]
|
|
||||||
}
|
|
||||||
func _() {
|
|
||||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
|
||||||
// Re-run the stringer command to generate them again.
|
|
||||||
var x [1]struct{}
|
|
||||||
_ = x[BuiltinAtan2-0]
|
|
||||||
_ = x[BuiltinClose-1]
|
|
||||||
_ = x[BuiltinCos-2]
|
|
||||||
_ = x[BuiltinExp-3]
|
|
||||||
_ = x[BuiltinFflush-4]
|
|
||||||
_ = x[BuiltinFflushAll-5]
|
|
||||||
_ = x[BuiltinGsub-6]
|
|
||||||
_ = x[BuiltinIndex-7]
|
|
||||||
_ = x[BuiltinInt-8]
|
|
||||||
_ = x[BuiltinLength-9]
|
|
||||||
_ = x[BuiltinLengthArg-10]
|
|
||||||
_ = x[BuiltinLog-11]
|
|
||||||
_ = x[BuiltinMatch-12]
|
|
||||||
_ = x[BuiltinRand-13]
|
|
||||||
_ = x[BuiltinSin-14]
|
|
||||||
_ = x[BuiltinSqrt-15]
|
|
||||||
_ = x[BuiltinSrand-16]
|
|
||||||
_ = x[BuiltinSrandSeed-17]
|
|
||||||
_ = x[BuiltinSub-18]
|
|
||||||
_ = x[BuiltinSubstr-19]
|
|
||||||
_ = x[BuiltinSubstrLength-20]
|
|
||||||
_ = x[BuiltinSystem-21]
|
|
||||||
_ = x[BuiltinTolower-22]
|
|
||||||
_ = x[BuiltinToupper-23]
|
|
||||||
}
|
|
||||||
|
|
||||||
const _BuiltinOp_name = "BuiltinAtan2BuiltinCloseBuiltinCosBuiltinExpBuiltinFflushBuiltinFflushAllBuiltinGsubBuiltinIndexBuiltinIntBuiltinLengthBuiltinLengthArgBuiltinLogBuiltinMatchBuiltinRandBuiltinSinBuiltinSqrtBuiltinSrandBuiltinSrandSeedBuiltinSubBuiltinSubstrBuiltinSubstrLengthBuiltinSystemBuiltinTolowerBuiltinToupper"
|
|
||||||
|
|
||||||
var _BuiltinOp_index = [...]uint16{0, 12, 24, 34, 44, 57, 73, 84, 96, 106, 119, 135, 145, 157, 168, 178, 189, 201, 217, 227, 240, 259, 272, 286, 300}
|
|
||||||
|
|
||||||
func (i BuiltinOp) String() string {
|
|
||||||
if i < 0 || i >= BuiltinOp(len(_BuiltinOp_index)-1) {
|
|
||||||
return "BuiltinOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
||||||
}
|
|
||||||
return _BuiltinOp_name[_BuiltinOp_index[i]:_BuiltinOp_index[i+1]]
|
|
||||||
}
|
|
|
@ -1,180 +0,0 @@
|
||||||
package compiler
|
|
||||||
|
|
||||||
//go:generate go run golang.org/x/tools/cmd/stringer@v0.1.8 -type=Opcode,AugOp,BuiltinOp
|
|
||||||
|
|
||||||
// Opcode is one word of a compiled program: either a virtual machine
// instruction or an argument belonging to the instruction before it. The
// trailing comment on each opcode lists the argument words it consumes.
//
// This would normally be called "bytecode", but that term is avoided here
// because each opcode is a 32-bit word, not an 8-bit byte.
//
// According to the original author's testing, 32 bits appeared to be the
// fastest of the widths tried. It also means overflowing jump offsets is not
// a practical worry (the compiler does check for it): an AWK program would
// have to be bigger than 2GB.
type Opcode int32

// The numeric values below are assigned by iota, so the order of this list
// is significant; EndOpcode must stay last.
const (
	Nop Opcode = iota

	// Stack operations
	Num // numIndex
	Str // strIndex
	Dupe
	Drop
	Swap

	// Fetch a field, variable, or array item
	Field
	FieldInt // index
	FieldByName
	FieldByNameStr // strIndex
	Global         // index
	Local          // index
	Special        // index
	ArrayGlobal    // arrayIndex
	ArrayLocal     // arrayIndex
	InGlobal       // arrayIndex
	InLocal        // arrayIndex

	// Assign a field, variable, or array item
	AssignField
	AssignGlobal      // index
	AssignLocal       // index
	AssignSpecial     // index
	AssignArrayGlobal // arrayIndex
	AssignArrayLocal  // arrayIndex

	// Delete statement
	Delete    // arrayScope arrayIndex
	DeleteAll // arrayScope arrayIndex

	// Post-increment and post-decrement
	IncrField       // amount
	IncrGlobal      // amount index
	IncrLocal       // amount index
	IncrSpecial     // amount index
	IncrArrayGlobal // amount arrayIndex
	IncrArrayLocal  // amount arrayIndex

	// Augmented assignment (also used for pre-increment and pre-decrement)
	AugAssignField       // augOp
	AugAssignGlobal      // augOp index
	AugAssignLocal       // augOp index
	AugAssignSpecial     // augOp index
	AugAssignArrayGlobal // augOp arrayIndex
	AugAssignArrayLocal  // augOp arrayIndex

	// Stand-alone regex expression /foo/
	Regex // regexIndex

	// Multi-index concatenation
	IndexMulti // num

	// Multi-value concatenation
	ConcatMulti // num

	// Binary operators
	Add
	Subtract
	Multiply
	Divide
	Power
	Modulo
	Equals
	NotEquals
	Less
	Greater
	LessOrEqual
	GreaterOrEqual
	Concat
	Match
	NotMatch

	// Unary operators
	Not
	UnaryMinus
	UnaryPlus
	Boolean

	// Control flow
	Jump               // offset
	JumpFalse          // offset
	JumpTrue           // offset
	JumpEquals         // offset
	JumpNotEquals      // offset
	JumpLess           // offset
	JumpGreater        // offset
	JumpLessOrEqual    // offset
	JumpGreaterOrEqual // offset
	Next
	Exit
	ForIn // varScope varIndex arrayScope arrayIndex offset
	BreakForIn

	// Builtin functions
	CallBuiltin  // builtinOp
	CallSplit    // arrayScope arrayIndex
	CallSplitSep // arrayScope arrayIndex
	CallSprintf  // numArgs

	// User and native functions
	CallUser   // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...]
	CallNative // funcIndex numArgs
	Return
	ReturnNull
	Nulls // numNulls

	// Print, printf, and getline
	Print          // numArgs redirect
	Printf         // numArgs redirect
	Getline        // redirect
	GetlineField   // redirect
	GetlineGlobal  // redirect index
	GetlineLocal   // redirect index
	GetlineSpecial // redirect index
	GetlineArray   // redirect arrayScope arrayIndex

	// EndOpcode is a sentinel one past the last real opcode; loops over all
	// opcodes use it as their exclusive upper bound.
	EndOpcode
)
|
|
||||||
|
|
||||||
// AugOp represents an augmented assignment operation.
|
|
||||||
type AugOp Opcode
|
|
||||||
|
|
||||||
const (
|
|
||||||
AugOpAdd AugOp = iota
|
|
||||||
AugOpSub
|
|
||||||
AugOpMul
|
|
||||||
AugOpDiv
|
|
||||||
AugOpPow
|
|
||||||
AugOpMod
|
|
||||||
)
|
|
||||||
|
|
||||||
// BuiltinOp represents a builtin function call.
|
|
||||||
type BuiltinOp Opcode
|
|
||||||
|
|
||||||
const (
|
|
||||||
BuiltinAtan2 BuiltinOp = iota
|
|
||||||
BuiltinClose
|
|
||||||
BuiltinCos
|
|
||||||
BuiltinExp
|
|
||||||
BuiltinFflush
|
|
||||||
BuiltinFflushAll
|
|
||||||
BuiltinGsub
|
|
||||||
BuiltinIndex
|
|
||||||
BuiltinInt
|
|
||||||
BuiltinLength
|
|
||||||
BuiltinLengthArg
|
|
||||||
BuiltinLog
|
|
||||||
BuiltinMatch
|
|
||||||
BuiltinRand
|
|
||||||
BuiltinSin
|
|
||||||
BuiltinSqrt
|
|
||||||
BuiltinSrand
|
|
||||||
BuiltinSrandSeed
|
|
||||||
BuiltinSub
|
|
||||||
BuiltinSubstr
|
|
||||||
BuiltinSubstrLength
|
|
||||||
BuiltinSystem
|
|
||||||
BuiltinTolower
|
|
||||||
BuiltinToupper
|
|
||||||
)
|
|
|
@ -1,392 +0,0 @@
|
||||||
// Tests copied from encoding/csv to ensure we pass all the relevant cases.
|
|
||||||
|
|
||||||
// These tests are a subset of those in encoding/csv used to test Reader.
|
|
||||||
// However, the §, ¶ and ∑ special characters (for error positions) have been
|
|
||||||
// removed, and some tests have been removed or tweaked slightly because we
|
|
||||||
// don't support all the encoding/csv features (FieldsPerRecord is not
|
|
||||||
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off).
|
|
||||||
|
|
||||||
package interp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"encoding/csv"
|
|
||||||
"reflect"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// readTest is one table-driven case for the CSV reader tests: an input
// document plus either the rows it should parse to or the error text that
// should be reported.
type readTest struct {
	Name, Input string     // subtest name and raw CSV text to parse
	Output      [][]string // expected rows (left nil when nothing is expected)
	Error       string     // expected error message; empty means no error

	// These fields are copied into the CSVInputConfig
	Comma, Comment rune
}
|
|
||||||
|
|
||||||
var readTests = []readTest{{
|
|
||||||
Name: "Simple",
|
|
||||||
Input: "a,b,c\n",
|
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
|
||||||
}, {
|
|
||||||
Name: "CRLF",
|
|
||||||
Input: "a,b\r\nc,d\r\n",
|
|
||||||
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
|
||||||
}, {
|
|
||||||
Name: "BareCR",
|
|
||||||
Input: "a,b\rc,d\r\n",
|
|
||||||
Output: [][]string{{"a", "b\rc", "d"}},
|
|
||||||
}, {
|
|
||||||
Name: "RFC4180test",
|
|
||||||
Input: `#field1,field2,field3
|
|
||||||
"aaa","bb
|
|
||||||
b","ccc"
|
|
||||||
"a,a","b""bb","ccc"
|
|
||||||
zzz,yyy,xxx
|
|
||||||
`,
|
|
||||||
Output: [][]string{
|
|
||||||
{"#field1", "field2", "field3"},
|
|
||||||
{"aaa", "bb\nb", "ccc"},
|
|
||||||
{"a,a", `b"bb`, "ccc"},
|
|
||||||
{"zzz", "yyy", "xxx"},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "NoEOLTest",
|
|
||||||
Input: "a,b,c",
|
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
|
||||||
}, {
|
|
||||||
Name: "Semicolon",
|
|
||||||
Input: "a;b;c\n",
|
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
|
||||||
Comma: ';',
|
|
||||||
}, {
|
|
||||||
Name: "MultiLine",
|
|
||||||
Input: `"two
|
|
||||||
line","one line","three
|
|
||||||
line
|
|
||||||
field"`,
|
|
||||||
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
|
||||||
}, {
|
|
||||||
Name: "BlankLine",
|
|
||||||
Input: "a,b,c\n\nd,e,f\n\n",
|
|
||||||
Output: [][]string{
|
|
||||||
{"a", "b", "c"},
|
|
||||||
{"d", "e", "f"},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "BlankLineFieldCount",
|
|
||||||
Input: "a,b,c\n\nd,e,f\n\n",
|
|
||||||
Output: [][]string{
|
|
||||||
{"a", "b", "c"},
|
|
||||||
{"d", "e", "f"},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "LeadingSpace",
|
|
||||||
Input: " a, b, c\n",
|
|
||||||
Output: [][]string{{" a", " b", " c"}},
|
|
||||||
}, {
|
|
||||||
Name: "Comment",
|
|
||||||
Input: "#1,2,3\na,b,c\n#comment",
|
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
|
||||||
Comment: '#',
|
|
||||||
}, {
|
|
||||||
Name: "NoComment",
|
|
||||||
Input: "#1,2,3\na,b,c",
|
|
||||||
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
|
||||||
}, {
|
|
||||||
Name: "LazyQuotes",
|
|
||||||
Input: `a "word","1"2",a","b`,
|
|
||||||
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
|
||||||
}, {
|
|
||||||
Name: "BareQuotes",
|
|
||||||
Input: `a "word","1"2",a"`,
|
|
||||||
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
|
||||||
}, {
|
|
||||||
Name: "BareDoubleQuotes",
|
|
||||||
Input: `a""b,c`,
|
|
||||||
Output: [][]string{{`a""b`, `c`}},
|
|
||||||
}, {
|
|
||||||
Name: "TrimQuote",
|
|
||||||
Input: `"a"," b",c`,
|
|
||||||
Output: [][]string{{"a", " b", "c"}},
|
|
||||||
}, {
|
|
||||||
Name: "FieldCount",
|
|
||||||
Input: "a,b,c\nd,e",
|
|
||||||
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCommaEOF",
|
|
||||||
Input: "a,b,c,",
|
|
||||||
Output: [][]string{{"a", "b", "c", ""}},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCommaEOL",
|
|
||||||
Input: "a,b,c,\n",
|
|
||||||
Output: [][]string{{"a", "b", "c", ""}},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCommaSpaceEOF",
|
|
||||||
Input: "a,b,c, ",
|
|
||||||
Output: [][]string{{"a", "b", "c", " "}},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCommaSpaceEOL",
|
|
||||||
Input: "a,b,c, \n",
|
|
||||||
Output: [][]string{{"a", "b", "c", " "}},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCommaLine3",
|
|
||||||
Input: "a,b,c\nd,e,f\ng,hi,",
|
|
||||||
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
|
|
||||||
}, {
|
|
||||||
Name: "NotTrailingComma3",
|
|
||||||
Input: "a,b,c, \n",
|
|
||||||
Output: [][]string{{"a", "b", "c", " "}},
|
|
||||||
}, {
|
|
||||||
Name: "CommaFieldTest",
|
|
||||||
Input: `x,y,z,w
|
|
||||||
x,y,z,
|
|
||||||
x,y,,
|
|
||||||
x,,,
|
|
||||||
,,,
|
|
||||||
"x","y","z","w"
|
|
||||||
"x","y","z",""
|
|
||||||
"x","y","",""
|
|
||||||
"x","","",""
|
|
||||||
"","","",""
|
|
||||||
`,
|
|
||||||
Output: [][]string{
|
|
||||||
{"x", "y", "z", "w"},
|
|
||||||
{"x", "y", "z", ""},
|
|
||||||
{"x", "y", "", ""},
|
|
||||||
{"x", "", "", ""},
|
|
||||||
{"", "", "", ""},
|
|
||||||
{"x", "y", "z", "w"},
|
|
||||||
{"x", "y", "z", ""},
|
|
||||||
{"x", "y", "", ""},
|
|
||||||
{"x", "", "", ""},
|
|
||||||
{"", "", "", ""},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCommaIneffective1",
|
|
||||||
Input: "a,b,\nc,d,e",
|
|
||||||
Output: [][]string{
|
|
||||||
{"a", "b", ""},
|
|
||||||
{"c", "d", "e"},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "ReadAllReuseRecord",
|
|
||||||
Input: "a,b\nc,d",
|
|
||||||
Output: [][]string{
|
|
||||||
{"a", "b"},
|
|
||||||
{"c", "d"},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "CRLFInQuotedField", // Issue 21201
|
|
||||||
Input: "A,\"Hello\r\nHi\",B\r\n",
|
|
||||||
Output: [][]string{
|
|
||||||
{"A", "Hello\nHi", "B"},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "BinaryBlobField", // Issue 19410
|
|
||||||
Input: "x09\x41\xb4\x1c,aktau",
|
|
||||||
Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
|
|
||||||
}, {
|
|
||||||
Name: "TrailingCR",
|
|
||||||
Input: "field1,field2\r",
|
|
||||||
Output: [][]string{{"field1", "field2"}},
|
|
||||||
}, {
|
|
||||||
Name: "QuotedTrailingCR",
|
|
||||||
Input: "\"field\"\r",
|
|
||||||
Output: [][]string{{"field"}},
|
|
||||||
}, {
|
|
||||||
Name: "FieldCR",
|
|
||||||
Input: "field\rfield\r",
|
|
||||||
Output: [][]string{{"field\rfield"}},
|
|
||||||
}, {
|
|
||||||
Name: "FieldCRCR",
|
|
||||||
Input: "field\r\rfield\r\r",
|
|
||||||
Output: [][]string{{"field\r\rfield\r"}},
|
|
||||||
}, {
|
|
||||||
Name: "FieldCRCRLF",
|
|
||||||
Input: "field\r\r\nfield\r\r\n",
|
|
||||||
Output: [][]string{{"field\r"}, {"field\r"}},
|
|
||||||
}, {
|
|
||||||
Name: "FieldCRCRLFCR",
|
|
||||||
Input: "field\r\r\n\rfield\r\r\n\r",
|
|
||||||
Output: [][]string{{"field\r"}, {"\rfield\r"}},
|
|
||||||
}, {
|
|
||||||
Name: "FieldCRCRLFCRCR",
|
|
||||||
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
|
|
||||||
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
|
|
||||||
}, {
|
|
||||||
Name: "MultiFieldCRCRLFCRCR",
|
|
||||||
Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
|
|
||||||
Output: [][]string{
|
|
||||||
{"field1", "field2\r"},
|
|
||||||
{"\r\rfield1", "field2\r"},
|
|
||||||
{"\r\r", ""},
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
Name: "NonASCIICommaAndComment",
|
|
||||||
Input: "a£b,c£ \td,e\n€ comment\n",
|
|
||||||
Output: [][]string{{"a", "b,c", " \td,e"}},
|
|
||||||
Comma: '£',
|
|
||||||
Comment: '€',
|
|
||||||
}, {
|
|
||||||
Name: "NonASCIICommaAndCommentWithQuotes",
|
|
||||||
Input: "a€\" b,\"€ c\nλ comment\n",
|
|
||||||
Output: [][]string{{"a", " b,", " c"}},
|
|
||||||
Comma: '€',
|
|
||||||
Comment: 'λ',
|
|
||||||
}, {
|
|
||||||
// λ and θ start with the same byte.
|
|
||||||
// This tests that the parser doesn't confuse such characters.
|
|
||||||
Name: "NonASCIICommaConfusion",
|
|
||||||
Input: "\"abθcd\"λefθgh",
|
|
||||||
Output: [][]string{{"abθcd", "efθgh"}},
|
|
||||||
Comma: 'λ',
|
|
||||||
Comment: '€',
|
|
||||||
}, {
|
|
||||||
Name: "NonASCIICommentConfusion",
|
|
||||||
Input: "λ\nλ\nθ\nλ\n",
|
|
||||||
Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
|
|
||||||
Comment: 'θ',
|
|
||||||
}, {
|
|
||||||
Name: "QuotedFieldMultipleLF",
|
|
||||||
Input: "\"\n\n\n\n\"",
|
|
||||||
Output: [][]string{{"\n\n\n\n"}},
|
|
||||||
}, {
|
|
||||||
Name: "MultipleCRLF",
|
|
||||||
Input: "\r\n\r\n\r\n\r\n",
|
|
||||||
}, {
|
|
||||||
// The implementation may read each line in several chunks if it doesn't fit entirely
|
|
||||||
// in the read buffer, so we should test the code to handle that condition.
|
|
||||||
Name: "HugeLines",
|
|
||||||
Input: strings.Repeat("#ignore\n", 10000) + "" + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
|
|
||||||
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
|
|
||||||
Comment: '#',
|
|
||||||
}, {
|
|
||||||
Name: "LazyQuoteWithTrailingCRLF",
|
|
||||||
Input: "\"foo\"bar\"\r\n",
|
|
||||||
Output: [][]string{{`foo"bar`}},
|
|
||||||
}, {
|
|
||||||
Name: "DoubleQuoteWithTrailingCRLF",
|
|
||||||
Input: "\"foo\"\"bar\"\r\n",
|
|
||||||
Output: [][]string{{`foo"bar`}},
|
|
||||||
}, {
|
|
||||||
Name: "EvenQuotes",
|
|
||||||
Input: `""""""""`,
|
|
||||||
Output: [][]string{{`"""`}},
|
|
||||||
}, {
|
|
||||||
Name: "LazyOddQuotes",
|
|
||||||
Input: `"""""""`,
|
|
||||||
Output: [][]string{{`"""`}},
|
|
||||||
}, {
|
|
||||||
Name: "BadComma1",
|
|
||||||
Comma: '\n',
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadComma2",
|
|
||||||
Comma: '\r',
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadComma3",
|
|
||||||
Comma: '"',
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadComma4",
|
|
||||||
Comma: utf8.RuneError,
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadComment1",
|
|
||||||
Comment: '\n',
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadComment2",
|
|
||||||
Comment: '\r',
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadComment3",
|
|
||||||
Comment: utf8.RuneError,
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}, {
|
|
||||||
Name: "BadCommaComment",
|
|
||||||
Comma: 'X',
|
|
||||||
Comment: 'X',
|
|
||||||
Error: "invalid CSV field separator or comment delimiter",
|
|
||||||
}}
|
|
||||||
|
|
||||||
func TestCSVReader(t *testing.T) {
|
|
||||||
for _, tt := range readTests {
|
|
||||||
t.Run(tt.Name, func(t *testing.T) {
|
|
||||||
inputConfig := CSVInputConfig{
|
|
||||||
Separator: tt.Comma,
|
|
||||||
Comment: tt.Comment,
|
|
||||||
}
|
|
||||||
if inputConfig.Separator == 0 {
|
|
||||||
inputConfig.Separator = ','
|
|
||||||
}
|
|
||||||
|
|
||||||
var out [][]string
|
|
||||||
err := validateCSVInputConfig(CSVMode, inputConfig)
|
|
||||||
if err == nil {
|
|
||||||
var fields []string
|
|
||||||
splitter := csvSplitter{
|
|
||||||
separator: inputConfig.Separator,
|
|
||||||
sepLen: utf8.RuneLen(inputConfig.Separator),
|
|
||||||
comment: inputConfig.Comment,
|
|
||||||
fields: &fields,
|
|
||||||
}
|
|
||||||
scanner := bufio.NewScanner(strings.NewReader(tt.Input))
|
|
||||||
scanner.Split(splitter.scan)
|
|
||||||
scanner.Buffer(make([]byte, inputBufSize), maxRecordLength)
|
|
||||||
|
|
||||||
for scanner.Scan() {
|
|
||||||
row := make([]string, len(fields))
|
|
||||||
copy(row, fields)
|
|
||||||
out = append(out, row)
|
|
||||||
|
|
||||||
// We don't explicitly check the returned token, but at
|
|
||||||
// least check it parses to the same row.
|
|
||||||
if strings.ContainsRune(tt.Input, '\r') {
|
|
||||||
// But FieldCRCRLF and similar tests don't round-trip
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
token := scanner.Text()
|
|
||||||
reader := csv.NewReader(strings.NewReader(token))
|
|
||||||
reader.Comma = inputConfig.Separator
|
|
||||||
reader.Comment = inputConfig.Comment
|
|
||||||
reader.FieldsPerRecord = -1
|
|
||||||
reader.LazyQuotes = true
|
|
||||||
tokenRow, err := reader.Read()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error reparsing token: %v", err)
|
|
||||||
}
|
|
||||||
if !reflect.DeepEqual(tokenRow, row) {
|
|
||||||
t.Fatalf("token mismatch:\ngot %q\nwant %q", tokenRow, row)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
err = scanner.Err()
|
|
||||||
}
|
|
||||||
|
|
||||||
if tt.Error != "" {
|
|
||||||
if err == nil {
|
|
||||||
t.Fatalf("error mismatch:\ngot nil\nwant %q", tt.Error)
|
|
||||||
}
|
|
||||||
if err.Error() != tt.Error {
|
|
||||||
t.Fatalf("error mismatch:\ngot %q\nwant %q", err.Error(), tt.Error)
|
|
||||||
}
|
|
||||||
if out != nil {
|
|
||||||
t.Fatalf("output mismatch:\ngot %q\nwant nil", out)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error mismatch:\ngot %q\nwant nil", err.Error())
|
|
||||||
}
|
|
||||||
if !reflect.DeepEqual(out, tt.Output) {
|
|
||||||
t.Fatalf("output mismatch:\ngot %q\nwant %q", out, tt.Output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,177 +0,0 @@
|
||||||
// Don't run these on Windows, because newline handling means they don't pass.
|
|
||||||
|
|
||||||
//go:build !windows
|
|
||||||
// +build !windows
|
|
||||||
|
|
||||||
package interp_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/interp"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
func Example() {
|
|
||||||
input := strings.NewReader("foo bar\n\nbaz buz")
|
|
||||||
err := interp.Exec("$0 { print $1 }", " ", input, nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// foo
|
|
||||||
// baz
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_fieldsep() {
|
|
||||||
// Use ',' as the field separator
|
|
||||||
input := strings.NewReader("1,2\n3,4")
|
|
||||||
err := interp.Exec("{ print $1, $2 }", ",", input, nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// 1 2
|
|
||||||
// 3 4
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_program() {
|
|
||||||
src := "{ print NR, tolower($0) }"
|
|
||||||
input := "A\naB\nAbC"
|
|
||||||
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
config := &interp.Config{
|
|
||||||
Stdin: strings.NewReader(input),
|
|
||||||
Vars: []string{"OFS", ":"},
|
|
||||||
}
|
|
||||||
_, err = interp.ExecProgram(prog, config)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// 1:a
|
|
||||||
// 2:ab
|
|
||||||
// 3:abc
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_funcs() {
|
|
||||||
src := `BEGIN { print sum(), sum(1), sum(2, 3, 4), repeat("xyz", 3) }`
|
|
||||||
|
|
||||||
parserConfig := &parser.ParserConfig{
|
|
||||||
Funcs: map[string]interface{}{
|
|
||||||
"sum": func(args ...float64) float64 {
|
|
||||||
sum := 0.0
|
|
||||||
for _, a := range args {
|
|
||||||
sum += a
|
|
||||||
}
|
|
||||||
return sum
|
|
||||||
},
|
|
||||||
"repeat": strings.Repeat,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), parserConfig)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
interpConfig := &interp.Config{
|
|
||||||
Funcs: parserConfig.Funcs,
|
|
||||||
}
|
|
||||||
_, err = interp.ExecProgram(prog, interpConfig)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// 0 1 9 xyzxyzxyz
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_new() {
|
|
||||||
// We'll execute this program multiple times on different inputs.
|
|
||||||
src := `{ print $1, x, $3; x++ }`
|
|
||||||
|
|
||||||
// Parse the program and set up the interpreter.
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
interpreter, err := interp.New(prog)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run it once on one input.
|
|
||||||
_, err = interpreter.Execute(&interp.Config{
|
|
||||||
Stdin: strings.NewReader("one two three"),
|
|
||||||
Environ: []string{}, // avoid calling os.Environ each time
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset variables and run it again efficiently on a different input (this
|
|
||||||
// could be from a completely different data source).
|
|
||||||
interpreter.ResetVars()
|
|
||||||
_, err = interpreter.Execute(&interp.Config{
|
|
||||||
Stdin: strings.NewReader("a b c\nd e f\n"),
|
|
||||||
Environ: []string{},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run it on another input, this time without resetting variables.
|
|
||||||
_, err = interpreter.Execute(&interp.Config{
|
|
||||||
Stdin: strings.NewReader("x y z"),
|
|
||||||
Environ: []string{},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// one three
|
|
||||||
// a c
|
|
||||||
// d 1 f
|
|
||||||
// x 2 z
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_csv() {
|
|
||||||
src := `{ total += @"amount" } END { print total }`
|
|
||||||
input := `# comment
|
|
||||||
name,amount
|
|
||||||
Bob,17.50
|
|
||||||
Jill,20
|
|
||||||
"Boba Fett",100.00
|
|
||||||
`
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
config := &interp.Config{
|
|
||||||
Stdin: strings.NewReader(input),
|
|
||||||
InputMode: interp.CSVMode,
|
|
||||||
CSVInput: interp.CSVInputConfig{Comment: '#', Header: true},
|
|
||||||
}
|
|
||||||
_, err = interp.ExecProgram(prog, config)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// 137.5
|
|
||||||
}
|
|
|
@ -1,413 +0,0 @@
|
||||||
// Call native Go functions; helpers for some builtin function calls.
|
|
||||||
|
|
||||||
package interp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"reflect"
|
|
||||||
"sort"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/internal/ast"
|
|
||||||
. "github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Call native-defined function with given name and arguments, return
|
|
||||||
// its return value (or null value if it doesn't return anything).
|
|
||||||
func (p *interp) callNative(index int, args []value) (value, error) {
|
|
||||||
f := p.nativeFuncs[index]
|
|
||||||
minIn := len(f.in) // Minimum number of args we should pass
|
|
||||||
var variadicType reflect.Type
|
|
||||||
if f.isVariadic {
|
|
||||||
variadicType = f.in[len(f.in)-1].Elem()
|
|
||||||
minIn--
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build list of args to pass to function
|
|
||||||
values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation
|
|
||||||
for i, a := range args {
|
|
||||||
var argType reflect.Type
|
|
||||||
if !f.isVariadic || i < len(f.in)-1 {
|
|
||||||
argType = f.in[i]
|
|
||||||
} else {
|
|
||||||
// Final arg(s) when calling a variadic are all of this type
|
|
||||||
argType = variadicType
|
|
||||||
}
|
|
||||||
values = append(values, p.toNative(a, argType))
|
|
||||||
}
|
|
||||||
// Use zero value for any unspecified args
|
|
||||||
for i := len(args); i < minIn; i++ {
|
|
||||||
values = append(values, reflect.Zero(f.in[i]))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call Go function, determine return value
|
|
||||||
outs := f.value.Call(values)
|
|
||||||
switch len(outs) {
|
|
||||||
case 0:
|
|
||||||
// No return value, return null value to AWK
|
|
||||||
return null(), nil
|
|
||||||
case 1:
|
|
||||||
// Single return value
|
|
||||||
return fromNative(outs[0]), nil
|
|
||||||
case 2:
|
|
||||||
// Two-valued return of (scalar, error)
|
|
||||||
if !outs[1].IsNil() {
|
|
||||||
return null(), outs[1].Interface().(error)
|
|
||||||
}
|
|
||||||
return fromNative(outs[0]), nil
|
|
||||||
default:
|
|
||||||
// Should never happen (checked at parse time)
|
|
||||||
panic(fmt.Sprintf("unexpected number of return values: %d", len(outs)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert from an AWK value to a native Go value
|
|
||||||
func (p *interp) toNative(v value, typ reflect.Type) reflect.Value {
|
|
||||||
switch typ.Kind() {
|
|
||||||
case reflect.Bool:
|
|
||||||
return reflect.ValueOf(v.boolean())
|
|
||||||
case reflect.Int:
|
|
||||||
return reflect.ValueOf(int(v.num()))
|
|
||||||
case reflect.Int8:
|
|
||||||
return reflect.ValueOf(int8(v.num()))
|
|
||||||
case reflect.Int16:
|
|
||||||
return reflect.ValueOf(int16(v.num()))
|
|
||||||
case reflect.Int32:
|
|
||||||
return reflect.ValueOf(int32(v.num()))
|
|
||||||
case reflect.Int64:
|
|
||||||
return reflect.ValueOf(int64(v.num()))
|
|
||||||
case reflect.Uint:
|
|
||||||
return reflect.ValueOf(uint(v.num()))
|
|
||||||
case reflect.Uint8:
|
|
||||||
return reflect.ValueOf(uint8(v.num()))
|
|
||||||
case reflect.Uint16:
|
|
||||||
return reflect.ValueOf(uint16(v.num()))
|
|
||||||
case reflect.Uint32:
|
|
||||||
return reflect.ValueOf(uint32(v.num()))
|
|
||||||
case reflect.Uint64:
|
|
||||||
return reflect.ValueOf(uint64(v.num()))
|
|
||||||
case reflect.Float32:
|
|
||||||
return reflect.ValueOf(float32(v.num()))
|
|
||||||
case reflect.Float64:
|
|
||||||
return reflect.ValueOf(v.num())
|
|
||||||
case reflect.String:
|
|
||||||
return reflect.ValueOf(p.toString(v))
|
|
||||||
case reflect.Slice:
|
|
||||||
if typ.Elem().Kind() != reflect.Uint8 {
|
|
||||||
// Shouldn't happen: prevented by checkNativeFunc
|
|
||||||
panic(fmt.Sprintf("unexpected argument slice: %s", typ.Elem().Kind()))
|
|
||||||
}
|
|
||||||
return reflect.ValueOf([]byte(p.toString(v)))
|
|
||||||
default:
|
|
||||||
// Shouldn't happen: prevented by checkNativeFunc
|
|
||||||
panic(fmt.Sprintf("unexpected argument type: %s", typ.Kind()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert from a native Go value to an AWK value
|
|
||||||
func fromNative(v reflect.Value) value {
|
|
||||||
switch v.Kind() {
|
|
||||||
case reflect.Bool:
|
|
||||||
return boolean(v.Bool())
|
|
||||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
||||||
return num(float64(v.Int()))
|
|
||||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
||||||
return num(float64(v.Uint()))
|
|
||||||
case reflect.Float32, reflect.Float64:
|
|
||||||
return num(v.Float())
|
|
||||||
case reflect.String:
|
|
||||||
return str(v.String())
|
|
||||||
case reflect.Slice:
|
|
||||||
if b, ok := v.Interface().([]byte); ok {
|
|
||||||
return str(string(b))
|
|
||||||
}
|
|
||||||
// Shouldn't happen: prevented by checkNativeFunc
|
|
||||||
panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind()))
|
|
||||||
default:
|
|
||||||
// Shouldn't happen: prevented by checkNativeFunc
|
|
||||||
panic(fmt.Sprintf("unexpected return type: %s", v.Kind()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// nativeFunc caches the reflection data of one native Go function so it
// does not have to be recomputed on every call.
type nativeFunc struct {
	isVariadic bool           // result of reflect.Type.IsVariadic for the function
	in         []reflect.Type // declared parameter types, in order
	value      reflect.Value  // the function itself, ready for Call
}
|
|
||||||
|
|
||||||
// Check and initialize native functions
|
|
||||||
func (p *interp) initNativeFuncs(funcs map[string]interface{}) error {
|
|
||||||
for name, f := range funcs {
|
|
||||||
err := checkNativeFunc(name, f)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort functions by name, then use those indexes to build slice
|
|
||||||
// (this has to match how the parser sets the indexes).
|
|
||||||
names := make([]string, 0, len(funcs))
|
|
||||||
for name := range funcs {
|
|
||||||
names = append(names, name)
|
|
||||||
}
|
|
||||||
sort.Strings(names)
|
|
||||||
p.nativeFuncs = make([]nativeFunc, len(names))
|
|
||||||
for i, name := range names {
|
|
||||||
f := funcs[name]
|
|
||||||
typ := reflect.TypeOf(f)
|
|
||||||
in := make([]reflect.Type, typ.NumIn())
|
|
||||||
for j := 0; j < len(in); j++ {
|
|
||||||
in[j] = typ.In(j)
|
|
||||||
}
|
|
||||||
p.nativeFuncs[i] = nativeFunc{
|
|
||||||
isVariadic: typ.IsVariadic(),
|
|
||||||
in: in,
|
|
||||||
value: reflect.ValueOf(f),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Got this trick from the Go stdlib text/template source
|
|
||||||
var errorType = reflect.TypeOf((*error)(nil)).Elem()
|
|
||||||
|
|
||||||
// Check that native function with given name is okay to call from
|
|
||||||
// AWK, return an *interp.Error if not. This checks that f is actually
|
|
||||||
// a function, and that its parameter and return types are good.
|
|
||||||
func checkNativeFunc(name string, f interface{}) error {
|
|
||||||
if KeywordToken(name) != ILLEGAL {
|
|
||||||
return newError("can't use keyword %q as native function name", name)
|
|
||||||
}
|
|
||||||
|
|
||||||
typ := reflect.TypeOf(f)
|
|
||||||
if typ.Kind() != reflect.Func {
|
|
||||||
return newError("native function %q is not a function", name)
|
|
||||||
}
|
|
||||||
for i := 0; i < typ.NumIn(); i++ {
|
|
||||||
param := typ.In(i)
|
|
||||||
if typ.IsVariadic() && i == typ.NumIn()-1 {
|
|
||||||
param = param.Elem()
|
|
||||||
}
|
|
||||||
if !validNativeType(param) {
|
|
||||||
return newError("native function %q param %d is not int or string", name, i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
switch typ.NumOut() {
|
|
||||||
case 0:
|
|
||||||
// No return value is fine
|
|
||||||
case 1:
|
|
||||||
// Single scalar return value is fine
|
|
||||||
if !validNativeType(typ.Out(0)) {
|
|
||||||
return newError("native function %q return value is not int or string", name)
|
|
||||||
}
|
|
||||||
case 2:
|
|
||||||
// Returning (scalar, error) is handled too
|
|
||||||
if !validNativeType(typ.Out(0)) {
|
|
||||||
return newError("native function %q first return value is not int or string", name)
|
|
||||||
}
|
|
||||||
if typ.Out(1) != errorType {
|
|
||||||
return newError("native function %q second return value is not an error", name)
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return newError("native function %q returns more than two values", name)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if typ is a valid parameter or return type.
|
|
||||||
func validNativeType(typ reflect.Type) bool {
|
|
||||||
switch typ.Kind() {
|
|
||||||
case reflect.Bool:
|
|
||||||
return true
|
|
||||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
||||||
return true
|
|
||||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
||||||
return true
|
|
||||||
case reflect.Float32, reflect.Float64:
|
|
||||||
return true
|
|
||||||
case reflect.String:
|
|
||||||
return true
|
|
||||||
case reflect.Slice:
|
|
||||||
// Only allow []byte (convert to string in AWK)
|
|
||||||
return typ.Elem().Kind() == reflect.Uint8
|
|
||||||
default:
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Guts of the split() function
|
|
||||||
func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) {
|
|
||||||
var parts []string
|
|
||||||
if fs == " " {
|
|
||||||
parts = strings.Fields(s)
|
|
||||||
} else if s == "" {
|
|
||||||
// Leave parts 0 length on empty string
|
|
||||||
} else if utf8.RuneCountInString(fs) <= 1 {
|
|
||||||
parts = strings.Split(s, fs)
|
|
||||||
} else {
|
|
||||||
re, err := p.compileRegex(fs)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
parts = re.Split(s, -1)
|
|
||||||
}
|
|
||||||
array := make(map[string]value, len(parts))
|
|
||||||
for i, part := range parts {
|
|
||||||
array[strconv.Itoa(i+1)] = numStr(part)
|
|
||||||
}
|
|
||||||
p.arrays[p.arrayIndex(scope, index)] = array
|
|
||||||
return len(array), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Guts of the sub() and gsub() functions
|
|
||||||
func (p *interp) sub(regex, repl, in string, global bool) (out string, num int, err error) {
|
|
||||||
re, err := p.compileRegex(regex)
|
|
||||||
if err != nil {
|
|
||||||
return "", 0, err
|
|
||||||
}
|
|
||||||
count := 0
|
|
||||||
out = re.ReplaceAllStringFunc(in, func(s string) string {
|
|
||||||
// Only do the first replacement for sub(), or all for gsub()
|
|
||||||
if !global && count > 0 {
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
count++
|
|
||||||
// Handle & (ampersand) properly in replacement string
|
|
||||||
r := make([]byte, 0, 64) // Up to 64 byte replacement won't require heap allocation
|
|
||||||
for i := 0; i < len(repl); i++ {
|
|
||||||
switch repl[i] {
|
|
||||||
case '&':
|
|
||||||
r = append(r, s...)
|
|
||||||
case '\\':
|
|
||||||
i++
|
|
||||||
if i < len(repl) {
|
|
||||||
switch repl[i] {
|
|
||||||
case '&':
|
|
||||||
r = append(r, '&')
|
|
||||||
case '\\':
|
|
||||||
r = append(r, '\\')
|
|
||||||
default:
|
|
||||||
r = append(r, '\\', repl[i])
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
r = append(r, '\\')
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
r = append(r, repl[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return string(r)
|
|
||||||
})
|
|
||||||
return out, count, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// cachedFormat is one entry of the interpreter's format cache: the
// format string and conversion types produced by parseFmtTypes for a
// given AWK format string.
type cachedFormat struct {
	format string // format string rewritten for Go's fmt package
	types  []byte // one conversion type byte per argument consumed
}
|
|
||||||
|
|
||||||
// Parse given sprintf format string into Go format string, along with
|
|
||||||
// type conversion specifiers. Output is memoized in a simple cache
|
|
||||||
// for performance.
|
|
||||||
func (p *interp) parseFmtTypes(s string) (format string, types []byte, err error) {
|
|
||||||
if item, ok := p.formatCache[s]; ok {
|
|
||||||
return item.format, item.types, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
out := []byte(s)
|
|
||||||
for i := 0; i < len(s); i++ {
|
|
||||||
if s[i] == '%' {
|
|
||||||
i++
|
|
||||||
if i >= len(s) {
|
|
||||||
return "", nil, errors.New("expected type specifier after %")
|
|
||||||
}
|
|
||||||
if s[i] == '%' {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for i < len(s) && bytes.IndexByte([]byte(" .-+*#0123456789"), s[i]) >= 0 {
|
|
||||||
if s[i] == '*' {
|
|
||||||
types = append(types, 'd')
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if i >= len(s) {
|
|
||||||
return "", nil, errors.New("expected type specifier after %")
|
|
||||||
}
|
|
||||||
var t byte
|
|
||||||
switch s[i] {
|
|
||||||
case 's':
|
|
||||||
t = 's'
|
|
||||||
case 'd', 'i', 'o', 'x', 'X':
|
|
||||||
t = 'd'
|
|
||||||
case 'f', 'e', 'E', 'g', 'G':
|
|
||||||
t = 'f'
|
|
||||||
case 'u':
|
|
||||||
t = 'u'
|
|
||||||
out[i] = 'd'
|
|
||||||
case 'c':
|
|
||||||
t = 'c'
|
|
||||||
out[i] = 's'
|
|
||||||
default:
|
|
||||||
return "", nil, fmt.Errorf("invalid format type %q", s[i])
|
|
||||||
}
|
|
||||||
types = append(types, t)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dumb, non-LRU cache: just cache the first N formats
|
|
||||||
format = string(out)
|
|
||||||
if len(p.formatCache) < maxCachedFormats {
|
|
||||||
p.formatCache[s] = cachedFormat{format, types}
|
|
||||||
}
|
|
||||||
return format, types, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Guts of sprintf() function (also used by "printf" statement)
|
|
||||||
func (p *interp) sprintf(format string, args []value) (string, error) {
|
|
||||||
format, types, err := p.parseFmtTypes(format)
|
|
||||||
if err != nil {
|
|
||||||
return "", newError("format error: %s", err)
|
|
||||||
}
|
|
||||||
if len(types) > len(args) {
|
|
||||||
return "", newError("format error: got %d args, expected %d", len(args), len(types))
|
|
||||||
}
|
|
||||||
converted := make([]interface{}, 0, 7) // up to 7 args won't require heap allocation
|
|
||||||
for i, t := range types {
|
|
||||||
a := args[i]
|
|
||||||
var v interface{}
|
|
||||||
switch t {
|
|
||||||
case 's':
|
|
||||||
v = p.toString(a)
|
|
||||||
case 'd':
|
|
||||||
v = int(a.num())
|
|
||||||
case 'f':
|
|
||||||
v = a.num()
|
|
||||||
case 'u':
|
|
||||||
v = uint(a.num())
|
|
||||||
case 'c':
|
|
||||||
var c []byte
|
|
||||||
n, isStr := a.isTrueStr()
|
|
||||||
if isStr {
|
|
||||||
s := p.toString(a)
|
|
||||||
if len(s) > 0 {
|
|
||||||
c = []byte{s[0]}
|
|
||||||
} else {
|
|
||||||
c = []byte{0}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Follow the behaviour of awk and mawk, where %c
|
|
||||||
// operates on bytes (0-255), not Unicode codepoints
|
|
||||||
c = []byte{byte(n)}
|
|
||||||
}
|
|
||||||
v = c
|
|
||||||
}
|
|
||||||
converted = append(converted, v)
|
|
||||||
}
|
|
||||||
return fmt.Sprintf(format, converted...), nil
|
|
||||||
}
|
|
|
@ -1,107 +0,0 @@
|
||||||
// Fuzz tests for use with the Go 1.18 fuzzer.
|
|
||||||
|
|
||||||
//go:build go1.18
|
|
||||||
// +build go1.18
|
|
||||||
|
|
||||||
package interp_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/interp"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
func isFuzzTest(test interpTest) bool {
|
|
||||||
return test.err == "" && test.awkErr == "" && !strings.Contains(test.src, "!fuzz")
|
|
||||||
}
|
|
||||||
|
|
||||||
func FuzzSource(f *testing.F) {
|
|
||||||
for _, test := range interpTests {
|
|
||||||
if isFuzzTest(test) {
|
|
||||||
f.Add(test.src)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
f.Fuzz(func(t *testing.T, src string) {
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
interpreter, err := interp.New(prog)
|
|
||||||
if err != nil {
|
|
||||||
f.Fatalf("interp.New error: %v", err)
|
|
||||||
}
|
|
||||||
config := interp.Config{
|
|
||||||
Stdin: strings.NewReader("foo bar\nbazz\n"),
|
|
||||||
Output: ioutil.Discard,
|
|
||||||
Error: ioutil.Discard,
|
|
||||||
NoExec: true,
|
|
||||||
NoFileWrites: true,
|
|
||||||
NoFileReads: true,
|
|
||||||
Environ: []string{},
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
_, _ = interpreter.ExecuteContext(ctx, &config)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func FuzzInput(f *testing.F) {
|
|
||||||
f.Add("")
|
|
||||||
added := make(map[string]bool)
|
|
||||||
for _, test := range interpTests {
|
|
||||||
if test.in != "" && !added[test.in] {
|
|
||||||
f.Add(test.in)
|
|
||||||
added[test.in] = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
prog, err := parser.ParseProgram([]byte(`{ print $0, $3, $1, $10 }`), nil)
|
|
||||||
if err != nil {
|
|
||||||
f.Fatalf("parse error: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
interpreter, err := interp.New(prog)
|
|
||||||
if err != nil {
|
|
||||||
f.Fatalf("interp.New error: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var vars = [][]string{
|
|
||||||
{"FS", " ", "RS", "\n"},
|
|
||||||
{"FS", ",", "RS", "\n"},
|
|
||||||
{"FS", "\t", "RS", "\n"},
|
|
||||||
{"FS", "@+", "RS", "\n"},
|
|
||||||
{"FS", "\n", "RS", ""},
|
|
||||||
{"FS", " ", "RS", "X+"},
|
|
||||||
}
|
|
||||||
|
|
||||||
f.Fuzz(func(t *testing.T, in string) {
|
|
||||||
for _, v := range vars {
|
|
||||||
t.Run(fmt.Sprintf("Vars=%q", v), func(t *testing.T) {
|
|
||||||
interpreter.ResetVars()
|
|
||||||
config := interp.Config{
|
|
||||||
Stdin: strings.NewReader(in),
|
|
||||||
Output: ioutil.Discard,
|
|
||||||
Error: ioutil.Discard,
|
|
||||||
Vars: v,
|
|
||||||
NoExec: true,
|
|
||||||
NoFileWrites: true,
|
|
||||||
NoFileReads: true,
|
|
||||||
Environ: []string{},
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
_, err := interpreter.ExecuteContext(ctx, &config)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("execute error: %v", err)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
|
@ -1,75 +0,0 @@
|
||||||
// Fuzz tests for unexported functions for use with the Go 1.18 fuzzer.
|
|
||||||
|
|
||||||
//go:build go1.18
|
|
||||||
// +build go1.18
|
|
||||||
|
|
||||||
package interp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func FuzzParseFloatPrefix(f *testing.F) {
|
|
||||||
f.Add("")
|
|
||||||
f.Add("foo")
|
|
||||||
f.Add("The quick.")
|
|
||||||
f.Add("0")
|
|
||||||
f.Add("9")
|
|
||||||
f.Add("1.3e4")
|
|
||||||
f.Add("1.3E0")
|
|
||||||
f.Add("1.3e+5")
|
|
||||||
f.Add("1.3e-5")
|
|
||||||
f.Add("1E1000")
|
|
||||||
f.Add(" 1234 ")
|
|
||||||
f.Add("1234xyz")
|
|
||||||
f.Add("-1234567890")
|
|
||||||
f.Add("0x0")
|
|
||||||
f.Add("0X10")
|
|
||||||
f.Add("0x1234567890")
|
|
||||||
f.Add("0xabcdef")
|
|
||||||
f.Add("0xABCDEF")
|
|
||||||
f.Add("-0xa")
|
|
||||||
f.Add("+0XA")
|
|
||||||
f.Add("0xf.f")
|
|
||||||
f.Add("0xf.fp10")
|
|
||||||
f.Add("0xf.fp-10")
|
|
||||||
f.Add("0x.f")
|
|
||||||
f.Add("0xf.")
|
|
||||||
f.Add("0x.")
|
|
||||||
f.Add("nan")
|
|
||||||
f.Add("+nan")
|
|
||||||
f.Add("-nan")
|
|
||||||
f.Add("NAN")
|
|
||||||
f.Add("inf")
|
|
||||||
f.Add("+inf")
|
|
||||||
f.Add("-inf")
|
|
||||||
f.Add("INF")
|
|
||||||
|
|
||||||
f.Fuzz(func(t *testing.T, in string) {
|
|
||||||
nPrefix := parseFloatPrefix(in)
|
|
||||||
if nPrefix != 0 {
|
|
||||||
for i := 1; i <= len(in); i++ {
|
|
||||||
n, _ := parseFloatHelper(in[:i])
|
|
||||||
if n == nPrefix || math.IsNaN(n) && math.IsNaN(nPrefix) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t.Fatalf("no ParseFloat match: %q", in)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseFloatHelper(s string) (float64, error) {
|
|
||||||
s = strings.TrimSpace(s)
|
|
||||||
s = strings.ToLower(s)
|
|
||||||
if s == "+nan" || s == "-nan" {
|
|
||||||
return math.NaN(), nil
|
|
||||||
}
|
|
||||||
if strings.Contains(s, "0x") && strings.IndexAny(s, "pP") < 0 {
|
|
||||||
s += "p0"
|
|
||||||
}
|
|
||||||
return strconv.ParseFloat(s, 64)
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -1,899 +0,0 @@
|
||||||
// Input/output handling for GoAWK interpreter
|
|
||||||
|
|
||||||
package interp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"encoding/csv"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"regexp"
|
|
||||||
"runtime"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/internal/ast"
|
|
||||||
. "github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Print a line of output followed by a newline
|
|
||||||
func (p *interp) printLine(writer io.Writer, line string) error {
|
|
||||||
err := writeOutput(writer, line)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return writeOutput(writer, p.outputRecordSep)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print given arguments followed by a newline (for "print" statement).
|
|
||||||
func (p *interp) printArgs(writer io.Writer, args []value) error {
|
|
||||||
switch p.outputMode {
|
|
||||||
case CSVMode, TSVMode:
|
|
||||||
fields := make([]string, 0, 7) // up to 7 args won't require a heap allocation
|
|
||||||
for _, arg := range args {
|
|
||||||
fields = append(fields, arg.str(p.outputFormat))
|
|
||||||
}
|
|
||||||
err := p.writeCSV(writer, fields)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
// Print OFS-separated args followed by ORS (usually newline).
|
|
||||||
for i, arg := range args {
|
|
||||||
if i > 0 {
|
|
||||||
err := writeOutput(writer, p.outputFieldSep)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
err := writeOutput(writer, arg.str(p.outputFormat))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
err := writeOutput(writer, p.outputRecordSep)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *interp) writeCSV(output io.Writer, fields []string) error {
|
|
||||||
// If output is already a *bufio.Writer (the common case), csv.NewWriter
|
|
||||||
// will use it directly. This is not explicitly documented, but
|
|
||||||
// csv.NewWriter calls bufio.NewWriter which calls bufio.NewWriterSize
|
|
||||||
// with a 4KB buffer, and bufio.NewWriterSize is documented as returning
|
|
||||||
// the underlying bufio.Writer if it's passed a large enough one.
|
|
||||||
var flush func() error
|
|
||||||
_, isBuffered := output.(*bufio.Writer)
|
|
||||||
if !isBuffered {
|
|
||||||
// Otherwise create a new buffered writer and flush after writing.
|
|
||||||
if p.csvOutput == nil {
|
|
||||||
p.csvOutput = bufio.NewWriterSize(output, 4096)
|
|
||||||
} else {
|
|
||||||
p.csvOutput.Reset(output)
|
|
||||||
}
|
|
||||||
output = p.csvOutput
|
|
||||||
flush = p.csvOutput.Flush
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given the above, creating a new one of these is cheap.
|
|
||||||
writer := csv.NewWriter(output)
|
|
||||||
writer.Comma = p.csvOutputConfig.Separator
|
|
||||||
writer.UseCRLF = runtime.GOOS == "windows"
|
|
||||||
err := writer.Write(fields)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if flush != nil {
|
|
||||||
return flush()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// bufferedWriteCloser pairs a buffered writer with the Closer of the
// stream it wraps, so that output redirected to a file (eg: print >"out")
// is buffered but can still be closed.
type bufferedWriteCloser struct {
	*bufio.Writer // buffers writes to the wrapped stream
	io.Closer     // closes the wrapped stream
}
|
|
||||||
|
|
||||||
func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser {
|
|
||||||
writer := bufio.NewWriterSize(w, outputBufSize)
|
|
||||||
return &bufferedWriteCloser{writer, w}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (wc *bufferedWriteCloser) Close() error {
|
|
||||||
err := wc.Writer.Flush()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return wc.Closer.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the output stream for given redirect token and
|
|
||||||
// destination (file or pipe name)
|
|
||||||
func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) {
|
|
||||||
name := p.toString(destValue)
|
|
||||||
if _, ok := p.inputStreams[name]; ok {
|
|
||||||
return nil, newError("can't write to reader stream")
|
|
||||||
}
|
|
||||||
if w, ok := p.outputStreams[name]; ok {
|
|
||||||
return w, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
switch redirect {
|
|
||||||
case GREATER, APPEND:
|
|
||||||
if name == "-" {
|
|
||||||
// filename of "-" means write to stdout, eg: print "x" >"-"
|
|
||||||
return p.output, nil
|
|
||||||
}
|
|
||||||
// Write or append to file
|
|
||||||
if p.noFileWrites {
|
|
||||||
return nil, newError("can't write to file due to NoFileWrites")
|
|
||||||
}
|
|
||||||
p.flushOutputAndError() // ensure synchronization
|
|
||||||
flags := os.O_CREATE | os.O_WRONLY
|
|
||||||
if redirect == GREATER {
|
|
||||||
flags |= os.O_TRUNC
|
|
||||||
} else {
|
|
||||||
flags |= os.O_APPEND
|
|
||||||
}
|
|
||||||
w, err := os.OpenFile(name, flags, 0644)
|
|
||||||
if err != nil {
|
|
||||||
return nil, newError("output redirection error: %s", err)
|
|
||||||
}
|
|
||||||
buffered := newBufferedWriteCloser(w)
|
|
||||||
p.outputStreams[name] = buffered
|
|
||||||
return buffered, nil
|
|
||||||
|
|
||||||
case PIPE:
|
|
||||||
// Pipe to command
|
|
||||||
if p.noExec {
|
|
||||||
return nil, newError("can't write to pipe due to NoExec")
|
|
||||||
}
|
|
||||||
cmd := p.execShell(name)
|
|
||||||
w, err := cmd.StdinPipe()
|
|
||||||
if err != nil {
|
|
||||||
return nil, newError("error connecting to stdin pipe: %v", err)
|
|
||||||
}
|
|
||||||
cmd.Stdout = p.output
|
|
||||||
cmd.Stderr = p.errorOutput
|
|
||||||
p.flushOutputAndError() // ensure synchronization
|
|
||||||
err = cmd.Start()
|
|
||||||
if err != nil {
|
|
||||||
p.printErrorf("%s\n", err)
|
|
||||||
return ioutil.Discard, nil
|
|
||||||
}
|
|
||||||
p.commands[name] = cmd
|
|
||||||
buffered := newBufferedWriteCloser(w)
|
|
||||||
p.outputStreams[name] = buffered
|
|
||||||
return buffered, nil
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Should never happen
|
|
||||||
panic(fmt.Sprintf("unexpected redirect type %s", redirect))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Executes code using configured system shell
|
|
||||||
func (p *interp) execShell(code string) *exec.Cmd {
|
|
||||||
executable := p.shellCommand[0]
|
|
||||||
args := p.shellCommand[1:]
|
|
||||||
args = append(args, code)
|
|
||||||
if p.checkCtx {
|
|
||||||
return exec.CommandContext(p.ctx, executable, args...)
|
|
||||||
} else {
|
|
||||||
return exec.Command(executable, args...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get input Scanner to use for "getline" based on file name
|
|
||||||
func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
|
|
||||||
if _, ok := p.outputStreams[name]; ok {
|
|
||||||
return nil, newError("can't read from writer stream")
|
|
||||||
}
|
|
||||||
if _, ok := p.inputStreams[name]; ok {
|
|
||||||
return p.scanners[name], nil
|
|
||||||
}
|
|
||||||
if name == "-" {
|
|
||||||
// filename of "-" means read from stdin, eg: getline <"-"
|
|
||||||
if scanner, ok := p.scanners["-"]; ok {
|
|
||||||
return scanner, nil
|
|
||||||
}
|
|
||||||
scanner := p.newScanner(p.stdin, make([]byte, inputBufSize))
|
|
||||||
p.scanners[name] = scanner
|
|
||||||
return scanner, nil
|
|
||||||
}
|
|
||||||
if p.noFileReads {
|
|
||||||
return nil, newError("can't read from file due to NoFileReads")
|
|
||||||
}
|
|
||||||
r, err := os.Open(name)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err // *os.PathError is handled by caller (getline returns -1)
|
|
||||||
}
|
|
||||||
scanner := p.newScanner(r, make([]byte, inputBufSize))
|
|
||||||
p.scanners[name] = scanner
|
|
||||||
p.inputStreams[name] = r
|
|
||||||
return scanner, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get input Scanner to use for "getline" based on pipe name
|
|
||||||
func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
|
|
||||||
if _, ok := p.outputStreams[name]; ok {
|
|
||||||
return nil, newError("can't read from writer stream")
|
|
||||||
}
|
|
||||||
if _, ok := p.inputStreams[name]; ok {
|
|
||||||
return p.scanners[name], nil
|
|
||||||
}
|
|
||||||
if p.noExec {
|
|
||||||
return nil, newError("can't read from pipe due to NoExec")
|
|
||||||
}
|
|
||||||
cmd := p.execShell(name)
|
|
||||||
cmd.Stdin = p.stdin
|
|
||||||
cmd.Stderr = p.errorOutput
|
|
||||||
r, err := cmd.StdoutPipe()
|
|
||||||
if err != nil {
|
|
||||||
return nil, newError("error connecting to stdout pipe: %v", err)
|
|
||||||
}
|
|
||||||
p.flushOutputAndError() // ensure synchronization
|
|
||||||
err = cmd.Start()
|
|
||||||
if err != nil {
|
|
||||||
p.printErrorf("%s\n", err)
|
|
||||||
return bufio.NewScanner(strings.NewReader("")), nil
|
|
||||||
}
|
|
||||||
scanner := p.newScanner(r, make([]byte, inputBufSize))
|
|
||||||
p.commands[name] = cmd
|
|
||||||
p.inputStreams[name] = r
|
|
||||||
p.scanners[name] = scanner
|
|
||||||
return scanner, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new buffered Scanner for reading input records
|
|
||||||
func (p *interp) newScanner(input io.Reader, buffer []byte) *bufio.Scanner {
|
|
||||||
scanner := bufio.NewScanner(input)
|
|
||||||
switch {
|
|
||||||
case p.inputMode == CSVMode || p.inputMode == TSVMode:
|
|
||||||
splitter := csvSplitter{
|
|
||||||
separator: p.csvInputConfig.Separator,
|
|
||||||
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
|
|
||||||
comment: p.csvInputConfig.Comment,
|
|
||||||
header: p.csvInputConfig.Header,
|
|
||||||
fields: &p.fields,
|
|
||||||
setFieldNames: p.setFieldNames,
|
|
||||||
}
|
|
||||||
scanner.Split(splitter.scan)
|
|
||||||
case p.recordSep == "\n":
|
|
||||||
// Scanner default is to split on newlines
|
|
||||||
case p.recordSep == "":
|
|
||||||
// Empty string for RS means split on \n\n (blank lines)
|
|
||||||
splitter := blankLineSplitter{terminator: &p.recordTerminator}
|
|
||||||
scanner.Split(splitter.scan)
|
|
||||||
case len(p.recordSep) == 1:
|
|
||||||
splitter := byteSplitter{sep: p.recordSep[0]}
|
|
||||||
scanner.Split(splitter.scan)
|
|
||||||
case utf8.RuneCountInString(p.recordSep) >= 1:
|
|
||||||
// Multi-byte and single char but multi-byte RS use regex
|
|
||||||
splitter := regexSplitter{re: p.recordSepRegex, terminator: &p.recordTerminator}
|
|
||||||
scanner.Split(splitter.scan)
|
|
||||||
}
|
|
||||||
scanner.Buffer(buffer, maxRecordLength)
|
|
||||||
return scanner
|
|
||||||
}
|
|
||||||
|
|
||||||
// setFieldNames is called by csvSplitter.scan on the first row (if the
|
|
||||||
// "header" option is specified).
|
|
||||||
func (p *interp) setFieldNames(names []string) {
|
|
||||||
p.fieldNames = names
|
|
||||||
p.fieldIndexes = nil // clear name-to-index cache
|
|
||||||
|
|
||||||
// Populate FIELDS array (mapping of field indexes to field names).
|
|
||||||
fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"])
|
|
||||||
for k := range fieldsArray {
|
|
||||||
delete(fieldsArray, k)
|
|
||||||
}
|
|
||||||
for i, name := range names {
|
|
||||||
fieldsArray[strconv.Itoa(i+1)] = str(name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// dropCR returns data without a single trailing '\r', if present.
// Copied from bufio/scan.go in the stdlib: I guess it's a bit more
// efficient than bytes.TrimSuffix(data, []byte("\r"))
func dropCR(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\r' {
		return data[:len(data)-1]
	}
	return data
}

// dropLF returns data without a single trailing '\n', if present.
func dropLF(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\n' {
		return data[:len(data)-1]
	}
	return data
}

// blankLineSplitter splits input into records separated by blank lines.
// The text that terminated the most recent record is stored through the
// terminator pointer.
type blankLineSplitter struct {
	terminator *string
}

// scan is a bufio.SplitFunc-shaped method. It skips leading '\n' and
// '\r' bytes, then returns the text up to the next "\n\n" or "\n\r\n"
// sequence as one record (with a trailing '\r' dropped), consuming any
// further newline bytes that follow and storing the consumed separator
// in *s.terminator. At EOF the remaining data is returned as the final
// record, minus one trailing "\n" or "\r\n", and the stripped bytes
// become the terminator. When no complete record is available yet it
// returns (0, nil, nil) to request more data.
func (s blankLineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// Skip newlines at beginning of data
	i := 0
	for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
	}
	if i >= len(data) {
		// At end of data after newlines, skip entire data block
		return i, nil, nil
	}
	start := i

	// Try to find two consecutive newlines (or \n\r\n for Windows)
	for ; i < len(data); i++ {
		if data[i] != '\n' {
			continue
		}
		end := i
		switch {
		case i+1 < len(data) && data[i+1] == '\n':
			i += 2
		case i+2 < len(data) && data[i+1] == '\r' && data[i+2] == '\n':
			i += 3
		default:
			// A lone newline inside the record; keep looking.
			continue
		}
		for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
			i++ // Skip newlines at end of record
		}
		*s.terminator = string(data[end:i])
		return i, dropCR(data[start:end]), nil
	}

	// If we're at EOF, we have one final record; return it
	if atEOF {
		token = dropCR(dropLF(data[start:]))
		// The token begins at start (after any skipped leading newlines),
		// so the terminator is whatever follows start+len(token).
		*s.terminator = string(data[start+len(token):])
		return len(data), token, nil
	}

	// Request more data
	return 0, nil, nil
}
|
|
||||||
|
|
||||||
// byteSplitter splits records on a single separator byte.
type byteSplitter struct {
	sep byte
}

// scan is a bufio.SplitFunc: it returns the bytes before the first
// occurrence of sep and advances past the separator. At EOF any remaining
// data is returned as a final, unterminated record. Otherwise it asks for
// more data by returning (0, nil, nil).
func (s byteSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	switch pos := bytes.IndexByte(data, s.sep); {
	case atEOF && len(data) == 0:
		// Nothing left at all.
		return 0, nil, nil
	case pos >= 0:
		// Complete sep-terminated record.
		return pos + 1, data[:pos], nil
	case atEOF:
		// Final record without a trailing separator.
		return len(data), data, nil
	default:
		// Request more data
		return 0, nil, nil
	}
}
|
|
||||||
|
|
||||||
// Splitter that splits records on the given regular expression
|
|
||||||
type regexSplitter struct {
|
|
||||||
re *regexp.Regexp
|
|
||||||
terminator *string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
|
||||||
if atEOF && len(data) == 0 {
|
|
||||||
return 0, nil, nil
|
|
||||||
}
|
|
||||||
loc := s.re.FindIndex(data)
|
|
||||||
// Note: for a regex such as "()", loc[0]==loc[1]. Gawk behavior for this
|
|
||||||
// case is to match the entire input.
|
|
||||||
if loc != nil && loc[0] != loc[1] {
|
|
||||||
*s.terminator = string(data[loc[0]:loc[1]]) // set RT special variable
|
|
||||||
return loc[1], data[:loc[0]], nil
|
|
||||||
}
|
|
||||||
// If at EOF, we have a final, non-terminated record; return it
|
|
||||||
if atEOF {
|
|
||||||
*s.terminator = ""
|
|
||||||
return len(data), data, nil
|
|
||||||
}
|
|
||||||
// Request more data
|
|
||||||
return 0, nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Splitter that splits records in CSV or TSV format.
//
// One csvSplitter carries state across calls to scan: the scratch buffers
// are reused for every record, and rowNum/noBOMCheck track progress through
// the input.
type csvSplitter struct {
	separator rune // field separator rune
	sepLen    int  // byte length of separator (ensureFields sets it via utf8.RuneLen)
	comment   rune // if non-zero, lines starting with this rune are skipped
	header    bool // if true, the first row is passed to setFieldNames instead of being returned

	recordBuffer []byte // scratch: unquoted bytes of all fields of the current record
	fieldIndexes []int  // scratch: end offset of each field within recordBuffer
	noBOMCheck   bool   // true once the UTF-8 BOM check should no longer be done

	fields        *[]string            // destination for the parsed fields of each record
	setFieldNames func(names []string) // called with the header row when header is true
	rowNum        int                  // number of rows parsed so far (header row included)
}
|
|
||||||
|
|
||||||
// The structure of this code is taken from the stdlib encoding/csv Reader
|
|
||||||
// code, which is licensed under a compatible BSD-style license.
|
|
||||||
//
|
|
||||||
// We don't support all encoding/csv features: FieldsPerRecord is not
|
|
||||||
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off.
|
|
||||||
func (s *csvSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
|
||||||
// Some CSV files are saved with a UTF-8 BOM at the start; skip it.
|
|
||||||
if !s.noBOMCheck && len(data) >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF {
|
|
||||||
data = data[3:]
|
|
||||||
advance = 3
|
|
||||||
s.noBOMCheck = true
|
|
||||||
}
|
|
||||||
|
|
||||||
origData := data
|
|
||||||
if atEOF && len(data) == 0 {
|
|
||||||
// No more data, tell Scanner to stop.
|
|
||||||
return 0, nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
readLine := func() []byte {
|
|
||||||
newline := bytes.IndexByte(data, '\n')
|
|
||||||
var line []byte
|
|
||||||
switch {
|
|
||||||
case newline >= 0:
|
|
||||||
// Process a single line (including newline).
|
|
||||||
line = data[:newline+1]
|
|
||||||
data = data[newline+1:]
|
|
||||||
case atEOF:
|
|
||||||
// If at EOF, we have a final record without a newline.
|
|
||||||
line = data
|
|
||||||
data = data[len(data):]
|
|
||||||
default:
|
|
||||||
// Need more data
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// For backwards compatibility, drop trailing \r before EOF.
|
|
||||||
if len(line) > 0 && atEOF && line[len(line)-1] == '\r' {
|
|
||||||
line = line[:len(line)-1]
|
|
||||||
advance++
|
|
||||||
}
|
|
||||||
|
|
||||||
return line
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read line (automatically skipping past empty lines and any comments).
|
|
||||||
skip := 0
|
|
||||||
var line []byte
|
|
||||||
for {
|
|
||||||
line = readLine()
|
|
||||||
if len(line) == 0 {
|
|
||||||
return 0, nil, nil // Request more data
|
|
||||||
}
|
|
||||||
if s.comment != 0 && nextRune(line) == s.comment {
|
|
||||||
advance += len(line)
|
|
||||||
skip += len(line)
|
|
||||||
continue // Skip comment lines
|
|
||||||
}
|
|
||||||
if len(line) == lenNewline(line) {
|
|
||||||
advance += len(line)
|
|
||||||
skip += len(line)
|
|
||||||
continue // Skip empty lines
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse each field in the record.
|
|
||||||
const quoteLen = len(`"`)
|
|
||||||
tokenHasCR := false
|
|
||||||
s.recordBuffer = s.recordBuffer[:0]
|
|
||||||
s.fieldIndexes = s.fieldIndexes[:0]
|
|
||||||
parseField:
|
|
||||||
for {
|
|
||||||
if len(line) == 0 || line[0] != '"' {
|
|
||||||
// Non-quoted string field
|
|
||||||
i := bytes.IndexRune(line, s.separator)
|
|
||||||
field := line
|
|
||||||
if i >= 0 {
|
|
||||||
advance += i + s.sepLen
|
|
||||||
field = field[:i]
|
|
||||||
} else {
|
|
||||||
advance += len(field)
|
|
||||||
field = field[:len(field)-lenNewline(field)]
|
|
||||||
}
|
|
||||||
s.recordBuffer = append(s.recordBuffer, field...)
|
|
||||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
|
||||||
if i >= 0 {
|
|
||||||
line = line[i+s.sepLen:]
|
|
||||||
continue parseField
|
|
||||||
}
|
|
||||||
break parseField
|
|
||||||
} else {
|
|
||||||
// Quoted string field
|
|
||||||
line = line[quoteLen:]
|
|
||||||
advance += quoteLen
|
|
||||||
for {
|
|
||||||
i := bytes.IndexByte(line, '"')
|
|
||||||
if i >= 0 {
|
|
||||||
// Hit next quote.
|
|
||||||
s.recordBuffer = append(s.recordBuffer, line[:i]...)
|
|
||||||
line = line[i+quoteLen:]
|
|
||||||
advance += i + quoteLen
|
|
||||||
switch rn := nextRune(line); {
|
|
||||||
case rn == '"':
|
|
||||||
// `""` sequence (append quote).
|
|
||||||
s.recordBuffer = append(s.recordBuffer, '"')
|
|
||||||
line = line[quoteLen:]
|
|
||||||
advance += quoteLen
|
|
||||||
case rn == s.separator:
|
|
||||||
// `",` sequence (end of field).
|
|
||||||
line = line[s.sepLen:]
|
|
||||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
|
||||||
advance += s.sepLen
|
|
||||||
continue parseField
|
|
||||||
case lenNewline(line) == len(line):
|
|
||||||
// `"\n` sequence (end of line).
|
|
||||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
|
||||||
advance += len(line)
|
|
||||||
break parseField
|
|
||||||
default:
|
|
||||||
// `"` sequence (bare quote).
|
|
||||||
s.recordBuffer = append(s.recordBuffer, '"')
|
|
||||||
}
|
|
||||||
} else if len(line) > 0 {
|
|
||||||
// Hit end of line (copy all data so far).
|
|
||||||
advance += len(line)
|
|
||||||
newlineLen := lenNewline(line)
|
|
||||||
if newlineLen == 2 {
|
|
||||||
tokenHasCR = true
|
|
||||||
s.recordBuffer = append(s.recordBuffer, line[:len(line)-2]...)
|
|
||||||
s.recordBuffer = append(s.recordBuffer, '\n')
|
|
||||||
} else {
|
|
||||||
s.recordBuffer = append(s.recordBuffer, line...)
|
|
||||||
}
|
|
||||||
line = readLine()
|
|
||||||
if line == nil {
|
|
||||||
return 0, nil, nil // Request more data
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Abrupt end of file.
|
|
||||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
|
||||||
advance += len(line)
|
|
||||||
break parseField
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a single string and create slices out of it.
|
|
||||||
// This pins the memory of the fields together, but allocates once.
|
|
||||||
strBuf := string(s.recordBuffer) // Convert to string once to batch allocations
|
|
||||||
fields := make([]string, len(s.fieldIndexes))
|
|
||||||
preIdx := 0
|
|
||||||
for i, idx := range s.fieldIndexes {
|
|
||||||
fields[i] = strBuf[preIdx:idx]
|
|
||||||
preIdx = idx
|
|
||||||
}
|
|
||||||
|
|
||||||
s.noBOMCheck = true
|
|
||||||
|
|
||||||
if s.rowNum == 0 && s.header {
|
|
||||||
// Set header field names and advance, but don't return a line (token).
|
|
||||||
s.rowNum++
|
|
||||||
s.setFieldNames(fields)
|
|
||||||
return advance, nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normal row, set fields and return a line (token).
|
|
||||||
s.rowNum++
|
|
||||||
*s.fields = fields
|
|
||||||
token = origData[skip:advance]
|
|
||||||
token = token[:len(token)-lenNewline(token)]
|
|
||||||
if tokenHasCR {
|
|
||||||
token = bytes.ReplaceAll(token, []byte{'\r'}, nil)
|
|
||||||
}
|
|
||||||
return advance, token, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// lenNewline reports how many bytes the trailing newline of b occupies:
// 2 for "\r\n", 1 for a lone "\n", and 0 if b doesn't end in '\n'.
func lenNewline(b []byte) int {
	n := len(b)
	switch {
	case n == 0 || b[n-1] != '\n':
		return 0
	case n > 1 && b[n-2] == '\r':
		return 2
	default:
		return 1
	}
}
|
|
||||||
|
|
||||||
// nextRune returns the next rune in b or utf8.RuneError.
|
|
||||||
func nextRune(b []byte) rune {
|
|
||||||
r, _ := utf8.DecodeRune(b)
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// Setup for a new input file with given name (empty string if stdin)
|
|
||||||
func (p *interp) setFile(filename string) {
|
|
||||||
p.filename = numStr(filename)
|
|
||||||
p.fileLineNum = 0
|
|
||||||
p.hadFiles = true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Setup for a new input line (but don't parse it into fields till we
|
|
||||||
// need to)
|
|
||||||
func (p *interp) setLine(line string, isTrueStr bool) {
|
|
||||||
p.line = line
|
|
||||||
p.lineIsTrueStr = isTrueStr
|
|
||||||
p.haveFields = false
|
|
||||||
p.reparseCSV = true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that the current line is parsed into fields, splitting it
// into fields if it hasn't been already.
//
// The split strategy is chosen in this order: CSV/TSV input mode, FS of a
// single space (whitespace splitting), empty line (no fields), FS of at most
// one rune (plain string split), otherwise FS as a regex. Afterwards
// p.fieldsIsTrueStr is reset to all-false and p.numFields is updated.
func (p *interp) ensureFields() {
	if p.haveFields {
		return
	}
	p.haveFields = true

	switch {
	case p.inputMode == CSVMode || p.inputMode == TSVMode:
		if p.reparseCSV {
			// Re-parse p.line with a fresh csvSplitter; the splitter writes
			// the parsed fields directly into p.fields.
			scanner := bufio.NewScanner(strings.NewReader(p.line))
			scanner.Buffer(nil, maxRecordLength)
			splitter := csvSplitter{
				separator: p.csvInputConfig.Separator,
				sepLen:    utf8.RuneLen(p.csvInputConfig.Separator),
				comment:   p.csvInputConfig.Comment,
				fields:    &p.fields,
			}
			scanner.Split(splitter.scan)
			if !scanner.Scan() {
				// No record could be scanned from the line: no fields.
				p.fields = nil
			}
		} else {
			// Normally fields have already been parsed by csvSplitter
		}
	case p.fieldSep == " ":
		// FS space (default) means split fields on any whitespace
		p.fields = strings.Fields(p.line)
	case p.line == "":
		p.fields = nil
	case utf8.RuneCountInString(p.fieldSep) <= 1:
		// 1-char FS is handled as plain split (not regex)
		p.fields = strings.Split(p.line, p.fieldSep)
	default:
		// Split on FS as a regex
		p.fields = p.fieldSepRegex.Split(p.line, -1)
	}

	// Special case for when RS=="" and FS is single character,
	// split on newline in addition to FS. See more here:
	// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
	if p.inputMode == DefaultMode && p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 {
		fields := make([]string, 0, len(p.fields))
		for _, field := range p.fields {
			lines := strings.Split(field, "\n")
			for _, line := range lines {
				// Drop a trailing CR so CRLF line endings split cleanly.
				trimmed := strings.TrimSuffix(line, "\r")
				fields = append(fields, trimmed)
			}
		}
		p.fields = fields
	}

	p.fieldsIsTrueStr = p.fieldsIsTrueStr[:0] // avoid allocation most of the time
	for range p.fields {
		p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
	}
	p.numFields = len(p.fields)
}
|
|
||||||
|
|
||||||
// Fetch next line (record) of input from current input file, opening
// next input file if done with previous one.
//
// Inputs are taken from the ARGV array in order; "var=value" arguments are
// applied as assignments rather than opened, empty arguments are skipped,
// and "-" means stdin. If ARGV names no files at all, stdin is read.
// Returns io.EOF once every input is exhausted, or another error if an
// assignment, a file open, or a read fails.
func (p *interp) nextLine() (string, error) {
	for {
		if p.scanner == nil {
			// No active scanner: finish with the previous input (if any)
			// and set up the next one.
			if prevInput, ok := p.input.(io.Closer); ok && p.input != p.stdin {
				// Previous input is file, close it
				_ = prevInput.Close()
			}
			if p.filenameIndex >= p.argc && !p.hadFiles {
				// Moved past number of ARGV args and haven't seen
				// any files yet, use stdin
				p.input = p.stdin
				p.setFile("-")
			} else {
				if p.filenameIndex >= p.argc {
					// Done with ARGV args, all done with input
					return "", io.EOF
				}
				// Fetch next filename from ARGV. Can't use
				// getArrayValue() here as it would set the value if
				// not present
				index := strconv.Itoa(p.filenameIndex)
				argvIndex := p.program.Arrays["ARGV"]
				argvArray := p.array(ast.ScopeGlobal, argvIndex)
				filename := p.toString(argvArray[index])
				p.filenameIndex++

				// Is it actually a var=value assignment?
				matches := varRegex.FindStringSubmatch(filename)
				if len(matches) >= 3 {
					// Yep, set variable to value and keep going
					name, val := matches[1], matches[2]
					// Oddly, var=value args must interpret escapes (issue #129)
					unescaped, err := Unescape(val)
					if err == nil {
						// Only use the unescaped form if it parsed cleanly;
						// otherwise the raw value is assigned.
						val = unescaped
					}
					err = p.setVarByName(name, val)
					if err != nil {
						return "", err
					}
					continue
				} else if filename == "" {
					// ARGV arg is empty string, skip
					p.input = nil
					continue
				} else if filename == "-" {
					// ARGV arg is "-" meaning stdin
					p.input = p.stdin
					p.setFile("-")
				} else {
					// A regular file name, open it
					if p.noFileReads {
						return "", newError("can't read from file due to NoFileReads")
					}
					input, err := os.Open(filename)
					if err != nil {
						return "", err
					}
					p.input = input
					p.setFile(filename)
				}
			}
			if p.inputBuffer == nil { // reuse buffer from last input file
				p.inputBuffer = make([]byte, inputBufSize)
			}
			p.scanner = p.newScanner(p.input, p.inputBuffer)
		}
		p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars
		if p.scanner.Scan() {
			// We scanned some input, break and return it
			break
		}
		err := p.scanner.Err()
		if err != nil {
			return "", fmt.Errorf("error reading from input: %s", err)
		}
		// Signal loop to move onto next file
		p.scanner = nil
	}

	// Got a line (record) of input, return it
	p.lineNum++
	p.fileLineNum++
	return p.scanner.Text(), nil
}
|
|
||||||
|
|
||||||
// Write output string to given writer, producing correct line endings
|
|
||||||
// on Windows (CR LF).
|
|
||||||
func writeOutput(w io.Writer, s string) error {
|
|
||||||
if crlfNewline {
|
|
||||||
// First normalize to \n, then convert all newlines to \r\n
|
|
||||||
// (on Windows). NOTE: creating two new strings is almost
|
|
||||||
// certainly slow; would be better to create a custom Writer.
|
|
||||||
s = strings.Replace(s, "\r\n", "\n", -1)
|
|
||||||
s = strings.Replace(s, "\n", "\r\n", -1)
|
|
||||||
}
|
|
||||||
_, err := io.WriteString(w, s)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close all streams, commands, and so on (after program execution).
|
|
||||||
func (p *interp) closeAll() {
|
|
||||||
if prevInput, ok := p.input.(io.Closer); ok {
|
|
||||||
_ = prevInput.Close()
|
|
||||||
}
|
|
||||||
for _, r := range p.inputStreams {
|
|
||||||
_ = r.Close()
|
|
||||||
}
|
|
||||||
for _, w := range p.outputStreams {
|
|
||||||
_ = w.Close()
|
|
||||||
}
|
|
||||||
for _, cmd := range p.commands {
|
|
||||||
_ = cmd.Wait()
|
|
||||||
}
|
|
||||||
if f, ok := p.output.(flusher); ok {
|
|
||||||
_ = f.Flush()
|
|
||||||
}
|
|
||||||
if f, ok := p.errorOutput.(flusher); ok {
|
|
||||||
_ = f.Flush()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush all output streams as well as standard output. Report whether all
|
|
||||||
// streams were flushed successfully (logging error(s) if not).
|
|
||||||
func (p *interp) flushAll() bool {
|
|
||||||
allGood := true
|
|
||||||
for name, writer := range p.outputStreams {
|
|
||||||
allGood = allGood && p.flushWriter(name, writer)
|
|
||||||
}
|
|
||||||
if _, ok := p.output.(flusher); ok {
|
|
||||||
// User-provided output may or may not be flushable
|
|
||||||
allGood = allGood && p.flushWriter("stdout", p.output)
|
|
||||||
}
|
|
||||||
return allGood
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush a single, named output stream, and report whether it was flushed
|
|
||||||
// successfully (logging an error if not).
|
|
||||||
func (p *interp) flushStream(name string) bool {
|
|
||||||
writer := p.outputStreams[name]
|
|
||||||
if writer == nil {
|
|
||||||
p.printErrorf("error flushing %q: not an output file or pipe\n", name)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return p.flushWriter(name, writer)
|
|
||||||
}
|
|
||||||
|
|
||||||
// flusher is implemented by writers that buffer output and can be asked to
// write it out. The code in this file type-asserts writers against it
// before flushing, since not every writer is flushable.
type flusher interface {
	Flush() error
}
|
|
||||||
|
|
||||||
// Flush given output writer, and report whether it was flushed successfully
|
|
||||||
// (logging an error if not).
|
|
||||||
func (p *interp) flushWriter(name string, writer io.Writer) bool {
|
|
||||||
flusher, ok := writer.(flusher)
|
|
||||||
if !ok {
|
|
||||||
return true // not a flusher, don't error
|
|
||||||
}
|
|
||||||
err := flusher.Flush()
|
|
||||||
if err != nil {
|
|
||||||
p.printErrorf("error flushing %q: %v\n", name, err)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush output and error streams.
|
|
||||||
func (p *interp) flushOutputAndError() {
|
|
||||||
if flusher, ok := p.output.(flusher); ok {
|
|
||||||
_ = flusher.Flush()
|
|
||||||
}
|
|
||||||
if flusher, ok := p.errorOutput.(flusher); ok {
|
|
||||||
_ = flusher.Flush()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print a message to the error output stream, flushing as necessary.
|
|
||||||
func (p *interp) printErrorf(format string, args ...interface{}) {
|
|
||||||
if flusher, ok := p.output.(flusher); ok {
|
|
||||||
_ = flusher.Flush() // ensure synchronization
|
|
||||||
}
|
|
||||||
fmt.Fprintf(p.errorOutput, format, args...)
|
|
||||||
if flusher, ok := p.errorOutput.(flusher); ok {
|
|
||||||
_ = flusher.Flush()
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,176 +0,0 @@
|
||||||
// The New...Execute API (allows you to efficiently execute the same program repeatedly).
|
|
||||||
|
|
||||||
package interp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
const checkContextOps = 1000 // for efficiency, only check context every N instructions
|
|
||||||
|
|
||||||
// Interpreter is an interpreter for a specific program, allowing you to
// efficiently execute the same program over and over with different inputs.
// Use New to create an Interpreter.
//
// Most programs won't need reusable execution, and should use the simpler
// Exec or ExecProgram functions instead.
type Interpreter struct {
	// interp is the underlying interpreter state, created once by New and
	// reused by every Execute/ExecuteContext call.
	interp *interp
}
|
|
||||||
|
|
||||||
// New creates a reusable interpreter for the given program.
|
|
||||||
//
|
|
||||||
// Most programs won't need reusable execution, and should use the simpler
|
|
||||||
// Exec or ExecProgram functions instead.
|
|
||||||
func New(program *parser.Program) (*Interpreter, error) {
|
|
||||||
p := newInterp(program)
|
|
||||||
return &Interpreter{interp: p}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Execute runs this program with the given execution configuration (input,
|
|
||||||
// output, and variables) and returns the exit status code of the program. A
|
|
||||||
// nil config is valid and will use the defaults (zero values).
|
|
||||||
//
|
|
||||||
// Internal memory allocations are reused, so calling Execute on the same
|
|
||||||
// Interpreter instance is significantly more efficient than calling
|
|
||||||
// ExecProgram multiple times.
|
|
||||||
//
|
|
||||||
// I/O state is reset between each run, but variables and the random number
|
|
||||||
// generator seed are not; use ResetVars and ResetRand to reset those.
|
|
||||||
//
|
|
||||||
// It's best to set config.Environ to a non-nil slice, otherwise Execute will
|
|
||||||
// call the relatively inefficient os.Environ each time. Set config.Environ to
|
|
||||||
// []string{} if the script doesn't need environment variables, or call
|
|
||||||
// os.Environ once and set config.Environ to that value each execution.
|
|
||||||
//
|
|
||||||
// Note that config.Funcs must be the same value provided to
|
|
||||||
// parser.ParseProgram, and must not change between calls to Execute.
|
|
||||||
func (p *Interpreter) Execute(config *Config) (int, error) {
|
|
||||||
p.interp.resetCore()
|
|
||||||
p.interp.checkCtx = false
|
|
||||||
|
|
||||||
err := p.interp.setExecuteConfig(config)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return p.interp.executeAll()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *interp) resetCore() {
|
|
||||||
p.scanner = nil
|
|
||||||
for k := range p.scanners {
|
|
||||||
delete(p.scanners, k)
|
|
||||||
}
|
|
||||||
p.input = nil
|
|
||||||
for k := range p.inputStreams {
|
|
||||||
delete(p.inputStreams, k)
|
|
||||||
}
|
|
||||||
for k := range p.outputStreams {
|
|
||||||
delete(p.outputStreams, k)
|
|
||||||
}
|
|
||||||
for k := range p.commands {
|
|
||||||
delete(p.commands, k)
|
|
||||||
}
|
|
||||||
|
|
||||||
p.sp = 0
|
|
||||||
p.localArrays = p.localArrays[:0]
|
|
||||||
p.callDepth = 0
|
|
||||||
|
|
||||||
p.filename = null()
|
|
||||||
p.line = ""
|
|
||||||
p.lineIsTrueStr = false
|
|
||||||
p.lineNum = 0
|
|
||||||
p.fileLineNum = 0
|
|
||||||
p.fields = nil
|
|
||||||
p.fieldsIsTrueStr = nil
|
|
||||||
p.numFields = 0
|
|
||||||
p.haveFields = false
|
|
||||||
|
|
||||||
p.exitStatus = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *interp) resetVars() {
|
|
||||||
// Reset global scalars
|
|
||||||
for i := range p.globals {
|
|
||||||
p.globals[i] = null()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset global arrays
|
|
||||||
for _, array := range p.arrays {
|
|
||||||
for k := range array {
|
|
||||||
delete(array, k)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset special variables
|
|
||||||
p.convertFormat = "%.6g"
|
|
||||||
p.outputFormat = "%.6g"
|
|
||||||
p.fieldSep = " "
|
|
||||||
p.fieldSepRegex = nil
|
|
||||||
p.recordSep = "\n"
|
|
||||||
p.recordSepRegex = nil
|
|
||||||
p.recordTerminator = ""
|
|
||||||
p.outputFieldSep = " "
|
|
||||||
p.outputRecordSep = "\n"
|
|
||||||
p.subscriptSep = "\x1c"
|
|
||||||
p.matchLength = 0
|
|
||||||
p.matchStart = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResetVars resets this interpreter's variables, setting scalar variables to
|
|
||||||
// null, clearing arrays, and resetting special variables such as FS and RS to
|
|
||||||
// their defaults.
|
|
||||||
func (p *Interpreter) ResetVars() {
|
|
||||||
p.interp.resetVars()
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResetRand resets this interpreter's random number generator seed, so that
|
|
||||||
// rand() produces the same sequence it would have after calling New. This is
|
|
||||||
// a relatively CPU-intensive operation.
|
|
||||||
func (p *Interpreter) ResetRand() {
|
|
||||||
p.interp.randSeed = 1.0
|
|
||||||
p.interp.random.Seed(int64(math.Float64bits(p.interp.randSeed)))
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExecuteContext is like Execute, but takes a context to allow the caller to
|
|
||||||
// set an execution timeout or cancel the execution. For efficiency, the
|
|
||||||
// context is only tested every 1000 virtual machine instructions.
|
|
||||||
//
|
|
||||||
// Context handling is not preemptive: currently long-running operations like
|
|
||||||
// system() won't be interrupted.
|
|
||||||
func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error) {
|
|
||||||
p.interp.resetCore()
|
|
||||||
p.interp.checkCtx = ctx != context.Background() && ctx != context.TODO()
|
|
||||||
p.interp.ctx = ctx
|
|
||||||
p.interp.ctxDone = ctx.Done()
|
|
||||||
p.interp.ctxOps = 0
|
|
||||||
|
|
||||||
err := p.interp.setExecuteConfig(config)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return p.interp.executeAll()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *interp) checkContext() error {
|
|
||||||
p.ctxOps++
|
|
||||||
if p.ctxOps < checkContextOps {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
p.ctxOps = 0
|
|
||||||
return p.checkContextNow()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *interp) checkContextNow() error {
|
|
||||||
select {
|
|
||||||
case <-p.ctxDone:
|
|
||||||
return p.ctx.Err()
|
|
||||||
default:
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,163 +0,0 @@
|
||||||
// Tests for the New...Execute API.
|
|
||||||
|
|
||||||
package interp_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/interp"
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
// This definitely doesn't test that everything was reset, but it's a good start.
|
|
||||||
func TestNewExecute(t *testing.T) {
|
|
||||||
source := `{ print NR, OFMT, x, y, a["k"], $1, $3; OFMT="%g"; x++; y++; a["k"]++ }`
|
|
||||||
interpreter := newInterp(t, source)
|
|
||||||
|
|
||||||
// First execution.
|
|
||||||
var output bytes.Buffer
|
|
||||||
status, err := interpreter.Execute(&interp.Config{
|
|
||||||
Stdin: strings.NewReader("one two three\nfour five six\n"),
|
|
||||||
Output: &output,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error executing: %v", err)
|
|
||||||
}
|
|
||||||
if status != 0 {
|
|
||||||
t.Fatalf("expected status 0, got %d", status)
|
|
||||||
}
|
|
||||||
normalized := normalizeNewlines(output.String())
|
|
||||||
expected := "1 %.6g one three\n2 %g 1 1 1 four six\n"
|
|
||||||
if normalized != expected {
|
|
||||||
t.Fatalf("expected %q, got %q", expected, normalized)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Second execution, with ResetVars.
|
|
||||||
output.Reset()
|
|
||||||
interpreter.ResetVars()
|
|
||||||
status, err = interpreter.Execute(&interp.Config{
|
|
||||||
Stdin: strings.NewReader("ONE TWO THREE\nFOUR FIVE SIX\n"),
|
|
||||||
Output: &output,
|
|
||||||
Vars: []string{"x", "10"},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error executing: %v", err)
|
|
||||||
}
|
|
||||||
if status != 0 {
|
|
||||||
t.Fatalf("expected status 0, got %d", status)
|
|
||||||
}
|
|
||||||
normalized = normalizeNewlines(output.String())
|
|
||||||
expected = "1 %.6g 10 ONE THREE\n2 %g 11 1 1 FOUR SIX\n"
|
|
||||||
if normalized != expected {
|
|
||||||
t.Fatalf("expected %q, got %q", expected, normalized)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Third execution, without ResetVars.
|
|
||||||
output.Reset()
|
|
||||||
status, err = interpreter.Execute(&interp.Config{
|
|
||||||
Stdin: strings.NewReader("1 2 3\n4 5 6\n"),
|
|
||||||
Output: &output,
|
|
||||||
Vars: []string{"x", "100"},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error executing: %v", err)
|
|
||||||
}
|
|
||||||
if status != 0 {
|
|
||||||
t.Fatalf("expected status 0, got %d", status)
|
|
||||||
}
|
|
||||||
normalized = normalizeNewlines(output.String())
|
|
||||||
expected = "1 %g 100 2 2 1 3\n2 %g 101 3 3 4 6\n"
|
|
||||||
if normalized != expected {
|
|
||||||
t.Fatalf("expected %q, got %q", expected, normalized)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResetRand(t *testing.T) {
|
|
||||||
source := `BEGIN { print rand(), rand(), rand() }`
|
|
||||||
interpreter := newInterp(t, source)
|
|
||||||
var output bytes.Buffer
|
|
||||||
|
|
||||||
_, err := interpreter.Execute(&interp.Config{Output: &output})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error executing: %v", err)
|
|
||||||
}
|
|
||||||
original := output.String()
|
|
||||||
|
|
||||||
output.Reset()
|
|
||||||
_, err = interpreter.Execute(&interp.Config{Output: &output})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error executing: %v", err)
|
|
||||||
}
|
|
||||||
noResetRand := output.String()
|
|
||||||
if original == noResetRand {
|
|
||||||
t.Fatalf("expected different random numbers, got %q both times", original)
|
|
||||||
}
|
|
||||||
|
|
||||||
output.Reset()
|
|
||||||
interpreter.ResetRand()
|
|
||||||
_, err = interpreter.Execute(&interp.Config{Output: &output})
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error executing: %v", err)
|
|
||||||
}
|
|
||||||
withResetRand := output.String()
|
|
||||||
if original != withResetRand {
|
|
||||||
t.Fatalf("expected same random numbers (%q) as original (%q)", withResetRand, original)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecuteContextNoError(t *testing.T) {
|
|
||||||
interpreter := newInterp(t, `BEGIN {}`)
|
|
||||||
_, err := interpreter.ExecuteContext(context.Background(), nil)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("execute error: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecuteContextTimeout(t *testing.T) {
|
|
||||||
interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
_, err := interpreter.ExecuteContext(ctx, nil)
|
|
||||||
if !errors.Is(err, context.DeadlineExceeded) {
|
|
||||||
t.Fatalf("expected DeadlineExceeded error, got: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecuteContextCancel(t *testing.T) {
|
|
||||||
interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
cancel() // cancel it right away
|
|
||||||
_, err := interpreter.ExecuteContext(ctx, nil)
|
|
||||||
if !errors.Is(err, context.Canceled) {
|
|
||||||
t.Fatalf("expected Canceled error, got: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestExecuteContextSystemTimeout(t *testing.T) {
|
|
||||||
t.Skip("TODO: skipping for now due to #122")
|
|
||||||
interpreter := newInterp(t, `BEGIN { print system("sleep 4") }`)
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
_, err := interpreter.ExecuteContext(ctx, nil)
|
|
||||||
if !errors.Is(err, context.DeadlineExceeded) {
|
|
||||||
t.Fatalf("expected DeadlineExceeded error, got: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newInterp(t *testing.T, src string) *interp.Interpreter {
|
|
||||||
t.Helper()
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("parse error: %v", err)
|
|
||||||
}
|
|
||||||
interpreter, err := interp.New(prog)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("interp.New error: %v", err)
|
|
||||||
}
|
|
||||||
return interpreter
|
|
||||||
}
|
|
|
@ -1,294 +0,0 @@
|
||||||
// GoAWK interpreter value type (not exported).
|
|
||||||
|
|
||||||
package interp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// valueType tags which kind of AWK value a value struct currently holds.
type valueType uint8
|
|
||||||
|
|
||||||
const (
|
|
||||||
typeNull valueType = iota // zero value: what null() / value{} produce
|
|
||||||
typeStr // string, created by str()
|
|
||||||
typeNum // number, created by num()
|
|
||||||
typeNumStr // "numeric string" from an input field, created by numStr()
|
|
||||||
)
|
|
||||||
|
|
||||||
// value is an AWK value: null, string, number, or "numeric string".
// Values are small and are passed around by value, not by pointer.
|
|
||||||
type value struct {
|
|
||||||
typ valueType // Type of value (typeNull for the zero value)
|
|
||||||
s string // String value (for typeStr and typeNumStr); empty for typeNull
|
|
||||||
n float64 // Numeric value (for typeNum); zero for typeNull
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new null value
|
|
||||||
func null() value {
|
|
||||||
return value{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new number value
|
|
||||||
func num(n float64) value {
|
|
||||||
return value{typ: typeNum, n: n}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new string value
|
|
||||||
func str(s string) value {
|
|
||||||
return value{typ: typeStr, s: s}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new value to represent a "numeric string" from an input field
|
|
||||||
func numStr(s string) value {
|
|
||||||
return value{typ: typeNumStr, s: s}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a numeric value from a Go bool
|
|
||||||
func boolean(b bool) value {
|
|
||||||
if b {
|
|
||||||
return num(1)
|
|
||||||
}
|
|
||||||
return num(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// String returns a string representation of v for debugging.
|
|
||||||
func (v value) String() string {
|
|
||||||
switch v.typ {
|
|
||||||
case typeStr:
|
|
||||||
return fmt.Sprintf("str(%q)", v.s)
|
|
||||||
case typeNum:
|
|
||||||
return fmt.Sprintf("num(%s)", v.str("%.6g"))
|
|
||||||
case typeNumStr:
|
|
||||||
return fmt.Sprintf("numStr(%q)", v.s)
|
|
||||||
default:
|
|
||||||
return "null()"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if value is a "true string" (a string or a "numeric string"
|
|
||||||
// from an input field that can't be converted to a number). If false,
|
|
||||||
// also return the (possibly converted) number.
|
|
||||||
func (v value) isTrueStr() (float64, bool) {
|
|
||||||
switch v.typ {
|
|
||||||
case typeStr:
|
|
||||||
return 0, true
|
|
||||||
case typeNumStr:
|
|
||||||
f, err := parseFloat(v.s)
|
|
||||||
if err != nil {
|
|
||||||
return 0, true
|
|
||||||
}
|
|
||||||
return f, false
|
|
||||||
default: // typeNum, typeNull
|
|
||||||
return v.n, false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return Go bool value of AWK value. For numbers or numeric strings,
|
|
||||||
// zero is false and everything else is true. For strings, empty
|
|
||||||
// string is false and everything else is true.
|
|
||||||
func (v value) boolean() bool {
|
|
||||||
switch v.typ {
|
|
||||||
case typeStr:
|
|
||||||
return v.s != ""
|
|
||||||
case typeNumStr:
|
|
||||||
f, err := parseFloat(v.s)
|
|
||||||
if err != nil {
|
|
||||||
return v.s != ""
|
|
||||||
}
|
|
||||||
return f != 0
|
|
||||||
default: // typeNum, typeNull
|
|
||||||
return v.n != 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Like strconv.ParseFloat, but allow hex floating point without exponent, and
|
|
||||||
// allow "+nan" and "-nan" (though they both return math.NaN()). Also disallow
|
|
||||||
// underscore digit separators.
|
|
||||||
func parseFloat(s string) (float64, error) {
|
|
||||||
s = strings.TrimSpace(s)
|
|
||||||
if len(s) > 1 && (s[0] == '+' || s[0] == '-') {
|
|
||||||
if len(s) == 4 && hasNaNPrefix(s[1:]) {
|
|
||||||
// ParseFloat doesn't handle "nan" with sign prefix, so handle it here.
|
|
||||||
return math.NaN(), nil
|
|
||||||
}
|
|
||||||
if len(s) > 3 && hasHexPrefix(s[1:]) && strings.IndexByte(s, 'p') < 0 {
|
|
||||||
s += "p0"
|
|
||||||
}
|
|
||||||
} else if len(s) > 2 && hasHexPrefix(s) && strings.IndexByte(s, 'p') < 0 {
|
|
||||||
s += "p0"
|
|
||||||
}
|
|
||||||
n, err := strconv.ParseFloat(s, 64)
|
|
||||||
if err == nil && strings.IndexByte(s, '_') >= 0 {
|
|
||||||
// Underscore separators aren't supported by AWK.
|
|
||||||
return 0, strconv.ErrSyntax
|
|
||||||
}
|
|
||||||
return n, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return value's string value, or convert to a string using given
|
|
||||||
// format if a number value. Integers are a special case and don't
|
|
||||||
// use floatFormat.
|
|
||||||
func (v value) str(floatFormat string) string {
|
|
||||||
if v.typ == typeNum {
|
|
||||||
switch {
|
|
||||||
case math.IsNaN(v.n):
|
|
||||||
return "nan"
|
|
||||||
case math.IsInf(v.n, 0):
|
|
||||||
if v.n < 0 {
|
|
||||||
return "-inf"
|
|
||||||
} else {
|
|
||||||
return "inf"
|
|
||||||
}
|
|
||||||
case v.n == float64(int(v.n)):
|
|
||||||
return strconv.Itoa(int(v.n))
|
|
||||||
default:
|
|
||||||
if floatFormat == "%.6g" {
|
|
||||||
return strconv.FormatFloat(v.n, 'g', 6, 64)
|
|
||||||
}
|
|
||||||
return fmt.Sprintf(floatFormat, v.n)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// For typeStr and typeNumStr we already have the string, for
|
|
||||||
// typeNull v.s == "".
|
|
||||||
return v.s
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return value's number value, converting from string if necessary
|
|
||||||
func (v value) num() float64 {
|
|
||||||
switch v.typ {
|
|
||||||
case typeStr, typeNumStr:
|
|
||||||
// Ensure string starts with a float and convert it
|
|
||||||
return parseFloatPrefix(v.s)
|
|
||||||
default: // typeNum, typeNull
|
|
||||||
return v.n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
|
|
||||||
|
|
||||||
// Like strconv.ParseFloat, but parses at the start of string and
|
|
||||||
// allows things like "1.5foo"
|
|
||||||
func parseFloatPrefix(s string) float64 {
|
|
||||||
// Skip whitespace at start
|
|
||||||
i := 0
|
|
||||||
for i < len(s) && asciiSpace[s[i]] != 0 {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
start := i
|
|
||||||
|
|
||||||
// Parse optional sign and check for NaN and Inf.
|
|
||||||
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if i+3 <= len(s) {
|
|
||||||
if hasNaNPrefix(s[i:]) {
|
|
||||||
return math.NaN()
|
|
||||||
}
|
|
||||||
if hasInfPrefix(s[i:]) {
|
|
||||||
if s[start] == '-' {
|
|
||||||
return math.Inf(-1)
|
|
||||||
}
|
|
||||||
return math.Inf(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse mantissa: initial digit(s), optional '.', then more digits
|
|
||||||
if i+2 < len(s) && hasHexPrefix(s[i:]) {
|
|
||||||
return parseHexFloatPrefix(s, start, i+2)
|
|
||||||
}
|
|
||||||
gotDigit := false
|
|
||||||
for i < len(s) && isDigit(s[i]) {
|
|
||||||
gotDigit = true
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if i < len(s) && s[i] == '.' {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
for i < len(s) && isDigit(s[i]) {
|
|
||||||
gotDigit = true
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if !gotDigit {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse exponent ("1e" and similar are allowed, but ParseFloat
|
|
||||||
// rejects them)
|
|
||||||
end := i
|
|
||||||
if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
|
|
||||||
i++
|
|
||||||
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
for i < len(s) && isDigit(s[i]) {
|
|
||||||
i++
|
|
||||||
end = i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
floatStr := s[start:end]
|
|
||||||
f, _ := strconv.ParseFloat(floatStr, 64)
|
|
||||||
return f // Returns infinity in case of "value out of range" error
|
|
||||||
}
|
|
||||||
|
|
||||||
// hasHexPrefix reports whether s starts with "0x" or "0X". Strings shorter
// than two bytes return false instead of panicking on an out-of-range index.
func hasHexPrefix(s string) bool {
	return len(s) >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')
}
|
|
||||||
|
|
||||||
// hasNaNPrefix reports whether s starts with "nan" in any letter case.
// Strings shorter than three bytes return false instead of panicking.
func hasNaNPrefix(s string) bool {
	return len(s) >= 3 &&
		(s[0] == 'n' || s[0] == 'N') &&
		(s[1] == 'a' || s[1] == 'A') &&
		(s[2] == 'n' || s[2] == 'N')
}
|
|
||||||
|
|
||||||
// hasInfPrefix reports whether s starts with "inf" in any letter case.
// Strings shorter than three bytes return false instead of panicking.
func hasInfPrefix(s string) bool {
	return len(s) >= 3 &&
		(s[0] == 'i' || s[0] == 'I') &&
		(s[1] == 'n' || s[1] == 'N') &&
		(s[2] == 'f' || s[2] == 'F')
}
|
|
||||||
|
|
||||||
// Helper used by parseFloatPrefix to handle hexadecimal floating point.
|
|
||||||
func parseHexFloatPrefix(s string, start, i int) float64 {
|
|
||||||
gotDigit := false
|
|
||||||
for i < len(s) && isHexDigit(s[i]) {
|
|
||||||
gotDigit = true
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if i < len(s) && s[i] == '.' {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
for i < len(s) && isHexDigit(s[i]) {
|
|
||||||
gotDigit = true
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if !gotDigit {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
gotExponent := false
|
|
||||||
end := i
|
|
||||||
if i < len(s) && (s[i] == 'p' || s[i] == 'P') {
|
|
||||||
i++
|
|
||||||
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
for i < len(s) && isDigit(s[i]) {
|
|
||||||
gotExponent = true
|
|
||||||
i++
|
|
||||||
end = i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
floatStr := s[start:end]
|
|
||||||
if !gotExponent {
|
|
||||||
floatStr += "p0" // AWK allows "0x12", ParseFloat requires "0x12p0"
|
|
||||||
}
|
|
||||||
f, _ := strconv.ParseFloat(floatStr, 64)
|
|
||||||
return f // Returns infinity in case of "value out of range" error
|
|
||||||
}
|
|
||||||
|
|
||||||
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large value.
	return c-'0' < 10
}
|
|
||||||
|
|
||||||
// isHexDigit reports whether c is an ASCII hexadecimal digit (0-9, a-f, A-F).
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,499 +0,0 @@
|
||||||
// Package lexer is an AWK lexer (tokenizer).
|
|
||||||
//
|
|
||||||
// The lexer turns a string of AWK source code into a stream of
|
|
||||||
// tokens for parsing.
|
|
||||||
//
|
|
||||||
// To tokenize some source, create a new lexer with NewLexer(src) and
|
|
||||||
// then call Scan() until the token type is EOF or ILLEGAL.
|
|
||||||
package lexer
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Lexer tokenizes a byte string of AWK source code. Use NewLexer to
|
|
||||||
// actually create a lexer, and Scan() or ScanRegex() to get tokens.
|
|
||||||
type Lexer struct {
|
|
||||||
src []byte // source bytes being tokenized
|
|
||||||
offset int // index into src of the next byte next() will load (one past ch)
|
|
||||||
ch byte // current byte; 0 once next() has reached the end of input
|
|
||||||
pos Position // line/column of ch (next() copies it from nextPos)
|
|
||||||
nextPos Position // line/column of the byte that follows ch
|
|
||||||
hadSpace bool // set by scan when whitespace or a line continuation preceded the token
|
|
||||||
lastTok Token // token most recently returned by Scan or ScanRegex; checked by scanRegex
|
|
||||||
}
|
|
||||||
|
|
||||||
// Position stores the source line and column where a token starts.
|
|
||||||
type Position struct {
|
|
||||||
// Line number of the token (starts at 1).
|
|
||||||
Line int
|
|
||||||
// Column on the line (starts at 1). Note that this is the byte
|
|
||||||
// offset into the line, not rune offset.
// A carriage return ('\r') byte does not advance the column.
|
|
||||||
Column int
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewLexer creates a new lexer that will tokenize the given source
|
|
||||||
// code. See the module-level example for a working example.
|
|
||||||
func NewLexer(src []byte) *Lexer {
|
|
||||||
l := &Lexer{src: src}
|
|
||||||
l.nextPos.Line = 1
|
|
||||||
l.nextPos.Column = 1
|
|
||||||
l.next()
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
// HadSpace returns true if the previously-scanned token had
|
|
||||||
// whitespace before it. Used by the parser because when calling a
|
|
||||||
// user-defined function the grammar doesn't allow a space between
|
|
||||||
// the function name and the left parenthesis.
|
|
||||||
func (l *Lexer) HadSpace() bool {
|
|
||||||
return l.hadSpace
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scan scans the next token and returns its position (line/column),
|
|
||||||
// token value (one of the uppercase token constants), and the
|
|
||||||
// string value of the token. For most tokens, the token value is
|
|
||||||
// empty. For NAME, NUMBER, STRING, and REGEX tokens, it's the
|
|
||||||
// token's value. For an ILLEGAL token, it's the error message.
|
|
||||||
func (l *Lexer) Scan() (Position, Token, string) {
|
|
||||||
pos, tok, val := l.scan()
|
|
||||||
l.lastTok = tok
|
|
||||||
return pos, tok, val
|
|
||||||
}
|
|
||||||
|
|
||||||
// Does the real work of scanning. Scan() wraps this to more easily
|
|
||||||
// set lastTok.
|
|
||||||
func (l *Lexer) scan() (Position, Token, string) {
|
|
||||||
// Skip whitespace (except newline, which is a token)
|
|
||||||
l.hadSpace = false
|
|
||||||
for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\\' {
|
|
||||||
l.hadSpace = true
|
|
||||||
if l.ch == '\\' {
|
|
||||||
l.next()
|
|
||||||
if l.ch == '\r' {
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
if l.ch != '\n' {
|
|
||||||
return l.pos, ILLEGAL, "expected \\n after \\ line continuation"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
if l.ch == '#' {
|
|
||||||
// Skip comment till end of line
|
|
||||||
l.next()
|
|
||||||
for l.ch != '\n' && l.ch != 0 {
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if l.ch == 0 {
|
|
||||||
// l.next() reached end of input
|
|
||||||
return l.pos, EOF, ""
|
|
||||||
}
|
|
||||||
|
|
||||||
pos := l.pos
|
|
||||||
tok := ILLEGAL
|
|
||||||
val := ""
|
|
||||||
|
|
||||||
ch := l.ch
|
|
||||||
l.next()
|
|
||||||
|
|
||||||
// Names: keywords and functions
|
|
||||||
if isNameStart(ch) {
|
|
||||||
start := l.offset - 2
|
|
||||||
for isNameStart(l.ch) || isDigit(l.ch) {
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
name := string(l.src[start : l.offset-1])
|
|
||||||
tok := KeywordToken(name)
|
|
||||||
if tok == ILLEGAL {
|
|
||||||
tok = NAME
|
|
||||||
val = name
|
|
||||||
}
|
|
||||||
return pos, tok, val
|
|
||||||
}
|
|
||||||
|
|
||||||
// These are ordered by my guess at frequency of use. Should run
|
|
||||||
// through a corpus of real AWK programs to determine actual
|
|
||||||
// frequency.
|
|
||||||
switch ch {
|
|
||||||
case '$':
|
|
||||||
tok = DOLLAR
|
|
||||||
case '@':
|
|
||||||
tok = AT
|
|
||||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
|
|
||||||
// Avoid make/append and use l.offset directly for performance
|
|
||||||
start := l.offset - 2
|
|
||||||
gotDigit := false
|
|
||||||
if ch != '.' {
|
|
||||||
gotDigit = true
|
|
||||||
for isDigit(l.ch) {
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
if l.ch == '.' {
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for isDigit(l.ch) {
|
|
||||||
gotDigit = true
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
if !gotDigit {
|
|
||||||
return l.pos, ILLEGAL, "expected digits"
|
|
||||||
}
|
|
||||||
if l.ch == 'e' || l.ch == 'E' {
|
|
||||||
l.next()
|
|
||||||
gotSign := false
|
|
||||||
if l.ch == '+' || l.ch == '-' {
|
|
||||||
gotSign = true
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
gotDigit = false
|
|
||||||
for isDigit(l.ch) {
|
|
||||||
l.next()
|
|
||||||
gotDigit = true
|
|
||||||
}
|
|
||||||
// Per awk/gawk, "1e" is allowed and parsed as "1 e" (with "e"
|
|
||||||
// considered a variable). "1e+" is parsed as "1e + ...".
|
|
||||||
if !gotDigit {
|
|
||||||
if gotSign {
|
|
||||||
l.unread() // unread the '+' or '-'
|
|
||||||
}
|
|
||||||
l.unread() // unread the 'e' or 'E'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tok = NUMBER
|
|
||||||
val = string(l.src[start : l.offset-1])
|
|
||||||
case '{':
|
|
||||||
tok = LBRACE
|
|
||||||
case '}':
|
|
||||||
tok = RBRACE
|
|
||||||
case '=':
|
|
||||||
tok = l.choice('=', ASSIGN, EQUALS)
|
|
||||||
case '<':
|
|
||||||
tok = l.choice('=', LESS, LTE)
|
|
||||||
case '>':
|
|
||||||
switch l.ch {
|
|
||||||
case '=':
|
|
||||||
l.next()
|
|
||||||
tok = GTE
|
|
||||||
case '>':
|
|
||||||
l.next()
|
|
||||||
tok = APPEND
|
|
||||||
default:
|
|
||||||
tok = GREATER
|
|
||||||
}
|
|
||||||
case '"', '\'':
|
|
||||||
// Note: POSIX awk spec doesn't allow single-quoted strings,
|
|
||||||
// but this helps with quoting, especially on Windows
|
|
||||||
// where the shell quote character is " (double quote).
|
|
||||||
s, err := parseString(ch, func() byte { return l.ch }, l.next)
|
|
||||||
if err != nil {
|
|
||||||
return l.pos, ILLEGAL, err.Error()
|
|
||||||
}
|
|
||||||
if l.ch != ch {
|
|
||||||
return l.pos, ILLEGAL, "didn't find end quote in string"
|
|
||||||
}
|
|
||||||
l.next()
|
|
||||||
tok = STRING
|
|
||||||
val = s
|
|
||||||
case '(':
|
|
||||||
tok = LPAREN
|
|
||||||
case ')':
|
|
||||||
tok = RPAREN
|
|
||||||
case ',':
|
|
||||||
tok = COMMA
|
|
||||||
case ';':
|
|
||||||
tok = SEMICOLON
|
|
||||||
case '+':
|
|
||||||
switch l.ch {
|
|
||||||
case '+':
|
|
||||||
l.next()
|
|
||||||
tok = INCR
|
|
||||||
case '=':
|
|
||||||
l.next()
|
|
||||||
tok = ADD_ASSIGN
|
|
||||||
default:
|
|
||||||
tok = ADD
|
|
||||||
}
|
|
||||||
case '-':
|
|
||||||
switch l.ch {
|
|
||||||
case '-':
|
|
||||||
l.next()
|
|
||||||
tok = DECR
|
|
||||||
case '=':
|
|
||||||
l.next()
|
|
||||||
tok = SUB_ASSIGN
|
|
||||||
default:
|
|
||||||
tok = SUB
|
|
||||||
}
|
|
||||||
case '*':
|
|
||||||
switch l.ch {
|
|
||||||
case '*':
|
|
||||||
l.next()
|
|
||||||
tok = l.choice('=', POW, POW_ASSIGN)
|
|
||||||
case '=':
|
|
||||||
l.next()
|
|
||||||
tok = MUL_ASSIGN
|
|
||||||
default:
|
|
||||||
tok = MUL
|
|
||||||
}
|
|
||||||
case '/':
|
|
||||||
tok = l.choice('=', DIV, DIV_ASSIGN)
|
|
||||||
case '%':
|
|
||||||
tok = l.choice('=', MOD, MOD_ASSIGN)
|
|
||||||
case '[':
|
|
||||||
tok = LBRACKET
|
|
||||||
case ']':
|
|
||||||
tok = RBRACKET
|
|
||||||
case '\n':
|
|
||||||
tok = NEWLINE
|
|
||||||
case '^':
|
|
||||||
tok = l.choice('=', POW, POW_ASSIGN)
|
|
||||||
case '!':
|
|
||||||
switch l.ch {
|
|
||||||
case '=':
|
|
||||||
l.next()
|
|
||||||
tok = NOT_EQUALS
|
|
||||||
case '~':
|
|
||||||
l.next()
|
|
||||||
tok = NOT_MATCH
|
|
||||||
default:
|
|
||||||
tok = NOT
|
|
||||||
}
|
|
||||||
case '~':
|
|
||||||
tok = MATCH
|
|
||||||
case '?':
|
|
||||||
tok = QUESTION
|
|
||||||
case ':':
|
|
||||||
tok = COLON
|
|
||||||
case '&':
|
|
||||||
tok = l.choice('&', ILLEGAL, AND)
|
|
||||||
if tok == ILLEGAL {
|
|
||||||
return l.pos, ILLEGAL, "unexpected char after '&'"
|
|
||||||
}
|
|
||||||
case '|':
|
|
||||||
tok = l.choice('|', PIPE, OR)
|
|
||||||
default:
|
|
||||||
tok = ILLEGAL
|
|
||||||
val = "unexpected char"
|
|
||||||
}
|
|
||||||
return pos, tok, val
|
|
||||||
}
|
|
||||||
|
|
||||||
// ScanRegex parses an AWK regular expression in /slash/ syntax. The
|
|
||||||
// AWK grammar has somewhat special handling of regex tokens, so the
|
|
||||||
// parser can only call this after a DIV or DIV_ASSIGN token has just
|
|
||||||
// been scanned.
|
|
||||||
func (l *Lexer) ScanRegex() (Position, Token, string) {
|
|
||||||
pos, tok, val := l.scanRegex()
|
|
||||||
l.lastTok = tok
|
|
||||||
return pos, tok, val
|
|
||||||
}
|
|
||||||
|
|
||||||
// Does the real work of scanning a regex. ScanRegex() wraps this to
|
|
||||||
// more easily set lastTok.
|
|
||||||
func (l *Lexer) scanRegex() (Position, Token, string) {
|
|
||||||
pos := l.pos
|
|
||||||
chars := make([]byte, 0, 32) // most won't require heap allocation
|
|
||||||
switch l.lastTok {
|
|
||||||
case DIV:
|
|
||||||
// Regex after '/' (the usual case)
|
|
||||||
pos.Column -= 1
|
|
||||||
case DIV_ASSIGN:
|
|
||||||
// Regex after '/=' (happens when regex starts with '=')
|
|
||||||
pos.Column -= 2
|
|
||||||
chars = append(chars, '=')
|
|
||||||
default:
|
|
||||||
panic("ScanRegex should only be called after DIV or DIV_ASSIGN token")
|
|
||||||
}
|
|
||||||
for l.ch != '/' {
|
|
||||||
c := l.ch
|
|
||||||
if c == 0 {
|
|
||||||
return l.pos, ILLEGAL, "didn't find end slash in regex"
|
|
||||||
}
|
|
||||||
if c == '\r' || c == '\n' {
|
|
||||||
return l.pos, ILLEGAL, "can't have newline in regex"
|
|
||||||
}
|
|
||||||
if c == '\\' {
|
|
||||||
l.next()
|
|
||||||
if l.ch != '/' {
|
|
||||||
chars = append(chars, '\\')
|
|
||||||
}
|
|
||||||
c = l.ch
|
|
||||||
}
|
|
||||||
chars = append(chars, c)
|
|
||||||
l.next()
|
|
||||||
}
|
|
||||||
l.next()
|
|
||||||
return pos, REGEX, string(chars)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load the next character into l.ch (or 0 on end of input) and update
|
|
||||||
// line and column position.
|
|
||||||
func (l *Lexer) next() {
|
|
||||||
l.pos = l.nextPos
|
|
||||||
if l.offset >= len(l.src) {
|
|
||||||
// For last character, move offset 1 past the end as it
|
|
||||||
// simplifies offset calculations in NAME and NUMBER
|
|
||||||
if l.ch != 0 {
|
|
||||||
l.ch = 0
|
|
||||||
l.offset++
|
|
||||||
l.nextPos.Column++
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ch := l.src[l.offset]
|
|
||||||
if ch == '\n' {
|
|
||||||
l.nextPos.Line++
|
|
||||||
l.nextPos.Column = 1
|
|
||||||
} else if ch != '\r' {
|
|
||||||
l.nextPos.Column++
|
|
||||||
}
|
|
||||||
l.ch = ch
|
|
||||||
l.offset++
|
|
||||||
}
|
|
||||||
|
|
||||||
// Un-read the character just scanned (doesn't handle line boundaries).
|
|
||||||
func (l *Lexer) unread() {
|
|
||||||
l.offset--
|
|
||||||
l.pos.Column--
|
|
||||||
l.nextPos.Column--
|
|
||||||
l.ch = l.src[l.offset-1]
|
|
||||||
}
|
|
||||||
|
|
||||||
// isNameStart reports whether ch may begin a name: an underscore or an
// ASCII letter.
func isNameStart(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
|
|
||||||
|
|
||||||
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large value.
	return ch-'0' <= 9
}
|
|
||||||
|
|
||||||
// Return the hex digit 0-15 corresponding to the given ASCII byte,
|
|
||||||
// or -1 if it's not a valid hex digit.
|
|
||||||
func hexDigit(ch byte) int {
|
|
||||||
switch {
|
|
||||||
case isDigit(ch):
|
|
||||||
return int(ch - '0')
|
|
||||||
case ch >= 'a' && ch <= 'f':
|
|
||||||
return int(ch - 'a' + 10)
|
|
||||||
case ch >= 'A' && ch <= 'F':
|
|
||||||
return int(ch - 'A' + 10)
|
|
||||||
default:
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Lexer) choice(ch byte, one, two Token) Token {
|
|
||||||
if l.ch == ch {
|
|
||||||
l.next()
|
|
||||||
return two
|
|
||||||
}
|
|
||||||
return one
|
|
||||||
}
|
|
||||||
|
|
||||||
// PeekByte returns the next unscanned byte; used when parsing
|
|
||||||
// "getline lvalue" expressions. Returns 0 at end of input.
|
|
||||||
func (l *Lexer) PeekByte() byte {
|
|
||||||
return l.ch
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unescape unescapes the backslash escapes in s (which shouldn't include the
|
|
||||||
// surrounding quotes) and returns the unquoted string. It's intended for use
|
|
||||||
// when unescaping command line var=value assignments, as required by the
|
|
||||||
// POSIX AWK spec.
|
|
||||||
func Unescape(s string) (string, error) {
|
|
||||||
i := 0
|
|
||||||
ch := func() byte {
|
|
||||||
if i >= len(s) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return s[i]
|
|
||||||
}
|
|
||||||
next := func() {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
return parseString(0, ch, next)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parses a string ending with given quote character (not parsed). The ch
|
|
||||||
// function returns the current character (or 0 at the end); the next function
|
|
||||||
// moves forward one character.
|
|
||||||
func parseString(quote byte, ch func() byte, next func()) (string, error) {
|
|
||||||
chars := make([]byte, 0, 32) // most strings won't require heap allocation
|
|
||||||
for {
|
|
||||||
c := ch()
|
|
||||||
if c == quote || c == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if c == '\r' || c == '\n' {
|
|
||||||
return "", errors.New("can't have newline in string")
|
|
||||||
}
|
|
||||||
if c != '\\' {
|
|
||||||
// Normal, non-escaped character
|
|
||||||
chars = append(chars, c)
|
|
||||||
next()
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Escape sequence, skip over \ and process
|
|
||||||
next()
|
|
||||||
switch ch() {
|
|
||||||
case 'n':
|
|
||||||
c = '\n'
|
|
||||||
next()
|
|
||||||
case 't':
|
|
||||||
c = '\t'
|
|
||||||
next()
|
|
||||||
case 'r':
|
|
||||||
c = '\r'
|
|
||||||
next()
|
|
||||||
case 'a':
|
|
||||||
c = '\a'
|
|
||||||
next()
|
|
||||||
case 'b':
|
|
||||||
c = '\b'
|
|
||||||
next()
|
|
||||||
case 'f':
|
|
||||||
c = '\f'
|
|
||||||
next()
|
|
||||||
case 'v':
|
|
||||||
c = '\v'
|
|
||||||
next()
|
|
||||||
case 'x':
|
|
||||||
// Hex byte of one of two hex digits
|
|
||||||
next()
|
|
||||||
digit := hexDigit(ch())
|
|
||||||
if digit < 0 {
|
|
||||||
return "", errors.New("1 or 2 hex digits expected")
|
|
||||||
}
|
|
||||||
c = byte(digit)
|
|
||||||
next()
|
|
||||||
digit = hexDigit(ch())
|
|
||||||
if digit >= 0 {
|
|
||||||
c = c*16 + byte(digit)
|
|
||||||
next()
|
|
||||||
}
|
|
||||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
|
||||||
// Octal byte of 1-3 octal digits
|
|
||||||
c = ch() - '0'
|
|
||||||
next()
|
|
||||||
for i := 0; i < 2 && ch() >= '0' && ch() <= '7'; i++ {
|
|
||||||
c = c*8 + ch() - '0'
|
|
||||||
next()
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
// Any other escape character is just the char
|
|
||||||
// itself, eg: "\z" is just "z".
|
|
||||||
c = ch()
|
|
||||||
if c == 0 {
|
|
||||||
// Expect backslash right at the end of the string, which is
|
|
||||||
// interpreted as a literal backslash (only for Unescape).
|
|
||||||
c = '\\'
|
|
||||||
}
|
|
||||||
next()
|
|
||||||
}
|
|
||||||
chars = append(chars, c)
|
|
||||||
}
|
|
||||||
return string(chars), nil
|
|
||||||
}
|
|
|
@ -1,393 +0,0 @@
|
||||||
// Test GoAWK Lexer
|
|
||||||
|
|
||||||
package lexer_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
. "github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestLexer(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
input string
|
|
||||||
output string
|
|
||||||
}{
|
|
||||||
// Comments, whitespace, line continuations
|
|
||||||
{"+# foo \n- #foo", `1:1 + "", 1:8 <newline> "", 2:1 - ""`},
|
|
||||||
{"+\\\n-", `1:1 + "", 2:1 - ""`},
|
|
||||||
{"+\\\r\n-", `1:1 + "", 2:1 - ""`},
|
|
||||||
{"+\\-", `1:1 + "", 1:3 <illegal> "expected \\n after \\ line continuation", 1:3 - ""`},
|
|
||||||
|
|
||||||
// Names and keywords
|
|
||||||
{"x", `1:1 name "x"`},
|
|
||||||
{"x y0", `1:1 name "x", 1:3 name "y0"`},
|
|
||||||
{"x 0y", `1:1 name "x", 1:3 number "0", 1:4 name "y"`},
|
|
||||||
{"sub SUB", `1:1 sub "", 1:5 name "SUB"`},
|
|
||||||
|
|
||||||
// String tokens
|
|
||||||
{`"foo"`, `1:1 string "foo"`},
|
|
||||||
{`"a\t\r\n\z\'\"\a\b\f\vb"`, `1:1 string "a\t\r\nz'\"\a\b\f\vb"`},
|
|
||||||
{`"x`, `1:3 <illegal> "didn't find end quote in string"`},
|
|
||||||
{`"foo\"`, `1:7 <illegal> "didn't find end quote in string"`},
|
|
||||||
{"\"x\n\"", `1:3 <illegal> "can't have newline in string", 1:3 <newline> "", 2:2 <illegal> "didn't find end quote in string"`},
|
|
||||||
{`'foo'`, `1:1 string "foo"`},
|
|
||||||
{`'a\t\r\n\z\'\"b'`, `1:1 string "a\t\r\nz'\"b"`},
|
|
||||||
{`'x`, `1:3 <illegal> "didn't find end quote in string"`},
|
|
||||||
{"'x\n'", `1:3 <illegal> "can't have newline in string", 1:3 <newline> "", 2:2 <illegal> "didn't find end quote in string"`},
|
|
||||||
{`"\x0.\x00.\x0A\x10\xff\xFF\x41"`, `1:1 string "\x00.\x00.\n\x10\xff\xffA"`},
|
|
||||||
{`"\xg"`, `1:4 <illegal> "1 or 2 hex digits expected", 1:4 name "g", 1:6 <illegal> "didn't find end quote in string"`},
|
|
||||||
{`"\0\78\7\77\777\0 \141 "`, `1:1 string "\x00\a8\a?\xff\x00 a "`},
|
|
||||||
|
|
||||||
// Number tokens
|
|
||||||
{"0", `1:1 number "0"`},
|
|
||||||
{"9", `1:1 number "9"`},
|
|
||||||
{" 0 ", `1:2 number "0"`},
|
|
||||||
{"\n 1", `1:1 <newline> "", 2:3 number "1"`},
|
|
||||||
{"1234", `1:1 number "1234"`},
|
|
||||||
{".5", `1:1 number ".5"`},
|
|
||||||
{".5e1", `1:1 number ".5e1"`},
|
|
||||||
{"5e+1", `1:1 number "5e+1"`},
|
|
||||||
{"5e-1", `1:1 number "5e-1"`},
|
|
||||||
{"0.", `1:1 number "0."`},
|
|
||||||
{"42e", `1:1 number "42", 1:3 name "e"`},
|
|
||||||
{"4.2e", `1:1 number "4.2", 1:4 name "e"`},
|
|
||||||
{"1.e3", `1:1 number "1.e3"`},
|
|
||||||
{"1.e3", `1:1 number "1.e3"`},
|
|
||||||
{"1e3foo", `1:1 number "1e3", 1:4 name "foo"`},
|
|
||||||
{"1e3+", `1:1 number "1e3", 1:4 + ""`},
|
|
||||||
{"1e3.4", `1:1 number "1e3", 1:4 number ".4"`},
|
|
||||||
{"1e-", `1:1 number "1", 1:2 name "e", 1:3 - ""`},
|
|
||||||
{"1e+", `1:1 number "1", 1:2 name "e", 1:3 + ""`},
|
|
||||||
{"42`", `1:1 number "42", 1:3 <illegal> "unexpected char"`},
|
|
||||||
{"0..", `1:1 number "0.", 1:4 <illegal> "expected digits"`},
|
|
||||||
{".", `1:2 <illegal> "expected digits"`},
|
|
||||||
|
|
||||||
// Misc errors
|
|
||||||
{"&=", `1:2 <illegal> "unexpected char after '&'", 1:2 = ""`},
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.input, func(t *testing.T) {
|
|
||||||
l := NewLexer([]byte(test.input))
|
|
||||||
strs := []string{}
|
|
||||||
for {
|
|
||||||
pos, tok, val := l.Scan()
|
|
||||||
if tok == EOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if tok == NUMBER {
|
|
||||||
// Ensure ParseFloat() works, as that's what our
|
|
||||||
// parser uses to convert
|
|
||||||
trimmed := strings.TrimRight(val, "eE")
|
|
||||||
_, err := strconv.ParseFloat(trimmed, 64)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("couldn't parse float: %q", val)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
strs = append(strs, fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val))
|
|
||||||
}
|
|
||||||
output := strings.Join(strs, ", ")
|
|
||||||
if output != test.output {
|
|
||||||
t.Errorf("expected %q, got %q", test.output, output)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestRegex(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
input string
|
|
||||||
output string
|
|
||||||
}{
|
|
||||||
{`/foo/`, `1:1 regex "foo"`},
|
|
||||||
{`/=foo/`, `1:1 regex "=foo"`},
|
|
||||||
{`/a\/b/`, `1:1 regex "a/b"`},
|
|
||||||
{`/a\/\zb/`, `1:1 regex "a/\\zb"`},
|
|
||||||
{`/a`, `1:3 <illegal> "didn't find end slash in regex"`},
|
|
||||||
{"/a\n", `1:3 <illegal> "can't have newline in regex"`},
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.input, func(t *testing.T) {
|
|
||||||
l := NewLexer([]byte(test.input))
|
|
||||||
l.Scan() // Scan first token (probably DIV)
|
|
||||||
pos, tok, val := l.ScanRegex()
|
|
||||||
output := fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val)
|
|
||||||
if output != test.output {
|
|
||||||
t.Errorf("expected %q, got %q", test.output, output)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestScanRegexInvalid(t *testing.T) {
|
|
||||||
defer func() {
|
|
||||||
r := recover()
|
|
||||||
if message, ok := r.(string); ok {
|
|
||||||
expected := "ScanRegex should only be called after DIV or DIV_ASSIGN token"
|
|
||||||
if message != expected {
|
|
||||||
t.Fatalf("expected %q, got %q", expected, message)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
t.Fatalf("expected panic of string type")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
l := NewLexer([]byte("foo/"))
|
|
||||||
l.Scan() // Scan first token (NAME foo)
|
|
||||||
l.ScanRegex()
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestHadSpace(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
input string
|
|
||||||
tokens []Token
|
|
||||||
spaces []bool
|
|
||||||
}{
|
|
||||||
{`foo(x)`, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, false, false, false}},
|
|
||||||
{`foo (x) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, true, false, false}},
|
|
||||||
{` foo ( x ) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{true, true, true, true}},
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.input, func(t *testing.T) {
|
|
||||||
l := NewLexer([]byte(test.input))
|
|
||||||
for i := 0; ; i++ {
|
|
||||||
_, tok, _ := l.Scan()
|
|
||||||
if tok == EOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if tok != test.tokens[i] {
|
|
||||||
t.Errorf("expected %s for token %d, got %s", test.tokens[i], i, tok)
|
|
||||||
}
|
|
||||||
if l.HadSpace() != test.spaces[i] {
|
|
||||||
t.Errorf("expected %v for space %d, got %v", test.spaces[i], i, l.HadSpace())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestPeekByte(t *testing.T) {
|
|
||||||
l := NewLexer([]byte("foo()"))
|
|
||||||
b := l.PeekByte()
|
|
||||||
if b != 'f' {
|
|
||||||
t.Errorf("expected 'f', got %q", b)
|
|
||||||
}
|
|
||||||
_, tok, _ := l.Scan()
|
|
||||||
if tok != NAME {
|
|
||||||
t.Errorf("expected name, got %s", tok)
|
|
||||||
}
|
|
||||||
b = l.PeekByte()
|
|
||||||
if b != '(' {
|
|
||||||
t.Errorf("expected '(', got %q", b)
|
|
||||||
}
|
|
||||||
_, tok, _ = l.Scan()
|
|
||||||
if tok != LPAREN {
|
|
||||||
t.Errorf("expected (, got %s", tok)
|
|
||||||
}
|
|
||||||
_, tok, _ = l.Scan()
|
|
||||||
if tok != RPAREN {
|
|
||||||
t.Errorf("expected ), got %s", tok)
|
|
||||||
}
|
|
||||||
b = l.PeekByte()
|
|
||||||
if b != 0 {
|
|
||||||
t.Errorf("expected 0, got %q", b)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestKeywordToken(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
tok Token
|
|
||||||
}{
|
|
||||||
{"print", PRINT},
|
|
||||||
{"split", F_SPLIT},
|
|
||||||
{"BEGIN", BEGIN},
|
|
||||||
{"foo", ILLEGAL},
|
|
||||||
{"GoAWK", ILLEGAL},
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.name, func(t *testing.T) {
|
|
||||||
tok := KeywordToken(test.name)
|
|
||||||
if tok != test.tok {
|
|
||||||
t.Errorf("expected %v, got %v", test.tok, tok)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAllTokens(t *testing.T) {
|
|
||||||
input := "# comment line\n" +
|
|
||||||
"+ += && = : , -- /\n/= $ @ == >= > >> ++ { [ < ( #\n" +
|
|
||||||
"<= ~ % %= * *= !~ ! != | || ^ ^= ** **= ? } ] ) ; - -= " +
|
|
||||||
"BEGIN break continue delete do else END exit " +
|
|
||||||
"for function getline if in next print printf return while " +
|
|
||||||
"atan2 close cos exp fflush gsub index int length log match rand " +
|
|
||||||
"sin split sprintf sqrt srand sub substr system tolower toupper " +
|
|
||||||
"x \"str\\n\" 1234\n" +
|
|
||||||
"` ."
|
|
||||||
|
|
||||||
strs := make([]string, 0, LAST+1)
|
|
||||||
seen := make([]bool, LAST+1)
|
|
||||||
l := NewLexer([]byte(input))
|
|
||||||
for {
|
|
||||||
_, tok, _ := l.Scan()
|
|
||||||
strs = append(strs, tok.String())
|
|
||||||
seen[int(tok)] = true
|
|
||||||
if tok == EOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output := strings.Join(strs, " ")
|
|
||||||
|
|
||||||
expected := "<newline> " +
|
|
||||||
"+ += && = : , -- / <newline> /= $ @ == >= > >> ++ { [ < ( <newline> " +
|
|
||||||
"<= ~ % %= * *= !~ ! != | || ^ ^= ^ ^= ? } ] ) ; - -= " +
|
|
||||||
"BEGIN break continue delete do else END exit " +
|
|
||||||
"for function getline if in next print printf return while " +
|
|
||||||
"atan2 close cos exp fflush gsub index int length log match rand " +
|
|
||||||
"sin split sprintf sqrt srand sub substr system tolower toupper " +
|
|
||||||
"name string number <newline> " +
|
|
||||||
"<illegal> <illegal> EOF"
|
|
||||||
if output != expected {
|
|
||||||
t.Errorf("expected %q, got %q", expected, output)
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, s := range seen {
|
|
||||||
if !s && Token(i) != CONCAT && Token(i) != REGEX {
|
|
||||||
t.Errorf("token %s (%d) not seen", Token(i), i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
l = NewLexer([]byte(`/foo/`))
|
|
||||||
_, tok1, _ := l.Scan()
|
|
||||||
_, tok2, val := l.ScanRegex()
|
|
||||||
if tok1 != DIV || tok2 != REGEX || val != "foo" {
|
|
||||||
t.Errorf(`expected / regex "foo", got %s %s %q`, tok1, tok2, val)
|
|
||||||
}
|
|
||||||
|
|
||||||
l = NewLexer([]byte(`/=foo/`))
|
|
||||||
_, tok1, _ = l.Scan()
|
|
||||||
_, tok2, val = l.ScanRegex()
|
|
||||||
if tok1 != DIV_ASSIGN || tok2 != REGEX || val != "=foo" {
|
|
||||||
t.Errorf(`expected /= regex "=foo", got %s %s %q`, tok1, tok2, val)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUnescape(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
input string
|
|
||||||
output string
|
|
||||||
error string
|
|
||||||
}{
|
|
||||||
{``, "", ""},
|
|
||||||
{`foo bar`, "foo bar", ""},
|
|
||||||
{`foo\tbar`, "foo\tbar", ""},
|
|
||||||
{"foo\nbar", "", "can't have newline in string"},
|
|
||||||
{`foo"`, "foo\"", ""},
|
|
||||||
{`O'Connor`, "O'Connor", ""},
|
|
||||||
{`foo\`, "foo\\", ""},
|
|
||||||
// Other cases tested in TestLexer string handling.
|
|
||||||
}
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.input, func(t *testing.T) {
|
|
||||||
got, err := Unescape(test.input)
|
|
||||||
if err != nil {
|
|
||||||
if err.Error() != test.error {
|
|
||||||
t.Fatalf("expected error %q, got %q", test.error, err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if test.error != "" {
|
|
||||||
t.Fatalf("expected error %q, got %q", test.error, "")
|
|
||||||
}
|
|
||||||
if got != test.output {
|
|
||||||
t.Fatalf("expected %q, got %q", test.output, got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func benchmarkLexer(b *testing.B, repeat int, source string) {
|
|
||||||
fullSource := []byte(strings.Repeat(source+"\n", repeat))
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
l := NewLexer(fullSource)
|
|
||||||
for {
|
|
||||||
_, tok, _ := l.Scan()
|
|
||||||
if tok == EOF || tok == ILLEGAL {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkProgram(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, `{ print $1, ($3+$4)*$5 }`)
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkNames(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, `x y i foobar abcdefghij0123456789 _`)
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkKeywords(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, `BEGIN END print sub if length`)
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkSimpleTokens(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, "\n : , { [ ( } ] ) ~ ? ; $")
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkChoiceTokens(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, `/ /= % %= + ++ += * ** **= *= = == ^ ^= ! != !~ < <= > >= >> && | ||`)
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkNumbers(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, `0 1 .5 1234 1234567890 1234.56789e-50`)
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkStrings(b *testing.B) {
|
|
||||||
benchmarkLexer(b, 5, `"x" "y" "xyz" "foo" "foo bar baz" "foo\tbar\rbaz\n"`)
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkRegex(b *testing.B) {
|
|
||||||
source := `/x/ /./ /foo/ /bar/ /=equals=/ /\/\/\/\//`
|
|
||||||
fullSource := []byte(strings.Repeat(source+" ", 5))
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
l := NewLexer(fullSource)
|
|
||||||
for {
|
|
||||||
_, tok, _ := l.Scan()
|
|
||||||
if tok == EOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if tok != DIV && tok != DIV_ASSIGN {
|
|
||||||
b.Fatalf("expected / or /=, got %s", tok)
|
|
||||||
}
|
|
||||||
_, tok, _ = l.ScanRegex()
|
|
||||||
if tok != REGEX {
|
|
||||||
b.Fatalf("expected regex, got %s", tok)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example() {
|
|
||||||
lexer := NewLexer([]byte(`$0 { print $1 }`))
|
|
||||||
for {
|
|
||||||
pos, tok, val := lexer.Scan()
|
|
||||||
if tok == EOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
fmt.Printf("%d:%d %s %q\n", pos.Line, pos.Column, tok, val)
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// 1:1 $ ""
|
|
||||||
// 1:2 number "0"
|
|
||||||
// 1:4 { ""
|
|
||||||
// 1:6 print ""
|
|
||||||
// 1:12 $ ""
|
|
||||||
// 1:13 number "1"
|
|
||||||
// 1:15 } ""
|
|
||||||
}
|
|
|
@ -1,263 +0,0 @@
|
||||||
// Lexer tokens
|
|
||||||
|
|
||||||
package lexer
|
|
||||||
|
|
||||||
// Token identifies the kind of a single lexical token.
type Token int

// Token values are assigned sequentially from ILLEGAL (0); the order below
// therefore defines each token's numeric value.
const (
	ILLEGAL Token = iota
	EOF
	NEWLINE
	CONCAT // Not really a token, but used as an operator

	// Symbols

	ADD
	ADD_ASSIGN
	AND
	APPEND
	ASSIGN
	AT
	COLON
	COMMA
	DECR
	DIV
	DIV_ASSIGN
	DOLLAR
	EQUALS
	GTE
	GREATER
	INCR
	LBRACE
	LBRACKET
	LESS
	LPAREN
	LTE
	MATCH
	MOD
	MOD_ASSIGN
	MUL
	MUL_ASSIGN
	NOT_MATCH
	NOT
	NOT_EQUALS
	OR
	PIPE
	POW
	POW_ASSIGN
	QUESTION
	RBRACE
	RBRACKET
	RPAREN
	SEMICOLON
	SUB
	SUB_ASSIGN

	// Keywords

	BEGIN
	BREAK
	CONTINUE
	DELETE
	DO
	ELSE
	END
	EXIT
	FOR
	FUNCTION
	GETLINE
	IF
	IN
	NEXT
	PRINT
	PRINTF
	RETURN
	WHILE

	// Built-in functions

	F_ATAN2
	F_CLOSE
	F_COS
	F_EXP
	F_FFLUSH
	F_GSUB
	F_INDEX
	F_INT
	F_LENGTH
	F_LOG
	F_MATCH
	F_RAND
	F_SIN
	F_SPLIT
	F_SPRINTF
	F_SQRT
	F_SRAND
	F_SUB
	F_SUBSTR
	F_SYSTEM
	F_TOLOWER
	F_TOUPPER

	// Literals and names (variables and arrays)

	NAME
	NUMBER
	STRING
	REGEX
)

// Range markers: the highest token value and the bounds of the built-in
// function tokens.
const (
	LAST       = REGEX
	FIRST_FUNC = F_ATAN2
	LAST_FUNC  = F_TOUPPER
)
|
|
||||||
|
|
||||||
var keywordTokens = map[string]Token{
|
|
||||||
"BEGIN": BEGIN,
|
|
||||||
"break": BREAK,
|
|
||||||
"continue": CONTINUE,
|
|
||||||
"delete": DELETE,
|
|
||||||
"do": DO,
|
|
||||||
"else": ELSE,
|
|
||||||
"END": END,
|
|
||||||
"exit": EXIT,
|
|
||||||
"for": FOR,
|
|
||||||
"function": FUNCTION,
|
|
||||||
"getline": GETLINE,
|
|
||||||
"if": IF,
|
|
||||||
"in": IN,
|
|
||||||
"next": NEXT,
|
|
||||||
"print": PRINT,
|
|
||||||
"printf": PRINTF,
|
|
||||||
"return": RETURN,
|
|
||||||
"while": WHILE,
|
|
||||||
|
|
||||||
"atan2": F_ATAN2,
|
|
||||||
"close": F_CLOSE,
|
|
||||||
"cos": F_COS,
|
|
||||||
"exp": F_EXP,
|
|
||||||
"fflush": F_FFLUSH,
|
|
||||||
"gsub": F_GSUB,
|
|
||||||
"index": F_INDEX,
|
|
||||||
"int": F_INT,
|
|
||||||
"length": F_LENGTH,
|
|
||||||
"log": F_LOG,
|
|
||||||
"match": F_MATCH,
|
|
||||||
"rand": F_RAND,
|
|
||||||
"sin": F_SIN,
|
|
||||||
"split": F_SPLIT,
|
|
||||||
"sprintf": F_SPRINTF,
|
|
||||||
"sqrt": F_SQRT,
|
|
||||||
"srand": F_SRAND,
|
|
||||||
"sub": F_SUB,
|
|
||||||
"substr": F_SUBSTR,
|
|
||||||
"system": F_SYSTEM,
|
|
||||||
"tolower": F_TOLOWER,
|
|
||||||
"toupper": F_TOUPPER,
|
|
||||||
}
|
|
||||||
|
|
||||||
// KeywordToken returns the token associated with the given keyword
|
|
||||||
// string, or ILLEGAL if given name is not a keyword.
|
|
||||||
func KeywordToken(name string) Token {
|
|
||||||
return keywordTokens[name]
|
|
||||||
}
|
|
||||||
|
|
||||||
var tokenNames = map[Token]string{
|
|
||||||
ILLEGAL: "<illegal>",
|
|
||||||
EOF: "EOF",
|
|
||||||
NEWLINE: "<newline>",
|
|
||||||
CONCAT: "<concat>",
|
|
||||||
|
|
||||||
ADD: "+",
|
|
||||||
ADD_ASSIGN: "+=",
|
|
||||||
AND: "&&",
|
|
||||||
APPEND: ">>",
|
|
||||||
ASSIGN: "=",
|
|
||||||
AT: "@",
|
|
||||||
COLON: ":",
|
|
||||||
COMMA: ",",
|
|
||||||
DECR: "--",
|
|
||||||
DIV: "/",
|
|
||||||
DIV_ASSIGN: "/=",
|
|
||||||
DOLLAR: "$",
|
|
||||||
EQUALS: "==",
|
|
||||||
GTE: ">=",
|
|
||||||
GREATER: ">",
|
|
||||||
INCR: "++",
|
|
||||||
LBRACE: "{",
|
|
||||||
LBRACKET: "[",
|
|
||||||
LESS: "<",
|
|
||||||
LPAREN: "(",
|
|
||||||
LTE: "<=",
|
|
||||||
MATCH: "~",
|
|
||||||
MOD: "%",
|
|
||||||
MOD_ASSIGN: "%=",
|
|
||||||
MUL: "*",
|
|
||||||
MUL_ASSIGN: "*=",
|
|
||||||
NOT_MATCH: "!~",
|
|
||||||
NOT: "!",
|
|
||||||
NOT_EQUALS: "!=",
|
|
||||||
OR: "||",
|
|
||||||
PIPE: "|",
|
|
||||||
POW: "^",
|
|
||||||
POW_ASSIGN: "^=",
|
|
||||||
QUESTION: "?",
|
|
||||||
RBRACE: "}",
|
|
||||||
RBRACKET: "]",
|
|
||||||
RPAREN: ")",
|
|
||||||
SEMICOLON: ";",
|
|
||||||
SUB: "-",
|
|
||||||
SUB_ASSIGN: "-=",
|
|
||||||
|
|
||||||
BEGIN: "BEGIN",
|
|
||||||
BREAK: "break",
|
|
||||||
CONTINUE: "continue",
|
|
||||||
DELETE: "delete",
|
|
||||||
DO: "do",
|
|
||||||
ELSE: "else",
|
|
||||||
END: "END",
|
|
||||||
EXIT: "exit",
|
|
||||||
FOR: "for",
|
|
||||||
FUNCTION: "function",
|
|
||||||
GETLINE: "getline",
|
|
||||||
IF: "if",
|
|
||||||
IN: "in",
|
|
||||||
NEXT: "next",
|
|
||||||
PRINT: "print",
|
|
||||||
PRINTF: "printf",
|
|
||||||
RETURN: "return",
|
|
||||||
WHILE: "while",
|
|
||||||
|
|
||||||
F_ATAN2: "atan2",
|
|
||||||
F_CLOSE: "close",
|
|
||||||
F_COS: "cos",
|
|
||||||
F_EXP: "exp",
|
|
||||||
F_FFLUSH: "fflush",
|
|
||||||
F_GSUB: "gsub",
|
|
||||||
F_INDEX: "index",
|
|
||||||
F_INT: "int",
|
|
||||||
F_LENGTH: "length",
|
|
||||||
F_LOG: "log",
|
|
||||||
F_MATCH: "match",
|
|
||||||
F_RAND: "rand",
|
|
||||||
F_SIN: "sin",
|
|
||||||
F_SPLIT: "split",
|
|
||||||
F_SPRINTF: "sprintf",
|
|
||||||
F_SQRT: "sqrt",
|
|
||||||
F_SRAND: "srand",
|
|
||||||
F_SUB: "sub",
|
|
||||||
F_SUBSTR: "substr",
|
|
||||||
F_SYSTEM: "system",
|
|
||||||
F_TOLOWER: "tolower",
|
|
||||||
F_TOUPPER: "toupper",
|
|
||||||
|
|
||||||
NAME: "name",
|
|
||||||
NUMBER: "number",
|
|
||||||
STRING: "string",
|
|
||||||
REGEX: "regex",
|
|
||||||
}
|
|
||||||
|
|
||||||
// String returns the string name of this token.
|
|
||||||
func (t Token) String() string {
|
|
||||||
return tokenNames[t]
|
|
||||||
}
|
|
|
@ -1,21 +0,0 @@
|
||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2022 Ben Hoyt
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,242 +0,0 @@
|
||||||
// Test parser package
|
|
||||||
|
|
||||||
package parser_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
// NOTE: apart from TestParseAndString, the parser doesn't have
|
|
||||||
// extensive tests of its own; the idea is to test the parser in the
|
|
||||||
// interp tests.
|
|
||||||
|
|
||||||
func TestParseAndString(t *testing.T) {
|
|
||||||
// This program should have one of every AST element to ensure
|
|
||||||
// we can parse and String()ify each.
|
|
||||||
source := strings.TrimSpace(`
|
|
||||||
BEGIN {
|
|
||||||
print "begin one"
|
|
||||||
}
|
|
||||||
|
|
||||||
BEGIN {
|
|
||||||
print "begin two"
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
print "empty pattern"
|
|
||||||
}
|
|
||||||
|
|
||||||
$0 {
|
|
||||||
print "normal pattern"
|
|
||||||
print 1, 2, 3
|
|
||||||
printf "%.3f", 3.14159
|
|
||||||
print "x" >"file"
|
|
||||||
print "x" >>"append"
|
|
||||||
print "y" |"prog"
|
|
||||||
delete a[k]
|
|
||||||
if (c) {
|
|
||||||
get(a, k)
|
|
||||||
}
|
|
||||||
if (1 + 2) {
|
|
||||||
get(a, k)
|
|
||||||
} else {
|
|
||||||
set(a, k, v)
|
|
||||||
}
|
|
||||||
for (i = 0; i < 10; i++) {
|
|
||||||
print i
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for (k in a) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
while (0) {
|
|
||||||
print "x"
|
|
||||||
}
|
|
||||||
do {
|
|
||||||
print "y"
|
|
||||||
exit status
|
|
||||||
} while (x)
|
|
||||||
next
|
|
||||||
"cmd" |getline
|
|
||||||
"cmd" |getline x
|
|
||||||
"cmd" |getline a[1]
|
|
||||||
"cmd" |getline $1
|
|
||||||
getline
|
|
||||||
getline x
|
|
||||||
(getline x + 1)
|
|
||||||
getline $1
|
|
||||||
getline a[1]
|
|
||||||
getline <"file"
|
|
||||||
getline x <"file"
|
|
||||||
(getline x <"file" "x")
|
|
||||||
getline $1 <"file"
|
|
||||||
getline a[1] <"file"
|
|
||||||
x = 0
|
|
||||||
y = z = 0
|
|
||||||
b += 1
|
|
||||||
c -= 2
|
|
||||||
d *= 3
|
|
||||||
e /= 4
|
|
||||||
g ^= 5
|
|
||||||
h %= 6
|
|
||||||
(x ? "t" : "f")
|
|
||||||
((b && c) || d)
|
|
||||||
(k in a)
|
|
||||||
((x, y, z) in a)
|
|
||||||
(s ~ "foo")
|
|
||||||
(b < 1)
|
|
||||||
(c <= 2)
|
|
||||||
(d > 3)
|
|
||||||
(e >= 4)
|
|
||||||
(g == 5)
|
|
||||||
(h != 6)
|
|
||||||
((x y) z)
|
|
||||||
((b + c) + d)
|
|
||||||
((b * c) * d)
|
|
||||||
((b - c) - d)
|
|
||||||
((b / c) / d)
|
|
||||||
(b ^ (c ^ d))
|
|
||||||
x++
|
|
||||||
x--
|
|
||||||
++y
|
|
||||||
--y
|
|
||||||
1234
|
|
||||||
1.5
|
|
||||||
"This is a string"
|
|
||||||
if (/a.b/) {
|
|
||||||
print "match"
|
|
||||||
}
|
|
||||||
$1
|
|
||||||
$(1 + 2)
|
|
||||||
!x
|
|
||||||
+x
|
|
||||||
-x
|
|
||||||
var
|
|
||||||
a[key]
|
|
||||||
a[x, y, z]
|
|
||||||
f()
|
|
||||||
set(a, k, v)
|
|
||||||
sub(regex, repl)
|
|
||||||
sub(regex, repl, s)
|
|
||||||
gsub(regex, repl)
|
|
||||||
gsub(regex, repl, s)
|
|
||||||
split(s, a)
|
|
||||||
split(s, a, regex)
|
|
||||||
match(s, regex)
|
|
||||||
rand()
|
|
||||||
srand()
|
|
||||||
srand(1)
|
|
||||||
length()
|
|
||||||
length($1)
|
|
||||||
sprintf("")
|
|
||||||
sprintf("%.3f", 3.14159)
|
|
||||||
sprintf("%.3f %d", 3.14159, 42)
|
|
||||||
cos(1)
|
|
||||||
sin(1)
|
|
||||||
exp(1)
|
|
||||||
log(1)
|
|
||||||
sqrt(1)
|
|
||||||
int("42")
|
|
||||||
tolower("FOO")
|
|
||||||
toupper("foo")
|
|
||||||
system("ls")
|
|
||||||
close("file")
|
|
||||||
atan2(x, y)
|
|
||||||
index(haystack, needle)
|
|
||||||
{
|
|
||||||
print "block statement"
|
|
||||||
f()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
(NR == 1), (NR == 2) {
|
|
||||||
print "range pattern"
|
|
||||||
}
|
|
||||||
|
|
||||||
($1 == "foo")
|
|
||||||
|
|
||||||
END {
|
|
||||||
print "end one"
|
|
||||||
}
|
|
||||||
|
|
||||||
END {
|
|
||||||
print "end two"
|
|
||||||
}
|
|
||||||
|
|
||||||
function f() {
|
|
||||||
}
|
|
||||||
|
|
||||||
function get(a, k) {
|
|
||||||
return a[k]
|
|
||||||
}
|
|
||||||
|
|
||||||
function set(a, k, v) {
|
|
||||||
a[k] = v
|
|
||||||
return
|
|
||||||
}
|
|
||||||
`)
|
|
||||||
prog, err := parser.ParseProgram([]byte(source), nil)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("error parsing program: %v", err)
|
|
||||||
}
|
|
||||||
progStr := prog.String()
|
|
||||||
if progStr != source {
|
|
||||||
t.Fatalf("expected first, got second:\n%s\n----------\n%s", source, progStr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResolveLargeCallGraph(t *testing.T) {
|
|
||||||
const numCalls = 10000
|
|
||||||
|
|
||||||
var buf bytes.Buffer
|
|
||||||
var i int
|
|
||||||
for i = 0; i < numCalls; i++ {
|
|
||||||
fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1)
|
|
||||||
}
|
|
||||||
fmt.Fprintf(&buf, "function f%d(a) { return a }\n", i)
|
|
||||||
fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n")
|
|
||||||
_, err := parser.ParseProgram(buf.Bytes(), nil)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.Reset()
|
|
||||||
fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n")
|
|
||||||
fmt.Fprintf(&buf, "function f%d(a) { return a }\n", numCalls)
|
|
||||||
for i = numCalls - 1; i >= 0; i-- {
|
|
||||||
fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1)
|
|
||||||
}
|
|
||||||
_, err = parser.ParseProgram(buf.Bytes(), nil)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_valid() {
|
|
||||||
prog, err := parser.ParseProgram([]byte("$0 { print $1 }"), nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
} else {
|
|
||||||
fmt.Println(prog)
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// $0 {
|
|
||||||
// print $1
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_error() {
|
|
||||||
prog, err := parser.ParseProgram([]byte("{ for if }"), nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
} else {
|
|
||||||
fmt.Println(prog)
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// parse error at 1:7: expected ( instead of if
|
|
||||||
}
|
|
|
@ -1,462 +0,0 @@
|
||||||
// Resolve function calls and variable types
|
|
||||||
|
|
||||||
package parser
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"reflect"
|
|
||||||
"sort"
|
|
||||||
|
|
||||||
"github.com/mojosa-software/goblin/src/tool/awk/internal/ast"
|
|
||||||
. "github.com/mojosa-software/goblin/src/tool/awk/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// varType records whether a variable is known to be a scalar, known to be
// an array, or not yet determined.
type varType int

const (
	typeUnknown varType = iota
	typeScalar
	typeArray
)

// String returns "Scalar" or "Array" for the known types and "Unknown" for
// every other value.
func (t varType) String() string {
	if t == typeScalar {
		return "Scalar"
	}
	if t == typeArray {
		return "Array"
	}
	return "Unknown"
}
|
|
||||||
|
|
||||||
// typeInfo records type information for a single variable
|
|
||||||
type typeInfo struct {
|
|
||||||
typ varType
|
|
||||||
ref *ast.VarExpr
|
|
||||||
scope ast.VarScope
|
|
||||||
index int
|
|
||||||
callName string
|
|
||||||
argIndex int
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used by printVarTypes when debugTypes is turned on
|
|
||||||
func (t typeInfo) String() string {
|
|
||||||
var scope string
|
|
||||||
switch t.scope {
|
|
||||||
case ast.ScopeGlobal:
|
|
||||||
scope = "Global"
|
|
||||||
case ast.ScopeLocal:
|
|
||||||
scope = "Local"
|
|
||||||
default:
|
|
||||||
scope = "Special"
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d",
|
|
||||||
t.typ, t.ref, scope, t.index, t.callName, t.argIndex)
|
|
||||||
}
|
|
||||||
|
|
||||||
// A single variable reference (normally scalar)
|
|
||||||
type varRef struct {
|
|
||||||
funcName string
|
|
||||||
ref *ast.VarExpr
|
|
||||||
isArg bool
|
|
||||||
pos Position
|
|
||||||
}
|
|
||||||
|
|
||||||
// A single array reference
|
|
||||||
type arrayRef struct {
|
|
||||||
funcName string
|
|
||||||
ref *ast.ArrayExpr
|
|
||||||
pos Position
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize the resolver
|
|
||||||
func (p *parser) initResolve() {
|
|
||||||
p.varTypes = make(map[string]map[string]typeInfo)
|
|
||||||
p.varTypes[""] = make(map[string]typeInfo) // globals
|
|
||||||
p.functions = make(map[string]int)
|
|
||||||
p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present
|
|
||||||
p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays
|
|
||||||
p.arrayRef("FIELDS", Position{1, 1})
|
|
||||||
p.multiExprs = make(map[*ast.MultiExpr]Position, 3)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Signal the start of a function
|
|
||||||
func (p *parser) startFunction(name string, params []string) {
|
|
||||||
p.funcName = name
|
|
||||||
p.varTypes[name] = make(map[string]typeInfo)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Signal the end of a function
|
|
||||||
func (p *parser) stopFunction() {
|
|
||||||
p.funcName = ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add function by name with given index
|
|
||||||
func (p *parser) addFunction(name string, index int) {
|
|
||||||
p.functions[name] = index
|
|
||||||
}
|
|
||||||
|
|
||||||
// Records a call to a user function (for resolving indexes later)
|
|
||||||
type userCall struct {
|
|
||||||
call *ast.UserCallExpr
|
|
||||||
pos Position
|
|
||||||
inFunc string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Record a user call site
|
|
||||||
func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) {
|
|
||||||
p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName})
|
|
||||||
}
|
|
||||||
|
|
||||||
// After parsing, resolve all user calls to their indexes. Also
|
|
||||||
// ensures functions called have actually been defined, and that
|
|
||||||
// they're not being called with too many arguments.
|
|
||||||
func (p *parser) resolveUserCalls(prog *Program) {
|
|
||||||
// Number the native funcs (order by name to get consistent order)
|
|
||||||
nativeNames := make([]string, 0, len(p.nativeFuncs))
|
|
||||||
for name := range p.nativeFuncs {
|
|
||||||
nativeNames = append(nativeNames, name)
|
|
||||||
}
|
|
||||||
sort.Strings(nativeNames)
|
|
||||||
nativeIndexes := make(map[string]int, len(nativeNames))
|
|
||||||
for i, name := range nativeNames {
|
|
||||||
nativeIndexes[name] = i
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range p.userCalls {
|
|
||||||
// AWK-defined functions take precedence over native Go funcs
|
|
||||||
index, ok := p.functions[c.call.Name]
|
|
||||||
if !ok {
|
|
||||||
f, haveNative := p.nativeFuncs[c.call.Name]
|
|
||||||
if !haveNative {
|
|
||||||
panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name))
|
|
||||||
}
|
|
||||||
typ := reflect.TypeOf(f)
|
|
||||||
if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() {
|
|
||||||
panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name))
|
|
||||||
}
|
|
||||||
c.call.Native = true
|
|
||||||
c.call.Index = nativeIndexes[c.call.Name]
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
function := prog.Functions[index]
|
|
||||||
if len(c.call.Args) > len(function.Params) {
|
|
||||||
panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name))
|
|
||||||
}
|
|
||||||
c.call.Index = index
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// For arguments that are variable references, we don't know the
|
|
||||||
// type based on context, so mark the types for these as unknown.
|
|
||||||
func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) {
|
|
||||||
if varExpr, ok := arg.(*ast.VarExpr); ok {
|
|
||||||
scope, varFuncName := p.getScope(varExpr.Name)
|
|
||||||
ref := p.varTypes[varFuncName][varExpr.Name].ref
|
|
||||||
if ref == varExpr {
|
|
||||||
// Only applies if this is the first reference to this
|
|
||||||
// variable (otherwise we know the type already)
|
|
||||||
p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index}
|
|
||||||
}
|
|
||||||
// Mark the last related varRef (the most recent one) as a
|
|
||||||
// call argument for later error handling
|
|
||||||
p.varRefs[len(p.varRefs)-1].isArg = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine scope of given variable reference (and funcName if it's
|
|
||||||
// a local, otherwise empty string)
|
|
||||||
func (p *parser) getScope(name string) (ast.VarScope, string) {
|
|
||||||
switch {
|
|
||||||
case p.locals[name]:
|
|
||||||
return ast.ScopeLocal, p.funcName
|
|
||||||
case ast.SpecialVarIndex(name) > 0:
|
|
||||||
return ast.ScopeSpecial, ""
|
|
||||||
default:
|
|
||||||
return ast.ScopeGlobal, ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Record a variable (scalar) reference and return the *VarExpr (but
|
|
||||||
// VarExpr.Index won't be set till later)
|
|
||||||
func (p *parser) varRef(name string, pos Position) *ast.VarExpr {
|
|
||||||
scope, funcName := p.getScope(name)
|
|
||||||
expr := &ast.VarExpr{scope, 0, name}
|
|
||||||
p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos})
|
|
||||||
info := p.varTypes[funcName][name]
|
|
||||||
if info.typ == typeUnknown {
|
|
||||||
p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0}
|
|
||||||
}
|
|
||||||
return expr
|
|
||||||
}
|
|
||||||
|
|
||||||
// Record an array reference and return the *ArrayExpr (but
|
|
||||||
// ArrayExpr.Index won't be set till later)
|
|
||||||
func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr {
|
|
||||||
scope, funcName := p.getScope(name)
|
|
||||||
if scope == ast.ScopeSpecial {
|
|
||||||
panic(p.errorf("can't use scalar %q as array", name))
|
|
||||||
}
|
|
||||||
expr := &ast.ArrayExpr{scope, 0, name}
|
|
||||||
p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos})
|
|
||||||
info := p.varTypes[funcName][name]
|
|
||||||
if info.typ == typeUnknown {
|
|
||||||
p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0}
|
|
||||||
}
|
|
||||||
return expr
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print variable type information (for debugging) on p.debugWriter
|
|
||||||
func (p *parser) printVarTypes(prog *Program) {
|
|
||||||
fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars)
|
|
||||||
fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays)
|
|
||||||
funcNames := []string{}
|
|
||||||
for funcName := range p.varTypes {
|
|
||||||
funcNames = append(funcNames, funcName)
|
|
||||||
}
|
|
||||||
sort.Strings(funcNames)
|
|
||||||
for _, funcName := range funcNames {
|
|
||||||
if funcName != "" {
|
|
||||||
fmt.Fprintf(p.debugWriter, "function %s\n", funcName)
|
|
||||||
} else {
|
|
||||||
fmt.Fprintf(p.debugWriter, "globals\n")
|
|
||||||
}
|
|
||||||
varNames := []string{}
|
|
||||||
for name := range p.varTypes[funcName] {
|
|
||||||
varNames = append(varNames, name)
|
|
||||||
}
|
|
||||||
sort.Strings(varNames)
|
|
||||||
for _, name := range varNames {
|
|
||||||
info := p.varTypes[funcName][name]
|
|
||||||
fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resolve unknown variables types and generate variable indexes and
// name-to-index mappings for interpreter
//
// The work is done in several passes over the data recorded while
// parsing (p.userCalls, p.varTypes, p.varRefs, p.arrayRefs):
//
//  1. infer still-unknown variable types from the parameter types of
//     the functions they are passed to;
//  2. assign indexes to globals, filling prog.Scalars and prog.Arrays;
//  3. mark unknown-typed parameters as arrays when a call passes an array;
//  4. assign indexes to function parameters and set Function.Arrays;
//  5. verify that every call passes arrays/scalars consistently;
//  6. write the final indexes into the recorded VarExpr/ArrayExpr nodes.
//
// Type mismatches are reported by panicking with a parse error.
func (p *parser) resolveVars(prog *Program) {
	// First go through all unknown types and try to determine the
	// type from the parameter type in that function definition.
	// Iterate through functions in topological order, for example
	// if f() calls g(), process g first, then f.
	callGraph := make(map[string]map[string]struct{})
	for _, call := range p.userCalls {
		if _, ok := callGraph[call.inFunc]; !ok {
			callGraph[call.inFunc] = make(map[string]struct{})
		}
		callGraph[call.inFunc][call.call.Name] = struct{}{}
	}
	sortedFuncs := topoSort(callGraph)
	for _, funcName := range sortedFuncs {
		infos := p.varTypes[funcName]
		for name, info := range infos {
			if info.scope == ast.ScopeSpecial || info.typ != typeUnknown {
				// It's a special var or type is already known
				continue
			}
			funcIndex, ok := p.functions[info.callName]
			if !ok {
				// Function being called is a native function
				continue
			}
			// Determine var type based on type of this parameter
			// in the called function (if we know that)
			// NOTE(review): Params is indexed without a bounds check;
			// presumably an earlier pass rejects calls with too many
			// arguments -- confirm.
			paramName := prog.Functions[funcIndex].Params[info.argIndex]
			typ := p.varTypes[info.callName][paramName].typ
			if typ != typeUnknown {
				if p.debugTypes {
					fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
						funcName, name, typ)
				}
				// info is a copy of the map value, so store it back
				info.typ = typ
				p.varTypes[funcName][name] = info
			}
		}
	}

	// Resolve global variables (iteration order is undefined, so
	// assign indexes basically randomly)
	prog.Scalars = make(map[string]int)
	prog.Arrays = make(map[string]int)
	for name, info := range p.varTypes[""] {
		_, isFunc := p.functions[name]
		if isFunc {
			// Global var can't also be the name of a function
			panic(p.errorf("global var %q can't also be a function", name))
		}
		var index int
		if info.scope == ast.ScopeSpecial {
			// Special variables get their index from the ast package
			// and are not added to Scalars/Arrays
			index = ast.SpecialVarIndex(name)
		} else if info.typ == typeArray {
			index = len(prog.Arrays)
			prog.Arrays[name] = index
		} else {
			// Anything that isn't an array (including still-unknown
			// types) is indexed as a scalar
			index = len(prog.Scalars)
			prog.Scalars[name] = index
		}
		info.index = index
		p.varTypes[""][name] = info
	}

	// Fill in unknown parameter types that are being called with arrays,
	// for example, as in the following code:
	//
	//   BEGIN { arr[0]; f(arr) }
	//   function f(a) { }
	for _, c := range p.userCalls {
		if c.call.Native {
			continue
		}
		function := prog.Functions[c.call.Index]
		for i, arg := range c.call.Args {
			varExpr, ok := arg.(*ast.VarExpr)
			if !ok {
				// Only plain variable arguments carry array-ness
				continue
			}
			// Look the argument up as a local of the calling function
			// if it's one of its params, otherwise as a global
			funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
			argType := p.varTypes[funcName][varExpr.Name]
			paramType := p.varTypes[function.Name][function.Params[i]]
			if argType.typ == typeArray && paramType.typ == typeUnknown {
				paramType.typ = argType.typ
				p.varTypes[function.Name][function.Params[i]] = paramType
			}
		}
	}

	// Resolve local variables (assign indexes in order of params).
	// Also patch up Function.Arrays (tells interpreter which args
	// are arrays).
	for funcName, infos := range p.varTypes {
		if funcName == "" {
			// Globals were handled above
			continue
		}
		// Scalars and arrays are numbered independently
		scalarIndex := 0
		arrayIndex := 0
		functionIndex := p.functions[funcName]
		function := prog.Functions[functionIndex]
		arrays := make([]bool, len(function.Params))
		for i, name := range function.Params {
			info := infos[name]
			var index int
			if info.typ == typeArray {
				index = arrayIndex
				arrayIndex++
				arrays[i] = true
			} else {
				// typeScalar or typeUnknown: variables may still be
				// of unknown type if they've never been referenced --
				// default to scalar in that case
				index = scalarIndex
				scalarIndex++
			}
			info.index = index
			p.varTypes[funcName][name] = info
		}
		// function is a copy, so assign through the slice element
		prog.Functions[functionIndex].Arrays = arrays
	}

	// Check that variables passed to functions are the correct type
	for _, c := range p.userCalls {
		// Check native function calls
		if c.call.Native {
			for _, arg := range c.call.Args {
				varExpr, ok := arg.(*ast.VarExpr)
				if !ok {
					// Non-variable expression, must be scalar
					continue
				}
				funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
				info := p.varTypes[funcName][varExpr.Name]
				if info.typ == typeArray {
					panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name))
				}
			}
			continue
		}

		// Check AWK function calls
		function := prog.Functions[c.call.Index]
		for i, arg := range c.call.Args {
			varExpr, ok := arg.(*ast.VarExpr)
			if !ok {
				// A non-variable expression can't be passed where
				// the function expects an array
				if function.Arrays[i] {
					panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg))
				}
				continue
			}
			funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
			info := p.varTypes[funcName][varExpr.Name]
			if info.typ == typeArray && !function.Arrays[i] {
				panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name))
			}
			if info.typ != typeArray && function.Arrays[i] {
				panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name))
			}
		}
	}

	if p.debugTypes {
		p.printVarTypes(prog)
	}

	// Patch up variable indexes (interpreter uses an index instead
	// of name for more efficient lookups)
	for _, varRef := range p.varRefs {
		info := p.varTypes[varRef.funcName][varRef.ref.Name]
		// An array name may appear as a bare variable only when it is
		// a function call argument
		if info.typ == typeArray && !varRef.isArg {
			panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name))
		}
		varRef.ref.Index = info.index
	}
	for _, arrayRef := range p.arrayRefs {
		info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name]
		if info.typ == typeScalar {
			panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name))
		}
		arrayRef.ref.Index = info.index
	}
}
|
|
||||||
|
|
||||||
// If name refers to a local (in function inFunc), return that
|
|
||||||
// function's name, otherwise return "" (meaning global).
|
|
||||||
func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string {
|
|
||||||
if inFunc == "" {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
for _, param := range prog.Functions[p.functions[inFunc]].Params {
|
|
||||||
if name == param {
|
|
||||||
return inFunc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Record a "multi expression" (comma-separated pseudo-expression
|
|
||||||
// used to allow commas around print/printf arguments).
|
|
||||||
func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr {
|
|
||||||
expr := &ast.MultiExpr{exprs}
|
|
||||||
p.multiExprs[expr] = pos
|
|
||||||
return expr
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mark the multi expression as used (by a print/printf statement).
// This removes expr from p.multiExprs, so checkMultiExprs will not
// report it as an unexpected comma-separated expression.
func (p *parser) useMultiExpr(expr *ast.MultiExpr) {
	delete(p.multiExprs, expr)
}
|
|
||||||
|
|
||||||
// Check that there are no unused multi expressions (syntax error).
|
|
||||||
func (p *parser) checkMultiExprs() {
|
|
||||||
if len(p.multiExprs) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Show error on first comma-separated expression
|
|
||||||
min := Position{1000000000, 1000000000}
|
|
||||||
for _, pos := range p.multiExprs {
|
|
||||||
if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) {
|
|
||||||
min = pos
|
|
||||||
}
|
|
||||||
}
|
|
||||||
panic(p.posErrorf(min, "unexpected comma-separated expression"))
|
|
||||||
}
|
|
|
@ -1,72 +0,0 @@
|
||||||
// Topological sorting
|
|
||||||
|
|
||||||
package parser
|
|
||||||
|
|
||||||
/*
|
|
||||||
This algorithm is taken from:
|
|
||||||
https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search
|
|
||||||
|
|
||||||
L ← Empty list that will contain the sorted nodes
|
|
||||||
while exists nodes without a permanent mark do
|
|
||||||
select an unmarked node n
|
|
||||||
visit(n)
|
|
||||||
|
|
||||||
function visit(node n)
|
|
||||||
if n has a permanent mark then
|
|
||||||
return
|
|
||||||
if n has a temporary mark then
|
|
||||||
stop (not a DAG)
|
|
||||||
|
|
||||||
mark n with a temporary mark
|
|
||||||
|
|
||||||
for each node m with an edge from n to m do
|
|
||||||
visit(m)
|
|
||||||
|
|
||||||
remove temporary mark from n
|
|
||||||
mark n with a permanent mark
|
|
||||||
add n to head of L
|
|
||||||
*/
|
|
||||||
|
|
||||||
// topoSort returns the nodes of graph ordered so that, for an edge
// n -> m (m is a key of graph[n]), m appears before n. Nodes that only
// occur as edge targets are included too. An empty or nil graph yields
// nil. Cycles are tolerated: an edge back to a node that is currently
// being visited is ignored. Where the graph leaves the order open, the
// result depends on map iteration order.
func topoSort(graph map[string]map[string]struct{}) []string {
	if len(graph) == 0 {
		return nil
	}

	pending := make(map[string]struct{}, len(graph))
	for node := range graph {
		pending[node] = struct{}{}
	}
	done := make(map[string]struct{})
	inProgress := make(map[string]struct{})
	var order []string

	var visit func(node string)
	visit = func(node string) {
		_, finished := done[node]
		_, active := inProgress[node]
		if finished || active {
			return
		}

		inProgress[node] = struct{}{}
		for next := range graph[node] {
			visit(next)
		}
		delete(inProgress, node)

		done[node] = struct{}{}
		delete(pending, node)
		order = append(order, node)
	}

	for len(pending) > 0 {
		// Pick any one pending node and visit it.
		for node := range pending {
			visit(node)
			break
		}
	}

	return order
}
|
|
|
@ -1,100 +0,0 @@
|
||||||
package parser
|
|
||||||
|
|
||||||
import (
|
|
||||||
"strconv"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestTopoSortEmpty(t *testing.T) {
|
|
||||||
sorted := topoSort(nil)
|
|
||||||
if len(sorted) != 0 {
|
|
||||||
t.Fatalf("expected empty slice, got %v", sorted)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTopoSortSimple(t *testing.T) {
|
|
||||||
sorted := topoSort(map[string]map[string]struct{}{
|
|
||||||
"a": {"b": struct{}{}},
|
|
||||||
"b": {"c": struct{}{}},
|
|
||||||
})
|
|
||||||
if len(sorted) != 3 {
|
|
||||||
t.Fatalf("expected 3 items, got %d", len(sorted))
|
|
||||||
}
|
|
||||||
assertBefore(t, sorted, "c", "b")
|
|
||||||
assertBefore(t, sorted, "b", "a")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTopoSortComplex(t *testing.T) {
|
|
||||||
sorted := topoSort(map[string]map[string]struct{}{
|
|
||||||
"a": {"b": struct{}{}, "c": struct{}{}},
|
|
||||||
"c": {"d": struct{}{}},
|
|
||||||
"f": {"g": struct{}{}, "h": struct{}{}},
|
|
||||||
"g": {},
|
|
||||||
"h": {},
|
|
||||||
})
|
|
||||||
if len(sorted) != 7 {
|
|
||||||
t.Fatalf("expected 7 items, got %d", len(sorted))
|
|
||||||
}
|
|
||||||
assertBefore(t, sorted, "g", "f")
|
|
||||||
assertBefore(t, sorted, "h", "f")
|
|
||||||
assertBefore(t, sorted, "d", "c")
|
|
||||||
assertBefore(t, sorted, "c", "a")
|
|
||||||
assertBefore(t, sorted, "b", "a")
|
|
||||||
}
|
|
||||||
|
|
||||||
func assertBefore(t *testing.T, sorted []string, x, y string) {
|
|
||||||
xi := strIndex(sorted, x)
|
|
||||||
if xi < 0 {
|
|
||||||
t.Fatalf("expected %q to be in result", x)
|
|
||||||
}
|
|
||||||
yi := strIndex(sorted, y)
|
|
||||||
if yi < 0 {
|
|
||||||
t.Fatalf("expected %q to be in result", y)
|
|
||||||
}
|
|
||||||
if xi >= yi {
|
|
||||||
t.Fatalf("expected %q to come before %q, got indexes %d and %d", x, y, xi, yi)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// strIndex returns the index of the first element of slice equal to s,
// or -1 if there is no such element.
func strIndex(slice []string, s string) int {
	for i := 0; i < len(slice); i++ {
		if slice[i] == s {
			return i
		}
	}
	return -1
}
|
|
||||||
|
|
||||||
func TestTopoSortCycle(t *testing.T) {
|
|
||||||
sorted := topoSort(map[string]map[string]struct{}{
|
|
||||||
"a": {"b": struct{}{}, "c": struct{}{}},
|
|
||||||
"c": {"a": struct{}{}},
|
|
||||||
})
|
|
||||||
if len(sorted) != 3 {
|
|
||||||
t.Fatalf("expected 3 items, got %d", len(sorted))
|
|
||||||
}
|
|
||||||
assertBefore(t, sorted, "b", "a")
|
|
||||||
c := strIndex(sorted, "a")
|
|
||||||
if c < 0 {
|
|
||||||
t.Fatalf("expected %q to be in result", c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTopoSortLarge(t *testing.T) {
|
|
||||||
const num = 1000
|
|
||||||
graph := make(map[string]map[string]struct{})
|
|
||||||
for i := 0; i < num; i++ {
|
|
||||||
graph[strconv.Itoa(i)] = map[string]struct{}{strconv.Itoa(i + 1): {}}
|
|
||||||
}
|
|
||||||
graph[strconv.Itoa(num)] = map[string]struct{}{}
|
|
||||||
sorted := topoSort(graph)
|
|
||||||
if len(sorted) != num+1 {
|
|
||||||
t.Fatalf("expected %d items, got %d", num+1, len(sorted))
|
|
||||||
}
|
|
||||||
for i := 0; i < num+1; i++ {
|
|
||||||
expected := num - i
|
|
||||||
if sorted[i] != strconv.Itoa(expected) {
|
|
||||||
t.Fatalf("expected %d to be at index %d, got %s", num-1, i, sorted[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,125 +0,0 @@
|
||||||
|
|
||||||
# GoAWK: an AWK interpreter with CSV support
|
|
||||||
|
|
||||||
[![Documentation](https://pkg.go.dev/badge/github.com/mojosa-software/goblin/src/tool/awk)](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk)
|
|
||||||
[![GitHub Actions Build](https://github.com/mojosa-software/goblin/src/tool/awk/workflows/Go/badge.svg)](https://github.com/mojosa-software/goblin/src/tool/awk/actions?query=workflow%3AGo)
|
|
||||||
|
|
||||||
|
|
||||||
AWK is a fascinating text-processing language, and somehow after reading the delightfully-terse [*The AWK Programming Language*](https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf) I was inspired to write an interpreter for it in Go. So here it is, feature-complete and tested against "the one true AWK" and GNU AWK test suites.
|
|
||||||
|
|
||||||
GoAWK is a POSIX-compatible version of AWK, and additionally has a CSV mode for reading and writing CSV and TSV files. This feature was sponsored by the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/). Read the [CSV documentation](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/csv.md).
|
|
||||||
|
|
||||||
You can also read one of the articles I've written about GoAWK:
|
|
||||||
|
|
||||||
* The original article about [how GoAWK works and performs](https://benhoyt.com/writings/goawk/)
|
|
||||||
* How I converted the tree-walking interpreter to a [bytecode compiler and virtual machine](https://benhoyt.com/writings/goawk-compiler-vm/)
|
|
||||||
* A description of why and how I added [CSV support](https://benhoyt.com/writings/goawk-csv/)
|
|
||||||
|
|
||||||
|
|
||||||
## Basic usage
|
|
||||||
|
|
||||||
To use the command-line version, simply use `go install` to install it, and then run it using `goawk` (assuming `~/go/bin` is in your `PATH`):
|
|
||||||
|
|
||||||
```shell
|
|
||||||
$ go install github.com/mojosa-software/goblin/src/tool/awk@latest
|
|
||||||
|
|
||||||
$ goawk 'BEGIN { print "foo", 42 }'
|
|
||||||
foo 42
|
|
||||||
|
|
||||||
$ echo 1 2 3 | goawk '{ print $1 + $3 }'
|
|
||||||
4
|
|
||||||
|
|
||||||
# Or use GoAWK's CSV and @"named-field" support:
|
|
||||||
$ echo -e 'name,amount\nBob,17.50\nJill,20\n"Boba Fett",100.00' | \
|
|
||||||
goawk -i csv -H '{ total += @"amount" } END { print total }'
|
|
||||||
137.5
|
|
||||||
```
|
|
||||||
|
|
||||||
On Windows, `"` is the shell quoting character, so use `"` around the entire AWK program on the command line, and use `'` around AWK strings -- this is a non-POSIX extension to make GoAWK easier to use on Windows:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
C:\> goawk "BEGIN { print 'foo', 42 }"
|
|
||||||
foo 42
|
|
||||||
```
|
|
||||||
|
|
||||||
To use it in your Go programs, you can call `interp.Exec()` directly for simple needs:
|
|
||||||
|
|
||||||
```go
|
|
||||||
input := strings.NewReader("foo bar\n\nbaz buz")
|
|
||||||
err := interp.Exec("$0 { print $1 }", " ", input, nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// foo
|
|
||||||
// baz
|
|
||||||
```
|
|
||||||
|
|
||||||
Or you can use the `parser` module and then `interp.ExecProgram()` to control execution, set variables, and so on:
|
|
||||||
|
|
||||||
```go
|
|
||||||
src := "{ print NR, tolower($0) }"
|
|
||||||
input := "A\naB\nAbC"
|
|
||||||
|
|
||||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
config := &interp.Config{
|
|
||||||
Stdin: strings.NewReader(input),
|
|
||||||
Vars: []string{"OFS", ":"},
|
|
||||||
}
|
|
||||||
_, err = interp.ExecProgram(prog, config)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// 1:a
|
|
||||||
// 2:ab
|
|
||||||
// 3:abc
|
|
||||||
```
|
|
||||||
|
|
||||||
If you need to repeat execution of the same program on different inputs, you can call [`interp.New`](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk/interp#New) once, and then call the returned object's `Execute` method as many times as you need.
|
|
||||||
|
|
||||||
Read the [package documentation](https://pkg.go.dev/github.com/mojosa-software/goblin/src/tool/awk) for more details.
|
|
||||||
|
|
||||||
|
|
||||||
## Differences from AWK
|
|
||||||
|
|
||||||
The intention is for GoAWK to conform to `awk`'s behavior and to the [POSIX AWK spec](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html), but this section describes some areas where it's different.
|
|
||||||
|
|
||||||
Additional features GoAWK has over AWK:
|
|
||||||
|
|
||||||
* It has proper support for CSV and TSV files ([read the documentation](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/csv.md)).
|
|
||||||
* It supports negative field indexes to access fields from the right, for example, `$-1` refers to the last field.
|
|
||||||
* It's embeddable in your Go programs! You can even call custom Go functions from your AWK scripts.
|
|
||||||
* Most AWK scripts are faster than `awk` and on a par with `gawk`, though usually slower than `mawk`. (See [recent benchmarks](https://benhoyt.com/writings/goawk-compiler-vm/#virtual-machine-results).)
|
|
||||||
* The parser supports `'single-quoted strings'` in addition to `"double-quoted strings"`, primarily to make Windows one-liners easier (the Windows `cmd.exe` shell uses `"` as the quote character).
|
|
||||||
|
|
||||||
Things AWK has over GoAWK:
|
|
||||||
|
|
||||||
* Scripts that use regular expressions are slower than other implementations (unfortunately Go's `regexp` package is relatively slow).
|
|
||||||
* AWK is written by Alfred Aho, Peter Weinberger, and Brian Kernighan.
|
|
||||||
|
|
||||||
|
|
||||||
## Stability
|
|
||||||
|
|
||||||
This project has a good suite of tests, which include my own interpreter tests, the original AWK test suite, and the relevant tests from the Gawk test suite. I've used it a bunch personally, and it's used in the [Benthos](https://github.com/benthosdev/benthos) stream processor as well as by the software team at the library of the University of Antwerp. However, to `err == human`, so please use GoAWK at your own risk. I intend not to change the Go API in a breaking way in any v1.x.y version.
|
|
||||||
|
|
||||||
|
|
||||||
## AWKGo
|
|
||||||
|
|
||||||
The GoAWK repository also includes the creatively-named AWKGo, an AWK-to-Go compiler. This is experimental and is not subject to the stability requirements of GoAWK itself. You can [read more about AWKGo](https://benhoyt.com/writings/awkgo/) or browse the code on the [`awkgo` branch](https://github.com/mojosa-software/goblin/src/tool/awk/tree/awkgo/awkgo).
|
|
||||||
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
GoAWK is licensed under an open source [MIT license](https://github.com/mojosa-software/goblin/src/tool/awk/blob/master/LICENSE.txt).
|
|
||||||
|
|
||||||
|
|
||||||
## The end
|
|
||||||
|
|
||||||
Have fun, and please [contact me](https://benhoyt.com/) if you're using GoAWK or have any feedback!
|
|
|
@ -1,2 +0,0 @@
|
||||||
#!/bin/sh
# Run the Go benchmarks in ./interp five times each and save the output
# to benchmarks_new.txt (overwriting any previous file).
go test ./interp -bench=. -count=5 > benchmarks_new.txt
|
|
|
@ -1,124 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# Benchmark GoAWK against other AWK versions
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import glob
|
|
||||||
import os.path
|
|
||||||
import shutil
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
# AWK executables to benchmark. The first entry is also used for the
# initial timing run that decides how much input each test needs.
AWKS = [
    './goawk',
    './orig',  # GoAWK without perf improvements
    'original-awk',
    'gawk',
    'mawk',
]
# Index of the implementation whose time is normalized to 1.0
NORM_INDEX = AWKS.index('original-awk')
TESTS_TO_MEAN = None  # By default, calculate the mean of all tests
# Disabled by default; change to True to restrict the mean to this list.
if False:
    # Only get the mean of these tests because these are the only ones
    # we show in the GoAWK article.
    TESTS_TO_MEAN = [
        'tt.01_print',
        'tt.02_print_NR_NF',
        'tt.02a_print_length',
        'tt.03_sum_length',
        'tt.03a_sum_field',
        'tt.04_printf_fields',
        'tt.05_concat_fields',
        'tt.06_count_lengths',
        'tt.07_even_fields',
        'tt.big_complex_program',
        'tt.x1_mandelbrot',
        'tt.x2_sum_loop',
    ]
NUM_RUNS = 6  # timed runs per AWK per test
MIN_TIME = 0.5  # seconds; input is repeated if the first run is faster
PROGRAM_GLOB = 'testdata/tt.*'

# An optional first command-line argument replaces the glob (relative
# to testdata/).
if len(sys.argv) > 1:
    PROGRAM_GLOB = 'testdata/' + sys.argv[1]
|
|
||||||
|
|
||||||
|
|
||||||
def repeat_file(input_file, repeated_file, n):
    """Write n back-to-back copies of input_file's bytes to repeated_file.

    repeated_file is created or truncated; with n == 0 it ends up empty.
    """
    with open(input_file, 'rb') as source:
        with open(repeated_file, 'wb') as target:
            for _ in range(n):
                # Rewind so every pass copies the whole input again.
                source.seek(0)
                shutil.copyfileobj(source, target)
|
|
||||||
|
|
||||||
|
|
||||||
# Print the table header: one column per AWK implementation.
print('Test ', end='')
for awk in AWKS:
    display_awk = os.path.basename(awk)
    display_awk = display_awk.replace('original-awk', 'awk')
    print('| {:>8} '.format(display_awk), end='')
print()
print('-'*28 + ' | --------'*len(AWKS))

repeats_created = []  # repeated input files to delete at the end
products = [1] * len(AWKS)  # running product of speeds, for the geometric mean
num_products = 0  # number of tests multiplied into products
programs = sorted(glob.glob(PROGRAM_GLOB))
for program in programs:
    # First do a test run with GoAWK to see roughly how long it takes
    cmdline = '{} -f {} testdata/foo.td >tt.out'.format(AWKS[0], program)
    start = time.time()
    status = subprocess.call(cmdline, shell=True)
    elapsed = time.time() - start

    # If test run took less than MIN_TIME seconds, scale/repeat input
    # file accordingly
    input_file = 'testdata/foo.td'
    if elapsed < MIN_TIME:
        multiplier = int(round(MIN_TIME / elapsed))
        repeated_file = '{}.{}'.format(input_file, multiplier)
        # Reuse a repeated file with the same multiplier if one exists
        if not os.path.exists(repeated_file):
            repeat_file(input_file, repeated_file, multiplier)
            repeats_created.append(repeated_file)
        input_file = repeated_file

    # Record time taken to run this test, running each NUM_RUNS times
    # and keeping the second-fastest time (the single fastest run is
    # discarded by the [1:] slice below)
    awk_times = []
    for awk in AWKS:
        cmdline = '{} -f {} {} >tt.out'.format(awk, program, input_file)
        times = []
        for i in range(NUM_RUNS):
            start = time.time()
            status = subprocess.call(cmdline, shell=True)
            elapsed = time.time() - start
            times.append(elapsed)
            # A failing run is reported but its time is still recorded
            if status != 0:
                print('ERROR status {} from cmd: {}'.format(status, cmdline), file=sys.stderr)
        min_time = min(sorted(times)[1:])
        awk_times.append(min_time)

    # Normalize to One True AWK time = 1.0
    norm_time = awk_times[NORM_INDEX]
    speeds = [norm_time/t for t in awk_times]
    # Path component after the first '/', i.e. the file name under testdata/
    test_name = program.split('/')[1]
    if TESTS_TO_MEAN is None or test_name in TESTS_TO_MEAN:
        num_products += 1
        for i in range(len(AWKS)):
            products[i] *= speeds[i]

    # 'tt.02_print_NR_NF' is displayed as 'tt.02 (print NR NF)'
    display_name = test_name.split('_')[0] + ' (' + ' '.join(test_name.split('_')[1:]) + ')'
    print('{:28}'.format(display_name), end='')
    for i, awk in enumerate(AWKS):
        print(' | {:8.2f}'.format(speeds[i]), end='')
    print()

# Footer row: geometric mean of the normalized speeds per AWK.
print('-'*28 + ' | --------'*len(AWKS))
print('**Geo mean** ', end='')
for i, awk in enumerate(AWKS):
    print(' | **{:.2f}**'.format(products[i] ** (1.0/num_products)), end='')
print()

# Delete temporary files created
os.remove('tt.out')
for repeated_file in repeats_created:
    os.remove(repeated_file)
|
|
|
@ -1,2 +0,0 @@
|
||||||
#!/bin/sh
# Compare benchmarks_old.txt against benchmarks_new.txt with benchstat,
# sorted by delta and including the geometric mean.
~/go/bin/benchstat -sort=delta -geomean benchmarks_old.txt benchmarks_new.txt
|
|
|
@ -1,9 +0,0 @@
|
||||||
import csv
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Count the CSV records read from stdin and the total number of fields
# across them, then print both totals.
row_count = 0
field_count = 0
for record in csv.reader(sys.stdin):
    row_count += 1
    field_count += len(record)

print(row_count, field_count)
|
|
|
@ -1,27 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"encoding/csv"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
)
|
|
||||||
|
|
||||||
// main reads CSV records from standard input and prints the number of
// records followed by the total number of fields. Any read error other
// than end of input is fatal.
func main() {
	r := csv.NewReader(bufio.NewReader(os.Stdin))
	var lines, fields int
	for {
		record, err := r.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			log.Fatal(err)
		}
		lines++
		fields += len(record)
	}
	fmt.Println(lines, fields)
}
|
|
|
@ -1,48 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
# Stop at the first command that fails.
set -e

# Each benchmark below is run three times. The write benchmarks emit
# 3514073 rows of 20 fields and discard the output.
echo ===== Writing 1GB - goawk
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null

echo ===== Writing 1GB - frawk
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null

echo ===== Writing 1GB - Python
time python3 write.py >/dev/null
time python3 write.py >/dev/null
time python3 write.py >/dev/null

echo ===== Writing 1GB - Go
go build -o bin/write ./write
time ./bin/write >/dev/null
time ./bin/write >/dev/null
time ./bin/write >/dev/null

# Generate the input file used by all of the read benchmarks below.
./bin/write >count.csv

# The read benchmarks count records and fields in count.csv.
echo ===== Reading 1GB - goawk
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv

echo ===== Reading 1GB - frawk
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv

echo ===== Reading 1GB - Python
time python3 count.py <count.csv
time python3 count.py <count.csv
time python3 count.py <count.csv

echo ===== Reading 1GB - Go
go build -o bin/count ./count
time ./bin/count <count.csv
time ./bin/count <count.csv
time ./bin/count <count.csv
|
|
|
@ -1,27 +0,0 @@
|
||||||
import csv
|
|
||||||
import sys
|
|
||||||
|
|
||||||
writer = csv.writer(sys.stdout)
|
|
||||||
for i in range(3514073): # will create a ~1GB file
|
|
||||||
writer.writerow([
|
|
||||||
i,
|
|
||||||
"foo",
|
|
||||||
"bob@example.com",
|
|
||||||
"simple,quoted",
|
|
||||||
"quoted string with \" in it",
|
|
||||||
"0123456789",
|
|
||||||
"9876543210",
|
|
||||||
"The quick brown fox jumps over the lazy dog",
|
|
||||||
"",
|
|
||||||
"final field",
|
|
||||||
i,
|
|
||||||
"foo",
|
|
||||||
"bob@example.com",
|
|
||||||
"simple,quoted",
|
|
||||||
"quoted string with \" in it",
|
|
||||||
"0123456789",
|
|
||||||
"9876543210",
|
|
||||||
"The quick brown fox jumps over the lazy dog",
|
|
||||||
"",
|
|
||||||
"final field",
|
|
||||||
])
|
|
|
@ -1,43 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/csv"
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
"strconv"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
writer := csv.NewWriter(os.Stdout)
|
|
||||||
for i := 0; i < 3514073; i++ { // will create a ~1GB file
|
|
||||||
err := writer.Write([]string{
|
|
||||||
strconv.Itoa(i),
|
|
||||||
"foo",
|
|
||||||
"bob@example.com",
|
|
||||||
"simple,quoted",
|
|
||||||
"quoted string with \" in it",
|
|
||||||
"0123456789",
|
|
||||||
"9876543210",
|
|
||||||
"The quick brown fox jumps over the lazy dog",
|
|
||||||
"",
|
|
||||||
"final field",
|
|
||||||
strconv.Itoa(i),
|
|
||||||
"foo",
|
|
||||||
"bob@example.com",
|
|
||||||
"simple,quoted",
|
|
||||||
"quoted string with \" in it",
|
|
||||||
"0123456789",
|
|
||||||
"9876543210",
|
|
||||||
"The quick brown fox jumps over the lazy dog",
|
|
||||||
"",
|
|
||||||
"final field",
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
writer.Flush()
|
|
||||||
if writer.Error() != nil {
|
|
||||||
log.Fatal(writer.Error())
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,2 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
go1.18rc1 test ./interp -run=^$ -fuzz=Input -parallel=4
|
|
|
@ -1,2 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
go1.18rc1 test ./interp -run=^$ -fuzz=Source -parallel=4
|
|
|
@ -1,21 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
go build
|
|
||||||
VERSION="$(./goawk -version)"
|
|
||||||
|
|
||||||
GOOS=windows GOARCH=386 go build -ldflags="-w"
|
|
||||||
zip "goawk_${VERSION}_windows_386.zip" goawk.exe README.md csv.md LICENSE.txt
|
|
||||||
GOOS=windows GOARCH=amd64 go build -ldflags="-w"
|
|
||||||
zip "goawk_${VERSION}_windows_amd64.zip" goawk.exe README.md csv.md LICENSE.txt
|
|
||||||
|
|
||||||
GOOS=linux GOARCH=386 go build -ldflags="-w"
|
|
||||||
tar -cvzf "goawk_${VERSION}_linux_386.tar.gz" goawk README.md csv.md LICENSE.txt
|
|
||||||
GOOS=linux GOARCH=amd64 go build -ldflags="-w"
|
|
||||||
tar -cvzf "goawk_${VERSION}_linux_amd64.tar.gz" goawk README.md csv.md LICENSE.txt
|
|
||||||
|
|
||||||
GOOS=darwin GOARCH=amd64 go build -ldflags="-w"
|
|
||||||
tar -cvzf "goawk_${VERSION}_darwin_amd64.tar.gz" goawk README.md csv.md LICENSE.txt
|
|
||||||
GOOS=darwin GOARCH=arm64 go build -ldflags="-w"
|
|
||||||
tar -cvzf "goawk_${VERSION}_darwin_arm64.tar.gz" goawk README.md csv.md LICENSE.txt
|
|
||||||
|
|
||||||
rm -f goawk goawk.exe
|
|
46
src/tool/awk/testdata/README
vendored
46
src/tool/awk/testdata/README
vendored
|
@ -1,46 +0,0 @@
|
||||||
Original README.TESTS from one-true-awk regdir tests directory:
|
|
||||||
---------------------------------------------------------------
|
|
||||||
The archive of test files contains
|
|
||||||
|
|
||||||
- A shell file called REGRESS that controls the testing process.
|
|
||||||
|
|
||||||
- Several shell files called Compare* that control sub-parts
|
|
||||||
of the testing.
|
|
||||||
|
|
||||||
- About 160 small tests called t.* that constitute a random
|
|
||||||
sampling of awk constructions collected over the years.
|
|
||||||
Not organized, but they touch almost everything.
|
|
||||||
|
|
||||||
- About 60 small tests called p.* that come from the first
|
|
||||||
two chapters of The AWK Programming Environment. This is
|
|
||||||
basic stuff -- they have to work.
|
|
||||||
|
|
||||||
These two sets are intended as regression tests, to be sure
|
|
||||||
that a new version produces the same results as a previous one.
|
|
||||||
There are a couple of standard data files used with them,
|
|
||||||
test.data and test.countries, but others would work too.
|
|
||||||
|
|
||||||
- About 20 files called T.* that are self-contained and
|
|
||||||
more systematic tests of specific language features.
|
|
||||||
For example, T.clv tests command-line variable handling.
|
|
||||||
These tests are not regressions -- they compute the right
|
|
||||||
answer by separate means, then compare the awk output.
|
|
||||||
A specific test for each new bug found shows up in at least
|
|
||||||
one of these, most often T.misc. There are about 220 tests
|
|
||||||
total in these files.
|
|
||||||
|
|
||||||
- Two of these files, T.re and T.sub, are systematic tests
|
|
||||||
of the regular expression and substitution code. They express
|
|
||||||
tests in a small language, then generate awk programs that
|
|
||||||
verify behavior.
|
|
||||||
|
|
||||||
- About 20 files called tt.* that are used as timing tests;
|
|
||||||
they use the most common awk constructions in straightforward
|
|
||||||
ways, against a large input file constructed by Compare.tt.
|
|
||||||
|
|
||||||
|
|
||||||
There is undoubtedly more stuff in the archive; it's been
|
|
||||||
collecting for years and may need pruning. Suggestions for
|
|
||||||
improvement, additional tests (especially systematic ones),
|
|
||||||
and the like are all welcome.
|
|
||||||
|
|
3
src/tool/awk/testdata/csv/1.csv
vendored
3
src/tool/awk/testdata/csv/1.csv
vendored
|
@ -1,3 +0,0 @@
|
||||||
name,age
|
|
||||||
Bob,42
|
|
||||||
Jill,37
|
|
|
2
src/tool/awk/testdata/csv/2.csv
vendored
2
src/tool/awk/testdata/csv/2.csv
vendored
|
@ -1,2 +0,0 @@
|
||||||
age,email,name
|
|
||||||
25,sarah@example.com,Sarah
|
|
|
2
src/tool/awk/testdata/csv/address5.csv
vendored
2
src/tool/awk/testdata/csv/address5.csv
vendored
|
@ -1,2 +0,0 @@
|
||||||
name,address_1,address_2,address_3,address_4,address_5
|
|
||||||
Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates
|
|
|
2
src/tool/awk/testdata/csv/fields.csv
vendored
2
src/tool/awk/testdata/csv/fields.csv
vendored
|
@ -1,2 +0,0 @@
|
||||||
id,name,email
|
|
||||||
1,Bob,b@bob.com
|
|
|
2563
src/tool/awk/testdata/csv/nz-schools.csv
vendored
2563
src/tool/awk/testdata/csv/nz-schools.csv
vendored
File diff suppressed because it is too large
Load diff
52
src/tool/awk/testdata/csv/states.csv
vendored
52
src/tool/awk/testdata/csv/states.csv
vendored
|
@ -1,52 +0,0 @@
|
||||||
"State","Abbreviation"
|
|
||||||
"Alabama","AL"
|
|
||||||
"Alaska","AK"
|
|
||||||
"Arizona","AZ"
|
|
||||||
"Arkansas","AR"
|
|
||||||
"California","CA"
|
|
||||||
"Colorado","CO"
|
|
||||||
"Connecticut","CT"
|
|
||||||
"Delaware","DE"
|
|
||||||
"District of Columbia","DC"
|
|
||||||
"Florida","FL"
|
|
||||||
"Georgia","GA"
|
|
||||||
"Hawaii","HI"
|
|
||||||
"Idaho","ID"
|
|
||||||
"Illinois","IL"
|
|
||||||
"Indiana","IN"
|
|
||||||
"Iowa","IA"
|
|
||||||
"Kansas","KS"
|
|
||||||
"Kentucky","KY"
|
|
||||||
"Louisiana","LA"
|
|
||||||
"Maine","ME"
|
|
||||||
"Montana","MT"
|
|
||||||
"Nebraska","NE"
|
|
||||||
"Nevada","NV"
|
|
||||||
"New Hampshire","NH"
|
|
||||||
"New Jersey","NJ"
|
|
||||||
"New Mexico","NM"
|
|
||||||
"New York","NY"
|
|
||||||
"North Carolina","NC"
|
|
||||||
"North Dakota","ND"
|
|
||||||
"Ohio","OH"
|
|
||||||
"Oklahoma","OK"
|
|
||||||
"Oregon","OR"
|
|
||||||
"Maryland","MD"
|
|
||||||
"Massachusetts","MA"
|
|
||||||
"Michigan","MI"
|
|
||||||
"Minnesota","MN"
|
|
||||||
"Mississippi","MS"
|
|
||||||
"Missouri","MO"
|
|
||||||
"Pennsylvania","PA"
|
|
||||||
"Rhode Island","RI"
|
|
||||||
"South Carolina","SC"
|
|
||||||
"South Dakota","SD"
|
|
||||||
"Tennessee","TN"
|
|
||||||
"Texas","TX"
|
|
||||||
"Utah","UT"
|
|
||||||
"Vermont","VT"
|
|
||||||
"Virginia","VA"
|
|
||||||
"Washington","WA"
|
|
||||||
"West Virginia","WV"
|
|
||||||
"Wisconsin","WI"
|
|
||||||
"Wyoming","WY"
|
|
|
53
src/tool/awk/testdata/csv/states.psv
vendored
53
src/tool/awk/testdata/csv/states.psv
vendored
|
@ -1,53 +0,0 @@
|
||||||
# comment
|
|
||||||
State|Abbreviation
|
|
||||||
Alabama|AL
|
|
||||||
Alaska|AK
|
|
||||||
Arizona|AZ
|
|
||||||
Arkansas|AR
|
|
||||||
California|CA
|
|
||||||
Colorado|CO
|
|
||||||
Connecticut|CT
|
|
||||||
Delaware|DE
|
|
||||||
District of Columbia|DC
|
|
||||||
Florida|FL
|
|
||||||
Georgia|GA
|
|
||||||
Hawaii|HI
|
|
||||||
Idaho|ID
|
|
||||||
Illinois|IL
|
|
||||||
Indiana|IN
|
|
||||||
Iowa|IA
|
|
||||||
Kansas|KS
|
|
||||||
Kentucky|KY
|
|
||||||
Louisiana|LA
|
|
||||||
Maine|ME
|
|
||||||
Montana|MT
|
|
||||||
Nebraska|NE
|
|
||||||
Nevada|NV
|
|
||||||
New Hampshire|NH
|
|
||||||
New Jersey|NJ
|
|
||||||
New Mexico|NM
|
|
||||||
New York|NY
|
|
||||||
North Carolina|NC
|
|
||||||
North Dakota|ND
|
|
||||||
Ohio|OH
|
|
||||||
Oklahoma|OK
|
|
||||||
Oregon|OR
|
|
||||||
Maryland|MD
|
|
||||||
Massachusetts|MA
|
|
||||||
Michigan|MI
|
|
||||||
Minnesota|MN
|
|
||||||
Mississippi|MS
|
|
||||||
Missouri|MO
|
|
||||||
Pennsylvania|PA
|
|
||||||
Rhode Island|RI
|
|
||||||
South Carolina|SC
|
|
||||||
South Dakota|SD
|
|
||||||
Tennessee|TN
|
|
||||||
Texas|TX
|
|
||||||
Utah|UT
|
|
||||||
Vermont|VT
|
|
||||||
Virginia|VA
|
|
||||||
Washington|WA
|
|
||||||
West Virginia|WV
|
|
||||||
Wisconsin|WI
|
|
||||||
Wyoming|WY
|
|
BIN
src/tool/awk/testdata/echo
vendored
BIN
src/tool/awk/testdata/echo
vendored
Binary file not shown.
1
src/tool/awk/testdata/filename/10
vendored
1
src/tool/awk/testdata/filename/10
vendored
|
@ -1 +0,0 @@
|
||||||
foo
|
|
1
src/tool/awk/testdata/filename/10x
vendored
1
src/tool/awk/testdata/filename/10x
vendored
|
@ -1 +0,0 @@
|
||||||
bar
|
|
37801
src/tool/awk/testdata/foo.td
vendored
37801
src/tool/awk/testdata/foo.td
vendored
File diff suppressed because it is too large
Load diff
1
src/tool/awk/testdata/g.1
vendored
1
src/tool/awk/testdata/g.1
vendored
|
@ -1 +0,0 @@
|
||||||
ONE
|
|
1
src/tool/awk/testdata/g.2
vendored
1
src/tool/awk/testdata/g.2
vendored
|
@ -1 +0,0 @@
|
||||||
TWO
|
|
10
src/tool/awk/testdata/g.3
vendored
10
src/tool/awk/testdata/g.3
vendored
|
@ -1,10 +0,0 @@
|
||||||
BEGIN {
|
|
||||||
printf "A=%d, B=%d\n", A, B
|
|
||||||
for (i = 1; i < ARGC; i++) {
|
|
||||||
printf "\tARGV[%d] = %s\n", i, ARGV[i]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
END {
|
|
||||||
printf "A=%d, B=%d\n", A, B
|
|
||||||
}
|
|
2
src/tool/awk/testdata/g.4
vendored
2
src/tool/awk/testdata/g.4
vendored
|
@ -1,2 +0,0 @@
|
||||||
FOUR a
|
|
||||||
FOUR b
|
|
15
src/tool/awk/testdata/gawk/addcomma.awk
vendored
15
src/tool/awk/testdata/gawk/addcomma.awk
vendored
|
@ -1,15 +0,0 @@
|
||||||
# addcomma - put commas in numbers
|
|
||||||
# input: a number per line
|
|
||||||
# output: the input number followed by
|
|
||||||
# the number with commas and two decimal places
|
|
||||||
|
|
||||||
{ printf("%-12s %20s\n", $0, addcomma($0)) }
|
|
||||||
|
|
||||||
function addcomma(x, num) {
|
|
||||||
if (x < 0)
|
|
||||||
return "-" addcomma(-x)
|
|
||||||
num = sprintf("%.2f", x) # num is dddddd.dd
|
|
||||||
while (num ~ /[0-9][0-9][0-9][0-9]/)
|
|
||||||
sub(/[0-9][0-9][0-9][,.]/, ",&", num)
|
|
||||||
return num
|
|
||||||
}
|
|
7
src/tool/awk/testdata/gawk/addcomma.in
vendored
7
src/tool/awk/testdata/gawk/addcomma.in
vendored
|
@ -1,7 +0,0 @@
|
||||||
0
|
|
||||||
-1
|
|
||||||
-12.34
|
|
||||||
12345
|
|
||||||
-1234567.89
|
|
||||||
-123.
|
|
||||||
-123456
|
|
7
src/tool/awk/testdata/gawk/addcomma.ok
vendored
7
src/tool/awk/testdata/gawk/addcomma.ok
vendored
|
@ -1,7 +0,0 @@
|
||||||
0 0.00
|
|
||||||
-1 -1.00
|
|
||||||
-12.34 -12.34
|
|
||||||
12345 12,345.00
|
|
||||||
-1234567.89 -1,234,567.89
|
|
||||||
-123. -123.00
|
|
||||||
-123456 -123,456.00
|
|
1
src/tool/awk/testdata/gawk/anchgsub.awk
vendored
1
src/tool/awk/testdata/gawk/anchgsub.awk
vendored
|
@ -1 +0,0 @@
|
||||||
{ gsub(/^[ ]*/, "", $0) ; print }
|
|
1
src/tool/awk/testdata/gawk/anchgsub.in
vendored
1
src/tool/awk/testdata/gawk/anchgsub.in
vendored
|
@ -1 +0,0 @@
|
||||||
This is a test, this is only a test.
|
|
1
src/tool/awk/testdata/gawk/anchgsub.ok
vendored
1
src/tool/awk/testdata/gawk/anchgsub.ok
vendored
|
@ -1 +0,0 @@
|
||||||
This is a test, this is only a test.
|
|
33
src/tool/awk/testdata/gawk/anchor.awk
vendored
33
src/tool/awk/testdata/gawk/anchor.awk
vendored
|
@ -1,33 +0,0 @@
|
||||||
BEGIN { RS = "" }
|
|
||||||
|
|
||||||
{
|
|
||||||
if (/^A/)
|
|
||||||
print "ok"
|
|
||||||
else
|
|
||||||
print "not ok"
|
|
||||||
|
|
||||||
if (/B$/)
|
|
||||||
print "not ok"
|
|
||||||
else
|
|
||||||
print "ok"
|
|
||||||
|
|
||||||
if (/^C/)
|
|
||||||
print "not ok"
|
|
||||||
else
|
|
||||||
print "ok"
|
|
||||||
|
|
||||||
if (/D$/)
|
|
||||||
print "not ok"
|
|
||||||
else
|
|
||||||
print "ok"
|
|
||||||
|
|
||||||
if (/^E/)
|
|
||||||
print "not ok"
|
|
||||||
else
|
|
||||||
print "ok"
|
|
||||||
|
|
||||||
if (/F$/)
|
|
||||||
print "ok"
|
|
||||||
else
|
|
||||||
print "not ok"
|
|
||||||
}
|
|
3
src/tool/awk/testdata/gawk/anchor.in
vendored
3
src/tool/awk/testdata/gawk/anchor.in
vendored
|
@ -1,3 +0,0 @@
|
||||||
A line1 B
|
|
||||||
C line2 D
|
|
||||||
E line3 F
|
|
6
src/tool/awk/testdata/gawk/anchor.ok
vendored
6
src/tool/awk/testdata/gawk/anchor.ok
vendored
|
@ -1,6 +0,0 @@
|
||||||
ok
|
|
||||||
ok
|
|
||||||
ok
|
|
||||||
ok
|
|
||||||
ok
|
|
||||||
ok
|
|
14
src/tool/awk/testdata/gawk/argarray.awk
vendored
14
src/tool/awk/testdata/gawk/argarray.awk
vendored
|
@ -1,14 +0,0 @@
|
||||||
BEGIN {
|
|
||||||
argn = " argument" (ARGC > 1 ? "s" : "")
|
|
||||||
are = ARGC > 1 ? "are" : "is"
|
|
||||||
print "here we have " ARGC argn
|
|
||||||
print "which " are
|
|
||||||
for (x = 0; x < ARGC; x++)
|
|
||||||
print "\t", ARGV[x]
|
|
||||||
print "Environment variable TEST=" ENVIRON["TEST"]
|
|
||||||
print "and the current input file is called \"" FILENAME "\""
|
|
||||||
}
|
|
||||||
|
|
||||||
FNR == 1 {
|
|
||||||
print "in main loop, this input file is known as \"" FILENAME "\""
|
|
||||||
}
|
|
1
src/tool/awk/testdata/gawk/argarray.in
vendored
1
src/tool/awk/testdata/gawk/argarray.in
vendored
|
@ -1 +0,0 @@
|
||||||
this is a simple test file
|
|
6
src/tool/awk/testdata/gawk/argarray.ok
vendored
6
src/tool/awk/testdata/gawk/argarray.ok
vendored
|
@ -1,6 +0,0 @@
|
||||||
here we have 1 argument
|
|
||||||
which is
|
|
||||||
|
|
||||||
Environment variable TEST=
|
|
||||||
and the current input file is called ""
|
|
||||||
in main loop, this input file is known as "-"
|
|
19
src/tool/awk/testdata/gawk/arrayind3.awk
vendored
19
src/tool/awk/testdata/gawk/arrayind3.awk
vendored
|
@ -1,19 +0,0 @@
|
||||||
BEGIN {
|
|
||||||
# initialize cint arrays
|
|
||||||
pos[0] = 0
|
|
||||||
posout[0] = 0
|
|
||||||
split("00000779770060", f) # f[1] is a strnum
|
|
||||||
pos[f[1]] = 1 # subscripts must be strings!
|
|
||||||
for (x in pos) {
|
|
||||||
# if x is a strnum, then the
|
|
||||||
# x != 0 test may convert it to an integral NUMBER,
|
|
||||||
# and we might lose the unusual string representation
|
|
||||||
# if the cint code is not careful to recognize that this is
|
|
||||||
# actually a string
|
|
||||||
if (x != 0)
|
|
||||||
posout[x] = pos[x]
|
|
||||||
}
|
|
||||||
# which array element is populated?
|
|
||||||
print posout[779770060]
|
|
||||||
print posout["00000779770060"]
|
|
||||||
}
|
|
2
src/tool/awk/testdata/gawk/arrayind3.ok
vendored
2
src/tool/awk/testdata/gawk/arrayind3.ok
vendored
|
@ -1,2 +0,0 @@
|
||||||
|
|
||||||
1
|
|
21
src/tool/awk/testdata/gawk/arrayparm.awk
vendored
21
src/tool/awk/testdata/gawk/arrayparm.awk
vendored
|
@ -1,21 +0,0 @@
|
||||||
#
|
|
||||||
# Test program from:
|
|
||||||
#
|
|
||||||
# Date: Tue, 21 Feb 95 16:09:29 EST
|
|
||||||
# From: emory!blackhawk.com!aaron (Aaron Sosnick)
|
|
||||||
#
|
|
||||||
BEGIN {
|
|
||||||
foo[1]=1;
|
|
||||||
foo[2]=2;
|
|
||||||
bug1(foo);
|
|
||||||
}
|
|
||||||
function bug1(i) {
|
|
||||||
for (i in foo) {
|
|
||||||
bug2(i);
|
|
||||||
delete foo[i];
|
|
||||||
print i,1,bot[1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
function bug2(arg) {
|
|
||||||
bot[arg]=arg;
|
|
||||||
}
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue