Trying to embed gawk.
This commit is contained in:
parent
708052ec0a
commit
80356b3878
1105 changed files with 133607 additions and 1 deletions
|
@@ -27,6 +27,7 @@ import(
|
|||
"github.com/surdeus/goblin/src/tool/useprog"
|
||||
"github.com/surdeus/goblin/src/tool/path"
|
||||
"github.com/surdeus/goblin/src/tool/mk"
|
||||
//"github.com/surdeus/goblin/src/tool/awk"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@@ -55,7 +56,8 @@ func main() {
|
|||
"in" : mtool.Tool{in.Run, "filter strings from stdin that aren not in arguments"},
|
||||
"useprog" : mtool.Tool{useprog.Run, "print the name of the first existing program in arg list"},
|
||||
"path" : mtool.Tool{path.Run, "print cross platform path based on cmd arguments"},
|
||||
"mk" : mtool.Tool{mk.Run, "file dependency system"},
|
||||
"mk" : mtool.Tool{mk.Run, "file dependency system, simpler make"},
|
||||
//"awk" : mtool.Tool{awk.Run, "simple scripting language for working with string templates"},
|
||||
}
|
||||
|
||||
mtool.Main("goblin", tools)
|
||||
|
|
387
src/tool/awk/csv.md
Normal file
|
@@ -0,0 +1,387 @@
|
|||
|
||||
# GoAWK's CSV and TSV file support
|
||||
|
||||
[CSV](https://en.wikipedia.org/wiki/Comma-separated_values) and [TSV](https://en.wikipedia.org/wiki/Tab-separated_values) files are often used in data processing today, but unfortunately you can't properly process them using POSIX AWK. You can change the field separator to `,` or tab (for example `awk -F,` or `awk '-F\t'`) but that doesn't handle quoted or multi-line fields.
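
For example (a small sketch, not part of the original document, assuming a POSIX-ish `awk` is installed alongside `goawk`): a quoted field containing a comma is split in the wrong place with `-F,`, but handled correctly in CSV input mode:

```
$ echo '"Smith, Bob",42' | awk -F, '{ print $1 }'
"Smith
$ echo '"Smith, Bob",42' | goawk -i csv '{ print $1 }'
Smith, Bob
```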
|
||||
|
||||
There are other workarounds, such as [Gawk's FPAT feature](https://www.gnu.org/software/gawk/manual/html_node/Splitting-By-Content.html), various [CSV extensions](http://mcollado.z15.es/xgawk/) for Gawk, or Adam Gordon Bell's [csvquote](https://github.com/adamgordonbell/csvquote) tool. There's also [frawk](https://github.com/ezrosent/frawk), which is an amazing tool that natively supports CSV, but unfortunately it deviates quite a bit from POSIX-compatible AWK.
|
||||
|
||||
Since version v1.17.0, GoAWK has included CSV support, which allows you to read and write CSV and TSV files, including proper handling of quoted and multi-line fields as per [RFC 4180](https://rfc-editor.org/rfc/rfc4180.html). In addition, GoAWK supports a "named field" construct that allows you to access CSV fields by name as well as number, for example `@"Address"` rather than `$5`.
|
||||
|
||||
**Many thanks to the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/), who sponsored this feature in May 2022.** Thanks also to [Eli Rosenthal](https://github.com/ezrosent), whose frawk tool inspired aspects of the design (including the `-i` and `-o` command line arguments).
|
||||
|
||||
Links to sections:
|
||||
|
||||
* [CSV input configuration](#csv-input-configuration)
|
||||
* [CSV output configuration](#csv-output-configuration)
|
||||
* [Named field syntax](#named-field-syntax)
|
||||
* [Go API](#go-api)
|
||||
* [Examples](#examples)
|
||||
* [Examples based on csvkit](#examples-based-on-csvkit)
|
||||
* [Performance](#performance)
|
||||
* [Future work](#future-work)
|
||||
|
||||
|
||||
## CSV input configuration
|
||||
|
||||
When in CSV input mode, GoAWK ignores the regular field and record separators (`FS` and `RS`), instead parsing input into records and fields using the CSV or TSV format. Fields can be accessed using the standard AWK numbered field syntax (for example, `$1` or `$5`), or using the GoAWK-specific [named field syntax](#named-field-syntax).
|
||||
|
||||
To enable CSV input mode when using the `goawk` program, use the `-i mode` command line argument. You can also enable CSV input mode by setting the `INPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows:
|
||||
|
||||
```
|
||||
csv|tsv [separator=<char>] [comment=<char>] [header]
|
||||
```
|
||||
|
||||
The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows:
|
||||
|
||||
* `separator=<char>`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format.
|
||||
* `comment=<char>`: consider lines starting with the given character to be comments and skip them, for example `comment=#` will ignore any lines starting with `#` (without preceding whitespace). The default is not to support comments.
|
||||
* `header`: treat the first line of each input file as a header row providing the field names, and enable the `@"field"` syntax as well as the `FIELDS` array. This option is equivalent to the `-H` command line argument. If neither `header` nor `-H` is specified, you can't use named fields.
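
Putting these options together, here's a small illustrative example (not part of the original document) that skips `#` comment lines and reads the first row as a header:

```
$ printf 'id,name\n# a comment\n1,Bob\n' | goawk -i 'csv comment=# header' '{ print @"name" }'
Bob
```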
|
||||
|
||||
|
||||
|
||||
## CSV output configuration
|
||||
|
||||
When in CSV output mode, the GoAWK `print` statement with one or more arguments ignores `OFS` and `ORS` and separates its arguments (fields) and records using CSV formatting. No header row is printed; if required, a header row can be printed in the `BEGIN` block manually. No other functionality is changed, for example, `printf` doesn't do anything different in CSV output mode.
|
||||
|
||||
**NOTE:** The behaviour of `print` without arguments remains unchanged. This means you can print the input line (`$0`) without further quoting by using a bare `print` statement, but `print $0` will print the input line as a single CSV field, which is probably not what you want. See the [example](#example-convert-between-formats-all-fields) below.
|
||||
|
||||
To enable CSV output mode when using the `goawk` program, use the `-o mode` command line argument. You can also enable CSV output mode by setting the `OUTPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows:
|
||||
|
||||
```
|
||||
csv|tsv [separator=<char>]
|
||||
```
|
||||
|
||||
The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows:
|
||||
|
||||
* `separator=<char>`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format.
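
For instance, fields that contain the separator are quoted automatically on output; a minimal sketch of the behaviour described above:

```
$ goawk -o csv 'BEGIN { print "Smith, Bob", 42 }'
"Smith, Bob",42
```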
|
||||
|
||||
|
||||
## Named field syntax
|
||||
|
||||
If the `header` option or `-H` argument is given, CSV input mode parses the first row of each input file as a header row containing a list of field names.
|
||||
|
||||
When the header option is enabled, you can use the GoAWK-specific "named field" operator (`@`) to access fields by name instead of by number (`$`). For example, given the header row `id,name,email`, for each record you can access the email address using `@"email"`, `$3`, or even `$-1` (first field from the right). Further usage examples are shown [below](#examples).
|
||||
|
||||
Every time a header row is processed, the `FIELDS` special array is updated: it is a mapping of field number to field name, allowing you to loop over the field names dynamically. For example, given the header row `id,name,email`, GoAWK sets `FIELDS` using the equivalent of:
|
||||
|
||||
```
|
||||
FIELDS[1] = "id"
|
||||
FIELDS[2] = "name"
|
||||
FIELDS[3] = "email"
|
||||
```
|
||||
|
||||
Note that named field assignment such as `@"id" = 42` is not yet supported, but this feature may be added later.
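
Until then, you can assign by field number instead; a brief illustrative workaround (not part of the original document):

```
$ printf 'id,name\n1,Bob\n' | goawk -i csv -H -o csv '{ $1 = 42; print $1, @"name" }'
42,Bob
```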
|
||||
|
||||
|
||||
## Go API
|
||||
|
||||
When using GoAWK via the Go API, you can still use `INPUTMODE`, but it may be more convenient to use the `interp.Config` fields directly: `InputMode`, `CSVInput`, `OutputMode`, and `CSVOutput`.
|
||||
|
||||
Here's a simple snippet showing the use of the `InputMode` and `CSVInput` fields to enable `#` as the comment character:
|
||||
|
||||
```
|
||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||
if err != nil { ... }
|
||||
|
||||
config := &interp.Config{
|
||||
InputMode: interp.CSVMode,
|
||||
CSVInput: interp.CSVInputConfig{Comment: '#'},
|
||||
}
|
||||
_, err = interp.ExecProgram(prog, config)
|
||||
if err != nil { ... }
|
||||
```
|
||||
|
||||
Note that `INPUTMODE` and `OUTPUTMODE` set using `Vars` or in the `BEGIN` block will override these settings.
|
||||
|
||||
See the [full reference documentation](https://pkg.go.dev/github.com/benhoyt/goawk/interp#Config) for the `interp.Config` struct.
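
As a more self-contained sketch (the program and input data here are made up for illustration, using only the functions and `Config` fields mentioned above), the same CSV input mode can be enabled by passing `INPUTMODE` via `Vars` instead of setting `InputMode`/`CSVInput` directly:

```
package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	// Print the "name" column of each CSV record.
	prog, err := parser.ParseProgram([]byte(`{ print @"name" }`), nil)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	config := &interp.Config{
		Stdin: strings.NewReader("name,age\nBob,42\nJane,37\n"),
		// Equivalent to running goawk with "-i csv -H".
		Vars: []string{"INPUTMODE", "csv header"},
	}
	status, err := interp.ExecProgram(prog, config)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	os.Exit(status)
}
```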
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
Below are some examples using the [testdata/csv/states.csv](https://github.com/benhoyt/goawk/blob/master/testdata/csv/states.csv) file, which is a simple CSV file whose contents are as follows:
|
||||
|
||||
```
|
||||
"State","Abbreviation"
|
||||
"Alabama","AL"
|
||||
"Alaska","AK"
|
||||
"Arizona","AZ"
|
||||
"Arkansas","AR"
|
||||
"California","CA"
|
||||
...
|
||||
```
|
||||
|
||||
### Example: output a field by name
|
||||
|
||||
To output a field by name (in this case the state's abbreviation):
|
||||
|
||||
```
|
||||
$ goawk -i csv -H '{ print @"Abbreviation" }' testdata/csv/states.csv
|
||||
AL
|
||||
AK
|
||||
AZ
|
||||
...
|
||||
```
|
||||
|
||||
### Example: match a field and count
|
||||
|
||||
To count the number of states that have "New" in the name, and then print out what they are:
|
||||
|
||||
```
|
||||
$ goawk -i csv -H '@"State" ~ /New/ { n++ } END { print n }' testdata/csv/states.csv
|
||||
4
|
||||
$ goawk -i csv -H '@"State" ~ /New/ { print @"State" }' testdata/csv/states.csv
|
||||
New Hampshire
|
||||
New Jersey
|
||||
New Mexico
|
||||
New York
|
||||
```
|
||||
|
||||
### Example: rename and reorder fields
|
||||
|
||||
To rename and reorder the fields from `State`, `Abbreviation` to `abbr`, `name` (note that the `print` statement in the `BEGIN` block prints the header row for the output):
|
||||
|
||||
```
|
||||
$ goawk -i csv -H -o csv 'BEGIN { print "abbr", "name" } { print @"Abbreviation", @"State" }' testdata/csv/states.csv
|
||||
abbr,name
|
||||
AL,Alabama
|
||||
AK,Alaska
|
||||
...
|
||||
```
|
||||
|
||||
### Example: convert between formats (explicit field list)
|
||||
|
||||
To convert the file from CSV to TSV format (note how we're *not* using `-H`, so the header row is included):
|
||||
|
||||
```
|
||||
$ goawk -i csv -o tsv '{ print $1, $2 }' testdata/csv/states.csv
|
||||
State Abbreviation
|
||||
Alabama AL
|
||||
Alaska AK
|
||||
...
|
||||
```
|
||||
|
||||
### Example: convert between formats (all fields)
|
||||
|
||||
If you want to convert between CSV and TSV format but don't know the number of fields, you can use a field assignment like `$1=$1` so that GoAWK reformats `$0` according to the output format (TSV in this case). This is similar to how in POSIX AWK a field assignment reformats `$0` according to the output field separator (`OFS`). Then `print` without arguments prints the raw value of `$0`:
|
||||
|
||||
```
|
||||
$ goawk -i csv -o tsv '{ $1=$1; print }' testdata/csv/states.csv
|
||||
State Abbreviation
|
||||
Alabama AL
|
||||
Alaska AK
|
||||
...
|
||||
```
|
||||
|
||||
**NOTE:** It's not correct to use `print $0` in this case, because that would print `$0` as a single TSV field, which you generally don't want:
|
||||
|
||||
```
|
||||
$ goawk -i csv -o tsv '{ $1=$1; print $0 }' testdata/csv/states.csv # INCORRECT!
|
||||
"State Abbreviation"
|
||||
"Alabama AL"
|
||||
"Alaska AK"
|
||||
...
|
||||
```
|
||||
|
||||
### Example: override separator
|
||||
|
||||
To test overriding the separator character, we can use GoAWK to convert the file to use `|` (pipe) as the separator. We'll also add a comment line to test comment handling:
|
||||
|
||||
```
|
||||
$ goawk -i csv -o 'csv separator=|' 'BEGIN { printf "# comment\n" } { $1=$1; print }' testdata/csv/states.csv
|
||||
# comment
|
||||
State|Abbreviation
|
||||
Alabama|AL
|
||||
Alaska|AK
|
||||
...
|
||||
```
|
||||
|
||||
### Example: skip comment lines
|
||||
|
||||
We can process the "pipe-separated values" file generated above, skipping comment lines, and printing the first three state names (accessed by field number this time):
|
||||
|
||||
```
|
||||
$ goawk -i 'csv header comment=# separator=|' 'NR<=3 { print $1 }' testdata/csv/states.psv
|
||||
Alabama
|
||||
Alaska
|
||||
Arizona
|
||||
```
|
||||
|
||||
### Example: use dynamic field names
|
||||
|
||||
Similar to the `$` operator, you can also use `@` with dynamic values. For example, if there are fields named `address_1`, `address_2`, up through `address_5`, you could loop over them as follows:
|
||||
|
||||
```
|
||||
$ cat testdata/csv/address5.csv
|
||||
name,address_1,address_2,address_3,address_4,address_5
|
||||
Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates
|
||||
$ goawk -i csv -H '{ for (i=1; i<=5; i++) print @("address_" i) }' testdata/csv/address5.csv
|
||||
123 Way St
|
||||
Apt 2B
|
||||
Township
|
||||
Cityville
|
||||
United Plates
|
||||
```
|
||||
|
||||
### Example: use the `FIELDS` array
|
||||
|
||||
A somewhat contrived example showing use of the `FIELDS` array:
|
||||
|
||||
```
|
||||
$ cat testdata/csv/fields.csv
|
||||
id,name,email
|
||||
1,Bob,b@bob.com
|
||||
$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) print i, FIELDS[i] }' testdata/csv/fields.csv
|
||||
1 id
|
||||
2 name
|
||||
3 email
|
||||
```
|
||||
|
||||
### Example: create CSV file from array
|
||||
|
||||
The following example shows how you might pull fields out of an integer-indexed array to produce a CSV file:
|
||||
|
||||
```
|
||||
$ goawk -o csv 'BEGIN { print "id", "name"; names[1]="Bob"; names[2]="Jane"; for (i=1; i in names; i++) print i, names[i] }'
|
||||
id,name
|
||||
1,Bob
|
||||
2,Jane
|
||||
```
|
||||
|
||||
### Example: create CSV file by assigning fields
|
||||
|
||||
This example shows the same result, but producing the CSV output by assigning individual fields and then using a bare `print` statement:
|
||||
|
||||
```
|
||||
$ goawk -o csv 'BEGIN { print "id", "name"; $1=1; $2="Bob"; print; $1=2; $2="Jane"; print }'
|
||||
id,name
|
||||
1,Bob
|
||||
2,Jane
|
||||
```
|
||||
|
||||
### Example: different ways to specify CSV mode
|
||||
|
||||
And finally, four equivalent examples showing different ways to specify the input mode, using `-i` or the `INPUTMODE` special variable (the same techniques work for `-o` and `OUTPUTMODE`):
|
||||
|
||||
```
|
||||
$ goawk -i csv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||
NY
|
||||
$ goawk -icsv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||
NY
|
||||
$ goawk 'BEGIN { INPUTMODE="csv header" } @"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||
NY
|
||||
$ goawk -v 'INPUTMODE=csv header' '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||
NY
|
||||
```
|
||||
|
||||
|
||||
## Examples based on csvkit
|
||||
|
||||
The [csvkit](https://csvkit.readthedocs.io/en/latest/index.html) suite is a set of tools that allow you to quickly analyze and extract fields from CSV files. Each csvkit tool allows you to do a specific task; GoAWK is lower-level and more verbose, but also a more general tool ([`csvsql`](https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html#csvsql-and-sql2csv-ultimate-power) being the exception!). GoAWK also runs significantly faster than csvkit (the latter is written in Python).
|
||||
|
||||
Below are a few snippets showing how you'd do some of the tasks in the csvkit documentation, but using GoAWK (the input file is [testdata/csv/nz-schools.csv](https://github.com/benhoyt/goawk/blob/master/testdata/csv/nz-schools.csv)):
|
||||
|
||||
### csvkit example: print column names
|
||||
|
||||
```
|
||||
$ csvcut -n testdata/csv/nz-schools.csv
|
||||
1: School_Id
|
||||
2: Org_Name
|
||||
3: Decile
|
||||
4: Total
|
||||
|
||||
# In GoAWK you have to loop through the fields, but you can print the data in
|
||||
# any format you want (note the "exit" so it stops after the first row):
|
||||
$ goawk -i csv '{ for (i=1; i<=NF; i++) printf "%3d: %s\n", i, $i; exit }' testdata/csv/nz-schools.csv
|
||||
1: School_Id
|
||||
2: Org_Name
|
||||
3: Decile
|
||||
4: Total
|
||||
|
||||
# You could also use -H and the FIELDS array to do this:
|
||||
$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) printf "%3d: %s\n", i, FIELDS[i]; exit }' testdata/csv/nz-schools.csv
|
||||
1: School_Id
|
||||
2: Org_Name
|
||||
3: Decile
|
||||
4: Total
|
||||
```
|
||||
|
||||
### csvkit example: select a subset of columns
|
||||
|
||||
```
|
||||
$ csvcut -c Org_Name,Total testdata/csv/nz-schools.csv
|
||||
Org_Name,Total
|
||||
Waipa Christian School,60
|
||||
Remarkables Primary School,494
|
||||
...
|
||||
|
||||
# In GoAWK you need to print the field names explicitly in BEGIN:
|
||||
$ goawk -i csv -H -o csv 'BEGIN { print "Org_Name", "Total" } { print @"Org_Name", @"Total" }' testdata/csv/nz-schools.csv
|
||||
Org_Name,Total
|
||||
Waipa Christian School,60
|
||||
Remarkables Primary School,494
|
||||
...
|
||||
|
||||
# But you can also change the column names and reorder them:
|
||||
$ goawk -i csv -H -o csv 'BEGIN { print "# Students", "School" } { print @"Total", @"Org_Name" }' testdata/csv/nz-schools.csv
|
||||
# Students,School
|
||||
60,Waipa Christian School
|
||||
494,Remarkables Primary School
|
||||
...
|
||||
```
|
||||
|
||||
### csvkit example: generate statistics
|
||||
|
||||
There's no equivalent of the `csvstat` tool in GoAWK, but you can calculate statistics yourself. For example, to calculate the total number of students in New Zealand schools, you can do the following (`csvstat` is giving a warning due to the single-column input):
|
||||
|
||||
```
|
||||
$ csvcut -c Total testdata/csv/nz-schools.csv | csvstat --sum
|
||||
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
||||
802,516
|
||||
|
||||
$ goawk -i csv -H '{ sum += @"Total" } END { print sum }' testdata/csv/nz-schools.csv
|
||||
802516
|
||||
```
|
||||
|
||||
To calculate the average (mean) decile level for boys' and girls' schools (sorry, boys!):
|
||||
|
||||
```
|
||||
$ csvgrep -c Org_Name -m Boys testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean
|
||||
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
||||
6.45
|
||||
$ csvgrep -c Org_Name -m Girls testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean
|
||||
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
||||
8.889
|
||||
|
||||
$ goawk -i csv -H '/Boys/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv
|
||||
6.45
|
||||
$ goawk -i csv -H '/Girls/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv
|
||||
8.88889
|
||||
```
|
||||
|
||||
|
||||
## Performance
|
||||
|
||||
The performance of GoAWK's CSV input and output mode is quite good, on a par with using the `encoding/csv` package from Go directly, and much faster than the `csv` module in Python. CSV input speed is significantly slower than `frawk`, though CSV output speed is significantly faster than `frawk`.
|
||||
|
||||
Below are the results of some simple read and write [benchmarks](https://github.com/benhoyt/goawk/blob/master/scripts/csvbench) using `goawk` and `frawk` as well as plain Python and Go. The output of the write benchmarks is a 1GB, 3.5 million row CSV file with 20 columns (including quoted columns); the input for the read benchmarks uses that same file. Times are in seconds, showing the best of three runs on a 64-bit Linux laptop with an SSD drive:
|
||||
|
||||
Test | goawk | frawk | Python | Go
--------------- | ----- | ----- | ------ | ----
Reading 1GB CSV | 3.18 | 1.01 | 13.4 | 3.22
Writing 1GB CSV | 5.64 | 13.0 | 17.0 | 3.24
|
||||
|
||||
|
||||
## Future work
|
||||
|
||||
* Consider adding a `printrow(a)` or similar function to make it easier to construct CSV rows from scratch.
|
||||
- `a` would be an array such as: `a["name"] = "Bob"; a["age"] = 7`
|
||||
- keys would be ordered by `OFIELDS` (eg: `OFIELDS[1] = "name"; OFIELDS[2] = "age"`) or by "smart name" if `OFIELDS` not set ("smart name" meaning numeric if `a` keys are numeric, string otherwise)
|
||||
- `printrow(a)` could take an optional second `fields` array arg to use that instead of the global `OFIELDS`
|
||||
* Consider allowing `-H` to accept an optional list of field names which could be used as headers in the absence of headers in the file itself (either `-H=name,age` or `-i 'csv header=name,age'`).
|
||||
* Consider adding TrimLeadingSpace CSV input option. See: https://github.com/benhoyt/goawk/issues/109
|
||||
* Consider supporting `@"id" = 42` named field assignment.
|
||||
|
||||
|
||||
## Feedback
|
||||
|
||||
Please [open an issue](https://github.com/benhoyt/goawk/issues) if you have bug reports or feature requests for GoAWK's CSV support.
|
3
src/tool/awk/go.mod
Normal file
|
@@ -0,0 +1,3 @@
|
|||
module github.com/benhoyt/goawk
|
||||
|
||||
go 1.14
|
BIN
src/tool/awk/goawk
Executable file
Binary file not shown.
394
src/tool/awk/goawk.go
Normal file
|
@@ -0,0 +1,394 @@
|
|||
// Package goawk is an implementation of AWK with CSV support
|
||||
//
|
||||
// You can use the command-line "goawk" command or run AWK from your
|
||||
// Go programs using the "interp" package. The command-line program
|
||||
// has the same interface as regular awk:
|
||||
//
|
||||
// goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]
|
||||
//
|
||||
// The -F flag specifies the field separator (the default is to split
|
||||
// on whitespace). The -v flag allows you to set a variable to a
|
||||
// given value (multiple -v flags allowed). The -f flag allows you to
|
||||
// read AWK source from a file instead of the 'prog' command-line
|
||||
// argument. The rest of the arguments are input filenames (default
|
||||
// is to read from stdin).
|
||||
//
|
||||
// A simple example (prints the sum of the numbers in the file's
|
||||
// second column):
|
||||
//
|
||||
// $ echo 'foo 12
|
||||
// > bar 34
|
||||
// > baz 56' >file.txt
|
||||
// $ goawk '{ sum += $2 } END { print sum }' file.txt
|
||||
// 102
|
||||
//
|
||||
// To use GoAWK in your Go programs, see README.md or the "interp"
|
||||
// package docs.
|
||||
package awk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"runtime/pprof"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/benhoyt/goawk/interp"
|
||||
"github.com/benhoyt/goawk/lexer"
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
const (
|
||||
version = "v1.19.0"
|
||||
copyright = "GoAWK " + version + " - Copyright (c) 2022 Ben Hoyt"
|
||||
shortUsage = "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"
|
||||
longUsage = `Standard AWK arguments:
|
||||
-F separator field separator (default " ")
|
||||
-f progfile load AWK source from progfile (multiple allowed)
|
||||
-v var=value variable assignment (multiple allowed)
|
||||
|
||||
Additional GoAWK arguments:
|
||||
-cpuprofile file write CPU profile to file
|
||||
-d print parsed syntax tree to stderr (debug mode)
|
||||
-da print virtual machine assembly instructions to stderr
|
||||
-dt print variable type information to stderr
|
||||
-H parse header row and enable @"field" in CSV input mode
|
||||
-h, --help show this help message
|
||||
-i mode parse input into fields using CSV format (ignore FS and RS)
|
||||
'csv|tsv [separator=<char>] [comment=<char>] [header]'
|
||||
-o mode use CSV output for print with args (ignore OFS and ORS)
|
||||
'csv|tsv [separator=<char>]'
|
||||
-version show GoAWK version and exit
|
||||
`
|
||||
)
|
||||
|
||||
func Run(args []string) {
|
||||
// Parse command line arguments manually rather than using the
|
||||
// "flag" package, so we can support flags with no space between
|
||||
// flag and argument, like '-F:' (allowed by POSIX)
|
||||
var progFiles []string
|
||||
var vars []string
|
||||
fieldSep := " "
|
||||
cpuprofile := ""
|
||||
debug := false
|
||||
debugAsm := false
|
||||
debugTypes := false
|
||||
memprofile := ""
|
||||
inputMode := ""
|
||||
outputMode := ""
|
||||
header := false
|
||||
|
||||
var i int
|
||||
for i = 1; i < len(args); i++ {
|
||||
// Stop on explicit end of args or first arg not prefixed with "-"
|
||||
arg := args[i]
|
||||
if arg == "--" {
|
||||
i++
|
||||
break
|
||||
}
|
||||
if arg == "-" || !strings.HasPrefix(arg, "-") {
|
||||
break
|
||||
}
|
||||
|
||||
switch arg {
|
||||
case "-F":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -F")
|
||||
}
|
||||
i++
|
||||
fieldSep = args[i]
|
||||
case "-f":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -f")
|
||||
}
|
||||
i++
|
||||
progFiles = append(progFiles, args[i])
|
||||
case "-v":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -v")
|
||||
}
|
||||
i++
|
||||
vars = append(vars, args[i])
|
||||
case "-cpuprofile":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -cpuprofile")
|
||||
}
|
||||
i++
|
||||
cpuprofile = args[i]
|
||||
case "-d":
|
||||
debug = true
|
||||
case "-da":
|
||||
debugAsm = true
|
||||
case "-dt":
|
||||
debugTypes = true
|
||||
case "-H":
|
||||
header = true
|
||||
case "-h", "--help":
|
||||
fmt.Printf("%s\n\n%s\n\n%s", copyright, shortUsage, longUsage)
|
||||
os.Exit(0)
|
||||
case "-i":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -i")
|
||||
}
|
||||
i++
|
||||
inputMode = args[i]
|
||||
case "-memprofile":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -memprofile")
|
||||
}
|
||||
i++
|
||||
memprofile = args[i]
|
||||
case "-o":
|
||||
if i+1 >= len(args) {
|
||||
errorExitf("flag needs an argument: -o")
|
||||
}
|
||||
i++
|
||||
outputMode = args[i]
|
||||
case "-version", "--version":
|
||||
fmt.Println(version)
|
||||
os.Exit(0)
|
||||
default:
|
||||
switch {
|
||||
case strings.HasPrefix(arg, "-F"):
|
||||
fieldSep = arg[2:]
|
||||
case strings.HasPrefix(arg, "-f"):
|
||||
progFiles = append(progFiles, arg[2:])
|
||||
case strings.HasPrefix(arg, "-i"):
|
||||
inputMode = arg[2:]
|
||||
case strings.HasPrefix(arg, "-o"):
|
||||
outputMode = arg[2:]
|
||||
case strings.HasPrefix(arg, "-v"):
|
||||
vars = append(vars, arg[2:])
|
||||
case strings.HasPrefix(arg, "-cpuprofile="):
|
||||
cpuprofile = arg[12:]
|
||||
case strings.HasPrefix(arg, "-memprofile="):
|
||||
memprofile = arg[12:]
|
||||
default:
|
||||
errorExitf("flag provided but not defined: %s", arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Any remaining args are program and input files
|
||||
args = args[i:]
|
||||
|
||||
var src []byte
|
||||
var stdinBytes []byte // used if there's a parse error
|
||||
if len(progFiles) > 0 {
|
||||
// Read source: the concatenation of all source files specified
|
||||
buf := &bytes.Buffer{}
|
||||
progFiles = expandWildcardsOnWindows(progFiles)
|
||||
for _, progFile := range progFiles {
|
||||
if progFile == "-" {
|
||||
b, err := ioutil.ReadAll(os.Stdin)
|
||||
if err != nil {
|
||||
errorExit(err)
|
||||
}
|
||||
stdinBytes = b
|
||||
_, _ = buf.Write(b)
|
||||
} else {
|
||||
f, err := os.Open(progFile)
|
||||
if err != nil {
|
||||
errorExit(err)
|
||||
}
|
||||
_, err = buf.ReadFrom(f)
|
||||
if err != nil {
|
||||
_ = f.Close()
|
||||
errorExit(err)
|
||||
}
|
||||
_ = f.Close()
|
||||
}
|
||||
// Append newline to file in case it doesn't end with one
|
||||
_ = buf.WriteByte('\n')
|
||||
}
|
||||
src = buf.Bytes()
|
||||
} else {
|
||||
if len(args) < 1 {
|
||||
errorExitf(shortUsage)
|
||||
}
|
||||
src = []byte(args[0])
|
||||
args = args[1:]
|
||||
}
|
||||
|
||||
// Parse source code and setup interpreter
|
||||
parserConfig := &parser.ParserConfig{
|
||||
DebugTypes: debugTypes,
|
||||
DebugWriter: os.Stderr,
|
||||
}
|
||||
prog, err := parser.ParseProgram(src, parserConfig)
|
||||
if err != nil {
|
||||
if err, ok := err.(*parser.ParseError); ok {
|
||||
name, line := errorFileLine(progFiles, stdinBytes, err.Position.Line)
|
||||
fmt.Fprintf(os.Stderr, "%s:%d:%d: %s\n",
|
||||
name, line, err.Position.Column, err.Message)
|
||||
showSourceLine(src, err.Position)
|
||||
os.Exit(1)
|
||||
}
|
||||
errorExitf("%s", err)
|
||||
}
|
||||
|
||||
if debug {
|
||||
fmt.Fprintln(os.Stderr, prog)
|
||||
}
|
||||
|
||||
if debugAsm {
|
||||
err := prog.Disassemble(os.Stderr)
|
||||
if err != nil {
|
||||
errorExitf("could not disassemble program: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if header {
|
||||
if inputMode == "" {
|
||||
errorExitf("-H only allowed together with -i")
|
||||
}
|
||||
inputMode += " header"
|
||||
}
|
||||
|
||||
// Don't buffer output if stdout is a terminal (default output writer when
|
||||
// Config.Output is nil is a buffered version of os.Stdout).
|
||||
var stdout io.Writer
|
||||
stdoutInfo, err := os.Stdout.Stat()
|
||||
if err == nil && stdoutInfo.Mode()&os.ModeCharDevice != 0 {
|
||||
stdout = os.Stdout
|
||||
}
|
||||
|
||||
config := &interp.Config{
|
||||
Argv0: filepath.Base(args[0]),
|
||||
Args: expandWildcardsOnWindows(args),
|
||||
Vars: []string{
|
||||
"FS", fieldSep,
|
||||
"INPUTMODE", inputMode,
|
||||
"OUTPUTMODE", outputMode,
|
||||
},
|
||||
Output: stdout,
|
||||
}
|
||||
for _, v := range vars {
|
||||
equals := strings.IndexByte(v, '=')
|
||||
if equals < 0 {
|
||||
errorExitf("-v flag must be in format name=value")
|
||||
}
|
||||
name, value := v[:equals], v[equals+1:]
|
||||
// Oddly, -v must interpret escapes (issue #129)
|
||||
unescaped, err := lexer.Unescape(value)
|
||||
if err == nil {
|
||||
value = unescaped
|
||||
}
|
||||
config.Vars = append(config.Vars, name, value)
|
||||
}
|
||||
|
||||
if cpuprofile != "" {
|
||||
f, err := os.Create(cpuprofile)
|
||||
if err != nil {
|
||||
errorExitf("could not create CPU profile: %v", err)
|
||||
}
|
||||
if err := pprof.StartCPUProfile(f); err != nil {
|
||||
errorExitf("could not start CPU profile: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Run the program!
|
||||
status, err := interp.ExecProgram(prog, config)
|
||||
if err != nil {
|
||||
errorExit(err)
|
||||
}
|
||||
|
||||
if cpuprofile != "" {
|
||||
pprof.StopCPUProfile()
|
||||
}
|
||||
if memprofile != "" {
|
||||
f, err := os.Create(memprofile)
|
||||
if err != nil {
|
||||
errorExitf("could not create memory profile: %v", err)
|
||||
}
|
||||
runtime.GC() // get up-to-date statistics
|
||||
if err := pprof.WriteHeapProfile(f); err != nil {
|
||||
errorExitf("could not write memory profile: %v", err)
|
||||
}
|
||||
_ = f.Close()
|
||||
}
|
||||
|
||||
os.Exit(status)
|
||||
}
|
||||
|
||||
// Show source line and position of error, for example:
|
||||
//
|
||||
// BEGIN { x*; }
|
||||
// ^
|
||||
func showSourceLine(src []byte, pos lexer.Position) {
|
||||
lines := bytes.Split(src, []byte{'\n'})
|
||||
srcLine := string(lines[pos.Line-1])
|
||||
numTabs := strings.Count(srcLine[:pos.Column-1], "\t")
|
||||
runeColumn := utf8.RuneCountInString(srcLine[:pos.Column-1])
|
||||
fmt.Fprintln(os.Stderr, strings.Replace(srcLine, "\t", " ", -1))
|
||||
fmt.Fprintln(os.Stderr, strings.Repeat(" ", runeColumn)+strings.Repeat(" ", numTabs)+"^")
|
||||
}
|
||||
|
||||
// Determine which filename and line number to display for the overall
|
||||
// error line number.
|
||||
func errorFileLine(progFiles []string, stdinBytes []byte, errorLine int) (string, int) {
|
||||
if len(progFiles) == 0 {
|
||||
return "<cmdline>", errorLine
|
||||
}
|
||||
startLine := 1
|
||||
for _, progFile := range progFiles {
|
||||
var content []byte
|
||||
if progFile == "-" {
|
||||
progFile = "<stdin>"
|
||||
content = stdinBytes
|
||||
} else {
|
||||
b, err := ioutil.ReadFile(progFile)
|
||||
if err != nil {
|
||||
return "<unknown>", errorLine
|
||||
}
|
||||
content = b
|
||||
}
|
||||
content = append(content, '\n')
|
||||
|
||||
numLines := bytes.Count(content, []byte{'\n'})
|
||||
if errorLine >= startLine && errorLine < startLine+numLines {
|
||||
return progFile, errorLine - startLine + 1
|
||||
}
|
||||
startLine += numLines
|
||||
}
|
||||
return "<unknown>", errorLine
|
||||
}
|
||||
|
||||
func errorExit(err error) {
|
||||
pathErr, ok := err.(*os.PathError)
|
||||
if ok && os.IsNotExist(err) {
|
||||
errorExitf("file %q not found", pathErr.Path)
|
||||
}
|
||||
errorExitf("%s", err)
|
||||
}
|
||||
|
||||
func errorExitf(format string, args ...interface{}) {
|
||||
fmt.Fprintf(os.Stderr, format+"\n", args...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
func expandWildcardsOnWindows(args []string) []string {
|
||||
if runtime.GOOS != "windows" {
|
||||
return args
|
||||
}
|
||||
return expandWildcards(args)
|
||||
}
|
||||
|
||||
// Originally from https://github.com/mattn/getwild (compatible LICENSE).
|
||||
func expandWildcards(args []string) []string {
|
||||
result := make([]string, 0, len(args))
|
||||
for _, arg := range args {
|
||||
matches, err := filepath.Glob(arg)
|
||||
if err == nil && len(matches) > 0 {
|
||||
result = append(result, matches...)
|
||||
} else {
|
||||
result = append(result, arg)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
749
src/tool/awk/goawk_test.go
Normal file
|
@@ -0,0 +1,749 @@
|
|||
// GoAWK tests
|
||||
|
||||
package awk_test
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/benhoyt/goawk/interp"
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
var (
|
||||
goExe string
|
||||
testsDir string
|
||||
outputDir string
|
||||
awkExe string
|
||||
goAWKExe string
|
||||
writeAWK bool
|
||||
writeGoAWK bool
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
flag.StringVar(&goExe, "goexe", "go", "set to override Go executable used to build goawk")
|
||||
flag.StringVar(&testsDir, "testsdir", "./testdata", "directory with one-true-awk tests")
|
||||
flag.StringVar(&outputDir, "outputdir", "./testdata/output", "directory for test output")
|
||||
flag.StringVar(&awkExe, "awk", "gawk", "awk executable name")
|
||||
flag.StringVar(&goAWKExe, "goawk", "./goawk", "goawk executable name")
|
||||
flag.BoolVar(&writeAWK, "writeawk", false, "write expected output")
|
||||
flag.BoolVar(&writeGoAWK, "writegoawk", true, "write Go AWK output")
|
||||
flag.Parse()
|
||||
|
||||
cmd := exec.Command(goExe, "build", "-ldflags=-w")
|
||||
stderr, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error building goawk: %v\n%s\n", err, stderr)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestAWK(t *testing.T) {
|
||||
inputByPrefix := map[string]string{
|
||||
"t": "test.data",
|
||||
"p": "test.countries",
|
||||
}
|
||||
// These programs exit with non-zero status code
|
||||
errorExits := map[string]bool{
|
||||
"t.exit": true,
|
||||
"t.exit1": true,
|
||||
"t.gsub4": true,
|
||||
"t.split3": true,
|
||||
}
|
||||
// These programs have known different output
|
||||
knownDifferent := map[string]bool{
|
||||
"t.printf2": true, // because awk is weird here (our behavior is like mawk)
|
||||
}
|
||||
// Can't really diff test rand() tests as we're using a totally
|
||||
// different algorithm for random numbers
|
||||
randTests := map[string]bool{
|
||||
"p.48b": true,
|
||||
"t.randk": true,
|
||||
}
|
||||
// These tests use "for (x in a)", which iterates in an undefined
|
||||
// order (according to the spec), so sort lines before comparing.
|
||||
sortLines := map[string]bool{
|
||||
"p.43": true,
|
||||
"t.in1": true, // because "sort" is locale-dependent
|
||||
"t.in2": true,
|
||||
"t.intest2": true,
|
||||
}
|
||||
dontRunOnWindows := map[string]bool{
|
||||
"p.50": true, // because this pipes to Unix sort "sort -t: +0 -1 +2nr"
|
||||
}
|
||||
|
||||
infos, err := ioutil.ReadDir(testsDir)
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't read test files: %v", err)
|
||||
}
|
||||
for _, info := range infos {
|
||||
if !strings.HasPrefix(info.Name(), "t.") && !strings.HasPrefix(info.Name(), "p.") {
|
||||
continue
|
||||
}
|
||||
if runtime.GOOS == "windows" && dontRunOnWindows[info.Name()] {
|
||||
continue
|
||||
}
|
||||
t.Run(info.Name(), func(t *testing.T) {
|
||||
srcPath := filepath.Join(testsDir, info.Name())
|
||||
inputPath := filepath.Join(testsDir, inputByPrefix[info.Name()[:1]])
|
||||
outputPath := filepath.Join(outputDir, info.Name())
|
||||
|
||||
cmd := exec.Command(awkExe, "-f", srcPath, inputPath)
|
||||
expected, err := cmd.Output()
|
||||
if err != nil && !errorExits[info.Name()] {
|
||||
t.Fatalf("error running %s: %v", awkExe, err)
|
||||
}
|
||||
expected = bytes.Replace(expected, []byte{0}, []byte("<00>"), -1)
|
||||
expected = normalizeNewlines(expected)
|
||||
if sortLines[info.Name()] {
|
||||
expected = sortedLines(expected)
|
||||
}
|
||||
if writeAWK {
|
||||
err := ioutil.WriteFile(outputPath, expected, 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("error writing awk output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
prog, err := parseGoAWK(srcPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
output, err := interpGoAWK(prog, inputPath)
|
||||
if err != nil && !errorExits[info.Name()] {
|
||||
t.Fatal(err)
|
||||
}
|
||||
output = bytes.Replace(output, []byte{0}, []byte("<00>"), -1)
|
||||
output = normalizeNewlines(output)
|
||||
if randTests[info.Name()] || knownDifferent[info.Name()] {
|
||||
// For tests that use rand(), run them to ensure they
|
||||
// parse and interpret, but can't compare the output,
|
||||
// so stop now
|
||||
return
|
||||
}
|
||||
if sortLines[info.Name()] {
|
||||
output = sortedLines(output)
|
||||
}
|
||||
if writeGoAWK {
|
||||
err := ioutil.WriteFile(outputPath, output, 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("error writing goawk output: %v", err)
|
||||
}
|
||||
}
|
||||
if string(output) != string(expected) {
|
||||
t.Fatalf("output differs, run: git diff %s", outputPath)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
_ = os.Remove("tempbig")
|
||||
_ = os.Remove("tempsmall")
|
||||
}
|
||||
|
||||
func parseGoAWK(srcPath string) (*parser.Program, error) {
|
||||
src, err := ioutil.ReadFile(srcPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
prog, err := parser.ParseProgram(src, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return prog, nil
|
||||
}
|
||||
|
||||
func interpGoAWK(prog *parser.Program, inputPath string) ([]byte, error) {
|
||||
outBuf := &bytes.Buffer{}
|
||||
errBuf := &bytes.Buffer{}
|
||||
config := &interp.Config{
|
||||
Output: outBuf,
|
||||
Error: &concurrentWriter{w: errBuf},
|
||||
Args: []string{inputPath},
|
||||
}
|
||||
_, err := interp.ExecProgram(prog, config)
|
||||
result := outBuf.Bytes()
|
||||
result = append(result, errBuf.Bytes()...)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func interpGoAWKStdin(prog *parser.Program, inputPath string) ([]byte, error) {
|
||||
input, _ := ioutil.ReadFile(inputPath)
|
||||
outBuf := &bytes.Buffer{}
|
||||
errBuf := &bytes.Buffer{}
|
||||
config := &interp.Config{
|
||||
Stdin: &concurrentReader{r: bytes.NewReader(input)},
|
||||
Output: outBuf,
|
||||
Error: &concurrentWriter{w: errBuf},
|
||||
// srcdir is for "redfilnm.awk"
|
||||
Vars: []string{"srcdir", filepath.Dir(inputPath)},
|
||||
}
|
||||
_, err := interp.ExecProgram(prog, config)
|
||||
result := outBuf.Bytes()
|
||||
result = append(result, errBuf.Bytes()...)
|
||||
return result, err
|
||||
}
|
||||
|
||||
// Wraps a Writer but makes Write calls safe for concurrent use.
|
||||
type concurrentWriter struct {
|
||||
w io.Writer
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func (w *concurrentWriter) Write(p []byte) (int, error) {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
return w.w.Write(p)
|
||||
}
|
||||
|
||||
// Wraps a Reader but makes Read calls safe for concurrent use.
|
||||
type concurrentReader struct {
|
||||
r io.Reader
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func (r *concurrentReader) Read(p []byte) (int, error) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.r.Read(p)
|
||||
}
|
||||
|
||||
func sortedLines(data []byte) []byte {
|
||||
trimmed := strings.TrimSuffix(string(data), "\n")
|
||||
lines := strings.Split(trimmed, "\n")
|
||||
sort.Strings(lines)
|
||||
return []byte(strings.Join(lines, "\n") + "\n")
|
||||
}
|
||||
|
||||
func TestGAWK(t *testing.T) {
|
||||
skip := map[string]bool{ // TODO: fix these (at least the ones that are bugs)
|
||||
"getline": true, // getline syntax issues (may be okay, see grammar notes at http://pubs.opengroup.org/onlinepubs/007904975/utilities/awk.html#tag_04_06_13_14)
|
||||
"getline3": true, // getline syntax issues (similar to above)
|
||||
|
||||
"gsubtst7": true, // something wrong with gsub or field split/join
|
||||
"splitwht": true, // other awks handle split(s, a, " ") differently from split(s, a, / /)
|
||||
"status-close": true, // hmmm, not sure what's up here
|
||||
"sigpipe1": true, // probable race condition: sometimes fails, sometimes passes
|
||||
|
||||
"parse1": true, // incorrect parsing of $$a++++ (see TODOs in interp_test.go too)
|
||||
|
||||
"rscompat": true, // GoAWK allows multi-char RS by default
|
||||
"rsstart2": true, // GoAWK ^ and $ anchors match beginning and end of line, not file (unlike Gawk)
|
||||
|
||||
"hex2": true, // GoAWK allows hex numbers / floating point (per POSIX)
|
||||
"strtod": true, // GoAWK allows hex numbers / floating point (per POSIX)
|
||||
}
|
||||
|
||||
dontRunOnWindows := map[string]bool{
|
||||
"delargv": true, // reads from /dev/null
|
||||
"eofsplit": true, // reads from /etc/passwd
|
||||
"getline5": true, // removes a file while it's open
|
||||
"iobug1": true, // reads from /dev/null
|
||||
}
|
||||
|
||||
sortLines := map[string]bool{
|
||||
"arryref2": true,
|
||||
"delargv": true,
|
||||
"delarpm2": true,
|
||||
"forref": true,
|
||||
}
|
||||
|
||||
gawkDir := filepath.Join(testsDir, "gawk")
|
||||
infos, err := ioutil.ReadDir(gawkDir)
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't read test files: %v", err)
|
||||
}
|
||||
for _, info := range infos {
|
||||
if !strings.HasSuffix(info.Name(), ".awk") {
|
||||
continue
|
||||
}
|
||||
testName := info.Name()[:len(info.Name())-4]
|
||||
if skip[testName] {
|
||||
continue
|
||||
}
|
||||
if runtime.GOOS == "windows" && dontRunOnWindows[testName] {
|
||||
continue
|
||||
}
|
||||
t.Run(testName, func(t *testing.T) {
|
||||
srcPath := filepath.Join(gawkDir, info.Name())
|
||||
inputPath := filepath.Join(gawkDir, testName+".in")
|
||||
okPath := filepath.Join(gawkDir, testName+".ok")
|
||||
|
||||
expected, err := ioutil.ReadFile(okPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expected = normalizeNewlines(expected)
|
||||
|
||||
prog, err := parseGoAWK(srcPath)
|
||||
if err != nil {
|
||||
if err.Error() != string(expected) {
|
||||
t.Fatalf("parser error differs, got:\n%s\nexpected:\n%s", err.Error(), expected)
|
||||
}
|
||||
return
|
||||
}
|
||||
output, err := interpGoAWKStdin(prog, inputPath)
|
||||
output = normalizeNewlines(output)
|
||||
if err != nil {
|
||||
errStr := string(output) + err.Error()
|
||||
if errStr != string(expected) {
|
||||
t.Fatalf("interp error differs, got:\n%s\nexpected:\n%s", errStr, expected)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if sortLines[testName] {
|
||||
output = sortedLines(output)
|
||||
expected = sortedLines(expected)
|
||||
}
|
||||
|
||||
if string(output) != string(expected) {
|
||||
t.Fatalf("output differs, got:\n%s\nexpected:\n%s", output, expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
_ = os.Remove("seq")
|
||||
}
|
||||
|
||||
func TestCommandLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
args []string
|
||||
stdin string
|
||||
output string
|
||||
error string
|
||||
}{
|
||||
// Load source from stdin
|
||||
{[]string{"-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
||||
{[]string{"-f", "-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
||||
{[]string{"-f-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
||||
|
||||
// Program with no input
|
||||
{[]string{`BEGIN { print "a" }`}, "", "a\n", ""},
|
||||
|
||||
// Read input from stdin
|
||||
{[]string{`$0`}, "one\n\nthree", "one\nthree\n", ""},
|
||||
{[]string{`$0`, "-"}, "one\n\nthree", "one\nthree\n", ""},
|
||||
{[]string{`$0`, "-", "-"}, "one\n\nthree", "one\nthree\n", ""},
|
||||
{[]string{"-f", "testdata/t.0", "-"}, "one\ntwo\n", "one\ntwo\n", ""},
|
||||
{[]string{"{ print FILENAME }"}, "a", "-\n", ""},
|
||||
{[]string{"{ print FILENAME }", "-"}, "a", "-\n", ""},
|
||||
|
||||
// Read input from file(s)
|
||||
{[]string{`$0`, "testdata/g.1"}, "", "ONE\n", ""},
|
||||
{[]string{`$0`, "testdata/g.1", "testdata/g.2"}, "", "ONE\nTWO\n", ""},
|
||||
{[]string{`{ print FILENAME ":" FNR "/" NR ": " $0 }`, "testdata/g.1", "testdata/g.4"}, "",
|
||||
"testdata/g.1:1/1: ONE\ntestdata/g.4:1/2: FOUR a\ntestdata/g.4:2/3: FOUR b\n", ""},
|
||||
{[]string{`$0`, "testdata/g.1", "-", "testdata/g.2"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""},
|
||||
{[]string{`$0`, "testdata/g.1", "-", "testdata/g.2", "-"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""},
|
||||
{[]string{"-F", " ", "--", "$0", "testdata/g.1"}, "", "ONE\n", ""},
|
||||
{[]string{"{ print NR, FNR } END { print NR, FNR }", "-"}, "a\nb\nc\n", "1 1\n2 2\n3 3\n3 3\n", ""},
|
||||
// I've deleted the "-ftest" file for now as it was causing problems with "go install" zip files
|
||||
// {[]string{"--", "$0", "-ftest"}, "", "used in tests; do not delete\n", ""}, // Issue #53
|
||||
// {[]string{"$0", "-ftest"}, "", "used in tests; do not delete\n", ""},
|
||||
|
||||
// Specifying field separator with -F
|
||||
{[]string{`{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 3\n4 6\n", ""},
|
||||
{[]string{"-F", ",", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 2 3 \n4 5 6 \n", ""},
|
||||
{[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
||||
{[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
||||
{[]string{"-F,", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
||||
|
||||
// Assigning other variables with -v
|
||||
{[]string{"-v", "OFS=.", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.3\n4.6\n", ""},
|
||||
{[]string{"-v", "OFS=.", "-v", "ORS=", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.34.6", ""},
|
||||
{[]string{"-v", "x=42", "-v", "y=foo", `BEGIN { print x, y }`}, "", "42 foo\n", ""},
|
||||
{[]string{"-v", "RS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""},
|
||||
{[]string{"-vRS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""},
|
||||
{[]string{"-v", `X=x\ty`, `BEGIN { printf X }`}, "", "x\ty", ""},
|
||||
|
||||
// ARGV/ARGC handling
|
||||
{[]string{`
|
||||
BEGIN {
|
||||
for (i=1; i<ARGC; i++) {
|
||||
print i, ARGV[i]
|
||||
}
|
||||
}`, "a", "b"}, "", "1 a\n2 b\n", ""},
|
||||
{[]string{`
|
||||
BEGIN {
|
||||
for (i=1; i<ARGC; i++) {
|
||||
print i, ARGV[i]
|
||||
delete ARGV[i]
|
||||
}
|
||||
}
|
||||
$0`, "a", "b"}, "c\nd", "1 a\n2 b\nc\nd\n", ""},
|
||||
{[]string{`
|
||||
BEGIN {
|
||||
ARGV[1] = ""
|
||||
}
|
||||
$0`, "testdata/g.1", "-", "testdata/g.2"}, "c\nd", "c\nd\nTWO\n", ""},
|
||||
{[]string{`
|
||||
BEGIN {
|
||||
ARGC = 3
|
||||
}
|
||||
$0`, "testdata/g.1", "-", "testdata/g.2"}, "c\nd", "ONE\nc\nd\n", ""},
|
||||
{[]string{"-v", "A=1", "-f", "testdata/g.3", "B=2", "testdata/test.countries"}, "",
|
||||
"A=1, B=0\n\tARGV[1] = B=2\n\tARGV[2] = testdata/test.countries\nA=1, B=2\n", ""},
|
||||
{[]string{`END { print (x==42) }`, "x=42.0"}, "", "1\n", ""},
|
||||
{[]string{`END { printf X }`, `X=a\tb`}, "", "a\tb", ""},
|
||||
{[]string{"-v", "x=42.0", `BEGIN { print (x==42) }`}, "", "1\n", ""},
|
||||
{[]string{`BEGIN { print(ARGV[1]<2, ARGV[2]<2); ARGV[1]="10"; ARGV[2]="10x"; print(ARGV[1]<2, ARGV[2]<2) }`,
|
||||
"10", "10x"}, "", "0 1\n1 1\n", ""},
|
||||
|
||||
// Error handling
|
||||
{[]string{}, "", "", "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"},
|
||||
{[]string{"-F"}, "", "", "flag needs an argument: -F"},
|
||||
{[]string{"-f"}, "", "", "flag needs an argument: -f"},
|
||||
{[]string{"-v"}, "", "", "flag needs an argument: -v"},
|
||||
{[]string{"-z"}, "", "", "flag provided but not defined: -z"},
|
||||
{[]string{"{ print }", "notexist"}, "", "", `file "notexist" not found`},
|
||||
{[]string{"BEGIN { print 1/0 }"}, "", "", "division by zero"},
|
||||
{[]string{"-v", "foo", "BEGIN {}"}, "", "", "-v flag must be in format name=value"},
|
||||
{[]string{"--", "{ print $1 }", "-file"}, "", "", `file "-file" not found`},
|
||||
{[]string{"{ print $1 }", "-file"}, "", "", `file "-file" not found`},
|
||||
|
||||
// Output synchronization
|
||||
{[]string{`BEGIN { print "1"; print "2"|"cat" }`}, "", "1\n2\n", ""},
|
||||
{[]string{`BEGIN { print "1"; "echo 2" | getline x; print x }`}, "", "1\n2\n", ""},
|
||||
|
||||
// Parse error formatting
|
||||
{[]string{"`"}, "", "", "<cmdline>:1:1: unexpected char\n`\n^"},
|
||||
{[]string{"BEGIN {\n\tx*;\n}"}, "", "", "<cmdline>:2:4: expected expression instead of ;\n x*;\n ^"},
|
||||
{[]string{"BEGIN {\n\tx*\r\n}"}, "", "", "<cmdline>:2:4: expected expression instead of <newline>\n x*\n ^"},
|
||||
{[]string{"-f", "-"}, "\n ++", "", "<stdin>:2:4: expected expression instead of <newline>\n ++\n ^"},
|
||||
{[]string{"-f", "testdata/parseerror/good.awk", "-f", "testdata/parseerror/bad.awk"},
|
||||
"", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of <newline>\nx*\n ^"},
|
||||
{[]string{"-f", "testdata/parseerror/bad.awk", "-f", "testdata/parseerror/good.awk"},
|
||||
"", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of <newline>\nx*\n ^"},
|
||||
{[]string{"-f", "testdata/parseerror/good.awk", "-f", "-", "-f", "testdata/parseerror/bad.awk"},
|
||||
"`", "", "<stdin>:1:1: unexpected char\n`\n^"},
|
||||
}
|
||||
for _, test := range tests {
|
||||
testName := strings.Join(test.args, " ")
|
||||
t.Run(testName, func(t *testing.T) {
|
||||
runAWKs(t, test.args, test.stdin, test.output, test.error)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDevStdout(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("/dev/stdout not presnt on Windows")
|
||||
}
|
||||
runAWKs(t, []string{`BEGIN { print "1"; print "2">"/dev/stdout" }`}, "", "1\n2\n", "")
|
||||
}
|
||||
|
||||
func runGoAWK(args []string, stdin string) (stdout, stderr string, err error) {
|
||||
cmd := exec.Command(goAWKExe, args...)
|
||||
if stdin != "" {
|
||||
cmd.Stdin = strings.NewReader(stdin)
|
||||
}
|
||||
errBuf := &bytes.Buffer{}
|
||||
cmd.Stderr = errBuf
|
||||
output, err := cmd.Output()
|
||||
stdout = string(normalizeNewlines(output))
|
||||
stderr = string(normalizeNewlines(errBuf.Bytes()))
|
||||
return stdout, stderr, err
|
||||
}
|
||||
|
||||
func runAWKs(t *testing.T, testArgs []string, testStdin, testOutput, testError string) {
|
||||
var args []string
|
||||
if strings.Contains(awkExe, "gawk") {
|
||||
args = append(args, "--posix")
|
||||
}
|
||||
args = append(args, testArgs...)
|
||||
cmd := exec.Command(awkExe, args...)
|
||||
if testStdin != "" {
|
||||
cmd.Stdin = strings.NewReader(testStdin)
|
||||
}
|
||||
errBuf := &bytes.Buffer{}
|
||||
cmd.Stderr = errBuf
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
if testError == "" {
|
||||
t.Fatalf("expected no error, got AWK error: %v (%s)", err, errBuf.String())
|
||||
}
|
||||
} else {
|
||||
if testError != "" {
|
||||
t.Fatalf("expected AWK error, got none")
|
||||
}
|
||||
}
|
||||
stdout := string(normalizeNewlines(output))
|
||||
if stdout != testOutput {
|
||||
t.Fatalf("expected AWK to give %q, got %q", testOutput, stdout)
|
||||
}
|
||||
|
||||
stdout, stderr, err := runGoAWK(testArgs, testStdin)
|
||||
if err != nil {
|
||||
stderr = strings.TrimSpace(stderr)
|
||||
if stderr != testError {
|
||||
t.Fatalf("expected GoAWK error %q, got %q", testError, stderr)
|
||||
}
|
||||
} else {
|
||||
if testError != "" {
|
||||
t.Fatalf("expected GoAWK error %q, got none", testError)
|
||||
}
|
||||
}
|
||||
if stdout != testOutput {
|
||||
t.Fatalf("expected GoAWK to give %q, got %q", testOutput, stdout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWildcards(t *testing.T) {
|
||||
if runtime.GOOS != "windows" {
|
||||
// Wildcards shouldn't be expanded on non-Windows systems, and a file
|
||||
// literally named "*.go" doesn't exist, so expect a failure.
|
||||
_, stderr, err := runGoAWK([]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"}, "")
|
||||
if err == nil {
|
||||
t.Fatal("expected error using wildcards on non-Windows system")
|
||||
}
|
||||
expected := "file \"testdata/wildcards/*.txt\" not found\n"
|
||||
if stderr != expected {
|
||||
t.Fatalf("expected %q, got %q", expected, stderr)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
args []string
|
||||
output string
|
||||
}{
|
||||
{
|
||||
[]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"},
|
||||
"testdata/wildcards/one.txt\ntestdata/wildcards/two.txt\n",
|
||||
},
|
||||
{
|
||||
[]string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/one.txt"},
|
||||
"testdata/wildcards/one.txt\nbee\n",
|
||||
},
|
||||
{
|
||||
[]string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/*.txt"},
|
||||
"testdata/wildcards/one.txt\nbee\ntestdata/wildcards/two.txt\nbee\n",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
testName := strings.Join(test.args, " ")
|
||||
t.Run(testName, func(t *testing.T) {
|
||||
stdout, stderr, err := runGoAWK(test.args, "")
|
||||
if err != nil {
|
||||
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
||||
}
|
||||
stdout = strings.Replace(stdout, "\\", "/", -1)
|
||||
if stdout != test.output {
|
||||
t.Fatalf("expected %q, got %q", test.output, stdout)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFILENAME(t *testing.T) {
|
||||
origGoAWKExe := goAWKExe
|
||||
goAWKExe = "../../" + goAWKExe
|
||||
defer func() { goAWKExe = origGoAWKExe }()
|
||||
|
||||
origDir, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = os.Chdir("testdata/filename")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.Chdir(origDir)
|
||||
|
||||
src := `
|
||||
BEGIN { FILENAME = "10"; print(FILENAME, FILENAME<2) }
|
||||
BEGIN { FILENAME = 10; print(FILENAME, FILENAME<2) }
|
||||
{ print(FILENAME, FILENAME<2) }
|
||||
`
|
||||
runAWKs(t, []string{src, "10", "10x"}, "", "10 1\n10 0\n10 0\n10x 1\n", "")
|
||||
}
|
||||
|
||||
func normalizeNewlines(b []byte) []byte {
|
||||
return bytes.Replace(b, []byte("\r\n"), []byte{'\n'}, -1)
|
||||
}
|
||||
|
||||
func TestInputOutputMode(t *testing.T) {
	tests := []struct {
		args   []string
		input  string
		output string
		error  string
	}{
		{[]string{"-icsv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
		{[]string{"-i", "csv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
		{[]string{"-icsv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
		{[]string{"-i", "csv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
		{[]string{"-icsv", "-H", "-ocsv", `{ print @"age", @"name" }`}, "name,age\n\"Bo,ba\",42\nJane,37", "42,\"Bo,ba\"\n37,Jane\n", ""},
		{[]string{"-o", "csv", `BEGIN { print "foo,bar", 3.14, "baz" }`}, "", "\"foo,bar\",3.14,baz\n", ""},
		{[]string{"-iabc", `{}`}, "", "", "invalid input mode \"abc\"\n"},
		{[]string{"-oxyz", `{}`}, "", "", "invalid output mode \"xyz\"\n"},
		{[]string{"-H", `{}`}, "", "", "-H only allowed together with -i\n"},
	}

	for _, test := range tests {
		testName := strings.Join(test.args, " ")
		t.Run(testName, func(t *testing.T) {
			stdout, stderr, err := runGoAWK(test.args, test.input)
			if err != nil {
				if test.error == "" {
					t.Fatalf("expected no error, got %v (%q)", err, stderr)
				} else if stderr != test.error {
					t.Fatalf("expected error message %q, got %q", test.error, stderr)
				}
			}
			if stdout != test.output {
				t.Fatalf("expected %q, got %q", test.output, stdout)
			}
		})
	}
}

func TestMultipleCSVFiles(t *testing.T) {
	// Ensure CSV handling works across multiple files with different headers (field names).
	src := `
{
	for (i=1; i in FIELDS; i++) {
		if (i>1)
			printf ",";
		printf "%s", FIELDS[i]
	}
	printf " "
}
{ print @"name", @"age" }
`
	stdout, stderr, err := runGoAWK([]string{"-i", "csv", "-H", src, "testdata/csv/1.csv", "testdata/csv/2.csv"}, "")
	if err != nil {
		t.Fatalf("expected no error, got %v (%q)", err, stderr)
	}
	expected := `
name,age Bob 42
name,age Jill 37
age,email,name Sarah 25
`[1:]
	if stdout != expected {
		t.Fatalf("expected %q, got %q", expected, stdout)
	}
}

func TestCSVDocExamples(t *testing.T) {
	f, err := os.Open("csv.md")
	if err != nil {
		t.Fatalf("error opening examples file: %v", err)
	}
	defer f.Close()

	var (
		command   string
		output    string
		truncated bool
		n         = 1
	)
	runTest := func() {
		t.Run(fmt.Sprintf("Example%d", n), func(t *testing.T) {
			shell := "/bin/sh"
			if runtime.GOOS == "windows" {
				shell = "sh"
			}
			cmd := exec.Command(shell, "-c", command)
			gotBytes, err := cmd.CombinedOutput()
			if err != nil {
				t.Fatalf("error running %q: %v\n%s", command, err, gotBytes)
			}
			got := string(gotBytes)
			if truncated {
				numLines := strings.Count(output, "\n")
				got = strings.Join(strings.Split(got, "\n")[:numLines], "\n") + "\n"
			}
			got = string(normalizeNewlines([]byte(got)))
			if got != output {
				t.Fatalf("error running %q\ngot:\n%s\nexpected:\n%s", command, got, output)
			}
		})
		n++
	}

	scanner := bufio.NewScanner(f)
	inTest := false
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "$ goawk") {
			if inTest {
				runTest()
			}
			inTest = true
			command = "./" + line[2:]
			output = ""
			truncated = false
		} else if inTest {
			switch line {
			case "```", "":
				runTest()
				inTest = false
			case "...":
				truncated = true
				runTest()
				inTest = false
			default:
				output += line + "\n"
			}
		}
	}
	if scanner.Err() != nil {
		t.Errorf("error reading input: %v", scanner.Err())
	}
	if inTest {
		t.Error("unexpectedly in test at end of file")
	}
}

func TestMandelbrot(t *testing.T) {
|
||||
stdout, stderr, err := runGoAWK([]string{"-v", "width=80", "-v", "height=25", "-f", "testdata/tt.x1_mandelbrot"}, "")
|
||||
if err != nil {
|
||||
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
||||
}
|
||||
expected := `
|
||||
................................................................................
|
||||
......................................................--+-----..................
|
||||
....................................................-----+*+-++-................
|
||||
.................................................--------+* *+-----.............
|
||||
..............................................--------+# #%*-------.........
|
||||
.........................................------------++$ +-----------.....
|
||||
...................................---------* # +* # *+++++%+--...
|
||||
............................----------------++ @ *----..
|
||||
.......................-+----------------+$ %+----..
|
||||
..................-------*++%++**+++---++ #+--.
|
||||
...............----------+* #*++* %*---.
|
||||
.............-------+++++* # #----.
|
||||
....------+-------++**@ @ ------.
|
||||
....------+-------++**@ @ ------.
|
||||
.............-------+++++* # #----.
|
||||
...............----------+* #*++* %*---.
|
||||
..................-------*++%++**+++---++ #+--.
|
||||
.......................-+----------------+$ %+----..
|
||||
............................----------------++ @ *----..
|
||||
...................................---------* # +* # *+++++%+--...
|
||||
.........................................------------++$ +-----------.....
|
||||
..............................................--------+# #%*-------.........
|
||||
.................................................--------+* *+-----.............
|
||||
....................................................-----+*+-++-................
|
||||
......................................................--+-----..................
|
||||
`[1:]
|
||||
if stdout != expected {
|
||||
t.Fatalf("expected:\n%s\ngot:\n%s", expected, stdout)
|
||||
}
|
||||
}
600
src/tool/awk/internal/ast/ast.go
Normal file
@ -0,0 +1,600 @@
// GoAWK parser - abstract syntax tree structs
|
||||
|
||||
package ast
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
. "github.com/benhoyt/goawk/lexer"
|
||||
)
|
||||
|
||||
// Program is an entire AWK program.
|
||||
type Program struct {
|
||||
Begin []Stmts
|
||||
Actions []Action
|
||||
End []Stmts
|
||||
Functions []Function
|
||||
Scalars map[string]int
|
||||
Arrays map[string]int
|
||||
}
|
||||
|
||||
// String returns an indented, pretty-printed version of the parsed
|
||||
// program.
|
||||
func (p *Program) String() string {
|
||||
parts := []string{}
|
||||
for _, ss := range p.Begin {
|
||||
parts = append(parts, "BEGIN {\n"+ss.String()+"}")
|
||||
}
|
||||
for _, a := range p.Actions {
|
||||
parts = append(parts, a.String())
|
||||
}
|
||||
for _, ss := range p.End {
|
||||
parts = append(parts, "END {\n"+ss.String()+"}")
|
||||
}
|
||||
for _, function := range p.Functions {
|
||||
parts = append(parts, function.String())
|
||||
}
|
||||
return strings.Join(parts, "\n\n")
|
||||
}
|
||||
|
||||
// Stmts is a block containing multiple statements.
|
||||
type Stmts []Stmt
|
||||
|
||||
func (ss Stmts) String() string {
|
||||
lines := []string{}
|
||||
for _, s := range ss {
|
||||
subLines := strings.Split(s.String(), "\n")
|
||||
for _, sl := range subLines {
|
||||
lines = append(lines, " "+sl+"\n")
|
||||
}
|
||||
}
|
||||
return strings.Join(lines, "")
|
||||
}
|
||||
|
||||
// Action is pattern-action section of a program.
|
||||
type Action struct {
|
||||
Pattern []Expr
|
||||
Stmts Stmts
|
||||
}
|
||||
|
||||
func (a *Action) String() string {
|
||||
patterns := make([]string, len(a.Pattern))
|
||||
for i, p := range a.Pattern {
|
||||
patterns[i] = p.String()
|
||||
}
|
||||
sep := ""
|
||||
if len(patterns) > 0 && a.Stmts != nil {
|
||||
sep = " "
|
||||
}
|
||||
stmtsStr := ""
|
||||
if a.Stmts != nil {
|
||||
stmtsStr = "{\n" + a.Stmts.String() + "}"
|
||||
}
|
||||
return strings.Join(patterns, ", ") + sep + stmtsStr
|
||||
}
|
||||
|
||||
// Expr is the abstract syntax tree for any AWK expression.
|
||||
type Expr interface {
|
||||
expr()
|
||||
String() string
|
||||
}
|
||||
|
||||
// All these types implement the Expr interface.
|
||||
func (e *FieldExpr) expr() {}
|
||||
func (e *NamedFieldExpr) expr() {}
|
||||
func (e *UnaryExpr) expr() {}
|
||||
func (e *BinaryExpr) expr() {}
|
||||
func (e *ArrayExpr) expr() {}
|
||||
func (e *InExpr) expr() {}
|
||||
func (e *CondExpr) expr() {}
|
||||
func (e *NumExpr) expr() {}
|
||||
func (e *StrExpr) expr() {}
|
||||
func (e *RegExpr) expr() {}
|
||||
func (e *VarExpr) expr() {}
|
||||
func (e *IndexExpr) expr() {}
|
||||
func (e *AssignExpr) expr() {}
|
||||
func (e *AugAssignExpr) expr() {}
|
||||
func (e *IncrExpr) expr() {}
|
||||
func (e *CallExpr) expr() {}
|
||||
func (e *UserCallExpr) expr() {}
|
||||
func (e *MultiExpr) expr() {}
|
||||
func (e *GetlineExpr) expr() {}
|
||||
|
||||
// FieldExpr is an expression like $0.
|
||||
type FieldExpr struct {
|
||||
Index Expr
|
||||
}
|
||||
|
||||
func (e *FieldExpr) String() string {
|
||||
return "$" + e.Index.String()
|
||||
}
|
||||
|
||||
// NamedFieldExpr is an expression like @"name".
|
||||
type NamedFieldExpr struct {
|
||||
Field Expr
|
||||
}
|
||||
|
||||
func (e *NamedFieldExpr) String() string {
|
||||
return "@" + e.Field.String()
|
||||
}
|
||||
|
||||
// UnaryExpr is an expression like -1234.
|
||||
type UnaryExpr struct {
|
||||
Op Token
|
||||
Value Expr
|
||||
}
|
||||
|
||||
func (e *UnaryExpr) String() string {
|
||||
return e.Op.String() + e.Value.String()
|
||||
}
|
||||
|
||||
// BinaryExpr is an expression like 1 + 2.
|
||||
type BinaryExpr struct {
|
||||
Left Expr
|
||||
Op Token
|
||||
Right Expr
|
||||
}
|
||||
|
||||
func (e *BinaryExpr) String() string {
|
||||
var opStr string
|
||||
if e.Op == CONCAT {
|
||||
opStr = " "
|
||||
} else {
|
||||
opStr = " " + e.Op.String() + " "
|
||||
}
|
||||
return "(" + e.Left.String() + opStr + e.Right.String() + ")"
|
||||
}
|
||||
|
||||
// ArrayExpr is an array reference. Not really a stand-alone
|
||||
// expression, except as an argument to split() or a user function
|
||||
// call.
|
||||
type ArrayExpr struct {
|
||||
Scope VarScope
|
||||
Index int
|
||||
Name string
|
||||
}
|
||||
|
||||
func (e *ArrayExpr) String() string {
|
||||
return e.Name
|
||||
}
|
||||
|
||||
// InExpr is an expression like (index in array).
|
||||
type InExpr struct {
|
||||
Index []Expr
|
||||
Array *ArrayExpr
|
||||
}
|
||||
|
||||
func (e *InExpr) String() string {
|
||||
if len(e.Index) == 1 {
|
||||
return "(" + e.Index[0].String() + " in " + e.Array.String() + ")"
|
||||
}
|
||||
indices := make([]string, len(e.Index))
|
||||
for i, index := range e.Index {
|
||||
indices[i] = index.String()
|
||||
}
|
||||
return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")"
|
||||
}
|
||||
|
||||
// CondExpr is an expression like cond ? 1 : 0.
|
||||
type CondExpr struct {
|
||||
Cond Expr
|
||||
True Expr
|
||||
False Expr
|
||||
}
|
||||
|
||||
func (e *CondExpr) String() string {
|
||||
return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")"
|
||||
}
|
||||
|
||||
// NumExpr is a literal number like 1234.
|
||||
type NumExpr struct {
|
||||
Value float64
|
||||
}
|
||||
|
||||
func (e *NumExpr) String() string {
|
||||
if e.Value == float64(int(e.Value)) {
|
||||
return strconv.Itoa(int(e.Value))
|
||||
} else {
|
||||
return fmt.Sprintf("%.6g", e.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// StrExpr is a literal string like "foo".
|
||||
type StrExpr struct {
|
||||
Value string
|
||||
}
|
||||
|
||||
func (e *StrExpr) String() string {
|
||||
return strconv.Quote(e.Value)
|
||||
}
|
||||
|
||||
// RegExpr is a stand-alone regex expression, equivalent to:
|
||||
// $0 ~ /regex/.
|
||||
type RegExpr struct {
|
||||
Regex string
|
||||
}
|
||||
|
||||
func (e *RegExpr) String() string {
|
||||
escaped := strings.Replace(e.Regex, "/", `\/`, -1)
|
||||
return "/" + escaped + "/"
|
||||
}
|
||||
|
||||
type VarScope int
|
||||
|
||||
const (
|
||||
ScopeSpecial VarScope = iota
|
||||
ScopeGlobal
|
||||
ScopeLocal
|
||||
)
|
||||
|
||||
// VarExpr is a variable reference (special var, global, or local).
|
||||
// Index is the resolved variable index used by the interpreter; Name
|
||||
// is the original name used by String().
|
||||
type VarExpr struct {
|
||||
Scope VarScope
|
||||
Index int
|
||||
Name string
|
||||
}
|
||||
|
||||
func (e *VarExpr) String() string {
|
||||
return e.Name
|
||||
}
|
||||
|
||||
// IndexExpr is an expression like a[k] (rvalue or lvalue).
|
||||
type IndexExpr struct {
|
||||
Array *ArrayExpr
|
||||
Index []Expr
|
||||
}
|
||||
|
||||
func (e *IndexExpr) String() string {
|
||||
indices := make([]string, len(e.Index))
|
||||
for i, index := range e.Index {
|
||||
indices[i] = index.String()
|
||||
}
|
||||
return e.Array.String() + "[" + strings.Join(indices, ", ") + "]"
|
||||
}
|
||||
|
||||
// AssignExpr is an expression like x = 1234.
|
||||
type AssignExpr struct {
|
||||
Left Expr // can be one of: var, array[x], $n
|
||||
Right Expr
|
||||
}
|
||||
|
||||
func (e *AssignExpr) String() string {
|
||||
return e.Left.String() + " = " + e.Right.String()
|
||||
}
|
||||
|
||||
// AugAssignExpr is an assignment expression like x += 5.
|
||||
type AugAssignExpr struct {
|
||||
Left Expr // can be one of: var, array[x], $n
|
||||
Op Token
|
||||
Right Expr
|
||||
}
|
||||
|
||||
func (e *AugAssignExpr) String() string {
|
||||
return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String()
|
||||
}
|
||||
|
||||
// IncrExpr is an increment or decrement expression like x++ or --y.
|
||||
type IncrExpr struct {
|
||||
Expr Expr
|
||||
Op Token
|
||||
Pre bool
|
||||
}
|
||||
|
||||
func (e *IncrExpr) String() string {
|
||||
if e.Pre {
|
||||
return e.Op.String() + e.Expr.String()
|
||||
} else {
|
||||
return e.Expr.String() + e.Op.String()
|
||||
}
|
||||
}
|
||||
|
||||
// CallExpr is a builtin function call like length($1).
|
||||
type CallExpr struct {
|
||||
Func Token
|
||||
Args []Expr
|
||||
}
|
||||
|
||||
func (e *CallExpr) String() string {
|
||||
args := make([]string, len(e.Args))
|
||||
for i, a := range e.Args {
|
||||
args[i] = a.String()
|
||||
}
|
||||
return e.Func.String() + "(" + strings.Join(args, ", ") + ")"
|
||||
}
|
||||
|
||||
// UserCallExpr is a user-defined function call like my_func(1, 2, 3)
|
||||
//
|
||||
// Index is the resolved function index used by the interpreter; Name
|
||||
// is the original name used by String().
|
||||
type UserCallExpr struct {
|
||||
Native bool // false = AWK-defined function, true = native Go func
|
||||
Index int
|
||||
Name string
|
||||
Args []Expr
|
||||
}
|
||||
|
||||
func (e *UserCallExpr) String() string {
|
||||
args := make([]string, len(e.Args))
|
||||
for i, a := range e.Args {
|
||||
args[i] = a.String()
|
||||
}
|
||||
return e.Name + "(" + strings.Join(args, ", ") + ")"
|
||||
}
|
||||
|
||||
// MultiExpr isn't an interpretable expression, but it's used as a
|
||||
// pseudo-expression for print[f] parsing.
|
||||
type MultiExpr struct {
|
||||
Exprs []Expr
|
||||
}
|
||||
|
||||
func (e *MultiExpr) String() string {
|
||||
exprs := make([]string, len(e.Exprs))
|
||||
for i, e := range e.Exprs {
|
||||
exprs[i] = e.String()
|
||||
}
|
||||
return "(" + strings.Join(exprs, ", ") + ")"
|
||||
}
|
||||
|
||||
// GetlineExpr is an expression read from file or pipe input.
|
||||
type GetlineExpr struct {
|
||||
Command Expr
|
||||
Target Expr
|
||||
File Expr
|
||||
}
|
||||
|
||||
func (e *GetlineExpr) String() string {
|
||||
s := ""
|
||||
if e.Command != nil {
|
||||
s += e.Command.String() + " |"
|
||||
}
|
||||
s += "getline"
|
||||
if e.Target != nil {
|
||||
s += " " + e.Target.String()
|
||||
}
|
||||
if e.File != nil {
|
||||
s += " <" + e.File.String()
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// IsLValue returns true if the given expression can be used as an
|
||||
// lvalue (on the left-hand side of an assignment, in a ++ or --
|
||||
// operation, or as the third argument to sub or gsub).
|
||||
func IsLValue(expr Expr) bool {
|
||||
switch expr.(type) {
|
||||
case *VarExpr, *IndexExpr, *FieldExpr:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Stmt is the abstract syntax tree for any AWK statement.
|
||||
type Stmt interface {
|
||||
stmt()
|
||||
String() string
|
||||
}
|
||||
|
||||
// All these types implement the Stmt interface.
|
||||
func (s *PrintStmt) stmt() {}
|
||||
func (s *PrintfStmt) stmt() {}
|
||||
func (s *ExprStmt) stmt() {}
|
||||
func (s *IfStmt) stmt() {}
|
||||
func (s *ForStmt) stmt() {}
|
||||
func (s *ForInStmt) stmt() {}
|
||||
func (s *WhileStmt) stmt() {}
|
||||
func (s *DoWhileStmt) stmt() {}
|
||||
func (s *BreakStmt) stmt() {}
|
||||
func (s *ContinueStmt) stmt() {}
|
||||
func (s *NextStmt) stmt() {}
|
||||
func (s *ExitStmt) stmt() {}
|
||||
func (s *DeleteStmt) stmt() {}
|
||||
func (s *ReturnStmt) stmt() {}
|
||||
func (s *BlockStmt) stmt() {}
|
||||
|
||||
// PrintStmt is a statement like print $1, $3.
|
||||
type PrintStmt struct {
|
||||
Args []Expr
|
||||
Redirect Token
|
||||
Dest Expr
|
||||
}
|
||||
|
||||
func (s *PrintStmt) String() string {
|
||||
return printString("print", s.Args, s.Redirect, s.Dest)
|
||||
}
|
||||
|
||||
func printString(f string, args []Expr, redirect Token, dest Expr) string {
|
||||
parts := make([]string, len(args))
|
||||
for i, a := range args {
|
||||
parts[i] = a.String()
|
||||
}
|
||||
str := f + " " + strings.Join(parts, ", ")
|
||||
if dest != nil {
|
||||
str += " " + redirect.String() + dest.String()
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// PrintfStmt is a statement like printf "%3d", 1234.
|
||||
type PrintfStmt struct {
|
||||
Args []Expr
|
||||
Redirect Token
|
||||
Dest Expr
|
||||
}
|
||||
|
||||
func (s *PrintfStmt) String() string {
|
||||
return printString("printf", s.Args, s.Redirect, s.Dest)
|
||||
}
|
||||
|
||||
// ExprStmt is statement like a bare function call: my_func(x).
|
||||
type ExprStmt struct {
|
||||
Expr Expr
|
||||
}
|
||||
|
||||
func (s *ExprStmt) String() string {
|
||||
return s.Expr.String()
|
||||
}
|
||||
|
||||
// IfStmt is an if or if-else statement.
|
||||
type IfStmt struct {
|
||||
Cond Expr
|
||||
Body Stmts
|
||||
Else Stmts
|
||||
}
|
||||
|
||||
func (s *IfStmt) String() string {
|
||||
str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
|
||||
if len(s.Else) > 0 {
|
||||
str += " else {\n" + s.Else.String() + "}"
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// ForStmt is a C-like for loop: for (i=0; i<10; i++) print i.
|
||||
type ForStmt struct {
|
||||
Pre Stmt
|
||||
Cond Expr
|
||||
Post Stmt
|
||||
Body Stmts
|
||||
}
|
||||
|
||||
func (s *ForStmt) String() string {
|
||||
preStr := ""
|
||||
if s.Pre != nil {
|
||||
preStr = s.Pre.String()
|
||||
}
|
||||
condStr := ""
|
||||
if s.Cond != nil {
|
||||
condStr = " " + trimParens(s.Cond.String())
|
||||
}
|
||||
postStr := ""
|
||||
if s.Post != nil {
|
||||
postStr = " " + s.Post.String()
|
||||
}
|
||||
return "for (" + preStr + ";" + condStr + ";" + postStr + ") {\n" + s.Body.String() + "}"
|
||||
}
|
||||
|
||||
// ForInStmt is a for loop like for (k in a) print k, a[k].
|
||||
type ForInStmt struct {
|
||||
Var *VarExpr
|
||||
Array *ArrayExpr
|
||||
Body Stmts
|
||||
}
|
||||
|
||||
func (s *ForInStmt) String() string {
|
||||
return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}"
|
||||
}
|
||||
|
||||
// WhileStmt is a while loop.
|
||||
type WhileStmt struct {
|
||||
Cond Expr
|
||||
Body Stmts
|
||||
}
|
||||
|
||||
func (s *WhileStmt) String() string {
|
||||
return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
|
||||
}
|
||||
|
||||
// DoWhileStmt is a do-while loop.
|
||||
type DoWhileStmt struct {
|
||||
Body Stmts
|
||||
Cond Expr
|
||||
}
|
||||
|
||||
func (s *DoWhileStmt) String() string {
|
||||
return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")"
|
||||
}
|
||||
|
||||
// BreakStmt is a break statement.
|
||||
type BreakStmt struct{}
|
||||
|
||||
func (s *BreakStmt) String() string {
|
||||
return "break"
|
||||
}
|
||||
|
||||
// ContinueStmt is a continue statement.
|
||||
type ContinueStmt struct{}
|
||||
|
||||
func (s *ContinueStmt) String() string {
|
||||
return "continue"
|
||||
}
|
||||
|
||||
// NextStmt is a next statement.
|
||||
type NextStmt struct{}
|
||||
|
||||
func (s *NextStmt) String() string {
|
||||
return "next"
|
||||
}
|
||||
|
||||
// ExitStmt is an exit statement.
|
||||
type ExitStmt struct {
|
||||
Status Expr
|
||||
}
|
||||
|
||||
func (s *ExitStmt) String() string {
|
||||
var statusStr string
|
||||
if s.Status != nil {
|
||||
statusStr = " " + s.Status.String()
|
||||
}
|
||||
return "exit" + statusStr
|
||||
}
|
||||
|
||||
// DeleteStmt is a statement like delete a[k].
|
||||
type DeleteStmt struct {
|
||||
Array *ArrayExpr
|
||||
Index []Expr
|
||||
}
|
||||
|
||||
func (s *DeleteStmt) String() string {
|
||||
indices := make([]string, len(s.Index))
|
||||
for i, index := range s.Index {
|
||||
indices[i] = index.String()
|
||||
}
|
||||
return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]"
|
||||
}
|
||||
|
||||
// ReturnStmt is a return statement.
|
||||
type ReturnStmt struct {
|
||||
Value Expr
|
||||
}
|
||||
|
||||
func (s *ReturnStmt) String() string {
|
||||
var valueStr string
|
||||
if s.Value != nil {
|
||||
valueStr = " " + s.Value.String()
|
||||
}
|
||||
return "return" + valueStr
|
||||
}
|
||||
|
||||
// BlockStmt is a stand-alone block like { print "x" }.
|
||||
type BlockStmt struct {
|
||||
Body Stmts
|
||||
}
|
||||
|
||||
func (s *BlockStmt) String() string {
|
||||
return "{\n" + s.Body.String() + "}"
|
||||
}
|
||||
|
||||
// Function is the AST for a user-defined function.
|
||||
type Function struct {
|
||||
Name string
|
||||
Params []string
|
||||
Arrays []bool
|
||||
Body Stmts
|
||||
}
|
||||
|
||||
func (f *Function) String() string {
|
||||
return "function " + f.Name + "(" + strings.Join(f.Params, ", ") + ") {\n" +
|
||||
f.Body.String() + "}"
|
||||
}
|
||||
|
||||
func trimParens(s string) string {
|
||||
if strings.HasPrefix(s, "(") && strings.HasSuffix(s, ")") {
|
||||
s = s[1 : len(s)-1]
|
||||
}
|
||||
return s
|
||||
}
100
src/tool/awk/internal/ast/specialvars.go
Normal file
@ -0,0 +1,100 @@
// Special variable constants

package ast

import (
	"fmt"
)

const (
	V_ILLEGAL = iota
	V_ARGC
	V_CONVFMT
	V_FILENAME
	V_FNR
	V_FS
	V_INPUTMODE
	V_NF
	V_NR
	V_OFMT
	V_OFS
	V_ORS
	V_OUTPUTMODE
	V_RLENGTH
	V_RS
	V_RSTART
	V_RT
	V_SUBSEP

	V_LAST = V_SUBSEP
)

var specialVars = map[string]int{
	"ARGC":       V_ARGC,
	"CONVFMT":    V_CONVFMT,
	"FILENAME":   V_FILENAME,
	"FNR":        V_FNR,
	"FS":         V_FS,
	"INPUTMODE":  V_INPUTMODE,
	"NF":         V_NF,
	"NR":         V_NR,
	"OFMT":       V_OFMT,
	"OFS":        V_OFS,
	"ORS":        V_ORS,
	"OUTPUTMODE": V_OUTPUTMODE,
	"RLENGTH":    V_RLENGTH,
	"RS":         V_RS,
	"RSTART":     V_RSTART,
	"RT":         V_RT,
	"SUBSEP":     V_SUBSEP,
}

// SpecialVarIndex returns the "index" of the special variable, or 0
// if it's not a special variable.
func SpecialVarIndex(name string) int {
	return specialVars[name]
}

// SpecialVarName returns the name of the special variable by index.
func SpecialVarName(index int) string {
	switch index {
	case V_ILLEGAL:
		return "ILLEGAL"
	case V_ARGC:
		return "ARGC"
	case V_CONVFMT:
		return "CONVFMT"
	case V_FILENAME:
		return "FILENAME"
	case V_FNR:
		return "FNR"
	case V_FS:
		return "FS"
	case V_INPUTMODE:
		return "INPUTMODE"
	case V_NF:
		return "NF"
	case V_NR:
		return "NR"
	case V_OFMT:
		return "OFMT"
	case V_OFS:
		return "OFS"
	case V_ORS:
		return "ORS"
	case V_OUTPUTMODE:
		return "OUTPUTMODE"
	case V_RLENGTH:
		return "RLENGTH"
	case V_RS:
		return "RS"
	case V_RSTART:
		return "RSTART"
	case V_RT:
		return "RT"
	case V_SUBSEP:
		return "SUBSEP"
	default:
		return fmt.Sprintf("<unknown special var %d>", index)
	}
}
46
src/tool/awk/internal/ast/specialvars_test.go
Normal file
@ -0,0 +1,46 @@
package ast

import (
	"testing"
)

func TestNameIndex(t *testing.T) {
	tests := []struct {
		name  string
		index int
	}{
		{"ILLEGAL", V_ILLEGAL},
		{"ARGC", V_ARGC},
		{"CONVFMT", V_CONVFMT},
		{"FILENAME", V_FILENAME},
		{"FNR", V_FNR},
		{"FS", V_FS},
		{"INPUTMODE", V_INPUTMODE},
		{"NF", V_NF},
		{"NR", V_NR},
		{"OFMT", V_OFMT},
		{"OFS", V_OFS},
		{"ORS", V_ORS},
		{"OUTPUTMODE", V_OUTPUTMODE},
		{"RLENGTH", V_RLENGTH},
		{"RS", V_RS},
		{"RSTART", V_RSTART},
		{"RT", V_RT},
		{"SUBSEP", V_SUBSEP},
		{"<unknown special var 42>", 42},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			name := SpecialVarName(test.index)
			if name != test.name {
				t.Errorf("got %q, want %q", name, test.name)
			}
			if test.index <= V_LAST {
				index := SpecialVarIndex(test.name)
				if index != test.index {
					t.Errorf("got %d, want %d", index, test.index)
				}
			}
		})
	}
}
1005
src/tool/awk/internal/compiler/compiler.go
Normal file
File diff suppressed because it is too large
495
src/tool/awk/internal/compiler/disassembler.go
Normal file
@ -0,0 +1,495 @@
// Disassembles compiled program to text assembly instructions
|
||||
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/benhoyt/goawk/internal/ast"
|
||||
"github.com/benhoyt/goawk/lexer"
|
||||
)
|
||||
|
||||
// Disassemble writes a human-readable form of the program's virtual machine
|
||||
// instructions to writer.
|
||||
func (p *Program) Disassemble(writer io.Writer) error {
|
||||
if p.Begin != nil {
|
||||
d := &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: p.Begin,
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
}
|
||||
err := d.disassemble("BEGIN")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, action := range p.Actions {
|
||||
switch len(action.Pattern) {
|
||||
case 0:
|
||||
// Nothing to do here.
|
||||
case 1:
|
||||
d := &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: action.Pattern[0],
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
}
|
||||
err := d.disassemble("pattern")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case 2:
|
||||
d := &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: action.Pattern[0],
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
}
|
||||
err := d.disassemble("start")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d = &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: action.Pattern[1],
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
}
|
||||
err = d.disassemble("stop")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if len(action.Body) > 0 {
|
||||
d := &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: action.Body,
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
}
|
||||
err := d.disassemble("{ body }")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if p.End != nil {
|
||||
d := &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: p.End,
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
}
|
||||
err := d.disassemble("END")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for i, f := range p.Functions {
|
||||
d := &disassembler{
|
||||
program: p,
|
||||
writer: writer,
|
||||
code: f.Body,
|
||||
nativeFuncNames: p.nativeFuncNames,
|
||||
funcIndex: i,
|
||||
}
|
||||
err := d.disassemble("function " + f.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Disassembles a single block of opcodes.
|
||||
type disassembler struct {
|
||||
program *Program
|
||||
writer io.Writer
|
||||
code []Opcode
|
||||
nativeFuncNames []string
|
||||
funcIndex int
|
||||
ip int
|
||||
opAddr int
|
||||
err error
|
||||
}
|
||||
|
||||
func (d *disassembler) disassemble(prefix string) error {
|
||||
if prefix != "" {
|
||||
d.writef(" // %s\n", prefix)
|
||||
}
|
||||
|
||||
for d.ip < len(d.code) && d.err == nil {
|
||||
d.opAddr = d.ip
|
||||
op := d.fetch()
|
||||
|
||||
switch op {
|
||||
case Num:
|
||||
index := d.fetch()
|
||||
num := d.program.Nums[index]
|
||||
if num == float64(int(num)) {
|
||||
d.writeOpf("Num %d (%d)", int(num), index)
|
||||
} else {
|
||||
d.writeOpf("Num %.6g (%d)", num, index)
|
||||
}
|
||||
|
||||
case Str:
|
||||
index := d.fetch()
|
||||
d.writeOpf("Str %q (%d)", d.program.Strs[index], index)
|
||||
|
||||
case FieldInt:
|
||||
index := d.fetch()
|
||||
d.writeOpf("FieldInt %d", index)
|
||||
|
||||
case FieldByNameStr:
|
||||
index := d.fetch()
|
||||
d.writeOpf("FieldByNameStr %q (%d)", d.program.Strs[index], index)
|
||||
|
||||
case Global:
|
||||
index := d.fetch()
|
||||
d.writeOpf("Global %s", d.program.scalarNames[index])
|
||||
|
||||
case Local:
|
||||
index := int(d.fetch())
|
||||
d.writeOpf("Local %s", d.localName(index))
|
||||
|
||||
case Special:
|
||||
index := d.fetch()
|
||||
d.writeOpf("Special %s", ast.SpecialVarName(int(index)))
|
||||
|
||||
case ArrayGlobal:
|
||||
arrayIndex := d.fetch()
|
||||
d.writeOpf("ArrayGlobal %s", d.program.arrayNames[arrayIndex])
|
||||
|
||||
case ArrayLocal:
|
||||
arrayIndex := d.fetch()
|
||||
d.writeOpf("ArrayLocal %s", d.localArrayName(int(arrayIndex)))
|
||||
|
||||
case InGlobal:
|
||||
arrayIndex := d.fetch()
|
||||
d.writeOpf("InGlobal %s", d.program.arrayNames[arrayIndex])
|
||||
|
||||
case InLocal:
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("InLocal %s", d.localArrayName(arrayIndex))
|
||||
|
||||
case AssignGlobal:
|
||||
index := d.fetch()
|
||||
d.writeOpf("AssignGlobal %s", d.program.scalarNames[index])
|
||||
|
||||
case AssignLocal:
|
||||
index := int(d.fetch())
|
||||
d.writeOpf("AssignLocal %s", d.localName(index))
|
||||
|
||||
case AssignSpecial:
|
||||
index := d.fetch()
|
||||
d.writeOpf("AssignSpecial %s", ast.SpecialVarName(int(index)))
|
||||
|
||||
case AssignArrayGlobal:
|
||||
arrayIndex := d.fetch()
|
||||
d.writeOpf("AssignArrayGlobal %s", d.program.arrayNames[arrayIndex])
|
||||
|
||||
case AssignArrayLocal:
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex))
|
||||
|
||||
case Delete:
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex))
|
||||
|
||||
case DeleteAll:
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex))
|
||||
|
||||
case IncrField:
|
||||
amount := d.fetch()
|
||||
d.writeOpf("IncrField %d", amount)
|
||||
|
||||
case IncrGlobal:
|
||||
amount := d.fetch()
|
||||
index := d.fetch()
|
||||
d.writeOpf("IncrGlobal %d %s", amount, d.program.scalarNames[index])
|
||||
|
||||
case IncrLocal:
|
||||
amount := d.fetch()
|
||||
index := int(d.fetch())
|
||||
d.writeOpf("IncrLocal %d %s", amount, d.localName(index))
|
||||
|
||||
case IncrSpecial:
|
||||
amount := d.fetch()
|
||||
index := d.fetch()
|
||||
d.writeOpf("IncrSpecial %d %s", amount, ast.SpecialVarName(int(index)))
|
||||
|
||||
case IncrArrayGlobal:
|
||||
amount := d.fetch()
|
||||
arrayIndex := d.fetch()
|
||||
d.writeOpf("IncrArrayGlobal %d %s", amount, d.program.arrayNames[arrayIndex])
|
||||
|
||||
case IncrArrayLocal:
|
||||
amount := d.fetch()
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("IncrArrayLocal %d %s", amount, d.localArrayName(arrayIndex))
|
||||
|
||||
case AugAssignField:
|
||||
operation := AugOp(d.fetch())
|
||||
d.writeOpf("AugAssignField %s", operation)
|
||||
|
||||
case AugAssignGlobal:
|
||||
operation := AugOp(d.fetch())
|
||||
index := d.fetch()
|
||||
d.writeOpf("AugAssignGlobal %s %s", operation, d.program.scalarNames[index])
|
||||
|
||||
case AugAssignLocal:
|
||||
operation := AugOp(d.fetch())
|
||||
index := int(d.fetch())
|
||||
d.writeOpf("AugAssignLocal %s %s", operation, d.localName(index))
|
||||
|
||||
case AugAssignSpecial:
|
||||
operation := AugOp(d.fetch())
|
||||
index := d.fetch()
|
||||
d.writeOpf("AugAssignSpecial %s %d", operation, ast.SpecialVarName(int(index)))
|
||||
|
||||
case AugAssignArrayGlobal:
|
||||
operation := AugOp(d.fetch())
|
||||
arrayIndex := d.fetch()
|
||||
d.writeOpf("AugAssignArrayGlobal %s %s", operation, d.program.arrayNames[arrayIndex])
|
||||
|
||||
case AugAssignArrayLocal:
|
||||
operation := AugOp(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("AugAssignArrayLocal %s %s", operation, d.localArrayName(arrayIndex))
|
||||
|
||||
case Regex:
|
||||
regexIndex := d.fetch()
|
||||
d.writeOpf("Regex %q (%d)", d.program.Regexes[regexIndex], regexIndex)
|
||||
|
||||
case IndexMulti:
|
||||
num := d.fetch()
|
||||
d.writeOpf("IndexMulti %d", num)
|
||||
|
||||
case ConcatMulti:
|
||||
num := d.fetch()
|
||||
d.writeOpf("ConcatMulti %d", num)
|
||||
|
||||
case Jump:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("Jump 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpFalse:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpFalse 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpTrue:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpTrue 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpEquals:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpEquals 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpNotEquals:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpNotEquals 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpLess:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpLess 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpGreater:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpGreater 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpLessOrEqual:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpLessOrEqual 0x%04x", d.ip+int(offset))
|
||||
|
||||
case JumpGreaterOrEqual:
|
||||
offset := d.fetch()
|
||||
d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset))
|
||||
|
||||
case ForIn:
|
||||
varScope := ast.VarScope(d.fetch())
|
||||
varIndex := int(d.fetch())
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
offset := d.fetch()
|
||||
d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset))
|
||||
|
||||
case CallBuiltin:
|
||||
builtinOp := BuiltinOp(d.fetch())
|
||||
d.writeOpf("CallBuiltin %s", builtinOp)
|
||||
|
||||
case CallSplit:
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex))
|
||||
|
||||
case CallSplitSep:
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex))
|
||||
|
||||
case CallSprintf:
|
||||
numArgs := d.fetch()
|
||||
d.writeOpf("CallSprintf %d", numArgs)
|
||||
|
||||
case CallUser:
|
||||
funcIndex := d.fetch()
|
||||
numArrayArgs := int(d.fetch())
|
||||
var arrayArgs []string
|
||||
for i := 0; i < numArrayArgs; i++ {
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex))
|
||||
}
|
||||
d.writeOpf("CallUser %s [%s]", d.program.Functions[funcIndex].Name, strings.Join(arrayArgs, ", "))
|
||||
|
||||
case CallNative:
|
||||
funcIndex := d.fetch()
|
||||
numArgs := d.fetch()
|
||||
d.writeOpf("CallNative %s %d", d.nativeFuncNames[funcIndex], numArgs)
|
||||
|
||||
case Nulls:
|
||||
numNulls := d.fetch()
|
||||
d.writeOpf("Nulls %d", numNulls)
|
||||
|
||||
case Print:
|
||||
numArgs := d.fetch()
|
||||
redirect := lexer.Token(d.fetch())
|
||||
if redirect == lexer.ILLEGAL {
|
||||
d.writeOpf("Print %d", numArgs)
|
||||
} else {
|
||||
d.writeOpf("Print %d %s", numArgs, redirect)
|
||||
}
|
||||
|
||||
case Printf:
|
||||
numArgs := d.fetch()
|
||||
redirect := lexer.Token(d.fetch())
|
||||
if redirect == lexer.ILLEGAL {
|
||||
d.writeOpf("Printf %d", numArgs)
|
||||
} else {
|
||||
d.writeOpf("Printf %d %s", numArgs, redirect)
|
||||
}
|
||||
|
||||
case Getline:
|
||||
redirect := lexer.Token(d.fetch())
|
||||
d.writeOpf("Getline %s", redirect)
|
||||
|
||||
case GetlineField:
|
||||
redirect := lexer.Token(d.fetch())
|
||||
d.writeOpf("GetlineField %s", redirect)
|
||||
|
||||
case GetlineGlobal:
|
||||
redirect := lexer.Token(d.fetch())
|
||||
index := d.fetch()
|
||||
d.writeOpf("GetlineGlobal %s %s", redirect, d.program.scalarNames[index])
|
||||
|
||||
case GetlineLocal:
|
||||
redirect := lexer.Token(d.fetch())
|
||||
index := int(d.fetch())
|
||||
d.writeOpf("GetlineLocal %s %s", redirect, d.localName(index))
|
||||
|
||||
case GetlineSpecial:
|
||||
redirect := lexer.Token(d.fetch())
|
||||
index := d.fetch()
|
||||
d.writeOpf("GetlineSpecial %s %s", redirect, ast.SpecialVarName(int(index)))
|
||||
|
||||
case GetlineArray:
|
||||
redirect := lexer.Token(d.fetch())
|
||||
arrayScope := ast.VarScope(d.fetch())
|
||||
arrayIndex := int(d.fetch())
|
||||
d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex))
|
||||
|
||||
default:
|
||||
// Handles all other opcodes with no arguments
|
||||
d.writeOpf("%s", op)
|
||||
}
|
||||
}
|
||||
|
||||
d.writef("\n")
|
||||
return d.err
|
||||
}
|
||||
|
||||
// Fetch the next opcode and increment the "instruction pointer".
|
||||
func (d *disassembler) fetch() Opcode {
|
||||
op := d.code[d.ip]
|
||||
d.ip++
|
||||
return op
|
||||
}
|
||||
|
||||
// Write formatted string to the disassembly output.
|
||||
func (d *disassembler) writef(format string, args ...interface{}) {
|
||||
if d.err != nil {
|
||||
return
|
||||
}
|
||||
_, d.err = fmt.Fprintf(d.writer, format, args...)
|
||||
}
|
||||
|
||||
// Write formatted opcode (with address and newline) to disassembly output.
|
||||
func (d *disassembler) writeOpf(format string, args ...interface{}) {
|
||||
if d.err != nil {
|
||||
return
|
||||
}
|
||||
addrStr := fmt.Sprintf("%04x", d.opAddr)
|
||||
_, d.err = fmt.Fprintf(d.writer, addrStr+" "+format+"\n", args...)
|
||||
}
|
||||
|
||||
// Return the scalar variable name described by scope and index.
|
||||
func (d *disassembler) varName(scope ast.VarScope, index int) string {
|
||||
switch scope {
|
||||
case ast.ScopeGlobal:
|
||||
return d.program.scalarNames[index]
|
||||
case ast.ScopeLocal:
|
||||
return d.localName(index)
|
||||
default: // ScopeSpecial
|
||||
return ast.SpecialVarName(index)
|
||||
}
|
||||
}
|
||||
|
||||
// Return the local variable name with the given index.
|
||||
func (d *disassembler) localName(index int) string {
|
||||
f := d.program.Functions[d.funcIndex]
|
||||
n := 0
|
||||
for i, p := range f.Params {
|
||||
if f.Arrays[i] {
|
||||
continue
|
||||
}
|
||||
if n == index {
|
||||
return p
|
||||
}
|
||||
n++
|
||||
}
|
||||
panic(fmt.Sprintf("unexpected local variable index %d", index))
|
||||
}
|
||||
|
||||
// Return the array variable name describes by scope and index.
|
||||
func (d *disassembler) arrayName(scope ast.VarScope, index int) string {
|
||||
if scope == ast.ScopeLocal {
|
||||
return d.localArrayName(index)
|
||||
}
|
||||
return d.program.arrayNames[index]
|
||||
}
|
||||
|
||||
// Return the local array name with the given index.
|
||||
func (d *disassembler) localArrayName(index int) string {
|
||||
f := d.program.Functions[d.funcIndex]
|
||||
n := 0
|
||||
for i, p := range f.Params {
|
||||
if !f.Arrays[i] {
|
||||
continue
|
||||
}
|
||||
if n == index {
|
||||
return p
|
||||
}
|
||||
n++
|
||||
}
|
||||
panic(fmt.Sprintf("unexpected local array index %d", index))
|
||||
}
51
src/tool/awk/internal/compiler/disassembler_test.go
Normal file
@ -0,0 +1,51 @@
package compiler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDisassembler(t *testing.T) {
|
||||
// Note: this doesn't really test the disassembly, just that each opcode
|
||||
// disassembly includes the opcode name, to help catch silly typos.
|
||||
for op := Nop; op < EndOpcode; op++ {
|
||||
t.Run(op.String(), func(t *testing.T) {
|
||||
p := Program{
|
||||
Begin: []Opcode{op, 0, 0, 0, 0, 0, 0, 0},
|
||||
Functions: []Function{
|
||||
{
|
||||
Name: "f",
|
||||
Params: []string{"a", "k"},
|
||||
Arrays: []bool{true, false},
|
||||
NumScalars: 1,
|
||||
NumArrays: 1,
|
||||
},
|
||||
},
|
||||
Nums: []float64{0},
|
||||
Strs: []string{""},
|
||||
Regexes: []*regexp.Regexp{regexp.MustCompile("")},
|
||||
scalarNames: []string{"s"},
|
||||
arrayNames: []string{"a"},
|
||||
nativeFuncNames: []string{"n"},
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err := p.Disassemble(&buf)
|
||||
if err != nil {
|
||||
t.Fatalf("error disassembling opcode %s: %v", op, err)
|
||||
}
|
||||
lines := strings.Split(buf.String(), "\n")
|
||||
if strings.TrimSpace(lines[0]) != "// BEGIN" {
|
||||
t.Fatalf("first line should be \"// BEGIN\", not %q", lines[0])
|
||||
}
|
||||
fields := strings.Fields(lines[1])
|
||||
if fields[0] != "0000" {
|
||||
t.Fatalf("address should be \"0000\", not %q", fields[0])
|
||||
}
|
||||
if fields[1] != op.String() {
|
||||
t.Fatalf("opcode name should be %q, not %q", op.String(), fields[1])
|
||||
}
|
||||
})
|
||||
}
|
||||
}
174
src/tool/awk/internal/compiler/opcode_string.go
Normal file
@ -0,0 +1,174 @@
// Code generated by "stringer -type=Opcode,AugOp,BuiltinOp"; DO NOT EDIT.
|
||||
|
||||
package compiler
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[Nop-0]
|
||||
_ = x[Num-1]
|
||||
_ = x[Str-2]
|
||||
_ = x[Dupe-3]
|
||||
_ = x[Drop-4]
|
||||
_ = x[Swap-5]
|
||||
_ = x[Field-6]
|
||||
_ = x[FieldInt-7]
|
||||
_ = x[FieldByName-8]
|
||||
_ = x[FieldByNameStr-9]
|
||||
_ = x[Global-10]
|
||||
_ = x[Local-11]
|
||||
_ = x[Special-12]
|
||||
_ = x[ArrayGlobal-13]
|
||||
_ = x[ArrayLocal-14]
|
||||
_ = x[InGlobal-15]
|
||||
_ = x[InLocal-16]
|
||||
_ = x[AssignField-17]
|
||||
_ = x[AssignGlobal-18]
|
||||
_ = x[AssignLocal-19]
|
||||
_ = x[AssignSpecial-20]
|
||||
_ = x[AssignArrayGlobal-21]
|
||||
_ = x[AssignArrayLocal-22]
|
||||
_ = x[Delete-23]
|
||||
_ = x[DeleteAll-24]
|
||||
_ = x[IncrField-25]
|
||||
_ = x[IncrGlobal-26]
|
||||
_ = x[IncrLocal-27]
|
||||
_ = x[IncrSpecial-28]
|
||||
_ = x[IncrArrayGlobal-29]
|
||||
_ = x[IncrArrayLocal-30]
|
||||
_ = x[AugAssignField-31]
|
||||
_ = x[AugAssignGlobal-32]
|
||||
_ = x[AugAssignLocal-33]
|
||||
_ = x[AugAssignSpecial-34]
|
||||
_ = x[AugAssignArrayGlobal-35]
|
||||
_ = x[AugAssignArrayLocal-36]
|
||||
_ = x[Regex-37]
|
||||
_ = x[IndexMulti-38]
|
||||
_ = x[ConcatMulti-39]
|
||||
_ = x[Add-40]
|
||||
_ = x[Subtract-41]
|
||||
_ = x[Multiply-42]
|
||||
_ = x[Divide-43]
|
||||
_ = x[Power-44]
|
||||
_ = x[Modulo-45]
|
||||
_ = x[Equals-46]
|
||||
_ = x[NotEquals-47]
|
||||
_ = x[Less-48]
|
||||
_ = x[Greater-49]
|
||||
_ = x[LessOrEqual-50]
|
||||
_ = x[GreaterOrEqual-51]
|
||||
_ = x[Concat-52]
|
||||
_ = x[Match-53]
|
||||
_ = x[NotMatch-54]
|
||||
_ = x[Not-55]
|
||||
_ = x[UnaryMinus-56]
|
||||
_ = x[UnaryPlus-57]
|
||||
_ = x[Boolean-58]
|
||||
_ = x[Jump-59]
|
||||
_ = x[JumpFalse-60]
|
||||
_ = x[JumpTrue-61]
|
||||
_ = x[JumpEquals-62]
|
||||
_ = x[JumpNotEquals-63]
|
||||
_ = x[JumpLess-64]
|
||||
_ = x[JumpGreater-65]
|
||||
_ = x[JumpLessOrEqual-66]
|
||||
_ = x[JumpGreaterOrEqual-67]
|
||||
_ = x[Next-68]
|
||||
_ = x[Exit-69]
|
||||
_ = x[ForIn-70]
|
||||
_ = x[BreakForIn-71]
|
||||
_ = x[CallBuiltin-72]
|
||||
_ = x[CallSplit-73]
|
||||
_ = x[CallSplitSep-74]
|
||||
_ = x[CallSprintf-75]
|
||||
_ = x[CallUser-76]
|
||||
_ = x[CallNative-77]
|
||||
_ = x[Return-78]
|
||||
_ = x[ReturnNull-79]
|
||||
_ = x[Nulls-80]
|
||||
_ = x[Print-81]
|
||||
_ = x[Printf-82]
|
||||
_ = x[Getline-83]
|
||||
_ = x[GetlineField-84]
|
||||
_ = x[GetlineGlobal-85]
|
||||
_ = x[GetlineLocal-86]
|
||||
_ = x[GetlineSpecial-87]
|
||||
_ = x[GetlineArray-88]
|
||||
_ = x[EndOpcode-89]
|
||||
}
|
||||
|
||||
const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode"
|
||||
|
||||
var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826}
|
||||
|
||||
func (i Opcode) String() string {
|
||||
if i < 0 || i >= Opcode(len(_Opcode_index)-1) {
|
||||
return "Opcode(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _Opcode_name[_Opcode_index[i]:_Opcode_index[i+1]]
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[AugOpAdd-0]
|
||||
_ = x[AugOpSub-1]
|
||||
_ = x[AugOpMul-2]
|
||||
_ = x[AugOpDiv-3]
|
||||
_ = x[AugOpPow-4]
|
||||
_ = x[AugOpMod-5]
|
||||
}
|
||||
|
||||
const _AugOp_name = "AugOpAddAugOpSubAugOpMulAugOpDivAugOpPowAugOpMod"
|
||||
|
||||
var _AugOp_index = [...]uint8{0, 8, 16, 24, 32, 40, 48}
|
||||
|
||||
func (i AugOp) String() string {
|
||||
if i < 0 || i >= AugOp(len(_AugOp_index)-1) {
|
||||
return "AugOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _AugOp_name[_AugOp_index[i]:_AugOp_index[i+1]]
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[BuiltinAtan2-0]
|
||||
_ = x[BuiltinClose-1]
|
||||
_ = x[BuiltinCos-2]
|
||||
_ = x[BuiltinExp-3]
|
||||
_ = x[BuiltinFflush-4]
|
||||
_ = x[BuiltinFflushAll-5]
|
||||
_ = x[BuiltinGsub-6]
|
||||
_ = x[BuiltinIndex-7]
|
||||
_ = x[BuiltinInt-8]
|
||||
_ = x[BuiltinLength-9]
|
||||
_ = x[BuiltinLengthArg-10]
|
||||
_ = x[BuiltinLog-11]
|
||||
_ = x[BuiltinMatch-12]
|
||||
_ = x[BuiltinRand-13]
|
||||
_ = x[BuiltinSin-14]
|
||||
_ = x[BuiltinSqrt-15]
|
||||
_ = x[BuiltinSrand-16]
|
||||
_ = x[BuiltinSrandSeed-17]
|
||||
_ = x[BuiltinSub-18]
|
||||
_ = x[BuiltinSubstr-19]
|
||||
_ = x[BuiltinSubstrLength-20]
|
||||
_ = x[BuiltinSystem-21]
|
||||
_ = x[BuiltinTolower-22]
|
||||
_ = x[BuiltinToupper-23]
|
||||
}
|
||||
|
||||
const _BuiltinOp_name = "BuiltinAtan2BuiltinCloseBuiltinCosBuiltinExpBuiltinFflushBuiltinFflushAllBuiltinGsubBuiltinIndexBuiltinIntBuiltinLengthBuiltinLengthArgBuiltinLogBuiltinMatchBuiltinRandBuiltinSinBuiltinSqrtBuiltinSrandBuiltinSrandSeedBuiltinSubBuiltinSubstrBuiltinSubstrLengthBuiltinSystemBuiltinTolowerBuiltinToupper"
|
||||
|
||||
var _BuiltinOp_index = [...]uint16{0, 12, 24, 34, 44, 57, 73, 84, 96, 106, 119, 135, 145, 157, 168, 178, 189, 201, 217, 227, 240, 259, 272, 286, 300}
|
||||
|
||||
func (i BuiltinOp) String() string {
|
||||
if i < 0 || i >= BuiltinOp(len(_BuiltinOp_index)-1) {
|
||||
return "BuiltinOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _BuiltinOp_name[_BuiltinOp_index[i]:_BuiltinOp_index[i+1]]
|
||||
}
180
src/tool/awk/internal/compiler/opcodes.go
Normal file
@ -0,0 +1,180 @@
package compiler
|
||||
|
||||
//go:generate go run golang.org/x/tools/cmd/stringer@v0.1.8 -type=Opcode,AugOp,BuiltinOp
|
||||
|
||||
// Opcode represents a single virtual machine instruction (or argument). The
|
||||
// comments beside each opcode show any arguments that instruction consumes.
|
||||
//
|
||||
// Normally this is called "bytecode", but I've avoided that term here as each
|
||||
// opcode is a 32-bit word, not an 8-bit byte.
|
||||
//
|
||||
// I tested various bit widths, and I believe 32 bit was the fastest, but also
|
||||
// means we don't have to worry about jump offsets overflowing. That's tested
|
||||
// in the compiler, but who's going to have an AWK program bigger than 2GB?
|
||||
type Opcode int32
|
||||
|
||||
const (
|
||||
Nop Opcode = iota
|
||||
|
||||
// Stack operations
|
||||
Num // numIndex
|
||||
Str // strIndex
|
||||
Dupe
|
||||
Drop
|
||||
Swap
|
||||
|
||||
// Fetch a field, variable, or array item
|
||||
Field
|
||||
FieldInt // index
|
||||
FieldByName
|
||||
FieldByNameStr // strIndex
|
||||
Global // index
|
||||
Local // index
|
||||
Special // index
|
||||
ArrayGlobal // arrayIndex
|
||||
ArrayLocal // arrayIndex
|
||||
InGlobal // arrayIndex
|
||||
InLocal // arrayIndex
|
||||
|
||||
// Assign a field, variable, or array item
|
||||
AssignField
|
||||
AssignGlobal // index
|
||||
AssignLocal // index
|
||||
AssignSpecial // index
|
||||
AssignArrayGlobal // arrayIndex
|
||||
AssignArrayLocal // arrayIndex
|
||||
|
||||
// Delete statement
|
||||
Delete // arrayScope arrayIndex
|
||||
DeleteAll // arrayScope arrayIndex
|
||||
|
||||
// Post-increment and post-decrement
|
||||
IncrField // amount
|
||||
IncrGlobal // amount index
|
||||
IncrLocal // amount index
|
||||
IncrSpecial // amount index
|
||||
IncrArrayGlobal // amount arrayIndex
|
||||
IncrArrayLocal // amount arrayIndex
|
||||
|
||||
// Augmented assignment (also used for pre-increment and pre-decrement)
|
||||
AugAssignField // augOp
|
||||
AugAssignGlobal // augOp index
|
||||
AugAssignLocal // augOp index
|
||||
AugAssignSpecial // augOp index
|
||||
AugAssignArrayGlobal // augOp arrayIndex
|
||||
AugAssignArrayLocal // augOp arrayIndex
|
||||
|
||||
// Stand-alone regex expression /foo/
|
||||
Regex // regexIndex
|
||||
|
||||
// Multi-index concatenation
|
||||
IndexMulti // num
|
||||
|
||||
// Multi-value concatenation
|
||||
ConcatMulti // num
|
||||
|
||||
// Binary operators
|
||||
Add
|
||||
Subtract
|
||||
Multiply
|
||||
Divide
|
||||
Power
|
||||
Modulo
|
||||
Equals
|
||||
NotEquals
|
||||
Less
|
||||
Greater
|
||||
LessOrEqual
|
||||
GreaterOrEqual
|
||||
Concat
|
||||
Match
|
||||
NotMatch
|
||||
|
||||
// Unary operators
|
||||
Not
|
||||
UnaryMinus
|
||||
UnaryPlus
|
||||
Boolean
|
||||
|
||||
// Control flow
|
||||
Jump // offset
|
||||
JumpFalse // offset
|
||||
JumpTrue // offset
|
||||
JumpEquals // offset
|
||||
JumpNotEquals // offset
|
||||
JumpLess // offset
|
||||
JumpGreater // offset
|
||||
JumpLessOrEqual // offset
|
||||
JumpGreaterOrEqual // offset
|
||||
Next
|
||||
Exit
|
||||
ForIn // varScope varIndex arrayScope arrayIndex offset
|
||||
BreakForIn
|
||||
|
||||
// Builtin functions
|
||||
CallBuiltin // builtinOp
|
||||
CallSplit // arrayScope arrayIndex
|
||||
CallSplitSep // arrayScope arrayIndex
|
||||
CallSprintf // numArgs
|
||||
|
||||
// User and native functions
|
||||
CallUser // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...]
|
||||
CallNative // funcIndex numArgs
|
||||
Return
|
||||
ReturnNull
|
||||
Nulls // numNulls
|
||||
|
||||
// Print, printf, and getline
|
||||
Print // numArgs redirect
|
||||
Printf // numArgs redirect
|
||||
Getline // redirect
|
||||
GetlineField // redirect
|
||||
GetlineGlobal // redirect index
|
||||
GetlineLocal // redirect index
|
||||
GetlineSpecial // redirect index
|
||||
GetlineArray // redirect arrayScope arrayIndex
|
||||
|
||||
EndOpcode
|
||||
)
|
||||
|
||||
// AugOp represents an augmented assignment operation.
|
||||
type AugOp Opcode
|
||||
|
||||
const (
|
||||
AugOpAdd AugOp = iota
|
||||
AugOpSub
|
||||
AugOpMul
|
||||
AugOpDiv
|
||||
AugOpPow
|
||||
AugOpMod
|
||||
)
|
||||
|
||||
// BuiltinOp represents a builtin function call.
|
||||
type BuiltinOp Opcode
|
||||
|
||||
const (
|
||||
BuiltinAtan2 BuiltinOp = iota
|
||||
BuiltinClose
|
||||
BuiltinCos
|
||||
BuiltinExp
|
||||
BuiltinFflush
|
||||
BuiltinFflushAll
|
||||
BuiltinGsub
|
||||
BuiltinIndex
|
||||
BuiltinInt
|
||||
BuiltinLength
|
||||
BuiltinLengthArg
|
||||
BuiltinLog
|
||||
BuiltinMatch
|
||||
BuiltinRand
|
||||
BuiltinSin
|
||||
BuiltinSqrt
|
||||
BuiltinSrand
|
||||
BuiltinSrandSeed
|
||||
BuiltinSub
|
||||
BuiltinSubstr
|
||||
BuiltinSubstrLength
|
||||
BuiltinSystem
|
||||
BuiltinTolower
|
||||
BuiltinToupper
|
||||
)
392
src/tool/awk/interp/csvreader_test.go
Normal file
@ -0,0 +1,392 @@
// Tests copied from encoding/csv to ensure we pass all the relevant cases.
|
||||
|
||||
// These tests are a subset of those in encoding/csv used to test Reader.
|
||||
// However, the §, ¶ and ∑ special characters (for error positions) have been
|
||||
// removed, and some tests have been removed or tweaked slightly because we
|
||||
// don't support all the encoding/csv features (FieldsPerRecord is not
|
||||
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off).
|
||||
|
||||
package interp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/csv"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type readTest struct {
|
||||
Name string
|
||||
Input string
|
||||
Output [][]string
|
||||
Error string
|
||||
|
||||
// These fields are copied into the CSVInputConfig
|
||||
Comma rune
|
||||
Comment rune
|
||||
}
|
||||
|
||||
var readTests = []readTest{{
|
||||
Name: "Simple",
|
||||
Input: "a,b,c\n",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
}, {
|
||||
Name: "CRLF",
|
||||
Input: "a,b\r\nc,d\r\n",
|
||||
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
||||
}, {
|
||||
Name: "BareCR",
|
||||
Input: "a,b\rc,d\r\n",
|
||||
Output: [][]string{{"a", "b\rc", "d"}},
|
||||
}, {
|
||||
Name: "RFC4180test",
|
||||
Input: `#field1,field2,field3
|
||||
"aaa","bb
|
||||
b","ccc"
|
||||
"a,a","b""bb","ccc"
|
||||
zzz,yyy,xxx
|
||||
`,
|
||||
Output: [][]string{
|
||||
{"#field1", "field2", "field3"},
|
||||
{"aaa", "bb\nb", "ccc"},
|
||||
{"a,a", `b"bb`, "ccc"},
|
||||
{"zzz", "yyy", "xxx"},
|
||||
},
|
||||
}, {
|
||||
Name: "NoEOLTest",
|
||||
Input: "a,b,c",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
}, {
|
||||
Name: "Semicolon",
|
||||
Input: "a;b;c\n",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
Comma: ';',
|
||||
}, {
|
||||
Name: "MultiLine",
|
||||
Input: `"two
|
||||
line","one line","three
|
||||
line
|
||||
field"`,
|
||||
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
||||
}, {
|
||||
Name: "BlankLine",
|
||||
Input: "a,b,c\n\nd,e,f\n\n",
|
||||
Output: [][]string{
|
||||
{"a", "b", "c"},
|
||||
{"d", "e", "f"},
|
||||
},
|
||||
}, {
|
||||
Name: "BlankLineFieldCount",
|
||||
Input: "a,b,c\n\nd,e,f\n\n",
|
||||
Output: [][]string{
|
||||
{"a", "b", "c"},
|
||||
{"d", "e", "f"},
|
||||
},
|
||||
}, {
|
||||
Name: "LeadingSpace",
|
||||
Input: " a, b, c\n",
|
||||
Output: [][]string{{" a", " b", " c"}},
|
||||
}, {
|
||||
Name: "Comment",
|
||||
Input: "#1,2,3\na,b,c\n#comment",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
Comment: '#',
|
||||
}, {
|
||||
Name: "NoComment",
|
||||
Input: "#1,2,3\na,b,c",
|
||||
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
||||
}, {
|
||||
Name: "LazyQuotes",
|
||||
Input: `a "word","1"2",a","b`,
|
||||
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
||||
}, {
|
||||
Name: "BareQuotes",
|
||||
Input: `a "word","1"2",a"`,
|
||||
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
||||
}, {
|
||||
Name: "BareDoubleQuotes",
|
||||
Input: `a""b,c`,
|
||||
Output: [][]string{{`a""b`, `c`}},
|
||||
}, {
|
||||
Name: "TrimQuote",
|
||||
Input: `"a"," b",c`,
|
||||
Output: [][]string{{"a", " b", "c"}},
|
||||
}, {
|
||||
Name: "FieldCount",
|
||||
Input: "a,b,c\nd,e",
|
||||
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
||||
}, {
|
||||
Name: "TrailingCommaEOF",
|
||||
Input: "a,b,c,",
|
||||
Output: [][]string{{"a", "b", "c", ""}},
|
||||
}, {
|
||||
Name: "TrailingCommaEOL",
|
||||
Input: "a,b,c,\n",
|
||||
Output: [][]string{{"a", "b", "c", ""}},
|
||||
}, {
|
||||
Name: "TrailingCommaSpaceEOF",
|
||||
Input: "a,b,c, ",
|
||||
Output: [][]string{{"a", "b", "c", " "}},
|
||||
}, {
|
||||
Name: "TrailingCommaSpaceEOL",
|
||||
Input: "a,b,c, \n",
|
||||
Output: [][]string{{"a", "b", "c", " "}},
|
||||
}, {
|
||||
Name: "TrailingCommaLine3",
|
||||
Input: "a,b,c\nd,e,f\ng,hi,",
|
||||
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
|
||||
}, {
|
||||
Name: "NotTrailingComma3",
|
||||
Input: "a,b,c, \n",
|
||||
Output: [][]string{{"a", "b", "c", " "}},
|
||||
}, {
|
||||
Name: "CommaFieldTest",
|
||||
Input: `x,y,z,w
|
||||
x,y,z,
|
||||
x,y,,
|
||||
x,,,
|
||||
,,,
|
||||
"x","y","z","w"
|
||||
"x","y","z",""
|
||||
"x","y","",""
|
||||
"x","","",""
|
||||
"","","",""
|
||||
`,
|
||||
Output: [][]string{
|
||||
{"x", "y", "z", "w"},
|
||||
{"x", "y", "z", ""},
|
||||
{"x", "y", "", ""},
|
||||
{"x", "", "", ""},
|
||||
{"", "", "", ""},
|
||||
{"x", "y", "z", "w"},
|
||||
{"x", "y", "z", ""},
|
||||
{"x", "y", "", ""},
|
||||
{"x", "", "", ""},
|
||||
{"", "", "", ""},
|
||||
},
|
||||
}, {
|
||||
Name: "TrailingCommaIneffective1",
|
||||
Input: "a,b,\nc,d,e",
|
||||
Output: [][]string{
|
||||
{"a", "b", ""},
|
||||
{"c", "d", "e"},
|
||||
},
|
||||
}, {
|
||||
Name: "ReadAllReuseRecord",
|
||||
Input: "a,b\nc,d",
|
||||
Output: [][]string{
|
||||
{"a", "b"},
|
||||
{"c", "d"},
|
||||
},
|
||||
}, {
|
||||
Name: "CRLFInQuotedField", // Issue 21201
|
||||
Input: "A,\"Hello\r\nHi\",B\r\n",
|
||||
Output: [][]string{
|
||||
{"A", "Hello\nHi", "B"},
|
||||
},
|
||||
}, {
|
||||
Name: "BinaryBlobField", // Issue 19410
|
||||
Input: "x09\x41\xb4\x1c,aktau",
|
||||
Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
|
||||
}, {
|
||||
Name: "TrailingCR",
|
||||
Input: "field1,field2\r",
|
||||
Output: [][]string{{"field1", "field2"}},
|
||||
}, {
|
||||
Name: "QuotedTrailingCR",
|
||||
Input: "\"field\"\r",
|
||||
Output: [][]string{{"field"}},
|
||||
}, {
|
||||
Name: "FieldCR",
|
||||
Input: "field\rfield\r",
|
||||
Output: [][]string{{"field\rfield"}},
|
||||
}, {
|
||||
Name: "FieldCRCR",
|
||||
Input: "field\r\rfield\r\r",
|
||||
Output: [][]string{{"field\r\rfield\r"}},
|
||||
}, {
|
||||
Name: "FieldCRCRLF",
|
||||
Input: "field\r\r\nfield\r\r\n",
|
||||
Output: [][]string{{"field\r"}, {"field\r"}},
|
||||
}, {
|
||||
Name: "FieldCRCRLFCR",
|
||||
Input: "field\r\r\n\rfield\r\r\n\r",
|
||||
Output: [][]string{{"field\r"}, {"\rfield\r"}},
|
||||
}, {
|
||||
Name: "FieldCRCRLFCRCR",
|
||||
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
|
||||
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
|
||||
}, {
|
||||
Name: "MultiFieldCRCRLFCRCR",
|
||||
Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
|
||||
Output: [][]string{
|
||||
{"field1", "field2\r"},
|
||||
{"\r\rfield1", "field2\r"},
|
||||
{"\r\r", ""},
|
||||
},
|
||||
}, {
|
||||
Name: "NonASCIICommaAndComment",
|
||||
Input: "a£b,c£ \td,e\n€ comment\n",
|
||||
Output: [][]string{{"a", "b,c", " \td,e"}},
|
||||
Comma: '£',
|
||||
Comment: '€',
|
||||
}, {
|
||||
Name: "NonASCIICommaAndCommentWithQuotes",
|
||||
Input: "a€\" b,\"€ c\nλ comment\n",
|
||||
Output: [][]string{{"a", " b,", " c"}},
|
||||
Comma: '€',
|
||||
Comment: 'λ',
|
||||
}, {
|
||||
// λ and θ start with the same byte.
|
||||
// This tests that the parser doesn't confuse such characters.
|
||||
Name: "NonASCIICommaConfusion",
|
||||
Input: "\"abθcd\"λefθgh",
|
||||
Output: [][]string{{"abθcd", "efθgh"}},
|
||||
Comma: 'λ',
|
||||
Comment: '€',
|
||||
}, {
|
||||
Name: "NonASCIICommentConfusion",
|
||||
Input: "λ\nλ\nθ\nλ\n",
|
||||
Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
|
||||
Comment: 'θ',
|
||||
}, {
|
||||
Name: "QuotedFieldMultipleLF",
|
||||
Input: "\"\n\n\n\n\"",
|
||||
Output: [][]string{{"\n\n\n\n"}},
|
||||
}, {
|
||||
Name: "MultipleCRLF",
|
||||
Input: "\r\n\r\n\r\n\r\n",
|
||||
}, {
|
||||
// The implementation may read each line in several chunks if it doesn't fit entirely
|
||||
// in the read buffer, so we should test the code to handle that condition.
|
||||
Name: "HugeLines",
|
||||
Input: strings.Repeat("#ignore\n", 10000) + "" + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
|
||||
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
|
||||
Comment: '#',
|
||||
}, {
|
||||
Name: "LazyQuoteWithTrailingCRLF",
|
||||
Input: "\"foo\"bar\"\r\n",
|
||||
Output: [][]string{{`foo"bar`}},
|
||||
}, {
|
||||
Name: "DoubleQuoteWithTrailingCRLF",
|
||||
Input: "\"foo\"\"bar\"\r\n",
|
||||
Output: [][]string{{`foo"bar`}},
|
||||
}, {
|
||||
Name: "EvenQuotes",
|
||||
Input: `""""""""`,
|
||||
Output: [][]string{{`"""`}},
|
||||
}, {
|
||||
Name: "LazyOddQuotes",
|
||||
Input: `"""""""`,
|
||||
Output: [][]string{{`"""`}},
|
||||
}, {
|
||||
Name: "BadComma1",
|
||||
Comma: '\n',
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadComma2",
|
||||
Comma: '\r',
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadComma3",
|
||||
Comma: '"',
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadComma4",
|
||||
Comma: utf8.RuneError,
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadComment1",
|
||||
Comment: '\n',
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadComment2",
|
||||
Comment: '\r',
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadComment3",
|
||||
Comment: utf8.RuneError,
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}, {
|
||||
Name: "BadCommaComment",
|
||||
Comma: 'X',
|
||||
Comment: 'X',
|
||||
Error: "invalid CSV field separator or comment delimiter",
|
||||
}}
|
||||
|
||||
func TestCSVReader(t *testing.T) {
|
||||
for _, tt := range readTests {
|
||||
t.Run(tt.Name, func(t *testing.T) {
|
||||
inputConfig := CSVInputConfig{
|
||||
Separator: tt.Comma,
|
||||
Comment: tt.Comment,
|
||||
}
|
||||
if inputConfig.Separator == 0 {
|
||||
inputConfig.Separator = ','
|
||||
}
|
||||
|
||||
var out [][]string
|
||||
err := validateCSVInputConfig(CSVMode, inputConfig)
|
||||
if err == nil {
|
||||
var fields []string
|
||||
splitter := csvSplitter{
|
||||
separator: inputConfig.Separator,
|
||||
sepLen: utf8.RuneLen(inputConfig.Separator),
|
||||
comment: inputConfig.Comment,
|
||||
fields: &fields,
|
||||
}
|
||||
scanner := bufio.NewScanner(strings.NewReader(tt.Input))
|
||||
scanner.Split(splitter.scan)
|
||||
scanner.Buffer(make([]byte, inputBufSize), maxRecordLength)
|
||||
|
||||
for scanner.Scan() {
|
||||
row := make([]string, len(fields))
|
||||
copy(row, fields)
|
||||
out = append(out, row)
|
||||
|
||||
// We don't explicitly check the returned token, but at
|
||||
// least check it parses to the same row.
|
||||
if strings.ContainsRune(tt.Input, '\r') {
|
||||
// But FieldCRCRLF and similar tests don't round-trip
|
||||
continue
|
||||
}
|
||||
token := scanner.Text()
|
||||
reader := csv.NewReader(strings.NewReader(token))
|
||||
reader.Comma = inputConfig.Separator
|
||||
reader.Comment = inputConfig.Comment
|
||||
reader.FieldsPerRecord = -1
|
||||
reader.LazyQuotes = true
|
||||
tokenRow, err := reader.Read()
|
||||
if err != nil {
|
||||
t.Fatalf("error reparsing token: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(tokenRow, row) {
|
||||
t.Fatalf("token mismatch:\ngot %q\nwant %q", tokenRow, row)
|
||||
}
|
||||
}
|
||||
err = scanner.Err()
|
||||
}
|
||||
|
||||
if tt.Error != "" {
|
||||
if err == nil {
|
||||
t.Fatalf("error mismatch:\ngot nil\nwant %q", tt.Error)
|
||||
}
|
||||
if err.Error() != tt.Error {
|
||||
t.Fatalf("error mismatch:\ngot %q\nwant %q", err.Error(), tt.Error)
|
||||
}
|
||||
if out != nil {
|
||||
t.Fatalf("output mismatch:\ngot %q\nwant nil", out)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Fatalf("error mismatch:\ngot %q\nwant nil", err.Error())
|
||||
}
|
||||
if !reflect.DeepEqual(out, tt.Output) {
|
||||
t.Fatalf("output mismatch:\ngot %q\nwant %q", out, tt.Output)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
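The tests above exercise csvSplitter through a plain bufio.Scanner with a custom split function rather than through encoding/csv.Reader. The sketch below (not part of the commit; lineSplitter is a made-up toy) shows that same Scanner pattern in isolation: the split function returns one record token per call and hands the parsed fields back through a pointer, just as csvSplitter does via its fields field.

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"strings"
)

// lineSplitter is a toy bufio.SplitFunc holder: it returns one
// newline-terminated record at a time and stores that record's
// comma-split fields as a side effect, mirroring how csvSplitter
// hands fields back to the interpreter.
type lineSplitter struct {
	fields *[]string
}

func (s *lineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil // no more data, stop the Scanner
	}
	if i := bytes.IndexByte(data, '\n'); i >= 0 {
		*s.fields = strings.Split(string(data[:i]), ",")
		return i + 1, data[:i], nil
	}
	if atEOF {
		// Final record without a trailing newline.
		*s.fields = strings.Split(string(data), ",")
		return len(data), data, nil
	}
	return 0, nil, nil // request more data
}

func main() {
	var fields []string
	splitter := &lineSplitter{fields: &fields}
	scanner := bufio.NewScanner(strings.NewReader("a,b,c\nd,e\n"))
	scanner.Split(splitter.scan)
	for scanner.Scan() {
		fmt.Printf("record=%q fields=%q\n", scanner.Text(), fields)
	}
}
```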
177 src/tool/awk/interp/example_test.go (new file)
@@ -0,0 +1,177 @@
// Don't run these on Windows, because newline handling means they don't pass.
|
||||
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package interp_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/benhoyt/goawk/interp"
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
func Example() {
|
||||
input := strings.NewReader("foo bar\n\nbaz buz")
|
||||
err := interp.Exec("$0 { print $1 }", " ", input, nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
// Output:
|
||||
// foo
|
||||
// baz
|
||||
}
|
||||
|
||||
func Example_fieldsep() {
|
||||
// Use ',' as the field separator
|
||||
input := strings.NewReader("1,2\n3,4")
|
||||
err := interp.Exec("{ print $1, $2 }", ",", input, nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
// Output:
|
||||
// 1 2
|
||||
// 3 4
|
||||
}
|
||||
|
||||
func Example_program() {
|
||||
src := "{ print NR, tolower($0) }"
|
||||
input := "A\naB\nAbC"
|
||||
|
||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
config := &interp.Config{
|
||||
Stdin: strings.NewReader(input),
|
||||
Vars: []string{"OFS", ":"},
|
||||
}
|
||||
_, err = interp.ExecProgram(prog, config)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
// Output:
|
||||
// 1:a
|
||||
// 2:ab
|
||||
// 3:abc
|
||||
}
|
||||
|
||||
func Example_funcs() {
|
||||
src := `BEGIN { print sum(), sum(1), sum(2, 3, 4), repeat("xyz", 3) }`
|
||||
|
||||
parserConfig := &parser.ParserConfig{
|
||||
Funcs: map[string]interface{}{
|
||||
"sum": func(args ...float64) float64 {
|
||||
sum := 0.0
|
||||
for _, a := range args {
|
||||
sum += a
|
||||
}
|
||||
return sum
|
||||
},
|
||||
"repeat": strings.Repeat,
|
||||
},
|
||||
}
|
||||
prog, err := parser.ParseProgram([]byte(src), parserConfig)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
interpConfig := &interp.Config{
|
||||
Funcs: parserConfig.Funcs,
|
||||
}
|
||||
_, err = interp.ExecProgram(prog, interpConfig)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
// Output:
|
||||
// 0 1 9 xyzxyzxyz
|
||||
}
|
||||
|
||||
func Example_new() {
|
||||
// We'll execute this program multiple times on different inputs.
|
||||
src := `{ print $1, x, $3; x++ }`
|
||||
|
||||
// Parse the program and set up the interpreter.
|
||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
interpreter, err := interp.New(prog)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
// Run it once on one input.
|
||||
_, err = interpreter.Execute(&interp.Config{
|
||||
Stdin: strings.NewReader("one two three"),
|
||||
Environ: []string{}, // avoid calling os.Environ each time
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
// Reset variables and run it again efficiently on a different input (this
|
||||
// could be from a completely different data source).
|
||||
interpreter.ResetVars()
|
||||
_, err = interpreter.Execute(&interp.Config{
|
||||
Stdin: strings.NewReader("a b c\nd e f\n"),
|
||||
Environ: []string{},
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
// Run it on another input, this time without resetting variables.
|
||||
_, err = interpreter.Execute(&interp.Config{
|
||||
Stdin: strings.NewReader("x y z"),
|
||||
Environ: []string{},
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
// Output:
|
||||
// one three
|
||||
// a c
|
||||
// d 1 f
|
||||
// x 2 z
|
||||
}
|
||||
|
||||
func Example_csv() {
|
||||
src := `{ total += @"amount" } END { print total }`
|
||||
input := `# comment
|
||||
name,amount
|
||||
Bob,17.50
|
||||
Jill,20
|
||||
"Boba Fett",100.00
|
||||
`
|
||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
config := &interp.Config{
|
||||
Stdin: strings.NewReader(input),
|
||||
InputMode: interp.CSVMode,
|
||||
CSVInput: interp.CSVInputConfig{Comment: '#', Header: true},
|
||||
}
|
||||
_, err = interp.ExecProgram(prog, config)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
// Output:
|
||||
// 137.5
|
||||
}
|
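Example_csv above covers CSV input; per the csv.md document in this commit, the same named-field program also works over tab-separated input by switching the mode constant. A minimal sketch, assuming only the TSVMode constant that io.go handles later in this diff alongside CSVMode:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	// Same named-field program as Example_csv, but over TSV input.
	src := `{ total += @"amount" } END { print total }`
	input := "name\tamount\nBob\t17.50\nJill\t20\n"

	prog, err := parser.ParseProgram([]byte(src), nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	config := &interp.Config{
		Stdin:     strings.NewReader(input),
		InputMode: interp.TSVMode,
		CSVInput:  interp.CSVInputConfig{Header: true},
	}
	if _, err := interp.ExecProgram(prog, config); err != nil {
		fmt.Println(err)
	}
	// Expected output: 37.5
}
```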
413 src/tool/awk/interp/functions.go (new file)
@@ -0,0 +1,413 @@
// Call native Go functions; helpers for some builtin function calls.
|
||||
|
||||
package interp
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/benhoyt/goawk/internal/ast"
|
||||
. "github.com/benhoyt/goawk/lexer"
|
||||
)
|
||||
|
||||
// Call native-defined function with given name and arguments, return
|
||||
// its return value (or null value if it doesn't return anything).
|
||||
func (p *interp) callNative(index int, args []value) (value, error) {
|
||||
f := p.nativeFuncs[index]
|
||||
minIn := len(f.in) // Minimum number of args we should pass
|
||||
var variadicType reflect.Type
|
||||
if f.isVariadic {
|
||||
variadicType = f.in[len(f.in)-1].Elem()
|
||||
minIn--
|
||||
}
|
||||
|
||||
// Build list of args to pass to function
|
||||
values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation
|
||||
for i, a := range args {
|
||||
var argType reflect.Type
|
||||
if !f.isVariadic || i < len(f.in)-1 {
|
||||
argType = f.in[i]
|
||||
} else {
|
||||
// Final arg(s) when calling a variadic are all of this type
|
||||
argType = variadicType
|
||||
}
|
||||
values = append(values, p.toNative(a, argType))
|
||||
}
|
||||
// Use zero value for any unspecified args
|
||||
for i := len(args); i < minIn; i++ {
|
||||
values = append(values, reflect.Zero(f.in[i]))
|
||||
}
|
||||
|
||||
// Call Go function, determine return value
|
||||
outs := f.value.Call(values)
|
||||
switch len(outs) {
|
||||
case 0:
|
||||
// No return value, return null value to AWK
|
||||
return null(), nil
|
||||
case 1:
|
||||
// Single return value
|
||||
return fromNative(outs[0]), nil
|
||||
case 2:
|
||||
// Two-valued return of (scalar, error)
|
||||
if !outs[1].IsNil() {
|
||||
return null(), outs[1].Interface().(error)
|
||||
}
|
||||
return fromNative(outs[0]), nil
|
||||
default:
|
||||
// Should never happen (checked at parse time)
|
||||
panic(fmt.Sprintf("unexpected number of return values: %d", len(outs)))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from an AWK value to a native Go value
|
||||
func (p *interp) toNative(v value, typ reflect.Type) reflect.Value {
|
||||
switch typ.Kind() {
|
||||
case reflect.Bool:
|
||||
return reflect.ValueOf(v.boolean())
|
||||
case reflect.Int:
|
||||
return reflect.ValueOf(int(v.num()))
|
||||
case reflect.Int8:
|
||||
return reflect.ValueOf(int8(v.num()))
|
||||
case reflect.Int16:
|
||||
return reflect.ValueOf(int16(v.num()))
|
||||
case reflect.Int32:
|
||||
return reflect.ValueOf(int32(v.num()))
|
||||
case reflect.Int64:
|
||||
return reflect.ValueOf(int64(v.num()))
|
||||
case reflect.Uint:
|
||||
return reflect.ValueOf(uint(v.num()))
|
||||
case reflect.Uint8:
|
||||
return reflect.ValueOf(uint8(v.num()))
|
||||
case reflect.Uint16:
|
||||
return reflect.ValueOf(uint16(v.num()))
|
||||
case reflect.Uint32:
|
||||
return reflect.ValueOf(uint32(v.num()))
|
||||
case reflect.Uint64:
|
||||
return reflect.ValueOf(uint64(v.num()))
|
||||
case reflect.Float32:
|
||||
return reflect.ValueOf(float32(v.num()))
|
||||
case reflect.Float64:
|
||||
return reflect.ValueOf(v.num())
|
||||
case reflect.String:
|
||||
return reflect.ValueOf(p.toString(v))
|
||||
case reflect.Slice:
|
||||
if typ.Elem().Kind() != reflect.Uint8 {
|
||||
// Shouldn't happen: prevented by checkNativeFunc
|
||||
panic(fmt.Sprintf("unexpected argument slice: %s", typ.Elem().Kind()))
|
||||
}
|
||||
return reflect.ValueOf([]byte(p.toString(v)))
|
||||
default:
|
||||
// Shouldn't happen: prevented by checkNativeFunc
|
||||
panic(fmt.Sprintf("unexpected argument type: %s", typ.Kind()))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from a native Go value to an AWK value
|
||||
func fromNative(v reflect.Value) value {
|
||||
switch v.Kind() {
|
||||
case reflect.Bool:
|
||||
return boolean(v.Bool())
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
return num(float64(v.Int()))
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
||||
return num(float64(v.Uint()))
|
||||
case reflect.Float32, reflect.Float64:
|
||||
return num(v.Float())
|
||||
case reflect.String:
|
||||
return str(v.String())
|
||||
case reflect.Slice:
|
||||
if b, ok := v.Interface().([]byte); ok {
|
||||
return str(string(b))
|
||||
}
|
||||
// Shouldn't happen: prevented by checkNativeFunc
|
||||
panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind()))
|
||||
default:
|
||||
// Shouldn't happen: prevented by checkNativeFunc
|
||||
panic(fmt.Sprintf("unexpected return type: %s", v.Kind()))
|
||||
}
|
||||
}
|
||||
|
||||
// Used for caching native function type information on init
|
||||
type nativeFunc struct {
|
||||
isVariadic bool
|
||||
in []reflect.Type
|
||||
value reflect.Value
|
||||
}
|
||||
|
||||
// Check and initialize native functions
|
||||
func (p *interp) initNativeFuncs(funcs map[string]interface{}) error {
|
||||
for name, f := range funcs {
|
||||
err := checkNativeFunc(name, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Sort functions by name, then use those indexes to build slice
|
||||
// (this has to match how the parser sets the indexes).
|
||||
names := make([]string, 0, len(funcs))
|
||||
for name := range funcs {
|
||||
names = append(names, name)
|
||||
}
|
||||
sort.Strings(names)
|
||||
p.nativeFuncs = make([]nativeFunc, len(names))
|
||||
for i, name := range names {
|
||||
f := funcs[name]
|
||||
typ := reflect.TypeOf(f)
|
||||
in := make([]reflect.Type, typ.NumIn())
|
||||
for j := 0; j < len(in); j++ {
|
||||
in[j] = typ.In(j)
|
||||
}
|
||||
p.nativeFuncs[i] = nativeFunc{
|
||||
isVariadic: typ.IsVariadic(),
|
||||
in: in,
|
||||
value: reflect.ValueOf(f),
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Got this trick from the Go stdlib text/template source
|
||||
var errorType = reflect.TypeOf((*error)(nil)).Elem()
|
||||
|
||||
// Check that native function with given name is okay to call from
|
||||
// AWK, return an *interp.Error if not. This checks that f is actually
|
||||
// a function, and that its parameter and return types are good.
|
||||
func checkNativeFunc(name string, f interface{}) error {
|
||||
if KeywordToken(name) != ILLEGAL {
|
||||
return newError("can't use keyword %q as native function name", name)
|
||||
}
|
||||
|
||||
typ := reflect.TypeOf(f)
|
||||
if typ.Kind() != reflect.Func {
|
||||
return newError("native function %q is not a function", name)
|
||||
}
|
||||
for i := 0; i < typ.NumIn(); i++ {
|
||||
param := typ.In(i)
|
||||
if typ.IsVariadic() && i == typ.NumIn()-1 {
|
||||
param = param.Elem()
|
||||
}
|
||||
if !validNativeType(param) {
|
||||
return newError("native function %q param %d is not int or string", name, i)
|
||||
}
|
||||
}
|
||||
|
||||
switch typ.NumOut() {
|
||||
case 0:
|
||||
// No return value is fine
|
||||
case 1:
|
||||
// Single scalar return value is fine
|
||||
if !validNativeType(typ.Out(0)) {
|
||||
return newError("native function %q return value is not int or string", name)
|
||||
}
|
||||
case 2:
|
||||
// Returning (scalar, error) is handled too
|
||||
if !validNativeType(typ.Out(0)) {
|
||||
return newError("native function %q first return value is not int or string", name)
|
||||
}
|
||||
if typ.Out(1) != errorType {
|
||||
return newError("native function %q second return value is not an error", name)
|
||||
}
|
||||
default:
|
||||
return newError("native function %q returns more than two values", name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
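To make the (scalar, error) return shape concrete, here is a small sketch of a native function that uses it, wired up the same way as Example_funcs above. The statmode name is purely illustrative; a non-nil error from the Go function surfaces as the interpreter error (the two-value case in callNative).

```go
package main

import (
	"fmt"
	"os"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	// "statmode" is a hypothetical native function used only for illustration.
	// It returns (string, error); checkNativeFunc accepts this because the
	// second return value has type error.
	funcs := map[string]interface{}{
		"statmode": func(path string) (string, error) {
			info, err := os.Stat(path)
			if err != nil {
				return "", err
			}
			return info.Mode().String(), nil
		},
	}

	prog, err := parser.ParseProgram([]byte(`BEGIN { print statmode(".") }`),
		&parser.ParserConfig{Funcs: funcs})
	if err != nil {
		fmt.Println(err)
		return
	}
	if _, err := interp.ExecProgram(prog, &interp.Config{Funcs: funcs}); err != nil {
		fmt.Println(err)
	}
	// Prints something like: drwxr-xr-x
}
```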
|
||||
// Return true if typ is a valid parameter or return type.
|
||||
func validNativeType(typ reflect.Type) bool {
|
||||
switch typ.Kind() {
|
||||
case reflect.Bool:
|
||||
return true
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
return true
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
||||
return true
|
||||
case reflect.Float32, reflect.Float64:
|
||||
return true
|
||||
case reflect.String:
|
||||
return true
|
||||
case reflect.Slice:
|
||||
// Only allow []byte (convert to string in AWK)
|
||||
return typ.Elem().Kind() == reflect.Uint8
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Guts of the split() function
|
||||
func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) {
|
||||
var parts []string
|
||||
if fs == " " {
|
||||
parts = strings.Fields(s)
|
||||
} else if s == "" {
|
||||
// Leave parts 0 length on empty string
|
||||
} else if utf8.RuneCountInString(fs) <= 1 {
|
||||
parts = strings.Split(s, fs)
|
||||
} else {
|
||||
re, err := p.compileRegex(fs)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
parts = re.Split(s, -1)
|
||||
}
|
||||
array := make(map[string]value, len(parts))
|
||||
for i, part := range parts {
|
||||
array[strconv.Itoa(i+1)] = numStr(part)
|
||||
}
|
||||
p.arrays[p.arrayIndex(scope, index)] = array
|
||||
return len(array), nil
|
||||
}
|
||||
|
||||
// Guts of the sub() and gsub() functions
|
||||
func (p *interp) sub(regex, repl, in string, global bool) (out string, num int, err error) {
|
||||
re, err := p.compileRegex(regex)
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
count := 0
|
||||
out = re.ReplaceAllStringFunc(in, func(s string) string {
|
||||
// Only do the first replacement for sub(), or all for gsub()
|
||||
if !global && count > 0 {
|
||||
return s
|
||||
}
|
||||
count++
|
||||
// Handle & (ampersand) properly in replacement string
|
||||
r := make([]byte, 0, 64) // Up to 64 byte replacement won't require heap allocation
|
||||
for i := 0; i < len(repl); i++ {
|
||||
switch repl[i] {
|
||||
case '&':
|
||||
r = append(r, s...)
|
||||
case '\\':
|
||||
i++
|
||||
if i < len(repl) {
|
||||
switch repl[i] {
|
||||
case '&':
|
||||
r = append(r, '&')
|
||||
case '\\':
|
||||
r = append(r, '\\')
|
||||
default:
|
||||
r = append(r, '\\', repl[i])
|
||||
}
|
||||
} else {
|
||||
r = append(r, '\\')
|
||||
}
|
||||
default:
|
||||
r = append(r, repl[i])
|
||||
}
|
||||
}
|
||||
return string(r)
|
||||
})
|
||||
return out, count, nil
|
||||
}
|
||||
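The replacement-string loop above is what gives sub() and gsub() their POSIX semantics for & (the matched text) and \& (a literal ampersand). A quick sketch using the interp.Exec helper shown earlier:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
)

func main() {
	// & in the replacement expands to the matched text; \& inserts a literal &
	// (the AWK string literal "\\&" reaches sub() as the two characters \&).
	src := `{ gsub(/o/, "[&]"); gsub(/a/, "\\&"); print }`
	err := interp.Exec(src, " ", strings.NewReader("foo bar\n"), nil)
	if err != nil {
		fmt.Println(err)
	}
	// Expected output: f[o][o] b&r
}
```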
|
||||
type cachedFormat struct {
|
||||
format string
|
||||
types []byte
|
||||
}
|
||||
|
||||
// Parse given sprintf format string into Go format string, along with
|
||||
// type conversion specifiers. Output is memoized in a simple cache
|
||||
// for performance.
|
||||
func (p *interp) parseFmtTypes(s string) (format string, types []byte, err error) {
|
||||
if item, ok := p.formatCache[s]; ok {
|
||||
return item.format, item.types, nil
|
||||
}
|
||||
|
||||
out := []byte(s)
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] == '%' {
|
||||
i++
|
||||
if i >= len(s) {
|
||||
return "", nil, errors.New("expected type specifier after %")
|
||||
}
|
||||
if s[i] == '%' {
|
||||
continue
|
||||
}
|
||||
for i < len(s) && bytes.IndexByte([]byte(" .-+*#0123456789"), s[i]) >= 0 {
|
||||
if s[i] == '*' {
|
||||
types = append(types, 'd')
|
||||
}
|
||||
i++
|
||||
}
|
||||
if i >= len(s) {
|
||||
return "", nil, errors.New("expected type specifier after %")
|
||||
}
|
||||
var t byte
|
||||
switch s[i] {
|
||||
case 's':
|
||||
t = 's'
|
||||
case 'd', 'i', 'o', 'x', 'X':
|
||||
t = 'd'
|
||||
case 'f', 'e', 'E', 'g', 'G':
|
||||
t = 'f'
|
||||
case 'u':
|
||||
t = 'u'
|
||||
out[i] = 'd'
|
||||
case 'c':
|
||||
t = 'c'
|
||||
out[i] = 's'
|
||||
default:
|
||||
return "", nil, fmt.Errorf("invalid format type %q", s[i])
|
||||
}
|
||||
types = append(types, t)
|
||||
}
|
||||
}
|
||||
|
||||
// Dumb, non-LRU cache: just cache the first N formats
|
||||
format = string(out)
|
||||
if len(p.formatCache) < maxCachedFormats {
|
||||
p.formatCache[s] = cachedFormat{format, types}
|
||||
}
|
||||
return format, types, nil
|
||||
}
|
||||
|
||||
// Guts of sprintf() function (also used by "printf" statement)
|
||||
func (p *interp) sprintf(format string, args []value) (string, error) {
|
||||
format, types, err := p.parseFmtTypes(format)
|
||||
if err != nil {
|
||||
return "", newError("format error: %s", err)
|
||||
}
|
||||
if len(types) > len(args) {
|
||||
return "", newError("format error: got %d args, expected %d", len(args), len(types))
|
||||
}
|
||||
converted := make([]interface{}, 0, 7) // up to 7 args won't require heap allocation
|
||||
for i, t := range types {
|
||||
a := args[i]
|
||||
var v interface{}
|
||||
switch t {
|
||||
case 's':
|
||||
v = p.toString(a)
|
||||
case 'd':
|
||||
v = int(a.num())
|
||||
case 'f':
|
||||
v = a.num()
|
||||
case 'u':
|
||||
v = uint(a.num())
|
||||
case 'c':
|
||||
var c []byte
|
||||
n, isStr := a.isTrueStr()
|
||||
if isStr {
|
||||
s := p.toString(a)
|
||||
if len(s) > 0 {
|
||||
c = []byte{s[0]}
|
||||
} else {
|
||||
c = []byte{0}
|
||||
}
|
||||
} else {
|
||||
// Follow the behaviour of awk and mawk, where %c
|
||||
// operates on bytes (0-255), not Unicode codepoints
|
||||
c = []byte{byte(n)}
|
||||
}
|
||||
v = c
|
||||
}
|
||||
converted = append(converted, v)
|
||||
}
|
||||
return fmt.Sprintf(format, converted...), nil
|
||||
}
|
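parseFmtTypes boils every verb down to a handful of conversion types ('s', 'd', 'f', 'u', 'c'), and sprintf coerces each argument accordingly; note that %c follows awk and mawk in working on bytes rather than code points. A small sketch exercising this through the public API (the expected-output comment is derived from reading the code above):

```go
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
)

func main() {
	// %05d coerces its argument to int, %c with a numeric argument prints
	// that byte value, and %c with a string argument prints its first byte.
	src := `BEGIN { printf "%05d|%c|%c\n", 42, 65, "zebra" }`
	err := interp.Exec(src, " ", strings.NewReader(""), nil)
	if err != nil {
		fmt.Println(err)
	}
	// Expected output: 00042|A|z
}
```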
107 src/tool/awk/interp/fuzz_test.go (new file)
@@ -0,0 +1,107 @@
// Fuzz tests for use with the Go 1.18 fuzzer.
|
||||
|
||||
//go:build go1.18
|
||||
// +build go1.18
|
||||
|
||||
package interp_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/benhoyt/goawk/interp"
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
func isFuzzTest(test interpTest) bool {
|
||||
return test.err == "" && test.awkErr == "" && !strings.Contains(test.src, "!fuzz")
|
||||
}
|
||||
|
||||
func FuzzSource(f *testing.F) {
|
||||
for _, test := range interpTests {
|
||||
if isFuzzTest(test) {
|
||||
f.Add(test.src)
|
||||
}
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, src string) {
|
||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
interpreter, err := interp.New(prog)
|
||||
if err != nil {
|
||||
f.Fatalf("interp.New error: %v", err)
|
||||
}
|
||||
config := interp.Config{
|
||||
Stdin: strings.NewReader("foo bar\nbazz\n"),
|
||||
Output: ioutil.Discard,
|
||||
Error: ioutil.Discard,
|
||||
NoExec: true,
|
||||
NoFileWrites: true,
|
||||
NoFileReads: true,
|
||||
Environ: []string{},
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||
defer cancel()
|
||||
_, _ = interpreter.ExecuteContext(ctx, &config)
|
||||
})
|
||||
}
|
||||
|
||||
func FuzzInput(f *testing.F) {
|
||||
f.Add("")
|
||||
added := make(map[string]bool)
|
||||
for _, test := range interpTests {
|
||||
if test.in != "" && !added[test.in] {
|
||||
f.Add(test.in)
|
||||
added[test.in] = true
|
||||
}
|
||||
}
|
||||
|
||||
prog, err := parser.ParseProgram([]byte(`{ print $0, $3, $1, $10 }`), nil)
|
||||
if err != nil {
|
||||
f.Fatalf("parse error: %v", err)
|
||||
}
|
||||
|
||||
interpreter, err := interp.New(prog)
|
||||
if err != nil {
|
||||
f.Fatalf("interp.New error: %v", err)
|
||||
}
|
||||
|
||||
var vars = [][]string{
|
||||
{"FS", " ", "RS", "\n"},
|
||||
{"FS", ",", "RS", "\n"},
|
||||
{"FS", "\t", "RS", "\n"},
|
||||
{"FS", "@+", "RS", "\n"},
|
||||
{"FS", "\n", "RS", ""},
|
||||
{"FS", " ", "RS", "X+"},
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, in string) {
|
||||
for _, v := range vars {
|
||||
t.Run(fmt.Sprintf("Vars=%q", v), func(t *testing.T) {
|
||||
interpreter.ResetVars()
|
||||
config := interp.Config{
|
||||
Stdin: strings.NewReader(in),
|
||||
Output: ioutil.Discard,
|
||||
Error: ioutil.Discard,
|
||||
Vars: v,
|
||||
NoExec: true,
|
||||
NoFileWrites: true,
|
||||
NoFileReads: true,
|
||||
Environ: []string{},
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err := interpreter.ExecuteContext(ctx, &config)
|
||||
if err != nil {
|
||||
t.Fatalf("execute error: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
75 src/tool/awk/interp/fuzz_unexported_test.go (new file)
@@ -0,0 +1,75 @@
// Fuzz tests for unexported functions for use with the Go 1.18 fuzzer.
|
||||
|
||||
//go:build go1.18
|
||||
// +build go1.18
|
||||
|
||||
package interp
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func FuzzParseFloatPrefix(f *testing.F) {
|
||||
f.Add("")
|
||||
f.Add("foo")
|
||||
f.Add("The quick.")
|
||||
f.Add("0")
|
||||
f.Add("9")
|
||||
f.Add("1.3e4")
|
||||
f.Add("1.3E0")
|
||||
f.Add("1.3e+5")
|
||||
f.Add("1.3e-5")
|
||||
f.Add("1E1000")
|
||||
f.Add(" 1234 ")
|
||||
f.Add("1234xyz")
|
||||
f.Add("-1234567890")
|
||||
f.Add("0x0")
|
||||
f.Add("0X10")
|
||||
f.Add("0x1234567890")
|
||||
f.Add("0xabcdef")
|
||||
f.Add("0xABCDEF")
|
||||
f.Add("-0xa")
|
||||
f.Add("+0XA")
|
||||
f.Add("0xf.f")
|
||||
f.Add("0xf.fp10")
|
||||
f.Add("0xf.fp-10")
|
||||
f.Add("0x.f")
|
||||
f.Add("0xf.")
|
||||
f.Add("0x.")
|
||||
f.Add("nan")
|
||||
f.Add("+nan")
|
||||
f.Add("-nan")
|
||||
f.Add("NAN")
|
||||
f.Add("inf")
|
||||
f.Add("+inf")
|
||||
f.Add("-inf")
|
||||
f.Add("INF")
|
||||
|
||||
f.Fuzz(func(t *testing.T, in string) {
|
||||
nPrefix := parseFloatPrefix(in)
|
||||
if nPrefix != 0 {
|
||||
for i := 1; i <= len(in); i++ {
|
||||
n, _ := parseFloatHelper(in[:i])
|
||||
if n == nPrefix || math.IsNaN(n) && math.IsNaN(nPrefix) {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatalf("no ParseFloat match: %q", in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func parseFloatHelper(s string) (float64, error) {
|
||||
s = strings.TrimSpace(s)
|
||||
s = strings.ToLower(s)
|
||||
if s == "+nan" || s == "-nan" {
|
||||
return math.NaN(), nil
|
||||
}
|
||||
if strings.Contains(s, "0x") && strings.IndexAny(s, "pP") < 0 {
|
||||
s += "p0"
|
||||
}
|
||||
return strconv.ParseFloat(s, 64)
|
||||
}
|
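The "p0" suffix added by parseFloatHelper exists because strconv.ParseFloat only accepts hexadecimal mantissas when a binary exponent is present. A tiny standard-library illustration of that quirk:

```go
package main

import (
	"fmt"
	"strconv"
)

func main() {
	// Rejected: a hex mantissa needs a "p" exponent.
	_, err := strconv.ParseFloat("0xf.f", 64)
	fmt.Println(err)

	// Accepted once the exponent is present: 15 + 15/16.
	n, err := strconv.ParseFloat("0xf.fp0", 64)
	fmt.Println(n, err) // 15.9375 <nil>
}
```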
1095 src/tool/awk/interp/interp.go (new file, diff suppressed because it is too large)
2609 src/tool/awk/interp/interp_test.go (new file, diff suppressed because it is too large)
899 src/tool/awk/interp/io.go (new file)
@@ -0,0 +1,899 @@
// Input/output handling for GoAWK interpreter
|
||||
|
||||
package interp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/benhoyt/goawk/internal/ast"
|
||||
. "github.com/benhoyt/goawk/lexer"
|
||||
)
|
||||
|
||||
// Print a line of output followed by a newline
|
||||
func (p *interp) printLine(writer io.Writer, line string) error {
|
||||
err := writeOutput(writer, line)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return writeOutput(writer, p.outputRecordSep)
|
||||
}
|
||||
|
||||
// Print given arguments followed by a newline (for "print" statement).
|
||||
func (p *interp) printArgs(writer io.Writer, args []value) error {
|
||||
switch p.outputMode {
|
||||
case CSVMode, TSVMode:
|
||||
fields := make([]string, 0, 7) // up to 7 args won't require a heap allocation
|
||||
for _, arg := range args {
|
||||
fields = append(fields, arg.str(p.outputFormat))
|
||||
}
|
||||
err := p.writeCSV(writer, fields)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
// Print OFS-separated args followed by ORS (usually newline).
|
||||
for i, arg := range args {
|
||||
if i > 0 {
|
||||
err := writeOutput(writer, p.outputFieldSep)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
err := writeOutput(writer, arg.str(p.outputFormat))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
err := writeOutput(writer, p.outputRecordSep)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *interp) writeCSV(output io.Writer, fields []string) error {
|
||||
// If output is already a *bufio.Writer (the common case), csv.NewWriter
|
||||
// will use it directly. This is not explicitly documented, but
|
||||
// csv.NewWriter calls bufio.NewWriter which calls bufio.NewWriterSize
|
||||
// with a 4KB buffer, and bufio.NewWriterSize is documented as returning
|
||||
// the underlying bufio.Writer if it's passed a large enough one.
|
||||
var flush func() error
|
||||
_, isBuffered := output.(*bufio.Writer)
|
||||
if !isBuffered {
|
||||
// Otherwise create a new buffered writer and flush after writing.
|
||||
if p.csvOutput == nil {
|
||||
p.csvOutput = bufio.NewWriterSize(output, 4096)
|
||||
} else {
|
||||
p.csvOutput.Reset(output)
|
||||
}
|
||||
output = p.csvOutput
|
||||
flush = p.csvOutput.Flush
|
||||
}
|
||||
|
||||
// Given the above, creating a new one of these is cheap.
|
||||
writer := csv.NewWriter(output)
|
||||
writer.Comma = p.csvOutputConfig.Separator
|
||||
writer.UseCRLF = runtime.GOOS == "windows"
|
||||
err := writer.Write(fields)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flush != nil {
|
||||
return flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Implement a buffered version of WriteCloser so output is buffered
|
||||
// when redirecting to a file (eg: print >"out")
|
||||
type bufferedWriteCloser struct {
|
||||
*bufio.Writer
|
||||
io.Closer
|
||||
}
|
||||
|
||||
func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser {
|
||||
writer := bufio.NewWriterSize(w, outputBufSize)
|
||||
return &bufferedWriteCloser{writer, w}
|
||||
}
|
||||
|
||||
func (wc *bufferedWriteCloser) Close() error {
|
||||
err := wc.Writer.Flush()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return wc.Closer.Close()
|
||||
}
|
||||
|
||||
// Determine the output stream for given redirect token and
|
||||
// destination (file or pipe name)
|
||||
func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) {
|
||||
name := p.toString(destValue)
|
||||
if _, ok := p.inputStreams[name]; ok {
|
||||
return nil, newError("can't write to reader stream")
|
||||
}
|
||||
if w, ok := p.outputStreams[name]; ok {
|
||||
return w, nil
|
||||
}
|
||||
|
||||
switch redirect {
|
||||
case GREATER, APPEND:
|
||||
if name == "-" {
|
||||
// filename of "-" means write to stdout, eg: print "x" >"-"
|
||||
return p.output, nil
|
||||
}
|
||||
// Write or append to file
|
||||
if p.noFileWrites {
|
||||
return nil, newError("can't write to file due to NoFileWrites")
|
||||
}
|
||||
p.flushOutputAndError() // ensure synchronization
|
||||
flags := os.O_CREATE | os.O_WRONLY
|
||||
if redirect == GREATER {
|
||||
flags |= os.O_TRUNC
|
||||
} else {
|
||||
flags |= os.O_APPEND
|
||||
}
|
||||
w, err := os.OpenFile(name, flags, 0644)
|
||||
if err != nil {
|
||||
return nil, newError("output redirection error: %s", err)
|
||||
}
|
||||
buffered := newBufferedWriteCloser(w)
|
||||
p.outputStreams[name] = buffered
|
||||
return buffered, nil
|
||||
|
||||
case PIPE:
|
||||
// Pipe to command
|
||||
if p.noExec {
|
||||
return nil, newError("can't write to pipe due to NoExec")
|
||||
}
|
||||
cmd := p.execShell(name)
|
||||
w, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return nil, newError("error connecting to stdin pipe: %v", err)
|
||||
}
|
||||
cmd.Stdout = p.output
|
||||
cmd.Stderr = p.errorOutput
|
||||
p.flushOutputAndError() // ensure synchronization
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
p.printErrorf("%s\n", err)
|
||||
return ioutil.Discard, nil
|
||||
}
|
||||
p.commands[name] = cmd
|
||||
buffered := newBufferedWriteCloser(w)
|
||||
p.outputStreams[name] = buffered
|
||||
return buffered, nil
|
||||
|
||||
default:
|
||||
// Should never happen
|
||||
panic(fmt.Sprintf("unexpected redirect type %s", redirect))
|
||||
}
|
||||
}
|
||||
|
||||
// Executes code using configured system shell
|
||||
func (p *interp) execShell(code string) *exec.Cmd {
|
||||
executable := p.shellCommand[0]
|
||||
args := p.shellCommand[1:]
|
||||
args = append(args, code)
|
||||
if p.checkCtx {
|
||||
return exec.CommandContext(p.ctx, executable, args...)
|
||||
} else {
|
||||
return exec.Command(executable, args...)
|
||||
}
|
||||
}
|
||||
|
||||
// Get input Scanner to use for "getline" based on file name
|
||||
func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
|
||||
if _, ok := p.outputStreams[name]; ok {
|
||||
return nil, newError("can't read from writer stream")
|
||||
}
|
||||
if _, ok := p.inputStreams[name]; ok {
|
||||
return p.scanners[name], nil
|
||||
}
|
||||
if name == "-" {
|
||||
// filename of "-" means read from stdin, eg: getline <"-"
|
||||
if scanner, ok := p.scanners["-"]; ok {
|
||||
return scanner, nil
|
||||
}
|
||||
scanner := p.newScanner(p.stdin, make([]byte, inputBufSize))
|
||||
p.scanners[name] = scanner
|
||||
return scanner, nil
|
||||
}
|
||||
if p.noFileReads {
|
||||
return nil, newError("can't read from file due to NoFileReads")
|
||||
}
|
||||
r, err := os.Open(name)
|
||||
if err != nil {
|
||||
return nil, err // *os.PathError is handled by caller (getline returns -1)
|
||||
}
|
||||
scanner := p.newScanner(r, make([]byte, inputBufSize))
|
||||
p.scanners[name] = scanner
|
||||
p.inputStreams[name] = r
|
||||
return scanner, nil
|
||||
}
|
||||
|
||||
// Get input Scanner to use for "getline" based on pipe name
|
||||
func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
|
||||
if _, ok := p.outputStreams[name]; ok {
|
||||
return nil, newError("can't read from writer stream")
|
||||
}
|
||||
if _, ok := p.inputStreams[name]; ok {
|
||||
return p.scanners[name], nil
|
||||
}
|
||||
if p.noExec {
|
||||
return nil, newError("can't read from pipe due to NoExec")
|
||||
}
|
||||
cmd := p.execShell(name)
|
||||
cmd.Stdin = p.stdin
|
||||
cmd.Stderr = p.errorOutput
|
||||
r, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, newError("error connecting to stdout pipe: %v", err)
|
||||
}
|
||||
p.flushOutputAndError() // ensure synchronization
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
p.printErrorf("%s\n", err)
|
||||
return bufio.NewScanner(strings.NewReader("")), nil
|
||||
}
|
||||
scanner := p.newScanner(r, make([]byte, inputBufSize))
|
||||
p.commands[name] = cmd
|
||||
p.inputStreams[name] = r
|
||||
p.scanners[name] = scanner
|
||||
return scanner, nil
|
||||
}
|
||||
|
||||
// Create a new buffered Scanner for reading input records
|
||||
func (p *interp) newScanner(input io.Reader, buffer []byte) *bufio.Scanner {
|
||||
scanner := bufio.NewScanner(input)
|
||||
switch {
|
||||
case p.inputMode == CSVMode || p.inputMode == TSVMode:
|
||||
splitter := csvSplitter{
|
||||
separator: p.csvInputConfig.Separator,
|
||||
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
|
||||
comment: p.csvInputConfig.Comment,
|
||||
header: p.csvInputConfig.Header,
|
||||
fields: &p.fields,
|
||||
setFieldNames: p.setFieldNames,
|
||||
}
|
||||
scanner.Split(splitter.scan)
|
||||
case p.recordSep == "\n":
|
||||
// Scanner default is to split on newlines
|
||||
case p.recordSep == "":
|
||||
// Empty string for RS means split on \n\n (blank lines)
|
||||
splitter := blankLineSplitter{terminator: &p.recordTerminator}
|
||||
scanner.Split(splitter.scan)
|
||||
case len(p.recordSep) == 1:
|
||||
splitter := byteSplitter{sep: p.recordSep[0]}
|
||||
scanner.Split(splitter.scan)
|
||||
case utf8.RuneCountInString(p.recordSep) >= 1:
|
||||
// Multi-byte and single char but multi-byte RS use regex
|
||||
splitter := regexSplitter{re: p.recordSepRegex, terminator: &p.recordTerminator}
|
||||
scanner.Split(splitter.scan)
|
||||
}
|
||||
scanner.Buffer(buffer, maxRecordLength)
|
||||
return scanner
|
||||
}
|
||||
|
||||
// setFieldNames is called by csvSplitter.scan on the first row (if the
|
||||
// "header" option is specified).
|
||||
func (p *interp) setFieldNames(names []string) {
|
||||
p.fieldNames = names
|
||||
p.fieldIndexes = nil // clear name-to-index cache
|
||||
|
||||
// Populate FIELDS array (mapping of field indexes to field names).
|
||||
fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"])
|
||||
for k := range fieldsArray {
|
||||
delete(fieldsArray, k)
|
||||
}
|
||||
for i, name := range names {
|
||||
fieldsArray[strconv.Itoa(i+1)] = str(name)
|
||||
}
|
||||
}
|
||||
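setFieldNames is what backs both @"name" lookups and the FIELDS array when the header option is enabled. A minimal sketch of reading FIELDS from an AWK program, using the same Config shape as Example_csv:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	// FIELDS maps 1-based field numbers to header names, per setFieldNames.
	src := `{ print FIELDS[2] "=" @"amount" }`
	prog, err := parser.ParseProgram([]byte(src), nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	config := &interp.Config{
		Stdin:     strings.NewReader("name,amount\nBob,17.50\nJill,20\n"),
		InputMode: interp.CSVMode,
		CSVInput:  interp.CSVInputConfig{Header: true},
	}
	if _, err := interp.ExecProgram(prog, config); err != nil {
		fmt.Println(err)
	}
	// Expected output:
	// amount=17.50
	// amount=20
}
```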
|
||||
// Copied from bufio/scan.go in the stdlib: I guess it's a bit more
|
||||
// efficient than bytes.TrimSuffix(data, []byte("\r"))
|
||||
func dropCR(data []byte) []byte {
|
||||
if len(data) > 0 && data[len(data)-1] == '\r' {
|
||||
return data[:len(data)-1]
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func dropLF(data []byte) []byte {
|
||||
if len(data) > 0 && data[len(data)-1] == '\n' {
|
||||
return data[:len(data)-1]
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
type blankLineSplitter struct {
|
||||
terminator *string
|
||||
}
|
||||
|
||||
func (s blankLineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if atEOF && len(data) == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
// Skip newlines at beginning of data
|
||||
i := 0
|
||||
for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
|
||||
i++
|
||||
}
|
||||
if i >= len(data) {
|
||||
// At end of data after newlines, skip entire data block
|
||||
return i, nil, nil
|
||||
}
|
||||
start := i
|
||||
|
||||
// Try to find two consecutive newlines (or \n\r\n for Windows)
|
||||
for ; i < len(data); i++ {
|
||||
if data[i] != '\n' {
|
||||
continue
|
||||
}
|
||||
end := i
|
||||
if i+1 < len(data) && data[i+1] == '\n' {
|
||||
i += 2
|
||||
for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
|
||||
i++ // Skip newlines at end of record
|
||||
}
|
||||
*s.terminator = string(data[end:i])
|
||||
return i, dropCR(data[start:end]), nil
|
||||
}
|
||||
if i+2 < len(data) && data[i+1] == '\r' && data[i+2] == '\n' {
|
||||
i += 3
|
||||
for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
|
||||
i++ // Skip newlines at end of record
|
||||
}
|
||||
*s.terminator = string(data[end:i])
|
||||
return i, dropCR(data[start:end]), nil
|
||||
}
|
||||
}
|
||||
|
||||
// If we're at EOF, we have one final record; return it
|
||||
if atEOF {
|
||||
token = dropCR(dropLF(data[start:]))
|
||||
*s.terminator = string(data[len(token):])
|
||||
return len(data), token, nil
|
||||
}
|
||||
|
||||
// Request more data
|
||||
return 0, nil, nil
|
||||
}
|
||||
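blankLineSplitter implements AWK's paragraph mode: with RS set to the empty string, records are separated by blank lines. A short sketch driving it through Vars (as in Example_program):

```go
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	// With RS="" each blank-line-separated paragraph is one record.
	prog, err := parser.ParseProgram([]byte(`{ print NR ": " $1 }`), nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	config := &interp.Config{
		Stdin: strings.NewReader("alpha one\nalpha two\n\nbeta one\n"),
		Vars:  []string{"RS", ""},
	}
	if _, err := interp.ExecProgram(prog, config); err != nil {
		fmt.Println(err)
	}
	// Expected output:
	// 1: alpha
	// 2: beta
}
```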
|
||||
// Splitter that splits records on the given separator byte
|
||||
type byteSplitter struct {
|
||||
sep byte
|
||||
}
|
||||
|
||||
func (s byteSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if atEOF && len(data) == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
if i := bytes.IndexByte(data, s.sep); i >= 0 {
|
||||
// We have a full sep-terminated record
|
||||
return i + 1, data[:i], nil
|
||||
}
|
||||
// If at EOF, we have a final, non-terminated record; return it
|
||||
if atEOF {
|
||||
return len(data), data, nil
|
||||
}
|
||||
// Request more data
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
// Splitter that splits records on the given regular expression
|
||||
type regexSplitter struct {
|
||||
re *regexp.Regexp
|
||||
terminator *string
|
||||
}
|
||||
|
||||
func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if atEOF && len(data) == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
loc := s.re.FindIndex(data)
|
||||
// Note: for a regex such as "()", loc[0]==loc[1]. Gawk behavior for this
|
||||
// case is to match the entire input.
|
||||
if loc != nil && loc[0] != loc[1] {
|
||||
*s.terminator = string(data[loc[0]:loc[1]]) // set RT special variable
|
||||
return loc[1], data[:loc[0]], nil
|
||||
}
|
||||
// If at EOF, we have a final, non-terminated record; return it
|
||||
if atEOF {
|
||||
*s.terminator = ""
|
||||
return len(data), data, nil
|
||||
}
|
||||
// Request more data
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
// Splitter that splits records in CSV or TSV format.
|
||||
type csvSplitter struct {
|
||||
separator rune
|
||||
sepLen int
|
||||
comment rune
|
||||
header bool
|
||||
|
||||
recordBuffer []byte
|
||||
fieldIndexes []int
|
||||
noBOMCheck bool
|
||||
|
||||
fields *[]string
|
||||
setFieldNames func(names []string)
|
||||
rowNum int
|
||||
}
|
||||
|
||||
// The structure of this code is taken from the stdlib encoding/csv Reader
|
||||
// code, which is licensed under a compatible BSD-style license.
|
||||
//
|
||||
// We don't support all encoding/csv features: FieldsPerRecord is not
|
||||
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off.
|
||||
func (s *csvSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
// Some CSV files are saved with a UTF-8 BOM at the start; skip it.
|
||||
if !s.noBOMCheck && len(data) >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF {
|
||||
data = data[3:]
|
||||
advance = 3
|
||||
s.noBOMCheck = true
|
||||
}
|
||||
|
||||
origData := data
|
||||
if atEOF && len(data) == 0 {
|
||||
// No more data, tell Scanner to stop.
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
readLine := func() []byte {
|
||||
newline := bytes.IndexByte(data, '\n')
|
||||
var line []byte
|
||||
switch {
|
||||
case newline >= 0:
|
||||
// Process a single line (including newline).
|
||||
line = data[:newline+1]
|
||||
data = data[newline+1:]
|
||||
case atEOF:
|
||||
// If at EOF, we have a final record without a newline.
|
||||
line = data
|
||||
data = data[len(data):]
|
||||
default:
|
||||
// Need more data
|
||||
return nil
|
||||
}
|
||||
|
||||
// For backwards compatibility, drop trailing \r before EOF.
|
||||
if len(line) > 0 && atEOF && line[len(line)-1] == '\r' {
|
||||
line = line[:len(line)-1]
|
||||
advance++
|
||||
}
|
||||
|
||||
return line
|
||||
}
|
||||
|
||||
// Read line (automatically skipping past empty lines and any comments).
|
||||
skip := 0
|
||||
var line []byte
|
||||
for {
|
||||
line = readLine()
|
||||
if len(line) == 0 {
|
||||
return 0, nil, nil // Request more data
|
||||
}
|
||||
if s.comment != 0 && nextRune(line) == s.comment {
|
||||
advance += len(line)
|
||||
skip += len(line)
|
||||
continue // Skip comment lines
|
||||
}
|
||||
if len(line) == lenNewline(line) {
|
||||
advance += len(line)
|
||||
skip += len(line)
|
||||
continue // Skip empty lines
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// Parse each field in the record.
|
||||
const quoteLen = len(`"`)
|
||||
tokenHasCR := false
|
||||
s.recordBuffer = s.recordBuffer[:0]
|
||||
s.fieldIndexes = s.fieldIndexes[:0]
|
||||
parseField:
|
||||
for {
|
||||
if len(line) == 0 || line[0] != '"' {
|
||||
// Non-quoted string field
|
||||
i := bytes.IndexRune(line, s.separator)
|
||||
field := line
|
||||
if i >= 0 {
|
||||
advance += i + s.sepLen
|
||||
field = field[:i]
|
||||
} else {
|
||||
advance += len(field)
|
||||
field = field[:len(field)-lenNewline(field)]
|
||||
}
|
||||
s.recordBuffer = append(s.recordBuffer, field...)
|
||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
||||
if i >= 0 {
|
||||
line = line[i+s.sepLen:]
|
||||
continue parseField
|
||||
}
|
||||
break parseField
|
||||
} else {
|
||||
// Quoted string field
|
||||
line = line[quoteLen:]
|
||||
advance += quoteLen
|
||||
for {
|
||||
i := bytes.IndexByte(line, '"')
|
||||
if i >= 0 {
|
||||
// Hit next quote.
|
||||
s.recordBuffer = append(s.recordBuffer, line[:i]...)
|
||||
line = line[i+quoteLen:]
|
||||
advance += i + quoteLen
|
||||
switch rn := nextRune(line); {
|
||||
case rn == '"':
|
||||
// `""` sequence (append quote).
|
||||
s.recordBuffer = append(s.recordBuffer, '"')
|
||||
line = line[quoteLen:]
|
||||
advance += quoteLen
|
||||
case rn == s.separator:
|
||||
// `",` sequence (end of field).
|
||||
line = line[s.sepLen:]
|
||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
||||
advance += s.sepLen
|
||||
continue parseField
|
||||
case lenNewline(line) == len(line):
|
||||
// `"\n` sequence (end of line).
|
||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
||||
advance += len(line)
|
||||
break parseField
|
||||
default:
|
||||
// `"` sequence (bare quote).
|
||||
s.recordBuffer = append(s.recordBuffer, '"')
|
||||
}
|
||||
} else if len(line) > 0 {
|
||||
// Hit end of line (copy all data so far).
|
||||
advance += len(line)
|
||||
newlineLen := lenNewline(line)
|
||||
if newlineLen == 2 {
|
||||
tokenHasCR = true
|
||||
s.recordBuffer = append(s.recordBuffer, line[:len(line)-2]...)
|
||||
s.recordBuffer = append(s.recordBuffer, '\n')
|
||||
} else {
|
||||
s.recordBuffer = append(s.recordBuffer, line...)
|
||||
}
|
||||
line = readLine()
|
||||
if line == nil {
|
||||
return 0, nil, nil // Request more data
|
||||
}
|
||||
} else {
|
||||
// Abrupt end of file.
|
||||
s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
|
||||
advance += len(line)
|
||||
break parseField
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a single string and create slices out of it.
|
||||
// This pins the memory of the fields together, but allocates once.
|
||||
strBuf := string(s.recordBuffer) // Convert to string once to batch allocations
|
||||
fields := make([]string, len(s.fieldIndexes))
|
||||
preIdx := 0
|
||||
for i, idx := range s.fieldIndexes {
|
||||
fields[i] = strBuf[preIdx:idx]
|
||||
preIdx = idx
|
||||
}
|
||||
|
||||
s.noBOMCheck = true
|
||||
|
||||
if s.rowNum == 0 && s.header {
|
||||
// Set header field names and advance, but don't return a line (token).
|
||||
s.rowNum++
|
||||
s.setFieldNames(fields)
|
||||
return advance, nil, nil
|
||||
}
|
||||
|
||||
// Normal row, set fields and return a line (token).
|
||||
s.rowNum++
|
||||
*s.fields = fields
|
||||
token = origData[skip:advance]
|
||||
token = token[:len(token)-lenNewline(token)]
|
||||
if tokenHasCR {
|
||||
token = bytes.ReplaceAll(token, []byte{'\r'}, nil)
|
||||
}
|
||||
return advance, token, nil
|
||||
}
|
||||
|
||||
// lenNewline reports the number of bytes for the trailing \n.
|
||||
func lenNewline(b []byte) int {
|
||||
if len(b) > 0 && b[len(b)-1] == '\n' {
|
||||
if len(b) > 1 && b[len(b)-2] == '\r' {
|
||||
return 2
|
||||
}
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// nextRune returns the next rune in b or utf8.RuneError.
|
||||
func nextRune(b []byte) rune {
|
||||
r, _ := utf8.DecodeRune(b)
|
||||
return r
|
||||
}
|
||||
|
||||
// Setup for a new input file with given name (empty string if stdin)
|
||||
func (p *interp) setFile(filename string) {
|
||||
p.filename = numStr(filename)
|
||||
p.fileLineNum = 0
|
||||
p.hadFiles = true
|
||||
}
|
||||
|
||||
// Setup for a new input line (but don't parse it into fields till we
|
||||
// need to)
|
||||
func (p *interp) setLine(line string, isTrueStr bool) {
|
||||
p.line = line
|
||||
p.lineIsTrueStr = isTrueStr
|
||||
p.haveFields = false
|
||||
p.reparseCSV = true
|
||||
}
|
||||
|
||||
// Ensure that the current line is parsed into fields, splitting it
|
||||
// into fields if it hasn't been already
|
||||
func (p *interp) ensureFields() {
|
||||
if p.haveFields {
|
||||
return
|
||||
}
|
||||
p.haveFields = true
|
||||
|
||||
switch {
|
||||
case p.inputMode == CSVMode || p.inputMode == TSVMode:
|
||||
if p.reparseCSV {
|
||||
scanner := bufio.NewScanner(strings.NewReader(p.line))
|
||||
scanner.Buffer(nil, maxRecordLength)
|
||||
splitter := csvSplitter{
|
||||
separator: p.csvInputConfig.Separator,
|
||||
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
|
||||
comment: p.csvInputConfig.Comment,
|
||||
fields: &p.fields,
|
||||
}
|
||||
scanner.Split(splitter.scan)
|
||||
if !scanner.Scan() {
|
||||
p.fields = nil
|
||||
}
|
||||
} else {
|
||||
// Normally fields have already been parsed by csvSplitter
|
||||
}
|
||||
case p.fieldSep == " ":
|
||||
// FS space (default) means split fields on any whitespace
|
||||
p.fields = strings.Fields(p.line)
|
||||
case p.line == "":
|
||||
p.fields = nil
|
||||
case utf8.RuneCountInString(p.fieldSep) <= 1:
|
||||
// 1-char FS is handled as plain split (not regex)
|
||||
p.fields = strings.Split(p.line, p.fieldSep)
|
||||
default:
|
||||
// Split on FS as a regex
|
||||
p.fields = p.fieldSepRegex.Split(p.line, -1)
|
||||
}
|
||||
|
||||
// Special case for when RS=="" and FS is single character,
|
||||
// split on newline in addition to FS. See more here:
|
||||
// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
|
||||
if p.inputMode == DefaultMode && p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 {
|
||||
fields := make([]string, 0, len(p.fields))
|
||||
for _, field := range p.fields {
|
||||
lines := strings.Split(field, "\n")
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimSuffix(line, "\r")
|
||||
fields = append(fields, trimmed)
|
||||
}
|
||||
}
|
||||
p.fields = fields
|
||||
}
|
||||
|
||||
p.fieldsIsTrueStr = p.fieldsIsTrueStr[:0] // avoid allocation most of the time
|
||||
for range p.fields {
|
||||
p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
|
||||
}
|
||||
p.numFields = len(p.fields)
|
||||
}
|
||||
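ensureFields picks between whitespace splitting, plain single-character splitting, and regex splitting depending on FS. A small sketch of the regex case, where a multi-character FS is treated as a regular expression:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	// FS with more than one character is applied as a regular expression,
	// so ":+" splits on runs of colons of any length.
	prog, err := parser.ParseProgram([]byte(`{ print $2 }`), nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	config := &interp.Config{
		Stdin: strings.NewReader("one::two:::three\n"),
		Vars:  []string{"FS", ":+"},
	}
	if _, err := interp.ExecProgram(prog, config); err != nil {
		fmt.Println(err)
	}
	// Expected output: two
}
```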
|
||||
// Fetch next line (record) of input from current input file, opening
|
||||
// next input file if done with previous one
|
||||
func (p *interp) nextLine() (string, error) {
|
||||
for {
|
||||
if p.scanner == nil {
|
||||
if prevInput, ok := p.input.(io.Closer); ok && p.input != p.stdin {
|
||||
// Previous input is file, close it
|
||||
_ = prevInput.Close()
|
||||
}
|
||||
if p.filenameIndex >= p.argc && !p.hadFiles {
|
||||
// Moved past number of ARGV args and haven't seen
|
||||
// any files yet, use stdin
|
||||
p.input = p.stdin
|
||||
p.setFile("-")
|
||||
} else {
|
||||
if p.filenameIndex >= p.argc {
|
||||
// Done with ARGV args, all done with input
|
||||
return "", io.EOF
|
||||
}
|
||||
// Fetch next filename from ARGV. Can't use
|
||||
// getArrayValue() here as it would set the value if
|
||||
// not present
|
||||
index := strconv.Itoa(p.filenameIndex)
|
||||
argvIndex := p.program.Arrays["ARGV"]
|
||||
argvArray := p.array(ast.ScopeGlobal, argvIndex)
|
||||
filename := p.toString(argvArray[index])
|
||||
p.filenameIndex++
|
||||
|
||||
// Is it actually a var=value assignment?
|
||||
matches := varRegex.FindStringSubmatch(filename)
|
||||
if len(matches) >= 3 {
|
||||
// Yep, set variable to value and keep going
|
||||
name, val := matches[1], matches[2]
|
||||
// Oddly, var=value args must interpret escapes (issue #129)
|
||||
unescaped, err := Unescape(val)
|
||||
if err == nil {
|
||||
val = unescaped
|
||||
}
|
||||
err = p.setVarByName(name, val)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
continue
|
||||
} else if filename == "" {
|
||||
// ARGV arg is empty string, skip
|
||||
p.input = nil
|
||||
continue
|
||||
} else if filename == "-" {
|
||||
// ARGV arg is "-" meaning stdin
|
||||
p.input = p.stdin
|
||||
p.setFile("-")
|
||||
} else {
|
||||
// A regular file name, open it
|
||||
if p.noFileReads {
|
||||
return "", newError("can't read from file due to NoFileReads")
|
||||
}
|
||||
input, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
p.input = input
|
||||
p.setFile(filename)
|
||||
}
|
||||
}
|
||||
if p.inputBuffer == nil { // reuse buffer from last input file
|
||||
p.inputBuffer = make([]byte, inputBufSize)
|
||||
}
|
||||
p.scanner = p.newScanner(p.input, p.inputBuffer)
|
||||
}
|
||||
p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars
|
||||
if p.scanner.Scan() {
|
||||
// We scanned some input, break and return it
|
||||
break
|
||||
}
|
||||
err := p.scanner.Err()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error reading from input: %s", err)
|
||||
}
|
||||
// Signal loop to move on to the next file
|
||||
p.scanner = nil
|
||||
}
|
||||
|
||||
// Got a line (record) of input, return it
|
||||
p.lineNum++
|
||||
p.fileLineNum++
|
||||
return p.scanner.Text(), nil
|
||||
}
|
||||
|
||||
// Write output string to given writer, producing correct line endings
|
||||
// on Windows (CR LF).
|
||||
func writeOutput(w io.Writer, s string) error {
|
||||
if crlfNewline {
|
||||
// First normalize to \n, then convert all newlines to \r\n
|
||||
// (on Windows). NOTE: creating two new strings is almost
|
||||
// certainly slow; would be better to create a custom Writer.
|
||||
s = strings.Replace(s, "\r\n", "\n", -1)
|
||||
s = strings.Replace(s, "\n", "\r\n", -1)
|
||||
}
|
||||
_, err := io.WriteString(w, s)
|
||||
return err
|
||||
}
|
||||
|
||||
// Close all streams, commands, and so on (after program execution).
|
||||
func (p *interp) closeAll() {
|
||||
if prevInput, ok := p.input.(io.Closer); ok {
|
||||
_ = prevInput.Close()
|
||||
}
|
||||
for _, r := range p.inputStreams {
|
||||
_ = r.Close()
|
||||
}
|
||||
for _, w := range p.outputStreams {
|
||||
_ = w.Close()
|
||||
}
|
||||
for _, cmd := range p.commands {
|
||||
_ = cmd.Wait()
|
||||
}
|
||||
if f, ok := p.output.(flusher); ok {
|
||||
_ = f.Flush()
|
||||
}
|
||||
if f, ok := p.errorOutput.(flusher); ok {
|
||||
_ = f.Flush()
|
||||
}
|
||||
}
|
||||
|
||||
// Flush all output streams as well as standard output. Report whether all
|
||||
// streams were flushed successfully (logging error(s) if not).
|
||||
func (p *interp) flushAll() bool {
|
||||
allGood := true
|
||||
for name, writer := range p.outputStreams {
|
||||
allGood = allGood && p.flushWriter(name, writer)
|
||||
}
|
||||
if _, ok := p.output.(flusher); ok {
|
||||
// User-provided output may or may not be flushable
|
||||
allGood = allGood && p.flushWriter("stdout", p.output)
|
||||
}
|
||||
return allGood
|
||||
}
|
||||
|
||||
// Flush a single, named output stream, and report whether it was flushed
|
||||
// successfully (logging an error if not).
|
||||
func (p *interp) flushStream(name string) bool {
|
||||
writer := p.outputStreams[name]
|
||||
if writer == nil {
|
||||
p.printErrorf("error flushing %q: not an output file or pipe\n", name)
|
||||
return false
|
||||
}
|
||||
return p.flushWriter(name, writer)
|
||||
}
|
||||
|
||||
type flusher interface {
|
||||
Flush() error
|
||||
}
|
||||
|
||||
// Flush given output writer, and report whether it was flushed successfully
|
||||
// (logging an error if not).
|
||||
func (p *interp) flushWriter(name string, writer io.Writer) bool {
|
||||
flusher, ok := writer.(flusher)
|
||||
if !ok {
|
||||
return true // not a flusher, don't error
|
||||
}
|
||||
err := flusher.Flush()
|
||||
if err != nil {
|
||||
p.printErrorf("error flushing %q: %v\n", name, err)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Flush output and error streams.
|
||||
func (p *interp) flushOutputAndError() {
|
||||
if flusher, ok := p.output.(flusher); ok {
|
||||
_ = flusher.Flush()
|
||||
}
|
||||
if flusher, ok := p.errorOutput.(flusher); ok {
|
||||
_ = flusher.Flush()
|
||||
}
|
||||
}
|
||||
|
||||
// Print a message to the error output stream, flushing as necessary.
|
||||
func (p *interp) printErrorf(format string, args ...interface{}) {
|
||||
if flusher, ok := p.output.(flusher); ok {
|
||||
_ = flusher.Flush() // ensure synchronization
|
||||
}
|
||||
fmt.Fprintf(p.errorOutput, format, args...)
|
||||
if flusher, ok := p.errorOutput.(flusher); ok {
|
||||
_ = flusher.Flush()
|
||||
}
|
||||
}
|
176
src/tool/awk/interp/newexecute.go
Normal file
|
@ -0,0 +1,176 @@
|
|||
// The New...Execute API (allows you to efficiently execute the same program repeatedly).
|
||||
|
||||
package interp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
const checkContextOps = 1000 // for efficiency, only check context every N instructions
|
||||
|
||||
// Interpreter is an interpreter for a specific program, allowing you to
|
||||
// efficiently execute the same program over and over with different inputs.
|
||||
// Use New to create an Interpreter.
|
||||
//
|
||||
// Most programs won't need reusable execution, and should use the simpler
|
||||
// Exec or ExecProgram functions instead.
|
||||
type Interpreter struct {
|
||||
interp *interp
|
||||
}
|
||||
|
||||
// New creates a reusable interpreter for the given program.
|
||||
//
|
||||
// Most programs won't need reusable execution, and should use the simpler
|
||||
// Exec or ExecProgram functions instead.
|
||||
func New(program *parser.Program) (*Interpreter, error) {
|
||||
p := newInterp(program)
|
||||
return &Interpreter{interp: p}, nil
|
||||
}
|
||||
|
||||
// Execute runs this program with the given execution configuration (input,
|
||||
// output, and variables) and returns the exit status code of the program. A
|
||||
// nil config is valid and will use the defaults (zero values).
|
||||
//
|
||||
// Internal memory allocations are reused, so calling Execute on the same
|
||||
// Interpreter instance is significantly more efficient than calling
|
||||
// ExecProgram multiple times.
|
||||
//
|
||||
// I/O state is reset between each run, but variables and the random number
|
||||
// generator seed are not; use ResetVars and ResetRand to reset those.
|
||||
//
|
||||
// It's best to set config.Environ to a non-nil slice, otherwise Execute will
|
||||
// call the relatively inefficient os.Environ each time. Set config.Environ to
|
||||
// []string{} if the script doesn't need environment variables, or call
|
||||
// os.Environ once and set config.Environ to that value each execution.
|
||||
//
|
||||
// Note that config.Funcs must be the same value provided to
|
||||
// parser.ParseProgram, and must not change between calls to Execute.
|
||||
func (p *Interpreter) Execute(config *Config) (int, error) {
|
||||
p.interp.resetCore()
|
||||
p.interp.checkCtx = false
|
||||
|
||||
err := p.interp.setExecuteConfig(config)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return p.interp.executeAll()
|
||||
}
|
||||
|
||||
func (p *interp) resetCore() {
|
||||
p.scanner = nil
|
||||
for k := range p.scanners {
|
||||
delete(p.scanners, k)
|
||||
}
|
||||
p.input = nil
|
||||
for k := range p.inputStreams {
|
||||
delete(p.inputStreams, k)
|
||||
}
|
||||
for k := range p.outputStreams {
|
||||
delete(p.outputStreams, k)
|
||||
}
|
||||
for k := range p.commands {
|
||||
delete(p.commands, k)
|
||||
}
|
||||
|
||||
p.sp = 0
|
||||
p.localArrays = p.localArrays[:0]
|
||||
p.callDepth = 0
|
||||
|
||||
p.filename = null()
|
||||
p.line = ""
|
||||
p.lineIsTrueStr = false
|
||||
p.lineNum = 0
|
||||
p.fileLineNum = 0
|
||||
p.fields = nil
|
||||
p.fieldsIsTrueStr = nil
|
||||
p.numFields = 0
|
||||
p.haveFields = false
|
||||
|
||||
p.exitStatus = 0
|
||||
}
|
||||
|
||||
func (p *interp) resetVars() {
|
||||
// Reset global scalars
|
||||
for i := range p.globals {
|
||||
p.globals[i] = null()
|
||||
}
|
||||
|
||||
// Reset global arrays
|
||||
for _, array := range p.arrays {
|
||||
for k := range array {
|
||||
delete(array, k)
|
||||
}
|
||||
}
|
||||
|
||||
// Reset special variables
|
||||
p.convertFormat = "%.6g"
|
||||
p.outputFormat = "%.6g"
|
||||
p.fieldSep = " "
|
||||
p.fieldSepRegex = nil
|
||||
p.recordSep = "\n"
|
||||
p.recordSepRegex = nil
|
||||
p.recordTerminator = ""
|
||||
p.outputFieldSep = " "
|
||||
p.outputRecordSep = "\n"
|
||||
p.subscriptSep = "\x1c"
|
||||
p.matchLength = 0
|
||||
p.matchStart = 0
|
||||
}
|
||||
|
||||
// ResetVars resets this interpreter's variables, setting scalar variables to
|
||||
// null, clearing arrays, and resetting special variables such as FS and RS to
|
||||
// their defaults.
|
||||
func (p *Interpreter) ResetVars() {
|
||||
p.interp.resetVars()
|
||||
}
|
||||
|
||||
// ResetRand resets this interpreter's random number generator seed, so that
|
||||
// rand() produces the same sequence it would have after calling New. This is
|
||||
// a relatively CPU-intensive operation.
|
||||
func (p *Interpreter) ResetRand() {
|
||||
p.interp.randSeed = 1.0
|
||||
p.interp.random.Seed(int64(math.Float64bits(p.interp.randSeed)))
|
||||
}
|
||||
|
||||
// ExecuteContext is like Execute, but takes a context to allow the caller to
|
||||
// set an execution timeout or cancel the execution. For efficiency, the
|
||||
// context is only tested every 1000 virtual machine instructions.
|
||||
//
|
||||
// Context handling is not preemptive: currently long-running operations like
|
||||
// system() won't be interrupted.
|
||||
func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error) {
|
||||
p.interp.resetCore()
|
||||
p.interp.checkCtx = ctx != context.Background() && ctx != context.TODO()
|
||||
p.interp.ctx = ctx
|
||||
p.interp.ctxDone = ctx.Done()
|
||||
p.interp.ctxOps = 0
|
||||
|
||||
err := p.interp.setExecuteConfig(config)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return p.interp.executeAll()
|
||||
}
|
||||
|
||||
func (p *interp) checkContext() error {
|
||||
p.ctxOps++
|
||||
if p.ctxOps < checkContextOps {
|
||||
return nil
|
||||
}
|
||||
p.ctxOps = 0
|
||||
return p.checkContextNow()
|
||||
}
|
||||
|
||||
func (p *interp) checkContextNow() error {
|
||||
select {
|
||||
case <-p.ctxDone:
|
||||
return p.ctx.Err()
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
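A minimal usage sketch of the New/Execute API defined above, using the import paths and Config fields (Stdin, Output, Environ) that appear elsewhere in this commit; error handling is kept deliberately simple:

package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

func main() {
	prog, err := parser.ParseProgram([]byte(`{ total += $1 } END { print total }`), nil)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	interpreter, err := interp.New(prog)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Execute the same compiled program repeatedly, reusing internal
	// allocations. ResetVars clears globals (total) between runs.
	for _, input := range []string{"1\n2\n3\n", "10\n20\n"} {
		interpreter.ResetVars()
		_, err := interpreter.Execute(&interp.Config{
			Stdin:   strings.NewReader(input),
			Output:  os.Stdout,
			Environ: []string{}, // per the doc comment: avoids calling os.Environ each run
		})
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}
}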
163
src/tool/awk/interp/newexecute_test.go
Normal file
|
@ -0,0 +1,163 @@
|
|||
// Tests for the New...Execute API.
|
||||
|
||||
package interp_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/benhoyt/goawk/interp"
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
// This definitely doesn't test that everything was reset, but it's a good start.
|
||||
func TestNewExecute(t *testing.T) {
|
||||
source := `{ print NR, OFMT, x, y, a["k"], $1, $3; OFMT="%g"; x++; y++; a["k"]++ }`
|
||||
interpreter := newInterp(t, source)
|
||||
|
||||
// First execution.
|
||||
var output bytes.Buffer
|
||||
status, err := interpreter.Execute(&interp.Config{
|
||||
Stdin: strings.NewReader("one two three\nfour five six\n"),
|
||||
Output: &output,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("error executing: %v", err)
|
||||
}
|
||||
if status != 0 {
|
||||
t.Fatalf("expected status 0, got %d", status)
|
||||
}
|
||||
normalized := normalizeNewlines(output.String())
|
||||
expected := "1 %.6g one three\n2 %g 1 1 1 four six\n"
|
||||
if normalized != expected {
|
||||
t.Fatalf("expected %q, got %q", expected, normalized)
|
||||
}
|
||||
|
||||
// Second execution, with ResetVars.
|
||||
output.Reset()
|
||||
interpreter.ResetVars()
|
||||
status, err = interpreter.Execute(&interp.Config{
|
||||
Stdin: strings.NewReader("ONE TWO THREE\nFOUR FIVE SIX\n"),
|
||||
Output: &output,
|
||||
Vars: []string{"x", "10"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("error executing: %v", err)
|
||||
}
|
||||
if status != 0 {
|
||||
t.Fatalf("expected status 0, got %d", status)
|
||||
}
|
||||
normalized = normalizeNewlines(output.String())
|
||||
expected = "1 %.6g 10 ONE THREE\n2 %g 11 1 1 FOUR SIX\n"
|
||||
if normalized != expected {
|
||||
t.Fatalf("expected %q, got %q", expected, normalized)
|
||||
}
|
||||
|
||||
// Third execution, without ResetVars.
|
||||
output.Reset()
|
||||
status, err = interpreter.Execute(&interp.Config{
|
||||
Stdin: strings.NewReader("1 2 3\n4 5 6\n"),
|
||||
Output: &output,
|
||||
Vars: []string{"x", "100"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("error executing: %v", err)
|
||||
}
|
||||
if status != 0 {
|
||||
t.Fatalf("expected status 0, got %d", status)
|
||||
}
|
||||
normalized = normalizeNewlines(output.String())
|
||||
expected = "1 %g 100 2 2 1 3\n2 %g 101 3 3 4 6\n"
|
||||
if normalized != expected {
|
||||
t.Fatalf("expected %q, got %q", expected, normalized)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResetRand(t *testing.T) {
|
||||
source := `BEGIN { print rand(), rand(), rand() }`
|
||||
interpreter := newInterp(t, source)
|
||||
var output bytes.Buffer
|
||||
|
||||
_, err := interpreter.Execute(&interp.Config{Output: &output})
|
||||
if err != nil {
|
||||
t.Fatalf("error executing: %v", err)
|
||||
}
|
||||
original := output.String()
|
||||
|
||||
output.Reset()
|
||||
_, err = interpreter.Execute(&interp.Config{Output: &output})
|
||||
if err != nil {
|
||||
t.Fatalf("error executing: %v", err)
|
||||
}
|
||||
noResetRand := output.String()
|
||||
if original == noResetRand {
|
||||
t.Fatalf("expected different random numbers, got %q both times", original)
|
||||
}
|
||||
|
||||
output.Reset()
|
||||
interpreter.ResetRand()
|
||||
_, err = interpreter.Execute(&interp.Config{Output: &output})
|
||||
if err != nil {
|
||||
t.Fatalf("error executing: %v", err)
|
||||
}
|
||||
withResetRand := output.String()
|
||||
if original != withResetRand {
|
||||
t.Fatalf("expected same random numbers (%q) as original (%q)", withResetRand, original)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteContextNoError(t *testing.T) {
|
||||
interpreter := newInterp(t, `BEGIN {}`)
|
||||
_, err := interpreter.ExecuteContext(context.Background(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("execute error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteContextTimeout(t *testing.T) {
|
||||
interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err := interpreter.ExecuteContext(ctx, nil)
|
||||
if !errors.Is(err, context.DeadlineExceeded) {
|
||||
t.Fatalf("expected DeadlineExceeded error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteContextCancel(t *testing.T) {
|
||||
interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel it right away
|
||||
_, err := interpreter.ExecuteContext(ctx, nil)
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
t.Fatalf("expected Canceled error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteContextSystemTimeout(t *testing.T) {
|
||||
t.Skip("TODO: skipping for now due to #122")
|
||||
interpreter := newInterp(t, `BEGIN { print system("sleep 4") }`)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err := interpreter.ExecuteContext(ctx, nil)
|
||||
if !errors.Is(err, context.DeadlineExceeded) {
|
||||
t.Fatalf("expected DeadlineExceeded error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func newInterp(t *testing.T, src string) *interp.Interpreter {
|
||||
t.Helper()
|
||||
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("parse error: %v", err)
|
||||
}
|
||||
interpreter, err := interp.New(prog)
|
||||
if err != nil {
|
||||
t.Fatalf("interp.New error: %v", err)
|
||||
}
|
||||
return interpreter
|
||||
}
|
294
src/tool/awk/interp/value.go
Normal file
|
@ -0,0 +1,294 @@
|
|||
// GoAWK interpreter value type (not exported).
|
||||
|
||||
package interp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type valueType uint8
|
||||
|
||||
const (
|
||||
typeNull valueType = iota
|
||||
typeStr
|
||||
typeNum
|
||||
typeNumStr
|
||||
)
|
||||
|
||||
// An AWK value (these are passed around by value)
|
||||
type value struct {
|
||||
typ valueType // Type of value
|
||||
s string // String value (for typeStr and typeNumStr)
|
||||
n float64 // Numeric value (for typeNum)
|
||||
}
|
||||
|
||||
// Create a new null value
|
||||
func null() value {
|
||||
return value{}
|
||||
}
|
||||
|
||||
// Create a new number value
|
||||
func num(n float64) value {
|
||||
return value{typ: typeNum, n: n}
|
||||
}
|
||||
|
||||
// Create a new string value
|
||||
func str(s string) value {
|
||||
return value{typ: typeStr, s: s}
|
||||
}
|
||||
|
||||
// Create a new value to represent a "numeric string" from an input field
|
||||
func numStr(s string) value {
|
||||
return value{typ: typeNumStr, s: s}
|
||||
}
|
||||
|
||||
// Create a numeric value from a Go bool
|
||||
func boolean(b bool) value {
|
||||
if b {
|
||||
return num(1)
|
||||
}
|
||||
return num(0)
|
||||
}
|
||||
|
||||
// String returns a string representation of v for debugging.
|
||||
func (v value) String() string {
|
||||
switch v.typ {
|
||||
case typeStr:
|
||||
return fmt.Sprintf("str(%q)", v.s)
|
||||
case typeNum:
|
||||
return fmt.Sprintf("num(%s)", v.str("%.6g"))
|
||||
case typeNumStr:
|
||||
return fmt.Sprintf("numStr(%q)", v.s)
|
||||
default:
|
||||
return "null()"
|
||||
}
|
||||
}
|
||||
|
||||
// Return true if value is a "true string" (a string or a "numeric string"
|
||||
// from an input field that can't be converted to a number). If false,
|
||||
// also return the (possibly converted) number.
|
||||
func (v value) isTrueStr() (float64, bool) {
|
||||
switch v.typ {
|
||||
case typeStr:
|
||||
return 0, true
|
||||
case typeNumStr:
|
||||
f, err := parseFloat(v.s)
|
||||
if err != nil {
|
||||
return 0, true
|
||||
}
|
||||
return f, false
|
||||
default: // typeNum, typeNull
|
||||
return v.n, false
|
||||
}
|
||||
}
|
||||
|
||||
// Return Go bool value of AWK value. For numbers or numeric strings,
|
||||
// zero is false and everything else is true. For strings, empty
|
||||
// string is false and everything else is true.
|
||||
func (v value) boolean() bool {
|
||||
switch v.typ {
|
||||
case typeStr:
|
||||
return v.s != ""
|
||||
case typeNumStr:
|
||||
f, err := parseFloat(v.s)
|
||||
if err != nil {
|
||||
return v.s != ""
|
||||
}
|
||||
return f != 0
|
||||
default: // typeNum, typeNull
|
||||
return v.n != 0
|
||||
}
|
||||
}
|
||||
|
||||
// Like strconv.ParseFloat, but allow hex floating point without exponent, and
|
||||
// allow "+nan" and "-nan" (though they both return math.NaN()). Also disallow
|
||||
// underscore digit separators.
|
||||
func parseFloat(s string) (float64, error) {
|
||||
s = strings.TrimSpace(s)
|
||||
if len(s) > 1 && (s[0] == '+' || s[0] == '-') {
|
||||
if len(s) == 4 && hasNaNPrefix(s[1:]) {
|
||||
// ParseFloat doesn't handle "nan" with sign prefix, so handle it here.
|
||||
return math.NaN(), nil
|
||||
}
|
||||
if len(s) > 3 && hasHexPrefix(s[1:]) && strings.IndexByte(s, 'p') < 0 {
|
||||
s += "p0"
|
||||
}
|
||||
} else if len(s) > 2 && hasHexPrefix(s) && strings.IndexByte(s, 'p') < 0 {
|
||||
s += "p0"
|
||||
}
|
||||
n, err := strconv.ParseFloat(s, 64)
|
||||
if err == nil && strings.IndexByte(s, '_') >= 0 {
|
||||
// Underscore separators aren't supported by AWK.
|
||||
return 0, strconv.ErrSyntax
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Return value's string value, or convert to a string using given
|
||||
// format if a number value. Integers are a special case and don't
|
||||
// use floatFormat.
|
||||
func (v value) str(floatFormat string) string {
|
||||
if v.typ == typeNum {
|
||||
switch {
|
||||
case math.IsNaN(v.n):
|
||||
return "nan"
|
||||
case math.IsInf(v.n, 0):
|
||||
if v.n < 0 {
|
||||
return "-inf"
|
||||
} else {
|
||||
return "inf"
|
||||
}
|
||||
case v.n == float64(int(v.n)):
|
||||
return strconv.Itoa(int(v.n))
|
||||
default:
|
||||
if floatFormat == "%.6g" {
|
||||
return strconv.FormatFloat(v.n, 'g', 6, 64)
|
||||
}
|
||||
return fmt.Sprintf(floatFormat, v.n)
|
||||
}
|
||||
}
|
||||
// For typeStr and typeNumStr we already have the string, for
|
||||
// typeNull v.s == "".
|
||||
return v.s
|
||||
}
|
||||
|
||||
// Return value's number value, converting from string if necessary
|
||||
func (v value) num() float64 {
|
||||
switch v.typ {
|
||||
case typeStr, typeNumStr:
|
||||
// Ensure string starts with a float and convert it
|
||||
return parseFloatPrefix(v.s)
|
||||
default: // typeNum, typeNull
|
||||
return v.n
|
||||
}
|
||||
}
|
||||
|
||||
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
|
||||
|
||||
// Like strconv.ParseFloat, but parses at the start of string and
|
||||
// allows things like "1.5foo"
|
||||
func parseFloatPrefix(s string) float64 {
|
||||
// Skip whitespace at start
|
||||
i := 0
|
||||
for i < len(s) && asciiSpace[s[i]] != 0 {
|
||||
i++
|
||||
}
|
||||
start := i
|
||||
|
||||
// Parse optional sign and check for NaN and Inf.
|
||||
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
||||
i++
|
||||
}
|
||||
if i+3 <= len(s) {
|
||||
if hasNaNPrefix(s[i:]) {
|
||||
return math.NaN()
|
||||
}
|
||||
if hasInfPrefix(s[i:]) {
|
||||
if s[start] == '-' {
|
||||
return math.Inf(-1)
|
||||
}
|
||||
return math.Inf(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse mantissa: initial digit(s), optional '.', then more digits
|
||||
if i+2 < len(s) && hasHexPrefix(s[i:]) {
|
||||
return parseHexFloatPrefix(s, start, i+2)
|
||||
}
|
||||
gotDigit := false
|
||||
for i < len(s) && isDigit(s[i]) {
|
||||
gotDigit = true
|
||||
i++
|
||||
}
|
||||
if i < len(s) && s[i] == '.' {
|
||||
i++
|
||||
}
|
||||
for i < len(s) && isDigit(s[i]) {
|
||||
gotDigit = true
|
||||
i++
|
||||
}
|
||||
if !gotDigit {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Parse exponent ("1e" and similar are allowed, but ParseFloat
|
||||
// rejects them)
|
||||
end := i
|
||||
if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
|
||||
i++
|
||||
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
||||
i++
|
||||
}
|
||||
for i < len(s) && isDigit(s[i]) {
|
||||
i++
|
||||
end = i
|
||||
}
|
||||
}
|
||||
|
||||
floatStr := s[start:end]
|
||||
f, _ := strconv.ParseFloat(floatStr, 64)
|
||||
return f // Returns infinity in case of "value out of range" error
|
||||
}
|
||||
|
||||
func hasHexPrefix(s string) bool {
|
||||
return s[0] == '0' && (s[1] == 'x' || s[1] == 'X')
|
||||
}
|
||||
|
||||
func hasNaNPrefix(s string) bool {
|
||||
return (s[0] == 'n' || s[0] == 'N') && (s[1] == 'a' || s[1] == 'A') && (s[2] == 'n' || s[2] == 'N')
|
||||
}
|
||||
|
||||
func hasInfPrefix(s string) bool {
|
||||
return (s[0] == 'i' || s[0] == 'I') && (s[1] == 'n' || s[1] == 'N') && (s[2] == 'f' || s[2] == 'F')
|
||||
}
|
||||
|
||||
// Helper used by parseFloatPrefix to handle hexadecimal floating point.
|
||||
func parseHexFloatPrefix(s string, start, i int) float64 {
|
||||
gotDigit := false
|
||||
for i < len(s) && isHexDigit(s[i]) {
|
||||
gotDigit = true
|
||||
i++
|
||||
}
|
||||
if i < len(s) && s[i] == '.' {
|
||||
i++
|
||||
}
|
||||
for i < len(s) && isHexDigit(s[i]) {
|
||||
gotDigit = true
|
||||
i++
|
||||
}
|
||||
if !gotDigit {
|
||||
return 0
|
||||
}
|
||||
|
||||
gotExponent := false
|
||||
end := i
|
||||
if i < len(s) && (s[i] == 'p' || s[i] == 'P') {
|
||||
i++
|
||||
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
||||
i++
|
||||
}
|
||||
for i < len(s) && isDigit(s[i]) {
|
||||
gotExponent = true
|
||||
i++
|
||||
end = i
|
||||
}
|
||||
}
|
||||
|
||||
floatStr := s[start:end]
|
||||
if !gotExponent {
|
||||
floatStr += "p0" // AWK allows "0x12", ParseFloat requires "0x12p0"
|
||||
}
|
||||
f, _ := strconv.ParseFloat(floatStr, 64)
|
||||
return f // Returns infinity in case of "value out of range" error
|
||||
}
|
||||
|
||||
func isDigit(c byte) bool {
|
||||
return c >= '0' && c <= '9'
|
||||
}
|
||||
|
||||
func isHexDigit(c byte) bool {
|
||||
return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
|
||||
}
|
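A hypothetical in-package test sketch (file and test names are illustrative, assuming it sits alongside value.go in package interp) that spells out the conversions implied by parseFloatPrefix above:

package interp

import (
	"math"
	"testing"
)

// TestParseFloatPrefixSketch is a hypothetical test; it lists the behaviour
// implied by the prefix-parsing code above.
func TestParseFloatPrefixSketch(t *testing.T) {
	cases := []struct {
		in   string
		want float64
	}{
		{"1.5foo", 1.5},      // trailing junk ignored
		{"  -2e3xyz", -2000}, // leading whitespace and sign handled
		{"0x12", 18},         // hex without 'p' exponent gets "p0" appended
		{"+nan", math.NaN()}, // signed NaN accepted
		{"abc", 0},           // no leading number parses as 0
	}
	for _, c := range cases {
		got := parseFloatPrefix(c.in)
		same := got == c.want || (math.IsNaN(got) && math.IsNaN(c.want))
		if !same {
			t.Errorf("parseFloatPrefix(%q) = %v, want %v", c.in, got, c.want)
		}
	}
}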
1259
src/tool/awk/interp/vm.go
Normal file
File diff suppressed because it is too large
499
src/tool/awk/lexer/lexer.go
Normal file
|
@ -0,0 +1,499 @@
|
|||
// Package lexer is an AWK lexer (tokenizer).
|
||||
//
|
||||
// The lexer turns a string of AWK source code into a stream of
|
||||
// tokens for parsing.
|
||||
//
|
||||
// To tokenize some source, create a new lexer with NewLexer(src) and
|
||||
// then call Scan() until the token type is EOF or ILLEGAL.
|
||||
package lexer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// Lexer tokenizes a byte string of AWK source code. Use NewLexer to
|
||||
// actually create a lexer, and Scan() or ScanRegex() to get tokens.
|
||||
type Lexer struct {
|
||||
src []byte
|
||||
offset int
|
||||
ch byte
|
||||
pos Position
|
||||
nextPos Position
|
||||
hadSpace bool
|
||||
lastTok Token
|
||||
}
|
||||
|
||||
// Position stores the source line and column where a token starts.
|
||||
type Position struct {
|
||||
// Line number of the token (starts at 1).
|
||||
Line int
|
||||
// Column on the line (starts at 1). Note that this is the byte
|
||||
// offset into the line, not rune offset.
|
||||
Column int
|
||||
}
|
||||
|
||||
// NewLexer creates a new lexer that will tokenize the given source
|
||||
// code. See the module-level example for a working example.
|
||||
func NewLexer(src []byte) *Lexer {
|
||||
l := &Lexer{src: src}
|
||||
l.nextPos.Line = 1
|
||||
l.nextPos.Column = 1
|
||||
l.next()
|
||||
return l
|
||||
}
|
||||
|
||||
// HadSpace returns true if the previously-scanned token had
|
||||
// whitespace before it. Used by the parser because when calling a
|
||||
// user-defined function the grammar doesn't allow a space between
|
||||
// the function name and the left parenthesis.
|
||||
func (l *Lexer) HadSpace() bool {
|
||||
return l.hadSpace
|
||||
}
|
||||
|
||||
// Scan scans the next token and returns its position (line/column),
|
||||
// token value (one of the uppercase token constants), and the
|
||||
// string value of the token. For most tokens, the token value is
|
||||
// empty. For NAME, NUMBER, STRING, and REGEX tokens, it's the
|
||||
// token's value. For an ILLEGAL token, it's the error message.
|
||||
func (l *Lexer) Scan() (Position, Token, string) {
|
||||
pos, tok, val := l.scan()
|
||||
l.lastTok = tok
|
||||
return pos, tok, val
|
||||
}
|
||||
|
||||
// Does the real work of scanning. Scan() wraps this to more easily
|
||||
// set lastTok.
|
||||
func (l *Lexer) scan() (Position, Token, string) {
|
||||
// Skip whitespace (except newline, which is a token)
|
||||
l.hadSpace = false
|
||||
for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\\' {
|
||||
l.hadSpace = true
|
||||
if l.ch == '\\' {
|
||||
l.next()
|
||||
if l.ch == '\r' {
|
||||
l.next()
|
||||
}
|
||||
if l.ch != '\n' {
|
||||
return l.pos, ILLEGAL, "expected \\n after \\ line continuation"
|
||||
}
|
||||
}
|
||||
l.next()
|
||||
}
|
||||
if l.ch == '#' {
|
||||
// Skip comment till end of line
|
||||
l.next()
|
||||
for l.ch != '\n' && l.ch != 0 {
|
||||
l.next()
|
||||
}
|
||||
}
|
||||
if l.ch == 0 {
|
||||
// l.next() reached end of input
|
||||
return l.pos, EOF, ""
|
||||
}
|
||||
|
||||
pos := l.pos
|
||||
tok := ILLEGAL
|
||||
val := ""
|
||||
|
||||
ch := l.ch
|
||||
l.next()
|
||||
|
||||
// Names: keywords and functions
|
||||
if isNameStart(ch) {
|
||||
start := l.offset - 2
|
||||
for isNameStart(l.ch) || isDigit(l.ch) {
|
||||
l.next()
|
||||
}
|
||||
name := string(l.src[start : l.offset-1])
|
||||
tok := KeywordToken(name)
|
||||
if tok == ILLEGAL {
|
||||
tok = NAME
|
||||
val = name
|
||||
}
|
||||
return pos, tok, val
|
||||
}
|
||||
|
||||
// These are ordered by my guess at frequency of use. Should run
|
||||
// through a corpus of real AWK programs to determine actual
|
||||
// frequency.
|
||||
switch ch {
|
||||
case '$':
|
||||
tok = DOLLAR
|
||||
case '@':
|
||||
tok = AT
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
|
||||
// Avoid make/append and use l.offset directly for performance
|
||||
start := l.offset - 2
|
||||
gotDigit := false
|
||||
if ch != '.' {
|
||||
gotDigit = true
|
||||
for isDigit(l.ch) {
|
||||
l.next()
|
||||
}
|
||||
if l.ch == '.' {
|
||||
l.next()
|
||||
}
|
||||
}
|
||||
for isDigit(l.ch) {
|
||||
gotDigit = true
|
||||
l.next()
|
||||
}
|
||||
if !gotDigit {
|
||||
return l.pos, ILLEGAL, "expected digits"
|
||||
}
|
||||
if l.ch == 'e' || l.ch == 'E' {
|
||||
l.next()
|
||||
gotSign := false
|
||||
if l.ch == '+' || l.ch == '-' {
|
||||
gotSign = true
|
||||
l.next()
|
||||
}
|
||||
gotDigit = false
|
||||
for isDigit(l.ch) {
|
||||
l.next()
|
||||
gotDigit = true
|
||||
}
|
||||
// Per awk/gawk, "1e" is allowed and parsed as "1 e" (with "e"
// considered a variable), and "1e+" is parsed as "1", "e", "+".
|
||||
if !gotDigit {
|
||||
if gotSign {
|
||||
l.unread() // unread the '+' or '-'
|
||||
}
|
||||
l.unread() // unread the 'e' or 'E'
|
||||
}
|
||||
}
|
||||
tok = NUMBER
|
||||
val = string(l.src[start : l.offset-1])
|
||||
case '{':
|
||||
tok = LBRACE
|
||||
case '}':
|
||||
tok = RBRACE
|
||||
case '=':
|
||||
tok = l.choice('=', ASSIGN, EQUALS)
|
||||
case '<':
|
||||
tok = l.choice('=', LESS, LTE)
|
||||
case '>':
|
||||
switch l.ch {
|
||||
case '=':
|
||||
l.next()
|
||||
tok = GTE
|
||||
case '>':
|
||||
l.next()
|
||||
tok = APPEND
|
||||
default:
|
||||
tok = GREATER
|
||||
}
|
||||
case '"', '\'':
|
||||
// Note: POSIX awk spec doesn't allow single-quoted strings,
|
||||
// but this helps with quoting, especially on Windows
|
||||
// where the shell quote character is " (double quote).
|
||||
s, err := parseString(ch, func() byte { return l.ch }, l.next)
|
||||
if err != nil {
|
||||
return l.pos, ILLEGAL, err.Error()
|
||||
}
|
||||
if l.ch != ch {
|
||||
return l.pos, ILLEGAL, "didn't find end quote in string"
|
||||
}
|
||||
l.next()
|
||||
tok = STRING
|
||||
val = s
|
||||
case '(':
|
||||
tok = LPAREN
|
||||
case ')':
|
||||
tok = RPAREN
|
||||
case ',':
|
||||
tok = COMMA
|
||||
case ';':
|
||||
tok = SEMICOLON
|
||||
case '+':
|
||||
switch l.ch {
|
||||
case '+':
|
||||
l.next()
|
||||
tok = INCR
|
||||
case '=':
|
||||
l.next()
|
||||
tok = ADD_ASSIGN
|
||||
default:
|
||||
tok = ADD
|
||||
}
|
||||
case '-':
|
||||
switch l.ch {
|
||||
case '-':
|
||||
l.next()
|
||||
tok = DECR
|
||||
case '=':
|
||||
l.next()
|
||||
tok = SUB_ASSIGN
|
||||
default:
|
||||
tok = SUB
|
||||
}
|
||||
case '*':
|
||||
switch l.ch {
|
||||
case '*':
|
||||
l.next()
|
||||
tok = l.choice('=', POW, POW_ASSIGN)
|
||||
case '=':
|
||||
l.next()
|
||||
tok = MUL_ASSIGN
|
||||
default:
|
||||
tok = MUL
|
||||
}
|
||||
case '/':
|
||||
tok = l.choice('=', DIV, DIV_ASSIGN)
|
||||
case '%':
|
||||
tok = l.choice('=', MOD, MOD_ASSIGN)
|
||||
case '[':
|
||||
tok = LBRACKET
|
||||
case ']':
|
||||
tok = RBRACKET
|
||||
case '\n':
|
||||
tok = NEWLINE
|
||||
case '^':
|
||||
tok = l.choice('=', POW, POW_ASSIGN)
|
||||
case '!':
|
||||
switch l.ch {
|
||||
case '=':
|
||||
l.next()
|
||||
tok = NOT_EQUALS
|
||||
case '~':
|
||||
l.next()
|
||||
tok = NOT_MATCH
|
||||
default:
|
||||
tok = NOT
|
||||
}
|
||||
case '~':
|
||||
tok = MATCH
|
||||
case '?':
|
||||
tok = QUESTION
|
||||
case ':':
|
||||
tok = COLON
|
||||
case '&':
|
||||
tok = l.choice('&', ILLEGAL, AND)
|
||||
if tok == ILLEGAL {
|
||||
return l.pos, ILLEGAL, "unexpected char after '&'"
|
||||
}
|
||||
case '|':
|
||||
tok = l.choice('|', PIPE, OR)
|
||||
default:
|
||||
tok = ILLEGAL
|
||||
val = "unexpected char"
|
||||
}
|
||||
return pos, tok, val
|
||||
}
|
||||
|
||||
// ScanRegex parses an AWK regular expression in /slash/ syntax. The
|
||||
// AWK grammar has somewhat special handling of regex tokens, so the
|
||||
// parser can only call this after a DIV or DIV_ASSIGN token has just
|
||||
// been scanned.
|
||||
func (l *Lexer) ScanRegex() (Position, Token, string) {
|
||||
pos, tok, val := l.scanRegex()
|
||||
l.lastTok = tok
|
||||
return pos, tok, val
|
||||
}
|
||||
|
||||
// Does the real work of scanning a regex. ScanRegex() wraps this to
|
||||
// more easily set lastTok.
|
||||
func (l *Lexer) scanRegex() (Position, Token, string) {
|
||||
pos := l.pos
|
||||
chars := make([]byte, 0, 32) // most won't require heap allocation
|
||||
switch l.lastTok {
|
||||
case DIV:
|
||||
// Regex after '/' (the usual case)
|
||||
pos.Column -= 1
|
||||
case DIV_ASSIGN:
|
||||
// Regex after '/=' (happens when regex starts with '=')
|
||||
pos.Column -= 2
|
||||
chars = append(chars, '=')
|
||||
default:
|
||||
panic("ScanRegex should only be called after DIV or DIV_ASSIGN token")
|
||||
}
|
||||
for l.ch != '/' {
|
||||
c := l.ch
|
||||
if c == 0 {
|
||||
return l.pos, ILLEGAL, "didn't find end slash in regex"
|
||||
}
|
||||
if c == '\r' || c == '\n' {
|
||||
return l.pos, ILLEGAL, "can't have newline in regex"
|
||||
}
|
||||
if c == '\\' {
|
||||
l.next()
|
||||
if l.ch != '/' {
|
||||
chars = append(chars, '\\')
|
||||
}
|
||||
c = l.ch
|
||||
}
|
||||
chars = append(chars, c)
|
||||
l.next()
|
||||
}
|
||||
l.next()
|
||||
return pos, REGEX, string(chars)
|
||||
}
|
||||
|
||||
// Load the next character into l.ch (or 0 on end of input) and update
|
||||
// line and column position.
|
||||
func (l *Lexer) next() {
|
||||
l.pos = l.nextPos
|
||||
if l.offset >= len(l.src) {
|
||||
// For last character, move offset 1 past the end as it
|
||||
// simplifies offset calculations in NAME and NUMBER
|
||||
if l.ch != 0 {
|
||||
l.ch = 0
|
||||
l.offset++
|
||||
l.nextPos.Column++
|
||||
}
|
||||
return
|
||||
}
|
||||
ch := l.src[l.offset]
|
||||
if ch == '\n' {
|
||||
l.nextPos.Line++
|
||||
l.nextPos.Column = 1
|
||||
} else if ch != '\r' {
|
||||
l.nextPos.Column++
|
||||
}
|
||||
l.ch = ch
|
||||
l.offset++
|
||||
}
|
||||
|
||||
// Un-read the character just scanned (doesn't handle line boundaries).
|
||||
func (l *Lexer) unread() {
|
||||
l.offset--
|
||||
l.pos.Column--
|
||||
l.nextPos.Column--
|
||||
l.ch = l.src[l.offset-1]
|
||||
}
|
||||
|
||||
func isNameStart(ch byte) bool {
|
||||
return ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
|
||||
}
|
||||
|
||||
func isDigit(ch byte) bool {
|
||||
return ch >= '0' && ch <= '9'
|
||||
}
|
||||
|
||||
// Return the hex digit 0-15 corresponding to the given ASCII byte,
|
||||
// or -1 if it's not a valid hex digit.
|
||||
func hexDigit(ch byte) int {
|
||||
switch {
|
||||
case isDigit(ch):
|
||||
return int(ch - '0')
|
||||
case ch >= 'a' && ch <= 'f':
|
||||
return int(ch - 'a' + 10)
|
||||
case ch >= 'A' && ch <= 'F':
|
||||
return int(ch - 'A' + 10)
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) choice(ch byte, one, two Token) Token {
|
||||
if l.ch == ch {
|
||||
l.next()
|
||||
return two
|
||||
}
|
||||
return one
|
||||
}
|
||||
|
||||
// PeekByte returns the next unscanned byte; used when parsing
|
||||
// "getline lvalue" expressions. Returns 0 at end of input.
|
||||
func (l *Lexer) PeekByte() byte {
|
||||
return l.ch
|
||||
}
|
||||
|
||||
// Unescape unescapes the backslash escapes in s (which shouldn't include the
|
||||
// surrounding quotes) and returns the unquoted string. It's intended for use
|
||||
// when unescaping command line var=value assignments, as required by the
|
||||
// POSIX AWK spec.
|
||||
func Unescape(s string) (string, error) {
|
||||
i := 0
|
||||
ch := func() byte {
|
||||
if i >= len(s) {
|
||||
return 0
|
||||
}
|
||||
return s[i]
|
||||
}
|
||||
next := func() {
|
||||
i++
|
||||
}
|
||||
return parseString(0, ch, next)
|
||||
}
|
||||
|
||||
// Parses a string ending with given quote character (not parsed). The ch
|
||||
// function returns the current character (or 0 at the end); the next function
|
||||
// moves forward one character.
|
||||
func parseString(quote byte, ch func() byte, next func()) (string, error) {
|
||||
chars := make([]byte, 0, 32) // most strings won't require heap allocation
|
||||
for {
|
||||
c := ch()
|
||||
if c == quote || c == 0 {
|
||||
break
|
||||
}
|
||||
if c == '\r' || c == '\n' {
|
||||
return "", errors.New("can't have newline in string")
|
||||
}
|
||||
if c != '\\' {
|
||||
// Normal, non-escaped character
|
||||
chars = append(chars, c)
|
||||
next()
|
||||
continue
|
||||
}
|
||||
// Escape sequence, skip over \ and process
|
||||
next()
|
||||
switch ch() {
|
||||
case 'n':
|
||||
c = '\n'
|
||||
next()
|
||||
case 't':
|
||||
c = '\t'
|
||||
next()
|
||||
case 'r':
|
||||
c = '\r'
|
||||
next()
|
||||
case 'a':
|
||||
c = '\a'
|
||||
next()
|
||||
case 'b':
|
||||
c = '\b'
|
||||
next()
|
||||
case 'f':
|
||||
c = '\f'
|
||||
next()
|
||||
case 'v':
|
||||
c = '\v'
|
||||
next()
|
||||
case 'x':
|
||||
// Hex byte of one or two hex digits
|
||||
next()
|
||||
digit := hexDigit(ch())
|
||||
if digit < 0 {
|
||||
return "", errors.New("1 or 2 hex digits expected")
|
||||
}
|
||||
c = byte(digit)
|
||||
next()
|
||||
digit = hexDigit(ch())
|
||||
if digit >= 0 {
|
||||
c = c*16 + byte(digit)
|
||||
next()
|
||||
}
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
// Octal byte of 1-3 octal digits
|
||||
c = ch() - '0'
|
||||
next()
|
||||
for i := 0; i < 2 && ch() >= '0' && ch() <= '7'; i++ {
|
||||
c = c*8 + ch() - '0'
|
||||
next()
|
||||
}
|
||||
default:
|
||||
// Any other escape character is just the char
|
||||
// itself, eg: "\z" is just "z".
|
||||
c = ch()
|
||||
if c == 0 {
|
||||
// A backslash right at the end of the string is kept as a
// literal backslash (this case only arises via Unescape).
|
||||
c = '\\'
|
||||
}
|
||||
next()
|
||||
}
|
||||
chars = append(chars, c)
|
||||
}
|
||||
return string(chars), nil
|
||||
}
|
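A small sketch of how the exported Unescape function is meant to be used for POSIX-style var=value command-line assignments, mirroring what the interpreter's nextLine does earlier in this commit (the program and variable names here are illustrative):

package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/benhoyt/goawk/lexer"
)

func main() {
	// An ARGV-style argument such as `greeting=hello\tworld` must have its
	// escapes interpreted before the variable is assigned.
	arg := `greeting=hello\tworld`
	eq := strings.IndexByte(arg, '=')
	name, rawValue := arg[:eq], arg[eq+1:]

	value, err := lexer.Unescape(rawValue)
	if err != nil {
		fmt.Fprintln(os.Stderr, "bad escape:", err)
		return
	}
	fmt.Printf("%s = %q\n", name, value) // prints: greeting = "hello\tworld"
}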
393
src/tool/awk/lexer/lexer_test.go
Normal file
|
@ -0,0 +1,393 @@
|
|||
// Test GoAWK Lexer
|
||||
|
||||
package lexer_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
. "github.com/benhoyt/goawk/lexer"
|
||||
)
|
||||
|
||||
func TestLexer(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
output string
|
||||
}{
|
||||
// Comments, whitespace, line continuations
|
||||
{"+# foo \n- #foo", `1:1 + "", 1:8 <newline> "", 2:1 - ""`},
|
||||
{"+\\\n-", `1:1 + "", 2:1 - ""`},
|
||||
{"+\\\r\n-", `1:1 + "", 2:1 - ""`},
|
||||
{"+\\-", `1:1 + "", 1:3 <illegal> "expected \\n after \\ line continuation", 1:3 - ""`},
|
||||
|
||||
// Names and keywords
|
||||
{"x", `1:1 name "x"`},
|
||||
{"x y0", `1:1 name "x", 1:3 name "y0"`},
|
||||
{"x 0y", `1:1 name "x", 1:3 number "0", 1:4 name "y"`},
|
||||
{"sub SUB", `1:1 sub "", 1:5 name "SUB"`},
|
||||
|
||||
// String tokens
|
||||
{`"foo"`, `1:1 string "foo"`},
|
||||
{`"a\t\r\n\z\'\"\a\b\f\vb"`, `1:1 string "a\t\r\nz'\"\a\b\f\vb"`},
|
||||
{`"x`, `1:3 <illegal> "didn't find end quote in string"`},
|
||||
{`"foo\"`, `1:7 <illegal> "didn't find end quote in string"`},
|
||||
{"\"x\n\"", `1:3 <illegal> "can't have newline in string", 1:3 <newline> "", 2:2 <illegal> "didn't find end quote in string"`},
|
||||
{`'foo'`, `1:1 string "foo"`},
|
||||
{`'a\t\r\n\z\'\"b'`, `1:1 string "a\t\r\nz'\"b"`},
|
||||
{`'x`, `1:3 <illegal> "didn't find end quote in string"`},
|
||||
{"'x\n'", `1:3 <illegal> "can't have newline in string", 1:3 <newline> "", 2:2 <illegal> "didn't find end quote in string"`},
|
||||
{`"\x0.\x00.\x0A\x10\xff\xFF\x41"`, `1:1 string "\x00.\x00.\n\x10\xff\xffA"`},
|
||||
{`"\xg"`, `1:4 <illegal> "1 or 2 hex digits expected", 1:4 name "g", 1:6 <illegal> "didn't find end quote in string"`},
|
||||
{`"\0\78\7\77\777\0 \141 "`, `1:1 string "\x00\a8\a?\xff\x00 a "`},
|
||||
|
||||
// Number tokens
|
||||
{"0", `1:1 number "0"`},
|
||||
{"9", `1:1 number "9"`},
|
||||
{" 0 ", `1:2 number "0"`},
|
||||
{"\n 1", `1:1 <newline> "", 2:3 number "1"`},
|
||||
{"1234", `1:1 number "1234"`},
|
||||
{".5", `1:1 number ".5"`},
|
||||
{".5e1", `1:1 number ".5e1"`},
|
||||
{"5e+1", `1:1 number "5e+1"`},
|
||||
{"5e-1", `1:1 number "5e-1"`},
|
||||
{"0.", `1:1 number "0."`},
|
||||
{"42e", `1:1 number "42", 1:3 name "e"`},
|
||||
{"4.2e", `1:1 number "4.2", 1:4 name "e"`},
|
||||
{"1.e3", `1:1 number "1.e3"`},
|
||||
{"1.e3", `1:1 number "1.e3"`},
|
||||
{"1e3foo", `1:1 number "1e3", 1:4 name "foo"`},
|
||||
{"1e3+", `1:1 number "1e3", 1:4 + ""`},
|
||||
{"1e3.4", `1:1 number "1e3", 1:4 number ".4"`},
|
||||
{"1e-", `1:1 number "1", 1:2 name "e", 1:3 - ""`},
|
||||
{"1e+", `1:1 number "1", 1:2 name "e", 1:3 + ""`},
|
||||
{"42`", `1:1 number "42", 1:3 <illegal> "unexpected char"`},
|
||||
{"0..", `1:1 number "0.", 1:4 <illegal> "expected digits"`},
|
||||
{".", `1:2 <illegal> "expected digits"`},
|
||||
|
||||
// Misc errors
|
||||
{"&=", `1:2 <illegal> "unexpected char after '&'", 1:2 = ""`},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
l := NewLexer([]byte(test.input))
|
||||
strs := []string{}
|
||||
for {
|
||||
pos, tok, val := l.Scan()
|
||||
if tok == EOF {
|
||||
break
|
||||
}
|
||||
if tok == NUMBER {
|
||||
// Ensure ParseFloat() works, as that's what our
|
||||
// parser uses to convert
|
||||
trimmed := strings.TrimRight(val, "eE")
|
||||
_, err := strconv.ParseFloat(trimmed, 64)
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't parse float: %q", val)
|
||||
}
|
||||
}
|
||||
strs = append(strs, fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val))
|
||||
}
|
||||
output := strings.Join(strs, ", ")
|
||||
if output != test.output {
|
||||
t.Errorf("expected %q, got %q", test.output, output)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegex(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
output string
|
||||
}{
|
||||
{`/foo/`, `1:1 regex "foo"`},
|
||||
{`/=foo/`, `1:1 regex "=foo"`},
|
||||
{`/a\/b/`, `1:1 regex "a/b"`},
|
||||
{`/a\/\zb/`, `1:1 regex "a/\\zb"`},
|
||||
{`/a`, `1:3 <illegal> "didn't find end slash in regex"`},
|
||||
{"/a\n", `1:3 <illegal> "can't have newline in regex"`},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
l := NewLexer([]byte(test.input))
|
||||
l.Scan() // Scan first token (probably DIV)
|
||||
pos, tok, val := l.ScanRegex()
|
||||
output := fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val)
|
||||
if output != test.output {
|
||||
t.Errorf("expected %q, got %q", test.output, output)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScanRegexInvalid(t *testing.T) {
|
||||
defer func() {
|
||||
r := recover()
|
||||
if message, ok := r.(string); ok {
|
||||
expected := "ScanRegex should only be called after DIV or DIV_ASSIGN token"
|
||||
if message != expected {
|
||||
t.Fatalf("expected %q, got %q", expected, message)
|
||||
}
|
||||
} else {
|
||||
t.Fatalf("expected panic of string type")
|
||||
}
|
||||
}()
|
||||
l := NewLexer([]byte("foo/"))
|
||||
l.Scan() // Scan first token (NAME foo)
|
||||
l.ScanRegex()
|
||||
}
|
||||
|
||||
func TestHadSpace(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
tokens []Token
|
||||
spaces []bool
|
||||
}{
|
||||
{`foo(x)`, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, false, false, false}},
|
||||
{`foo (x) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, true, false, false}},
|
||||
{` foo ( x ) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{true, true, true, true}},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
l := NewLexer([]byte(test.input))
|
||||
for i := 0; ; i++ {
|
||||
_, tok, _ := l.Scan()
|
||||
if tok == EOF {
|
||||
break
|
||||
}
|
||||
if tok != test.tokens[i] {
|
||||
t.Errorf("expected %s for token %d, got %s", test.tokens[i], i, tok)
|
||||
}
|
||||
if l.HadSpace() != test.spaces[i] {
|
||||
t.Errorf("expected %v for space %d, got %v", test.spaces[i], i, l.HadSpace())
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPeekByte(t *testing.T) {
|
||||
l := NewLexer([]byte("foo()"))
|
||||
b := l.PeekByte()
|
||||
if b != 'f' {
|
||||
t.Errorf("expected 'f', got %q", b)
|
||||
}
|
||||
_, tok, _ := l.Scan()
|
||||
if tok != NAME {
|
||||
t.Errorf("expected name, got %s", tok)
|
||||
}
|
||||
b = l.PeekByte()
|
||||
if b != '(' {
|
||||
t.Errorf("expected '(', got %q", b)
|
||||
}
|
||||
_, tok, _ = l.Scan()
|
||||
if tok != LPAREN {
|
||||
t.Errorf("expected (, got %s", tok)
|
||||
}
|
||||
_, tok, _ = l.Scan()
|
||||
if tok != RPAREN {
|
||||
t.Errorf("expected ), got %s", tok)
|
||||
}
|
||||
b = l.PeekByte()
|
||||
if b != 0 {
|
||||
t.Errorf("expected 0, got %q", b)
|
||||
}
|
||||
}
|
||||
|
||||
func TestKeywordToken(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
tok Token
|
||||
}{
|
||||
{"print", PRINT},
|
||||
{"split", F_SPLIT},
|
||||
{"BEGIN", BEGIN},
|
||||
{"foo", ILLEGAL},
|
||||
{"GoAWK", ILLEGAL},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
tok := KeywordToken(test.name)
|
||||
if tok != test.tok {
|
||||
t.Errorf("expected %v, got %v", test.tok, tok)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllTokens(t *testing.T) {
|
||||
input := "# comment line\n" +
|
||||
"+ += && = : , -- /\n/= $ @ == >= > >> ++ { [ < ( #\n" +
|
||||
"<= ~ % %= * *= !~ ! != | || ^ ^= ** **= ? } ] ) ; - -= " +
|
||||
"BEGIN break continue delete do else END exit " +
|
||||
"for function getline if in next print printf return while " +
|
||||
"atan2 close cos exp fflush gsub index int length log match rand " +
|
||||
"sin split sprintf sqrt srand sub substr system tolower toupper " +
|
||||
"x \"str\\n\" 1234\n" +
|
||||
"` ."
|
||||
|
||||
strs := make([]string, 0, LAST+1)
|
||||
seen := make([]bool, LAST+1)
|
||||
l := NewLexer([]byte(input))
|
||||
for {
|
||||
_, tok, _ := l.Scan()
|
||||
strs = append(strs, tok.String())
|
||||
seen[int(tok)] = true
|
||||
if tok == EOF {
|
||||
break
|
||||
}
|
||||
}
|
||||
output := strings.Join(strs, " ")
|
||||
|
||||
expected := "<newline> " +
|
||||
"+ += && = : , -- / <newline> /= $ @ == >= > >> ++ { [ < ( <newline> " +
|
||||
"<= ~ % %= * *= !~ ! != | || ^ ^= ^ ^= ? } ] ) ; - -= " +
|
||||
"BEGIN break continue delete do else END exit " +
|
||||
"for function getline if in next print printf return while " +
|
||||
"atan2 close cos exp fflush gsub index int length log match rand " +
|
||||
"sin split sprintf sqrt srand sub substr system tolower toupper " +
|
||||
"name string number <newline> " +
|
||||
"<illegal> <illegal> EOF"
|
||||
if output != expected {
|
||||
t.Errorf("expected %q, got %q", expected, output)
|
||||
}
|
||||
|
||||
for i, s := range seen {
|
||||
if !s && Token(i) != CONCAT && Token(i) != REGEX {
|
||||
t.Errorf("token %s (%d) not seen", Token(i), i)
|
||||
}
|
||||
}
|
||||
|
||||
l = NewLexer([]byte(`/foo/`))
|
||||
_, tok1, _ := l.Scan()
|
||||
_, tok2, val := l.ScanRegex()
|
||||
if tok1 != DIV || tok2 != REGEX || val != "foo" {
|
||||
t.Errorf(`expected / regex "foo", got %s %s %q`, tok1, tok2, val)
|
||||
}
|
||||
|
||||
l = NewLexer([]byte(`/=foo/`))
|
||||
_, tok1, _ = l.Scan()
|
||||
_, tok2, val = l.ScanRegex()
|
||||
if tok1 != DIV_ASSIGN || tok2 != REGEX || val != "=foo" {
|
||||
t.Errorf(`expected /= regex "=foo", got %s %s %q`, tok1, tok2, val)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnescape(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
output string
|
||||
error string
|
||||
}{
|
||||
{``, "", ""},
|
||||
{`foo bar`, "foo bar", ""},
|
||||
{`foo\tbar`, "foo\tbar", ""},
|
||||
{"foo\nbar", "", "can't have newline in string"},
|
||||
{`foo"`, "foo\"", ""},
|
||||
{`O'Connor`, "O'Connor", ""},
|
||||
{`foo\`, "foo\\", ""},
|
||||
// Other cases tested in TestLexer string handling.
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
got, err := Unescape(test.input)
|
||||
if err != nil {
|
||||
if err.Error() != test.error {
|
||||
t.Fatalf("expected error %q, got %q", test.error, err)
|
||||
}
|
||||
} else {
|
||||
if test.error != "" {
|
||||
t.Fatalf("expected error %q, got %q", test.error, "")
|
||||
}
|
||||
if got != test.output {
|
||||
t.Fatalf("expected %q, got %q", test.output, got)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkLexer(b *testing.B, repeat int, source string) {
|
||||
fullSource := []byte(strings.Repeat(source+"\n", repeat))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
l := NewLexer(fullSource)
|
||||
for {
|
||||
_, tok, _ := l.Scan()
|
||||
if tok == EOF || tok == ILLEGAL {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkProgram(b *testing.B) {
|
||||
benchmarkLexer(b, 5, `{ print $1, ($3+$4)*$5 }`)
|
||||
}
|
||||
|
||||
func BenchmarkNames(b *testing.B) {
|
||||
benchmarkLexer(b, 5, `x y i foobar abcdefghij0123456789 _`)
|
||||
}
|
||||
|
||||
func BenchmarkKeywords(b *testing.B) {
|
||||
benchmarkLexer(b, 5, `BEGIN END print sub if length`)
|
||||
}
|
||||
|
||||
func BenchmarkSimpleTokens(b *testing.B) {
|
||||
benchmarkLexer(b, 5, "\n : , { [ ( } ] ) ~ ? ; $")
|
||||
}
|
||||
|
||||
func BenchmarkChoiceTokens(b *testing.B) {
|
||||
benchmarkLexer(b, 5, `/ /= % %= + ++ += * ** **= *= = == ^ ^= ! != !~ < <= > >= >> && | ||`)
|
||||
}
|
||||
|
||||
func BenchmarkNumbers(b *testing.B) {
|
||||
benchmarkLexer(b, 5, `0 1 .5 1234 1234567890 1234.56789e-50`)
|
||||
}
|
||||
|
||||
func BenchmarkStrings(b *testing.B) {
|
||||
benchmarkLexer(b, 5, `"x" "y" "xyz" "foo" "foo bar baz" "foo\tbar\rbaz\n"`)
|
||||
}
|
||||
|
||||
func BenchmarkRegex(b *testing.B) {
|
||||
source := `/x/ /./ /foo/ /bar/ /=equals=/ /\/\/\/\//`
|
||||
fullSource := []byte(strings.Repeat(source+" ", 5))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
l := NewLexer(fullSource)
|
||||
for {
|
||||
_, tok, _ := l.Scan()
|
||||
if tok == EOF {
|
||||
break
|
||||
}
|
||||
if tok != DIV && tok != DIV_ASSIGN {
|
||||
b.Fatalf("expected / or /=, got %s", tok)
|
||||
}
|
||||
_, tok, _ = l.ScanRegex()
|
||||
if tok != REGEX {
|
||||
b.Fatalf("expected regex, got %s", tok)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Example() {
|
||||
lexer := NewLexer([]byte(`$0 { print $1 }`))
|
||||
for {
|
||||
pos, tok, val := lexer.Scan()
|
||||
if tok == EOF {
|
||||
break
|
||||
}
|
||||
fmt.Printf("%d:%d %s %q\n", pos.Line, pos.Column, tok, val)
|
||||
}
|
||||
// Output:
|
||||
// 1:1 $ ""
|
||||
// 1:2 number "0"
|
||||
// 1:4 { ""
|
||||
// 1:6 print ""
|
||||
// 1:12 $ ""
|
||||
// 1:13 number "1"
|
||||
// 1:15 } ""
|
||||
}
|
263
src/tool/awk/lexer/token.go
Normal file
|
@ -0,0 +1,263 @@
|
|||
// Lexer tokens
|
||||
|
||||
package lexer
|
||||
|
||||
// Token is the type of a single token.
|
||||
type Token int
|
||||
|
||||
const (
|
||||
ILLEGAL Token = iota
|
||||
EOF
|
||||
NEWLINE
|
||||
CONCAT // Not really a token, but used as an operator
|
||||
|
||||
// Symbols
|
||||
|
||||
ADD
|
||||
ADD_ASSIGN
|
||||
AND
|
||||
APPEND
|
||||
ASSIGN
|
||||
AT
|
||||
COLON
|
||||
COMMA
|
||||
DECR
|
||||
DIV
|
||||
DIV_ASSIGN
|
||||
DOLLAR
|
||||
EQUALS
|
||||
GTE
|
||||
GREATER
|
||||
INCR
|
||||
LBRACE
|
||||
LBRACKET
|
||||
LESS
|
||||
LPAREN
|
||||
LTE
|
||||
MATCH
|
||||
MOD
|
||||
MOD_ASSIGN
|
||||
MUL
|
||||
MUL_ASSIGN
|
||||
NOT_MATCH
|
||||
NOT
|
||||
NOT_EQUALS
|
||||
OR
|
||||
PIPE
|
||||
POW
|
||||
POW_ASSIGN
|
||||
QUESTION
|
||||
RBRACE
|
||||
RBRACKET
|
||||
RPAREN
|
||||
SEMICOLON
|
||||
SUB
|
||||
SUB_ASSIGN
|
||||
|
||||
// Keywords
|
||||
|
||||
BEGIN
|
||||
BREAK
|
||||
CONTINUE
|
||||
DELETE
|
||||
DO
|
||||
ELSE
|
||||
END
|
||||
EXIT
|
||||
FOR
|
||||
FUNCTION
|
||||
GETLINE
|
||||
IF
|
||||
IN
|
||||
NEXT
|
||||
PRINT
|
||||
PRINTF
|
||||
RETURN
|
||||
WHILE
|
||||
|
||||
// Built-in functions
|
||||
|
||||
F_ATAN2
|
||||
F_CLOSE
|
||||
F_COS
|
||||
F_EXP
|
||||
F_FFLUSH
|
||||
F_GSUB
|
||||
F_INDEX
|
||||
F_INT
|
||||
F_LENGTH
|
||||
F_LOG
|
||||
F_MATCH
|
||||
F_RAND
|
||||
F_SIN
|
||||
F_SPLIT
|
||||
F_SPRINTF
|
||||
F_SQRT
|
||||
F_SRAND
|
||||
F_SUB
|
||||
F_SUBSTR
|
||||
F_SYSTEM
|
||||
F_TOLOWER
|
||||
F_TOUPPER
|
||||
|
||||
// Literals and names (variables and arrays)
|
||||
|
||||
NAME
|
||||
NUMBER
|
||||
STRING
|
||||
REGEX
|
||||
|
||||
LAST = REGEX
|
||||
FIRST_FUNC = F_ATAN2
|
||||
LAST_FUNC = F_TOUPPER
|
||||
)
|
||||
|
||||
var keywordTokens = map[string]Token{
|
||||
"BEGIN": BEGIN,
|
||||
"break": BREAK,
|
||||
"continue": CONTINUE,
|
||||
"delete": DELETE,
|
||||
"do": DO,
|
||||
"else": ELSE,
|
||||
"END": END,
|
||||
"exit": EXIT,
|
||||
"for": FOR,
|
||||
"function": FUNCTION,
|
||||
"getline": GETLINE,
|
||||
"if": IF,
|
||||
"in": IN,
|
||||
"next": NEXT,
|
||||
"print": PRINT,
|
||||
"printf": PRINTF,
|
||||
"return": RETURN,
|
||||
"while": WHILE,
|
||||
|
||||
"atan2": F_ATAN2,
|
||||
"close": F_CLOSE,
|
||||
"cos": F_COS,
|
||||
"exp": F_EXP,
|
||||
"fflush": F_FFLUSH,
|
||||
"gsub": F_GSUB,
|
||||
"index": F_INDEX,
|
||||
"int": F_INT,
|
||||
"length": F_LENGTH,
|
||||
"log": F_LOG,
|
||||
"match": F_MATCH,
|
||||
"rand": F_RAND,
|
||||
"sin": F_SIN,
|
||||
"split": F_SPLIT,
|
||||
"sprintf": F_SPRINTF,
|
||||
"sqrt": F_SQRT,
|
||||
"srand": F_SRAND,
|
||||
"sub": F_SUB,
|
||||
"substr": F_SUBSTR,
|
||||
"system": F_SYSTEM,
|
||||
"tolower": F_TOLOWER,
|
||||
"toupper": F_TOUPPER,
|
||||
}
|
||||
|
||||
// KeywordToken returns the token associated with the given keyword
|
||||
// string, or ILLEGAL if the given name is not a keyword.
|
||||
func KeywordToken(name string) Token {
|
||||
return keywordTokens[name]
|
||||
}
|
||||
|
||||
var tokenNames = map[Token]string{
|
||||
ILLEGAL: "<illegal>",
|
||||
EOF: "EOF",
|
||||
NEWLINE: "<newline>",
|
||||
CONCAT: "<concat>",
|
||||
|
||||
ADD: "+",
|
||||
ADD_ASSIGN: "+=",
|
||||
AND: "&&",
|
||||
APPEND: ">>",
|
||||
ASSIGN: "=",
|
||||
AT: "@",
|
||||
COLON: ":",
|
||||
COMMA: ",",
|
||||
DECR: "--",
|
||||
DIV: "/",
|
||||
DIV_ASSIGN: "/=",
|
||||
DOLLAR: "$",
|
||||
EQUALS: "==",
|
||||
GTE: ">=",
|
||||
GREATER: ">",
|
||||
INCR: "++",
|
||||
LBRACE: "{",
|
||||
LBRACKET: "[",
|
||||
LESS: "<",
|
||||
LPAREN: "(",
|
||||
LTE: "<=",
|
||||
MATCH: "~",
|
||||
MOD: "%",
|
||||
MOD_ASSIGN: "%=",
|
||||
MUL: "*",
|
||||
MUL_ASSIGN: "*=",
|
||||
NOT_MATCH: "!~",
|
||||
NOT: "!",
|
||||
NOT_EQUALS: "!=",
|
||||
OR: "||",
|
||||
PIPE: "|",
|
||||
POW: "^",
|
||||
POW_ASSIGN: "^=",
|
||||
QUESTION: "?",
|
||||
RBRACE: "}",
|
||||
RBRACKET: "]",
|
||||
RPAREN: ")",
|
||||
SEMICOLON: ";",
|
||||
SUB: "-",
|
||||
SUB_ASSIGN: "-=",
|
||||
|
||||
BEGIN: "BEGIN",
|
||||
BREAK: "break",
|
||||
CONTINUE: "continue",
|
||||
DELETE: "delete",
|
||||
DO: "do",
|
||||
ELSE: "else",
|
||||
END: "END",
|
||||
EXIT: "exit",
|
||||
FOR: "for",
|
||||
FUNCTION: "function",
|
||||
GETLINE: "getline",
|
||||
IF: "if",
|
||||
IN: "in",
|
||||
NEXT: "next",
|
||||
PRINT: "print",
|
||||
PRINTF: "printf",
|
||||
RETURN: "return",
|
||||
WHILE: "while",
|
||||
|
||||
F_ATAN2: "atan2",
|
||||
F_CLOSE: "close",
|
||||
F_COS: "cos",
|
||||
F_EXP: "exp",
|
||||
F_FFLUSH: "fflush",
|
||||
F_GSUB: "gsub",
|
||||
F_INDEX: "index",
|
||||
F_INT: "int",
|
||||
F_LENGTH: "length",
|
||||
F_LOG: "log",
|
||||
F_MATCH: "match",
|
||||
F_RAND: "rand",
|
||||
F_SIN: "sin",
|
||||
F_SPLIT: "split",
|
||||
F_SPRINTF: "sprintf",
|
||||
F_SQRT: "sqrt",
|
||||
F_SRAND: "srand",
|
||||
F_SUB: "sub",
|
||||
F_SUBSTR: "substr",
|
||||
F_SYSTEM: "system",
|
||||
F_TOLOWER: "tolower",
|
||||
F_TOUPPER: "toupper",
|
||||
|
||||
NAME: "name",
|
||||
NUMBER: "number",
|
||||
STRING: "string",
|
||||
REGEX: "regex",
|
||||
}
|
||||
|
||||
// String returns the string name of this token.
|
||||
func (t Token) String() string {
|
||||
return tokenNames[t]
|
||||
}
|
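A short illustrative program (not part of this commit) showing how KeywordToken distinguishes keywords and built-in function names from plain identifiers:

package main

import (
	"fmt"

	"github.com/benhoyt/goawk/lexer"
)

func main() {
	// KeywordToken returns ILLEGAL for anything that isn't a keyword or
	// built-in function name, which the lexer then scans as a NAME token.
	for _, word := range []string{"print", "split", "BEGIN", "myvar"} {
		tok := lexer.KeywordToken(word)
		if tok == lexer.ILLEGAL {
			fmt.Printf("%-6s -> not a keyword (scanned as a NAME)\n", word)
		} else {
			fmt.Printf("%-6s -> keyword token %s\n", word, tok)
		}
	}
}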
21
src/tool/awk/license.txt
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2022 Ben Hoyt
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|

src/tool/awk/parser/parser.go (new file, 1048 lines; diff suppressed because it is too large)

src/tool/awk/parser/parser_test.go (new file, 242 lines)
@@ -0,0 +1,242 @@
// Test parser package
|
||||
|
||||
package parser_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/benhoyt/goawk/parser"
|
||||
)
|
||||
|
||||
// NOTE: apart from TestParseAndString, the parser doesn't have
|
||||
// extensive tests of its own; the idea is to test the parser in the
|
||||
// interp tests.
|
||||
|
||||
func TestParseAndString(t *testing.T) {
|
||||
// This program should have one of every AST element to ensure
|
||||
// we can parse and String()ify each.
|
||||
source := strings.TrimSpace(`
|
||||
BEGIN {
|
||||
print "begin one"
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
print "begin two"
|
||||
}
|
||||
|
||||
{
|
||||
print "empty pattern"
|
||||
}
|
||||
|
||||
$0 {
|
||||
print "normal pattern"
|
||||
print 1, 2, 3
|
||||
printf "%.3f", 3.14159
|
||||
print "x" >"file"
|
||||
print "x" >>"append"
|
||||
print "y" |"prog"
|
||||
delete a[k]
|
||||
if (c) {
|
||||
get(a, k)
|
||||
}
|
||||
if (1 + 2) {
|
||||
get(a, k)
|
||||
} else {
|
||||
set(a, k, v)
|
||||
}
|
||||
for (i = 0; i < 10; i++) {
|
||||
print i
|
||||
continue
|
||||
}
|
||||
for (k in a) {
|
||||
break
|
||||
}
|
||||
while (0) {
|
||||
print "x"
|
||||
}
|
||||
do {
|
||||
print "y"
|
||||
exit status
|
||||
} while (x)
|
||||
next
|
||||
"cmd" |getline
|
||||
"cmd" |getline x
|
||||
"cmd" |getline a[1]
|
||||
"cmd" |getline $1
|
||||
getline
|
||||
getline x
|
||||
(getline x + 1)
|
||||
getline $1
|
||||
getline a[1]
|
||||
getline <"file"
|
||||
getline x <"file"
|
||||
(getline x <"file" "x")
|
||||
getline $1 <"file"
|
||||
getline a[1] <"file"
|
||||
x = 0
|
||||
y = z = 0
|
||||
b += 1
|
||||
c -= 2
|
||||
d *= 3
|
||||
e /= 4
|
||||
g ^= 5
|
||||
h %= 6
|
||||
(x ? "t" : "f")
|
||||
((b && c) || d)
|
||||
(k in a)
|
||||
((x, y, z) in a)
|
||||
(s ~ "foo")
|
||||
(b < 1)
|
||||
(c <= 2)
|
||||
(d > 3)
|
||||
(e >= 4)
|
||||
(g == 5)
|
||||
(h != 6)
|
||||
((x y) z)
|
||||
((b + c) + d)
|
||||
((b * c) * d)
|
||||
((b - c) - d)
|
||||
((b / c) / d)
|
||||
(b ^ (c ^ d))
|
||||
x++
|
||||
x--
|
||||
++y
|
||||
--y
|
||||
1234
|
||||
1.5
|
||||
"This is a string"
|
||||
if (/a.b/) {
|
||||
print "match"
|
||||
}
|
||||
$1
|
||||
$(1 + 2)
|
||||
!x
|
||||
+x
|
||||
-x
|
||||
var
|
||||
a[key]
|
||||
a[x, y, z]
|
||||
f()
|
||||
set(a, k, v)
|
||||
sub(regex, repl)
|
||||
sub(regex, repl, s)
|
||||
gsub(regex, repl)
|
||||
gsub(regex, repl, s)
|
||||
split(s, a)
|
||||
split(s, a, regex)
|
||||
match(s, regex)
|
||||
rand()
|
||||
srand()
|
||||
srand(1)
|
||||
length()
|
||||
length($1)
|
||||
sprintf("")
|
||||
sprintf("%.3f", 3.14159)
|
||||
sprintf("%.3f %d", 3.14159, 42)
|
||||
cos(1)
|
||||
sin(1)
|
||||
exp(1)
|
||||
log(1)
|
||||
sqrt(1)
|
||||
int("42")
|
||||
tolower("FOO")
|
||||
toupper("foo")
|
||||
system("ls")
|
||||
close("file")
|
||||
atan2(x, y)
|
||||
index(haystack, needle)
|
||||
{
|
||||
print "block statement"
|
||||
f()
|
||||
}
|
||||
}
|
||||
|
||||
(NR == 1), (NR == 2) {
|
||||
print "range pattern"
|
||||
}
|
||||
|
||||
($1 == "foo")
|
||||
|
||||
END {
|
||||
print "end one"
|
||||
}
|
||||
|
||||
END {
|
||||
print "end two"
|
||||
}
|
||||
|
||||
function f() {
|
||||
}
|
||||
|
||||
function get(a, k) {
|
||||
return a[k]
|
||||
}
|
||||
|
||||
function set(a, k, v) {
|
||||
a[k] = v
|
||||
return
|
||||
}
|
||||
`)
|
||||
prog, err := parser.ParseProgram([]byte(source), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("error parsing program: %v", err)
|
||||
}
|
||||
progStr := prog.String()
|
||||
if progStr != source {
|
||||
t.Fatalf("expected first, got second:\n%s\n----------\n%s", source, progStr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveLargeCallGraph(t *testing.T) {
|
||||
const numCalls = 10000
|
||||
|
||||
var buf bytes.Buffer
|
||||
var i int
|
||||
for i = 0; i < numCalls; i++ {
|
||||
fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1)
|
||||
}
|
||||
fmt.Fprintf(&buf, "function f%d(a) { return a }\n", i)
|
||||
fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n")
|
||||
_, err := parser.ParseProgram(buf.Bytes(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
buf.Reset()
|
||||
fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n")
|
||||
fmt.Fprintf(&buf, "function f%d(a) { return a }\n", numCalls)
|
||||
for i = numCalls - 1; i >= 0; i-- {
|
||||
fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1)
|
||||
}
|
||||
_, err = parser.ParseProgram(buf.Bytes(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Example_valid() {
|
||||
prog, err := parser.ParseProgram([]byte("$0 { print $1 }"), nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
} else {
|
||||
fmt.Println(prog)
|
||||
}
|
||||
// Output:
|
||||
// $0 {
|
||||
// print $1
|
||||
// }
|
||||
}
|
||||
|
||||
func Example_error() {
|
||||
prog, err := parser.ParseProgram([]byte("{ for if }"), nil)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
} else {
|
||||
fmt.Println(prog)
|
||||
}
|
||||
// Output:
|
||||
// parse error at 1:7: expected ( instead of if
|
||||
}
|

src/tool/awk/parser/resolve.go (new file, 462 lines)
@@ -0,0 +1,462 @@
// Resolve function calls and variable types
|
||||
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/benhoyt/goawk/internal/ast"
|
||||
. "github.com/benhoyt/goawk/lexer"
|
||||
)
|
||||
|
||||
type varType int
|
||||
|
||||
const (
|
||||
typeUnknown varType = iota
|
||||
typeScalar
|
||||
typeArray
|
||||
)
|
||||
|
||||
func (t varType) String() string {
|
||||
switch t {
|
||||
case typeScalar:
|
||||
return "Scalar"
|
||||
case typeArray:
|
||||
return "Array"
|
||||
default:
|
||||
return "Unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// typeInfo records type information for a single variable
|
||||
type typeInfo struct {
|
||||
typ varType
|
||||
ref *ast.VarExpr
|
||||
scope ast.VarScope
|
||||
index int
|
||||
callName string
|
||||
argIndex int
|
||||
}
|
||||
|
||||
// Used by printVarTypes when debugTypes is turned on
|
||||
func (t typeInfo) String() string {
|
||||
var scope string
|
||||
switch t.scope {
|
||||
case ast.ScopeGlobal:
|
||||
scope = "Global"
|
||||
case ast.ScopeLocal:
|
||||
scope = "Local"
|
||||
default:
|
||||
scope = "Special"
|
||||
}
|
||||
return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d",
|
||||
t.typ, t.ref, scope, t.index, t.callName, t.argIndex)
|
||||
}
|
||||
|
||||
// A single variable reference (normally scalar)
|
||||
type varRef struct {
|
||||
funcName string
|
||||
ref *ast.VarExpr
|
||||
isArg bool
|
||||
pos Position
|
||||
}
|
||||
|
||||
// A single array reference
|
||||
type arrayRef struct {
|
||||
funcName string
|
||||
ref *ast.ArrayExpr
|
||||
pos Position
|
||||
}
|
||||
|
||||
// Initialize the resolver
|
||||
func (p *parser) initResolve() {
|
||||
p.varTypes = make(map[string]map[string]typeInfo)
|
||||
p.varTypes[""] = make(map[string]typeInfo) // globals
|
||||
p.functions = make(map[string]int)
|
||||
p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present
|
||||
p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays
|
||||
p.arrayRef("FIELDS", Position{1, 1})
|
||||
p.multiExprs = make(map[*ast.MultiExpr]Position, 3)
|
||||
}
|
||||
|
||||
// Signal the start of a function
|
||||
func (p *parser) startFunction(name string, params []string) {
|
||||
p.funcName = name
|
||||
p.varTypes[name] = make(map[string]typeInfo)
|
||||
}
|
||||
|
||||
// Signal the end of a function
|
||||
func (p *parser) stopFunction() {
|
||||
p.funcName = ""
|
||||
}
|
||||
|
||||
// Add function by name with given index
|
||||
func (p *parser) addFunction(name string, index int) {
|
||||
p.functions[name] = index
|
||||
}
|
||||
|
||||
// Records a call to a user function (for resolving indexes later)
|
||||
type userCall struct {
|
||||
call *ast.UserCallExpr
|
||||
pos Position
|
||||
inFunc string
|
||||
}
|
||||
|
||||
// Record a user call site
|
||||
func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) {
|
||||
p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName})
|
||||
}
|
||||
|
||||
// After parsing, resolve all user calls to their indexes. Also
|
||||
// ensures functions called have actually been defined, and that
|
||||
// they're not being called with too many arguments.
|
||||
func (p *parser) resolveUserCalls(prog *Program) {
|
||||
// Number the native funcs (order by name to get consistent order)
|
||||
nativeNames := make([]string, 0, len(p.nativeFuncs))
|
||||
for name := range p.nativeFuncs {
|
||||
nativeNames = append(nativeNames, name)
|
||||
}
|
||||
sort.Strings(nativeNames)
|
||||
nativeIndexes := make(map[string]int, len(nativeNames))
|
||||
for i, name := range nativeNames {
|
||||
nativeIndexes[name] = i
|
||||
}
|
||||
|
||||
for _, c := range p.userCalls {
|
||||
// AWK-defined functions take precedence over native Go funcs
|
||||
index, ok := p.functions[c.call.Name]
|
||||
if !ok {
|
||||
f, haveNative := p.nativeFuncs[c.call.Name]
|
||||
if !haveNative {
|
||||
panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name))
|
||||
}
|
||||
typ := reflect.TypeOf(f)
|
||||
if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() {
|
||||
panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name))
|
||||
}
|
||||
c.call.Native = true
|
||||
c.call.Index = nativeIndexes[c.call.Name]
|
||||
continue
|
||||
}
|
||||
function := prog.Functions[index]
|
||||
if len(c.call.Args) > len(function.Params) {
|
||||
panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name))
|
||||
}
|
||||
c.call.Index = index
|
||||
}
|
||||
}
|
||||
|
||||
// For arguments that are variable references, we don't know the
|
||||
// type based on context, so mark the types for these as unknown.
|
||||
func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) {
|
||||
if varExpr, ok := arg.(*ast.VarExpr); ok {
|
||||
scope, varFuncName := p.getScope(varExpr.Name)
|
||||
ref := p.varTypes[varFuncName][varExpr.Name].ref
|
||||
if ref == varExpr {
|
||||
// Only applies if this is the first reference to this
|
||||
// variable (otherwise we know the type already)
|
||||
p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index}
|
||||
}
|
||||
// Mark the last related varRef (the most recent one) as a
|
||||
// call argument for later error handling
|
||||
p.varRefs[len(p.varRefs)-1].isArg = true
|
||||
}
|
||||
}
|
||||
|
||||
// Determine scope of given variable reference (and funcName if it's
|
||||
// a local, otherwise empty string)
|
||||
func (p *parser) getScope(name string) (ast.VarScope, string) {
|
||||
switch {
|
||||
case p.locals[name]:
|
||||
return ast.ScopeLocal, p.funcName
|
||||
case ast.SpecialVarIndex(name) > 0:
|
||||
return ast.ScopeSpecial, ""
|
||||
default:
|
||||
return ast.ScopeGlobal, ""
|
||||
}
|
||||
}
|
||||
|
||||
// Record a variable (scalar) reference and return the *VarExpr (but
|
||||
// VarExpr.Index won't be set till later)
|
||||
func (p *parser) varRef(name string, pos Position) *ast.VarExpr {
|
||||
scope, funcName := p.getScope(name)
|
||||
expr := &ast.VarExpr{scope, 0, name}
|
||||
p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos})
|
||||
info := p.varTypes[funcName][name]
|
||||
if info.typ == typeUnknown {
|
||||
p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0}
|
||||
}
|
||||
return expr
|
||||
}
|
||||
|
||||
// Record an array reference and return the *ArrayExpr (but
|
||||
// ArrayExpr.Index won't be set till later)
|
||||
func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr {
|
||||
scope, funcName := p.getScope(name)
|
||||
if scope == ast.ScopeSpecial {
|
||||
panic(p.errorf("can't use scalar %q as array", name))
|
||||
}
|
||||
expr := &ast.ArrayExpr{scope, 0, name}
|
||||
p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos})
|
||||
info := p.varTypes[funcName][name]
|
||||
if info.typ == typeUnknown {
|
||||
p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0}
|
||||
}
|
||||
return expr
|
||||
}
|
||||
|
||||
// Print variable type information (for debugging) on p.debugWriter
|
||||
func (p *parser) printVarTypes(prog *Program) {
|
||||
fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars)
|
||||
fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays)
|
||||
funcNames := []string{}
|
||||
for funcName := range p.varTypes {
|
||||
funcNames = append(funcNames, funcName)
|
||||
}
|
||||
sort.Strings(funcNames)
|
||||
for _, funcName := range funcNames {
|
||||
if funcName != "" {
|
||||
fmt.Fprintf(p.debugWriter, "function %s\n", funcName)
|
||||
} else {
|
||||
fmt.Fprintf(p.debugWriter, "globals\n")
|
||||
}
|
||||
varNames := []string{}
|
||||
for name := range p.varTypes[funcName] {
|
||||
varNames = append(varNames, name)
|
||||
}
|
||||
sort.Strings(varNames)
|
||||
for _, name := range varNames {
|
||||
info := p.varTypes[funcName][name]
|
||||
fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve unknown variables types and generate variable indexes and
|
||||
// name-to-index mappings for interpreter
|
||||
func (p *parser) resolveVars(prog *Program) {
|
||||
// First go through all unknown types and try to determine the
|
||||
// type from the parameter type in that function definition.
|
||||
// Iterate through functions in topological order, for example
|
||||
// if f() calls g(), process g first, then f.
|
||||
callGraph := make(map[string]map[string]struct{})
|
||||
for _, call := range p.userCalls {
|
||||
if _, ok := callGraph[call.inFunc]; !ok {
|
||||
callGraph[call.inFunc] = make(map[string]struct{})
|
||||
}
|
||||
callGraph[call.inFunc][call.call.Name] = struct{}{}
|
||||
}
|
||||
sortedFuncs := topoSort(callGraph)
|
||||
for _, funcName := range sortedFuncs {
|
||||
infos := p.varTypes[funcName]
|
||||
for name, info := range infos {
|
||||
if info.scope == ast.ScopeSpecial || info.typ != typeUnknown {
|
||||
// It's a special var or type is already known
|
||||
continue
|
||||
}
|
||||
funcIndex, ok := p.functions[info.callName]
|
||||
if !ok {
|
||||
// Function being called is a native function
|
||||
continue
|
||||
}
|
||||
// Determine var type based on type of this parameter
|
||||
// in the called function (if we know that)
|
||||
paramName := prog.Functions[funcIndex].Params[info.argIndex]
|
||||
typ := p.varTypes[info.callName][paramName].typ
|
||||
if typ != typeUnknown {
|
||||
if p.debugTypes {
|
||||
fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
|
||||
funcName, name, typ)
|
||||
}
|
||||
info.typ = typ
|
||||
p.varTypes[funcName][name] = info
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve global variables (iteration order is undefined, so
|
||||
// assign indexes basically randomly)
|
||||
prog.Scalars = make(map[string]int)
|
||||
prog.Arrays = make(map[string]int)
|
||||
for name, info := range p.varTypes[""] {
|
||||
_, isFunc := p.functions[name]
|
||||
if isFunc {
|
||||
// Global var can't also be the name of a function
|
||||
panic(p.errorf("global var %q can't also be a function", name))
|
||||
}
|
||||
var index int
|
||||
if info.scope == ast.ScopeSpecial {
|
||||
index = ast.SpecialVarIndex(name)
|
||||
} else if info.typ == typeArray {
|
||||
index = len(prog.Arrays)
|
||||
prog.Arrays[name] = index
|
||||
} else {
|
||||
index = len(prog.Scalars)
|
||||
prog.Scalars[name] = index
|
||||
}
|
||||
info.index = index
|
||||
p.varTypes[""][name] = info
|
||||
}
|
||||
|
||||
// Fill in unknown parameter types that are being called with arrays,
|
||||
// for example, as in the following code:
|
||||
//
|
||||
// BEGIN { arr[0]; f(arr) }
|
||||
// function f(a) { }
|
||||
for _, c := range p.userCalls {
|
||||
if c.call.Native {
|
||||
continue
|
||||
}
|
||||
function := prog.Functions[c.call.Index]
|
||||
for i, arg := range c.call.Args {
|
||||
varExpr, ok := arg.(*ast.VarExpr)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
|
||||
argType := p.varTypes[funcName][varExpr.Name]
|
||||
paramType := p.varTypes[function.Name][function.Params[i]]
|
||||
if argType.typ == typeArray && paramType.typ == typeUnknown {
|
||||
paramType.typ = argType.typ
|
||||
p.varTypes[function.Name][function.Params[i]] = paramType
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve local variables (assign indexes in order of params).
|
||||
// Also patch up Function.Arrays (tells interpreter which args
|
||||
// are arrays).
|
||||
for funcName, infos := range p.varTypes {
|
||||
if funcName == "" {
|
||||
continue
|
||||
}
|
||||
scalarIndex := 0
|
||||
arrayIndex := 0
|
||||
functionIndex := p.functions[funcName]
|
||||
function := prog.Functions[functionIndex]
|
||||
arrays := make([]bool, len(function.Params))
|
||||
for i, name := range function.Params {
|
||||
info := infos[name]
|
||||
var index int
|
||||
if info.typ == typeArray {
|
||||
index = arrayIndex
|
||||
arrayIndex++
|
||||
arrays[i] = true
|
||||
} else {
|
||||
// typeScalar or typeUnknown: variables may still be
|
||||
// of unknown type if they've never been referenced --
|
||||
// default to scalar in that case
|
||||
index = scalarIndex
|
||||
scalarIndex++
|
||||
}
|
||||
info.index = index
|
||||
p.varTypes[funcName][name] = info
|
||||
}
|
||||
prog.Functions[functionIndex].Arrays = arrays
|
||||
}
|
||||
|
||||
// Check that variables passed to functions are the correct type
|
||||
for _, c := range p.userCalls {
|
||||
// Check native function calls
|
||||
if c.call.Native {
|
||||
for _, arg := range c.call.Args {
|
||||
varExpr, ok := arg.(*ast.VarExpr)
|
||||
if !ok {
|
||||
// Non-variable expression, must be scalar
|
||||
continue
|
||||
}
|
||||
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
|
||||
info := p.varTypes[funcName][varExpr.Name]
|
||||
if info.typ == typeArray {
|
||||
panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name))
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Check AWK function calls
|
||||
function := prog.Functions[c.call.Index]
|
||||
for i, arg := range c.call.Args {
|
||||
varExpr, ok := arg.(*ast.VarExpr)
|
||||
if !ok {
|
||||
if function.Arrays[i] {
|
||||
panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg))
|
||||
}
|
||||
continue
|
||||
}
|
||||
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
|
||||
info := p.varTypes[funcName][varExpr.Name]
|
||||
if info.typ == typeArray && !function.Arrays[i] {
|
||||
panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name))
|
||||
}
|
||||
if info.typ != typeArray && function.Arrays[i] {
|
||||
panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if p.debugTypes {
|
||||
p.printVarTypes(prog)
|
||||
}
|
||||
|
||||
// Patch up variable indexes (interpreter uses an index instead
|
||||
// of name for more efficient lookups)
|
||||
for _, varRef := range p.varRefs {
|
||||
info := p.varTypes[varRef.funcName][varRef.ref.Name]
|
||||
if info.typ == typeArray && !varRef.isArg {
|
||||
panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name))
|
||||
}
|
||||
varRef.ref.Index = info.index
|
||||
}
|
||||
for _, arrayRef := range p.arrayRefs {
|
||||
info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name]
|
||||
if info.typ == typeScalar {
|
||||
panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name))
|
||||
}
|
||||
arrayRef.ref.Index = info.index
|
||||
}
|
||||
}
|
||||
|
||||
// If name refers to a local (in function inFunc), return that
|
||||
// function's name, otherwise return "" (meaning global).
|
||||
func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string {
|
||||
if inFunc == "" {
|
||||
return ""
|
||||
}
|
||||
for _, param := range prog.Functions[p.functions[inFunc]].Params {
|
||||
if name == param {
|
||||
return inFunc
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Record a "multi expression" (comma-separated pseudo-expression
|
||||
// used to allow commas around print/printf arguments).
|
||||
func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr {
|
||||
expr := &ast.MultiExpr{exprs}
|
||||
p.multiExprs[expr] = pos
|
||||
return expr
|
||||
}
|
||||
|
||||
// Mark the multi expression as used (by a print/printf statement).
|
||||
func (p *parser) useMultiExpr(expr *ast.MultiExpr) {
|
||||
delete(p.multiExprs, expr)
|
||||
}
|
||||
|
||||
// Check that there are no unused multi expressions (syntax error).
|
||||
func (p *parser) checkMultiExprs() {
|
||||
if len(p.multiExprs) == 0 {
|
||||
return
|
||||
}
|
||||
// Show error on first comma-separated expression
|
||||
min := Position{1000000000, 1000000000}
|
||||
for _, pos := range p.multiExprs {
|
||||
if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) {
|
||||
min = pos
|
||||
}
|
||||
}
|
||||
panic(p.posErrorf(min, "unexpected comma-separated expression"))
|
||||
}
|

src/tool/awk/parser/toposort.go (new file, 72 lines)
@@ -0,0 +1,72 @@
// Topological sorting

package parser

/*
This algorithm is taken from:
https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search

L ← Empty list that will contain the sorted nodes
while exists nodes without a permanent mark do
    select an unmarked node n
    visit(n)

function visit(node n)
    if n has a permanent mark then
        return
    if n has a temporary mark then
        stop (not a DAG)

    mark n with a temporary mark

    for each node m with an edge from n to m do
        visit(m)

    remove temporary mark from n
    mark n with a permanent mark
    add n to head of L
*/

// Perform a topological sort on the given graph.
func topoSort(graph map[string]map[string]struct{}) []string {
    if len(graph) == 0 {
        return nil
    }

    unmarked := make(map[string]struct{})
    for node := range graph {
        unmarked[node] = struct{}{}
    }
    permMarks := make(map[string]struct{})
    tempMarks := make(map[string]struct{})
    var sorted []string

    var visit func(string)
    visit = func(n string) {
        if _, ok := permMarks[n]; ok {
            return
        }
        if _, ok := tempMarks[n]; ok {
            return
        }
        tempMarks[n] = struct{}{}
        for m := range graph[n] {
            visit(m)
        }
        delete(tempMarks, n)
        permMarks[n] = struct{}{}
        delete(unmarked, n)
        sorted = append(sorted, n)
        return
    }

    for len(unmarked) > 0 {
        var n string
        for n = range unmarked {
            break
        }
        visit(n)
    }

    return sorted
}
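
Before the test file that follows, a brief illustration may help: resolve.go builds a call graph with this same map-of-sets shape (caller name to the set of functions it calls) and relies on callees sorting before their callers. The graph and function names below are made up for the example; it is a sketch, not part of the commit.

```go
// Hypothetical call graph: main calls helper, helper calls leaf.
graph := map[string]map[string]struct{}{
    "main":   {"helper": {}},
    "helper": {"leaf": {}},
}
// Callees come first, so each function's parameter types are known
// before the functions that call it are resolved.
fmt.Println(topoSort(graph)) // [leaf helper main]
```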

src/tool/awk/parser/toposort_test.go (new file, 100 lines)
@@ -0,0 +1,100 @@
package parser

import (
    "strconv"
    "testing"
)

func TestTopoSortEmpty(t *testing.T) {
    sorted := topoSort(nil)
    if len(sorted) != 0 {
        t.Fatalf("expected empty slice, got %v", sorted)
    }
}

func TestTopoSortSimple(t *testing.T) {
    sorted := topoSort(map[string]map[string]struct{}{
        "a": {"b": struct{}{}},
        "b": {"c": struct{}{}},
    })
    if len(sorted) != 3 {
        t.Fatalf("expected 3 items, got %d", len(sorted))
    }
    assertBefore(t, sorted, "c", "b")
    assertBefore(t, sorted, "b", "a")
}

func TestTopoSortComplex(t *testing.T) {
    sorted := topoSort(map[string]map[string]struct{}{
        "a": {"b": struct{}{}, "c": struct{}{}},
        "c": {"d": struct{}{}},
        "f": {"g": struct{}{}, "h": struct{}{}},
        "g": {},
        "h": {},
    })
    if len(sorted) != 7 {
        t.Fatalf("expected 7 items, got %d", len(sorted))
    }
    assertBefore(t, sorted, "g", "f")
    assertBefore(t, sorted, "h", "f")
    assertBefore(t, sorted, "d", "c")
    assertBefore(t, sorted, "c", "a")
    assertBefore(t, sorted, "b", "a")
}

func assertBefore(t *testing.T, sorted []string, x, y string) {
    xi := strIndex(sorted, x)
    if xi < 0 {
        t.Fatalf("expected %q to be in result", x)
    }
    yi := strIndex(sorted, y)
    if yi < 0 {
        t.Fatalf("expected %q to be in result", y)
    }
    if xi >= yi {
        t.Fatalf("expected %q to come before %q, got indexes %d and %d", x, y, xi, yi)
    }
}

func strIndex(slice []string, s string) int {
    for i, item := range slice {
        if s == item {
            return i
        }
    }
    return -1
}

func TestTopoSortCycle(t *testing.T) {
    sorted := topoSort(map[string]map[string]struct{}{
        "a": {"b": struct{}{}, "c": struct{}{}},
        "c": {"a": struct{}{}},
    })
    if len(sorted) != 3 {
        t.Fatalf("expected 3 items, got %d", len(sorted))
    }
    assertBefore(t, sorted, "b", "a")
    // "a" must still appear in the result even though it is part of a cycle.
    if strIndex(sorted, "a") < 0 {
        t.Fatalf("expected %q to be in result", "a")
    }
}

func TestTopoSortLarge(t *testing.T) {
    const num = 1000
    graph := make(map[string]map[string]struct{})
    for i := 0; i < num; i++ {
        graph[strconv.Itoa(i)] = map[string]struct{}{strconv.Itoa(i + 1): {}}
    }
    graph[strconv.Itoa(num)] = map[string]struct{}{}
    sorted := topoSort(graph)
    if len(sorted) != num+1 {
        t.Fatalf("expected %d items, got %d", num+1, len(sorted))
    }
    for i := 0; i < num+1; i++ {
        expected := num - i
        if sorted[i] != strconv.Itoa(expected) {
            t.Fatalf("expected %d to be at index %d, got %s", expected, i, sorted[i])
        }
    }
}

src/tool/awk/readme.md (new file, 125 lines)
@@ -0,0 +1,125 @@

# GoAWK: an AWK interpreter with CSV support

[![Documentation](https://pkg.go.dev/badge/github.com/benhoyt/goawk)](https://pkg.go.dev/github.com/benhoyt/goawk)
[![GitHub Actions Build](https://github.com/benhoyt/goawk/workflows/Go/badge.svg)](https://github.com/benhoyt/goawk/actions?query=workflow%3AGo)

AWK is a fascinating text-processing language, and somehow after reading the delightfully-terse [*The AWK Programming Language*](https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf) I was inspired to write an interpreter for it in Go. So here it is, feature-complete and tested against "the one true AWK" and GNU AWK test suites.

GoAWK is a POSIX-compatible version of AWK, and additionally has a CSV mode for reading and writing CSV and TSV files. This feature was sponsored by the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/). Read the [CSV documentation](https://github.com/benhoyt/goawk/blob/master/csv.md).

You can also read one of the articles I've written about GoAWK:

* The original article about [how GoAWK works and performs](https://benhoyt.com/writings/goawk/)
* How I converted the tree-walking interpreter to a [bytecode compiler and virtual machine](https://benhoyt.com/writings/goawk-compiler-vm/)
* A description of why and how I added [CSV support](https://benhoyt.com/writings/goawk-csv/)

## Basic usage

To use the command-line version, simply use `go install` to install it, and then run it using `goawk` (assuming `~/go/bin` is in your `PATH`):

```shell
$ go install github.com/benhoyt/goawk@latest

$ goawk 'BEGIN { print "foo", 42 }'
foo 42

$ echo 1 2 3 | goawk '{ print $1 + $3 }'
4

# Or use GoAWK's CSV and @"named-field" support:
$ echo -e 'name,amount\nBob,17.50\nJill,20\n"Boba Fett",100.00' | \
  goawk -i csv -H '{ total += @"amount" } END { print total }'
137.5
```

On Windows, `"` is the shell quoting character, so use `"` around the entire AWK program on the command line, and use `'` around AWK strings -- this is a non-POSIX extension to make GoAWK easier to use on Windows:

```powershell
C:\> goawk "BEGIN { print 'foo', 42 }"
foo 42
```

To use it in your Go programs, you can call `interp.Exec()` directly for simple needs:

```go
input := strings.NewReader("foo bar\n\nbaz buz")
err := interp.Exec("$0 { print $1 }", " ", input, nil)
if err != nil {
    fmt.Println(err)
    return
}
// Output:
// foo
// baz
```

Or you can use the `parser` module and then `interp.ExecProgram()` to control execution, set variables, and so on:

```go
src := "{ print NR, tolower($0) }"
input := "A\naB\nAbC"

prog, err := parser.ParseProgram([]byte(src), nil)
if err != nil {
    fmt.Println(err)
    return
}
config := &interp.Config{
    Stdin: strings.NewReader(input),
    Vars:  []string{"OFS", ":"},
}
_, err = interp.ExecProgram(prog, config)
if err != nil {
    fmt.Println(err)
    return
}
// Output:
// 1:a
// 2:ab
// 3:abc
```

If you need to repeat execution of the same program on different inputs, you can call [`interp.New`](https://pkg.go.dev/github.com/benhoyt/goawk/interp#New) once, and then call the returned object's `Execute` method as many times as you need.
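
For illustration, a minimal sketch of that reuse pattern might look like this (it assumes `interp.New` returns an interpreter whose `Execute` method takes an `*interp.Config` and resets per-run state; check the linked docs for the exact signatures):

```go
// Sketch only: parse once, execute the same program against several inputs.
prog, err := parser.ParseProgram([]byte(`{ n++ } END { print n }`), nil)
if err != nil {
    fmt.Println(err)
    return
}
interpreter, err := interp.New(prog)
if err != nil {
    fmt.Println(err)
    return
}
for _, text := range []string{"a\nb\n", "x\ny\nz\n"} {
    _, err := interpreter.Execute(&interp.Config{
        Stdin:  strings.NewReader(text),
        Output: os.Stdout,
    })
    if err != nil {
        fmt.Println(err)
        return
    }
}
// Expected output, assuming per-run state is reset:
// 2
// 3
```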

Read the [package documentation](https://pkg.go.dev/github.com/benhoyt/goawk) for more details.


## Differences from AWK

The intention is for GoAWK to conform to `awk`'s behavior and to the [POSIX AWK spec](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html), but this section describes some areas where it's different.

Additional features GoAWK has over AWK:

* It has proper support for CSV and TSV files ([read the documentation](https://github.com/benhoyt/goawk/blob/master/csv.md)).
* It supports negative field indexes to access fields from the right, for example, `$-1` refers to the last field.
* It's embeddable in your Go programs! You can even call custom Go functions from your AWK scripts (see the sketch after this list).
* Most AWK scripts are faster than `awk` and on a par with `gawk`, though usually slower than `mawk`. (See [recent benchmarks](https://benhoyt.com/writings/goawk-compiler-vm/#virtual-machine-results).)
* The parser supports `'single-quoted strings'` in addition to `"double-quoted strings"`, primarily to make Windows one-liners easier (the Windows `cmd.exe` shell uses `"` as the quote character).
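
To make the "custom Go functions" point above concrete, here is a hedged sketch; it assumes the `Funcs` field of `interp.Config` (a map of native Go functions keyed by their AWK names), so check the package docs for the supported parameter types before relying on the exact shape:

```go
// Sketch only: expose a Go function to AWK as repeat(str, n).
config := &interp.Config{
    Funcs: map[string]interface{}{
        "repeat": func(s string, n float64) string {
            return strings.Repeat(s, int(n))
        },
    },
}
prog, err := parser.ParseProgram([]byte(`BEGIN { print repeat("ab", 3) }`), nil)
if err != nil {
    fmt.Println(err)
    return
}
_, err = interp.ExecProgram(prog, config)
if err != nil {
    fmt.Println(err)
    return
}
// Expected output: ababab
```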

Things AWK has over GoAWK:

* Scripts that use regular expressions are slower than other implementations (unfortunately Go's `regexp` package is relatively slow).
* AWK is written by Alfred Aho, Peter Weinberger, and Brian Kernighan.


## Stability

This project has a good suite of tests, which include my own interpreter tests, the original AWK test suite, and the relevant tests from the Gawk test suite. I've used it a bunch personally, and it's used in the [Benthos](https://github.com/benthosdev/benthos) stream processor as well as by the software team at the library of the University of Antwerp. However, to `err == human`, so please use GoAWK at your own risk. I intend not to change the Go API in a breaking way in any v1.x.y version.


## AWKGo

The GoAWK repository also includes the creatively-named AWKGo, an AWK-to-Go compiler. This is experimental and is not subject to the stability requirements of GoAWK itself. You can [read more about AWKGo](https://benhoyt.com/writings/awkgo/) or browse the code on the [`awkgo` branch](https://github.com/benhoyt/goawk/tree/awkgo/awkgo).


## License

GoAWK is licensed under an open source [MIT license](https://github.com/benhoyt/goawk/blob/master/LICENSE.txt).


## The end

Have fun, and please [contact me](https://benhoyt.com/) if you're using GoAWK or have any feedback!

src/tool/awk/scripts/benchmark.sh (new executable file, 2 lines)
@@ -0,0 +1,2 @@
#!/bin/sh
go test ./interp -bench=. -count=5 > benchmarks_new.txt

src/tool/awk/scripts/benchmark_awks.py (new executable file, 124 lines)
@@ -0,0 +1,124 @@
#!/usr/bin/env python3
|
||||
# Benchmark GoAWK against other AWK versions
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import glob
|
||||
import os.path
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
AWKS = [
|
||||
'./goawk',
|
||||
'./orig', # GoAWK without perf improvements
|
||||
'original-awk',
|
||||
'gawk',
|
||||
'mawk',
|
||||
]
|
||||
NORM_INDEX = AWKS.index('original-awk')
|
||||
TESTS_TO_MEAN = None # By default, calculate the mean of all tests
|
||||
if False:
|
||||
# Only get the mean of these tests because these are the only ones
|
||||
# we show in the GoAWK article.
|
||||
TESTS_TO_MEAN = [
|
||||
'tt.01_print',
|
||||
'tt.02_print_NR_NF',
|
||||
'tt.02a_print_length',
|
||||
'tt.03_sum_length',
|
||||
'tt.03a_sum_field',
|
||||
'tt.04_printf_fields',
|
||||
'tt.05_concat_fields',
|
||||
'tt.06_count_lengths',
|
||||
'tt.07_even_fields',
|
||||
'tt.big_complex_program',
|
||||
'tt.x1_mandelbrot',
|
||||
'tt.x2_sum_loop',
|
||||
]
|
||||
NUM_RUNS = 6
|
||||
MIN_TIME = 0.5
|
||||
PROGRAM_GLOB = 'testdata/tt.*'
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
PROGRAM_GLOB = 'testdata/' + sys.argv[1]
|
||||
|
||||
|
||||
def repeat_file(input_file, repeated_file, n):
|
||||
with open(input_file, 'rb') as fin, open(repeated_file, 'wb') as fout:
|
||||
for i in range(n):
|
||||
fin.seek(0)
|
||||
shutil.copyfileobj(fin, fout)
|
||||
|
||||
|
||||
print('Test ', end='')
|
||||
for awk in AWKS:
|
||||
display_awk = os.path.basename(awk)
|
||||
display_awk = display_awk.replace('original-awk', 'awk')
|
||||
print('| {:>8} '.format(display_awk), end='')
|
||||
print()
|
||||
print('-'*28 + ' | --------'*len(AWKS))
|
||||
|
||||
repeats_created = []
|
||||
products = [1] * len(AWKS)
|
||||
num_products = 0
|
||||
programs = sorted(glob.glob(PROGRAM_GLOB))
|
||||
for program in programs:
|
||||
# First do a test run with GoAWK to see roughly how long it takes
|
||||
cmdline = '{} -f {} testdata/foo.td >tt.out'.format(AWKS[0], program)
|
||||
start = time.time()
|
||||
status = subprocess.call(cmdline, shell=True)
|
||||
elapsed = time.time() - start
|
||||
|
||||
# If test run took less than MIN_TIME seconds, scale/repeat input
|
||||
# file accordingly
|
||||
input_file = 'testdata/foo.td'
|
||||
if elapsed < MIN_TIME:
|
||||
multiplier = int(round(MIN_TIME / elapsed))
|
||||
repeated_file = '{}.{}'.format(input_file, multiplier)
|
||||
if not os.path.exists(repeated_file):
|
||||
repeat_file(input_file, repeated_file, multiplier)
|
||||
repeats_created.append(repeated_file)
|
||||
input_file = repeated_file
|
||||
|
||||
# Record time taken to run this test, running each NUM_RUNS times
|
||||
# and taking the minimum elapsed time
|
||||
awk_times = []
|
||||
for awk in AWKS:
|
||||
cmdline = '{} -f {} {} >tt.out'.format(awk, program, input_file)
|
||||
times = []
|
||||
for i in range(NUM_RUNS):
|
||||
start = time.time()
|
||||
status = subprocess.call(cmdline, shell=True)
|
||||
elapsed = time.time() - start
|
||||
times.append(elapsed)
|
||||
if status != 0:
|
||||
print('ERROR status {} from cmd: {}'.format(status, cmdline), file=sys.stderr)
|
||||
min_time = min(sorted(times)[1:])
|
||||
awk_times.append(min_time)
|
||||
|
||||
# Normalize to One True AWK time = 1.0
|
||||
norm_time = awk_times[NORM_INDEX]
|
||||
speeds = [norm_time/t for t in awk_times]
|
||||
test_name = program.split('/')[1]
|
||||
if TESTS_TO_MEAN is None or test_name in TESTS_TO_MEAN:
|
||||
num_products += 1
|
||||
for i in range(len(AWKS)):
|
||||
products[i] *= speeds[i]
|
||||
|
||||
display_name = test_name.split('_')[0] + ' (' + ' '.join(test_name.split('_')[1:]) + ')'
|
||||
print('{:28}'.format(display_name), end='')
|
||||
for i, awk in enumerate(AWKS):
|
||||
print(' | {:8.2f}'.format(speeds[i]), end='')
|
||||
print()
|
||||
|
||||
print('-'*28 + ' | --------'*len(AWKS))
|
||||
print('**Geo mean** ', end='')
|
||||
for i, awk in enumerate(AWKS):
|
||||
print(' | **{:.2f}**'.format(products[i] ** (1.0/num_products)), end='')
|
||||
print()
|
||||
|
||||
# Delete temporary files created
|
||||
os.remove('tt.out')
|
||||
for repeated_file in repeats_created:
|
||||
os.remove(repeated_file)
|

src/tool/awk/scripts/benchstat.sh (new executable file, 2 lines)
@@ -0,0 +1,2 @@
#!/bin/sh
~/go/bin/benchstat -sort=delta -geomean benchmarks_old.txt benchmarks_new.txt

src/tool/awk/scripts/csvbench/count.py (new file, 9 lines)
@@ -0,0 +1,9 @@
import csv
import sys

lines, fields = 0, 0
for row in csv.reader(sys.stdin):
    lines += 1
    fields += len(row)

print(lines, fields)

src/tool/awk/scripts/csvbench/count/main.go (new file, 27 lines)
@@ -0,0 +1,27 @@
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
reader := csv.NewReader(bufio.NewReader(os.Stdin))
|
||||
lines, fields := 0, 0
|
||||
for {
|
||||
row, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
lines++
|
||||
fields += len(row)
|
||||
}
|
||||
fmt.Println(lines, fields)
|
||||
}
|

src/tool/awk/scripts/csvbench/csvbench.sh (new executable file, 48 lines)
@@ -0,0 +1,48 @@
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
echo ===== Writing 1GB - goawk
|
||||
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
|
||||
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
|
||||
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
|
||||
|
||||
echo ===== Writing 1GB - frawk
|
||||
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
|
||||
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
|
||||
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
|
||||
|
||||
echo ===== Writing 1GB - Python
|
||||
time python3 write.py >/dev/null
|
||||
time python3 write.py >/dev/null
|
||||
time python3 write.py >/dev/null
|
||||
|
||||
echo ===== Writing 1GB - Go
|
||||
go build -o bin/write ./write
|
||||
time ./bin/write >/dev/null
|
||||
time ./bin/write >/dev/null
|
||||
time ./bin/write >/dev/null
|
||||
|
||||
|
||||
./bin/write >count.csv
|
||||
|
||||
echo ===== Reading 1GB - goawk
|
||||
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||
|
||||
echo ===== Reading 1GB - frawk
|
||||
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||
|
||||
echo ===== Reading 1GB - Python
|
||||
time python3 count.py <count.csv
|
||||
time python3 count.py <count.csv
|
||||
time python3 count.py <count.csv
|
||||
|
||||
echo ===== Reading 1GB - Go
|
||||
go build -o bin/count ./count
|
||||
time ./bin/count <count.csv
|
||||
time ./bin/count <count.csv
|
||||
time ./bin/count <count.csv
|

src/tool/awk/scripts/csvbench/write.py (new file, 27 lines)
@@ -0,0 +1,27 @@
import csv
|
||||
import sys
|
||||
|
||||
writer = csv.writer(sys.stdout)
|
||||
for i in range(3514073): # will create a ~1GB file
|
||||
writer.writerow([
|
||||
i,
|
||||
"foo",
|
||||
"bob@example.com",
|
||||
"simple,quoted",
|
||||
"quoted string with \" in it",
|
||||
"0123456789",
|
||||
"9876543210",
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
"",
|
||||
"final field",
|
||||
i,
|
||||
"foo",
|
||||
"bob@example.com",
|
||||
"simple,quoted",
|
||||
"quoted string with \" in it",
|
||||
"0123456789",
|
||||
"9876543210",
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
"",
|
||||
"final field",
|
||||
])
|

src/tool/awk/scripts/csvbench/write/main.go (new file, 43 lines)
@@ -0,0 +1,43 @@
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
func main() {
|
||||
writer := csv.NewWriter(os.Stdout)
|
||||
for i := 0; i < 3514073; i++ { // will create a ~1GB file
|
||||
err := writer.Write([]string{
|
||||
strconv.Itoa(i),
|
||||
"foo",
|
||||
"bob@example.com",
|
||||
"simple,quoted",
|
||||
"quoted string with \" in it",
|
||||
"0123456789",
|
||||
"9876543210",
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
"",
|
||||
"final field",
|
||||
strconv.Itoa(i),
|
||||
"foo",
|
||||
"bob@example.com",
|
||||
"simple,quoted",
|
||||
"quoted string with \" in it",
|
||||
"0123456789",
|
||||
"9876543210",
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
"",
|
||||
"final field",
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
writer.Flush()
|
||||
if writer.Error() != nil {
|
||||
log.Fatal(writer.Error())
|
||||
}
|
||||
}
|

src/tool/awk/scripts/fuzz_input.sh (new executable file, 2 lines)
@@ -0,0 +1,2 @@
#!/bin/sh
go1.18rc1 test ./interp -run=^$ -fuzz=Input -parallel=4

src/tool/awk/scripts/fuzz_source.sh (new executable file, 2 lines)
@@ -0,0 +1,2 @@
#!/bin/sh
go1.18rc1 test ./interp -run=^$ -fuzz=Source -parallel=4

src/tool/awk/scripts/make_binaries.sh (new executable file, 21 lines)
@@ -0,0 +1,21 @@
#!/bin/sh
|
||||
|
||||
go build
|
||||
VERSION="$(./goawk -version)"
|
||||
|
||||
GOOS=windows GOARCH=386 go build -ldflags="-w"
|
||||
zip "goawk_${VERSION}_windows_386.zip" goawk.exe README.md csv.md LICENSE.txt
|
||||
GOOS=windows GOARCH=amd64 go build -ldflags="-w"
|
||||
zip "goawk_${VERSION}_windows_amd64.zip" goawk.exe README.md csv.md LICENSE.txt
|
||||
|
||||
GOOS=linux GOARCH=386 go build -ldflags="-w"
|
||||
tar -cvzf "goawk_${VERSION}_linux_386.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags="-w"
|
||||
tar -cvzf "goawk_${VERSION}_linux_amd64.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||
|
||||
GOOS=darwin GOARCH=amd64 go build -ldflags="-w"
|
||||
tar -cvzf "goawk_${VERSION}_darwin_amd64.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||
GOOS=darwin GOARCH=arm64 go build -ldflags="-w"
|
||||
tar -cvzf "goawk_${VERSION}_darwin_arm64.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||
|
||||
rm -f goawk goawk.exe
|

src/tool/awk/testdata/README (vendored, new file, 46 lines)
@@ -0,0 +1,46 @@
Original README.TESTS from one-true-awk regdir tests directory:
|
||||
---------------------------------------------------------------
|
||||
The archive of test files contains
|
||||
|
||||
- A shell file called REGRESS that controls the testing process.
|
||||
|
||||
- Several shell files called Compare* that control sub-parts
|
||||
of the testing.
|
||||
|
||||
- About 160 small tests called t.* that constitute a random
|
||||
sampling of awk constructions collected over the years.
|
||||
Not organized, but they touch almost everything.
|
||||
|
||||
- About 60 small tests called p.* that come from the first
|
||||
two chapters of The AWK Programming Environment. This is
|
||||
basic stuff -- they have to work.
|
||||
|
||||
These two sets are intended as regression tests, to be sure
|
||||
that a new version produces the same results as a previous one.
|
||||
There are a couple of standard data files used with them,
|
||||
test.data and test.countries, but others would work too.
|
||||
|
||||
- About 20 files called T.* that are self-contained and
|
||||
more systematic tests of specific language features.
|
||||
For example, T.clv tests command-line variable handling.
|
||||
These tests are not regressions -- they compute the right
|
||||
answer by separate means, then compare the awk output.
|
||||
A specific test for each new bug found shows up in at least
|
||||
one of these, most often T.misc. There are about 220 tests
|
||||
total in these files.
|
||||
|
||||
- Two of these files, T.re and T.sub, are systematic tests
|
||||
of the regular expression and substitution code. They express
|
||||
tests in a small language, then generate awk programs that
|
||||
verify behavior.
|
||||
|
||||
- About 20 files called tt.* that are used as timing tests;
|
||||
they use the most common awk constructions in straightforward
|
||||
ways, against a large input file constructed by Compare.tt.
|
||||
|
||||
|
||||
There is undoubtedly more stuff in the archive; it's been
|
||||
collecting for years and may need pruning. Suggestions for
|
||||
improvement, additional tests (especially systematic ones),
|
||||
and the like are all welcome.
|
||||
|

src/tool/awk/testdata/csv/1.csv (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
name,age
Bob,42
Jill,37

src/tool/awk/testdata/csv/2.csv (vendored, new file, 2 lines)
@@ -0,0 +1,2 @@
age,email,name
25,sarah@example.com,Sarah

src/tool/awk/testdata/csv/address5.csv (vendored, new file, 2 lines)
@@ -0,0 +1,2 @@
name,address_1,address_2,address_3,address_4,address_5
Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates

src/tool/awk/testdata/csv/fields.csv (vendored, new file, 2 lines)
@@ -0,0 +1,2 @@
id,name,email
1,Bob,b@bob.com

src/tool/awk/testdata/csv/nz-schools.csv (vendored, new file, 2563 lines; diff suppressed because it is too large)

src/tool/awk/testdata/csv/states.csv (vendored, new file, 52 lines)
@@ -0,0 +1,52 @@
"State","Abbreviation"
|
||||
"Alabama","AL"
|
||||
"Alaska","AK"
|
||||
"Arizona","AZ"
|
||||
"Arkansas","AR"
|
||||
"California","CA"
|
||||
"Colorado","CO"
|
||||
"Connecticut","CT"
|
||||
"Delaware","DE"
|
||||
"District of Columbia","DC"
|
||||
"Florida","FL"
|
||||
"Georgia","GA"
|
||||
"Hawaii","HI"
|
||||
"Idaho","ID"
|
||||
"Illinois","IL"
|
||||
"Indiana","IN"
|
||||
"Iowa","IA"
|
||||
"Kansas","KS"
|
||||
"Kentucky","KY"
|
||||
"Louisiana","LA"
|
||||
"Maine","ME"
|
||||
"Montana","MT"
|
||||
"Nebraska","NE"
|
||||
"Nevada","NV"
|
||||
"New Hampshire","NH"
|
||||
"New Jersey","NJ"
|
||||
"New Mexico","NM"
|
||||
"New York","NY"
|
||||
"North Carolina","NC"
|
||||
"North Dakota","ND"
|
||||
"Ohio","OH"
|
||||
"Oklahoma","OK"
|
||||
"Oregon","OR"
|
||||
"Maryland","MD"
|
||||
"Massachusetts","MA"
|
||||
"Michigan","MI"
|
||||
"Minnesota","MN"
|
||||
"Mississippi","MS"
|
||||
"Missouri","MO"
|
||||
"Pennsylvania","PA"
|
||||
"Rhode Island","RI"
|
||||
"South Carolina","SC"
|
||||
"South Dakota","SD"
|
||||
"Tennessee","TN"
|
||||
"Texas","TX"
|
||||
"Utah","UT"
|
||||
"Vermont","VT"
|
||||
"Virginia","VA"
|
||||
"Washington","WA"
|
||||
"West Virginia","WV"
|
||||
"Wisconsin","WI"
|
||||
"Wyoming","WY"
|
|

src/tool/awk/testdata/csv/states.psv (vendored, new file, 53 lines)
@@ -0,0 +1,53 @@
# comment
|
||||
State|Abbreviation
|
||||
Alabama|AL
|
||||
Alaska|AK
|
||||
Arizona|AZ
|
||||
Arkansas|AR
|
||||
California|CA
|
||||
Colorado|CO
|
||||
Connecticut|CT
|
||||
Delaware|DE
|
||||
District of Columbia|DC
|
||||
Florida|FL
|
||||
Georgia|GA
|
||||
Hawaii|HI
|
||||
Idaho|ID
|
||||
Illinois|IL
|
||||
Indiana|IN
|
||||
Iowa|IA
|
||||
Kansas|KS
|
||||
Kentucky|KY
|
||||
Louisiana|LA
|
||||
Maine|ME
|
||||
Montana|MT
|
||||
Nebraska|NE
|
||||
Nevada|NV
|
||||
New Hampshire|NH
|
||||
New Jersey|NJ
|
||||
New Mexico|NM
|
||||
New York|NY
|
||||
North Carolina|NC
|
||||
North Dakota|ND
|
||||
Ohio|OH
|
||||
Oklahoma|OK
|
||||
Oregon|OR
|
||||
Maryland|MD
|
||||
Massachusetts|MA
|
||||
Michigan|MI
|
||||
Minnesota|MN
|
||||
Mississippi|MS
|
||||
Missouri|MO
|
||||
Pennsylvania|PA
|
||||
Rhode Island|RI
|
||||
South Carolina|SC
|
||||
South Dakota|SD
|
||||
Tennessee|TN
|
||||
Texas|TX
|
||||
Utah|UT
|
||||
Vermont|VT
|
||||
Virginia|VA
|
||||
Washington|WA
|
||||
West Virginia|WV
|
||||
Wisconsin|WI
|
||||
Wyoming|WY
|

src/tool/awk/testdata/echo (vendored, new executable file; binary file not shown)

src/tool/awk/testdata/filename/10 (vendored, new file, 1 line)
@@ -0,0 +1 @@
foo

src/tool/awk/testdata/filename/10x (vendored, new file, 1 line)
@@ -0,0 +1 @@
bar

src/tool/awk/testdata/foo.td (vendored, new file, 37801 lines; diff suppressed because it is too large)

src/tool/awk/testdata/g.1 (vendored, new file, 1 line)
@@ -0,0 +1 @@
ONE

src/tool/awk/testdata/g.2 (vendored, new file, 1 line)
@@ -0,0 +1 @@
TWO

src/tool/awk/testdata/g.3 (vendored, new file, 10 lines)
@@ -0,0 +1,10 @@
BEGIN {
    printf "A=%d, B=%d\n", A, B
    for (i = 1; i < ARGC; i++) {
        printf "\tARGV[%d] = %s\n", i, ARGV[i]
    }
}

END {
    printf "A=%d, B=%d\n", A, B
}

src/tool/awk/testdata/g.4 (vendored, new file, 2 lines)
@@ -0,0 +1,2 @@
FOUR a
FOUR b

src/tool/awk/testdata/gawk/addcomma.awk (vendored, new file, 15 lines)
@@ -0,0 +1,15 @@
# addcomma - put commas in numbers
|
||||
# input: a number per line
|
||||
# output: the input number followed by
|
||||
# the number with commas and two decimal places
|
||||
|
||||
{ printf("%-12s %20s\n", $0, addcomma($0)) }
|
||||
|
||||
function addcomma(x, num) {
|
||||
if (x < 0)
|
||||
return "-" addcomma(-x)
|
||||
num = sprintf("%.2f", x) # num is dddddd.dd
|
||||
while (num ~ /[0-9][0-9][0-9][0-9]/)
|
||||
sub(/[0-9][0-9][0-9][,.]/, ",&", num)
|
||||
return num
|
||||
}
|

src/tool/awk/testdata/gawk/addcomma.in (vendored, new file, 7 lines)
@@ -0,0 +1,7 @@
0
-1
-12.34
12345
-1234567.89
-123.
-123456

src/tool/awk/testdata/gawk/addcomma.ok (vendored, new file, 7 lines)
@@ -0,0 +1,7 @@
0 0.00
-1 -1.00
-12.34 -12.34
12345 12,345.00
-1234567.89 -1,234,567.89
-123. -123.00
-123456 -123,456.00

src/tool/awk/testdata/gawk/anchgsub.awk (vendored, new file, 1 line)
@@ -0,0 +1 @@
{ gsub(/^[ ]*/, "", $0) ; print }

src/tool/awk/testdata/gawk/anchgsub.in (vendored, new file, 1 line)
@@ -0,0 +1 @@
This is a test, this is only a test.

src/tool/awk/testdata/gawk/anchgsub.ok (vendored, new file, 1 line)
@@ -0,0 +1 @@
This is a test, this is only a test.

src/tool/awk/testdata/gawk/anchor.awk (vendored, new file, 33 lines)
@@ -0,0 +1,33 @@
BEGIN { RS = "" }
|
||||
|
||||
{
|
||||
if (/^A/)
|
||||
print "ok"
|
||||
else
|
||||
print "not ok"
|
||||
|
||||
if (/B$/)
|
||||
print "not ok"
|
||||
else
|
||||
print "ok"
|
||||
|
||||
if (/^C/)
|
||||
print "not ok"
|
||||
else
|
||||
print "ok"
|
||||
|
||||
if (/D$/)
|
||||
print "not ok"
|
||||
else
|
||||
print "ok"
|
||||
|
||||
if (/^E/)
|
||||
print "not ok"
|
||||
else
|
||||
print "ok"
|
||||
|
||||
if (/F$/)
|
||||
print "ok"
|
||||
else
|
||||
print "not ok"
|
||||
}
|
3
src/tool/awk/testdata/gawk/anchor.in
vendored
Normal file
3
src/tool/awk/testdata/gawk/anchor.in
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
A line1 B
|
||||
C line2 D
|
||||
E line3 F
|
6  src/tool/awk/testdata/gawk/anchor.ok  vendored  Normal file
@@ -0,0 +1,6 @@
ok
ok
ok
ok
ok
ok
14  src/tool/awk/testdata/gawk/argarray.awk  vendored  Normal file
@@ -0,0 +1,14 @@
BEGIN {
argn = " argument" (ARGC > 1 ? "s" : "")
are = ARGC > 1 ? "are" : "is"
print "here we have " ARGC argn
print "which " are
for (x = 0; x < ARGC; x++)
print "\t", ARGV[x]
print "Environment variable TEST=" ENVIRON["TEST"]
print "and the current input file is called \"" FILENAME "\""
}

FNR == 1 {
print "in main loop, this input file is known as \"" FILENAME "\""
}
1  src/tool/awk/testdata/gawk/argarray.in  vendored  Normal file
@@ -0,0 +1 @@
this is a simple test file
6  src/tool/awk/testdata/gawk/argarray.ok  vendored  Normal file
@@ -0,0 +1,6 @@
here we have 1 argument
which is

Environment variable TEST=
and the current input file is called ""
in main loop, this input file is known as "-"
19  src/tool/awk/testdata/gawk/arrayind3.awk  vendored  Normal file
@@ -0,0 +1,19 @@
BEGIN {
# initialize cint arrays
pos[0] = 0
posout[0] = 0
split("00000779770060", f) # f[1] is a strnum
pos[f[1]] = 1 # subscripts must be strings!
for (x in pos) {
# if x is a strnum, then the
# x != 0 test may convert it to an integral NUMBER,
# and we might lose the unusual string representation
# if the cint code is not careful to recognize that this is
# actually a string
if (x != 0)
posout[x] = pos[x]
}
# which array element is populated?
print posout[779770060]
print posout["00000779770060"]
}
2  src/tool/awk/testdata/gawk/arrayind3.ok  vendored  Normal file
@@ -0,0 +1,2 @@

1
21  src/tool/awk/testdata/gawk/arrayparm.awk  vendored  Normal file
@@ -0,0 +1,21 @@
#
# Test program from:
#
# Date: Tue, 21 Feb 95 16:09:29 EST
# From: emory!blackhawk.com!aaron (Aaron Sosnick)
#
BEGIN {
foo[1]=1;
foo[2]=2;
bug1(foo);
}
function bug1(i) {
for (i in foo) {
bug2(i);
delete foo[i];
print i,1,bot[1];
}
}
function bug2(arg) {
bot[arg]=arg;
}
1  src/tool/awk/testdata/gawk/arrayparm.ok  vendored  Normal file
@@ -0,0 +1 @@
parse error at 10:5: can't pass array "foo" as scalar param
67  src/tool/awk/testdata/gawk/arrayprm2.awk  vendored  Normal file
@@ -0,0 +1,67 @@
# From spcecdt@armory.com Wed Apr 30 11:08:48 2003
# Return-Path: <spcecdt@armory.com>
# Received: from localhost (skeeve [127.0.0.1])
# by skeeve.com (8.12.5/8.12.5) with ESMTP id h3U7uZWr015489
# for <arnold@localhost>; Wed, 30 Apr 2003 11:08:48 +0300
# Received: from actcom.co.il [192.114.47.1]
# by localhost with POP3 (fetchmail-5.9.0)
# for arnold@localhost (single-drop); Wed, 30 Apr 2003 11:08:48 +0300 (IDT)
# Received: by actcom.co.il (mbox arobbins)
# (with Cubic Circle's cucipop (v1.31 1998/05/13) Wed Apr 30 11:05:01 2003)
# X-From_: spcecdt@armory.com Wed Apr 30 04:06:46 2003
# Received: from smtp1.actcom.net.il by actcom.co.il with ESMTP
# (8.11.6/actcom-0.2) id h3U16iv04111 for <arobbins@actcom.co.il>;
# Wed, 30 Apr 2003 04:06:45 +0300 (EET DST)
# (rfc931-sender: mail.actcom.co.il [192.114.47.13])
# Received: from f7.net (consort.superb.net [209.61.216.22])
# by smtp1.actcom.net.il (8.12.8/8.12.8) with ESMTP id h3U16nEv009589
# for <arobbins@actcom.co.il>; Wed, 30 Apr 2003 04:06:50 +0300
# Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164])
# by f7.net (8.11.7/8.11.6) with ESMTP id h3U16gj29182
# for <arnold@skeeve.com>; Tue, 29 Apr 2003 21:06:42 -0400
# Received: from monty-python.gnu.org ([199.232.76.173])
# by fencepost.gnu.org with esmtp (Exim 4.10)
# id 19Ag3W-00029w-00
# for bug-gawk@gnu.org; Tue, 29 Apr 2003 21:06:42 -0400
# Received: from mail by monty-python.gnu.org with spam-scanned (Exim 4.10.13)
# id 19Ag1V-0001AN-00
# for bug-gawk@gnu.org; Tue, 29 Apr 2003 21:04:39 -0400
# Received: from deepthought.armory.com ([192.122.209.42] helo=armory.com)
# by monty-python.gnu.org with smtp (Exim 4.10.13)
# id 19Ag1V-0001A3-00
# for bug-gawk@gnu.org; Tue, 29 Apr 2003 21:04:37 -0400
# Date: Tue, 29 Apr 2003 18:04:35 -0700
# From: "John H. DuBois III" <spcecdt@armory.com>
# To: bug-gawk@gnu.org
# Subject: gawk 3.1.2a bug
# Message-ID: <20030430010434.GA4278@armory.com>
# Mime-Version: 1.0
# Content-Type: text/plain; charset=us-ascii
# Content-Disposition: inline
# User-Agent: Mutt/1.3.28i
# X-Www: http://www.armory.com./~spcecdt/
# Sender: spcecdt@armory.com
# X-Spam-Status: No, hits=-7.2 required=5.0
# tests=SIGNATURE_SHORT_DENSE,SPAM_PHRASE_00_01,USER_AGENT,
# USER_AGENT_MUTT
# version=2.41
# X-Spam-Level:
# X-SpamBouncer: 1.4 (10/07/01)
# X-SBClass: OK
# Status: RO
#
# gawk-3.1.2a 'BEGIN {foo(bar)};function foo(baz){split("x",baz)}'
# gawk-3.1.2a: cmd. line:1: fatal: split: second argument is not an array
#
# John
# --
# John DuBois spcecdt@armory.com KC6QKZ/AE http://www.armory.com/~spcecdt/
#
BEGIN {
foo(bar)
}

function foo(baz)
{
split("x", baz)
}
0  src/tool/awk/testdata/gawk/arrayprm2.ok  vendored  Normal file
56  src/tool/awk/testdata/gawk/arrayprm3.awk  vendored  Normal file
@@ -0,0 +1,56 @@
# From spcecdt@armory.com Fri May 2 13:24:46 2003
# Return-Path: <spcecdt@armory.com>
# Received: from localhost (skeeve [127.0.0.1])
# by skeeve.com (8.12.5/8.12.5) with ESMTP id h42AChum021950
# for <arnold@localhost>; Fri, 2 May 2003 13:24:46 +0300
# Received: from actcom.co.il [192.114.47.1]
# by localhost with POP3 (fetchmail-5.9.0)
# for arnold@localhost (single-drop); Fri, 02 May 2003 13:24:46 +0300 (IDT)
# Received: by actcom.co.il (mbox arobbins)
# (with Cubic Circle's cucipop (v1.31 1998/05/13) Fri May 2 13:23:37 2003)
# X-From_: spcecdt@armory.com Fri May 2 00:43:51 2003
# Received: from smtp1.actcom.net.il by actcom.co.il with ESMTP
# (8.11.6/actcom-0.2) id h41Lhm500217 for <arobbins@actcom.co.il>;
# Fri, 2 May 2003 00:43:49 +0300 (EET DST)
# (rfc931-sender: lmail.actcom.co.il [192.114.47.13])
# Received: from f7.net (consort.superb.net [209.61.216.22])
# by smtp1.actcom.net.il (8.12.8/8.12.8) with ESMTP id h41LiGcO022817
# for <arobbins@actcom.co.il>; Fri, 2 May 2003 00:44:18 +0300
# Received: from armory.com (deepthought.armory.com [192.122.209.42])
# by f7.net (8.11.7/8.11.6) with SMTP id h41Lhj106516
# for <arnold@skeeve.com>; Thu, 1 May 2003 17:43:46 -0400
# Date: Thu, 1 May 2003 14:43:45 -0700
# From: "John H. DuBois III" <spcecdt@armory.com>
# To: Aharon Robbins <arnold@skeeve.com>
# Subject: Re: gawk 3.1.2a bug
# Message-ID: <20030501214345.GA24615@armory.com>
# References: <200305011738.h41Hcg76017565@localhost.localdomain>
# Mime-Version: 1.0
# Content-Type: text/plain; charset=us-ascii
# Content-Disposition: inline
# In-Reply-To: <200305011738.h41Hcg76017565@localhost.localdomain>
# User-Agent: Mutt/1.3.28i
# X-Www: http://www.armory.com./~spcecdt/
# Sender: spcecdt@armory.com
# X-SpamBouncer: 1.4 (10/07/01)
# X-SBClass: OK
# Status: RO
#
# On Thu, May 01, 2003 at 08:38:42PM +0300, Aharon Robbins wrote:
# > > That worked, thanks.
# >
# > Great. Your report motivated me to find everywhere such additional
# > code ought to be needed. I think I did so. --Arnold
#
# Here's another one (perhaps fixed by your additional work):
#
BEGIN { foo(a) }
function foo(a) { bar(a); print "" in a }
function bar(a) { a[""]; }
#
# Prints 1 with gawk-3.1.1; 0 with 3.1.2a.
#
# John
# --
# John DuBois spcecdt@armory.com KC6QKZ/AE http://www.armory.com/~spcecdt/
#
1  src/tool/awk/testdata/gawk/arrayprm3.ok  vendored  Normal file
@@ -0,0 +1 @@
1
13  src/tool/awk/testdata/gawk/arrayref.awk  vendored  Normal file
@@ -0,0 +1,13 @@
BEGIN { # foo[10] = 0 # put this line in and it will work
test(foo); print foo[1]
test2(foo2); print foo2[1]
}

function test(foo)
{
test2(foo)
}
function test2(bar)
{
bar[1] = 1
}
2  src/tool/awk/testdata/gawk/arrayref.ok  vendored  Normal file
@@ -0,0 +1,2 @@
1
1
81  src/tool/awk/testdata/gawk/arrymem1.awk  vendored  Normal file
@@ -0,0 +1,81 @@
# From spcecdt@armory.com Thu Jun 14 13:24:32 2001
# Received: from mail.actcom.co.il [192.114.47.13]
# by localhost with POP3 (fetchmail-5.5.0)
# for arnold@localhost (single-drop); Thu, 14 Jun 2001 13:24:32 +0300 (IDT)
# Received: by actcom.co.il (mbox arobbins)
# (with Cubic Circle's cucipop (v1.31 1998/05/13) Thu Jun 14 13:25:13 2001)
# X-From_: spcecdt@armory.com Thu Jun 14 06:34:47 2001
# Received: from lmail.actcom.co.il by actcom.co.il with ESMTP
# (8.9.1a/actcom-0.2) id GAA29661 for <arobbins@actcom.co.il>;
# Thu, 14 Jun 2001 06:34:46 +0300 (EET DST)
# (rfc931-sender: lmail.actcom.co.il [192.114.47.13])
# Received: from billohost.com (www.billohost.com [209.196.35.10])
# by lmail.actcom.co.il (8.11.2/8.11.2) with ESMTP id f5E3YiO27337
# for <arobbins@actcom.co.il>; Thu, 14 Jun 2001 06:34:45 +0300
# Received: from fencepost.gnu.org (we-refuse-to-spy-on-our-users@fencepost.gnu.org [199.232.76.164])
# by billohost.com (8.9.3/8.9.3) with ESMTP id XAA02681
# for <arnold@skeeve.com>; Wed, 13 Jun 2001 23:33:57 -0400
# Received: from deepthought.armory.com ([192.122.209.42])
# by fencepost.gnu.org with smtp (Exim 3.16 #1 (Debian))
# id 15ANu2-00005C-00
# for <bug-gawk@gnu.org>; Wed, 13 Jun 2001 23:34:38 -0400
# Date: Wed, 13 Jun 2001 20:32:42 -0700
# From: "John H. DuBois III" <spcecdt@armory.com>
# To: bug-gawk@gnu.org
# Subject: gawk 3.1.0 bug
# Message-ID: <20010613203242.A29975@armory.com>
# Mime-Version: 1.0
# Content-Type: text/plain; charset=us-ascii
# X-Mailer: Mutt 1.0.1i
# X-Www: http://www.armory.com./~spcecdt/
# Sender: spcecdt@armory.com
# Status: RO
#
# Under SCO OpenServer 5.0.6a using gawk 3.1.0 compiled with gcc 2.95.2, this
# program:

BEGIN {
f1(Procs,b)
print "test"
}

function f1(Procs,a) {
# a[""]
a[""] = "a" # ADR: Give it a value so can trace it
f2()
}

function f2() {
# b[""]
b[""] = "b" # ADR: Give it a value so can trace it
}

# ADR: 1/28/2003: Added this:
BEGIN { for (i in b) printf("b[\"%s\"] = \"%s\"\n", i, b[i]) }
# END ADR added.

# gives:
#
# gawk: ./gtest:5: fatal error: internal error
#
# and dumps core.
#
# gdb gives me this stack backtrace:
#
# #0 0x80019943 in kill () from /usr/lib/libc.so.1
# #1 0x8003e754 in abort () from /usr/lib/libc.so.1
# #2 0x8062a87 in catchsig (sig=0, code=0) at main.c:947
# #3 0x80053a0c in _sigreturn () from /usr/lib/libc.so.1
# #4 0x80023d36 in cleanfree () from /usr/lib/libc.so.1
# #5 0x80023156 in _real_malloc () from /usr/lib/libc.so.1
# #6 0x80023019 in malloc () from /usr/lib/libc.so.1
# #7 0x8053b95 in do_print (tree=0x0) at builtin.c:1336
# #8 0x806b47c in interpret (tree=0x8084ee4) at eval.c:606
# #9 0x806ad8d in interpret (tree=0x8084f0c) at eval.c:384
# #10 0x806ad21 in interpret (tree=0x8084f5c) at eval.c:367
# #11 0x8061d5b in main (argc=4, argv=0x80478ac) at main.c:506
#
# John
# --
# John DuBois spcecdt@armory.com. KC6QKZ/AE http://www.armory.com./~spcecdt/
#
2  src/tool/awk/testdata/gawk/arrymem1.ok  vendored  Normal file
@@ -0,0 +1,2 @@
test
b[""] = "b"
18  src/tool/awk/testdata/gawk/arryref2.awk  vendored  Normal file
@@ -0,0 +1,18 @@
BEGIN {
foo(a)

for (i in a)
print i, a[i]
}

function foo(b)
{
bar(b)
b[2] = "local"
}

function bar(c)
{
a[3] = "global"
c[1] = "local2"
}
3  src/tool/awk/testdata/gawk/arryref2.ok  vendored  Normal file
@@ -0,0 +1,3 @@
1 local2
2 local
3 global
18  src/tool/awk/testdata/gawk/arryref3.awk  vendored  Normal file
@@ -0,0 +1,18 @@
BEGIN {
foo(a)

for (i in a)
print i, a[i]
}

function foo(b)
{
a[1] = "global"
b[2] = "local"
bar(b)
}

function bar(c)
{
c = 12
}
1  src/tool/awk/testdata/gawk/arryref3.ok  vendored  Normal file
@@ -0,0 +1 @@
parse error at 12:2: can't pass array "b" as scalar param
17  src/tool/awk/testdata/gawk/arryref4.awk  vendored  Normal file
@@ -0,0 +1,17 @@
BEGIN {
foo(a)

print a
}

function foo(b)
{
a = "global"
b[2] = "local"
# bar(b)
}

function bar(c)
{
c = 12
}
1  src/tool/awk/testdata/gawk/arryref4.ok  vendored  Normal file
@@ -0,0 +1 @@
parse error at 2:2: can't pass scalar "a" as array param
17  src/tool/awk/testdata/gawk/arryref5.awk  vendored  Normal file
@@ -0,0 +1,17 @@
BEGIN {
foo(a)

print a
}

function foo(b)
{
b[2] = "local"
a = "global"
# bar(b)
}

function bar(c)
{
c = 12
}
1  src/tool/awk/testdata/gawk/arryref5.ok  vendored  Normal file
@@ -0,0 +1 @@
parse error at 2:2: can't pass scalar "a" as array param
16  src/tool/awk/testdata/gawk/arynasty.awk  vendored  Normal file
@@ -0,0 +1,16 @@
BEGIN {
a = 12.153
#print "-- stroring test[a]" > "/dev/stderr" ; fflush("/dev/stderr")
test[a] = "hi"
#print "-- setting CONVFMT" > "/dev/stderr" ; fflush("/dev/stderr")
CONVFMT = "%.0f"
#print "-- setting a" > "/dev/stderr" ; fflush("/dev/stderr")
a = 5
#stopme()
#print "-- starting loop" > "/dev/stderr" ; fflush("/dev/stderr")
for (i in test) {
#print("-- i =", i) > "/dev/stderr" ; fflush("/dev/stderr");
#printf("-- i = <%s>\n", i) > "/dev/stderr" ; fflush("/dev/stderr");
printf ("test[%s] = %s\n", i, test[i])
}
}
1  src/tool/awk/testdata/gawk/arynasty.ok  vendored  Normal file
@@ -0,0 +1 @@
test[12.153] = hi
9  src/tool/awk/testdata/gawk/aryprm1.awk  vendored  Normal file
@@ -0,0 +1,9 @@
function f(a) {
if (3 in a)
print 7
a = 5
}

BEGIN {
f(arr)
}
Some files were not shown because too many files have changed in this diff.