Trying to embed the gawk.
This commit is contained in:
parent
708052ec0a
commit
80356b3878
1105 changed files with 133607 additions and 1 deletions
|
@ -27,6 +27,7 @@ import(
|
||||||
"github.com/surdeus/goblin/src/tool/useprog"
|
"github.com/surdeus/goblin/src/tool/useprog"
|
||||||
"github.com/surdeus/goblin/src/tool/path"
|
"github.com/surdeus/goblin/src/tool/path"
|
||||||
"github.com/surdeus/goblin/src/tool/mk"
|
"github.com/surdeus/goblin/src/tool/mk"
|
||||||
|
//"github.com/surdeus/goblin/src/tool/awk"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -55,7 +56,8 @@ func main() {
|
||||||
"in" : mtool.Tool{in.Run, "filter strings from stdin that aren not in arguments"},
|
"in" : mtool.Tool{in.Run, "filter strings from stdin that aren not in arguments"},
|
||||||
"useprog" : mtool.Tool{useprog.Run, "print the name of the first existing program in arg list"},
|
"useprog" : mtool.Tool{useprog.Run, "print the name of the first existing program in arg list"},
|
||||||
"path" : mtool.Tool{path.Run, "print cross platform path based on cmd arguments"},
|
"path" : mtool.Tool{path.Run, "print cross platform path based on cmd arguments"},
|
||||||
"mk" : mtool.Tool{mk.Run, "file dependency system"},
|
"mk" : mtool.Tool{mk.Run, "file dependency system, simpler make"},
|
||||||
|
//"awk" : mtool.Tool{awk.Run, "simple scripting language for working with string templates"},
|
||||||
}
|
}
|
||||||
|
|
||||||
mtool.Main("goblin", tools)
|
mtool.Main("goblin", tools)
|
||||||
|
|
387
src/tool/awk/csv.md
Normal file
387
src/tool/awk/csv.md
Normal file
|
@ -0,0 +1,387 @@
|
||||||
|
|
||||||
|
# GoAWK's CSV and TSV file support
|
||||||
|
|
||||||
|
[CSV](https://en.wikipedia.org/wiki/Comma-separated_values) and [TSV](https://en.wikipedia.org/wiki/Tab-separated_values) files are often used in data processing today, but unfortunately you can't properly process them using POSIX AWK. You can change the field separator to `,` or tab (for example `awk -F,` or `awk '-F\t'`) but that doesn't handle quoted or multi-line fields.
|
||||||
|
|
||||||
|
There are other workarounds, such as [Gawk's FPAT feature](https://www.gnu.org/software/gawk/manual/html_node/Splitting-By-Content.html), various [CSV extensions](http://mcollado.z15.es/xgawk/) for Gawk, or Adam Gordon Bell's [csvquote](https://github.com/adamgordonbell/csvquote) tool. There's also [frawk](https://github.com/ezrosent/frawk), which is an amazing tool that natively supports CSV, but unfortunately it deviates quite a bit from POSIX-compatible AWK.
|
||||||
|
|
||||||
|
Since version v1.17.0, GoAWK has included CSV support, which allows you to read and write CSV and TSV files, including proper handling of quoted and multi-line fields as per [RFC 4180](https://rfc-editor.org/rfc/rfc4180.html). In addition, GoAWK supports a "named field" construct that allows you to access CSV fields by name as well as number, for example `@"Address"` rather than `$5`.
|
||||||
|
|
||||||
|
**Many thanks to the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/), who sponsored this feature in May 2022.** Thanks also to [Eli Rosenthal](https://github.com/ezrosent), whose frawk tool inspired aspects of the design (including the `-i` and `-o` command line arguments).
|
||||||
|
|
||||||
|
Links to sections:
|
||||||
|
|
||||||
|
* [CSV input configuration](#csv-input-configuration)
|
||||||
|
* [CSV output configuration](#csv-output-configuration)
|
||||||
|
* [Named field syntax](#named-field-syntax)
|
||||||
|
* [Go API](#go-api)
|
||||||
|
* [Examples](#examples)
|
||||||
|
* [Examples based on csvkit](#examples-based-on-csvkit)
|
||||||
|
* [Performance](#performance)
|
||||||
|
* [Future work](#future-work)
|
||||||
|
|
||||||
|
|
||||||
|
## CSV input configuration
|
||||||
|
|
||||||
|
When in CSV input mode, GoAWK ignores the regular field and record separators (`FS` and `RS`), instead parsing input into records and fields using the CSV or TSV format. Fields can be accessed using the standard AWK numbered field syntax (for example, `$1` or `$5`), or using the GoAWK-specific [named field syntax](#named-field-syntax).
|
||||||
|
|
||||||
|
To enable CSV input mode when using the `goawk` program, use the `-i mode` command line argument. You can also enable CSV input mode by setting the `INPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
csv|tsv [separator=<char>] [comment=<char>] [header]
|
||||||
|
```
|
||||||
|
|
||||||
|
The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows:
|
||||||
|
|
||||||
|
* `separator=<char>`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format.
|
||||||
|
* `comment=<char>`: consider lines starting with the given character to be comments and skip them, for example `comment=#` will ignore any lines starting with `#` (without preceding whitespace). The default is not to support comments.
|
||||||
|
* `header`: treat the first line of each input file as a header row providing the field names, and enable the `@"field"` syntax as well as the `FIELDS` array. This option is equivalent to the `-H` command line argument. If neither `header` nor `-H` is specified, you can't use named fields.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## CSV output configuration
|
||||||
|
|
||||||
|
When in CSV output mode, the GoAWK `print` statement with one or more arguments ignores `OFS` and `ORS` and separates its arguments (fields) and records using CSV formatting. No header row is printed; if required, a header row can be printed in the `BEGIN` block manually. No other functionality is changed, for example, `printf` doesn't do anything different in CSV output mode.
|
||||||
|
|
||||||
|
**NOTE:** The behaviour of `print` without arguments remains unchanged. This means you can print the input line (`$0`) without further quoting by using a bare `print` statement, but `print $0` will print the input line as a single CSV field, which is probably not what you want. See the [example](#example-convert-between-formats-all-fields) below.
|
||||||
|
|
||||||
|
To enable CSV output mode when using the `goawk` program, use the `-o mode` command line argument. You can also enable CSV output mode by setting the `OUTPUTMODE` special variable in the `BEGIN` block, or by using the [Go API](#go-api). The full syntax of `mode` is as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
csv|tsv [separator=<char>]
|
||||||
|
```
|
||||||
|
|
||||||
|
The first field in `mode` is the format: `csv` for comma-separated values or `tsv` for tab-separated values. Optionally following the mode are configuration fields, defined as follows:
|
||||||
|
|
||||||
|
* `separator=<char>`: override the separator character, for example `separator=|` to use the pipe character. The default is `,` (comma) for `csv` format or `\t` (tab) for `tsv` format.
|
||||||
|
|
||||||
|
|
||||||
|
## Named field syntax
|
||||||
|
|
||||||
|
If the `header` option or `-H` argument is given, CSV input mode parses the first row of each input file as a header row containing a list of field names.
|
||||||
|
|
||||||
|
When the header option is enabled, you can use the GoAWK-specific "named field" operator (`@`) to access fields by name instead of by number (`$`). For example, given the header row `id,name,email`, for each record you can access the email address using `@"email"`, `$3`, or even `$-1` (first field from the right). Further usage examples are shown [below](#examples).
|
||||||
|
|
||||||
|
Every time a header row is processed, the `FIELDS` special array is updated: it is a mapping of field number to field name, allowing you to loop over the field names dynamically. For example, given the header row `id,name,email`, GoAWK sets `FIELDS` using the equivalent of:
|
||||||
|
|
||||||
|
```
|
||||||
|
FIELDS[1] = "id"
|
||||||
|
FIELDS[2] = "name"
|
||||||
|
FIELDS[3] = "email"
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that named field assignment such as `@"id" = 42` is not yet supported, but this feature may be added later.
|
||||||
|
|
||||||
|
|
||||||
|
## Go API
|
||||||
|
|
||||||
|
When using GoAWK via the Go API, you can still use `INPUTMODE`, but it may be more convenient to use the `interp.Config` fields directly: `InputMode`, `CSVInput`, `OutputMode`, and `CSVOutput`.
|
||||||
|
|
||||||
|
Here's a simple snippet showing the use of the `InputMode` and `CSVInput` fields to enable `#` as the comment character:
|
||||||
|
|
||||||
|
```
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil { ... }
|
||||||
|
|
||||||
|
config := &interp.Config{
|
||||||
|
InputMode: interp.CSVMode,
|
||||||
|
CSVInput: interp.CSVInputConfig{Comment: '#'},
|
||||||
|
}
|
||||||
|
_, err = interp.ExecProgram(prog, config)
|
||||||
|
if err != nil { ... }
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that `INPUTMODE` and `OUTPUTMODE` set using `Vars` or in the `BEGIN` block will override these settings.
|
||||||
|
|
||||||
|
See the [full reference documentation](https://pkg.go.dev/github.com/benhoyt/goawk/interp#Config) for the `interp.Config` struct.
|
||||||
|
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
Below are some examples using the [testdata/csv/states.csv](https://github.com/benhoyt/goawk/blob/master/testdata/csv/states.csv) file, which is a simple CSV file whose contents are as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
"State","Abbreviation"
|
||||||
|
"Alabama","AL"
|
||||||
|
"Alaska","AK"
|
||||||
|
"Arizona","AZ"
|
||||||
|
"Arkansas","AR"
|
||||||
|
"California","CA"
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: output a field by name
|
||||||
|
|
||||||
|
To output a field by name (in this case the state's abbreviation):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -H '{ print @"Abbreviation" }' testdata/csv/states.csv
|
||||||
|
AL
|
||||||
|
AK
|
||||||
|
AZ
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: match a field and count
|
||||||
|
|
||||||
|
To count the number of states that have "New" in the name, and then print out what they are:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -H '@"State" ~ /New/ { n++ } END { print n }' testdata/csv/states.csv
|
||||||
|
4
|
||||||
|
$ goawk -i csv -H '@"State" ~ /New/ { print @"State" }' testdata/csv/states.csv
|
||||||
|
New Hampshire
|
||||||
|
New Jersey
|
||||||
|
New Mexico
|
||||||
|
New York
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: rename and reorder fields
|
||||||
|
|
||||||
|
To rename and reorder the fields from `State`, `Abbreviation` to `abbr`, `name` (note that the `print` statement in the `BEGIN` block prints the header row for the output):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -H -o csv 'BEGIN { print "abbr", "name" } { print @"Abbreviation", @"State" }' testdata/csv/states.csv
|
||||||
|
abbr,name
|
||||||
|
AL,Alabama
|
||||||
|
AK,Alaska
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: convert between formats (explicit field list)
|
||||||
|
|
||||||
|
To convert the file from CSV to TSV format (note how we're *not* using `-H`, so the header row is included):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -o tsv '{ print $1, $2 }' testdata/csv/states.csv
|
||||||
|
State Abbreviation
|
||||||
|
Alabama AL
|
||||||
|
Alaska AK
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: convert between formats (all fields)
|
||||||
|
|
||||||
|
If you want to convert between CSV and TSV format but don't know the number of fields, you can use a field assignment like `$1=$1` so that GoAWK reformats `$0` according to the output format (TSV in this case). This is similar to how in POSIX AWK a field assignment reformats `$0` according to the output field separator (`OFS`). Then `print` without arguments prints the raw value of `$0`:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -o tsv '{ $1=$1; print }' testdata/csv/states.csv
|
||||||
|
State Abbreviation
|
||||||
|
Alabama AL
|
||||||
|
Alaska AK
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
**NOTE:** It's not correct to use `print $0` in this case, because that would print `$0` as a single TSV field, which you generally don't want:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -o tsv '{ $1=$1; print $0 }' testdata/csv/states.csv # INCORRECT!
|
||||||
|
"State Abbreviation"
|
||||||
|
"Alabama AL"
|
||||||
|
"Alaska AK"
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: override separator
|
||||||
|
|
||||||
|
To test overriding the separator character, we can use GoAWK to convert the separator to `|` (pipe). We'll also add a comment line at the top of the output so that we can test comment handling in the next example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -o 'csv separator=|' 'BEGIN { printf "# comment\n" } { $1=$1; print }' testdata/csv/states.csv
|
||||||
|
# comment
|
||||||
|
State|Abbreviation
|
||||||
|
Alabama|AL
|
||||||
|
Alaska|AK
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: skip comment lines
|
||||||
|
|
||||||
|
We can process the "pipe-separated values" file generated above, skipping comment lines, and printing the first three state names (accessed by field number this time):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i 'csv header comment=# separator=|' 'NR<=3 { print $1 }' testdata/csv/states.psv
|
||||||
|
Alabama
|
||||||
|
Alaska
|
||||||
|
Arizona
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: use dynamic field names
|
||||||
|
|
||||||
|
Similar to the `$` operator, you can also use `@` with dynamic values. For example, if there are fields named `address_1`, `address_2`, up through `address_5`, you could loop over them as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat testdata/csv/address5.csv
|
||||||
|
name,address_1,address_2,address_3,address_4,address_5
|
||||||
|
Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates
|
||||||
|
$ goawk -i csv -H '{ for (i=1; i<=5; i++) print @("address_" i) }' testdata/csv/address5.csv
|
||||||
|
123 Way St
|
||||||
|
Apt 2B
|
||||||
|
Township
|
||||||
|
Cityville
|
||||||
|
United Plates
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: use the `FIELDS` array
|
||||||
|
|
||||||
|
A somewhat contrived example showing use of the `FIELDS` array:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat testdata/csv/fields.csv
|
||||||
|
id,name,email
|
||||||
|
1,Bob,b@bob.com
|
||||||
|
$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) print i, FIELDS[i] }' testdata/csv/fields.csv
|
||||||
|
1 id
|
||||||
|
2 name
|
||||||
|
3 email
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: create CSV file from array
|
||||||
|
|
||||||
|
The following example shows how you might pull fields out of an integer-indexed array to produce a CSV file:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -o csv 'BEGIN { print "id", "name"; names[1]="Bob"; names[2]="Jane"; for (i=1; i in names; i++) print i, names[i] }'
|
||||||
|
id,name
|
||||||
|
1,Bob
|
||||||
|
2,Jane
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: create CSV file by assigning fields
|
||||||
|
|
||||||
|
This example shows the same result, but producing the CSV output by assigning individual fields and then using a bare `print` statement:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -o csv 'BEGIN { print "id", "name"; $1=1; $2="Bob"; print; $1=2; $2="Jane"; print }'
|
||||||
|
id,name
|
||||||
|
1,Bob
|
||||||
|
2,Jane
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: different ways to specify CSV mode
|
||||||
|
|
||||||
|
And finally, four equivalent examples showing different ways to specify the input mode, using `-i` or the `INPUTMODE` special variable (the same techniques work for `-o` and `OUTPUTMODE`):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ goawk -i csv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||||
|
NY
|
||||||
|
$ goawk -icsv -H '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||||
|
NY
|
||||||
|
$ goawk 'BEGIN { INPUTMODE="csv header" } @"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||||
|
NY
|
||||||
|
$ goawk -v 'INPUTMODE=csv header' '@"State"=="New York" { print @"Abbreviation" }' testdata/csv/states.csv
|
||||||
|
NY
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Examples based on csvkit
|
||||||
|
|
||||||
|
The [csvkit](https://csvkit.readthedocs.io/en/latest/index.html) suite is a set of tools that allow you to quickly analyze and extract fields from CSV files. Each csvkit tool allows you to do a specific task; GoAWK is more low-level and verbose, but also a more general tool ([`csvsql`](https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html#csvsql-and-sql2csv-ultimate-power) being the exception!). GoAWK also runs significantly faster than csvkit (the latter is written in Python).
|
||||||
|
|
||||||
|
Below are a few snippets showing how you'd do some of the tasks in the csvkit documentation, but using GoAWK (the input file is [testdata/csv/nz-schools.csv](https://github.com/benhoyt/goawk/blob/master/testdata/csv/nz-schools.csv)):
|
||||||
|
|
||||||
|
### csvkit example: print column names
|
||||||
|
|
||||||
|
```
|
||||||
|
$ csvcut -n testdata/csv/nz-schools.csv
|
||||||
|
1: School_Id
|
||||||
|
2: Org_Name
|
||||||
|
3: Decile
|
||||||
|
4: Total
|
||||||
|
|
||||||
|
# In GoAWK you have to loop through the fields, but you can print the data in
|
||||||
|
# any format you want (note the "exit" so it stops after the first row):
|
||||||
|
$ goawk -i csv '{ for (i=1; i<=NF; i++) printf "%3d: %s\n", i, $i; exit }' testdata/csv/nz-schools.csv
|
||||||
|
1: School_Id
|
||||||
|
2: Org_Name
|
||||||
|
3: Decile
|
||||||
|
4: Total
|
||||||
|
|
||||||
|
# You could also use -H and the FIELDS array to do this:
|
||||||
|
$ goawk -i csv -H '{ for (i=1; i in FIELDS; i++) printf "%3d: %s\n", i, FIELDS[i]; exit }' testdata/csv/nz-schools.csv
|
||||||
|
1: School_Id
|
||||||
|
2: Org_Name
|
||||||
|
3: Decile
|
||||||
|
4: Total
|
||||||
|
```
|
||||||
|
|
||||||
|
### csvkit example: select a subset of columns
|
||||||
|
|
||||||
|
```
|
||||||
|
$ csvcut -c Org_Name,Total testdata/csv/nz-schools.csv
|
||||||
|
Org_Name,Total
|
||||||
|
Waipa Christian School,60
|
||||||
|
Remarkables Primary School,494
|
||||||
|
...
|
||||||
|
|
||||||
|
# In GoAWK you need to print the field names explicitly in BEGIN:
|
||||||
|
$ goawk -i csv -H -o csv 'BEGIN { print "Org_Name", "Total" } { print @"Org_Name", @"Total" }' testdata/csv/nz-schools.csv
|
||||||
|
Org_Name,Total
|
||||||
|
Waipa Christian School,60
|
||||||
|
Remarkables Primary School,494
|
||||||
|
...
|
||||||
|
|
||||||
|
# But you can also change the column names and reorder them:
|
||||||
|
$ goawk -i csv -H -o csv 'BEGIN { print "# Students", "School" } { print @"Total", @"Org_Name" }' testdata/csv/nz-schools.csv
|
||||||
|
# Students,School
|
||||||
|
60,Waipa Christian School
|
||||||
|
494,Remarkables Primary School
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### csvkit example: generate statistics
|
||||||
|
|
||||||
|
There's no equivalent of the `csvstat` tool in GoAWK, but you can calculate statistics yourself. For example, to calculate the total number of students in New Zealand schools, you can do the following (`csvstat` is giving a warning due to the single-column input):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ csvcut -c Total testdata/csv/nz-schools.csv | csvstat --sum
|
||||||
|
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
||||||
|
802,516
|
||||||
|
|
||||||
|
$ goawk -i csv -H '{ sum += @"Total" } END { print sum }' testdata/csv/nz-schools.csv
|
||||||
|
802516
|
||||||
|
```
|
||||||
|
|
||||||
|
To calculate the average (mean) decile level for boys' and girls' schools (sorry, boys!):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ csvgrep -c Org_Name -m Boys testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean
|
||||||
|
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
||||||
|
6.45
|
||||||
|
$ csvgrep -c Org_Name -m Girls testdata/csv/nz-schools.csv | csvcut -c Decile | csvstat --mean
|
||||||
|
/usr/local/lib/python3.9/dist-packages/agate/table/from_csv.py:74: RuntimeWarning: Error sniffing CSV dialect: Could not determine delimiter
|
||||||
|
8.889
|
||||||
|
|
||||||
|
$ goawk -i csv -H '/Boys/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv
|
||||||
|
6.45
|
||||||
|
$ goawk -i csv -H '/Girls/ { d+=@"Decile"; n++ } END { print d/n }' testdata/csv/nz-schools.csv
|
||||||
|
8.88889
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
The performance of GoAWK's CSV input and output mode is quite good, on a par with using the `encoding/csv` package from Go directly, and much faster than the `csv` module in Python. CSV input speed is significantly slower than `frawk`, though CSV output speed is significantly faster than `frawk`.
|
||||||
|
|
||||||
|
Below are the results of some simple read and write [benchmarks](https://github.com/benhoyt/goawk/blob/master/scripts/csvbench) using `goawk` and `frawk` as well as plain Python and Go. The output of the write benchmarks is a 1GB, 3.5 million row CSV file with 20 columns (including quoted columns); the input for the read benchmarks uses that same file. Times are in seconds, showing the best of three runs on a 64-bit Linux laptop with an SSD:
|
||||||
|
|
||||||
|
Test | goawk | frawk | Python | Go
|
||||||
|
--------------- | ----- | ----- | ------ | ----
|
||||||
|
Reading 1GB CSV | 3.18 | 1.01 | 13.4 | 3.22
|
||||||
|
Writing 1GB CSV | 5.64 | 13.0 | 17.0 | 3.24
|
||||||
|
|
||||||
|
|
||||||
|
## Future work
|
||||||
|
|
||||||
|
* Consider adding a `printrow(a)` or similar function to make it easier to construct CSV rows from scratch.
|
||||||
|
- `a` would be an array such as: `a["name"] = "Bob"; a["age"] = 7`
|
||||||
|
- keys would be ordered by `OFIELDS` (e.g. `OFIELDS[1] = "name"; OFIELDS[2] = "age"`) or by "smart name" if `OFIELDS` is not set ("smart name" meaning numeric if the keys of `a` are numeric, string otherwise)
|
||||||
|
- `printrow(a)` could take an optional second `fields` array arg to use that instead of the global `OFIELDS`
|
||||||
|
* Consider allowing `-H` to accept an optional list of field names which could be used as headers in the absence of headers in the file itself (either `-H=name,age` or `-i 'csv header=name,age'`).
|
||||||
|
* Consider adding a `TrimLeadingSpace` CSV input option. See: https://github.com/benhoyt/goawk/issues/109
|
||||||
|
* Consider supporting `@"id" = 42` named field assignment.
|
||||||
|
|
||||||
|
|
||||||
|
## Feedback
|
||||||
|
|
||||||
|
Please [open an issue](https://github.com/benhoyt/goawk/issues) if you have bug reports or feature requests for GoAWK's CSV support.
|
3
src/tool/awk/go.mod
Normal file
3
src/tool/awk/go.mod
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
module github.com/benhoyt/goawk
|
||||||
|
|
||||||
|
go 1.14
|
BIN
src/tool/awk/goawk
Executable file
BIN
src/tool/awk/goawk
Executable file
Binary file not shown.
394
src/tool/awk/goawk.go
Normal file
394
src/tool/awk/goawk.go
Normal file
|
@ -0,0 +1,394 @@
|
||||||
|
// Package awk is an implementation of AWK with CSV support.
|
||||||
|
//
|
||||||
|
// You can use the command-line "goawk" command or run AWK from your
|
||||||
|
// Go programs using the "interp" package. The command-line program
|
||||||
|
// has the same interface as regular awk:
|
||||||
|
//
|
||||||
|
// goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]
|
||||||
|
//
|
||||||
|
// The -F flag specifies the field separator (the default is to split
|
||||||
|
// on whitespace). The -v flag allows you to set a variable to a
|
||||||
|
// given value (multiple -v flags allowed). The -f flag allows you to
|
||||||
|
// read AWK source from a file instead of the 'prog' command-line
|
||||||
|
// argument. The rest of the arguments are input filenames (default
|
||||||
|
// is to read from stdin).
|
||||||
|
//
|
||||||
|
// A simple example (prints the sum of the numbers in the file's
|
||||||
|
// second column):
|
||||||
|
//
|
||||||
|
// $ echo 'foo 12
|
||||||
|
// > bar 34
|
||||||
|
// > baz 56' >file.txt
|
||||||
|
// $ goawk '{ sum += $2 } END { print sum }' file.txt
|
||||||
|
// 102
|
||||||
|
//
|
||||||
|
// To use GoAWK in your Go programs, see README.md or the "interp"
|
||||||
|
// package docs.
|
||||||
|
package awk
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"runtime/pprof"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/interp"
|
||||||
|
"github.com/benhoyt/goawk/lexer"
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
version = "v1.19.0"
|
||||||
|
copyright = "GoAWK " + version + " - Copyright (c) 2022 Ben Hoyt"
|
||||||
|
shortUsage = "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"
|
||||||
|
longUsage = `Standard AWK arguments:
|
||||||
|
-F separator field separator (default " ")
|
||||||
|
-f progfile load AWK source from progfile (multiple allowed)
|
||||||
|
-v var=value variable assignment (multiple allowed)
|
||||||
|
|
||||||
|
Additional GoAWK arguments:
|
||||||
|
-cpuprofile file write CPU profile to file
|
||||||
|
-d print parsed syntax tree to stderr (debug mode)
|
||||||
|
-da print virtual machine assembly instructions to stderr
|
||||||
|
-dt print variable type information to stderr
|
||||||
|
-H parse header row and enable @"field" in CSV input mode
|
||||||
|
-h, --help show this help message
|
||||||
|
-i mode parse input into fields using CSV format (ignore FS and RS)
|
||||||
|
'csv|tsv [separator=<char>] [comment=<char>] [header]'
|
||||||
|
-o mode use CSV output for print with args (ignore OFS and ORS)
|
||||||
|
'csv|tsv [separator=<char>]'
|
||||||
|
-version show GoAWK version and exit
|
||||||
|
`
|
||||||
|
)
|
||||||
|
|
||||||
|
func Run(args []string) {
|
||||||
|
// Parse command line arguments manually rather than using the
|
||||||
|
// "flag" package, so we can support flags with no space between
|
||||||
|
// flag and argument, like '-F:' (allowed by POSIX)
|
||||||
|
var progFiles []string
|
||||||
|
var vars []string
|
||||||
|
fieldSep := " "
|
||||||
|
cpuprofile := ""
|
||||||
|
debug := false
|
||||||
|
debugAsm := false
|
||||||
|
debugTypes := false
|
||||||
|
memprofile := ""
|
||||||
|
inputMode := ""
|
||||||
|
outputMode := ""
|
||||||
|
header := false
|
||||||
|
|
||||||
|
var i int
|
||||||
|
for i = 1; i < len(args); i++ {
|
||||||
|
// Stop on explicit end of args or first arg not prefixed with "-"
|
||||||
|
arg := args[i]
|
||||||
|
if arg == "--" {
|
||||||
|
i++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if arg == "-" || !strings.HasPrefix(arg, "-") {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
switch arg {
|
||||||
|
case "-F":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -F")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
fieldSep = args[i]
|
||||||
|
case "-f":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -f")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
progFiles = append(progFiles, args[i])
|
||||||
|
case "-v":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -v")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
vars = append(vars, args[i])
|
||||||
|
case "-cpuprofile":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -cpuprofile")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
cpuprofile = args[i]
|
||||||
|
case "-d":
|
||||||
|
debug = true
|
||||||
|
case "-da":
|
||||||
|
debugAsm = true
|
||||||
|
case "-dt":
|
||||||
|
debugTypes = true
|
||||||
|
case "-H":
|
||||||
|
header = true
|
||||||
|
case "-h", "--help":
|
||||||
|
fmt.Printf("%s\n\n%s\n\n%s", copyright, shortUsage, longUsage)
|
||||||
|
os.Exit(0)
|
||||||
|
case "-i":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -i")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
inputMode = args[i]
|
||||||
|
case "-memprofile":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -memprofile")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
memprofile = args[i]
|
||||||
|
case "-o":
|
||||||
|
if i+1 >= len(args) {
|
||||||
|
errorExitf("flag needs an argument: -o")
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
outputMode = args[i]
|
||||||
|
case "-version", "--version":
|
||||||
|
fmt.Println(version)
|
||||||
|
os.Exit(0)
|
||||||
|
default:
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(arg, "-F"):
|
||||||
|
fieldSep = arg[2:]
|
||||||
|
case strings.HasPrefix(arg, "-f"):
|
||||||
|
progFiles = append(progFiles, arg[2:])
|
||||||
|
case strings.HasPrefix(arg, "-i"):
|
||||||
|
inputMode = arg[2:]
|
||||||
|
case strings.HasPrefix(arg, "-o"):
|
||||||
|
outputMode = arg[2:]
|
||||||
|
case strings.HasPrefix(arg, "-v"):
|
||||||
|
vars = append(vars, arg[2:])
|
||||||
|
case strings.HasPrefix(arg, "-cpuprofile="):
|
||||||
|
cpuprofile = arg[12:]
|
||||||
|
case strings.HasPrefix(arg, "-memprofile="):
|
||||||
|
memprofile = arg[12:]
|
||||||
|
default:
|
||||||
|
errorExitf("flag provided but not defined: %s", arg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Any remaining args are program and input files
|
||||||
|
args = args[i:]
|
||||||
|
|
||||||
|
var src []byte
|
||||||
|
var stdinBytes []byte // used if there's a parse error
|
||||||
|
if len(progFiles) > 0 {
|
||||||
|
// Read source: the concatenation of all source files specified
|
||||||
|
buf := &bytes.Buffer{}
|
||||||
|
progFiles = expandWildcardsOnWindows(progFiles)
|
||||||
|
for _, progFile := range progFiles {
|
||||||
|
if progFile == "-" {
|
||||||
|
b, err := ioutil.ReadAll(os.Stdin)
|
||||||
|
if err != nil {
|
||||||
|
errorExit(err)
|
||||||
|
}
|
||||||
|
stdinBytes = b
|
||||||
|
_, _ = buf.Write(b)
|
||||||
|
} else {
|
||||||
|
f, err := os.Open(progFile)
|
||||||
|
if err != nil {
|
||||||
|
errorExit(err)
|
||||||
|
}
|
||||||
|
_, err = buf.ReadFrom(f)
|
||||||
|
if err != nil {
|
||||||
|
_ = f.Close()
|
||||||
|
errorExit(err)
|
||||||
|
}
|
||||||
|
_ = f.Close()
|
||||||
|
}
|
||||||
|
// Append newline to file in case it doesn't end with one
|
||||||
|
_ = buf.WriteByte('\n')
|
||||||
|
}
|
||||||
|
src = buf.Bytes()
|
||||||
|
} else {
|
||||||
|
if len(args) < 1 {
|
||||||
|
errorExitf(shortUsage)
|
||||||
|
}
|
||||||
|
src = []byte(args[0])
|
||||||
|
args = args[1:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse source code and setup interpreter
|
||||||
|
parserConfig := &parser.ParserConfig{
|
||||||
|
DebugTypes: debugTypes,
|
||||||
|
DebugWriter: os.Stderr,
|
||||||
|
}
|
||||||
|
prog, err := parser.ParseProgram(src, parserConfig)
|
||||||
|
if err != nil {
|
||||||
|
if err, ok := err.(*parser.ParseError); ok {
|
||||||
|
name, line := errorFileLine(progFiles, stdinBytes, err.Position.Line)
|
||||||
|
fmt.Fprintf(os.Stderr, "%s:%d:%d: %s\n",
|
||||||
|
name, line, err.Position.Column, err.Message)
|
||||||
|
showSourceLine(src, err.Position)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
errorExitf("%s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if debug {
|
||||||
|
fmt.Fprintln(os.Stderr, prog)
|
||||||
|
}
|
||||||
|
|
||||||
|
if debugAsm {
|
||||||
|
err := prog.Disassemble(os.Stderr)
|
||||||
|
if err != nil {
|
||||||
|
errorExitf("could not disassemble program: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if header {
|
||||||
|
if inputMode == "" {
|
||||||
|
errorExitf("-H only allowed together with -i")
|
||||||
|
}
|
||||||
|
inputMode += " header"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't buffer output if stdout is a terminal (default output writer when
|
||||||
|
// Config.Output is nil is a buffered version of os.Stdout).
|
||||||
|
var stdout io.Writer
|
||||||
|
stdoutInfo, err := os.Stdout.Stat()
|
||||||
|
if err == nil && stdoutInfo.Mode()&os.ModeCharDevice != 0 {
|
||||||
|
stdout = os.Stdout
|
||||||
|
}
|
||||||
|
|
||||||
|
config := &interp.Config{
|
||||||
|
Argv0: filepath.Base(args[0]),
|
||||||
|
Args: expandWildcardsOnWindows(args),
|
||||||
|
Vars: []string{
|
||||||
|
"FS", fieldSep,
|
||||||
|
"INPUTMODE", inputMode,
|
||||||
|
"OUTPUTMODE", outputMode,
|
||||||
|
},
|
||||||
|
Output: stdout,
|
||||||
|
}
|
||||||
|
for _, v := range vars {
|
||||||
|
equals := strings.IndexByte(v, '=')
|
||||||
|
if equals < 0 {
|
||||||
|
errorExitf("-v flag must be in format name=value")
|
||||||
|
}
|
||||||
|
name, value := v[:equals], v[equals+1:]
|
||||||
|
// Oddly, -v must interpret escapes (issue #129)
|
||||||
|
unescaped, err := lexer.Unescape(value)
|
||||||
|
if err == nil {
|
||||||
|
value = unescaped
|
||||||
|
}
|
||||||
|
config.Vars = append(config.Vars, name, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cpuprofile != "" {
|
||||||
|
f, err := os.Create(cpuprofile)
|
||||||
|
if err != nil {
|
||||||
|
errorExitf("could not create CPU profile: %v", err)
|
||||||
|
}
|
||||||
|
if err := pprof.StartCPUProfile(f); err != nil {
|
||||||
|
errorExitf("could not start CPU profile: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the program!
|
||||||
|
status, err := interp.ExecProgram(prog, config)
|
||||||
|
if err != nil {
|
||||||
|
errorExit(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cpuprofile != "" {
|
||||||
|
pprof.StopCPUProfile()
|
||||||
|
}
|
||||||
|
if memprofile != "" {
|
||||||
|
f, err := os.Create(memprofile)
|
||||||
|
if err != nil {
|
||||||
|
errorExitf("could not create memory profile: %v", err)
|
||||||
|
}
|
||||||
|
runtime.GC() // get up-to-date statistics
|
||||||
|
if err := pprof.WriteHeapProfile(f); err != nil {
|
||||||
|
errorExitf("could not write memory profile: %v", err)
|
||||||
|
}
|
||||||
|
_ = f.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
os.Exit(status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show source line and position of error, for example:
|
||||||
|
//
|
||||||
|
// BEGIN { x*; }
|
||||||
|
// ^
|
||||||
|
func showSourceLine(src []byte, pos lexer.Position) {
|
||||||
|
lines := bytes.Split(src, []byte{'\n'})
|
||||||
|
srcLine := string(lines[pos.Line-1])
|
||||||
|
numTabs := strings.Count(srcLine[:pos.Column-1], "\t")
|
||||||
|
runeColumn := utf8.RuneCountInString(srcLine[:pos.Column-1])
|
||||||
|
fmt.Fprintln(os.Stderr, strings.Replace(srcLine, "\t", " ", -1))
|
||||||
|
fmt.Fprintln(os.Stderr, strings.Repeat(" ", runeColumn)+strings.Repeat(" ", numTabs)+"^")
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorFileLine maps errorLine, a 1-based line number in the concatenated
// program source, back to the program file it came from and the line number
// within that file.
//
// With no program files the source came from the command line, so
// "<cmdline>" and errorLine are returned unchanged. A progFiles entry of "-"
// is reported as "<stdin>" and measured using stdinBytes; other entries are
// re-read from disk. Every file is counted as having one extra trailing
// newline. If a file cannot be read, or errorLine lies beyond all files,
// "<unknown>" and errorLine are returned.
func errorFileLine(progFiles []string, stdinBytes []byte, errorLine int) (string, int) {
	if len(progFiles) == 0 {
		return "<cmdline>", errorLine
	}

	newline := []byte{'\n'}
	first := 1 // overall line number at which the current file starts
	for _, name := range progFiles {
		var text []byte
		switch name {
		case "-":
			name, text = "<stdin>", stdinBytes
		default:
			data, err := ioutil.ReadFile(name)
			if err != nil {
				return "<unknown>", errorLine
			}
			text = data
		}

		// +1 accounts for the newline counted after each program file.
		next := first + bytes.Count(text, newline) + 1
		if first <= errorLine && errorLine < next {
			return name, errorLine - first + 1
		}
		first = next
	}
	return "<unknown>", errorLine
}
|
||||||
|
|
||||||
|
func errorExit(err error) {
|
||||||
|
pathErr, ok := err.(*os.PathError)
|
||||||
|
if ok && os.IsNotExist(err) {
|
||||||
|
errorExitf("file %q not found", pathErr.Path)
|
||||||
|
}
|
||||||
|
errorExitf("%s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorExitf writes the formatted message plus a trailing newline to stderr
// and then exits the process with status 1.
func errorExitf(format string, args ...interface{}) {
	line := format + "\n"
	fmt.Fprintf(os.Stderr, line, args...)
	os.Exit(1)
}
|
||||||
|
|
||||||
|
func expandWildcardsOnWindows(args []string) []string {
|
||||||
|
if runtime.GOOS != "windows" {
|
||||||
|
return args
|
||||||
|
}
|
||||||
|
return expandWildcards(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
// expandWildcards replaces each argument that matches one or more files (per
// filepath.Glob) with those matches, in order. An argument with no matches,
// or one that is not a valid pattern, is kept as-is.
//
// Originally from https://github.com/mattn/getwild (compatible LICENSE).
func expandWildcards(args []string) []string {
	expanded := make([]string, 0, len(args))
	for _, pattern := range args {
		found, err := filepath.Glob(pattern)
		if err != nil || len(found) == 0 {
			expanded = append(expanded, pattern)
			continue
		}
		expanded = append(expanded, found...)
	}
	return expanded
}
|
749
src/tool/awk/goawk_test.go
Normal file
749
src/tool/awk/goawk_test.go
Normal file
|
@ -0,0 +1,749 @@
|
||||||
|
// GoAWK tests
|
||||||
|
|
||||||
|
package awk_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/interp"
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
goExe string
|
||||||
|
testsDir string
|
||||||
|
outputDir string
|
||||||
|
awkExe string
|
||||||
|
goAWKExe string
|
||||||
|
writeAWK bool
|
||||||
|
writeGoAWK bool
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
flag.StringVar(&goExe, "goexe", "go", "set to override Go executable used to build goawk")
|
||||||
|
flag.StringVar(&testsDir, "testsdir", "./testdata", "directory with one-true-awk tests")
|
||||||
|
flag.StringVar(&outputDir, "outputdir", "./testdata/output", "directory for test output")
|
||||||
|
flag.StringVar(&awkExe, "awk", "gawk", "awk executable name")
|
||||||
|
flag.StringVar(&goAWKExe, "goawk", "./goawk", "goawk executable name")
|
||||||
|
flag.BoolVar(&writeAWK, "writeawk", false, "write expected output")
|
||||||
|
flag.BoolVar(&writeGoAWK, "writegoawk", true, "write Go AWK output")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
cmd := exec.Command(goExe, "build", "-ldflags=-w")
|
||||||
|
stderr, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "error building goawk: %v\n%s\n", err, stderr)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
os.Exit(m.Run())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAWK(t *testing.T) {
|
||||||
|
inputByPrefix := map[string]string{
|
||||||
|
"t": "test.data",
|
||||||
|
"p": "test.countries",
|
||||||
|
}
|
||||||
|
// These programs exit with non-zero status code
|
||||||
|
errorExits := map[string]bool{
|
||||||
|
"t.exit": true,
|
||||||
|
"t.exit1": true,
|
||||||
|
"t.gsub4": true,
|
||||||
|
"t.split3": true,
|
||||||
|
}
|
||||||
|
// These programs have known different output
|
||||||
|
knownDifferent := map[string]bool{
|
||||||
|
"t.printf2": true, // because awk is weird here (our behavior is like mawk)
|
||||||
|
}
|
||||||
|
// Can't really diff test rand() tests as we're using a totally
|
||||||
|
// different algorithm for random numbers
|
||||||
|
randTests := map[string]bool{
|
||||||
|
"p.48b": true,
|
||||||
|
"t.randk": true,
|
||||||
|
}
|
||||||
|
// These tests use "for (x in a)", which iterates in an undefined
|
||||||
|
// order (according to the spec), so sort lines before comparing.
|
||||||
|
sortLines := map[string]bool{
|
||||||
|
"p.43": true,
|
||||||
|
"t.in1": true, // because "sort" is locale-dependent
|
||||||
|
"t.in2": true,
|
||||||
|
"t.intest2": true,
|
||||||
|
}
|
||||||
|
dontRunOnWindows := map[string]bool{
|
||||||
|
"p.50": true, // because this pipes to Unix sort "sort -t: +0 -1 +2nr"
|
||||||
|
}
|
||||||
|
|
||||||
|
infos, err := ioutil.ReadDir(testsDir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("couldn't read test files: %v", err)
|
||||||
|
}
|
||||||
|
for _, info := range infos {
|
||||||
|
if !strings.HasPrefix(info.Name(), "t.") && !strings.HasPrefix(info.Name(), "p.") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if runtime.GOOS == "windows" && dontRunOnWindows[info.Name()] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t.Run(info.Name(), func(t *testing.T) {
|
||||||
|
srcPath := filepath.Join(testsDir, info.Name())
|
||||||
|
inputPath := filepath.Join(testsDir, inputByPrefix[info.Name()[:1]])
|
||||||
|
outputPath := filepath.Join(outputDir, info.Name())
|
||||||
|
|
||||||
|
cmd := exec.Command(awkExe, "-f", srcPath, inputPath)
|
||||||
|
expected, err := cmd.Output()
|
||||||
|
if err != nil && !errorExits[info.Name()] {
|
||||||
|
t.Fatalf("error running %s: %v", awkExe, err)
|
||||||
|
}
|
||||||
|
expected = bytes.Replace(expected, []byte{0}, []byte("<00>"), -1)
|
||||||
|
expected = normalizeNewlines(expected)
|
||||||
|
if sortLines[info.Name()] {
|
||||||
|
expected = sortedLines(expected)
|
||||||
|
}
|
||||||
|
if writeAWK {
|
||||||
|
err := ioutil.WriteFile(outputPath, expected, 0644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error writing awk output: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prog, err := parseGoAWK(srcPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
output, err := interpGoAWK(prog, inputPath)
|
||||||
|
if err != nil && !errorExits[info.Name()] {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
output = bytes.Replace(output, []byte{0}, []byte("<00>"), -1)
|
||||||
|
output = normalizeNewlines(output)
|
||||||
|
if randTests[info.Name()] || knownDifferent[info.Name()] {
|
||||||
|
// For tests that use rand(), run them to ensure they
|
||||||
|
// parse and interpret, but can't compare the output,
|
||||||
|
// so stop now
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if sortLines[info.Name()] {
|
||||||
|
output = sortedLines(output)
|
||||||
|
}
|
||||||
|
if writeGoAWK {
|
||||||
|
err := ioutil.WriteFile(outputPath, output, 0644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error writing goawk output: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if string(output) != string(expected) {
|
||||||
|
t.Fatalf("output differs, run: git diff %s", outputPath)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = os.Remove("tempbig")
|
||||||
|
_ = os.Remove("tempsmall")
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseGoAWK(srcPath string) (*parser.Program, error) {
|
||||||
|
src, err := ioutil.ReadFile(srcPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
prog, err := parser.ParseProgram(src, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return prog, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func interpGoAWK(prog *parser.Program, inputPath string) ([]byte, error) {
|
||||||
|
outBuf := &bytes.Buffer{}
|
||||||
|
errBuf := &bytes.Buffer{}
|
||||||
|
config := &interp.Config{
|
||||||
|
Output: outBuf,
|
||||||
|
Error: &concurrentWriter{w: errBuf},
|
||||||
|
Args: []string{inputPath},
|
||||||
|
}
|
||||||
|
_, err := interp.ExecProgram(prog, config)
|
||||||
|
result := outBuf.Bytes()
|
||||||
|
result = append(result, errBuf.Bytes()...)
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func interpGoAWKStdin(prog *parser.Program, inputPath string) ([]byte, error) {
|
||||||
|
input, _ := ioutil.ReadFile(inputPath)
|
||||||
|
outBuf := &bytes.Buffer{}
|
||||||
|
errBuf := &bytes.Buffer{}
|
||||||
|
config := &interp.Config{
|
||||||
|
Stdin: &concurrentReader{r: bytes.NewReader(input)},
|
||||||
|
Output: outBuf,
|
||||||
|
Error: &concurrentWriter{w: errBuf},
|
||||||
|
// srcdir is for "redfilnm.awk"
|
||||||
|
Vars: []string{"srcdir", filepath.Dir(inputPath)},
|
||||||
|
}
|
||||||
|
_, err := interp.ExecProgram(prog, config)
|
||||||
|
result := outBuf.Bytes()
|
||||||
|
result = append(result, errBuf.Bytes()...)
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// concurrentWriter wraps an io.Writer and guards every Write with a mutex so
// the writer can be shared by concurrent callers.
type concurrentWriter struct {
	w  io.Writer
	mu sync.Mutex
}

// Write forwards data to the wrapped writer while holding the mutex.
func (cw *concurrentWriter) Write(data []byte) (int, error) {
	cw.mu.Lock()
	defer cw.mu.Unlock()

	written, err := cw.w.Write(data)
	return written, err
}
|
||||||
|
|
||||||
|
// concurrentReader wraps an io.Reader and guards every Read with a mutex so
// the reader can be shared by concurrent callers.
type concurrentReader struct {
	r  io.Reader
	mu sync.Mutex
}

// Read fills buf from the wrapped reader while holding the mutex.
func (cr *concurrentReader) Read(buf []byte) (int, error) {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	count, err := cr.r.Read(buf)
	return count, err
}
|
||||||
|
|
||||||
|
// sortedLines returns the lines of data in sorted order, newline-terminated.
// A single trailing newline in data is dropped before splitting so it does
// not produce an extra empty line; the result always ends with one newline.
func sortedLines(data []byte) []byte {
	withoutFinalNewline := strings.TrimSuffix(string(data), "\n")
	sorted := strings.Split(withoutFinalNewline, "\n")
	sort.Strings(sorted)
	joined := strings.Join(sorted, "\n")
	return append([]byte(joined), '\n')
}
|
||||||
|
|
||||||
|
func TestGAWK(t *testing.T) {
|
||||||
|
skip := map[string]bool{ // TODO: fix these (at least the ones that are bugs)
|
||||||
|
"getline": true, // getline syntax issues (may be okay, see grammar notes at http://pubs.opengroup.org/onlinepubs/007904975/utilities/awk.html#tag_04_06_13_14)
|
||||||
|
"getline3": true, // getline syntax issues (similar to above)
|
||||||
|
|
||||||
|
"gsubtst7": true, // something wrong with gsub or field split/join
|
||||||
|
"splitwht": true, // other awks handle split(s, a, " ") differently from split(s, a, / /)
|
||||||
|
"status-close": true, // hmmm, not sure what's up here
|
||||||
|
"sigpipe1": true, // probable race condition: sometimes fails, sometimes passes
|
||||||
|
|
||||||
|
"parse1": true, // incorrect parsing of $$a++++ (see TODOs in interp_test.go too)
|
||||||
|
|
||||||
|
"rscompat": true, // GoAWK allows multi-char RS by default
|
||||||
|
"rsstart2": true, // GoAWK ^ and $ anchors match beginning and end of line, not file (unlike Gawk)
|
||||||
|
|
||||||
|
"hex2": true, // GoAWK allows hex numbers / floating point (per POSIX)
|
||||||
|
"strtod": true, // GoAWK allows hex numbers / floating point (per POSIX)
|
||||||
|
}
|
||||||
|
|
||||||
|
dontRunOnWindows := map[string]bool{
|
||||||
|
"delargv": true, // reads from /dev/null
|
||||||
|
"eofsplit": true, // reads from /etc/passwd
|
||||||
|
"getline5": true, // removes a file while it's open
|
||||||
|
"iobug1": true, // reads from /dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
sortLines := map[string]bool{
|
||||||
|
"arryref2": true,
|
||||||
|
"delargv": true,
|
||||||
|
"delarpm2": true,
|
||||||
|
"forref": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
gawkDir := filepath.Join(testsDir, "gawk")
|
||||||
|
infos, err := ioutil.ReadDir(gawkDir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("couldn't read test files: %v", err)
|
||||||
|
}
|
||||||
|
for _, info := range infos {
|
||||||
|
if !strings.HasSuffix(info.Name(), ".awk") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
testName := info.Name()[:len(info.Name())-4]
|
||||||
|
if skip[testName] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if runtime.GOOS == "windows" && dontRunOnWindows[testName] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
|
srcPath := filepath.Join(gawkDir, info.Name())
|
||||||
|
inputPath := filepath.Join(gawkDir, testName+".in")
|
||||||
|
okPath := filepath.Join(gawkDir, testName+".ok")
|
||||||
|
|
||||||
|
expected, err := ioutil.ReadFile(okPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expected = normalizeNewlines(expected)
|
||||||
|
|
||||||
|
prog, err := parseGoAWK(srcPath)
|
||||||
|
if err != nil {
|
||||||
|
if err.Error() != string(expected) {
|
||||||
|
t.Fatalf("parser error differs, got:\n%s\nexpected:\n%s", err.Error(), expected)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
output, err := interpGoAWKStdin(prog, inputPath)
|
||||||
|
output = normalizeNewlines(output)
|
||||||
|
if err != nil {
|
||||||
|
errStr := string(output) + err.Error()
|
||||||
|
if errStr != string(expected) {
|
||||||
|
t.Fatalf("interp error differs, got:\n%s\nexpected:\n%s", errStr, expected)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if sortLines[testName] {
|
||||||
|
output = sortedLines(output)
|
||||||
|
expected = sortedLines(expected)
|
||||||
|
}
|
||||||
|
|
||||||
|
if string(output) != string(expected) {
|
||||||
|
t.Fatalf("output differs, got:\n%s\nexpected:\n%s", output, expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = os.Remove("seq")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCommandLine(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
args []string
|
||||||
|
stdin string
|
||||||
|
output string
|
||||||
|
error string
|
||||||
|
}{
|
||||||
|
// Load source from stdin
|
||||||
|
{[]string{"-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
||||||
|
{[]string{"-f", "-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
||||||
|
{[]string{"-f-", "-f", "-"}, `BEGIN { print "b" }`, "b\n", ""},
|
||||||
|
|
||||||
|
// Program with no input
|
||||||
|
{[]string{`BEGIN { print "a" }`}, "", "a\n", ""},
|
||||||
|
|
||||||
|
// Read input from stdin
|
||||||
|
{[]string{`$0`}, "one\n\nthree", "one\nthree\n", ""},
|
||||||
|
{[]string{`$0`, "-"}, "one\n\nthree", "one\nthree\n", ""},
|
||||||
|
{[]string{`$0`, "-", "-"}, "one\n\nthree", "one\nthree\n", ""},
|
||||||
|
{[]string{"-f", "testdata/t.0", "-"}, "one\ntwo\n", "one\ntwo\n", ""},
|
||||||
|
{[]string{"{ print FILENAME }"}, "a", "-\n", ""},
|
||||||
|
{[]string{"{ print FILENAME }", "-"}, "a", "-\n", ""},
|
||||||
|
|
||||||
|
// Read input from file(s)
|
||||||
|
{[]string{`$0`, "testdata/g.1"}, "", "ONE\n", ""},
|
||||||
|
{[]string{`$0`, "testdata/g.1", "testdata/g.2"}, "", "ONE\nTWO\n", ""},
|
||||||
|
{[]string{`{ print FILENAME ":" FNR "/" NR ": " $0 }`, "testdata/g.1", "testdata/g.4"}, "",
|
||||||
|
"testdata/g.1:1/1: ONE\ntestdata/g.4:1/2: FOUR a\ntestdata/g.4:2/3: FOUR b\n", ""},
|
||||||
|
{[]string{`$0`, "testdata/g.1", "-", "testdata/g.2"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""},
|
||||||
|
{[]string{`$0`, "testdata/g.1", "-", "testdata/g.2", "-"}, "STDIN", "ONE\nSTDIN\nTWO\n", ""},
|
||||||
|
{[]string{"-F", " ", "--", "$0", "testdata/g.1"}, "", "ONE\n", ""},
|
||||||
|
{[]string{"{ print NR, FNR } END { print NR, FNR }", "-"}, "a\nb\nc\n", "1 1\n2 2\n3 3\n3 3\n", ""},
|
||||||
|
// I've deleted the "-ftest" file for now as it was causing problems with "go install" zip files
|
||||||
|
// {[]string{"--", "$0", "-ftest"}, "", "used in tests; do not delete\n", ""}, // Issue #53
|
||||||
|
// {[]string{"$0", "-ftest"}, "", "used in tests; do not delete\n", ""},
|
||||||
|
|
||||||
|
// Specifying field separator with -F
|
||||||
|
{[]string{`{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 3\n4 6\n", ""},
|
||||||
|
{[]string{"-F", ",", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1 2 3 \n4 5 6 \n", ""},
|
||||||
|
{[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
||||||
|
{[]string{"-F", ",", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
||||||
|
{[]string{"-F,", `{ print $1, $3 }`}, "1,2,3\n4,5,6", "1 3\n4 6\n", ""},
|
||||||
|
|
||||||
|
// Assigning other variables with -v
|
||||||
|
{[]string{"-v", "OFS=.", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.3\n4.6\n", ""},
|
||||||
|
{[]string{"-v", "OFS=.", "-v", "ORS=", `{ print $1, $3 }`}, "1 2 3\n4 5 6", "1.34.6", ""},
|
||||||
|
{[]string{"-v", "x=42", "-v", "y=foo", `BEGIN { print x, y }`}, "", "42 foo\n", ""},
|
||||||
|
{[]string{"-v", "RS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""},
|
||||||
|
{[]string{"-vRS=;", `$0`}, "a b;c\nd;e", "a b\nc\nd\ne\n", ""},
|
||||||
|
{[]string{"-v", `X=x\ty`, `BEGIN { printf X }`}, "", "x\ty", ""},
|
||||||
|
|
||||||
|
// ARGV/ARGC handling
|
||||||
|
{[]string{`
|
||||||
|
BEGIN {
|
||||||
|
for (i=1; i<ARGC; i++) {
|
||||||
|
print i, ARGV[i]
|
||||||
|
}
|
||||||
|
}`, "a", "b"}, "", "1 a\n2 b\n", ""},
|
||||||
|
{[]string{`
|
||||||
|
BEGIN {
|
||||||
|
for (i=1; i<ARGC; i++) {
|
||||||
|
print i, ARGV[i]
|
||||||
|
delete ARGV[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$0`, "a", "b"}, "c\nd", "1 a\n2 b\nc\nd\n", ""},
|
||||||
|
{[]string{`
|
||||||
|
BEGIN {
|
||||||
|
ARGV[1] = ""
|
||||||
|
}
|
||||||
|
$0`, "testdata/g.1", "-", "testdata/g.2"}, "c\nd", "c\nd\nTWO\n", ""},
|
||||||
|
{[]string{`
|
||||||
|
BEGIN {
|
||||||
|
ARGC = 3
|
||||||
|
}
|
||||||
|
$0`, "testdata/g.1", "-", "testdata/g.2"}, "c\nd", "ONE\nc\nd\n", ""},
|
||||||
|
{[]string{"-v", "A=1", "-f", "testdata/g.3", "B=2", "testdata/test.countries"}, "",
|
||||||
|
"A=1, B=0\n\tARGV[1] = B=2\n\tARGV[2] = testdata/test.countries\nA=1, B=2\n", ""},
|
||||||
|
{[]string{`END { print (x==42) }`, "x=42.0"}, "", "1\n", ""},
|
||||||
|
{[]string{`END { printf X }`, `X=a\tb`}, "", "a\tb", ""},
|
||||||
|
{[]string{"-v", "x=42.0", `BEGIN { print (x==42) }`}, "", "1\n", ""},
|
||||||
|
{[]string{`BEGIN { print(ARGV[1]<2, ARGV[2]<2); ARGV[1]="10"; ARGV[2]="10x"; print(ARGV[1]<2, ARGV[2]<2) }`,
|
||||||
|
"10", "10x"}, "", "0 1\n1 1\n", ""},
|
||||||
|
|
||||||
|
// Error handling
|
||||||
|
{[]string{}, "", "", "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"},
|
||||||
|
{[]string{"-F"}, "", "", "flag needs an argument: -F"},
|
||||||
|
{[]string{"-f"}, "", "", "flag needs an argument: -f"},
|
||||||
|
{[]string{"-v"}, "", "", "flag needs an argument: -v"},
|
||||||
|
{[]string{"-z"}, "", "", "flag provided but not defined: -z"},
|
||||||
|
{[]string{"{ print }", "notexist"}, "", "", `file "notexist" not found`},
|
||||||
|
{[]string{"BEGIN { print 1/0 }"}, "", "", "division by zero"},
|
||||||
|
{[]string{"-v", "foo", "BEGIN {}"}, "", "", "-v flag must be in format name=value"},
|
||||||
|
{[]string{"--", "{ print $1 }", "-file"}, "", "", `file "-file" not found`},
|
||||||
|
{[]string{"{ print $1 }", "-file"}, "", "", `file "-file" not found`},
|
||||||
|
|
||||||
|
// Output synchronization
|
||||||
|
{[]string{`BEGIN { print "1"; print "2"|"cat" }`}, "", "1\n2\n", ""},
|
||||||
|
{[]string{`BEGIN { print "1"; "echo 2" | getline x; print x }`}, "", "1\n2\n", ""},
|
||||||
|
|
||||||
|
// Parse error formatting
|
||||||
|
{[]string{"`"}, "", "", "<cmdline>:1:1: unexpected char\n`\n^"},
|
||||||
|
{[]string{"BEGIN {\n\tx*;\n}"}, "", "", "<cmdline>:2:4: expected expression instead of ;\n x*;\n ^"},
|
||||||
|
{[]string{"BEGIN {\n\tx*\r\n}"}, "", "", "<cmdline>:2:4: expected expression instead of <newline>\n x*\n ^"},
|
||||||
|
{[]string{"-f", "-"}, "\n ++", "", "<stdin>:2:4: expected expression instead of <newline>\n ++\n ^"},
|
||||||
|
{[]string{"-f", "testdata/parseerror/good.awk", "-f", "testdata/parseerror/bad.awk"},
|
||||||
|
"", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of <newline>\nx*\n ^"},
|
||||||
|
{[]string{"-f", "testdata/parseerror/bad.awk", "-f", "testdata/parseerror/good.awk"},
|
||||||
|
"", "", "testdata/parseerror/bad.awk:2:3: expected expression instead of <newline>\nx*\n ^"},
|
||||||
|
{[]string{"-f", "testdata/parseerror/good.awk", "-f", "-", "-f", "testdata/parseerror/bad.awk"},
|
||||||
|
"`", "", "<stdin>:1:1: unexpected char\n`\n^"},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
testName := strings.Join(test.args, " ")
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
|
runAWKs(t, test.args, test.stdin, test.output, test.error)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDevStdout(t *testing.T) {
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
t.Skip("/dev/stdout not presnt on Windows")
|
||||||
|
}
|
||||||
|
runAWKs(t, []string{`BEGIN { print "1"; print "2">"/dev/stdout" }`}, "", "1\n2\n", "")
|
||||||
|
}
|
||||||
|
|
||||||
|
func runGoAWK(args []string, stdin string) (stdout, stderr string, err error) {
|
||||||
|
cmd := exec.Command(goAWKExe, args...)
|
||||||
|
if stdin != "" {
|
||||||
|
cmd.Stdin = strings.NewReader(stdin)
|
||||||
|
}
|
||||||
|
errBuf := &bytes.Buffer{}
|
||||||
|
cmd.Stderr = errBuf
|
||||||
|
output, err := cmd.Output()
|
||||||
|
stdout = string(normalizeNewlines(output))
|
||||||
|
stderr = string(normalizeNewlines(errBuf.Bytes()))
|
||||||
|
return stdout, stderr, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func runAWKs(t *testing.T, testArgs []string, testStdin, testOutput, testError string) {
|
||||||
|
var args []string
|
||||||
|
if strings.Contains(awkExe, "gawk") {
|
||||||
|
args = append(args, "--posix")
|
||||||
|
}
|
||||||
|
args = append(args, testArgs...)
|
||||||
|
cmd := exec.Command(awkExe, testArgs...)
|
||||||
|
if testStdin != "" {
|
||||||
|
cmd.Stdin = strings.NewReader(testStdin)
|
||||||
|
}
|
||||||
|
errBuf := &bytes.Buffer{}
|
||||||
|
cmd.Stderr = errBuf
|
||||||
|
output, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
if testError == "" {
|
||||||
|
t.Fatalf("expected no error, got AWK error: %v (%s)", err, errBuf.String())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if testError != "" {
|
||||||
|
t.Fatalf("expected AWK error, got none")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stdout := string(normalizeNewlines(output))
|
||||||
|
if stdout != testOutput {
|
||||||
|
t.Fatalf("expected AWK to give %q, got %q", testOutput, stdout)
|
||||||
|
}
|
||||||
|
|
||||||
|
stdout, stderr, err := runGoAWK(testArgs, testStdin)
|
||||||
|
if err != nil {
|
||||||
|
stderr = strings.TrimSpace(stderr)
|
||||||
|
if stderr != testError {
|
||||||
|
t.Fatalf("expected GoAWK error %q, got %q", testError, stderr)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if testError != "" {
|
||||||
|
t.Fatalf("expected GoAWK error %q, got none", testError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if stdout != testOutput {
|
||||||
|
t.Fatalf("expected GoAWK to give %q, got %q", testOutput, stdout)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWildcards(t *testing.T) {
|
||||||
|
if runtime.GOOS != "windows" {
|
||||||
|
// Wildcards shouldn't be expanded on non-Windows systems, and a file
|
||||||
|
// literally named "*.go" doesn't exist, so expect a failure.
|
||||||
|
_, stderr, err := runGoAWK([]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"}, "")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error using wildcards on non-Windows system")
|
||||||
|
}
|
||||||
|
expected := "file \"testdata/wildcards/*.txt\" not found\n"
|
||||||
|
if stderr != expected {
|
||||||
|
t.Fatalf("expected %q, got %q", expected, stderr)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
args []string
|
||||||
|
output string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
[]string{"FNR==1 { print FILENAME }", "testdata/wildcards/*.txt"},
|
||||||
|
"testdata/wildcards/one.txt\ntestdata/wildcards/two.txt\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
[]string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/one.txt"},
|
||||||
|
"testdata/wildcards/one.txt\nbee\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
[]string{"-f", "testdata/wildcards/*.awk", "testdata/wildcards/*.txt"},
|
||||||
|
"testdata/wildcards/one.txt\nbee\ntestdata/wildcards/two.txt\nbee\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
testName := strings.Join(test.args, " ")
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
|
stdout, stderr, err := runGoAWK(test.args, "")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
||||||
|
}
|
||||||
|
stdout = strings.Replace(stdout, "\\", "/", -1)
|
||||||
|
if stdout != test.output {
|
||||||
|
t.Fatalf("expected %q, got %q", test.output, stdout)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFILENAME(t *testing.T) {
|
||||||
|
origGoAWKExe := goAWKExe
|
||||||
|
goAWKExe = "../../" + goAWKExe
|
||||||
|
defer func() { goAWKExe = origGoAWKExe }()
|
||||||
|
|
||||||
|
origDir, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
err = os.Chdir("testdata/filename")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer os.Chdir(origDir)
|
||||||
|
|
||||||
|
src := `
|
||||||
|
BEGIN { FILENAME = "10"; print(FILENAME, FILENAME<2) }
|
||||||
|
BEGIN { FILENAME = 10; print(FILENAME, FILENAME<2) }
|
||||||
|
{ print(FILENAME, FILENAME<2) }
|
||||||
|
`
|
||||||
|
runAWKs(t, []string{src, "10", "10x"}, "", "10 1\n10 0\n10 0\n10x 1\n", "")
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeNewlines returns a copy of b with every CRLF sequence replaced by
// a single LF, so output can be compared independently of the platform's
// line endings.
func normalizeNewlines(b []byte) []byte {
	crlf := []byte("\r\n")
	lf := []byte{'\n'}
	return bytes.Replace(b, crlf, lf, -1)
}
|
||||||
|
|
||||||
|
func TestInputOutputMode(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
args []string
|
||||||
|
input string
|
||||||
|
output string
|
||||||
|
error string
|
||||||
|
}{
|
||||||
|
{[]string{"-icsv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
||||||
|
{[]string{"-i", "csv", "-H", `{ print @"age", @"name" }`}, "name,age\nBob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
||||||
|
{[]string{"-icsv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
||||||
|
{[]string{"-i", "csv", `{ print $2, $1 }`}, "Bob,42\nJane,37", "42 Bob\n37 Jane\n", ""},
|
||||||
|
{[]string{"-icsv", "-H", "-ocsv", `{ print @"age", @"name" }`}, "name,age\n\"Bo,ba\",42\nJane,37", "42,\"Bo,ba\"\n37,Jane\n", ""},
|
||||||
|
{[]string{"-o", "csv", `BEGIN { print "foo,bar", 3.14, "baz" }`}, "", "\"foo,bar\",3.14,baz\n", ""},
|
||||||
|
{[]string{"-iabc", `{}`}, "", "", "invalid input mode \"abc\"\n"},
|
||||||
|
{[]string{"-oxyz", `{}`}, "", "", "invalid output mode \"xyz\"\n"},
|
||||||
|
{[]string{"-H", `{}`}, "", "", "-H only allowed together with -i\n"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
testName := strings.Join(test.args, " ")
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
|
stdout, stderr, err := runGoAWK(test.args, test.input)
|
||||||
|
if err != nil {
|
||||||
|
if test.error == "" {
|
||||||
|
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
||||||
|
} else if stderr != test.error {
|
||||||
|
t.Fatalf("expected error message %q, got %q", test.error, stderr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if stdout != test.output {
|
||||||
|
t.Fatalf("expected %q, got %q", test.output, stdout)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMultipleCSVFiles(t *testing.T) {
|
||||||
|
// Ensure CSV handling works across multiple files with different headers (field names).
|
||||||
|
src := `
|
||||||
|
{
|
||||||
|
for (i=1; i in FIELDS; i++) {
|
||||||
|
if (i>1)
|
||||||
|
printf ",";
|
||||||
|
printf "%s", FIELDS[i]
|
||||||
|
}
|
||||||
|
printf " "
|
||||||
|
}
|
||||||
|
{ print @"name", @"age" }
|
||||||
|
`
|
||||||
|
stdout, stderr, err := runGoAWK([]string{"-i", "csv", "-H", src, "testdata/csv/1.csv", "testdata/csv/2.csv"}, "")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
||||||
|
}
|
||||||
|
expected := `
|
||||||
|
name,age Bob 42
|
||||||
|
name,age Jill 37
|
||||||
|
age,email,name Sarah 25
|
||||||
|
`[1:]
|
||||||
|
if stdout != expected {
|
||||||
|
t.Fatalf("expected %q, got %q", expected, stdout)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCSVDocExamples(t *testing.T) {
|
||||||
|
f, err := os.Open("csv.md")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error opening examples file: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var (
|
||||||
|
command string
|
||||||
|
output string
|
||||||
|
truncated bool
|
||||||
|
n = 1
|
||||||
|
)
|
||||||
|
runTest := func() {
|
||||||
|
t.Run(fmt.Sprintf("Example%d", n), func(t *testing.T) {
|
||||||
|
shell := "/bin/sh"
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
shell = "sh"
|
||||||
|
}
|
||||||
|
cmd := exec.Command(shell, "-c", command)
|
||||||
|
gotBytes, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error running %q: %v\n%s", command, err, gotBytes)
|
||||||
|
}
|
||||||
|
got := string(gotBytes)
|
||||||
|
if truncated {
|
||||||
|
numLines := strings.Count(output, "\n")
|
||||||
|
got = strings.Join(strings.Split(got, "\n")[:numLines], "\n") + "\n"
|
||||||
|
}
|
||||||
|
got = string(normalizeNewlines([]byte(got)))
|
||||||
|
if got != output {
|
||||||
|
t.Fatalf("error running %q\ngot:\n%s\nexpected:\n%s", command, got, output)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
inTest := false
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if strings.HasPrefix(line, "$ goawk") {
|
||||||
|
if inTest {
|
||||||
|
runTest()
|
||||||
|
}
|
||||||
|
inTest = true
|
||||||
|
command = "./" + line[2:]
|
||||||
|
output = ""
|
||||||
|
truncated = false
|
||||||
|
} else if inTest {
|
||||||
|
switch line {
|
||||||
|
case "```", "":
|
||||||
|
runTest()
|
||||||
|
inTest = false
|
||||||
|
case "...":
|
||||||
|
truncated = true
|
||||||
|
runTest()
|
||||||
|
inTest = false
|
||||||
|
default:
|
||||||
|
output += line + "\n"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if scanner.Err() != nil {
|
||||||
|
t.Errorf("error reading input: %v", scanner.Err())
|
||||||
|
}
|
||||||
|
if inTest {
|
||||||
|
t.Error("unexpectedly in test at end of file")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMandelbrot(t *testing.T) {
|
||||||
|
stdout, stderr, err := runGoAWK([]string{"-v", "width=80", "-v", "height=25", "-f", "testdata/tt.x1_mandelbrot"}, "")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected no error, got %v (%q)", err, stderr)
|
||||||
|
}
|
||||||
|
expected := `
|
||||||
|
................................................................................
|
||||||
|
......................................................--+-----..................
|
||||||
|
....................................................-----+*+-++-................
|
||||||
|
.................................................--------+* *+-----.............
|
||||||
|
..............................................--------+# #%*-------.........
|
||||||
|
.........................................------------++$ +-----------.....
|
||||||
|
...................................---------* # +* # *+++++%+--...
|
||||||
|
............................----------------++ @ *----..
|
||||||
|
.......................-+----------------+$ %+----..
|
||||||
|
..................-------*++%++**+++---++ #+--.
|
||||||
|
...............----------+* #*++* %*---.
|
||||||
|
.............-------+++++* # #----.
|
||||||
|
....------+-------++**@ @ ------.
|
||||||
|
....------+-------++**@ @ ------.
|
||||||
|
.............-------+++++* # #----.
|
||||||
|
...............----------+* #*++* %*---.
|
||||||
|
..................-------*++%++**+++---++ #+--.
|
||||||
|
.......................-+----------------+$ %+----..
|
||||||
|
............................----------------++ @ *----..
|
||||||
|
...................................---------* # +* # *+++++%+--...
|
||||||
|
.........................................------------++$ +-----------.....
|
||||||
|
..............................................--------+# #%*-------.........
|
||||||
|
.................................................--------+* *+-----.............
|
||||||
|
....................................................-----+*+-++-................
|
||||||
|
......................................................--+-----..................
|
||||||
|
`[1:]
|
||||||
|
if stdout != expected {
|
||||||
|
t.Fatalf("expected:\n%s\ngot:\n%s", expected, stdout)
|
||||||
|
}
|
||||||
|
}
|
600
src/tool/awk/internal/ast/ast.go
Normal file
600
src/tool/awk/internal/ast/ast.go
Normal file
|
@ -0,0 +1,600 @@
|
||||||
|
// GoAWK parser - abstract syntax tree structs
|
||||||
|
|
||||||
|
package ast
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
. "github.com/benhoyt/goawk/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Program is an entire AWK program.
|
||||||
|
type Program struct {
|
||||||
|
Begin []Stmts
|
||||||
|
Actions []Action
|
||||||
|
End []Stmts
|
||||||
|
Functions []Function
|
||||||
|
Scalars map[string]int
|
||||||
|
Arrays map[string]int
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns an indented, pretty-printed version of the parsed
|
||||||
|
// program.
|
||||||
|
func (p *Program) String() string {
|
||||||
|
parts := []string{}
|
||||||
|
for _, ss := range p.Begin {
|
||||||
|
parts = append(parts, "BEGIN {\n"+ss.String()+"}")
|
||||||
|
}
|
||||||
|
for _, a := range p.Actions {
|
||||||
|
parts = append(parts, a.String())
|
||||||
|
}
|
||||||
|
for _, ss := range p.End {
|
||||||
|
parts = append(parts, "END {\n"+ss.String()+"}")
|
||||||
|
}
|
||||||
|
for _, function := range p.Functions {
|
||||||
|
parts = append(parts, function.String())
|
||||||
|
}
|
||||||
|
return strings.Join(parts, "\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stmts is a block containing multiple statements.
|
||||||
|
type Stmts []Stmt
|
||||||
|
|
||||||
|
func (ss Stmts) String() string {
|
||||||
|
lines := []string{}
|
||||||
|
for _, s := range ss {
|
||||||
|
subLines := strings.Split(s.String(), "\n")
|
||||||
|
for _, sl := range subLines {
|
||||||
|
lines = append(lines, " "+sl+"\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.Join(lines, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Action is pattern-action section of a program.
|
||||||
|
type Action struct {
|
||||||
|
Pattern []Expr
|
||||||
|
Stmts Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Action) String() string {
|
||||||
|
patterns := make([]string, len(a.Pattern))
|
||||||
|
for i, p := range a.Pattern {
|
||||||
|
patterns[i] = p.String()
|
||||||
|
}
|
||||||
|
sep := ""
|
||||||
|
if len(patterns) > 0 && a.Stmts != nil {
|
||||||
|
sep = " "
|
||||||
|
}
|
||||||
|
stmtsStr := ""
|
||||||
|
if a.Stmts != nil {
|
||||||
|
stmtsStr = "{\n" + a.Stmts.String() + "}"
|
||||||
|
}
|
||||||
|
return strings.Join(patterns, ", ") + sep + stmtsStr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expr is the abstract syntax tree for any AWK expression.
|
||||||
|
type Expr interface {
|
||||||
|
expr()
|
||||||
|
String() string
|
||||||
|
}
|
||||||
|
|
||||||
|
// All these types implement the Expr interface.
|
||||||
|
func (e *FieldExpr) expr() {}
|
||||||
|
func (e *NamedFieldExpr) expr() {}
|
||||||
|
func (e *UnaryExpr) expr() {}
|
||||||
|
func (e *BinaryExpr) expr() {}
|
||||||
|
func (e *ArrayExpr) expr() {}
|
||||||
|
func (e *InExpr) expr() {}
|
||||||
|
func (e *CondExpr) expr() {}
|
||||||
|
func (e *NumExpr) expr() {}
|
||||||
|
func (e *StrExpr) expr() {}
|
||||||
|
func (e *RegExpr) expr() {}
|
||||||
|
func (e *VarExpr) expr() {}
|
||||||
|
func (e *IndexExpr) expr() {}
|
||||||
|
func (e *AssignExpr) expr() {}
|
||||||
|
func (e *AugAssignExpr) expr() {}
|
||||||
|
func (e *IncrExpr) expr() {}
|
||||||
|
func (e *CallExpr) expr() {}
|
||||||
|
func (e *UserCallExpr) expr() {}
|
||||||
|
func (e *MultiExpr) expr() {}
|
||||||
|
func (e *GetlineExpr) expr() {}
|
||||||
|
|
||||||
|
// FieldExpr is an expression like $0.
|
||||||
|
type FieldExpr struct {
|
||||||
|
Index Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *FieldExpr) String() string {
|
||||||
|
return "$" + e.Index.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NamedFieldExpr is an expression like @"name".
|
||||||
|
type NamedFieldExpr struct {
|
||||||
|
Field Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *NamedFieldExpr) String() string {
|
||||||
|
return "@" + e.Field.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnaryExpr is an expression like -1234.
|
||||||
|
type UnaryExpr struct {
|
||||||
|
Op Token
|
||||||
|
Value Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *UnaryExpr) String() string {
|
||||||
|
return e.Op.String() + e.Value.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// BinaryExpr is an expression like 1 + 2.
|
||||||
|
type BinaryExpr struct {
|
||||||
|
Left Expr
|
||||||
|
Op Token
|
||||||
|
Right Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *BinaryExpr) String() string {
|
||||||
|
var opStr string
|
||||||
|
if e.Op == CONCAT {
|
||||||
|
opStr = " "
|
||||||
|
} else {
|
||||||
|
opStr = " " + e.Op.String() + " "
|
||||||
|
}
|
||||||
|
return "(" + e.Left.String() + opStr + e.Right.String() + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// ArrayExpr is an array reference. Not really a stand-alone
|
||||||
|
// expression, except as an argument to split() or a user function
|
||||||
|
// call.
|
||||||
|
type ArrayExpr struct {
|
||||||
|
Scope VarScope
|
||||||
|
Index int
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *ArrayExpr) String() string {
|
||||||
|
return e.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// InExpr is an expression like (index in array).
|
||||||
|
type InExpr struct {
|
||||||
|
Index []Expr
|
||||||
|
Array *ArrayExpr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *InExpr) String() string {
|
||||||
|
if len(e.Index) == 1 {
|
||||||
|
return "(" + e.Index[0].String() + " in " + e.Array.String() + ")"
|
||||||
|
}
|
||||||
|
indices := make([]string, len(e.Index))
|
||||||
|
for i, index := range e.Index {
|
||||||
|
indices[i] = index.String()
|
||||||
|
}
|
||||||
|
return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// CondExpr is an expression like cond ? 1 : 0.
|
||||||
|
type CondExpr struct {
|
||||||
|
Cond Expr
|
||||||
|
True Expr
|
||||||
|
False Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *CondExpr) String() string {
|
||||||
|
return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// NumExpr is a literal number like 1234.
type NumExpr struct {
	Value float64
}

// String renders the number. Values that are whole numbers within the int64
// range are printed as plain integers; everything else (fractions, very
// large magnitudes, NaN and infinities) is printed with the "%.6g" format.
func (e *NumExpr) String() string {
	// Converting a float64 that does not fit the integer type is
	// implementation-dependent in Go, so only convert values known to be in
	// range. int64 is used rather than int so that 32-bit platforms print
	// the same text as 64-bit ones.
	const limit = 1 << 63
	if e.Value >= -limit && e.Value < limit {
		if whole := int64(e.Value); float64(whole) == e.Value {
			return strconv.FormatInt(whole, 10)
		}
	}
	return fmt.Sprintf("%.6g", e.Value)
}
|
||||||
|
|
||||||
|
// StrExpr is a literal string such as "foo".
type StrExpr struct {
	Value string
}

// String returns the value as a double-quoted literal, escaped by
// strconv.Quote.
func (e *StrExpr) String() string {
	quoted := strconv.Quote(e.Value)
	return quoted
}
|
||||||
|
|
||||||
|
// RegExpr is a stand-alone regex expression, equivalent to $0 ~ /regex/.
type RegExpr struct {
	Regex string
}

// String renders the regex between slashes, escaping every "/" inside it so
// that it cannot end the literal early.
func (e *RegExpr) String() string {
	var b strings.Builder
	b.WriteString("/")
	b.WriteString(strings.Replace(e.Regex, "/", `\/`, -1))
	b.WriteString("/")
	return b.String()
}
|
||||||
|
|
||||||
|
// VarScope identifies the kind of variable a reference resolves to: a
// special variable, a global, or a local.
type VarScope int

// The possible variable scopes.
const (
	ScopeSpecial VarScope = iota
	ScopeGlobal
	ScopeLocal
)
|
||||||
|
|
||||||
|
// VarExpr is a variable reference (special var, global, or local).
|
||||||
|
// Index is the resolved variable index used by the interpreter; Name
|
||||||
|
// is the original name used by String().
|
||||||
|
type VarExpr struct {
|
||||||
|
Scope VarScope
|
||||||
|
Index int
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *VarExpr) String() string {
|
||||||
|
return e.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndexExpr is an expression like a[k] (rvalue or lvalue).
|
||||||
|
type IndexExpr struct {
|
||||||
|
Array *ArrayExpr
|
||||||
|
Index []Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *IndexExpr) String() string {
|
||||||
|
indices := make([]string, len(e.Index))
|
||||||
|
for i, index := range e.Index {
|
||||||
|
indices[i] = index.String()
|
||||||
|
}
|
||||||
|
return e.Array.String() + "[" + strings.Join(indices, ", ") + "]"
|
||||||
|
}
|
||||||
|
|
||||||
|
// AssignExpr is an expression like x = 1234.
|
||||||
|
type AssignExpr struct {
|
||||||
|
Left Expr // can be one of: var, array[x], $n
|
||||||
|
Right Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *AssignExpr) String() string {
|
||||||
|
return e.Left.String() + " = " + e.Right.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// AugAssignExpr is an assignment expression like x += 5.
|
||||||
|
type AugAssignExpr struct {
|
||||||
|
Left Expr // can be one of: var, array[x], $n
|
||||||
|
Op Token
|
||||||
|
Right Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *AugAssignExpr) String() string {
|
||||||
|
return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IncrExpr is an increment or decrement expression like x++ or --y.
|
||||||
|
type IncrExpr struct {
|
||||||
|
Expr Expr
|
||||||
|
Op Token
|
||||||
|
Pre bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *IncrExpr) String() string {
|
||||||
|
if e.Pre {
|
||||||
|
return e.Op.String() + e.Expr.String()
|
||||||
|
} else {
|
||||||
|
return e.Expr.String() + e.Op.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CallExpr is a builtin function call like length($1).
|
||||||
|
type CallExpr struct {
|
||||||
|
Func Token
|
||||||
|
Args []Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *CallExpr) String() string {
|
||||||
|
args := make([]string, len(e.Args))
|
||||||
|
for i, a := range e.Args {
|
||||||
|
args[i] = a.String()
|
||||||
|
}
|
||||||
|
return e.Func.String() + "(" + strings.Join(args, ", ") + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// UserCallExpr is a user-defined function call like my_func(1, 2, 3)
|
||||||
|
//
|
||||||
|
// Index is the resolved function index used by the interpreter; Name
|
||||||
|
// is the original name used by String().
|
||||||
|
type UserCallExpr struct {
|
||||||
|
Native bool // false = AWK-defined function, true = native Go func
|
||||||
|
Index int
|
||||||
|
Name string
|
||||||
|
Args []Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *UserCallExpr) String() string {
|
||||||
|
args := make([]string, len(e.Args))
|
||||||
|
for i, a := range e.Args {
|
||||||
|
args[i] = a.String()
|
||||||
|
}
|
||||||
|
return e.Name + "(" + strings.Join(args, ", ") + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// MultiExpr isn't an interpretable expression, but it's used as a
|
||||||
|
// pseudo-expression for print[f] parsing.
|
||||||
|
type MultiExpr struct {
|
||||||
|
Exprs []Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *MultiExpr) String() string {
|
||||||
|
exprs := make([]string, len(e.Exprs))
|
||||||
|
for i, e := range e.Exprs {
|
||||||
|
exprs[i] = e.String()
|
||||||
|
}
|
||||||
|
return "(" + strings.Join(exprs, ", ") + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetlineExpr is an expression read from file or pipe input.
|
||||||
|
type GetlineExpr struct {
|
||||||
|
Command Expr
|
||||||
|
Target Expr
|
||||||
|
File Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *GetlineExpr) String() string {
|
||||||
|
s := ""
|
||||||
|
if e.Command != nil {
|
||||||
|
s += e.Command.String() + " |"
|
||||||
|
}
|
||||||
|
s += "getline"
|
||||||
|
if e.Target != nil {
|
||||||
|
s += " " + e.Target.String()
|
||||||
|
}
|
||||||
|
if e.File != nil {
|
||||||
|
s += " <" + e.File.String()
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsLValue returns true if the given expression can be used as an
|
||||||
|
// lvalue (on the left-hand side of an assignment, in a ++ or --
|
||||||
|
// operation, or as the third argument to sub or gsub).
|
||||||
|
func IsLValue(expr Expr) bool {
|
||||||
|
switch expr.(type) {
|
||||||
|
case *VarExpr, *IndexExpr, *FieldExpr:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stmt is the abstract syntax tree for any AWK statement.
|
||||||
|
type Stmt interface {
|
||||||
|
stmt()
|
||||||
|
String() string
|
||||||
|
}
|
||||||
|
|
||||||
|
// All these types implement the Stmt interface.
|
||||||
|
func (s *PrintStmt) stmt() {}
|
||||||
|
func (s *PrintfStmt) stmt() {}
|
||||||
|
func (s *ExprStmt) stmt() {}
|
||||||
|
func (s *IfStmt) stmt() {}
|
||||||
|
func (s *ForStmt) stmt() {}
|
||||||
|
func (s *ForInStmt) stmt() {}
|
||||||
|
func (s *WhileStmt) stmt() {}
|
||||||
|
func (s *DoWhileStmt) stmt() {}
|
||||||
|
func (s *BreakStmt) stmt() {}
|
||||||
|
func (s *ContinueStmt) stmt() {}
|
||||||
|
func (s *NextStmt) stmt() {}
|
||||||
|
func (s *ExitStmt) stmt() {}
|
||||||
|
func (s *DeleteStmt) stmt() {}
|
||||||
|
func (s *ReturnStmt) stmt() {}
|
||||||
|
func (s *BlockStmt) stmt() {}
|
||||||
|
|
||||||
|
// PrintStmt is a statement like print $1, $3.
|
||||||
|
type PrintStmt struct {
|
||||||
|
Args []Expr
|
||||||
|
Redirect Token
|
||||||
|
Dest Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrintStmt) String() string {
|
||||||
|
return printString("print", s.Args, s.Redirect, s.Dest)
|
||||||
|
}
|
||||||
|
|
||||||
|
func printString(f string, args []Expr, redirect Token, dest Expr) string {
|
||||||
|
parts := make([]string, len(args))
|
||||||
|
for i, a := range args {
|
||||||
|
parts[i] = a.String()
|
||||||
|
}
|
||||||
|
str := f + " " + strings.Join(parts, ", ")
|
||||||
|
if dest != nil {
|
||||||
|
str += " " + redirect.String() + dest.String()
|
||||||
|
}
|
||||||
|
return str
|
||||||
|
}
|
||||||
|
|
||||||
|
// PrintfStmt is a statement like printf "%3d", 1234.
|
||||||
|
type PrintfStmt struct {
|
||||||
|
Args []Expr
|
||||||
|
Redirect Token
|
||||||
|
Dest Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrintfStmt) String() string {
|
||||||
|
return printString("printf", s.Args, s.Redirect, s.Dest)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExprStmt is statement like a bare function call: my_func(x).
|
||||||
|
type ExprStmt struct {
|
||||||
|
Expr Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ExprStmt) String() string {
|
||||||
|
return s.Expr.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IfStmt is an if or if-else statement.
|
||||||
|
type IfStmt struct {
|
||||||
|
Cond Expr
|
||||||
|
Body Stmts
|
||||||
|
Else Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *IfStmt) String() string {
|
||||||
|
str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
|
||||||
|
if len(s.Else) > 0 {
|
||||||
|
str += " else {\n" + s.Else.String() + "}"
|
||||||
|
}
|
||||||
|
return str
|
||||||
|
}
|
||||||
|
|
||||||
|
// ForStmt is a C-like for loop: for (i=0; i<10; i++) print i.
|
||||||
|
type ForStmt struct {
|
||||||
|
Pre Stmt
|
||||||
|
Cond Expr
|
||||||
|
Post Stmt
|
||||||
|
Body Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ForStmt) String() string {
|
||||||
|
preStr := ""
|
||||||
|
if s.Pre != nil {
|
||||||
|
preStr = s.Pre.String()
|
||||||
|
}
|
||||||
|
condStr := ""
|
||||||
|
if s.Cond != nil {
|
||||||
|
condStr = " " + trimParens(s.Cond.String())
|
||||||
|
}
|
||||||
|
postStr := ""
|
||||||
|
if s.Post != nil {
|
||||||
|
postStr = " " + s.Post.String()
|
||||||
|
}
|
||||||
|
return "for (" + preStr + ";" + condStr + ";" + postStr + ") {\n" + s.Body.String() + "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
// ForInStmt is a for loop like for (k in a) print k, a[k].
|
||||||
|
type ForInStmt struct {
|
||||||
|
Var *VarExpr
|
||||||
|
Array *ArrayExpr
|
||||||
|
Body Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ForInStmt) String() string {
|
||||||
|
return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
// WhileStmt is a while loop.
|
||||||
|
type WhileStmt struct {
|
||||||
|
Cond Expr
|
||||||
|
Body Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *WhileStmt) String() string {
|
||||||
|
return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
// DoWhileStmt is a do-while loop.
|
||||||
|
type DoWhileStmt struct {
|
||||||
|
Body Stmts
|
||||||
|
Cond Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DoWhileStmt) String() string {
|
||||||
|
return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
// BreakStmt is a break statement.
type BreakStmt struct{}

// String returns the keyword "break".
func (s *BreakStmt) String() string {
	const keyword = "break"
	return keyword
}
|
||||||
|
|
||||||
|
// ContinueStmt is a continue statement.
type ContinueStmt struct{}

// String returns the keyword "continue".
func (s *ContinueStmt) String() string {
	const keyword = "continue"
	return keyword
}
|
||||||
|
|
||||||
|
// NextStmt is a next statement.
type NextStmt struct{}

// String returns the keyword "next".
func (s *NextStmt) String() string {
	const keyword = "next"
	return keyword
}
|
||||||
|
|
||||||
|
// ExitStmt is an exit statement.
|
||||||
|
type ExitStmt struct {
|
||||||
|
Status Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ExitStmt) String() string {
|
||||||
|
var statusStr string
|
||||||
|
if s.Status != nil {
|
||||||
|
statusStr = " " + s.Status.String()
|
||||||
|
}
|
||||||
|
return "exit" + statusStr
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteStmt is a statement like delete a[k].
|
||||||
|
type DeleteStmt struct {
|
||||||
|
Array *ArrayExpr
|
||||||
|
Index []Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DeleteStmt) String() string {
|
||||||
|
indices := make([]string, len(s.Index))
|
||||||
|
for i, index := range s.Index {
|
||||||
|
indices[i] = index.String()
|
||||||
|
}
|
||||||
|
return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]"
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReturnStmt is a return statement.
|
||||||
|
type ReturnStmt struct {
|
||||||
|
Value Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ReturnStmt) String() string {
|
||||||
|
var valueStr string
|
||||||
|
if s.Value != nil {
|
||||||
|
valueStr = " " + s.Value.String()
|
||||||
|
}
|
||||||
|
return "return" + valueStr
|
||||||
|
}
|
||||||
|
|
||||||
|
// BlockStmt is a stand-alone block like { print "x" }.
|
||||||
|
type BlockStmt struct {
|
||||||
|
Body Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BlockStmt) String() string {
|
||||||
|
return "{\n" + s.Body.String() + "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Function is the AST for a user-defined function.
|
||||||
|
type Function struct {
|
||||||
|
Name string
|
||||||
|
Params []string
|
||||||
|
Arrays []bool
|
||||||
|
Body Stmts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Function) String() string {
|
||||||
|
return "function " + f.Name + "(" + strings.Join(f.Params, ", ") + ") {\n" +
|
||||||
|
f.Body.String() + "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
// trimParens removes one pair of enclosing parentheses from s, but only when
// the "(" at the start is closed by the ")" at the very end. Anything else
// is returned unchanged, including strings such as "(a) |getline $(b)" whose
// first and last characters are parentheses that do not belong together;
// stripping those would leave the result unbalanced.
//
// Parentheses inside double-quoted string literals are ignored. The scan is
// deliberately conservative: when it cannot prove that the outer pair
// matches (for example because a regex literal contains a quote), it leaves
// s as it is, which keeps the rendering valid at the cost of a redundant
// pair of parentheses.
func trimParens(s string) string {
	if len(s) < 2 || s[0] != '(' || s[len(s)-1] != ')' {
		return s
	}
	last := len(s) - 1
	depth := 0
	inString := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if inString {
			switch c {
			case '\\':
				i++ // skip the escaped character
			case '"':
				inString = false
			}
			continue
		}
		switch c {
		case '"':
			inString = true
		case '(':
			depth++
		case ')':
			depth--
			if depth == 0 && i != last {
				// The leading "(" is closed before the end of s, so the
				// first and last characters are not a pair.
				return s
			}
		}
	}
	if depth != 0 || inString {
		return s
	}
	return s[1:last]
}
|
100
src/tool/awk/internal/ast/specialvars.go
Normal file
100
src/tool/awk/internal/ast/specialvars.go
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
// Special variable constants
|
||||||
|
|
||||||
|
package ast
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Indexes of the special variables. V_ILLEGAL is the zero value and does not
// name a variable; V_LAST equals the highest valid index.
const (
	V_ILLEGAL = iota
	V_ARGC
	V_CONVFMT
	V_FILENAME
	V_FNR
	V_FS
	V_INPUTMODE
	V_NF
	V_NR
	V_OFMT
	V_OFS
	V_ORS
	V_OUTPUTMODE
	V_RLENGTH
	V_RS
	V_RSTART
	V_RT
	V_SUBSEP

	V_LAST = V_SUBSEP
)
|
||||||
|
|
||||||
|
var specialVars = map[string]int{
|
||||||
|
"ARGC": V_ARGC,
|
||||||
|
"CONVFMT": V_CONVFMT,
|
||||||
|
"FILENAME": V_FILENAME,
|
||||||
|
"FNR": V_FNR,
|
||||||
|
"FS": V_FS,
|
||||||
|
"INPUTMODE": V_INPUTMODE,
|
||||||
|
"NF": V_NF,
|
||||||
|
"NR": V_NR,
|
||||||
|
"OFMT": V_OFMT,
|
||||||
|
"OFS": V_OFS,
|
||||||
|
"ORS": V_ORS,
|
||||||
|
"OUTPUTMODE": V_OUTPUTMODE,
|
||||||
|
"RLENGTH": V_RLENGTH,
|
||||||
|
"RS": V_RS,
|
||||||
|
"RSTART": V_RSTART,
|
||||||
|
"RT": V_RT,
|
||||||
|
"SUBSEP": V_SUBSEP,
|
||||||
|
}
|
||||||
|
|
||||||
|
// SpecialVarIndex returns the "index" of the special variable, or 0
|
||||||
|
// if it's not a special variable.
|
||||||
|
func SpecialVarIndex(name string) int {
|
||||||
|
return specialVars[name]
|
||||||
|
}
|
||||||
|
|
||||||
|
// SpecialVarName returns the name of the special variable by index.
|
||||||
|
func SpecialVarName(index int) string {
|
||||||
|
switch index {
|
||||||
|
case V_ILLEGAL:
|
||||||
|
return "ILLEGAL"
|
||||||
|
case V_ARGC:
|
||||||
|
return "ARGC"
|
||||||
|
case V_CONVFMT:
|
||||||
|
return "CONVFMT"
|
||||||
|
case V_FILENAME:
|
||||||
|
return "FILENAME"
|
||||||
|
case V_FNR:
|
||||||
|
return "FNR"
|
||||||
|
case V_FS:
|
||||||
|
return "FS"
|
||||||
|
case V_INPUTMODE:
|
||||||
|
return "INPUTMODE"
|
||||||
|
case V_NF:
|
||||||
|
return "NF"
|
||||||
|
case V_NR:
|
||||||
|
return "NR"
|
||||||
|
case V_OFMT:
|
||||||
|
return "OFMT"
|
||||||
|
case V_OFS:
|
||||||
|
return "OFS"
|
||||||
|
case V_ORS:
|
||||||
|
return "ORS"
|
||||||
|
case V_OUTPUTMODE:
|
||||||
|
return "OUTPUTMODE"
|
||||||
|
case V_RLENGTH:
|
||||||
|
return "RLENGTH"
|
||||||
|
case V_RS:
|
||||||
|
return "RS"
|
||||||
|
case V_RSTART:
|
||||||
|
return "RSTART"
|
||||||
|
case V_RT:
|
||||||
|
return "RT"
|
||||||
|
case V_SUBSEP:
|
||||||
|
return "SUBSEP"
|
||||||
|
default:
|
||||||
|
return fmt.Sprintf("<unknown special var %d>", index)
|
||||||
|
}
|
||||||
|
}
|
46
src/tool/awk/internal/ast/specialvars_test.go
Normal file
46
src/tool/awk/internal/ast/specialvars_test.go
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
package ast
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNameIndex(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
index int
|
||||||
|
}{
|
||||||
|
{"ILLEGAL", V_ILLEGAL},
|
||||||
|
{"ARGC", V_ARGC},
|
||||||
|
{"CONVFMT", V_CONVFMT},
|
||||||
|
{"FILENAME", V_FILENAME},
|
||||||
|
{"FNR", V_FNR},
|
||||||
|
{"FS", V_FS},
|
||||||
|
{"INPUTMODE", V_INPUTMODE},
|
||||||
|
{"NF", V_NF},
|
||||||
|
{"NR", V_NR},
|
||||||
|
{"OFMT", V_OFMT},
|
||||||
|
{"OFS", V_OFS},
|
||||||
|
{"ORS", V_ORS},
|
||||||
|
{"OUTPUTMODE", V_OUTPUTMODE},
|
||||||
|
{"RLENGTH", V_RLENGTH},
|
||||||
|
{"RS", V_RS},
|
||||||
|
{"RSTART", V_RSTART},
|
||||||
|
{"RT", V_RT},
|
||||||
|
{"SUBSEP", V_SUBSEP},
|
||||||
|
{"<unknown special var 42>", 42},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
name := SpecialVarName(test.index)
|
||||||
|
if name != test.name {
|
||||||
|
t.Errorf("got %q, want %q", name, test.name)
|
||||||
|
}
|
||||||
|
if test.index <= V_LAST {
|
||||||
|
index := SpecialVarIndex(test.name)
|
||||||
|
if index != test.index {
|
||||||
|
t.Errorf("got %d, want %d", index, test.index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
1005
src/tool/awk/internal/compiler/compiler.go
Normal file
1005
src/tool/awk/internal/compiler/compiler.go
Normal file
File diff suppressed because it is too large
Load diff
495
src/tool/awk/internal/compiler/disassembler.go
Normal file
495
src/tool/awk/internal/compiler/disassembler.go
Normal file
|
@ -0,0 +1,495 @@
|
||||||
|
// Disassembles compiled program to text assembly instructions
|
||||||
|
|
||||||
|
package compiler
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/internal/ast"
|
||||||
|
"github.com/benhoyt/goawk/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Disassemble writes a human-readable form of the program's virtual machine
|
||||||
|
// instructions to writer.
|
||||||
|
func (p *Program) Disassemble(writer io.Writer) error {
|
||||||
|
if p.Begin != nil {
|
||||||
|
d := &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: p.Begin,
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
}
|
||||||
|
err := d.disassemble("BEGIN")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, action := range p.Actions {
|
||||||
|
switch len(action.Pattern) {
|
||||||
|
case 0:
|
||||||
|
// Nothing to do here.
|
||||||
|
case 1:
|
||||||
|
d := &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: action.Pattern[0],
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
}
|
||||||
|
err := d.disassemble("pattern")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
case 2:
|
||||||
|
d := &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: action.Pattern[0],
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
}
|
||||||
|
err := d.disassemble("start")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
d = &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: action.Pattern[1],
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
}
|
||||||
|
err = d.disassemble("stop")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(action.Body) > 0 {
|
||||||
|
d := &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: action.Body,
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
}
|
||||||
|
err := d.disassemble("{ body }")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.End != nil {
|
||||||
|
d := &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: p.End,
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
}
|
||||||
|
err := d.disassemble("END")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, f := range p.Functions {
|
||||||
|
d := &disassembler{
|
||||||
|
program: p,
|
||||||
|
writer: writer,
|
||||||
|
code: f.Body,
|
||||||
|
nativeFuncNames: p.nativeFuncNames,
|
||||||
|
funcIndex: i,
|
||||||
|
}
|
||||||
|
err := d.disassemble("function " + f.Name)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disassembles a single block of opcodes.
|
||||||
|
type disassembler struct {
|
||||||
|
program *Program
|
||||||
|
writer io.Writer
|
||||||
|
code []Opcode
|
||||||
|
nativeFuncNames []string
|
||||||
|
funcIndex int
|
||||||
|
ip int
|
||||||
|
opAddr int
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *disassembler) disassemble(prefix string) error {
|
||||||
|
if prefix != "" {
|
||||||
|
d.writef(" // %s\n", prefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
for d.ip < len(d.code) && d.err == nil {
|
||||||
|
d.opAddr = d.ip
|
||||||
|
op := d.fetch()
|
||||||
|
|
||||||
|
switch op {
|
||||||
|
case Num:
|
||||||
|
index := d.fetch()
|
||||||
|
num := d.program.Nums[index]
|
||||||
|
if num == float64(int(num)) {
|
||||||
|
d.writeOpf("Num %d (%d)", int(num), index)
|
||||||
|
} else {
|
||||||
|
d.writeOpf("Num %.6g (%d)", num, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
case Str:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("Str %q (%d)", d.program.Strs[index], index)
|
||||||
|
|
||||||
|
case FieldInt:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("FieldInt %d", index)
|
||||||
|
|
||||||
|
case FieldByNameStr:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("FieldByNameStr %q (%d)", d.program.Strs[index], index)
|
||||||
|
|
||||||
|
case Global:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("Global %s", d.program.scalarNames[index])
|
||||||
|
|
||||||
|
case Local:
|
||||||
|
index := int(d.fetch())
|
||||||
|
d.writeOpf("Local %s", d.localName(index))
|
||||||
|
|
||||||
|
case Special:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("Special %s", ast.SpecialVarName(int(index)))
|
||||||
|
|
||||||
|
case ArrayGlobal:
|
||||||
|
arrayIndex := d.fetch()
|
||||||
|
d.writeOpf("ArrayGlobal %s", d.program.arrayNames[arrayIndex])
|
||||||
|
|
||||||
|
case ArrayLocal:
|
||||||
|
arrayIndex := d.fetch()
|
||||||
|
d.writeOpf("ArrayLocal %s", d.localArrayName(int(arrayIndex)))
|
||||||
|
|
||||||
|
case InGlobal:
|
||||||
|
arrayIndex := d.fetch()
|
||||||
|
d.writeOpf("InGlobal %s", d.program.arrayNames[arrayIndex])
|
||||||
|
|
||||||
|
case InLocal:
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("InLocal %s", d.localArrayName(arrayIndex))
|
||||||
|
|
||||||
|
case AssignGlobal:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("AssignGlobal %s", d.program.scalarNames[index])
|
||||||
|
|
||||||
|
case AssignLocal:
|
||||||
|
index := int(d.fetch())
|
||||||
|
d.writeOpf("AssignLocal %s", d.localName(index))
|
||||||
|
|
||||||
|
case AssignSpecial:
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("AssignSpecial %s", ast.SpecialVarName(int(index)))
|
||||||
|
|
||||||
|
case AssignArrayGlobal:
|
||||||
|
arrayIndex := d.fetch()
|
||||||
|
d.writeOpf("AssignArrayGlobal %s", d.program.arrayNames[arrayIndex])
|
||||||
|
|
||||||
|
case AssignArrayLocal:
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex))
|
||||||
|
|
||||||
|
case Delete:
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex))
|
||||||
|
|
||||||
|
case DeleteAll:
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex))
|
||||||
|
|
||||||
|
case IncrField:
|
||||||
|
amount := d.fetch()
|
||||||
|
d.writeOpf("IncrField %d", amount)
|
||||||
|
|
||||||
|
case IncrGlobal:
|
||||||
|
amount := d.fetch()
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("IncrGlobal %d %s", amount, d.program.scalarNames[index])
|
||||||
|
|
||||||
|
case IncrLocal:
|
||||||
|
amount := d.fetch()
|
||||||
|
index := int(d.fetch())
|
||||||
|
d.writeOpf("IncrLocal %d %s", amount, d.localName(index))
|
||||||
|
|
||||||
|
case IncrSpecial:
|
||||||
|
amount := d.fetch()
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("IncrSpecial %d %s", amount, ast.SpecialVarName(int(index)))
|
||||||
|
|
||||||
|
case IncrArrayGlobal:
|
||||||
|
amount := d.fetch()
|
||||||
|
arrayIndex := d.fetch()
|
||||||
|
d.writeOpf("IncrArrayGlobal %d %s", amount, d.program.arrayNames[arrayIndex])
|
||||||
|
|
||||||
|
case IncrArrayLocal:
|
||||||
|
amount := d.fetch()
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("IncrArrayLocal %d %s", amount, d.localArrayName(arrayIndex))
|
||||||
|
|
||||||
|
case AugAssignField:
|
||||||
|
operation := AugOp(d.fetch())
|
||||||
|
d.writeOpf("AugAssignField %s", operation)
|
||||||
|
|
||||||
|
case AugAssignGlobal:
|
||||||
|
operation := AugOp(d.fetch())
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("AugAssignGlobal %s %s", operation, d.program.scalarNames[index])
|
||||||
|
|
||||||
|
case AugAssignLocal:
|
||||||
|
operation := AugOp(d.fetch())
|
||||||
|
index := int(d.fetch())
|
||||||
|
d.writeOpf("AugAssignLocal %s %s", operation, d.localName(index))
|
||||||
|
|
||||||
|
case AugAssignSpecial:
|
||||||
|
operation := AugOp(d.fetch())
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("AugAssignSpecial %s %d", operation, ast.SpecialVarName(int(index)))
|
||||||
|
|
||||||
|
case AugAssignArrayGlobal:
|
||||||
|
operation := AugOp(d.fetch())
|
||||||
|
arrayIndex := d.fetch()
|
||||||
|
d.writeOpf("AugAssignArrayGlobal %s %s", operation, d.program.arrayNames[arrayIndex])
|
||||||
|
|
||||||
|
case AugAssignArrayLocal:
|
||||||
|
operation := AugOp(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("AugAssignArrayLocal %s %s", operation, d.localArrayName(arrayIndex))
|
||||||
|
|
||||||
|
case Regex:
|
||||||
|
regexIndex := d.fetch()
|
||||||
|
d.writeOpf("Regex %q (%d)", d.program.Regexes[regexIndex], regexIndex)
|
||||||
|
|
||||||
|
case IndexMulti:
|
||||||
|
num := d.fetch()
|
||||||
|
d.writeOpf("IndexMulti %d", num)
|
||||||
|
|
||||||
|
case ConcatMulti:
|
||||||
|
num := d.fetch()
|
||||||
|
d.writeOpf("ConcatMulti %d", num)
|
||||||
|
|
||||||
|
case Jump:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("Jump 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpFalse:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpFalse 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpTrue:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpTrue 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpEquals:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpEquals 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpNotEquals:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpNotEquals 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpLess:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpLess 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpGreater:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpGreater 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpLessOrEqual:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpLessOrEqual 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case JumpGreaterOrEqual:
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset))
|
||||||
|
|
||||||
|
case ForIn:
|
||||||
|
varScope := ast.VarScope(d.fetch())
|
||||||
|
varIndex := int(d.fetch())
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
offset := d.fetch()
|
||||||
|
d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset))
|
||||||
|
|
||||||
|
case CallBuiltin:
|
||||||
|
builtinOp := BuiltinOp(d.fetch())
|
||||||
|
d.writeOpf("CallBuiltin %s", builtinOp)
|
||||||
|
|
||||||
|
case CallSplit:
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex))
|
||||||
|
|
||||||
|
case CallSplitSep:
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex))
|
||||||
|
|
||||||
|
case CallSprintf:
|
||||||
|
numArgs := d.fetch()
|
||||||
|
d.writeOpf("CallSprintf %d", numArgs)
|
||||||
|
|
||||||
|
case CallUser:
|
||||||
|
funcIndex := d.fetch()
|
||||||
|
numArrayArgs := int(d.fetch())
|
||||||
|
var arrayArgs []string
|
||||||
|
for i := 0; i < numArrayArgs; i++ {
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex))
|
||||||
|
}
|
||||||
|
d.writeOpf("CallUser %s [%s]", d.program.Functions[funcIndex].Name, strings.Join(arrayArgs, ", "))
|
||||||
|
|
||||||
|
case CallNative:
|
||||||
|
funcIndex := d.fetch()
|
||||||
|
numArgs := d.fetch()
|
||||||
|
d.writeOpf("CallNative %s %d", d.nativeFuncNames[funcIndex], numArgs)
|
||||||
|
|
||||||
|
case Nulls:
|
||||||
|
numNulls := d.fetch()
|
||||||
|
d.writeOpf("Nulls %d", numNulls)
|
||||||
|
|
||||||
|
case Print:
|
||||||
|
numArgs := d.fetch()
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
if redirect == lexer.ILLEGAL {
|
||||||
|
d.writeOpf("Print %d", numArgs)
|
||||||
|
} else {
|
||||||
|
d.writeOpf("Print %d %s", numArgs, redirect)
|
||||||
|
}
|
||||||
|
|
||||||
|
case Printf:
|
||||||
|
numArgs := d.fetch()
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
if redirect == lexer.ILLEGAL {
|
||||||
|
d.writeOpf("Printf %d", numArgs)
|
||||||
|
} else {
|
||||||
|
d.writeOpf("Printf %d %s", numArgs, redirect)
|
||||||
|
}
|
||||||
|
|
||||||
|
case Getline:
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
d.writeOpf("Getline %s", redirect)
|
||||||
|
|
||||||
|
case GetlineField:
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
d.writeOpf("GetlineField %s", redirect)
|
||||||
|
|
||||||
|
case GetlineGlobal:
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("GetlineGlobal %s %s", redirect, d.program.scalarNames[index])
|
||||||
|
|
||||||
|
case GetlineLocal:
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
index := int(d.fetch())
|
||||||
|
d.writeOpf("GetlineLocal %s %s", redirect, d.localName(index))
|
||||||
|
|
||||||
|
case GetlineSpecial:
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
index := d.fetch()
|
||||||
|
d.writeOpf("GetlineSpecial %s %s", redirect, ast.SpecialVarName(int(index)))
|
||||||
|
|
||||||
|
case GetlineArray:
|
||||||
|
redirect := lexer.Token(d.fetch())
|
||||||
|
arrayScope := ast.VarScope(d.fetch())
|
||||||
|
arrayIndex := int(d.fetch())
|
||||||
|
d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex))
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Handles all other opcodes with no arguments
|
||||||
|
d.writeOpf("%s", op)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
d.writef("\n")
|
||||||
|
return d.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the next opcode and increment the "instruction pointer".
|
||||||
|
func (d *disassembler) fetch() Opcode {
|
||||||
|
op := d.code[d.ip]
|
||||||
|
d.ip++
|
||||||
|
return op
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write formatted string to the disassembly output.
|
||||||
|
func (d *disassembler) writef(format string, args ...interface{}) {
|
||||||
|
if d.err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_, d.err = fmt.Fprintf(d.writer, format, args...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write formatted opcode (with address and newline) to disassembly output.
|
||||||
|
func (d *disassembler) writeOpf(format string, args ...interface{}) {
|
||||||
|
if d.err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
addrStr := fmt.Sprintf("%04x", d.opAddr)
|
||||||
|
_, d.err = fmt.Fprintf(d.writer, addrStr+" "+format+"\n", args...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the scalar variable name described by scope and index.
|
||||||
|
func (d *disassembler) varName(scope ast.VarScope, index int) string {
|
||||||
|
switch scope {
|
||||||
|
case ast.ScopeGlobal:
|
||||||
|
return d.program.scalarNames[index]
|
||||||
|
case ast.ScopeLocal:
|
||||||
|
return d.localName(index)
|
||||||
|
default: // ScopeSpecial
|
||||||
|
return ast.SpecialVarName(index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the local variable name with the given index.
|
||||||
|
func (d *disassembler) localName(index int) string {
|
||||||
|
f := d.program.Functions[d.funcIndex]
|
||||||
|
n := 0
|
||||||
|
for i, p := range f.Params {
|
||||||
|
if f.Arrays[i] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if n == index {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
panic(fmt.Sprintf("unexpected local variable index %d", index))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the array variable name describes by scope and index.
|
||||||
|
func (d *disassembler) arrayName(scope ast.VarScope, index int) string {
|
||||||
|
if scope == ast.ScopeLocal {
|
||||||
|
return d.localArrayName(index)
|
||||||
|
}
|
||||||
|
return d.program.arrayNames[index]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the local array name with the given index.
|
||||||
|
func (d *disassembler) localArrayName(index int) string {
|
||||||
|
f := d.program.Functions[d.funcIndex]
|
||||||
|
n := 0
|
||||||
|
for i, p := range f.Params {
|
||||||
|
if !f.Arrays[i] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if n == index {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
panic(fmt.Sprintf("unexpected local array index %d", index))
|
||||||
|
}
|
51
src/tool/awk/internal/compiler/disassembler_test.go
Normal file
51
src/tool/awk/internal/compiler/disassembler_test.go
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
package compiler
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDisassembler(t *testing.T) {
|
||||||
|
// Note: this doesn't really test the disassembly, just that each opcode
|
||||||
|
// disassembly includes the opcode name, to help catch silly typos.
|
||||||
|
for op := Nop; op < EndOpcode; op++ {
|
||||||
|
t.Run(op.String(), func(t *testing.T) {
|
||||||
|
p := Program{
|
||||||
|
Begin: []Opcode{op, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
Functions: []Function{
|
||||||
|
{
|
||||||
|
Name: "f",
|
||||||
|
Params: []string{"a", "k"},
|
||||||
|
Arrays: []bool{true, false},
|
||||||
|
NumScalars: 1,
|
||||||
|
NumArrays: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Nums: []float64{0},
|
||||||
|
Strs: []string{""},
|
||||||
|
Regexes: []*regexp.Regexp{regexp.MustCompile("")},
|
||||||
|
scalarNames: []string{"s"},
|
||||||
|
arrayNames: []string{"a"},
|
||||||
|
nativeFuncNames: []string{"n"},
|
||||||
|
}
|
||||||
|
var buf bytes.Buffer
|
||||||
|
err := p.Disassemble(&buf)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error disassembling opcode %s: %v", op, err)
|
||||||
|
}
|
||||||
|
lines := strings.Split(buf.String(), "\n")
|
||||||
|
if strings.TrimSpace(lines[0]) != "// BEGIN" {
|
||||||
|
t.Fatalf("first line should be \"// BEGIN\", not %q", lines[0])
|
||||||
|
}
|
||||||
|
fields := strings.Fields(lines[1])
|
||||||
|
if fields[0] != "0000" {
|
||||||
|
t.Fatalf("address should be \"0000\", not %q", fields[0])
|
||||||
|
}
|
||||||
|
if fields[1] != op.String() {
|
||||||
|
t.Fatalf("opcode name should be %q, not %q", op.String(), fields[1])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
174
src/tool/awk/internal/compiler/opcode_string.go
Normal file
174
src/tool/awk/internal/compiler/opcode_string.go
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
// Code generated by "stringer -type=Opcode,AugOp,BuiltinOp"; DO NOT EDIT.
|
||||||
|
|
||||||
|
package compiler
|
||||||
|
|
||||||
|
import "strconv"
|
||||||
|
|
||||||
|
func _() {
|
||||||
|
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||||
|
// Re-run the stringer command to generate them again.
|
||||||
|
var x [1]struct{}
|
||||||
|
_ = x[Nop-0]
|
||||||
|
_ = x[Num-1]
|
||||||
|
_ = x[Str-2]
|
||||||
|
_ = x[Dupe-3]
|
||||||
|
_ = x[Drop-4]
|
||||||
|
_ = x[Swap-5]
|
||||||
|
_ = x[Field-6]
|
||||||
|
_ = x[FieldInt-7]
|
||||||
|
_ = x[FieldByName-8]
|
||||||
|
_ = x[FieldByNameStr-9]
|
||||||
|
_ = x[Global-10]
|
||||||
|
_ = x[Local-11]
|
||||||
|
_ = x[Special-12]
|
||||||
|
_ = x[ArrayGlobal-13]
|
||||||
|
_ = x[ArrayLocal-14]
|
||||||
|
_ = x[InGlobal-15]
|
||||||
|
_ = x[InLocal-16]
|
||||||
|
_ = x[AssignField-17]
|
||||||
|
_ = x[AssignGlobal-18]
|
||||||
|
_ = x[AssignLocal-19]
|
||||||
|
_ = x[AssignSpecial-20]
|
||||||
|
_ = x[AssignArrayGlobal-21]
|
||||||
|
_ = x[AssignArrayLocal-22]
|
||||||
|
_ = x[Delete-23]
|
||||||
|
_ = x[DeleteAll-24]
|
||||||
|
_ = x[IncrField-25]
|
||||||
|
_ = x[IncrGlobal-26]
|
||||||
|
_ = x[IncrLocal-27]
|
||||||
|
_ = x[IncrSpecial-28]
|
||||||
|
_ = x[IncrArrayGlobal-29]
|
||||||
|
_ = x[IncrArrayLocal-30]
|
||||||
|
_ = x[AugAssignField-31]
|
||||||
|
_ = x[AugAssignGlobal-32]
|
||||||
|
_ = x[AugAssignLocal-33]
|
||||||
|
_ = x[AugAssignSpecial-34]
|
||||||
|
_ = x[AugAssignArrayGlobal-35]
|
||||||
|
_ = x[AugAssignArrayLocal-36]
|
||||||
|
_ = x[Regex-37]
|
||||||
|
_ = x[IndexMulti-38]
|
||||||
|
_ = x[ConcatMulti-39]
|
||||||
|
_ = x[Add-40]
|
||||||
|
_ = x[Subtract-41]
|
||||||
|
_ = x[Multiply-42]
|
||||||
|
_ = x[Divide-43]
|
||||||
|
_ = x[Power-44]
|
||||||
|
_ = x[Modulo-45]
|
||||||
|
_ = x[Equals-46]
|
||||||
|
_ = x[NotEquals-47]
|
||||||
|
_ = x[Less-48]
|
||||||
|
_ = x[Greater-49]
|
||||||
|
_ = x[LessOrEqual-50]
|
||||||
|
_ = x[GreaterOrEqual-51]
|
||||||
|
_ = x[Concat-52]
|
||||||
|
_ = x[Match-53]
|
||||||
|
_ = x[NotMatch-54]
|
||||||
|
_ = x[Not-55]
|
||||||
|
_ = x[UnaryMinus-56]
|
||||||
|
_ = x[UnaryPlus-57]
|
||||||
|
_ = x[Boolean-58]
|
||||||
|
_ = x[Jump-59]
|
||||||
|
_ = x[JumpFalse-60]
|
||||||
|
_ = x[JumpTrue-61]
|
||||||
|
_ = x[JumpEquals-62]
|
||||||
|
_ = x[JumpNotEquals-63]
|
||||||
|
_ = x[JumpLess-64]
|
||||||
|
_ = x[JumpGreater-65]
|
||||||
|
_ = x[JumpLessOrEqual-66]
|
||||||
|
_ = x[JumpGreaterOrEqual-67]
|
||||||
|
_ = x[Next-68]
|
||||||
|
_ = x[Exit-69]
|
||||||
|
_ = x[ForIn-70]
|
||||||
|
_ = x[BreakForIn-71]
|
||||||
|
_ = x[CallBuiltin-72]
|
||||||
|
_ = x[CallSplit-73]
|
||||||
|
_ = x[CallSplitSep-74]
|
||||||
|
_ = x[CallSprintf-75]
|
||||||
|
_ = x[CallUser-76]
|
||||||
|
_ = x[CallNative-77]
|
||||||
|
_ = x[Return-78]
|
||||||
|
_ = x[ReturnNull-79]
|
||||||
|
_ = x[Nulls-80]
|
||||||
|
_ = x[Print-81]
|
||||||
|
_ = x[Printf-82]
|
||||||
|
_ = x[Getline-83]
|
||||||
|
_ = x[GetlineField-84]
|
||||||
|
_ = x[GetlineGlobal-85]
|
||||||
|
_ = x[GetlineLocal-86]
|
||||||
|
_ = x[GetlineSpecial-87]
|
||||||
|
_ = x[GetlineArray-88]
|
||||||
|
_ = x[EndOpcode-89]
|
||||||
|
}
|
||||||
|
|
||||||
|
const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode"
|
||||||
|
|
||||||
|
var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826}
|
||||||
|
|
||||||
|
func (i Opcode) String() string {
|
||||||
|
if i < 0 || i >= Opcode(len(_Opcode_index)-1) {
|
||||||
|
return "Opcode(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||||
|
}
|
||||||
|
return _Opcode_name[_Opcode_index[i]:_Opcode_index[i+1]]
|
||||||
|
}
|
||||||
|
func _() {
|
||||||
|
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||||
|
// Re-run the stringer command to generate them again.
|
||||||
|
var x [1]struct{}
|
||||||
|
_ = x[AugOpAdd-0]
|
||||||
|
_ = x[AugOpSub-1]
|
||||||
|
_ = x[AugOpMul-2]
|
||||||
|
_ = x[AugOpDiv-3]
|
||||||
|
_ = x[AugOpPow-4]
|
||||||
|
_ = x[AugOpMod-5]
|
||||||
|
}
|
||||||
|
|
||||||
|
const _AugOp_name = "AugOpAddAugOpSubAugOpMulAugOpDivAugOpPowAugOpMod"
|
||||||
|
|
||||||
|
var _AugOp_index = [...]uint8{0, 8, 16, 24, 32, 40, 48}
|
||||||
|
|
||||||
|
func (i AugOp) String() string {
|
||||||
|
if i < 0 || i >= AugOp(len(_AugOp_index)-1) {
|
||||||
|
return "AugOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||||
|
}
|
||||||
|
return _AugOp_name[_AugOp_index[i]:_AugOp_index[i+1]]
|
||||||
|
}
|
||||||
|
func _() {
|
||||||
|
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||||
|
// Re-run the stringer command to generate them again.
|
||||||
|
var x [1]struct{}
|
||||||
|
_ = x[BuiltinAtan2-0]
|
||||||
|
_ = x[BuiltinClose-1]
|
||||||
|
_ = x[BuiltinCos-2]
|
||||||
|
_ = x[BuiltinExp-3]
|
||||||
|
_ = x[BuiltinFflush-4]
|
||||||
|
_ = x[BuiltinFflushAll-5]
|
||||||
|
_ = x[BuiltinGsub-6]
|
||||||
|
_ = x[BuiltinIndex-7]
|
||||||
|
_ = x[BuiltinInt-8]
|
||||||
|
_ = x[BuiltinLength-9]
|
||||||
|
_ = x[BuiltinLengthArg-10]
|
||||||
|
_ = x[BuiltinLog-11]
|
||||||
|
_ = x[BuiltinMatch-12]
|
||||||
|
_ = x[BuiltinRand-13]
|
||||||
|
_ = x[BuiltinSin-14]
|
||||||
|
_ = x[BuiltinSqrt-15]
|
||||||
|
_ = x[BuiltinSrand-16]
|
||||||
|
_ = x[BuiltinSrandSeed-17]
|
||||||
|
_ = x[BuiltinSub-18]
|
||||||
|
_ = x[BuiltinSubstr-19]
|
||||||
|
_ = x[BuiltinSubstrLength-20]
|
||||||
|
_ = x[BuiltinSystem-21]
|
||||||
|
_ = x[BuiltinTolower-22]
|
||||||
|
_ = x[BuiltinToupper-23]
|
||||||
|
}
|
||||||
|
|
||||||
|
const _BuiltinOp_name = "BuiltinAtan2BuiltinCloseBuiltinCosBuiltinExpBuiltinFflushBuiltinFflushAllBuiltinGsubBuiltinIndexBuiltinIntBuiltinLengthBuiltinLengthArgBuiltinLogBuiltinMatchBuiltinRandBuiltinSinBuiltinSqrtBuiltinSrandBuiltinSrandSeedBuiltinSubBuiltinSubstrBuiltinSubstrLengthBuiltinSystemBuiltinTolowerBuiltinToupper"
|
||||||
|
|
||||||
|
var _BuiltinOp_index = [...]uint16{0, 12, 24, 34, 44, 57, 73, 84, 96, 106, 119, 135, 145, 157, 168, 178, 189, 201, 217, 227, 240, 259, 272, 286, 300}
|
||||||
|
|
||||||
|
func (i BuiltinOp) String() string {
|
||||||
|
if i < 0 || i >= BuiltinOp(len(_BuiltinOp_index)-1) {
|
||||||
|
return "BuiltinOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||||
|
}
|
||||||
|
return _BuiltinOp_name[_BuiltinOp_index[i]:_BuiltinOp_index[i+1]]
|
||||||
|
}
|
180
src/tool/awk/internal/compiler/opcodes.go
Normal file
180
src/tool/awk/internal/compiler/opcodes.go
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
package compiler
|
||||||
|
|
||||||
|
//go:generate go run golang.org/x/tools/cmd/stringer@v0.1.8 -type=Opcode,AugOp,BuiltinOp
|
||||||
|
|
||||||
|
// Opcode is a single word of virtual machine code: either an instruction or
// one of an instruction's arguments. The comment beside each constant below
// lists the argument words that instruction consumes, in order.
//
// This would normally be called "bytecode", but that term is avoided here
// because each opcode is a 32-bit word rather than an 8-bit byte.
//
// Various bit widths were tested and 32 bits was believed to be the fastest.
// It also means jump offsets overflowing is not a practical concern (that is
// tested in the compiler, but who's going to have an AWK program bigger than
// 2GB?).
type Opcode int32

// Opcode values. They are consecutive from zero, and the generated String
// method is built from this exact order, so re-run "go generate" after adding,
// removing, or reordering entries.
const (
	Nop Opcode = iota

	// Stack operations
	Num // numIndex
	Str // strIndex
	Dupe
	Drop
	Swap

	// Fetch a field, variable, or array item
	Field
	FieldInt // index
	FieldByName
	FieldByNameStr // strIndex
	Global         // index
	Local          // index
	Special        // index
	ArrayGlobal    // arrayIndex
	ArrayLocal     // arrayIndex
	InGlobal       // arrayIndex
	InLocal        // arrayIndex

	// Assign a field, variable, or array item
	AssignField
	AssignGlobal      // index
	AssignLocal       // index
	AssignSpecial     // index
	AssignArrayGlobal // arrayIndex
	AssignArrayLocal  // arrayIndex

	// Delete statement
	Delete    // arrayScope arrayIndex
	DeleteAll // arrayScope arrayIndex

	// Post-increment and post-decrement
	IncrField       // amount
	IncrGlobal      // amount index
	IncrLocal       // amount index
	IncrSpecial     // amount index
	IncrArrayGlobal // amount arrayIndex
	IncrArrayLocal  // amount arrayIndex

	// Augmented assignment (also used for pre-increment and pre-decrement)
	AugAssignField       // augOp
	AugAssignGlobal      // augOp index
	AugAssignLocal       // augOp index
	AugAssignSpecial     // augOp index
	AugAssignArrayGlobal // augOp arrayIndex
	AugAssignArrayLocal  // augOp arrayIndex

	// Stand-alone regex expression /foo/
	Regex // regexIndex

	// Multi-index concatenation
	IndexMulti // num

	// Multi-value concatenation
	ConcatMulti // num

	// Binary operators
	Add
	Subtract
	Multiply
	Divide
	Power
	Modulo
	Equals
	NotEquals
	Less
	Greater
	LessOrEqual
	GreaterOrEqual
	Concat
	Match
	NotMatch

	// Unary operators
	Not
	UnaryMinus
	UnaryPlus
	Boolean

	// Control flow
	Jump               // offset
	JumpFalse          // offset
	JumpTrue           // offset
	JumpEquals         // offset
	JumpNotEquals      // offset
	JumpLess           // offset
	JumpGreater        // offset
	JumpLessOrEqual    // offset
	JumpGreaterOrEqual // offset
	Next
	Exit
	ForIn // varScope varIndex arrayScope arrayIndex offset
	BreakForIn

	// Builtin functions
	CallBuiltin  // builtinOp
	CallSplit    // arrayScope arrayIndex
	CallSplitSep // arrayScope arrayIndex
	CallSprintf  // numArgs

	// User and native functions
	CallUser   // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...]
	CallNative // funcIndex numArgs
	Return
	ReturnNull
	Nulls // numNulls

	// Print, printf, and getline
	Print          // numArgs redirect
	Printf         // numArgs redirect
	Getline        // redirect
	GetlineField   // redirect
	GetlineGlobal  // redirect index
	GetlineLocal   // redirect index
	GetlineSpecial // redirect index
	GetlineArray   // redirect arrayScope arrayIndex

	// EndOpcode is one past the last real opcode.
	EndOpcode
)
|
||||||
|
|
||||||
|
// AugOp represents an augmented assignment operation.
|
||||||
|
type AugOp Opcode
|
||||||
|
|
||||||
|
const (
|
||||||
|
AugOpAdd AugOp = iota
|
||||||
|
AugOpSub
|
||||||
|
AugOpMul
|
||||||
|
AugOpDiv
|
||||||
|
AugOpPow
|
||||||
|
AugOpMod
|
||||||
|
)
|
||||||
|
|
||||||
|
// BuiltinOp represents a builtin function call.
|
||||||
|
type BuiltinOp Opcode
|
||||||
|
|
||||||
|
const (
|
||||||
|
BuiltinAtan2 BuiltinOp = iota
|
||||||
|
BuiltinClose
|
||||||
|
BuiltinCos
|
||||||
|
BuiltinExp
|
||||||
|
BuiltinFflush
|
||||||
|
BuiltinFflushAll
|
||||||
|
BuiltinGsub
|
||||||
|
BuiltinIndex
|
||||||
|
BuiltinInt
|
||||||
|
BuiltinLength
|
||||||
|
BuiltinLengthArg
|
||||||
|
BuiltinLog
|
||||||
|
BuiltinMatch
|
||||||
|
BuiltinRand
|
||||||
|
BuiltinSin
|
||||||
|
BuiltinSqrt
|
||||||
|
BuiltinSrand
|
||||||
|
BuiltinSrandSeed
|
||||||
|
BuiltinSub
|
||||||
|
BuiltinSubstr
|
||||||
|
BuiltinSubstrLength
|
||||||
|
BuiltinSystem
|
||||||
|
BuiltinTolower
|
||||||
|
BuiltinToupper
|
||||||
|
)
|
392
src/tool/awk/interp/csvreader_test.go
Normal file
392
src/tool/awk/interp/csvreader_test.go
Normal file
|
@ -0,0 +1,392 @@
|
||||||
|
// Tests copied from encoding/csv to ensure we pass all the relevant cases.
|
||||||
|
|
||||||
|
// These tests are a subset of those in encoding/csv used to test Reader.
|
||||||
|
// However, the §, ¶ and ∑ special characters (for error positions) have been
|
||||||
|
// removed, and some tests have been removed or tweaked slightly because we
|
||||||
|
// don't support all the encoding/csv features (FieldsPerRecord is not
|
||||||
|
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off).
|
||||||
|
|
||||||
|
package interp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/csv"
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// readTest is one table-driven case for the CSV reader tests.
type readTest struct {
	Name   string     // case name
	Input  string     // raw CSV text given to the reader
	Output [][]string // expected records, one slice of fields per record
	Error  string     // expected error text

	// These fields are copied into the CSVInputConfig
	Comma   rune
	Comment rune
}
|
||||||
|
|
||||||
|
var readTests = []readTest{{
|
||||||
|
Name: "Simple",
|
||||||
|
Input: "a,b,c\n",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
}, {
|
||||||
|
Name: "CRLF",
|
||||||
|
Input: "a,b\r\nc,d\r\n",
|
||||||
|
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
||||||
|
}, {
|
||||||
|
Name: "BareCR",
|
||||||
|
Input: "a,b\rc,d\r\n",
|
||||||
|
Output: [][]string{{"a", "b\rc", "d"}},
|
||||||
|
}, {
|
||||||
|
Name: "RFC4180test",
|
||||||
|
Input: `#field1,field2,field3
|
||||||
|
"aaa","bb
|
||||||
|
b","ccc"
|
||||||
|
"a,a","b""bb","ccc"
|
||||||
|
zzz,yyy,xxx
|
||||||
|
`,
|
||||||
|
Output: [][]string{
|
||||||
|
{"#field1", "field2", "field3"},
|
||||||
|
{"aaa", "bb\nb", "ccc"},
|
||||||
|
{"a,a", `b"bb`, "ccc"},
|
||||||
|
{"zzz", "yyy", "xxx"},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "NoEOLTest",
|
||||||
|
Input: "a,b,c",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
}, {
|
||||||
|
Name: "Semicolon",
|
||||||
|
Input: "a;b;c\n",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
Comma: ';',
|
||||||
|
}, {
|
||||||
|
Name: "MultiLine",
|
||||||
|
Input: `"two
|
||||||
|
line","one line","three
|
||||||
|
line
|
||||||
|
field"`,
|
||||||
|
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
||||||
|
}, {
|
||||||
|
Name: "BlankLine",
|
||||||
|
Input: "a,b,c\n\nd,e,f\n\n",
|
||||||
|
Output: [][]string{
|
||||||
|
{"a", "b", "c"},
|
||||||
|
{"d", "e", "f"},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "BlankLineFieldCount",
|
||||||
|
Input: "a,b,c\n\nd,e,f\n\n",
|
||||||
|
Output: [][]string{
|
||||||
|
{"a", "b", "c"},
|
||||||
|
{"d", "e", "f"},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "LeadingSpace",
|
||||||
|
Input: " a, b, c\n",
|
||||||
|
Output: [][]string{{" a", " b", " c"}},
|
||||||
|
}, {
|
||||||
|
Name: "Comment",
|
||||||
|
Input: "#1,2,3\na,b,c\n#comment",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
Comment: '#',
|
||||||
|
}, {
|
||||||
|
Name: "NoComment",
|
||||||
|
Input: "#1,2,3\na,b,c",
|
||||||
|
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
||||||
|
}, {
|
||||||
|
Name: "LazyQuotes",
|
||||||
|
Input: `a "word","1"2",a","b`,
|
||||||
|
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
||||||
|
}, {
|
||||||
|
Name: "BareQuotes",
|
||||||
|
Input: `a "word","1"2",a"`,
|
||||||
|
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
||||||
|
}, {
|
||||||
|
Name: "BareDoubleQuotes",
|
||||||
|
Input: `a""b,c`,
|
||||||
|
Output: [][]string{{`a""b`, `c`}},
|
||||||
|
}, {
|
||||||
|
Name: "TrimQuote",
|
||||||
|
Input: `"a"," b",c`,
|
||||||
|
Output: [][]string{{"a", " b", "c"}},
|
||||||
|
}, {
|
||||||
|
Name: "FieldCount",
|
||||||
|
Input: "a,b,c\nd,e",
|
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCommaEOF",
|
||||||
|
Input: "a,b,c,",
|
||||||
|
Output: [][]string{{"a", "b", "c", ""}},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCommaEOL",
|
||||||
|
Input: "a,b,c,\n",
|
||||||
|
Output: [][]string{{"a", "b", "c", ""}},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCommaSpaceEOF",
|
||||||
|
Input: "a,b,c, ",
|
||||||
|
Output: [][]string{{"a", "b", "c", " "}},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCommaSpaceEOL",
|
||||||
|
Input: "a,b,c, \n",
|
||||||
|
Output: [][]string{{"a", "b", "c", " "}},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCommaLine3",
|
||||||
|
Input: "a,b,c\nd,e,f\ng,hi,",
|
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
|
||||||
|
}, {
|
||||||
|
Name: "NotTrailingComma3",
|
||||||
|
Input: "a,b,c, \n",
|
||||||
|
Output: [][]string{{"a", "b", "c", " "}},
|
||||||
|
}, {
|
||||||
|
Name: "CommaFieldTest",
|
||||||
|
Input: `x,y,z,w
|
||||||
|
x,y,z,
|
||||||
|
x,y,,
|
||||||
|
x,,,
|
||||||
|
,,,
|
||||||
|
"x","y","z","w"
|
||||||
|
"x","y","z",""
|
||||||
|
"x","y","",""
|
||||||
|
"x","","",""
|
||||||
|
"","","",""
|
||||||
|
`,
|
||||||
|
Output: [][]string{
|
||||||
|
{"x", "y", "z", "w"},
|
||||||
|
{"x", "y", "z", ""},
|
||||||
|
{"x", "y", "", ""},
|
||||||
|
{"x", "", "", ""},
|
||||||
|
{"", "", "", ""},
|
||||||
|
{"x", "y", "z", "w"},
|
||||||
|
{"x", "y", "z", ""},
|
||||||
|
{"x", "y", "", ""},
|
||||||
|
{"x", "", "", ""},
|
||||||
|
{"", "", "", ""},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCommaIneffective1",
|
||||||
|
Input: "a,b,\nc,d,e",
|
||||||
|
Output: [][]string{
|
||||||
|
{"a", "b", ""},
|
||||||
|
{"c", "d", "e"},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "ReadAllReuseRecord",
|
||||||
|
Input: "a,b\nc,d",
|
||||||
|
Output: [][]string{
|
||||||
|
{"a", "b"},
|
||||||
|
{"c", "d"},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "CRLFInQuotedField", // Issue 21201
|
||||||
|
Input: "A,\"Hello\r\nHi\",B\r\n",
|
||||||
|
Output: [][]string{
|
||||||
|
{"A", "Hello\nHi", "B"},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "BinaryBlobField", // Issue 19410
|
||||||
|
Input: "x09\x41\xb4\x1c,aktau",
|
||||||
|
Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
|
||||||
|
}, {
|
||||||
|
Name: "TrailingCR",
|
||||||
|
Input: "field1,field2\r",
|
||||||
|
Output: [][]string{{"field1", "field2"}},
|
||||||
|
}, {
|
||||||
|
Name: "QuotedTrailingCR",
|
||||||
|
Input: "\"field\"\r",
|
||||||
|
Output: [][]string{{"field"}},
|
||||||
|
}, {
|
||||||
|
Name: "FieldCR",
|
||||||
|
Input: "field\rfield\r",
|
||||||
|
Output: [][]string{{"field\rfield"}},
|
||||||
|
}, {
|
||||||
|
Name: "FieldCRCR",
|
||||||
|
Input: "field\r\rfield\r\r",
|
||||||
|
Output: [][]string{{"field\r\rfield\r"}},
|
||||||
|
}, {
|
||||||
|
Name: "FieldCRCRLF",
|
||||||
|
Input: "field\r\r\nfield\r\r\n",
|
||||||
|
Output: [][]string{{"field\r"}, {"field\r"}},
|
||||||
|
}, {
|
||||||
|
Name: "FieldCRCRLFCR",
|
||||||
|
Input: "field\r\r\n\rfield\r\r\n\r",
|
||||||
|
Output: [][]string{{"field\r"}, {"\rfield\r"}},
|
||||||
|
}, {
|
||||||
|
Name: "FieldCRCRLFCRCR",
|
||||||
|
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
|
||||||
|
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
|
||||||
|
}, {
|
||||||
|
Name: "MultiFieldCRCRLFCRCR",
|
||||||
|
Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
|
||||||
|
Output: [][]string{
|
||||||
|
{"field1", "field2\r"},
|
||||||
|
{"\r\rfield1", "field2\r"},
|
||||||
|
{"\r\r", ""},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
Name: "NonASCIICommaAndComment",
|
||||||
|
Input: "a£b,c£ \td,e\n€ comment\n",
|
||||||
|
Output: [][]string{{"a", "b,c", " \td,e"}},
|
||||||
|
Comma: '£',
|
||||||
|
Comment: '€',
|
||||||
|
}, {
|
||||||
|
Name: "NonASCIICommaAndCommentWithQuotes",
|
||||||
|
Input: "a€\" b,\"€ c\nλ comment\n",
|
||||||
|
Output: [][]string{{"a", " b,", " c"}},
|
||||||
|
Comma: '€',
|
||||||
|
Comment: 'λ',
|
||||||
|
}, {
|
||||||
|
// λ and θ start with the same byte.
|
||||||
|
// This tests that the parser doesn't confuse such characters.
|
||||||
|
Name: "NonASCIICommaConfusion",
|
||||||
|
Input: "\"abθcd\"λefθgh",
|
||||||
|
Output: [][]string{{"abθcd", "efθgh"}},
|
||||||
|
Comma: 'λ',
|
||||||
|
Comment: '€',
|
||||||
|
}, {
|
||||||
|
Name: "NonASCIICommentConfusion",
|
||||||
|
Input: "λ\nλ\nθ\nλ\n",
|
||||||
|
Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
|
||||||
|
Comment: 'θ',
|
||||||
|
}, {
|
||||||
|
Name: "QuotedFieldMultipleLF",
|
||||||
|
Input: "\"\n\n\n\n\"",
|
||||||
|
Output: [][]string{{"\n\n\n\n"}},
|
||||||
|
}, {
|
||||||
|
Name: "MultipleCRLF",
|
||||||
|
Input: "\r\n\r\n\r\n\r\n",
|
||||||
|
}, {
|
||||||
|
// The implementation may read each line in several chunks if it doesn't fit entirely
|
||||||
|
// in the read buffer, so we should test the code to handle that condition.
|
||||||
|
Name: "HugeLines",
|
||||||
|
Input: strings.Repeat("#ignore\n", 10000) + "" + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
|
||||||
|
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
|
||||||
|
Comment: '#',
|
||||||
|
}, {
|
||||||
|
Name: "LazyQuoteWithTrailingCRLF",
|
||||||
|
Input: "\"foo\"bar\"\r\n",
|
||||||
|
Output: [][]string{{`foo"bar`}},
|
||||||
|
}, {
|
||||||
|
Name: "DoubleQuoteWithTrailingCRLF",
|
||||||
|
Input: "\"foo\"\"bar\"\r\n",
|
||||||
|
Output: [][]string{{`foo"bar`}},
|
||||||
|
}, {
|
||||||
|
Name: "EvenQuotes",
|
||||||
|
Input: `""""""""`,
|
||||||
|
Output: [][]string{{`"""`}},
|
||||||
|
}, {
|
||||||
|
Name: "LazyOddQuotes",
|
||||||
|
Input: `"""""""`,
|
||||||
|
Output: [][]string{{`"""`}},
|
||||||
|
}, {
|
||||||
|
Name: "BadComma1",
|
||||||
|
Comma: '\n',
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadComma2",
|
||||||
|
Comma: '\r',
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadComma3",
|
||||||
|
Comma: '"',
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadComma4",
|
||||||
|
Comma: utf8.RuneError,
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadComment1",
|
||||||
|
Comment: '\n',
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadComment2",
|
||||||
|
Comment: '\r',
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadComment3",
|
||||||
|
Comment: utf8.RuneError,
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}, {
|
||||||
|
Name: "BadCommaComment",
|
||||||
|
Comma: 'X',
|
||||||
|
Comment: 'X',
|
||||||
|
Error: "invalid CSV field separator or comment delimiter",
|
||||||
|
}}
|
||||||
|
|
||||||
|
func TestCSVReader(t *testing.T) {
|
||||||
|
for _, tt := range readTests {
|
||||||
|
t.Run(tt.Name, func(t *testing.T) {
|
||||||
|
inputConfig := CSVInputConfig{
|
||||||
|
Separator: tt.Comma,
|
||||||
|
Comment: tt.Comment,
|
||||||
|
}
|
||||||
|
if inputConfig.Separator == 0 {
|
||||||
|
inputConfig.Separator = ','
|
||||||
|
}
|
||||||
|
|
||||||
|
var out [][]string
|
||||||
|
err := validateCSVInputConfig(CSVMode, inputConfig)
|
||||||
|
if err == nil {
|
||||||
|
var fields []string
|
||||||
|
splitter := csvSplitter{
|
||||||
|
separator: inputConfig.Separator,
|
||||||
|
sepLen: utf8.RuneLen(inputConfig.Separator),
|
||||||
|
comment: inputConfig.Comment,
|
||||||
|
fields: &fields,
|
||||||
|
}
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(tt.Input))
|
||||||
|
scanner.Split(splitter.scan)
|
||||||
|
scanner.Buffer(make([]byte, inputBufSize), maxRecordLength)
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
row := make([]string, len(fields))
|
||||||
|
copy(row, fields)
|
||||||
|
out = append(out, row)
|
||||||
|
|
||||||
|
// We don't explicitly check the returned token, but at
|
||||||
|
// least check it parses to the same row.
|
||||||
|
if strings.ContainsRune(tt.Input, '\r') {
|
||||||
|
// But FieldCRCRLF and similar tests don't round-trip
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
token := scanner.Text()
|
||||||
|
reader := csv.NewReader(strings.NewReader(token))
|
||||||
|
reader.Comma = inputConfig.Separator
|
||||||
|
reader.Comment = inputConfig.Comment
|
||||||
|
reader.FieldsPerRecord = -1
|
||||||
|
reader.LazyQuotes = true
|
||||||
|
tokenRow, err := reader.Read()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error reparsing token: %v", err)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(tokenRow, row) {
|
||||||
|
t.Fatalf("token mismatch:\ngot %q\nwant %q", tokenRow, row)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
err = scanner.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.Error != "" {
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("error mismatch:\ngot nil\nwant %q", tt.Error)
|
||||||
|
}
|
||||||
|
if err.Error() != tt.Error {
|
||||||
|
t.Fatalf("error mismatch:\ngot %q\nwant %q", err.Error(), tt.Error)
|
||||||
|
}
|
||||||
|
if out != nil {
|
||||||
|
t.Fatalf("output mismatch:\ngot %q\nwant nil", out)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error mismatch:\ngot %q\nwant nil", err.Error())
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(out, tt.Output) {
|
||||||
|
t.Fatalf("output mismatch:\ngot %q\nwant %q", out, tt.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
177
src/tool/awk/interp/example_test.go
Normal file
177
src/tool/awk/interp/example_test.go
Normal file
|
@ -0,0 +1,177 @@
|
||||||
|
// Don't run these on Windows, because newline handling means they don't pass.
|
||||||
|
|
||||||
|
//go:build !windows
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package interp_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/interp"
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Example() {
|
||||||
|
input := strings.NewReader("foo bar\n\nbaz buz")
|
||||||
|
err := interp.Exec("$0 { print $1 }", " ", input, nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// foo
|
||||||
|
// baz
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_fieldsep() {
|
||||||
|
// Use ',' as the field separator
|
||||||
|
input := strings.NewReader("1,2\n3,4")
|
||||||
|
err := interp.Exec("{ print $1, $2 }", ",", input, nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// 1 2
|
||||||
|
// 3 4
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_program() {
|
||||||
|
src := "{ print NR, tolower($0) }"
|
||||||
|
input := "A\naB\nAbC"
|
||||||
|
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
config := &interp.Config{
|
||||||
|
Stdin: strings.NewReader(input),
|
||||||
|
Vars: []string{"OFS", ":"},
|
||||||
|
}
|
||||||
|
_, err = interp.ExecProgram(prog, config)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// 1:a
|
||||||
|
// 2:ab
|
||||||
|
// 3:abc
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_funcs() {
|
||||||
|
src := `BEGIN { print sum(), sum(1), sum(2, 3, 4), repeat("xyz", 3) }`
|
||||||
|
|
||||||
|
parserConfig := &parser.ParserConfig{
|
||||||
|
Funcs: map[string]interface{}{
|
||||||
|
"sum": func(args ...float64) float64 {
|
||||||
|
sum := 0.0
|
||||||
|
for _, a := range args {
|
||||||
|
sum += a
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
},
|
||||||
|
"repeat": strings.Repeat,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), parserConfig)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
interpConfig := &interp.Config{
|
||||||
|
Funcs: parserConfig.Funcs,
|
||||||
|
}
|
||||||
|
_, err = interp.ExecProgram(prog, interpConfig)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// 0 1 9 xyzxyzxyz
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_new() {
|
||||||
|
// We'll execute this program multiple times on different inputs.
|
||||||
|
src := `{ print $1, x, $3; x++ }`
|
||||||
|
|
||||||
|
// Parse the program and set up the interpreter.
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
interpreter, err := interp.New(prog)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run it once on one input.
|
||||||
|
_, err = interpreter.Execute(&interp.Config{
|
||||||
|
Stdin: strings.NewReader("one two three"),
|
||||||
|
Environ: []string{}, // avoid calling os.Environ each time
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset variables and run it again efficiently on a different input (this
|
||||||
|
// could be from a completely different data source).
|
||||||
|
interpreter.ResetVars()
|
||||||
|
_, err = interpreter.Execute(&interp.Config{
|
||||||
|
Stdin: strings.NewReader("a b c\nd e f\n"),
|
||||||
|
Environ: []string{},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run it on another input, this time without resetting variables.
|
||||||
|
_, err = interpreter.Execute(&interp.Config{
|
||||||
|
Stdin: strings.NewReader("x y z"),
|
||||||
|
Environ: []string{},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// one three
|
||||||
|
// a c
|
||||||
|
// d 1 f
|
||||||
|
// x 2 z
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_csv() {
|
||||||
|
src := `{ total += @"amount" } END { print total }`
|
||||||
|
input := `# comment
|
||||||
|
name,amount
|
||||||
|
Bob,17.50
|
||||||
|
Jill,20
|
||||||
|
"Boba Fett",100.00
|
||||||
|
`
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
config := &interp.Config{
|
||||||
|
Stdin: strings.NewReader(input),
|
||||||
|
InputMode: interp.CSVMode,
|
||||||
|
CSVInput: interp.CSVInputConfig{Comment: '#', Header: true},
|
||||||
|
}
|
||||||
|
_, err = interp.ExecProgram(prog, config)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// 137.5
|
||||||
|
}
|
413
src/tool/awk/interp/functions.go
Normal file
413
src/tool/awk/interp/functions.go
Normal file
|
@ -0,0 +1,413 @@
|
||||||
|
// Call native Go functions; helpers for some builtin function calls.
|
||||||
|
|
||||||
|
package interp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/internal/ast"
|
||||||
|
. "github.com/benhoyt/goawk/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Call native-defined function with given name and arguments, return
|
||||||
|
// its return value (or null value if it doesn't return anything).
|
||||||
|
func (p *interp) callNative(index int, args []value) (value, error) {
|
||||||
|
f := p.nativeFuncs[index]
|
||||||
|
minIn := len(f.in) // Minimum number of args we should pass
|
||||||
|
var variadicType reflect.Type
|
||||||
|
if f.isVariadic {
|
||||||
|
variadicType = f.in[len(f.in)-1].Elem()
|
||||||
|
minIn--
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build list of args to pass to function
|
||||||
|
values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation
|
||||||
|
for i, a := range args {
|
||||||
|
var argType reflect.Type
|
||||||
|
if !f.isVariadic || i < len(f.in)-1 {
|
||||||
|
argType = f.in[i]
|
||||||
|
} else {
|
||||||
|
// Final arg(s) when calling a variadic are all of this type
|
||||||
|
argType = variadicType
|
||||||
|
}
|
||||||
|
values = append(values, p.toNative(a, argType))
|
||||||
|
}
|
||||||
|
// Use zero value for any unspecified args
|
||||||
|
for i := len(args); i < minIn; i++ {
|
||||||
|
values = append(values, reflect.Zero(f.in[i]))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call Go function, determine return value
|
||||||
|
outs := f.value.Call(values)
|
||||||
|
switch len(outs) {
|
||||||
|
case 0:
|
||||||
|
// No return value, return null value to AWK
|
||||||
|
return null(), nil
|
||||||
|
case 1:
|
||||||
|
// Single return value
|
||||||
|
return fromNative(outs[0]), nil
|
||||||
|
case 2:
|
||||||
|
// Two-valued return of (scalar, error)
|
||||||
|
if !outs[1].IsNil() {
|
||||||
|
return null(), outs[1].Interface().(error)
|
||||||
|
}
|
||||||
|
return fromNative(outs[0]), nil
|
||||||
|
default:
|
||||||
|
// Should never happen (checked at parse time)
|
||||||
|
panic(fmt.Sprintf("unexpected number of return values: %d", len(outs)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert from an AWK value to a native Go value
|
||||||
|
func (p *interp) toNative(v value, typ reflect.Type) reflect.Value {
|
||||||
|
switch typ.Kind() {
|
||||||
|
case reflect.Bool:
|
||||||
|
return reflect.ValueOf(v.boolean())
|
||||||
|
case reflect.Int:
|
||||||
|
return reflect.ValueOf(int(v.num()))
|
||||||
|
case reflect.Int8:
|
||||||
|
return reflect.ValueOf(int8(v.num()))
|
||||||
|
case reflect.Int16:
|
||||||
|
return reflect.ValueOf(int16(v.num()))
|
||||||
|
case reflect.Int32:
|
||||||
|
return reflect.ValueOf(int32(v.num()))
|
||||||
|
case reflect.Int64:
|
||||||
|
return reflect.ValueOf(int64(v.num()))
|
||||||
|
case reflect.Uint:
|
||||||
|
return reflect.ValueOf(uint(v.num()))
|
||||||
|
case reflect.Uint8:
|
||||||
|
return reflect.ValueOf(uint8(v.num()))
|
||||||
|
case reflect.Uint16:
|
||||||
|
return reflect.ValueOf(uint16(v.num()))
|
||||||
|
case reflect.Uint32:
|
||||||
|
return reflect.ValueOf(uint32(v.num()))
|
||||||
|
case reflect.Uint64:
|
||||||
|
return reflect.ValueOf(uint64(v.num()))
|
||||||
|
case reflect.Float32:
|
||||||
|
return reflect.ValueOf(float32(v.num()))
|
||||||
|
case reflect.Float64:
|
||||||
|
return reflect.ValueOf(v.num())
|
||||||
|
case reflect.String:
|
||||||
|
return reflect.ValueOf(p.toString(v))
|
||||||
|
case reflect.Slice:
|
||||||
|
if typ.Elem().Kind() != reflect.Uint8 {
|
||||||
|
// Shouldn't happen: prevented by checkNativeFunc
|
||||||
|
panic(fmt.Sprintf("unexpected argument slice: %s", typ.Elem().Kind()))
|
||||||
|
}
|
||||||
|
return reflect.ValueOf([]byte(p.toString(v)))
|
||||||
|
default:
|
||||||
|
// Shouldn't happen: prevented by checkNativeFunc
|
||||||
|
panic(fmt.Sprintf("unexpected argument type: %s", typ.Kind()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert from a native Go value to an AWK value
|
||||||
|
func fromNative(v reflect.Value) value {
|
||||||
|
switch v.Kind() {
|
||||||
|
case reflect.Bool:
|
||||||
|
return boolean(v.Bool())
|
||||||
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||||
|
return num(float64(v.Int()))
|
||||||
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
||||||
|
return num(float64(v.Uint()))
|
||||||
|
case reflect.Float32, reflect.Float64:
|
||||||
|
return num(v.Float())
|
||||||
|
case reflect.String:
|
||||||
|
return str(v.String())
|
||||||
|
case reflect.Slice:
|
||||||
|
if b, ok := v.Interface().([]byte); ok {
|
||||||
|
return str(string(b))
|
||||||
|
}
|
||||||
|
// Shouldn't happen: prevented by checkNativeFunc
|
||||||
|
panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind()))
|
||||||
|
default:
|
||||||
|
// Shouldn't happen: prevented by checkNativeFunc
|
||||||
|
panic(fmt.Sprintf("unexpected return type: %s", v.Kind()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// nativeFunc caches the reflection data for one native Go function so it does
// not have to be recomputed on every call. It is filled in by
// initNativeFuncs.
type nativeFunc struct {
	// isVariadic reports whether the function's final parameter is variadic.
	isVariadic bool
	// in holds the function's parameter types in declaration order.
	in []reflect.Type
	// value is the function itself, ready to be invoked with Call.
	value reflect.Value
}
|
||||||
|
|
||||||
|
// Check and initialize native functions
|
||||||
|
func (p *interp) initNativeFuncs(funcs map[string]interface{}) error {
|
||||||
|
for name, f := range funcs {
|
||||||
|
err := checkNativeFunc(name, f)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort functions by name, then use those indexes to build slice
|
||||||
|
// (this has to match how the parser sets the indexes).
|
||||||
|
names := make([]string, 0, len(funcs))
|
||||||
|
for name := range funcs {
|
||||||
|
names = append(names, name)
|
||||||
|
}
|
||||||
|
sort.Strings(names)
|
||||||
|
p.nativeFuncs = make([]nativeFunc, len(names))
|
||||||
|
for i, name := range names {
|
||||||
|
f := funcs[name]
|
||||||
|
typ := reflect.TypeOf(f)
|
||||||
|
in := make([]reflect.Type, typ.NumIn())
|
||||||
|
for j := 0; j < len(in); j++ {
|
||||||
|
in[j] = typ.In(j)
|
||||||
|
}
|
||||||
|
p.nativeFuncs[i] = nativeFunc{
|
||||||
|
isVariadic: typ.IsVariadic(),
|
||||||
|
in: in,
|
||||||
|
value: reflect.ValueOf(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorType is the reflect.Type of the error interface itself. It is obtained
// by taking the element type of a pointer to error, because reflect.TypeOf
// cannot be applied to an interface type directly (trick borrowed from the
// Go stdlib text/template source).
var errorType = reflect.TypeOf(new(error)).Elem()
|
||||||
|
|
||||||
|
// Check that native function with given name is okay to call from
|
||||||
|
// AWK, return an *interp.Error if not. This checks that f is actually
|
||||||
|
// a function, and that its parameter and return types are good.
|
||||||
|
func checkNativeFunc(name string, f interface{}) error {
|
||||||
|
if KeywordToken(name) != ILLEGAL {
|
||||||
|
return newError("can't use keyword %q as native function name", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
typ := reflect.TypeOf(f)
|
||||||
|
if typ.Kind() != reflect.Func {
|
||||||
|
return newError("native function %q is not a function", name)
|
||||||
|
}
|
||||||
|
for i := 0; i < typ.NumIn(); i++ {
|
||||||
|
param := typ.In(i)
|
||||||
|
if typ.IsVariadic() && i == typ.NumIn()-1 {
|
||||||
|
param = param.Elem()
|
||||||
|
}
|
||||||
|
if !validNativeType(param) {
|
||||||
|
return newError("native function %q param %d is not int or string", name, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch typ.NumOut() {
|
||||||
|
case 0:
|
||||||
|
// No return value is fine
|
||||||
|
case 1:
|
||||||
|
// Single scalar return value is fine
|
||||||
|
if !validNativeType(typ.Out(0)) {
|
||||||
|
return newError("native function %q return value is not int or string", name)
|
||||||
|
}
|
||||||
|
case 2:
|
||||||
|
// Returning (scalar, error) is handled too
|
||||||
|
if !validNativeType(typ.Out(0)) {
|
||||||
|
return newError("native function %q first return value is not int or string", name)
|
||||||
|
}
|
||||||
|
if typ.Out(1) != errorType {
|
||||||
|
return newError("native function %q second return value is not an error", name)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return newError("native function %q returns more than two values", name)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validNativeType reports whether typ may be used as a parameter or return
// type of a native function. Only the type's kind is examined: bool, every
// sized and unsized signed and unsigned integer kind, both float kinds and
// string are accepted, as are slices whose element kind is uint8.
func validNativeType(typ reflect.Type) bool {
	switch typ.Kind() {
	case reflect.Bool,
		reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Float32, reflect.Float64,
		reflect.String:
		return true
	case reflect.Slice:
		// Only allow []byte (convert to string in AWK)
		return typ.Elem().Kind() == reflect.Uint8
	}
	return false
}
|
||||||
|
|
||||||
|
// Guts of the split() function
|
||||||
|
func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) {
|
||||||
|
var parts []string
|
||||||
|
if fs == " " {
|
||||||
|
parts = strings.Fields(s)
|
||||||
|
} else if s == "" {
|
||||||
|
// Leave parts 0 length on empty string
|
||||||
|
} else if utf8.RuneCountInString(fs) <= 1 {
|
||||||
|
parts = strings.Split(s, fs)
|
||||||
|
} else {
|
||||||
|
re, err := p.compileRegex(fs)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
parts = re.Split(s, -1)
|
||||||
|
}
|
||||||
|
array := make(map[string]value, len(parts))
|
||||||
|
for i, part := range parts {
|
||||||
|
array[strconv.Itoa(i+1)] = numStr(part)
|
||||||
|
}
|
||||||
|
p.arrays[p.arrayIndex(scope, index)] = array
|
||||||
|
return len(array), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Guts of the sub() and gsub() functions
|
||||||
|
func (p *interp) sub(regex, repl, in string, global bool) (out string, num int, err error) {
|
||||||
|
re, err := p.compileRegex(regex)
|
||||||
|
if err != nil {
|
||||||
|
return "", 0, err
|
||||||
|
}
|
||||||
|
count := 0
|
||||||
|
out = re.ReplaceAllStringFunc(in, func(s string) string {
|
||||||
|
// Only do the first replacement for sub(), or all for gsub()
|
||||||
|
if !global && count > 0 {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
count++
|
||||||
|
// Handle & (ampersand) properly in replacement string
|
||||||
|
r := make([]byte, 0, 64) // Up to 64 byte replacement won't require heap allocation
|
||||||
|
for i := 0; i < len(repl); i++ {
|
||||||
|
switch repl[i] {
|
||||||
|
case '&':
|
||||||
|
r = append(r, s...)
|
||||||
|
case '\\':
|
||||||
|
i++
|
||||||
|
if i < len(repl) {
|
||||||
|
switch repl[i] {
|
||||||
|
case '&':
|
||||||
|
r = append(r, '&')
|
||||||
|
case '\\':
|
||||||
|
r = append(r, '\\')
|
||||||
|
default:
|
||||||
|
r = append(r, '\\', repl[i])
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
r = append(r, '\\')
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
r = append(r, repl[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string(r)
|
||||||
|
})
|
||||||
|
return out, count, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// cachedFormat is one entry in the interpreter's format cache: the result of
// parsing a printf-style format string with parseFmtTypes.
type cachedFormat struct {
	// format is the format string rewritten for use with Go's fmt package.
	format string
	// types holds one conversion code per argument the format consumes.
	types []byte
}
|
||||||
|
|
||||||
|
// Parse given sprintf format string into Go format string, along with
|
||||||
|
// type conversion specifiers. Output is memoized in a simple cache
|
||||||
|
// for performance.
|
||||||
|
func (p *interp) parseFmtTypes(s string) (format string, types []byte, err error) {
|
||||||
|
if item, ok := p.formatCache[s]; ok {
|
||||||
|
return item.format, item.types, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
out := []byte(s)
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
if s[i] == '%' {
|
||||||
|
i++
|
||||||
|
if i >= len(s) {
|
||||||
|
return "", nil, errors.New("expected type specifier after %")
|
||||||
|
}
|
||||||
|
if s[i] == '%' {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for i < len(s) && bytes.IndexByte([]byte(" .-+*#0123456789"), s[i]) >= 0 {
|
||||||
|
if s[i] == '*' {
|
||||||
|
types = append(types, 'd')
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if i >= len(s) {
|
||||||
|
return "", nil, errors.New("expected type specifier after %")
|
||||||
|
}
|
||||||
|
var t byte
|
||||||
|
switch s[i] {
|
||||||
|
case 's':
|
||||||
|
t = 's'
|
||||||
|
case 'd', 'i', 'o', 'x', 'X':
|
||||||
|
t = 'd'
|
||||||
|
case 'f', 'e', 'E', 'g', 'G':
|
||||||
|
t = 'f'
|
||||||
|
case 'u':
|
||||||
|
t = 'u'
|
||||||
|
out[i] = 'd'
|
||||||
|
case 'c':
|
||||||
|
t = 'c'
|
||||||
|
out[i] = 's'
|
||||||
|
default:
|
||||||
|
return "", nil, fmt.Errorf("invalid format type %q", s[i])
|
||||||
|
}
|
||||||
|
types = append(types, t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dumb, non-LRU cache: just cache the first N formats
|
||||||
|
format = string(out)
|
||||||
|
if len(p.formatCache) < maxCachedFormats {
|
||||||
|
p.formatCache[s] = cachedFormat{format, types}
|
||||||
|
}
|
||||||
|
return format, types, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Guts of sprintf() function (also used by "printf" statement)
|
||||||
|
func (p *interp) sprintf(format string, args []value) (string, error) {
|
||||||
|
format, types, err := p.parseFmtTypes(format)
|
||||||
|
if err != nil {
|
||||||
|
return "", newError("format error: %s", err)
|
||||||
|
}
|
||||||
|
if len(types) > len(args) {
|
||||||
|
return "", newError("format error: got %d args, expected %d", len(args), len(types))
|
||||||
|
}
|
||||||
|
converted := make([]interface{}, 0, 7) // up to 7 args won't require heap allocation
|
||||||
|
for i, t := range types {
|
||||||
|
a := args[i]
|
||||||
|
var v interface{}
|
||||||
|
switch t {
|
||||||
|
case 's':
|
||||||
|
v = p.toString(a)
|
||||||
|
case 'd':
|
||||||
|
v = int(a.num())
|
||||||
|
case 'f':
|
||||||
|
v = a.num()
|
||||||
|
case 'u':
|
||||||
|
v = uint(a.num())
|
||||||
|
case 'c':
|
||||||
|
var c []byte
|
||||||
|
n, isStr := a.isTrueStr()
|
||||||
|
if isStr {
|
||||||
|
s := p.toString(a)
|
||||||
|
if len(s) > 0 {
|
||||||
|
c = []byte{s[0]}
|
||||||
|
} else {
|
||||||
|
c = []byte{0}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Follow the behaviour of awk and mawk, where %c
|
||||||
|
// operates on bytes (0-255), not Unicode codepoints
|
||||||
|
c = []byte{byte(n)}
|
||||||
|
}
|
||||||
|
v = c
|
||||||
|
}
|
||||||
|
converted = append(converted, v)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf(format, converted...), nil
|
||||||
|
}
|
107
src/tool/awk/interp/fuzz_test.go
Normal file
107
src/tool/awk/interp/fuzz_test.go
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
// Fuzz tests for use with the Go 1.18 fuzzer.
|
||||||
|
|
||||||
|
//go:build go1.18
|
||||||
|
// +build go1.18
|
||||||
|
|
||||||
|
package interp_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/interp"
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isFuzzTest(test interpTest) bool {
|
||||||
|
return test.err == "" && test.awkErr == "" && !strings.Contains(test.src, "!fuzz")
|
||||||
|
}
|
||||||
|
|
||||||
|
func FuzzSource(f *testing.F) {
|
||||||
|
for _, test := range interpTests {
|
||||||
|
if isFuzzTest(test) {
|
||||||
|
f.Add(test.src)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f.Fuzz(func(t *testing.T, src string) {
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
interpreter, err := interp.New(prog)
|
||||||
|
if err != nil {
|
||||||
|
f.Fatalf("interp.New error: %v", err)
|
||||||
|
}
|
||||||
|
config := interp.Config{
|
||||||
|
Stdin: strings.NewReader("foo bar\nbazz\n"),
|
||||||
|
Output: ioutil.Discard,
|
||||||
|
Error: ioutil.Discard,
|
||||||
|
NoExec: true,
|
||||||
|
NoFileWrites: true,
|
||||||
|
NoFileReads: true,
|
||||||
|
Environ: []string{},
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_, _ = interpreter.ExecuteContext(ctx, &config)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func FuzzInput(f *testing.F) {
|
||||||
|
f.Add("")
|
||||||
|
added := make(map[string]bool)
|
||||||
|
for _, test := range interpTests {
|
||||||
|
if test.in != "" && !added[test.in] {
|
||||||
|
f.Add(test.in)
|
||||||
|
added[test.in] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prog, err := parser.ParseProgram([]byte(`{ print $0, $3, $1, $10 }`), nil)
|
||||||
|
if err != nil {
|
||||||
|
f.Fatalf("parse error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
interpreter, err := interp.New(prog)
|
||||||
|
if err != nil {
|
||||||
|
f.Fatalf("interp.New error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var vars = [][]string{
|
||||||
|
{"FS", " ", "RS", "\n"},
|
||||||
|
{"FS", ",", "RS", "\n"},
|
||||||
|
{"FS", "\t", "RS", "\n"},
|
||||||
|
{"FS", "@+", "RS", "\n"},
|
||||||
|
{"FS", "\n", "RS", ""},
|
||||||
|
{"FS", " ", "RS", "X+"},
|
||||||
|
}
|
||||||
|
|
||||||
|
f.Fuzz(func(t *testing.T, in string) {
|
||||||
|
for _, v := range vars {
|
||||||
|
t.Run(fmt.Sprintf("Vars=%q", v), func(t *testing.T) {
|
||||||
|
interpreter.ResetVars()
|
||||||
|
config := interp.Config{
|
||||||
|
Stdin: strings.NewReader(in),
|
||||||
|
Output: ioutil.Discard,
|
||||||
|
Error: ioutil.Discard,
|
||||||
|
Vars: v,
|
||||||
|
NoExec: true,
|
||||||
|
NoFileWrites: true,
|
||||||
|
NoFileReads: true,
|
||||||
|
Environ: []string{},
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_, err := interpreter.ExecuteContext(ctx, &config)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("execute error: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
75
src/tool/awk/interp/fuzz_unexported_test.go
Normal file
75
src/tool/awk/interp/fuzz_unexported_test.go
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
// Fuzz tests for unexported functions for use with the Go 1.18 fuzzer.
|
||||||
|
|
||||||
|
//go:build go1.18
|
||||||
|
// +build go1.18
|
||||||
|
|
||||||
|
package interp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func FuzzParseFloatPrefix(f *testing.F) {
|
||||||
|
f.Add("")
|
||||||
|
f.Add("foo")
|
||||||
|
f.Add("The quick.")
|
||||||
|
f.Add("0")
|
||||||
|
f.Add("9")
|
||||||
|
f.Add("1.3e4")
|
||||||
|
f.Add("1.3E0")
|
||||||
|
f.Add("1.3e+5")
|
||||||
|
f.Add("1.3e-5")
|
||||||
|
f.Add("1E1000")
|
||||||
|
f.Add(" 1234 ")
|
||||||
|
f.Add("1234xyz")
|
||||||
|
f.Add("-1234567890")
|
||||||
|
f.Add("0x0")
|
||||||
|
f.Add("0X10")
|
||||||
|
f.Add("0x1234567890")
|
||||||
|
f.Add("0xabcdef")
|
||||||
|
f.Add("0xABCDEF")
|
||||||
|
f.Add("-0xa")
|
||||||
|
f.Add("+0XA")
|
||||||
|
f.Add("0xf.f")
|
||||||
|
f.Add("0xf.fp10")
|
||||||
|
f.Add("0xf.fp-10")
|
||||||
|
f.Add("0x.f")
|
||||||
|
f.Add("0xf.")
|
||||||
|
f.Add("0x.")
|
||||||
|
f.Add("nan")
|
||||||
|
f.Add("+nan")
|
||||||
|
f.Add("-nan")
|
||||||
|
f.Add("NAN")
|
||||||
|
f.Add("inf")
|
||||||
|
f.Add("+inf")
|
||||||
|
f.Add("-inf")
|
||||||
|
f.Add("INF")
|
||||||
|
|
||||||
|
f.Fuzz(func(t *testing.T, in string) {
|
||||||
|
nPrefix := parseFloatPrefix(in)
|
||||||
|
if nPrefix != 0 {
|
||||||
|
for i := 1; i <= len(in); i++ {
|
||||||
|
n, _ := parseFloatHelper(in[:i])
|
||||||
|
if n == nPrefix || math.IsNaN(n) && math.IsNaN(nPrefix) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Fatalf("no ParseFloat match: %q", in)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseFloatHelper is the reference parser used by FuzzParseFloatPrefix. It
// trims surrounding whitespace and lower-cases s, maps "+nan" and "-nan" to
// NaN, appends a "p0" exponent to anything containing "0x" that has no 'p'
// exponent yet (strconv.ParseFloat requires one for hexadecimal floats),
// and then defers to strconv.ParseFloat.
func parseFloatHelper(s string) (float64, error) {
	normalized := strings.ToLower(strings.TrimSpace(s))
	switch normalized {
	case "+nan", "-nan":
		return math.NaN(), nil
	}
	isHex := strings.Contains(normalized, "0x")
	if isHex && !strings.ContainsAny(normalized, "pP") {
		normalized += "p0"
	}
	return strconv.ParseFloat(normalized, 64)
}
|
1095
src/tool/awk/interp/interp.go
Normal file
1095
src/tool/awk/interp/interp.go
Normal file
File diff suppressed because it is too large
Load diff
2609
src/tool/awk/interp/interp_test.go
Normal file
2609
src/tool/awk/interp/interp_test.go
Normal file
File diff suppressed because it is too large
Load diff
899
src/tool/awk/interp/io.go
Normal file
899
src/tool/awk/interp/io.go
Normal file
|
@ -0,0 +1,899 @@
|
||||||
|
// Input/output handling for GoAWK interpreter
|
||||||
|
|
||||||
|
package interp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/internal/ast"
|
||||||
|
. "github.com/benhoyt/goawk/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Print a line of output followed by a newline
|
||||||
|
func (p *interp) printLine(writer io.Writer, line string) error {
|
||||||
|
err := writeOutput(writer, line)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return writeOutput(writer, p.outputRecordSep)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print given arguments followed by a newline (for "print" statement).
|
||||||
|
func (p *interp) printArgs(writer io.Writer, args []value) error {
|
||||||
|
switch p.outputMode {
|
||||||
|
case CSVMode, TSVMode:
|
||||||
|
fields := make([]string, 0, 7) // up to 7 args won't require a heap allocation
|
||||||
|
for _, arg := range args {
|
||||||
|
fields = append(fields, arg.str(p.outputFormat))
|
||||||
|
}
|
||||||
|
err := p.writeCSV(writer, fields)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// Print OFS-separated args followed by ORS (usually newline).
|
||||||
|
for i, arg := range args {
|
||||||
|
if i > 0 {
|
||||||
|
err := writeOutput(writer, p.outputFieldSep)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
err := writeOutput(writer, arg.str(p.outputFormat))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
err := writeOutput(writer, p.outputRecordSep)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *interp) writeCSV(output io.Writer, fields []string) error {
|
||||||
|
// If output is already a *bufio.Writer (the common case), csv.NewWriter
|
||||||
|
// will use it directly. This is not explicitly documented, but
|
||||||
|
// csv.NewWriter calls bufio.NewWriter which calls bufio.NewWriterSize
|
||||||
|
// with a 4KB buffer, and bufio.NewWriterSize is documented as returning
|
||||||
|
// the underlying bufio.Writer if it's passed a large enough one.
|
||||||
|
var flush func() error
|
||||||
|
_, isBuffered := output.(*bufio.Writer)
|
||||||
|
if !isBuffered {
|
||||||
|
// Otherwise create a new buffered writer and flush after writing.
|
||||||
|
if p.csvOutput == nil {
|
||||||
|
p.csvOutput = bufio.NewWriterSize(output, 4096)
|
||||||
|
} else {
|
||||||
|
p.csvOutput.Reset(output)
|
||||||
|
}
|
||||||
|
output = p.csvOutput
|
||||||
|
flush = p.csvOutput.Flush
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given the above, creating a new one of these is cheap.
|
||||||
|
writer := csv.NewWriter(output)
|
||||||
|
writer.Comma = p.csvOutputConfig.Separator
|
||||||
|
writer.UseCRLF = runtime.GOOS == "windows"
|
||||||
|
err := writer.Write(fields)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if flush != nil {
|
||||||
|
return flush()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implement a buffered version of WriteCloser so output is buffered
|
||||||
|
// when redirecting to a file (eg: print >"out")
|
||||||
|
type bufferedWriteCloser struct {
|
||||||
|
*bufio.Writer
|
||||||
|
io.Closer
|
||||||
|
}
|
||||||
|
|
||||||
|
func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser {
|
||||||
|
writer := bufio.NewWriterSize(w, outputBufSize)
|
||||||
|
return &bufferedWriteCloser{writer, w}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wc *bufferedWriteCloser) Close() error {
|
||||||
|
err := wc.Writer.Flush()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return wc.Closer.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine the output stream for given redirect token and
|
||||||
|
// destination (file or pipe name)
|
||||||
|
func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) {
|
||||||
|
name := p.toString(destValue)
|
||||||
|
if _, ok := p.inputStreams[name]; ok {
|
||||||
|
return nil, newError("can't write to reader stream")
|
||||||
|
}
|
||||||
|
if w, ok := p.outputStreams[name]; ok {
|
||||||
|
return w, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch redirect {
|
||||||
|
case GREATER, APPEND:
|
||||||
|
if name == "-" {
|
||||||
|
// filename of "-" means write to stdout, eg: print "x" >"-"
|
||||||
|
return p.output, nil
|
||||||
|
}
|
||||||
|
// Write or append to file
|
||||||
|
if p.noFileWrites {
|
||||||
|
return nil, newError("can't write to file due to NoFileWrites")
|
||||||
|
}
|
||||||
|
p.flushOutputAndError() // ensure synchronization
|
||||||
|
flags := os.O_CREATE | os.O_WRONLY
|
||||||
|
if redirect == GREATER {
|
||||||
|
flags |= os.O_TRUNC
|
||||||
|
} else {
|
||||||
|
flags |= os.O_APPEND
|
||||||
|
}
|
||||||
|
w, err := os.OpenFile(name, flags, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return nil, newError("output redirection error: %s", err)
|
||||||
|
}
|
||||||
|
buffered := newBufferedWriteCloser(w)
|
||||||
|
p.outputStreams[name] = buffered
|
||||||
|
return buffered, nil
|
||||||
|
|
||||||
|
case PIPE:
|
||||||
|
// Pipe to command
|
||||||
|
if p.noExec {
|
||||||
|
return nil, newError("can't write to pipe due to NoExec")
|
||||||
|
}
|
||||||
|
cmd := p.execShell(name)
|
||||||
|
w, err := cmd.StdinPipe()
|
||||||
|
if err != nil {
|
||||||
|
return nil, newError("error connecting to stdin pipe: %v", err)
|
||||||
|
}
|
||||||
|
cmd.Stdout = p.output
|
||||||
|
cmd.Stderr = p.errorOutput
|
||||||
|
p.flushOutputAndError() // ensure synchronization
|
||||||
|
err = cmd.Start()
|
||||||
|
if err != nil {
|
||||||
|
p.printErrorf("%s\n", err)
|
||||||
|
return ioutil.Discard, nil
|
||||||
|
}
|
||||||
|
p.commands[name] = cmd
|
||||||
|
buffered := newBufferedWriteCloser(w)
|
||||||
|
p.outputStreams[name] = buffered
|
||||||
|
return buffered, nil
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Should never happen
|
||||||
|
panic(fmt.Sprintf("unexpected redirect type %s", redirect))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Executes code using configured system shell
|
||||||
|
func (p *interp) execShell(code string) *exec.Cmd {
|
||||||
|
executable := p.shellCommand[0]
|
||||||
|
args := p.shellCommand[1:]
|
||||||
|
args = append(args, code)
|
||||||
|
if p.checkCtx {
|
||||||
|
return exec.CommandContext(p.ctx, executable, args...)
|
||||||
|
} else {
|
||||||
|
return exec.Command(executable, args...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get input Scanner to use for "getline" based on file name
|
||||||
|
func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
|
||||||
|
if _, ok := p.outputStreams[name]; ok {
|
||||||
|
return nil, newError("can't read from writer stream")
|
||||||
|
}
|
||||||
|
if _, ok := p.inputStreams[name]; ok {
|
||||||
|
return p.scanners[name], nil
|
||||||
|
}
|
||||||
|
if name == "-" {
|
||||||
|
// filename of "-" means read from stdin, eg: getline <"-"
|
||||||
|
if scanner, ok := p.scanners["-"]; ok {
|
||||||
|
return scanner, nil
|
||||||
|
}
|
||||||
|
scanner := p.newScanner(p.stdin, make([]byte, inputBufSize))
|
||||||
|
p.scanners[name] = scanner
|
||||||
|
return scanner, nil
|
||||||
|
}
|
||||||
|
if p.noFileReads {
|
||||||
|
return nil, newError("can't read from file due to NoFileReads")
|
||||||
|
}
|
||||||
|
r, err := os.Open(name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err // *os.PathError is handled by caller (getline returns -1)
|
||||||
|
}
|
||||||
|
scanner := p.newScanner(r, make([]byte, inputBufSize))
|
||||||
|
p.scanners[name] = scanner
|
||||||
|
p.inputStreams[name] = r
|
||||||
|
return scanner, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get input Scanner to use for "getline" based on pipe name
|
||||||
|
func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
|
||||||
|
if _, ok := p.outputStreams[name]; ok {
|
||||||
|
return nil, newError("can't read from writer stream")
|
||||||
|
}
|
||||||
|
if _, ok := p.inputStreams[name]; ok {
|
||||||
|
return p.scanners[name], nil
|
||||||
|
}
|
||||||
|
if p.noExec {
|
||||||
|
return nil, newError("can't read from pipe due to NoExec")
|
||||||
|
}
|
||||||
|
cmd := p.execShell(name)
|
||||||
|
cmd.Stdin = p.stdin
|
||||||
|
cmd.Stderr = p.errorOutput
|
||||||
|
r, err := cmd.StdoutPipe()
|
||||||
|
if err != nil {
|
||||||
|
return nil, newError("error connecting to stdout pipe: %v", err)
|
||||||
|
}
|
||||||
|
p.flushOutputAndError() // ensure synchronization
|
||||||
|
err = cmd.Start()
|
||||||
|
if err != nil {
|
||||||
|
p.printErrorf("%s\n", err)
|
||||||
|
return bufio.NewScanner(strings.NewReader("")), nil
|
||||||
|
}
|
||||||
|
scanner := p.newScanner(r, make([]byte, inputBufSize))
|
||||||
|
p.commands[name] = cmd
|
||||||
|
p.inputStreams[name] = r
|
||||||
|
p.scanners[name] = scanner
|
||||||
|
return scanner, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new buffered Scanner for reading input records
|
||||||
|
func (p *interp) newScanner(input io.Reader, buffer []byte) *bufio.Scanner {
|
||||||
|
scanner := bufio.NewScanner(input)
|
||||||
|
switch {
|
||||||
|
case p.inputMode == CSVMode || p.inputMode == TSVMode:
|
||||||
|
splitter := csvSplitter{
|
||||||
|
separator: p.csvInputConfig.Separator,
|
||||||
|
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
|
||||||
|
comment: p.csvInputConfig.Comment,
|
||||||
|
header: p.csvInputConfig.Header,
|
||||||
|
fields: &p.fields,
|
||||||
|
setFieldNames: p.setFieldNames,
|
||||||
|
}
|
||||||
|
scanner.Split(splitter.scan)
|
||||||
|
case p.recordSep == "\n":
|
||||||
|
// Scanner default is to split on newlines
|
||||||
|
case p.recordSep == "":
|
||||||
|
// Empty string for RS means split on \n\n (blank lines)
|
||||||
|
splitter := blankLineSplitter{terminator: &p.recordTerminator}
|
||||||
|
scanner.Split(splitter.scan)
|
||||||
|
case len(p.recordSep) == 1:
|
||||||
|
splitter := byteSplitter{sep: p.recordSep[0]}
|
||||||
|
scanner.Split(splitter.scan)
|
||||||
|
case utf8.RuneCountInString(p.recordSep) >= 1:
|
||||||
|
// Multi-byte and single char but multi-byte RS use regex
|
||||||
|
splitter := regexSplitter{re: p.recordSepRegex, terminator: &p.recordTerminator}
|
||||||
|
scanner.Split(splitter.scan)
|
||||||
|
}
|
||||||
|
scanner.Buffer(buffer, maxRecordLength)
|
||||||
|
return scanner
|
||||||
|
}
|
||||||
|
|
||||||
|
// setFieldNames is called by csvSplitter.scan on the first row (if the
|
||||||
|
// "header" option is specified).
|
||||||
|
func (p *interp) setFieldNames(names []string) {
|
||||||
|
p.fieldNames = names
|
||||||
|
p.fieldIndexes = nil // clear name-to-index cache
|
||||||
|
|
||||||
|
// Populate FIELDS array (mapping of field indexes to field names).
|
||||||
|
fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"])
|
||||||
|
for k := range fieldsArray {
|
||||||
|
delete(fieldsArray, k)
|
||||||
|
}
|
||||||
|
for i, name := range names {
|
||||||
|
fieldsArray[strconv.Itoa(i+1)] = str(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// dropCR returns data without a single trailing '\r', if present.
//
// Copied from bufio/scan.go in the stdlib: I guess it's a bit more
// efficient than bytes.TrimSuffix(data, []byte("\r"))
func dropCR(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\r' {
		return data[:len(data)-1]
	}
	return data
}

// dropLF returns data without a single trailing '\n', if present.
func dropLF(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\n' {
		return data[:len(data)-1]
	}
	return data
}

// blankLineSplitter splits records on blank lines (two consecutive
// newlines, with "\r\n" line endings tolerated). The text that terminated
// the most recent record is stored through terminator.
type blankLineSplitter struct {
	terminator *string
}

// scan is a bufio.SplitFunc. It skips newlines at the start of data, then
// returns the text up to the next blank line as the token and stores the
// newline run that ended it in *s.terminator. At EOF the remaining text is
// returned as a final record, minus one trailing newline.
func (s blankLineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// Skip newlines at beginning of data
	i := 0
	for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
	}
	if i >= len(data) {
		// At end of data after newlines, skip entire data block
		return i, nil, nil
	}
	start := i

	// Try to find two consecutive newlines (or \n\r\n for Windows)
	for ; i < len(data); i++ {
		if data[i] != '\n' {
			continue
		}
		end := i
		if i+1 < len(data) && data[i+1] == '\n' {
			i += 2
			for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
				i++ // Skip newlines at end of record
			}
			*s.terminator = string(data[end:i])
			return i, dropCR(data[start:end]), nil
		}
		if i+2 < len(data) && data[i+1] == '\r' && data[i+2] == '\n' {
			i += 3
			for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
				i++ // Skip newlines at end of record
			}
			*s.terminator = string(data[end:i])
			return i, dropCR(data[start:end]), nil
		}
	}

	// If we're at EOF, we have one final record; return it
	if atEOF {
		token = dropCR(dropLF(data[start:]))
		// token is a prefix of data[start:], so the terminator begins at
		// start+len(token). Offsetting by len(token) alone would be wrong
		// whenever leading newlines were skipped above.
		*s.terminator = string(data[start+len(token):])
		return len(data), token, nil
	}

	// Request more data
	return 0, nil, nil
}
|
||||||
|
|
||||||
|
// byteSplitter splits records on a single separator byte.
type byteSplitter struct {
	sep byte
}

// scan is a bufio.SplitFunc. It returns the bytes before the first
// occurrence of s.sep as the token and advances past the separator. With no
// separator in data, it returns the rest of data as a final record at EOF
// and otherwise asks for more data.
func (s byteSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	pos := bytes.IndexByte(data, s.sep)
	switch {
	case pos >= 0:
		// We have a full sep-terminated record
		return pos + 1, data[:pos], nil
	case atEOF && len(data) > 0:
		// At EOF, we have a final, non-terminated record; return it
		return len(data), data, nil
	default:
		// Either nothing is left at EOF, or we need to request more data
		return 0, nil, nil
	}
}
|
||||||
|
|
||||||
|
// regexSplitter splits records on matches of a regular expression. The text
// of the match that ended the most recent record is stored through
// terminator (the RT special variable).
type regexSplitter struct {
	re         *regexp.Regexp
	terminator *string
}

// scan is a bufio.SplitFunc. It returns the bytes before the first non-empty
// match of s.re as the token and advances past the match. With no such
// match, it returns the rest of data as a final record at EOF (clearing the
// terminator) and otherwise asks for more data.
func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	// Note: for a regex such as "()", the match is empty (begin == end).
	// Gawk behavior for this case is to match the entire input, so empty
	// matches are ignored here.
	if m := s.re.FindIndex(data); m != nil && m[0] != m[1] {
		begin, end := m[0], m[1]
		*s.terminator = string(data[begin:end]) // set RT special variable
		return end, data[:begin], nil
	}

	if !atEOF {
		// Request more data
		return 0, nil, nil
	}
	// At EOF, we have a final, non-terminated record; return it
	*s.terminator = ""
	return len(data), data, nil
}
|
||||||
|
|
||||||
|
// csvSplitter splits records in CSV or TSV format. Besides returning each
// record's raw text as the scanner token, its scan method parses the record
// into fields and stores them through the fields pointer.
type csvSplitter struct {
	separator rune // field separator
	sepLen    int  // number of bytes scan consumes for each separator
	comment   rune // if non-zero, lines starting with this rune are skipped
	header    bool // if true, the first row is passed to setFieldNames instead of being returned

	recordBuffer []byte // unescaped field data of the current record (reused across calls)
	fieldIndexes []int  // end offset of each field within recordBuffer (reused across calls)
	noBOMCheck   bool   // set once the first row has been consumed

	fields        *[]string            // receives the parsed fields of each returned record
	setFieldNames func(names []string) // called with the header row when header is set
	rowNum        int                  // number of rows consumed so far (including the header)
}

// scan is a bufio.SplitFunc that returns one CSV/TSV record per token,
// skipping empty lines and comment lines. Quoted fields may contain
// separators, doubled quotes and newlines. A UTF-8 byte order mark at the
// very start of the input is skipped.
//
// The structure of this code is taken from the stdlib encoding/csv Reader
// code, which is licensed under a compatible BSD-style license.
//
// We don't support all encoding/csv features: FieldsPerRecord is not
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off.
func (s *csvSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Some CSV files are saved with a UTF-8 BOM at the start; skip it.
	// noBOMCheck is only set once a row has actually been consumed (below):
	// if this call ends up requesting more data, the next call sees the
	// same bytes again and must skip the BOM again.
	bomLen := 0
	if !s.noBOMCheck && len(data) >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF {
		data = data[3:]
		bomLen = 3
		advance = bomLen
	}

	origData := data
	if atEOF && len(data) == 0 {
		// No more data, tell Scanner to stop.
		return 0, nil, nil
	}

	readLine := func() []byte {
		newline := bytes.IndexByte(data, '\n')
		var line []byte
		switch {
		case newline >= 0:
			// Process a single line (including newline).
			line = data[:newline+1]
			data = data[newline+1:]
		case atEOF:
			// If at EOF, we have a final record without a newline.
			line = data
			data = data[len(data):]
		default:
			// Need more data
			return nil
		}

		// For backwards compatibility, drop trailing \r before EOF.
		if len(line) > 0 && atEOF && line[len(line)-1] == '\r' {
			line = line[:len(line)-1]
			advance++
		}

		return line
	}

	// Read line (automatically skipping past empty lines and any comments).
	skip := 0
	var line []byte
	for {
		line = readLine()
		if len(line) == 0 {
			return 0, nil, nil // Request more data
		}
		if s.comment != 0 && nextRune(line) == s.comment {
			advance += len(line)
			skip += len(line)
			continue // Skip comment lines
		}
		if len(line) == lenNewline(line) {
			advance += len(line)
			skip += len(line)
			continue // Skip empty lines
		}
		break
	}

	// Parse each field in the record.
	const quoteLen = len(`"`)
	tokenHasCR := false
	s.recordBuffer = s.recordBuffer[:0]
	s.fieldIndexes = s.fieldIndexes[:0]
parseField:
	for {
		if len(line) == 0 || line[0] != '"' {
			// Non-quoted string field
			i := bytes.IndexRune(line, s.separator)
			field := line
			if i >= 0 {
				advance += i + s.sepLen
				field = field[:i]
			} else {
				advance += len(field)
				field = field[:len(field)-lenNewline(field)]
			}
			s.recordBuffer = append(s.recordBuffer, field...)
			s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
			if i >= 0 {
				line = line[i+s.sepLen:]
				continue parseField
			}
			break parseField
		} else {
			// Quoted string field
			line = line[quoteLen:]
			advance += quoteLen
			for {
				i := bytes.IndexByte(line, '"')
				if i >= 0 {
					// Hit next quote.
					s.recordBuffer = append(s.recordBuffer, line[:i]...)
					line = line[i+quoteLen:]
					advance += i + quoteLen
					switch rn := nextRune(line); {
					case rn == '"':
						// `""` sequence (append quote).
						s.recordBuffer = append(s.recordBuffer, '"')
						line = line[quoteLen:]
						advance += quoteLen
					case rn == s.separator:
						// `",` sequence (end of field).
						line = line[s.sepLen:]
						s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
						advance += s.sepLen
						continue parseField
					case lenNewline(line) == len(line):
						// `"\n` sequence (end of line).
						s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
						advance += len(line)
						break parseField
					default:
						// `"` sequence (bare quote).
						s.recordBuffer = append(s.recordBuffer, '"')
					}
				} else if len(line) > 0 {
					// Hit end of line (copy all data so far).
					advance += len(line)
					newlineLen := lenNewline(line)
					if newlineLen == 2 {
						tokenHasCR = true
						s.recordBuffer = append(s.recordBuffer, line[:len(line)-2]...)
						s.recordBuffer = append(s.recordBuffer, '\n')
					} else {
						s.recordBuffer = append(s.recordBuffer, line...)
					}
					line = readLine()
					if line == nil {
						return 0, nil, nil // Request more data
					}
				} else {
					// Abrupt end of file.
					s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
					advance += len(line)
					break parseField
				}
			}
		}
	}

	// Create a single string and create slices out of it.
	// This pins the memory of the fields together, but allocates once.
	strBuf := string(s.recordBuffer) // Convert to string once to batch allocations
	fields := make([]string, len(s.fieldIndexes))
	preIdx := 0
	for i, idx := range s.fieldIndexes {
		fields[i] = strBuf[preIdx:idx]
		preIdx = idx
	}

	// A row is about to be consumed, so the start of the input (and any
	// BOM) is behind us from here on.
	s.noBOMCheck = true

	if s.rowNum == 0 && s.header {
		// Set header field names and advance, but don't return a line (token).
		s.rowNum++
		s.setFieldNames(fields)
		return advance, nil, nil
	}

	// Normal row, set fields and return a line (token).
	s.rowNum++
	*s.fields = fields
	// advance counts from the start of the scanner's data, including any
	// BOM, whereas origData starts after the BOM; subtract bomLen so the
	// token ends at the end of this record rather than 3 bytes past it.
	token = origData[skip : advance-bomLen]
	token = token[:len(token)-lenNewline(token)]
	if tokenHasCR {
		token = bytes.ReplaceAll(token, []byte{'\r'}, nil)
	}
	return advance, token, nil
}

// lenNewline reports the number of bytes for the trailing \n: 2 if b ends
// in "\r\n", 1 if it ends in "\n", and 0 otherwise.
func lenNewline(b []byte) int {
	if len(b) > 0 && b[len(b)-1] == '\n' {
		if len(b) > 1 && b[len(b)-2] == '\r' {
			return 2
		}
		return 1
	}
	return 0
}

// nextRune returns the next rune in b or utf8.RuneError.
func nextRune(b []byte) rune {
	r, _ := utf8.DecodeRune(b)
	return r
}
|
||||||
|
|
||||||
|
// Setup for a new input file with given name (empty string if stdin)
|
||||||
|
func (p *interp) setFile(filename string) {
|
||||||
|
p.filename = numStr(filename)
|
||||||
|
p.fileLineNum = 0
|
||||||
|
p.hadFiles = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setup for a new input line (but don't parse it into fields till we
|
||||||
|
// need to)
|
||||||
|
func (p *interp) setLine(line string, isTrueStr bool) {
|
||||||
|
p.line = line
|
||||||
|
p.lineIsTrueStr = isTrueStr
|
||||||
|
p.haveFields = false
|
||||||
|
p.reparseCSV = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that the current line is parsed into fields, splitting it
|
||||||
|
// into fields if it hasn't been already
|
||||||
|
func (p *interp) ensureFields() {
|
||||||
|
if p.haveFields {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
p.haveFields = true
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case p.inputMode == CSVMode || p.inputMode == TSVMode:
|
||||||
|
if p.reparseCSV {
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(p.line))
|
||||||
|
scanner.Buffer(nil, maxRecordLength)
|
||||||
|
splitter := csvSplitter{
|
||||||
|
separator: p.csvInputConfig.Separator,
|
||||||
|
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
|
||||||
|
comment: p.csvInputConfig.Comment,
|
||||||
|
fields: &p.fields,
|
||||||
|
}
|
||||||
|
scanner.Split(splitter.scan)
|
||||||
|
if !scanner.Scan() {
|
||||||
|
p.fields = nil
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Normally fields have already been parsed by csvSplitter
|
||||||
|
}
|
||||||
|
case p.fieldSep == " ":
|
||||||
|
// FS space (default) means split fields on any whitespace
|
||||||
|
p.fields = strings.Fields(p.line)
|
||||||
|
case p.line == "":
|
||||||
|
p.fields = nil
|
||||||
|
case utf8.RuneCountInString(p.fieldSep) <= 1:
|
||||||
|
// 1-char FS is handled as plain split (not regex)
|
||||||
|
p.fields = strings.Split(p.line, p.fieldSep)
|
||||||
|
default:
|
||||||
|
// Split on FS as a regex
|
||||||
|
p.fields = p.fieldSepRegex.Split(p.line, -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special case for when RS=="" and FS is single character,
|
||||||
|
// split on newline in addition to FS. See more here:
|
||||||
|
// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
|
||||||
|
if p.inputMode == DefaultMode && p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 {
|
||||||
|
fields := make([]string, 0, len(p.fields))
|
||||||
|
for _, field := range p.fields {
|
||||||
|
lines := strings.Split(field, "\n")
|
||||||
|
for _, line := range lines {
|
||||||
|
trimmed := strings.TrimSuffix(line, "\r")
|
||||||
|
fields = append(fields, trimmed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p.fields = fields
|
||||||
|
}
|
||||||
|
|
||||||
|
p.fieldsIsTrueStr = p.fieldsIsTrueStr[:0] // avoid allocation most of the time
|
||||||
|
for range p.fields {
|
||||||
|
p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
|
||||||
|
}
|
||||||
|
p.numFields = len(p.fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch next line (record) of input from current input file, opening
|
||||||
|
// next input file if done with previous one
|
||||||
|
func (p *interp) nextLine() (string, error) {
|
||||||
|
for {
|
||||||
|
if p.scanner == nil {
|
||||||
|
if prevInput, ok := p.input.(io.Closer); ok && p.input != p.stdin {
|
||||||
|
// Previous input is file, close it
|
||||||
|
_ = prevInput.Close()
|
||||||
|
}
|
||||||
|
if p.filenameIndex >= p.argc && !p.hadFiles {
|
||||||
|
// Moved past number of ARGV args and haven't seen
|
||||||
|
// any files yet, use stdin
|
||||||
|
p.input = p.stdin
|
||||||
|
p.setFile("-")
|
||||||
|
} else {
|
||||||
|
if p.filenameIndex >= p.argc {
|
||||||
|
// Done with ARGV args, all done with input
|
||||||
|
return "", io.EOF
|
||||||
|
}
|
||||||
|
// Fetch next filename from ARGV. Can't use
|
||||||
|
// getArrayValue() here as it would set the value if
|
||||||
|
// not present
|
||||||
|
index := strconv.Itoa(p.filenameIndex)
|
||||||
|
argvIndex := p.program.Arrays["ARGV"]
|
||||||
|
argvArray := p.array(ast.ScopeGlobal, argvIndex)
|
||||||
|
filename := p.toString(argvArray[index])
|
||||||
|
p.filenameIndex++
|
||||||
|
|
||||||
|
// Is it actually a var=value assignment?
|
||||||
|
matches := varRegex.FindStringSubmatch(filename)
|
||||||
|
if len(matches) >= 3 {
|
||||||
|
// Yep, set variable to value and keep going
|
||||||
|
name, val := matches[1], matches[2]
|
||||||
|
// Oddly, var=value args must interpret escapes (issue #129)
|
||||||
|
unescaped, err := Unescape(val)
|
||||||
|
if err == nil {
|
||||||
|
val = unescaped
|
||||||
|
}
|
||||||
|
err = p.setVarByName(name, val)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
} else if filename == "" {
|
||||||
|
// ARGV arg is empty string, skip
|
||||||
|
p.input = nil
|
||||||
|
continue
|
||||||
|
} else if filename == "-" {
|
||||||
|
// ARGV arg is "-" meaning stdin
|
||||||
|
p.input = p.stdin
|
||||||
|
p.setFile("-")
|
||||||
|
} else {
|
||||||
|
// A regular file name, open it
|
||||||
|
if p.noFileReads {
|
||||||
|
return "", newError("can't read from file due to NoFileReads")
|
||||||
|
}
|
||||||
|
input, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
p.input = input
|
||||||
|
p.setFile(filename)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.inputBuffer == nil { // reuse buffer from last input file
|
||||||
|
p.inputBuffer = make([]byte, inputBufSize)
|
||||||
|
}
|
||||||
|
p.scanner = p.newScanner(p.input, p.inputBuffer)
|
||||||
|
}
|
||||||
|
p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars
|
||||||
|
if p.scanner.Scan() {
|
||||||
|
// We scanned some input, break and return it
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err := p.scanner.Err()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error reading from input: %s", err)
|
||||||
|
}
|
||||||
|
// Signal loop to move onto next file
|
||||||
|
p.scanner = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Got a line (record) of input, return it
|
||||||
|
p.lineNum++
|
||||||
|
p.fileLineNum++
|
||||||
|
return p.scanner.Text(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write output string to given writer, producing correct line endings
|
||||||
|
// on Windows (CR LF).
|
||||||
|
func writeOutput(w io.Writer, s string) error {
|
||||||
|
if crlfNewline {
|
||||||
|
// First normalize to \n, then convert all newlines to \r\n
|
||||||
|
// (on Windows). NOTE: creating two new strings is almost
|
||||||
|
// certainly slow; would be better to create a custom Writer.
|
||||||
|
s = strings.Replace(s, "\r\n", "\n", -1)
|
||||||
|
s = strings.Replace(s, "\n", "\r\n", -1)
|
||||||
|
}
|
||||||
|
_, err := io.WriteString(w, s)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close all streams, commands, and so on (after program execution).
|
||||||
|
func (p *interp) closeAll() {
|
||||||
|
if prevInput, ok := p.input.(io.Closer); ok {
|
||||||
|
_ = prevInput.Close()
|
||||||
|
}
|
||||||
|
for _, r := range p.inputStreams {
|
||||||
|
_ = r.Close()
|
||||||
|
}
|
||||||
|
for _, w := range p.outputStreams {
|
||||||
|
_ = w.Close()
|
||||||
|
}
|
||||||
|
for _, cmd := range p.commands {
|
||||||
|
_ = cmd.Wait()
|
||||||
|
}
|
||||||
|
if f, ok := p.output.(flusher); ok {
|
||||||
|
_ = f.Flush()
|
||||||
|
}
|
||||||
|
if f, ok := p.errorOutput.(flusher); ok {
|
||||||
|
_ = f.Flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush all output streams as well as standard output. Report whether all
|
||||||
|
// streams were flushed successfully (logging error(s) if not).
|
||||||
|
func (p *interp) flushAll() bool {
|
||||||
|
allGood := true
|
||||||
|
for name, writer := range p.outputStreams {
|
||||||
|
allGood = allGood && p.flushWriter(name, writer)
|
||||||
|
}
|
||||||
|
if _, ok := p.output.(flusher); ok {
|
||||||
|
// User-provided output may or may not be flushable
|
||||||
|
allGood = allGood && p.flushWriter("stdout", p.output)
|
||||||
|
}
|
||||||
|
return allGood
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush a single, named output stream, and report whether it was flushed
|
||||||
|
// successfully (logging an error if not).
|
||||||
|
func (p *interp) flushStream(name string) bool {
|
||||||
|
writer := p.outputStreams[name]
|
||||||
|
if writer == nil {
|
||||||
|
p.printErrorf("error flushing %q: not an output file or pipe\n", name)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return p.flushWriter(name, writer)
|
||||||
|
}
|
||||||
|
|
||||||
|
// flusher is implemented by writers that can be asked to flush their
// output. The interpreter type-asserts its writers against this interface
// before flushing them.
type flusher interface {
|
||||||
|
Flush() error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush given output writer, and report whether it was flushed successfully
|
||||||
|
// (logging an error if not).
|
||||||
|
func (p *interp) flushWriter(name string, writer io.Writer) bool {
|
||||||
|
flusher, ok := writer.(flusher)
|
||||||
|
if !ok {
|
||||||
|
return true // not a flusher, don't error
|
||||||
|
}
|
||||||
|
err := flusher.Flush()
|
||||||
|
if err != nil {
|
||||||
|
p.printErrorf("error flushing %q: %v\n", name, err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush output and error streams.
|
||||||
|
func (p *interp) flushOutputAndError() {
|
||||||
|
if flusher, ok := p.output.(flusher); ok {
|
||||||
|
_ = flusher.Flush()
|
||||||
|
}
|
||||||
|
if flusher, ok := p.errorOutput.(flusher); ok {
|
||||||
|
_ = flusher.Flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print a message to the error output stream, flushing as necessary.
|
||||||
|
func (p *interp) printErrorf(format string, args ...interface{}) {
|
||||||
|
if flusher, ok := p.output.(flusher); ok {
|
||||||
|
_ = flusher.Flush() // ensure synchronization
|
||||||
|
}
|
||||||
|
fmt.Fprintf(p.errorOutput, format, args...)
|
||||||
|
if flusher, ok := p.errorOutput.(flusher); ok {
|
||||||
|
_ = flusher.Flush()
|
||||||
|
}
|
||||||
|
}
|
176
src/tool/awk/interp/newexecute.go
Normal file
176
src/tool/awk/interp/newexecute.go
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
// The New...Execute API (allows you to efficiently execute the same program repeatedly).
|
||||||
|
|
||||||
|
package interp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
const checkContextOps = 1000 // for efficiency, only check context every N instructions
|
||||||
|
|
||||||
|
// Interpreter is an interpreter for a specific program, allowing you to
|
||||||
|
// efficiently execute the same program over and over with different inputs.
|
||||||
|
// Use New to create an Interpreter.
|
||||||
|
//
|
||||||
|
// Most programs won't need reusable execution, and should use the simpler
|
||||||
|
// Exec or ExecProgram functions instead.
|
||||||
|
type Interpreter struct {
|
||||||
|
// interp is the underlying interpreter state created by New; Execute and
// ExecuteContext reset and reuse it on every run.
interp *interp
|
||||||
|
}
|
||||||
|
|
||||||
|
// New creates a reusable interpreter for the given program.
|
||||||
|
//
|
||||||
|
// Most programs won't need reusable execution, and should use the simpler
|
||||||
|
// Exec or ExecProgram functions instead.
|
||||||
|
func New(program *parser.Program) (*Interpreter, error) {
|
||||||
|
p := newInterp(program)
|
||||||
|
return &Interpreter{interp: p}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute runs this program with the given execution configuration (input,
|
||||||
|
// output, and variables) and returns the exit status code of the program. A
|
||||||
|
// nil config is valid and will use the defaults (zero values).
|
||||||
|
//
|
||||||
|
// Internal memory allocations are reused, so calling Execute on the same
|
||||||
|
// Interpreter instance is significantly more efficient than calling
|
||||||
|
// ExecProgram multiple times.
|
||||||
|
//
|
||||||
|
// I/O state is reset between each run, but variables and the random number
|
||||||
|
// generator seed are not; use ResetVars and ResetRand to reset those.
|
||||||
|
//
|
||||||
|
// It's best to set config.Environ to a non-nil slice, otherwise Execute will
|
||||||
|
// call the relatively inefficient os.Environ each time. Set config.Environ to
|
||||||
|
// []string{} if the script doesn't need environment variables, or call
|
||||||
|
// os.Environ once and set config.Environ to that value each execution.
|
||||||
|
//
|
||||||
|
// Note that config.Funcs must be the same value provided to
|
||||||
|
// parser.ParseProgram, and must not change between calls to Execute.
|
||||||
|
func (p *Interpreter) Execute(config *Config) (int, error) {
|
||||||
|
p.interp.resetCore()
|
||||||
|
p.interp.checkCtx = false
|
||||||
|
|
||||||
|
err := p.interp.setExecuteConfig(config)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return p.interp.executeAll()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *interp) resetCore() {
|
||||||
|
p.scanner = nil
|
||||||
|
for k := range p.scanners {
|
||||||
|
delete(p.scanners, k)
|
||||||
|
}
|
||||||
|
p.input = nil
|
||||||
|
for k := range p.inputStreams {
|
||||||
|
delete(p.inputStreams, k)
|
||||||
|
}
|
||||||
|
for k := range p.outputStreams {
|
||||||
|
delete(p.outputStreams, k)
|
||||||
|
}
|
||||||
|
for k := range p.commands {
|
||||||
|
delete(p.commands, k)
|
||||||
|
}
|
||||||
|
|
||||||
|
p.sp = 0
|
||||||
|
p.localArrays = p.localArrays[:0]
|
||||||
|
p.callDepth = 0
|
||||||
|
|
||||||
|
p.filename = null()
|
||||||
|
p.line = ""
|
||||||
|
p.lineIsTrueStr = false
|
||||||
|
p.lineNum = 0
|
||||||
|
p.fileLineNum = 0
|
||||||
|
p.fields = nil
|
||||||
|
p.fieldsIsTrueStr = nil
|
||||||
|
p.numFields = 0
|
||||||
|
p.haveFields = false
|
||||||
|
|
||||||
|
p.exitStatus = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *interp) resetVars() {
|
||||||
|
// Reset global scalars
|
||||||
|
for i := range p.globals {
|
||||||
|
p.globals[i] = null()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset global arrays
|
||||||
|
for _, array := range p.arrays {
|
||||||
|
for k := range array {
|
||||||
|
delete(array, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset special variables
|
||||||
|
p.convertFormat = "%.6g"
|
||||||
|
p.outputFormat = "%.6g"
|
||||||
|
p.fieldSep = " "
|
||||||
|
p.fieldSepRegex = nil
|
||||||
|
p.recordSep = "\n"
|
||||||
|
p.recordSepRegex = nil
|
||||||
|
p.recordTerminator = ""
|
||||||
|
p.outputFieldSep = " "
|
||||||
|
p.outputRecordSep = "\n"
|
||||||
|
p.subscriptSep = "\x1c"
|
||||||
|
p.matchLength = 0
|
||||||
|
p.matchStart = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResetVars resets this interpreter's variables, setting scalar variables to
|
||||||
|
// null, clearing arrays, and resetting special variables such as FS and RS to
|
||||||
|
// their defaults.
|
||||||
|
func (p *Interpreter) ResetVars() {
|
||||||
|
p.interp.resetVars()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResetRand resets this interpreter's random number generator seed, so that
|
||||||
|
// rand() produces the same sequence it would have after calling New. This is
|
||||||
|
// a relatively CPU-intensive operation.
|
||||||
|
func (p *Interpreter) ResetRand() {
|
||||||
|
p.interp.randSeed = 1.0
|
||||||
|
p.interp.random.Seed(int64(math.Float64bits(p.interp.randSeed)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExecuteContext is like Execute, but takes a context to allow the caller to
|
||||||
|
// set an execution timeout or cancel the execution. For efficiency, the
|
||||||
|
// context is only tested every 1000 virtual machine instructions.
|
||||||
|
//
|
||||||
|
// Context handling is not preemptive: currently long-running operations like
|
||||||
|
// system() won't be interrupted.
|
||||||
|
func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error) {
|
||||||
|
p.interp.resetCore()
|
||||||
|
p.interp.checkCtx = ctx != context.Background() && ctx != context.TODO()
|
||||||
|
p.interp.ctx = ctx
|
||||||
|
p.interp.ctxDone = ctx.Done()
|
||||||
|
p.interp.ctxOps = 0
|
||||||
|
|
||||||
|
err := p.interp.setExecuteConfig(config)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return p.interp.executeAll()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *interp) checkContext() error {
|
||||||
|
p.ctxOps++
|
||||||
|
if p.ctxOps < checkContextOps {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
p.ctxOps = 0
|
||||||
|
return p.checkContextNow()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *interp) checkContextNow() error {
|
||||||
|
select {
|
||||||
|
case <-p.ctxDone:
|
||||||
|
return p.ctx.Err()
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
163
src/tool/awk/interp/newexecute_test.go
Normal file
163
src/tool/awk/interp/newexecute_test.go
Normal file
|
@ -0,0 +1,163 @@
|
||||||
|
// Tests for the New...Execute API.
|
||||||
|
|
||||||
|
package interp_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/interp"
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This definitely doesn't test that everything was reset, but it's a good start.
|
||||||
|
func TestNewExecute(t *testing.T) {
|
||||||
|
source := `{ print NR, OFMT, x, y, a["k"], $1, $3; OFMT="%g"; x++; y++; a["k"]++ }`
|
||||||
|
interpreter := newInterp(t, source)
|
||||||
|
|
||||||
|
// First execution.
|
||||||
|
var output bytes.Buffer
|
||||||
|
status, err := interpreter.Execute(&interp.Config{
|
||||||
|
Stdin: strings.NewReader("one two three\nfour five six\n"),
|
||||||
|
Output: &output,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error executing: %v", err)
|
||||||
|
}
|
||||||
|
if status != 0 {
|
||||||
|
t.Fatalf("expected status 0, got %d", status)
|
||||||
|
}
|
||||||
|
normalized := normalizeNewlines(output.String())
|
||||||
|
expected := "1 %.6g one three\n2 %g 1 1 1 four six\n"
|
||||||
|
if normalized != expected {
|
||||||
|
t.Fatalf("expected %q, got %q", expected, normalized)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second execution, with ResetVars.
|
||||||
|
output.Reset()
|
||||||
|
interpreter.ResetVars()
|
||||||
|
status, err = interpreter.Execute(&interp.Config{
|
||||||
|
Stdin: strings.NewReader("ONE TWO THREE\nFOUR FIVE SIX\n"),
|
||||||
|
Output: &output,
|
||||||
|
Vars: []string{"x", "10"},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error executing: %v", err)
|
||||||
|
}
|
||||||
|
if status != 0 {
|
||||||
|
t.Fatalf("expected status 0, got %d", status)
|
||||||
|
}
|
||||||
|
normalized = normalizeNewlines(output.String())
|
||||||
|
expected = "1 %.6g 10 ONE THREE\n2 %g 11 1 1 FOUR SIX\n"
|
||||||
|
if normalized != expected {
|
||||||
|
t.Fatalf("expected %q, got %q", expected, normalized)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Third execution, without ResetVars.
|
||||||
|
output.Reset()
|
||||||
|
status, err = interpreter.Execute(&interp.Config{
|
||||||
|
Stdin: strings.NewReader("1 2 3\n4 5 6\n"),
|
||||||
|
Output: &output,
|
||||||
|
Vars: []string{"x", "100"},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error executing: %v", err)
|
||||||
|
}
|
||||||
|
if status != 0 {
|
||||||
|
t.Fatalf("expected status 0, got %d", status)
|
||||||
|
}
|
||||||
|
normalized = normalizeNewlines(output.String())
|
||||||
|
expected = "1 %g 100 2 2 1 3\n2 %g 101 3 3 4 6\n"
|
||||||
|
if normalized != expected {
|
||||||
|
t.Fatalf("expected %q, got %q", expected, normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResetRand(t *testing.T) {
|
||||||
|
source := `BEGIN { print rand(), rand(), rand() }`
|
||||||
|
interpreter := newInterp(t, source)
|
||||||
|
var output bytes.Buffer
|
||||||
|
|
||||||
|
_, err := interpreter.Execute(&interp.Config{Output: &output})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error executing: %v", err)
|
||||||
|
}
|
||||||
|
original := output.String()
|
||||||
|
|
||||||
|
output.Reset()
|
||||||
|
_, err = interpreter.Execute(&interp.Config{Output: &output})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error executing: %v", err)
|
||||||
|
}
|
||||||
|
noResetRand := output.String()
|
||||||
|
if original == noResetRand {
|
||||||
|
t.Fatalf("expected different random numbers, got %q both times", original)
|
||||||
|
}
|
||||||
|
|
||||||
|
output.Reset()
|
||||||
|
interpreter.ResetRand()
|
||||||
|
_, err = interpreter.Execute(&interp.Config{Output: &output})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error executing: %v", err)
|
||||||
|
}
|
||||||
|
withResetRand := output.String()
|
||||||
|
if original != withResetRand {
|
||||||
|
t.Fatalf("expected same random numbers (%q) as original (%q)", withResetRand, original)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExecuteContextNoError(t *testing.T) {
|
||||||
|
interpreter := newInterp(t, `BEGIN {}`)
|
||||||
|
_, err := interpreter.ExecuteContext(context.Background(), nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("execute error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExecuteContextTimeout(t *testing.T) {
|
||||||
|
interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_, err := interpreter.ExecuteContext(ctx, nil)
|
||||||
|
if !errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
t.Fatalf("expected DeadlineExceeded error, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExecuteContextCancel(t *testing.T) {
|
||||||
|
interpreter := newInterp(t, `BEGIN { for (i=0; i<100000000; i++) s+=i }`) // would take about 4s
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel() // cancel it right away
|
||||||
|
_, err := interpreter.ExecuteContext(ctx, nil)
|
||||||
|
if !errors.Is(err, context.Canceled) {
|
||||||
|
t.Fatalf("expected Canceled error, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExecuteContextSystemTimeout(t *testing.T) {
|
||||||
|
t.Skip("TODO: skipping for now due to #122")
|
||||||
|
interpreter := newInterp(t, `BEGIN { print system("sleep 4") }`)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
_, err := interpreter.ExecuteContext(ctx, nil)
|
||||||
|
if !errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
t.Fatalf("expected DeadlineExceeded error, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newInterp(t *testing.T, src string) *interp.Interpreter {
|
||||||
|
t.Helper()
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse error: %v", err)
|
||||||
|
}
|
||||||
|
interpreter, err := interp.New(prog)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("interp.New error: %v", err)
|
||||||
|
}
|
||||||
|
return interpreter
|
||||||
|
}
|
294
src/tool/awk/interp/value.go
Normal file
294
src/tool/awk/interp/value.go
Normal file
|
@ -0,0 +1,294 @@
|
||||||
|
// GoAWK interpreter value type (not exported).
|
||||||
|
|
||||||
|
package interp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// valueType identifies which kind of AWK value a value holds.
type valueType uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
typeNull valueType = iota // zero valueType; the type of the value returned by null()
|
||||||
|
typeStr // string value, as created by str()
|
||||||
|
typeNum // numeric value, as created by num()
|
||||||
|
typeNumStr // "numeric string" from an input field, as created by numStr()
|
||||||
|
)
|
||||||
|
|
||||||
|
// An AWK value (these are passed around by value)
|
||||||
|
// The zero value is the null value: typeNull, with an empty string and a
// zero number.
type value struct {
|
||||||
|
typ valueType // Type of value
|
||||||
|
s string // String value (for typeStr and typeNumStr)
|
||||||
|
n float64 // Numeric value (for typeNum)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new null value
|
||||||
|
func null() value {
|
||||||
|
return value{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new number value
|
||||||
|
func num(n float64) value {
|
||||||
|
return value{typ: typeNum, n: n}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new string value
|
||||||
|
func str(s string) value {
|
||||||
|
return value{typ: typeStr, s: s}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new value to represent a "numeric string" from an input field
|
||||||
|
func numStr(s string) value {
|
||||||
|
return value{typ: typeNumStr, s: s}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a numeric value from a Go bool
|
||||||
|
func boolean(b bool) value {
|
||||||
|
if b {
|
||||||
|
return num(1)
|
||||||
|
}
|
||||||
|
return num(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string representation of v for debugging.
|
||||||
|
func (v value) String() string {
|
||||||
|
switch v.typ {
|
||||||
|
case typeStr:
|
||||||
|
return fmt.Sprintf("str(%q)", v.s)
|
||||||
|
case typeNum:
|
||||||
|
return fmt.Sprintf("num(%s)", v.str("%.6g"))
|
||||||
|
case typeNumStr:
|
||||||
|
return fmt.Sprintf("numStr(%q)", v.s)
|
||||||
|
default:
|
||||||
|
return "null()"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return true if value is a "true string" (a string or a "numeric string"
|
||||||
|
// from an input field that can't be converted to a number). If false,
|
||||||
|
// also return the (possibly converted) number.
|
||||||
|
func (v value) isTrueStr() (float64, bool) {
|
||||||
|
switch v.typ {
|
||||||
|
case typeStr:
|
||||||
|
return 0, true
|
||||||
|
case typeNumStr:
|
||||||
|
f, err := parseFloat(v.s)
|
||||||
|
if err != nil {
|
||||||
|
return 0, true
|
||||||
|
}
|
||||||
|
return f, false
|
||||||
|
default: // typeNum, typeNull
|
||||||
|
return v.n, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return Go bool value of AWK value. For numbers or numeric strings,
|
||||||
|
// zero is false and everything else is true. For strings, empty
|
||||||
|
// string is false and everything else is true.
|
||||||
|
func (v value) boolean() bool {
|
||||||
|
switch v.typ {
|
||||||
|
case typeStr:
|
||||||
|
return v.s != ""
|
||||||
|
case typeNumStr:
|
||||||
|
f, err := parseFloat(v.s)
|
||||||
|
if err != nil {
|
||||||
|
return v.s != ""
|
||||||
|
}
|
||||||
|
return f != 0
|
||||||
|
default: // typeNum, typeNull
|
||||||
|
return v.n != 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Like strconv.ParseFloat, but allow hex floating point without exponent, and
|
||||||
|
// allow "+nan" and "-nan" (though they both return math.NaN()). Also disallow
|
||||||
|
// underscore digit separators.
|
||||||
|
func parseFloat(s string) (float64, error) {
|
||||||
|
s = strings.TrimSpace(s)
|
||||||
|
if len(s) > 1 && (s[0] == '+' || s[0] == '-') {
|
||||||
|
if len(s) == 4 && hasNaNPrefix(s[1:]) {
|
||||||
|
// ParseFloat doesn't handle "nan" with sign prefix, so handle it here.
|
||||||
|
return math.NaN(), nil
|
||||||
|
}
|
||||||
|
if len(s) > 3 && hasHexPrefix(s[1:]) && strings.IndexByte(s, 'p') < 0 {
|
||||||
|
s += "p0"
|
||||||
|
}
|
||||||
|
} else if len(s) > 2 && hasHexPrefix(s) && strings.IndexByte(s, 'p') < 0 {
|
||||||
|
s += "p0"
|
||||||
|
}
|
||||||
|
n, err := strconv.ParseFloat(s, 64)
|
||||||
|
if err == nil && strings.IndexByte(s, '_') >= 0 {
|
||||||
|
// Underscore separators aren't supported by AWK.
|
||||||
|
return 0, strconv.ErrSyntax
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return value's string value, or convert to a string using given
|
||||||
|
// format if a number value. Integers are a special case and don't
|
||||||
|
// use floatFormat.
|
||||||
|
func (v value) str(floatFormat string) string {
|
||||||
|
if v.typ == typeNum {
|
||||||
|
switch {
|
||||||
|
case math.IsNaN(v.n):
|
||||||
|
return "nan"
|
||||||
|
case math.IsInf(v.n, 0):
|
||||||
|
if v.n < 0 {
|
||||||
|
return "-inf"
|
||||||
|
} else {
|
||||||
|
return "inf"
|
||||||
|
}
|
||||||
|
case v.n == float64(int(v.n)):
|
||||||
|
return strconv.Itoa(int(v.n))
|
||||||
|
default:
|
||||||
|
if floatFormat == "%.6g" {
|
||||||
|
return strconv.FormatFloat(v.n, 'g', 6, 64)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf(floatFormat, v.n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// For typeStr and typeNumStr we already have the string, for
|
||||||
|
// typeNull v.s == "".
|
||||||
|
return v.s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return value's number value, converting from string if necessary
|
||||||
|
func (v value) num() float64 {
|
||||||
|
switch v.typ {
|
||||||
|
case typeStr, typeNumStr:
|
||||||
|
// Ensure string starts with a float and convert it
|
||||||
|
return parseFloatPrefix(v.s)
|
||||||
|
default: // typeNum, typeNull
|
||||||
|
return v.n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// asciiSpace is a byte-indexed lookup table whose entry is non-zero for the
// ASCII whitespace bytes (tab, newline, vertical tab, form feed, carriage
// return and space). parseFloatPrefix uses it to skip leading whitespace.
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
|
||||||
|
|
||||||
|
// Like strconv.ParseFloat, but parses at the start of string and
|
||||||
|
// allows things like "1.5foo"
|
||||||
|
func parseFloatPrefix(s string) float64 {
|
||||||
|
// Skip whitespace at start
|
||||||
|
i := 0
|
||||||
|
for i < len(s) && asciiSpace[s[i]] != 0 {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
start := i
|
||||||
|
|
||||||
|
// Parse optional sign and check for NaN and Inf.
|
||||||
|
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if i+3 <= len(s) {
|
||||||
|
if hasNaNPrefix(s[i:]) {
|
||||||
|
return math.NaN()
|
||||||
|
}
|
||||||
|
if hasInfPrefix(s[i:]) {
|
||||||
|
if s[start] == '-' {
|
||||||
|
return math.Inf(-1)
|
||||||
|
}
|
||||||
|
return math.Inf(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse mantissa: initial digit(s), optional '.', then more digits
|
||||||
|
if i+2 < len(s) && hasHexPrefix(s[i:]) {
|
||||||
|
return parseHexFloatPrefix(s, start, i+2)
|
||||||
|
}
|
||||||
|
gotDigit := false
|
||||||
|
for i < len(s) && isDigit(s[i]) {
|
||||||
|
gotDigit = true
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if i < len(s) && s[i] == '.' {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
for i < len(s) && isDigit(s[i]) {
|
||||||
|
gotDigit = true
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if !gotDigit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse exponent ("1e" and similar are allowed, but ParseFloat
|
||||||
|
// rejects them)
|
||||||
|
end := i
|
||||||
|
if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
|
||||||
|
i++
|
||||||
|
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
for i < len(s) && isDigit(s[i]) {
|
||||||
|
i++
|
||||||
|
end = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
floatStr := s[start:end]
|
||||||
|
f, _ := strconv.ParseFloat(floatStr, 64)
|
||||||
|
return f // Returns infinity in case of "value out of range" error
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasHexPrefix reports whether s starts with "0x" or "0X". The caller must
// make sure s is long enough: the second byte is read whenever the first
// byte is '0'.
func hasHexPrefix(s string) bool {
	if s[0] != '0' {
		return false
	}
	switch s[1] {
	case 'x', 'X':
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// hasNaNPrefix reports whether s starts with "nan" in any letter case. The
// caller must make sure s has at least three bytes.
func hasNaNPrefix(s string) bool {
	if s[0] != 'n' && s[0] != 'N' {
		return false
	}
	if s[1] != 'a' && s[1] != 'A' {
		return false
	}
	return s[2] == 'n' || s[2] == 'N'
}
|
||||||
|
|
||||||
|
// hasInfPrefix reports whether s starts with "inf" in any letter case. The
// caller must make sure s has at least three bytes.
func hasInfPrefix(s string) bool {
	if s[0] != 'i' && s[0] != 'I' {
		return false
	}
	if s[1] != 'n' && s[1] != 'N' {
		return false
	}
	return s[2] == 'f' || s[2] == 'F'
}
|
||||||
|
|
||||||
|
// Helper used by parseFloatPrefix to handle hexadecimal floating point.
|
||||||
|
func parseHexFloatPrefix(s string, start, i int) float64 {
|
||||||
|
gotDigit := false
|
||||||
|
for i < len(s) && isHexDigit(s[i]) {
|
||||||
|
gotDigit = true
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if i < len(s) && s[i] == '.' {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
for i < len(s) && isHexDigit(s[i]) {
|
||||||
|
gotDigit = true
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if !gotDigit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
gotExponent := false
|
||||||
|
end := i
|
||||||
|
if i < len(s) && (s[i] == 'p' || s[i] == 'P') {
|
||||||
|
i++
|
||||||
|
if i < len(s) && (s[i] == '+' || s[i] == '-') {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
for i < len(s) && isDigit(s[i]) {
|
||||||
|
gotExponent = true
|
||||||
|
i++
|
||||||
|
end = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
floatStr := s[start:end]
|
||||||
|
if !gotExponent {
|
||||||
|
floatStr += "p0" // AWK allows "0x12", ParseFloat requires "0x12p0"
|
||||||
|
}
|
||||||
|
f, _ := strconv.ParseFloat(floatStr, 64)
|
||||||
|
return f // Returns infinity in case of "value out of range" error
|
||||||
|
}
|
||||||
|
|
||||||
|
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|
||||||
|
|
||||||
|
// isHexDigit reports whether c is an ASCII hexadecimal digit (0-9, a-f or
// A-F).
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
|
1259
src/tool/awk/interp/vm.go
Normal file
1259
src/tool/awk/interp/vm.go
Normal file
File diff suppressed because it is too large
Load diff
499
src/tool/awk/lexer/lexer.go
Normal file
499
src/tool/awk/lexer/lexer.go
Normal file
|
@ -0,0 +1,499 @@
|
||||||
|
// Package lexer is an AWK lexer (tokenizer).
|
||||||
|
//
|
||||||
|
// The lexer turns a string of AWK source code into a stream of
|
||||||
|
// tokens for parsing.
|
||||||
|
//
|
||||||
|
// To tokenize some source, create a new lexer with NewLexer(src) and
|
||||||
|
// then call Scan() until the token type is EOF or ILLEGAL.
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lexer tokenizes a byte string of AWK source code. Use NewLexer to
|
||||||
|
// actually create a lexer, and Scan() or ScanRegex() to get tokens.
|
||||||
|
type Lexer struct {
|
||||||
|
// src is the source code being tokenized, as passed to NewLexer.
src []byte
|
||||||
|
// presumably the read offset into src -- TODO confirm against next()
offset int
|
||||||
|
// ch is the current character that the scanner examines.
ch byte
|
||||||
|
// pos is the position the scanner reports alongside a token.
pos Position
|
||||||
|
// nextPos is initialised to line 1, column 1 by NewLexer; presumably the
// position of the next character to be read -- TODO confirm against next()
nextPos Position
|
||||||
|
// hadSpace is what HadSpace returns: whether the scanner skipped
// whitespace (or a line continuation) before the last token.
hadSpace bool
|
||||||
|
// lastTok is the token most recently returned by Scan.
lastTok Token
|
||||||
|
}
|
||||||
|
|
||||||
|
// Position stores the source line and column where a token starts.
|
||||||
|
// NewLexer starts counting at line 1, column 1.
type Position struct {
|
||||||
|
// Line number of the token (starts at 1).
|
||||||
|
Line int
|
||||||
|
// Column on the line (starts at 1). Note that this is the byte
|
||||||
|
// offset into the line, not rune offset.
|
||||||
|
Column int
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewLexer creates a new lexer that will tokenize the given source
|
||||||
|
// code. See the module-level example for a working example.
|
||||||
|
func NewLexer(src []byte) *Lexer {
|
||||||
|
l := &Lexer{src: src}
|
||||||
|
l.nextPos.Line = 1
|
||||||
|
l.nextPos.Column = 1
|
||||||
|
l.next()
|
||||||
|
return l
|
||||||
|
}
|
||||||
|
|
||||||
|
// HadSpace returns true if the previously-scanned token had
|
||||||
|
// whitespace before it. Used by the parser because when calling a
|
||||||
|
// user-defined function the grammar doesn't allow a space between
|
||||||
|
// the function name and the left parenthesis.
|
||||||
|
func (l *Lexer) HadSpace() bool {
|
||||||
|
return l.hadSpace
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan scans the next token and returns its position (line/column),
|
||||||
|
// token value (one of the uppercase token constants), and the
|
||||||
|
// string value of the token. For most tokens, the token value is
|
||||||
|
// empty. For NAME, NUMBER, STRING, and REGEX tokens, it's the
|
||||||
|
// token's value. For an ILLEGAL token, it's the error message.
|
||||||
|
func (l *Lexer) Scan() (Position, Token, string) {
|
||||||
|
pos, tok, val := l.scan()
|
||||||
|
l.lastTok = tok
|
||||||
|
return pos, tok, val
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does the real work of scanning. Scan() wraps this to more easily
|
||||||
|
// set lastTok.
|
||||||
|
func (l *Lexer) scan() (Position, Token, string) {
|
||||||
|
// Skip whitespace (except newline, which is a token)
|
||||||
|
l.hadSpace = false
|
||||||
|
for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\\' {
|
||||||
|
l.hadSpace = true
|
||||||
|
if l.ch == '\\' {
|
||||||
|
l.next()
|
||||||
|
if l.ch == '\r' {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
if l.ch != '\n' {
|
||||||
|
return l.pos, ILLEGAL, "expected \\n after \\ line continuation"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
if l.ch == '#' {
|
||||||
|
// Skip comment till end of line
|
||||||
|
l.next()
|
||||||
|
for l.ch != '\n' && l.ch != 0 {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if l.ch == 0 {
|
||||||
|
// l.next() reached end of input
|
||||||
|
return l.pos, EOF, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
pos := l.pos
|
||||||
|
tok := ILLEGAL
|
||||||
|
val := ""
|
||||||
|
|
||||||
|
ch := l.ch
|
||||||
|
l.next()
|
||||||
|
|
||||||
|
// Names: keywords and functions
|
||||||
|
if isNameStart(ch) {
|
||||||
|
start := l.offset - 2
|
||||||
|
for isNameStart(l.ch) || isDigit(l.ch) {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
name := string(l.src[start : l.offset-1])
|
||||||
|
tok := KeywordToken(name)
|
||||||
|
if tok == ILLEGAL {
|
||||||
|
tok = NAME
|
||||||
|
val = name
|
||||||
|
}
|
||||||
|
return pos, tok, val
|
||||||
|
}
|
||||||
|
|
||||||
|
// These are ordered by my guess at frequency of use. Should run
|
||||||
|
// through a corpus of real AWK programs to determine actual
|
||||||
|
// frequency.
|
||||||
|
switch ch {
|
||||||
|
case '$':
|
||||||
|
tok = DOLLAR
|
||||||
|
case '@':
|
||||||
|
tok = AT
|
||||||
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
|
||||||
|
// Avoid make/append and use l.offset directly for performance
|
||||||
|
start := l.offset - 2
|
||||||
|
gotDigit := false
|
||||||
|
if ch != '.' {
|
||||||
|
gotDigit = true
|
||||||
|
for isDigit(l.ch) {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
if l.ch == '.' {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for isDigit(l.ch) {
|
||||||
|
gotDigit = true
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
if !gotDigit {
|
||||||
|
return l.pos, ILLEGAL, "expected digits"
|
||||||
|
}
|
||||||
|
if l.ch == 'e' || l.ch == 'E' {
|
||||||
|
l.next()
|
||||||
|
gotSign := false
|
||||||
|
if l.ch == '+' || l.ch == '-' {
|
||||||
|
gotSign = true
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
gotDigit = false
|
||||||
|
for isDigit(l.ch) {
|
||||||
|
l.next()
|
||||||
|
gotDigit = true
|
||||||
|
}
|
||||||
|
// Per awk/gawk, "1e" is allowed and parsed as "1 e" (with "e"
|
||||||
|
// considered a variable). "1e+" is parsed as "1e + ...".
|
||||||
|
if !gotDigit {
|
||||||
|
if gotSign {
|
||||||
|
l.unread() // unread the '+' or '-'
|
||||||
|
}
|
||||||
|
l.unread() // unread the 'e' or 'E'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tok = NUMBER
|
||||||
|
val = string(l.src[start : l.offset-1])
|
||||||
|
case '{':
|
||||||
|
tok = LBRACE
|
||||||
|
case '}':
|
||||||
|
tok = RBRACE
|
||||||
|
case '=':
|
||||||
|
tok = l.choice('=', ASSIGN, EQUALS)
|
||||||
|
case '<':
|
||||||
|
tok = l.choice('=', LESS, LTE)
|
||||||
|
case '>':
|
||||||
|
switch l.ch {
|
||||||
|
case '=':
|
||||||
|
l.next()
|
||||||
|
tok = GTE
|
||||||
|
case '>':
|
||||||
|
l.next()
|
||||||
|
tok = APPEND
|
||||||
|
default:
|
||||||
|
tok = GREATER
|
||||||
|
}
|
||||||
|
case '"', '\'':
|
||||||
|
// Note: POSIX awk spec doesn't allow single-quoted strings,
|
||||||
|
// but this helps with quoting, especially on Windows
|
||||||
|
// where the shell quote character is " (double quote).
|
||||||
|
s, err := parseString(ch, func() byte { return l.ch }, l.next)
|
||||||
|
if err != nil {
|
||||||
|
return l.pos, ILLEGAL, err.Error()
|
||||||
|
}
|
||||||
|
if l.ch != ch {
|
||||||
|
return l.pos, ILLEGAL, "didn't find end quote in string"
|
||||||
|
}
|
||||||
|
l.next()
|
||||||
|
tok = STRING
|
||||||
|
val = s
|
||||||
|
case '(':
|
||||||
|
tok = LPAREN
|
||||||
|
case ')':
|
||||||
|
tok = RPAREN
|
||||||
|
case ',':
|
||||||
|
tok = COMMA
|
||||||
|
case ';':
|
||||||
|
tok = SEMICOLON
|
||||||
|
case '+':
|
||||||
|
switch l.ch {
|
||||||
|
case '+':
|
||||||
|
l.next()
|
||||||
|
tok = INCR
|
||||||
|
case '=':
|
||||||
|
l.next()
|
||||||
|
tok = ADD_ASSIGN
|
||||||
|
default:
|
||||||
|
tok = ADD
|
||||||
|
}
|
||||||
|
case '-':
|
||||||
|
switch l.ch {
|
||||||
|
case '-':
|
||||||
|
l.next()
|
||||||
|
tok = DECR
|
||||||
|
case '=':
|
||||||
|
l.next()
|
||||||
|
tok = SUB_ASSIGN
|
||||||
|
default:
|
||||||
|
tok = SUB
|
||||||
|
}
|
||||||
|
case '*':
|
||||||
|
switch l.ch {
|
||||||
|
case '*':
|
||||||
|
l.next()
|
||||||
|
tok = l.choice('=', POW, POW_ASSIGN)
|
||||||
|
case '=':
|
||||||
|
l.next()
|
||||||
|
tok = MUL_ASSIGN
|
||||||
|
default:
|
||||||
|
tok = MUL
|
||||||
|
}
|
||||||
|
case '/':
|
||||||
|
tok = l.choice('=', DIV, DIV_ASSIGN)
|
||||||
|
case '%':
|
||||||
|
tok = l.choice('=', MOD, MOD_ASSIGN)
|
||||||
|
case '[':
|
||||||
|
tok = LBRACKET
|
||||||
|
case ']':
|
||||||
|
tok = RBRACKET
|
||||||
|
case '\n':
|
||||||
|
tok = NEWLINE
|
||||||
|
case '^':
|
||||||
|
tok = l.choice('=', POW, POW_ASSIGN)
|
||||||
|
case '!':
|
||||||
|
switch l.ch {
|
||||||
|
case '=':
|
||||||
|
l.next()
|
||||||
|
tok = NOT_EQUALS
|
||||||
|
case '~':
|
||||||
|
l.next()
|
||||||
|
tok = NOT_MATCH
|
||||||
|
default:
|
||||||
|
tok = NOT
|
||||||
|
}
|
||||||
|
case '~':
|
||||||
|
tok = MATCH
|
||||||
|
case '?':
|
||||||
|
tok = QUESTION
|
||||||
|
case ':':
|
||||||
|
tok = COLON
|
||||||
|
case '&':
|
||||||
|
tok = l.choice('&', ILLEGAL, AND)
|
||||||
|
if tok == ILLEGAL {
|
||||||
|
return l.pos, ILLEGAL, "unexpected char after '&'"
|
||||||
|
}
|
||||||
|
case '|':
|
||||||
|
tok = l.choice('|', PIPE, OR)
|
||||||
|
default:
|
||||||
|
tok = ILLEGAL
|
||||||
|
val = "unexpected char"
|
||||||
|
}
|
||||||
|
return pos, tok, val
|
||||||
|
}
|
||||||
|
|
||||||
|
// ScanRegex parses an AWK regular expression in /slash/ syntax. The
|
||||||
|
// AWK grammar has somewhat special handling of regex tokens, so the
|
||||||
|
// parser can only call this after a DIV or DIV_ASSIGN token has just
|
||||||
|
// been scanned.
|
||||||
|
func (l *Lexer) ScanRegex() (Position, Token, string) {
|
||||||
|
pos, tok, val := l.scanRegex()
|
||||||
|
l.lastTok = tok
|
||||||
|
return pos, tok, val
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does the real work of scanning a regex. ScanRegex() wraps this to
|
||||||
|
// more easily set lastTok.
|
||||||
|
func (l *Lexer) scanRegex() (Position, Token, string) {
|
||||||
|
pos := l.pos
|
||||||
|
chars := make([]byte, 0, 32) // most won't require heap allocation
|
||||||
|
switch l.lastTok {
|
||||||
|
case DIV:
|
||||||
|
// Regex after '/' (the usual case)
|
||||||
|
pos.Column -= 1
|
||||||
|
case DIV_ASSIGN:
|
||||||
|
// Regex after '/=' (happens when regex starts with '=')
|
||||||
|
pos.Column -= 2
|
||||||
|
chars = append(chars, '=')
|
||||||
|
default:
|
||||||
|
panic("ScanRegex should only be called after DIV or DIV_ASSIGN token")
|
||||||
|
}
|
||||||
|
for l.ch != '/' {
|
||||||
|
c := l.ch
|
||||||
|
if c == 0 {
|
||||||
|
return l.pos, ILLEGAL, "didn't find end slash in regex"
|
||||||
|
}
|
||||||
|
if c == '\r' || c == '\n' {
|
||||||
|
return l.pos, ILLEGAL, "can't have newline in regex"
|
||||||
|
}
|
||||||
|
if c == '\\' {
|
||||||
|
l.next()
|
||||||
|
if l.ch != '/' {
|
||||||
|
chars = append(chars, '\\')
|
||||||
|
}
|
||||||
|
c = l.ch
|
||||||
|
}
|
||||||
|
chars = append(chars, c)
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
l.next()
|
||||||
|
return pos, REGEX, string(chars)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load the next character into l.ch (or 0 on end of input) and update
|
||||||
|
// line and column position.
|
||||||
|
func (l *Lexer) next() {
|
||||||
|
l.pos = l.nextPos
|
||||||
|
if l.offset >= len(l.src) {
|
||||||
|
// For last character, move offset 1 past the end as it
|
||||||
|
// simplifies offset calculations in NAME and NUMBER
|
||||||
|
if l.ch != 0 {
|
||||||
|
l.ch = 0
|
||||||
|
l.offset++
|
||||||
|
l.nextPos.Column++
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ch := l.src[l.offset]
|
||||||
|
if ch == '\n' {
|
||||||
|
l.nextPos.Line++
|
||||||
|
l.nextPos.Column = 1
|
||||||
|
} else if ch != '\r' {
|
||||||
|
l.nextPos.Column++
|
||||||
|
}
|
||||||
|
l.ch = ch
|
||||||
|
l.offset++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Un-read the character just scanned (doesn't handle line boundaries).
|
||||||
|
func (l *Lexer) unread() {
|
||||||
|
l.offset--
|
||||||
|
l.pos.Column--
|
||||||
|
l.nextPos.Column--
|
||||||
|
l.ch = l.src[l.offset-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
// isNameStart reports whether ch can begin an AWK name: an underscore or
// an ASCII letter.
func isNameStart(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
|
||||||
|
|
||||||
|
// Return the hex digit 0-15 corresponding to the given ASCII byte,
|
||||||
|
// or -1 if it's not a valid hex digit.
|
||||||
|
func hexDigit(ch byte) int {
|
||||||
|
switch {
|
||||||
|
case isDigit(ch):
|
||||||
|
return int(ch - '0')
|
||||||
|
case ch >= 'a' && ch <= 'f':
|
||||||
|
return int(ch - 'a' + 10)
|
||||||
|
case ch >= 'A' && ch <= 'F':
|
||||||
|
return int(ch - 'A' + 10)
|
||||||
|
default:
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Lexer) choice(ch byte, one, two Token) Token {
|
||||||
|
if l.ch == ch {
|
||||||
|
l.next()
|
||||||
|
return two
|
||||||
|
}
|
||||||
|
return one
|
||||||
|
}
|
||||||
|
|
||||||
|
// PeekByte returns the next unscanned byte; used when parsing
|
||||||
|
// "getline lvalue" expressions. Returns 0 at end of input.
|
||||||
|
func (l *Lexer) PeekByte() byte {
|
||||||
|
return l.ch
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unescape unescapes the backslash escapes in s (which shouldn't include the
|
||||||
|
// surrounding quotes) and returns the unquoted string. It's intended for use
|
||||||
|
// when unescaping command line var=value assignments, as required by the
|
||||||
|
// POSIX AWK spec.
|
||||||
|
func Unescape(s string) (string, error) {
|
||||||
|
i := 0
|
||||||
|
ch := func() byte {
|
||||||
|
if i >= len(s) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return s[i]
|
||||||
|
}
|
||||||
|
next := func() {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
return parseString(0, ch, next)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parses a string ending with given quote character (not parsed). The ch
|
||||||
|
// function returns the current character (or 0 at the end); the next function
|
||||||
|
// moves forward one character.
|
||||||
|
func parseString(quote byte, ch func() byte, next func()) (string, error) {
|
||||||
|
chars := make([]byte, 0, 32) // most strings won't require heap allocation
|
||||||
|
for {
|
||||||
|
c := ch()
|
||||||
|
if c == quote || c == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if c == '\r' || c == '\n' {
|
||||||
|
return "", errors.New("can't have newline in string")
|
||||||
|
}
|
||||||
|
if c != '\\' {
|
||||||
|
// Normal, non-escaped character
|
||||||
|
chars = append(chars, c)
|
||||||
|
next()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Escape sequence, skip over \ and process
|
||||||
|
next()
|
||||||
|
switch ch() {
|
||||||
|
case 'n':
|
||||||
|
c = '\n'
|
||||||
|
next()
|
||||||
|
case 't':
|
||||||
|
c = '\t'
|
||||||
|
next()
|
||||||
|
case 'r':
|
||||||
|
c = '\r'
|
||||||
|
next()
|
||||||
|
case 'a':
|
||||||
|
c = '\a'
|
||||||
|
next()
|
||||||
|
case 'b':
|
||||||
|
c = '\b'
|
||||||
|
next()
|
||||||
|
case 'f':
|
||||||
|
c = '\f'
|
||||||
|
next()
|
||||||
|
case 'v':
|
||||||
|
c = '\v'
|
||||||
|
next()
|
||||||
|
case 'x':
|
||||||
|
// Hex byte of one of two hex digits
|
||||||
|
next()
|
||||||
|
digit := hexDigit(ch())
|
||||||
|
if digit < 0 {
|
||||||
|
return "", errors.New("1 or 2 hex digits expected")
|
||||||
|
}
|
||||||
|
c = byte(digit)
|
||||||
|
next()
|
||||||
|
digit = hexDigit(ch())
|
||||||
|
if digit >= 0 {
|
||||||
|
c = c*16 + byte(digit)
|
||||||
|
next()
|
||||||
|
}
|
||||||
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||||
|
// Octal byte of 1-3 octal digits
|
||||||
|
c = ch() - '0'
|
||||||
|
next()
|
||||||
|
for i := 0; i < 2 && ch() >= '0' && ch() <= '7'; i++ {
|
||||||
|
c = c*8 + ch() - '0'
|
||||||
|
next()
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// Any other escape character is just the char
|
||||||
|
// itself, eg: "\z" is just "z".
|
||||||
|
c = ch()
|
||||||
|
if c == 0 {
|
||||||
|
// Expect backslash right at the end of the string, which is
|
||||||
|
// interpreted as a literal backslash (only for Unescape).
|
||||||
|
c = '\\'
|
||||||
|
}
|
||||||
|
next()
|
||||||
|
}
|
||||||
|
chars = append(chars, c)
|
||||||
|
}
|
||||||
|
return string(chars), nil
|
||||||
|
}
|
393
src/tool/awk/lexer/lexer_test.go
Normal file
393
src/tool/awk/lexer/lexer_test.go
Normal file
|
@ -0,0 +1,393 @@
|
||||||
|
// Test GoAWK Lexer
|
||||||
|
|
||||||
|
package lexer_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/benhoyt/goawk/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLexer(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
output string
|
||||||
|
}{
|
||||||
|
// Comments, whitespace, line continuations
|
||||||
|
{"+# foo \n- #foo", `1:1 + "", 1:8 <newline> "", 2:1 - ""`},
|
||||||
|
{"+\\\n-", `1:1 + "", 2:1 - ""`},
|
||||||
|
{"+\\\r\n-", `1:1 + "", 2:1 - ""`},
|
||||||
|
{"+\\-", `1:1 + "", 1:3 <illegal> "expected \\n after \\ line continuation", 1:3 - ""`},
|
||||||
|
|
||||||
|
// Names and keywords
|
||||||
|
{"x", `1:1 name "x"`},
|
||||||
|
{"x y0", `1:1 name "x", 1:3 name "y0"`},
|
||||||
|
{"x 0y", `1:1 name "x", 1:3 number "0", 1:4 name "y"`},
|
||||||
|
{"sub SUB", `1:1 sub "", 1:5 name "SUB"`},
|
||||||
|
|
||||||
|
// String tokens
|
||||||
|
{`"foo"`, `1:1 string "foo"`},
|
||||||
|
{`"a\t\r\n\z\'\"\a\b\f\vb"`, `1:1 string "a\t\r\nz'\"\a\b\f\vb"`},
|
||||||
|
{`"x`, `1:3 <illegal> "didn't find end quote in string"`},
|
||||||
|
{`"foo\"`, `1:7 <illegal> "didn't find end quote in string"`},
|
||||||
|
{"\"x\n\"", `1:3 <illegal> "can't have newline in string", 1:3 <newline> "", 2:2 <illegal> "didn't find end quote in string"`},
|
||||||
|
{`'foo'`, `1:1 string "foo"`},
|
||||||
|
{`'a\t\r\n\z\'\"b'`, `1:1 string "a\t\r\nz'\"b"`},
|
||||||
|
{`'x`, `1:3 <illegal> "didn't find end quote in string"`},
|
||||||
|
{"'x\n'", `1:3 <illegal> "can't have newline in string", 1:3 <newline> "", 2:2 <illegal> "didn't find end quote in string"`},
|
||||||
|
{`"\x0.\x00.\x0A\x10\xff\xFF\x41"`, `1:1 string "\x00.\x00.\n\x10\xff\xffA"`},
|
||||||
|
{`"\xg"`, `1:4 <illegal> "1 or 2 hex digits expected", 1:4 name "g", 1:6 <illegal> "didn't find end quote in string"`},
|
||||||
|
{`"\0\78\7\77\777\0 \141 "`, `1:1 string "\x00\a8\a?\xff\x00 a "`},
|
||||||
|
|
||||||
|
// Number tokens
|
||||||
|
{"0", `1:1 number "0"`},
|
||||||
|
{"9", `1:1 number "9"`},
|
||||||
|
{" 0 ", `1:2 number "0"`},
|
||||||
|
{"\n 1", `1:1 <newline> "", 2:3 number "1"`},
|
||||||
|
{"1234", `1:1 number "1234"`},
|
||||||
|
{".5", `1:1 number ".5"`},
|
||||||
|
{".5e1", `1:1 number ".5e1"`},
|
||||||
|
{"5e+1", `1:1 number "5e+1"`},
|
||||||
|
{"5e-1", `1:1 number "5e-1"`},
|
||||||
|
{"0.", `1:1 number "0."`},
|
||||||
|
{"42e", `1:1 number "42", 1:3 name "e"`},
|
||||||
|
{"4.2e", `1:1 number "4.2", 1:4 name "e"`},
|
||||||
|
{"1.e3", `1:1 number "1.e3"`},
|
||||||
|
{"1.e3", `1:1 number "1.e3"`},
|
||||||
|
{"1e3foo", `1:1 number "1e3", 1:4 name "foo"`},
|
||||||
|
{"1e3+", `1:1 number "1e3", 1:4 + ""`},
|
||||||
|
{"1e3.4", `1:1 number "1e3", 1:4 number ".4"`},
|
||||||
|
{"1e-", `1:1 number "1", 1:2 name "e", 1:3 - ""`},
|
||||||
|
{"1e+", `1:1 number "1", 1:2 name "e", 1:3 + ""`},
|
||||||
|
{"42`", `1:1 number "42", 1:3 <illegal> "unexpected char"`},
|
||||||
|
{"0..", `1:1 number "0.", 1:4 <illegal> "expected digits"`},
|
||||||
|
{".", `1:2 <illegal> "expected digits"`},
|
||||||
|
|
||||||
|
// Misc errors
|
||||||
|
{"&=", `1:2 <illegal> "unexpected char after '&'", 1:2 = ""`},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.input, func(t *testing.T) {
|
||||||
|
l := NewLexer([]byte(test.input))
|
||||||
|
strs := []string{}
|
||||||
|
for {
|
||||||
|
pos, tok, val := l.Scan()
|
||||||
|
if tok == EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if tok == NUMBER {
|
||||||
|
// Ensure ParseFloat() works, as that's what our
|
||||||
|
// parser uses to convert
|
||||||
|
trimmed := strings.TrimRight(val, "eE")
|
||||||
|
_, err := strconv.ParseFloat(trimmed, 64)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("couldn't parse float: %q", val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
strs = append(strs, fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val))
|
||||||
|
}
|
||||||
|
output := strings.Join(strs, ", ")
|
||||||
|
if output != test.output {
|
||||||
|
t.Errorf("expected %q, got %q", test.output, output)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRegex(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
output string
|
||||||
|
}{
|
||||||
|
{`/foo/`, `1:1 regex "foo"`},
|
||||||
|
{`/=foo/`, `1:1 regex "=foo"`},
|
||||||
|
{`/a\/b/`, `1:1 regex "a/b"`},
|
||||||
|
{`/a\/\zb/`, `1:1 regex "a/\\zb"`},
|
||||||
|
{`/a`, `1:3 <illegal> "didn't find end slash in regex"`},
|
||||||
|
{"/a\n", `1:3 <illegal> "can't have newline in regex"`},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.input, func(t *testing.T) {
|
||||||
|
l := NewLexer([]byte(test.input))
|
||||||
|
l.Scan() // Scan first token (probably DIV)
|
||||||
|
pos, tok, val := l.ScanRegex()
|
||||||
|
output := fmt.Sprintf("%d:%d %s %q", pos.Line, pos.Column, tok, val)
|
||||||
|
if output != test.output {
|
||||||
|
t.Errorf("expected %q, got %q", test.output, output)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScanRegexInvalid(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
r := recover()
|
||||||
|
if message, ok := r.(string); ok {
|
||||||
|
expected := "ScanRegex should only be called after DIV or DIV_ASSIGN token"
|
||||||
|
if message != expected {
|
||||||
|
t.Fatalf("expected %q, got %q", expected, message)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
t.Fatalf("expected panic of string type")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
l := NewLexer([]byte("foo/"))
|
||||||
|
l.Scan() // Scan first token (NAME foo)
|
||||||
|
l.ScanRegex()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHadSpace(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
tokens []Token
|
||||||
|
spaces []bool
|
||||||
|
}{
|
||||||
|
{`foo(x)`, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, false, false, false}},
|
||||||
|
{`foo (x) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{false, true, false, false}},
|
||||||
|
{` foo ( x ) `, []Token{NAME, LPAREN, NAME, RPAREN}, []bool{true, true, true, true}},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.input, func(t *testing.T) {
|
||||||
|
l := NewLexer([]byte(test.input))
|
||||||
|
for i := 0; ; i++ {
|
||||||
|
_, tok, _ := l.Scan()
|
||||||
|
if tok == EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if tok != test.tokens[i] {
|
||||||
|
t.Errorf("expected %s for token %d, got %s", test.tokens[i], i, tok)
|
||||||
|
}
|
||||||
|
if l.HadSpace() != test.spaces[i] {
|
||||||
|
t.Errorf("expected %v for space %d, got %v", test.spaces[i], i, l.HadSpace())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPeekByte(t *testing.T) {
|
||||||
|
l := NewLexer([]byte("foo()"))
|
||||||
|
b := l.PeekByte()
|
||||||
|
if b != 'f' {
|
||||||
|
t.Errorf("expected 'f', got %q", b)
|
||||||
|
}
|
||||||
|
_, tok, _ := l.Scan()
|
||||||
|
if tok != NAME {
|
||||||
|
t.Errorf("expected name, got %s", tok)
|
||||||
|
}
|
||||||
|
b = l.PeekByte()
|
||||||
|
if b != '(' {
|
||||||
|
t.Errorf("expected '(', got %q", b)
|
||||||
|
}
|
||||||
|
_, tok, _ = l.Scan()
|
||||||
|
if tok != LPAREN {
|
||||||
|
t.Errorf("expected (, got %s", tok)
|
||||||
|
}
|
||||||
|
_, tok, _ = l.Scan()
|
||||||
|
if tok != RPAREN {
|
||||||
|
t.Errorf("expected ), got %s", tok)
|
||||||
|
}
|
||||||
|
b = l.PeekByte()
|
||||||
|
if b != 0 {
|
||||||
|
t.Errorf("expected 0, got %q", b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestKeywordToken(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
tok Token
|
||||||
|
}{
|
||||||
|
{"print", PRINT},
|
||||||
|
{"split", F_SPLIT},
|
||||||
|
{"BEGIN", BEGIN},
|
||||||
|
{"foo", ILLEGAL},
|
||||||
|
{"GoAWK", ILLEGAL},
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
tok := KeywordToken(test.name)
|
||||||
|
if tok != test.tok {
|
||||||
|
t.Errorf("expected %v, got %v", test.tok, tok)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAllTokens(t *testing.T) {
|
||||||
|
input := "# comment line\n" +
|
||||||
|
"+ += && = : , -- /\n/= $ @ == >= > >> ++ { [ < ( #\n" +
|
||||||
|
"<= ~ % %= * *= !~ ! != | || ^ ^= ** **= ? } ] ) ; - -= " +
|
||||||
|
"BEGIN break continue delete do else END exit " +
|
||||||
|
"for function getline if in next print printf return while " +
|
||||||
|
"atan2 close cos exp fflush gsub index int length log match rand " +
|
||||||
|
"sin split sprintf sqrt srand sub substr system tolower toupper " +
|
||||||
|
"x \"str\\n\" 1234\n" +
|
||||||
|
"` ."
|
||||||
|
|
||||||
|
strs := make([]string, 0, LAST+1)
|
||||||
|
seen := make([]bool, LAST+1)
|
||||||
|
l := NewLexer([]byte(input))
|
||||||
|
for {
|
||||||
|
_, tok, _ := l.Scan()
|
||||||
|
strs = append(strs, tok.String())
|
||||||
|
seen[int(tok)] = true
|
||||||
|
if tok == EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
output := strings.Join(strs, " ")
|
||||||
|
|
||||||
|
expected := "<newline> " +
|
||||||
|
"+ += && = : , -- / <newline> /= $ @ == >= > >> ++ { [ < ( <newline> " +
|
||||||
|
"<= ~ % %= * *= !~ ! != | || ^ ^= ^ ^= ? } ] ) ; - -= " +
|
||||||
|
"BEGIN break continue delete do else END exit " +
|
||||||
|
"for function getline if in next print printf return while " +
|
||||||
|
"atan2 close cos exp fflush gsub index int length log match rand " +
|
||||||
|
"sin split sprintf sqrt srand sub substr system tolower toupper " +
|
||||||
|
"name string number <newline> " +
|
||||||
|
"<illegal> <illegal> EOF"
|
||||||
|
if output != expected {
|
||||||
|
t.Errorf("expected %q, got %q", expected, output)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, s := range seen {
|
||||||
|
if !s && Token(i) != CONCAT && Token(i) != REGEX {
|
||||||
|
t.Errorf("token %s (%d) not seen", Token(i), i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
l = NewLexer([]byte(`/foo/`))
|
||||||
|
_, tok1, _ := l.Scan()
|
||||||
|
_, tok2, val := l.ScanRegex()
|
||||||
|
if tok1 != DIV || tok2 != REGEX || val != "foo" {
|
||||||
|
t.Errorf(`expected / regex "foo", got %s %s %q`, tok1, tok2, val)
|
||||||
|
}
|
||||||
|
|
||||||
|
l = NewLexer([]byte(`/=foo/`))
|
||||||
|
_, tok1, _ = l.Scan()
|
||||||
|
_, tok2, val = l.ScanRegex()
|
||||||
|
if tok1 != DIV_ASSIGN || tok2 != REGEX || val != "=foo" {
|
||||||
|
t.Errorf(`expected /= regex "=foo", got %s %s %q`, tok1, tok2, val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUnescape(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
output string
|
||||||
|
error string
|
||||||
|
}{
|
||||||
|
{``, "", ""},
|
||||||
|
{`foo bar`, "foo bar", ""},
|
||||||
|
{`foo\tbar`, "foo\tbar", ""},
|
||||||
|
{"foo\nbar", "", "can't have newline in string"},
|
||||||
|
{`foo"`, "foo\"", ""},
|
||||||
|
{`O'Connor`, "O'Connor", ""},
|
||||||
|
{`foo\`, "foo\\", ""},
|
||||||
|
// Other cases tested in TestLexer string handling.
|
||||||
|
}
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.input, func(t *testing.T) {
|
||||||
|
got, err := Unescape(test.input)
|
||||||
|
if err != nil {
|
||||||
|
if err.Error() != test.error {
|
||||||
|
t.Fatalf("expected error %q, got %q", test.error, err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if test.error != "" {
|
||||||
|
t.Fatalf("expected error %q, got %q", test.error, "")
|
||||||
|
}
|
||||||
|
if got != test.output {
|
||||||
|
t.Fatalf("expected %q, got %q", test.output, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkLexer(b *testing.B, repeat int, source string) {
|
||||||
|
fullSource := []byte(strings.Repeat(source+"\n", repeat))
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
l := NewLexer(fullSource)
|
||||||
|
for {
|
||||||
|
_, tok, _ := l.Scan()
|
||||||
|
if tok == EOF || tok == ILLEGAL {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkProgram(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, `{ print $1, ($3+$4)*$5 }`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkNames(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, `x y i foobar abcdefghij0123456789 _`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkKeywords(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, `BEGIN END print sub if length`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkSimpleTokens(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, "\n : , { [ ( } ] ) ~ ? ; $")
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkChoiceTokens(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, `/ /= % %= + ++ += * ** **= *= = == ^ ^= ! != !~ < <= > >= >> && | ||`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkNumbers(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, `0 1 .5 1234 1234567890 1234.56789e-50`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkStrings(b *testing.B) {
|
||||||
|
benchmarkLexer(b, 5, `"x" "y" "xyz" "foo" "foo bar baz" "foo\tbar\rbaz\n"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkRegex(b *testing.B) {
|
||||||
|
source := `/x/ /./ /foo/ /bar/ /=equals=/ /\/\/\/\//`
|
||||||
|
fullSource := []byte(strings.Repeat(source+" ", 5))
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
l := NewLexer(fullSource)
|
||||||
|
for {
|
||||||
|
_, tok, _ := l.Scan()
|
||||||
|
if tok == EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if tok != DIV && tok != DIV_ASSIGN {
|
||||||
|
b.Fatalf("expected / or /=, got %s", tok)
|
||||||
|
}
|
||||||
|
_, tok, _ = l.ScanRegex()
|
||||||
|
if tok != REGEX {
|
||||||
|
b.Fatalf("expected regex, got %s", tok)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example() {
|
||||||
|
lexer := NewLexer([]byte(`$0 { print $1 }`))
|
||||||
|
for {
|
||||||
|
pos, tok, val := lexer.Scan()
|
||||||
|
if tok == EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Printf("%d:%d %s %q\n", pos.Line, pos.Column, tok, val)
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// 1:1 $ ""
|
||||||
|
// 1:2 number "0"
|
||||||
|
// 1:4 { ""
|
||||||
|
// 1:6 print ""
|
||||||
|
// 1:12 $ ""
|
||||||
|
// 1:13 number "1"
|
||||||
|
// 1:15 } ""
|
||||||
|
}
|
263
src/tool/awk/lexer/token.go
Normal file
263
src/tool/awk/lexer/token.go
Normal file
|
@ -0,0 +1,263 @@
|
||||||
|
// Lexer tokens
|
||||||
|
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
// Token identifies the kind of a single lexical token.
type Token int
|
||||||
|
|
||||||
|
const (
|
||||||
|
ILLEGAL Token = iota
|
||||||
|
EOF
|
||||||
|
NEWLINE
|
||||||
|
CONCAT // Not really a token, but used as an operator
|
||||||
|
|
||||||
|
// Symbols
|
||||||
|
|
||||||
|
ADD
|
||||||
|
ADD_ASSIGN
|
||||||
|
AND
|
||||||
|
APPEND
|
||||||
|
ASSIGN
|
||||||
|
AT
|
||||||
|
COLON
|
||||||
|
COMMA
|
||||||
|
DECR
|
||||||
|
DIV
|
||||||
|
DIV_ASSIGN
|
||||||
|
DOLLAR
|
||||||
|
EQUALS
|
||||||
|
GTE
|
||||||
|
GREATER
|
||||||
|
INCR
|
||||||
|
LBRACE
|
||||||
|
LBRACKET
|
||||||
|
LESS
|
||||||
|
LPAREN
|
||||||
|
LTE
|
||||||
|
MATCH
|
||||||
|
MOD
|
||||||
|
MOD_ASSIGN
|
||||||
|
MUL
|
||||||
|
MUL_ASSIGN
|
||||||
|
NOT_MATCH
|
||||||
|
NOT
|
||||||
|
NOT_EQUALS
|
||||||
|
OR
|
||||||
|
PIPE
|
||||||
|
POW
|
||||||
|
POW_ASSIGN
|
||||||
|
QUESTION
|
||||||
|
RBRACE
|
||||||
|
RBRACKET
|
||||||
|
RPAREN
|
||||||
|
SEMICOLON
|
||||||
|
SUB
|
||||||
|
SUB_ASSIGN
|
||||||
|
|
||||||
|
// Keywords
|
||||||
|
|
||||||
|
BEGIN
|
||||||
|
BREAK
|
||||||
|
CONTINUE
|
||||||
|
DELETE
|
||||||
|
DO
|
||||||
|
ELSE
|
||||||
|
END
|
||||||
|
EXIT
|
||||||
|
FOR
|
||||||
|
FUNCTION
|
||||||
|
GETLINE
|
||||||
|
IF
|
||||||
|
IN
|
||||||
|
NEXT
|
||||||
|
PRINT
|
||||||
|
PRINTF
|
||||||
|
RETURN
|
||||||
|
WHILE
|
||||||
|
|
||||||
|
// Built-in functions
|
||||||
|
|
||||||
|
F_ATAN2
|
||||||
|
F_CLOSE
|
||||||
|
F_COS
|
||||||
|
F_EXP
|
||||||
|
F_FFLUSH
|
||||||
|
F_GSUB
|
||||||
|
F_INDEX
|
||||||
|
F_INT
|
||||||
|
F_LENGTH
|
||||||
|
F_LOG
|
||||||
|
F_MATCH
|
||||||
|
F_RAND
|
||||||
|
F_SIN
|
||||||
|
F_SPLIT
|
||||||
|
F_SPRINTF
|
||||||
|
F_SQRT
|
||||||
|
F_SRAND
|
||||||
|
F_SUB
|
||||||
|
F_SUBSTR
|
||||||
|
F_SYSTEM
|
||||||
|
F_TOLOWER
|
||||||
|
F_TOUPPER
|
||||||
|
|
||||||
|
// Literals and names (variables and arrays)
|
||||||
|
|
||||||
|
NAME
|
||||||
|
NUMBER
|
||||||
|
STRING
|
||||||
|
REGEX
|
||||||
|
|
||||||
|
LAST = REGEX
|
||||||
|
FIRST_FUNC = F_ATAN2
|
||||||
|
LAST_FUNC = F_TOUPPER
|
||||||
|
)
|
||||||
|
|
||||||
|
var keywordTokens = map[string]Token{
|
||||||
|
"BEGIN": BEGIN,
|
||||||
|
"break": BREAK,
|
||||||
|
"continue": CONTINUE,
|
||||||
|
"delete": DELETE,
|
||||||
|
"do": DO,
|
||||||
|
"else": ELSE,
|
||||||
|
"END": END,
|
||||||
|
"exit": EXIT,
|
||||||
|
"for": FOR,
|
||||||
|
"function": FUNCTION,
|
||||||
|
"getline": GETLINE,
|
||||||
|
"if": IF,
|
||||||
|
"in": IN,
|
||||||
|
"next": NEXT,
|
||||||
|
"print": PRINT,
|
||||||
|
"printf": PRINTF,
|
||||||
|
"return": RETURN,
|
||||||
|
"while": WHILE,
|
||||||
|
|
||||||
|
"atan2": F_ATAN2,
|
||||||
|
"close": F_CLOSE,
|
||||||
|
"cos": F_COS,
|
||||||
|
"exp": F_EXP,
|
||||||
|
"fflush": F_FFLUSH,
|
||||||
|
"gsub": F_GSUB,
|
||||||
|
"index": F_INDEX,
|
||||||
|
"int": F_INT,
|
||||||
|
"length": F_LENGTH,
|
||||||
|
"log": F_LOG,
|
||||||
|
"match": F_MATCH,
|
||||||
|
"rand": F_RAND,
|
||||||
|
"sin": F_SIN,
|
||||||
|
"split": F_SPLIT,
|
||||||
|
"sprintf": F_SPRINTF,
|
||||||
|
"sqrt": F_SQRT,
|
||||||
|
"srand": F_SRAND,
|
||||||
|
"sub": F_SUB,
|
||||||
|
"substr": F_SUBSTR,
|
||||||
|
"system": F_SYSTEM,
|
||||||
|
"tolower": F_TOLOWER,
|
||||||
|
"toupper": F_TOUPPER,
|
||||||
|
}
|
||||||
|
|
||||||
|
// KeywordToken returns the token associated with the given keyword
|
||||||
|
// string, or ILLEGAL if given name is not a keyword.
|
||||||
|
func KeywordToken(name string) Token {
|
||||||
|
return keywordTokens[name]
|
||||||
|
}
|
||||||
|
|
||||||
|
var tokenNames = map[Token]string{
|
||||||
|
ILLEGAL: "<illegal>",
|
||||||
|
EOF: "EOF",
|
||||||
|
NEWLINE: "<newline>",
|
||||||
|
CONCAT: "<concat>",
|
||||||
|
|
||||||
|
ADD: "+",
|
||||||
|
ADD_ASSIGN: "+=",
|
||||||
|
AND: "&&",
|
||||||
|
APPEND: ">>",
|
||||||
|
ASSIGN: "=",
|
||||||
|
AT: "@",
|
||||||
|
COLON: ":",
|
||||||
|
COMMA: ",",
|
||||||
|
DECR: "--",
|
||||||
|
DIV: "/",
|
||||||
|
DIV_ASSIGN: "/=",
|
||||||
|
DOLLAR: "$",
|
||||||
|
EQUALS: "==",
|
||||||
|
GTE: ">=",
|
||||||
|
GREATER: ">",
|
||||||
|
INCR: "++",
|
||||||
|
LBRACE: "{",
|
||||||
|
LBRACKET: "[",
|
||||||
|
LESS: "<",
|
||||||
|
LPAREN: "(",
|
||||||
|
LTE: "<=",
|
||||||
|
MATCH: "~",
|
||||||
|
MOD: "%",
|
||||||
|
MOD_ASSIGN: "%=",
|
||||||
|
MUL: "*",
|
||||||
|
MUL_ASSIGN: "*=",
|
||||||
|
NOT_MATCH: "!~",
|
||||||
|
NOT: "!",
|
||||||
|
NOT_EQUALS: "!=",
|
||||||
|
OR: "||",
|
||||||
|
PIPE: "|",
|
||||||
|
POW: "^",
|
||||||
|
POW_ASSIGN: "^=",
|
||||||
|
QUESTION: "?",
|
||||||
|
RBRACE: "}",
|
||||||
|
RBRACKET: "]",
|
||||||
|
RPAREN: ")",
|
||||||
|
SEMICOLON: ";",
|
||||||
|
SUB: "-",
|
||||||
|
SUB_ASSIGN: "-=",
|
||||||
|
|
||||||
|
BEGIN: "BEGIN",
|
||||||
|
BREAK: "break",
|
||||||
|
CONTINUE: "continue",
|
||||||
|
DELETE: "delete",
|
||||||
|
DO: "do",
|
||||||
|
ELSE: "else",
|
||||||
|
END: "END",
|
||||||
|
EXIT: "exit",
|
||||||
|
FOR: "for",
|
||||||
|
FUNCTION: "function",
|
||||||
|
GETLINE: "getline",
|
||||||
|
IF: "if",
|
||||||
|
IN: "in",
|
||||||
|
NEXT: "next",
|
||||||
|
PRINT: "print",
|
||||||
|
PRINTF: "printf",
|
||||||
|
RETURN: "return",
|
||||||
|
WHILE: "while",
|
||||||
|
|
||||||
|
F_ATAN2: "atan2",
|
||||||
|
F_CLOSE: "close",
|
||||||
|
F_COS: "cos",
|
||||||
|
F_EXP: "exp",
|
||||||
|
F_FFLUSH: "fflush",
|
||||||
|
F_GSUB: "gsub",
|
||||||
|
F_INDEX: "index",
|
||||||
|
F_INT: "int",
|
||||||
|
F_LENGTH: "length",
|
||||||
|
F_LOG: "log",
|
||||||
|
F_MATCH: "match",
|
||||||
|
F_RAND: "rand",
|
||||||
|
F_SIN: "sin",
|
||||||
|
F_SPLIT: "split",
|
||||||
|
F_SPRINTF: "sprintf",
|
||||||
|
F_SQRT: "sqrt",
|
||||||
|
F_SRAND: "srand",
|
||||||
|
F_SUB: "sub",
|
||||||
|
F_SUBSTR: "substr",
|
||||||
|
F_SYSTEM: "system",
|
||||||
|
F_TOLOWER: "tolower",
|
||||||
|
F_TOUPPER: "toupper",
|
||||||
|
|
||||||
|
NAME: "name",
|
||||||
|
NUMBER: "number",
|
||||||
|
STRING: "string",
|
||||||
|
REGEX: "regex",
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the string name of this token.
|
||||||
|
func (t Token) String() string {
|
||||||
|
return tokenNames[t]
|
||||||
|
}
|
21
src/tool/awk/license.txt
Normal file
21
src/tool/awk/license.txt
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2022 Ben Hoyt
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
1048
src/tool/awk/parser/parser.go
Normal file
1048
src/tool/awk/parser/parser.go
Normal file
File diff suppressed because it is too large
Load diff
242
src/tool/awk/parser/parser_test.go
Normal file
242
src/tool/awk/parser/parser_test.go
Normal file
|
@ -0,0 +1,242 @@
|
||||||
|
// Test parser package
|
||||||
|
|
||||||
|
package parser_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NOTE: apart from TestParseAndString, the parser doesn't have
|
||||||
|
// extensive tests of its own; the idea is to test the parser in the
|
||||||
|
// interp tests.
|
||||||
|
|
||||||
|
func TestParseAndString(t *testing.T) {
|
||||||
|
// This program should have one of every AST element to ensure
|
||||||
|
// we can parse and String()ify each.
|
||||||
|
source := strings.TrimSpace(`
|
||||||
|
BEGIN {
|
||||||
|
print "begin one"
|
||||||
|
}
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
print "begin two"
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
print "empty pattern"
|
||||||
|
}
|
||||||
|
|
||||||
|
$0 {
|
||||||
|
print "normal pattern"
|
||||||
|
print 1, 2, 3
|
||||||
|
printf "%.3f", 3.14159
|
||||||
|
print "x" >"file"
|
||||||
|
print "x" >>"append"
|
||||||
|
print "y" |"prog"
|
||||||
|
delete a[k]
|
||||||
|
if (c) {
|
||||||
|
get(a, k)
|
||||||
|
}
|
||||||
|
if (1 + 2) {
|
||||||
|
get(a, k)
|
||||||
|
} else {
|
||||||
|
set(a, k, v)
|
||||||
|
}
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
print i
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for (k in a) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
while (0) {
|
||||||
|
print "x"
|
||||||
|
}
|
||||||
|
do {
|
||||||
|
print "y"
|
||||||
|
exit status
|
||||||
|
} while (x)
|
||||||
|
next
|
||||||
|
"cmd" |getline
|
||||||
|
"cmd" |getline x
|
||||||
|
"cmd" |getline a[1]
|
||||||
|
"cmd" |getline $1
|
||||||
|
getline
|
||||||
|
getline x
|
||||||
|
(getline x + 1)
|
||||||
|
getline $1
|
||||||
|
getline a[1]
|
||||||
|
getline <"file"
|
||||||
|
getline x <"file"
|
||||||
|
(getline x <"file" "x")
|
||||||
|
getline $1 <"file"
|
||||||
|
getline a[1] <"file"
|
||||||
|
x = 0
|
||||||
|
y = z = 0
|
||||||
|
b += 1
|
||||||
|
c -= 2
|
||||||
|
d *= 3
|
||||||
|
e /= 4
|
||||||
|
g ^= 5
|
||||||
|
h %= 6
|
||||||
|
(x ? "t" : "f")
|
||||||
|
((b && c) || d)
|
||||||
|
(k in a)
|
||||||
|
((x, y, z) in a)
|
||||||
|
(s ~ "foo")
|
||||||
|
(b < 1)
|
||||||
|
(c <= 2)
|
||||||
|
(d > 3)
|
||||||
|
(e >= 4)
|
||||||
|
(g == 5)
|
||||||
|
(h != 6)
|
||||||
|
((x y) z)
|
||||||
|
((b + c) + d)
|
||||||
|
((b * c) * d)
|
||||||
|
((b - c) - d)
|
||||||
|
((b / c) / d)
|
||||||
|
(b ^ (c ^ d))
|
||||||
|
x++
|
||||||
|
x--
|
||||||
|
++y
|
||||||
|
--y
|
||||||
|
1234
|
||||||
|
1.5
|
||||||
|
"This is a string"
|
||||||
|
if (/a.b/) {
|
||||||
|
print "match"
|
||||||
|
}
|
||||||
|
$1
|
||||||
|
$(1 + 2)
|
||||||
|
!x
|
||||||
|
+x
|
||||||
|
-x
|
||||||
|
var
|
||||||
|
a[key]
|
||||||
|
a[x, y, z]
|
||||||
|
f()
|
||||||
|
set(a, k, v)
|
||||||
|
sub(regex, repl)
|
||||||
|
sub(regex, repl, s)
|
||||||
|
gsub(regex, repl)
|
||||||
|
gsub(regex, repl, s)
|
||||||
|
split(s, a)
|
||||||
|
split(s, a, regex)
|
||||||
|
match(s, regex)
|
||||||
|
rand()
|
||||||
|
srand()
|
||||||
|
srand(1)
|
||||||
|
length()
|
||||||
|
length($1)
|
||||||
|
sprintf("")
|
||||||
|
sprintf("%.3f", 3.14159)
|
||||||
|
sprintf("%.3f %d", 3.14159, 42)
|
||||||
|
cos(1)
|
||||||
|
sin(1)
|
||||||
|
exp(1)
|
||||||
|
log(1)
|
||||||
|
sqrt(1)
|
||||||
|
int("42")
|
||||||
|
tolower("FOO")
|
||||||
|
toupper("foo")
|
||||||
|
system("ls")
|
||||||
|
close("file")
|
||||||
|
atan2(x, y)
|
||||||
|
index(haystack, needle)
|
||||||
|
{
|
||||||
|
print "block statement"
|
||||||
|
f()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(NR == 1), (NR == 2) {
|
||||||
|
print "range pattern"
|
||||||
|
}
|
||||||
|
|
||||||
|
($1 == "foo")
|
||||||
|
|
||||||
|
END {
|
||||||
|
print "end one"
|
||||||
|
}
|
||||||
|
|
||||||
|
END {
|
||||||
|
print "end two"
|
||||||
|
}
|
||||||
|
|
||||||
|
function f() {
|
||||||
|
}
|
||||||
|
|
||||||
|
function get(a, k) {
|
||||||
|
return a[k]
|
||||||
|
}
|
||||||
|
|
||||||
|
function set(a, k, v) {
|
||||||
|
a[k] = v
|
||||||
|
return
|
||||||
|
}
|
||||||
|
`)
|
||||||
|
prog, err := parser.ParseProgram([]byte(source), nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error parsing program: %v", err)
|
||||||
|
}
|
||||||
|
progStr := prog.String()
|
||||||
|
if progStr != source {
|
||||||
|
t.Fatalf("expected first, got second:\n%s\n----------\n%s", source, progStr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveLargeCallGraph(t *testing.T) {
|
||||||
|
const numCalls = 10000
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
var i int
|
||||||
|
for i = 0; i < numCalls; i++ {
|
||||||
|
fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&buf, "function f%d(a) { return a }\n", i)
|
||||||
|
fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n")
|
||||||
|
_, err := parser.ParseProgram(buf.Bytes(), nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.Reset()
|
||||||
|
fmt.Fprint(&buf, "BEGIN { printf f0(42) }\n")
|
||||||
|
fmt.Fprintf(&buf, "function f%d(a) { return a }\n", numCalls)
|
||||||
|
for i = numCalls - 1; i >= 0; i-- {
|
||||||
|
fmt.Fprintf(&buf, "function f%d(a) { return f%d(a) }\n", i, i+1)
|
||||||
|
}
|
||||||
|
_, err = parser.ParseProgram(buf.Bytes(), nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_valid() {
|
||||||
|
prog, err := parser.ParseProgram([]byte("$0 { print $1 }"), nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
} else {
|
||||||
|
fmt.Println(prog)
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// $0 {
|
||||||
|
// print $1
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_error() {
|
||||||
|
prog, err := parser.ParseProgram([]byte("{ for if }"), nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
} else {
|
||||||
|
fmt.Println(prog)
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// parse error at 1:7: expected ( instead of if
|
||||||
|
}
|
462
src/tool/awk/parser/resolve.go
Normal file
462
src/tool/awk/parser/resolve.go
Normal file
|
@ -0,0 +1,462 @@
|
||||||
|
// Resolve function calls and variable types
|
||||||
|
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/benhoyt/goawk/internal/ast"
|
||||||
|
. "github.com/benhoyt/goawk/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// varType classifies a variable as seen by the resolver.
type varType int

const (
	typeUnknown varType = iota // zero value: type not determined yet
	typeScalar
	typeArray
)

// String returns "Scalar" or "Array" for the two known types and
// "Unknown" for every other value.
func (t varType) String() string {
	if t == typeScalar {
		return "Scalar"
	}
	if t == typeArray {
		return "Array"
	}
	return "Unknown"
}
|
||||||
|
|
||||||
|
// typeInfo records type information for a single variable
|
||||||
|
type typeInfo struct {
|
||||||
|
typ varType
|
||||||
|
ref *ast.VarExpr
|
||||||
|
scope ast.VarScope
|
||||||
|
index int
|
||||||
|
callName string
|
||||||
|
argIndex int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used by printVarTypes when debugTypes is turned on
|
||||||
|
func (t typeInfo) String() string {
|
||||||
|
var scope string
|
||||||
|
switch t.scope {
|
||||||
|
case ast.ScopeGlobal:
|
||||||
|
scope = "Global"
|
||||||
|
case ast.ScopeLocal:
|
||||||
|
scope = "Local"
|
||||||
|
default:
|
||||||
|
scope = "Special"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d",
|
||||||
|
t.typ, t.ref, scope, t.index, t.callName, t.argIndex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A single variable reference (normally scalar)
|
||||||
|
type varRef struct {
|
||||||
|
funcName string
|
||||||
|
ref *ast.VarExpr
|
||||||
|
isArg bool
|
||||||
|
pos Position
|
||||||
|
}
|
||||||
|
|
||||||
|
// A single array reference
|
||||||
|
type arrayRef struct {
|
||||||
|
funcName string
|
||||||
|
ref *ast.ArrayExpr
|
||||||
|
pos Position
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the resolver
|
||||||
|
func (p *parser) initResolve() {
|
||||||
|
p.varTypes = make(map[string]map[string]typeInfo)
|
||||||
|
p.varTypes[""] = make(map[string]typeInfo) // globals
|
||||||
|
p.functions = make(map[string]int)
|
||||||
|
p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present
|
||||||
|
p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays
|
||||||
|
p.arrayRef("FIELDS", Position{1, 1})
|
||||||
|
p.multiExprs = make(map[*ast.MultiExpr]Position, 3)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal the start of a function
|
||||||
|
func (p *parser) startFunction(name string, params []string) {
|
||||||
|
p.funcName = name
|
||||||
|
p.varTypes[name] = make(map[string]typeInfo)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal the end of a function
|
||||||
|
func (p *parser) stopFunction() {
|
||||||
|
p.funcName = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add function by name with given index
|
||||||
|
func (p *parser) addFunction(name string, index int) {
|
||||||
|
p.functions[name] = index
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records a call to a user function (for resolving indexes later)
|
||||||
|
type userCall struct {
|
||||||
|
call *ast.UserCallExpr
|
||||||
|
pos Position
|
||||||
|
inFunc string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record a user call site
|
||||||
|
func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) {
|
||||||
|
p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName})
|
||||||
|
}
|
||||||
|
|
||||||
|
// After parsing, resolve all user calls to their indexes. Also
|
||||||
|
// ensures functions called have actually been defined, and that
|
||||||
|
// they're not being called with too many arguments.
|
||||||
|
func (p *parser) resolveUserCalls(prog *Program) {
|
||||||
|
// Number the native funcs (order by name to get consistent order)
|
||||||
|
nativeNames := make([]string, 0, len(p.nativeFuncs))
|
||||||
|
for name := range p.nativeFuncs {
|
||||||
|
nativeNames = append(nativeNames, name)
|
||||||
|
}
|
||||||
|
sort.Strings(nativeNames)
|
||||||
|
nativeIndexes := make(map[string]int, len(nativeNames))
|
||||||
|
for i, name := range nativeNames {
|
||||||
|
nativeIndexes[name] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range p.userCalls {
|
||||||
|
// AWK-defined functions take precedence over native Go funcs
|
||||||
|
index, ok := p.functions[c.call.Name]
|
||||||
|
if !ok {
|
||||||
|
f, haveNative := p.nativeFuncs[c.call.Name]
|
||||||
|
if !haveNative {
|
||||||
|
panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name))
|
||||||
|
}
|
||||||
|
typ := reflect.TypeOf(f)
|
||||||
|
if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() {
|
||||||
|
panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name))
|
||||||
|
}
|
||||||
|
c.call.Native = true
|
||||||
|
c.call.Index = nativeIndexes[c.call.Name]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
function := prog.Functions[index]
|
||||||
|
if len(c.call.Args) > len(function.Params) {
|
||||||
|
panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name))
|
||||||
|
}
|
||||||
|
c.call.Index = index
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For arguments that are variable references, we don't know the
|
||||||
|
// type based on context, so mark the types for these as unknown.
|
||||||
|
func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) {
|
||||||
|
if varExpr, ok := arg.(*ast.VarExpr); ok {
|
||||||
|
scope, varFuncName := p.getScope(varExpr.Name)
|
||||||
|
ref := p.varTypes[varFuncName][varExpr.Name].ref
|
||||||
|
if ref == varExpr {
|
||||||
|
// Only applies if this is the first reference to this
|
||||||
|
// variable (otherwise we know the type already)
|
||||||
|
p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index}
|
||||||
|
}
|
||||||
|
// Mark the last related varRef (the most recent one) as a
|
||||||
|
// call argument for later error handling
|
||||||
|
p.varRefs[len(p.varRefs)-1].isArg = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine scope of given variable reference (and funcName if it's
|
||||||
|
// a local, otherwise empty string)
|
||||||
|
func (p *parser) getScope(name string) (ast.VarScope, string) {
|
||||||
|
switch {
|
||||||
|
case p.locals[name]:
|
||||||
|
return ast.ScopeLocal, p.funcName
|
||||||
|
case ast.SpecialVarIndex(name) > 0:
|
||||||
|
return ast.ScopeSpecial, ""
|
||||||
|
default:
|
||||||
|
return ast.ScopeGlobal, ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record a variable (scalar) reference and return the *VarExpr (but
|
||||||
|
// VarExpr.Index won't be set till later)
|
||||||
|
func (p *parser) varRef(name string, pos Position) *ast.VarExpr {
|
||||||
|
scope, funcName := p.getScope(name)
|
||||||
|
expr := &ast.VarExpr{scope, 0, name}
|
||||||
|
p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos})
|
||||||
|
info := p.varTypes[funcName][name]
|
||||||
|
if info.typ == typeUnknown {
|
||||||
|
p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0}
|
||||||
|
}
|
||||||
|
return expr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record an array reference and return the *ArrayExpr (but
|
||||||
|
// ArrayExpr.Index won't be set till later)
|
||||||
|
func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr {
|
||||||
|
scope, funcName := p.getScope(name)
|
||||||
|
if scope == ast.ScopeSpecial {
|
||||||
|
panic(p.errorf("can't use scalar %q as array", name))
|
||||||
|
}
|
||||||
|
expr := &ast.ArrayExpr{scope, 0, name}
|
||||||
|
p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos})
|
||||||
|
info := p.varTypes[funcName][name]
|
||||||
|
if info.typ == typeUnknown {
|
||||||
|
p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0}
|
||||||
|
}
|
||||||
|
return expr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print variable type information (for debugging) on p.debugWriter
|
||||||
|
func (p *parser) printVarTypes(prog *Program) {
|
||||||
|
fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars)
|
||||||
|
fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays)
|
||||||
|
funcNames := []string{}
|
||||||
|
for funcName := range p.varTypes {
|
||||||
|
funcNames = append(funcNames, funcName)
|
||||||
|
}
|
||||||
|
sort.Strings(funcNames)
|
||||||
|
for _, funcName := range funcNames {
|
||||||
|
if funcName != "" {
|
||||||
|
fmt.Fprintf(p.debugWriter, "function %s\n", funcName)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(p.debugWriter, "globals\n")
|
||||||
|
}
|
||||||
|
varNames := []string{}
|
||||||
|
for name := range p.varTypes[funcName] {
|
||||||
|
varNames = append(varNames, name)
|
||||||
|
}
|
||||||
|
sort.Strings(varNames)
|
||||||
|
for _, name := range varNames {
|
||||||
|
info := p.varTypes[funcName][name]
|
||||||
|
fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve unknown variables types and generate variable indexes and
|
||||||
|
// name-to-index mappings for interpreter
|
||||||
|
func (p *parser) resolveVars(prog *Program) {
|
||||||
|
// First go through all unknown types and try to determine the
|
||||||
|
// type from the parameter type in that function definition.
|
||||||
|
// Iterate through functions in topological order, for example
|
||||||
|
// if f() calls g(), process g first, then f.
|
||||||
|
callGraph := make(map[string]map[string]struct{})
|
||||||
|
for _, call := range p.userCalls {
|
||||||
|
if _, ok := callGraph[call.inFunc]; !ok {
|
||||||
|
callGraph[call.inFunc] = make(map[string]struct{})
|
||||||
|
}
|
||||||
|
callGraph[call.inFunc][call.call.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
sortedFuncs := topoSort(callGraph)
|
||||||
|
for _, funcName := range sortedFuncs {
|
||||||
|
infos := p.varTypes[funcName]
|
||||||
|
for name, info := range infos {
|
||||||
|
if info.scope == ast.ScopeSpecial || info.typ != typeUnknown {
|
||||||
|
// It's a special var or type is already known
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
funcIndex, ok := p.functions[info.callName]
|
||||||
|
if !ok {
|
||||||
|
// Function being called is a native function
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Determine var type based on type of this parameter
|
||||||
|
// in the called function (if we know that)
|
||||||
|
paramName := prog.Functions[funcIndex].Params[info.argIndex]
|
||||||
|
typ := p.varTypes[info.callName][paramName].typ
|
||||||
|
if typ != typeUnknown {
|
||||||
|
if p.debugTypes {
|
||||||
|
fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
|
||||||
|
funcName, name, typ)
|
||||||
|
}
|
||||||
|
info.typ = typ
|
||||||
|
p.varTypes[funcName][name] = info
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve global variables (iteration order is undefined, so
|
||||||
|
// assign indexes basically randomly)
|
||||||
|
prog.Scalars = make(map[string]int)
|
||||||
|
prog.Arrays = make(map[string]int)
|
||||||
|
for name, info := range p.varTypes[""] {
|
||||||
|
_, isFunc := p.functions[name]
|
||||||
|
if isFunc {
|
||||||
|
// Global var can't also be the name of a function
|
||||||
|
panic(p.errorf("global var %q can't also be a function", name))
|
||||||
|
}
|
||||||
|
var index int
|
||||||
|
if info.scope == ast.ScopeSpecial {
|
||||||
|
index = ast.SpecialVarIndex(name)
|
||||||
|
} else if info.typ == typeArray {
|
||||||
|
index = len(prog.Arrays)
|
||||||
|
prog.Arrays[name] = index
|
||||||
|
} else {
|
||||||
|
index = len(prog.Scalars)
|
||||||
|
prog.Scalars[name] = index
|
||||||
|
}
|
||||||
|
info.index = index
|
||||||
|
p.varTypes[""][name] = info
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill in unknown parameter types that are being called with arrays,
|
||||||
|
// for example, as in the following code:
|
||||||
|
//
|
||||||
|
// BEGIN { arr[0]; f(arr) }
|
||||||
|
// function f(a) { }
|
||||||
|
for _, c := range p.userCalls {
|
||||||
|
if c.call.Native {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
function := prog.Functions[c.call.Index]
|
||||||
|
for i, arg := range c.call.Args {
|
||||||
|
varExpr, ok := arg.(*ast.VarExpr)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
|
||||||
|
argType := p.varTypes[funcName][varExpr.Name]
|
||||||
|
paramType := p.varTypes[function.Name][function.Params[i]]
|
||||||
|
if argType.typ == typeArray && paramType.typ == typeUnknown {
|
||||||
|
paramType.typ = argType.typ
|
||||||
|
p.varTypes[function.Name][function.Params[i]] = paramType
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve local variables (assign indexes in order of params).
|
||||||
|
// Also patch up Function.Arrays (tells interpreter which args
|
||||||
|
// are arrays).
|
||||||
|
for funcName, infos := range p.varTypes {
|
||||||
|
if funcName == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
scalarIndex := 0
|
||||||
|
arrayIndex := 0
|
||||||
|
functionIndex := p.functions[funcName]
|
||||||
|
function := prog.Functions[functionIndex]
|
||||||
|
arrays := make([]bool, len(function.Params))
|
||||||
|
for i, name := range function.Params {
|
||||||
|
info := infos[name]
|
||||||
|
var index int
|
||||||
|
if info.typ == typeArray {
|
||||||
|
index = arrayIndex
|
||||||
|
arrayIndex++
|
||||||
|
arrays[i] = true
|
||||||
|
} else {
|
||||||
|
// typeScalar or typeUnknown: variables may still be
|
||||||
|
// of unknown type if they've never been referenced --
|
||||||
|
// default to scalar in that case
|
||||||
|
index = scalarIndex
|
||||||
|
scalarIndex++
|
||||||
|
}
|
||||||
|
info.index = index
|
||||||
|
p.varTypes[funcName][name] = info
|
||||||
|
}
|
||||||
|
prog.Functions[functionIndex].Arrays = arrays
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that variables passed to functions are the correct type
|
||||||
|
for _, c := range p.userCalls {
|
||||||
|
// Check native function calls
|
||||||
|
if c.call.Native {
|
||||||
|
for _, arg := range c.call.Args {
|
||||||
|
varExpr, ok := arg.(*ast.VarExpr)
|
||||||
|
if !ok {
|
||||||
|
// Non-variable expression, must be scalar
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
|
||||||
|
info := p.varTypes[funcName][varExpr.Name]
|
||||||
|
if info.typ == typeArray {
|
||||||
|
panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check AWK function calls
|
||||||
|
function := prog.Functions[c.call.Index]
|
||||||
|
for i, arg := range c.call.Args {
|
||||||
|
varExpr, ok := arg.(*ast.VarExpr)
|
||||||
|
if !ok {
|
||||||
|
if function.Arrays[i] {
|
||||||
|
panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg))
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
|
||||||
|
info := p.varTypes[funcName][varExpr.Name]
|
||||||
|
if info.typ == typeArray && !function.Arrays[i] {
|
||||||
|
panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name))
|
||||||
|
}
|
||||||
|
if info.typ != typeArray && function.Arrays[i] {
|
||||||
|
panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.debugTypes {
|
||||||
|
p.printVarTypes(prog)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Patch up variable indexes (interpreter uses an index instead
|
||||||
|
// of name for more efficient lookups)
|
||||||
|
for _, varRef := range p.varRefs {
|
||||||
|
info := p.varTypes[varRef.funcName][varRef.ref.Name]
|
||||||
|
if info.typ == typeArray && !varRef.isArg {
|
||||||
|
panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name))
|
||||||
|
}
|
||||||
|
varRef.ref.Index = info.index
|
||||||
|
}
|
||||||
|
for _, arrayRef := range p.arrayRefs {
|
||||||
|
info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name]
|
||||||
|
if info.typ == typeScalar {
|
||||||
|
panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name))
|
||||||
|
}
|
||||||
|
arrayRef.ref.Index = info.index
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If name refers to a local (in function inFunc), return that
|
||||||
|
// function's name, otherwise return "" (meaning global).
|
||||||
|
func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string {
|
||||||
|
if inFunc == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
for _, param := range prog.Functions[p.functions[inFunc]].Params {
|
||||||
|
if name == param {
|
||||||
|
return inFunc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record a "multi expression" (comma-separated pseudo-expression
|
||||||
|
// used to allow commas around print/printf arguments).
|
||||||
|
func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr {
|
||||||
|
expr := &ast.MultiExpr{exprs}
|
||||||
|
p.multiExprs[expr] = pos
|
||||||
|
return expr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark the multi expression as used (by a print/printf statement).
|
||||||
|
func (p *parser) useMultiExpr(expr *ast.MultiExpr) {
|
||||||
|
delete(p.multiExprs, expr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that there are no unused multi expressions (syntax error).
|
||||||
|
func (p *parser) checkMultiExprs() {
|
||||||
|
if len(p.multiExprs) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Show error on first comma-separated expression
|
||||||
|
min := Position{1000000000, 1000000000}
|
||||||
|
for _, pos := range p.multiExprs {
|
||||||
|
if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) {
|
||||||
|
min = pos
|
||||||
|
}
|
||||||
|
}
|
||||||
|
panic(p.posErrorf(min, "unexpected comma-separated expression"))
|
||||||
|
}
|
72
src/tool/awk/parser/toposort.go
Normal file
72
src/tool/awk/parser/toposort.go
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
// Topological sorting
|
||||||
|
|
||||||
|
package parser
|
||||||
|
|
||||||
|
/*
|
||||||
|
This algorithm is taken from:
|
||||||
|
https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search
|
||||||
|
|
||||||
|
L ← Empty list that will contain the sorted nodes
|
||||||
|
while exists nodes without a permanent mark do
|
||||||
|
select an unmarked node n
|
||||||
|
visit(n)
|
||||||
|
|
||||||
|
function visit(node n)
|
||||||
|
if n has a permanent mark then
|
||||||
|
return
|
||||||
|
if n has a temporary mark then
|
||||||
|
stop (not a DAG)
|
||||||
|
|
||||||
|
mark n with a temporary mark
|
||||||
|
|
||||||
|
for each node m with an edge from n to m do
|
||||||
|
visit(m)
|
||||||
|
|
||||||
|
remove temporary mark from n
|
||||||
|
mark n with a permanent mark
|
||||||
|
add n to head of L
|
||||||
|
*/
|
||||||
|
|
||||||
|
// topoSort performs a depth-first topological sort of graph, where
// graph[n] is the set of nodes that n has an edge to. Every node is
// emitted after all the nodes reachable from it, so for an edge n -> m
// the result lists m before n. Nodes that appear only as edge targets
// are included too.
//
// Cycles are tolerated rather than reported: an edge back to a node
// that is still being visited is ignored. An empty graph yields nil.
// When several orders are valid, the one returned depends on map
// iteration order.
func topoSort(graph map[string]map[string]struct{}) []string {
	if len(graph) == 0 {
		return nil
	}

	// A node is added to visited as soon as its visit starts. That
	// covers both "finished" and "in progress", and in either state a
	// repeated visit has nothing to do.
	visited := make(map[string]struct{})
	var sorted []string

	var visit func(string)
	visit = func(n string) {
		if _, seen := visited[n]; seen {
			return
		}
		visited[n] = struct{}{}
		for m := range graph[n] {
			visit(m)
		}
		sorted = append(sorted, n)
	}

	for node := range graph {
		visit(node)
	}
	return sorted
}
|
100
src/tool/awk/parser/toposort_test.go
Normal file
100
src/tool/awk/parser/toposort_test.go
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTopoSortEmpty(t *testing.T) {
|
||||||
|
sorted := topoSort(nil)
|
||||||
|
if len(sorted) != 0 {
|
||||||
|
t.Fatalf("expected empty slice, got %v", sorted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTopoSortSimple(t *testing.T) {
|
||||||
|
sorted := topoSort(map[string]map[string]struct{}{
|
||||||
|
"a": {"b": struct{}{}},
|
||||||
|
"b": {"c": struct{}{}},
|
||||||
|
})
|
||||||
|
if len(sorted) != 3 {
|
||||||
|
t.Fatalf("expected 3 items, got %d", len(sorted))
|
||||||
|
}
|
||||||
|
assertBefore(t, sorted, "c", "b")
|
||||||
|
assertBefore(t, sorted, "b", "a")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTopoSortComplex(t *testing.T) {
|
||||||
|
sorted := topoSort(map[string]map[string]struct{}{
|
||||||
|
"a": {"b": struct{}{}, "c": struct{}{}},
|
||||||
|
"c": {"d": struct{}{}},
|
||||||
|
"f": {"g": struct{}{}, "h": struct{}{}},
|
||||||
|
"g": {},
|
||||||
|
"h": {},
|
||||||
|
})
|
||||||
|
if len(sorted) != 7 {
|
||||||
|
t.Fatalf("expected 7 items, got %d", len(sorted))
|
||||||
|
}
|
||||||
|
assertBefore(t, sorted, "g", "f")
|
||||||
|
assertBefore(t, sorted, "h", "f")
|
||||||
|
assertBefore(t, sorted, "d", "c")
|
||||||
|
assertBefore(t, sorted, "c", "a")
|
||||||
|
assertBefore(t, sorted, "b", "a")
|
||||||
|
}
|
||||||
|
|
||||||
|
func assertBefore(t *testing.T, sorted []string, x, y string) {
|
||||||
|
xi := strIndex(sorted, x)
|
||||||
|
if xi < 0 {
|
||||||
|
t.Fatalf("expected %q to be in result", x)
|
||||||
|
}
|
||||||
|
yi := strIndex(sorted, y)
|
||||||
|
if yi < 0 {
|
||||||
|
t.Fatalf("expected %q to be in result", y)
|
||||||
|
}
|
||||||
|
if xi >= yi {
|
||||||
|
t.Fatalf("expected %q to come before %q, got indexes %d and %d", x, y, xi, yi)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// strIndex returns the position of the first element of slice equal to s,
// or -1 when no element matches.
func strIndex(slice []string, s string) int {
	for pos := 0; pos < len(slice); pos++ {
		if slice[pos] == s {
			return pos
		}
	}
	return -1
}
|
||||||
|
|
||||||
|
func TestTopoSortCycle(t *testing.T) {
|
||||||
|
sorted := topoSort(map[string]map[string]struct{}{
|
||||||
|
"a": {"b": struct{}{}, "c": struct{}{}},
|
||||||
|
"c": {"a": struct{}{}},
|
||||||
|
})
|
||||||
|
if len(sorted) != 3 {
|
||||||
|
t.Fatalf("expected 3 items, got %d", len(sorted))
|
||||||
|
}
|
||||||
|
assertBefore(t, sorted, "b", "a")
|
||||||
|
c := strIndex(sorted, "a")
|
||||||
|
if c < 0 {
|
||||||
|
t.Fatalf("expected %q to be in result", c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTopoSortLarge(t *testing.T) {
|
||||||
|
const num = 1000
|
||||||
|
graph := make(map[string]map[string]struct{})
|
||||||
|
for i := 0; i < num; i++ {
|
||||||
|
graph[strconv.Itoa(i)] = map[string]struct{}{strconv.Itoa(i + 1): {}}
|
||||||
|
}
|
||||||
|
graph[strconv.Itoa(num)] = map[string]struct{}{}
|
||||||
|
sorted := topoSort(graph)
|
||||||
|
if len(sorted) != num+1 {
|
||||||
|
t.Fatalf("expected %d items, got %d", num+1, len(sorted))
|
||||||
|
}
|
||||||
|
for i := 0; i < num+1; i++ {
|
||||||
|
expected := num - i
|
||||||
|
if sorted[i] != strconv.Itoa(expected) {
|
||||||
|
t.Fatalf("expected %d to be at index %d, got %s", num-1, i, sorted[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
125
src/tool/awk/readme.md
Normal file
125
src/tool/awk/readme.md
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
|
||||||
|
# GoAWK: an AWK interpreter with CSV support
|
||||||
|
|
||||||
|
[![Documentation](https://pkg.go.dev/badge/github.com/benhoyt/goawk)](https://pkg.go.dev/github.com/benhoyt/goawk)
|
||||||
|
[![GitHub Actions Build](https://github.com/benhoyt/goawk/workflows/Go/badge.svg)](https://github.com/benhoyt/goawk/actions?query=workflow%3AGo)
|
||||||
|
|
||||||
|
|
||||||
|
AWK is a fascinating text-processing language, and somehow after reading the delightfully-terse [*The AWK Programming Language*](https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf) I was inspired to write an interpreter for it in Go. So here it is, feature-complete and tested against "the one true AWK" and GNU AWK test suites.
|
||||||
|
|
||||||
|
GoAWK is a POSIX-compatible version of AWK, and additionally has a CSV mode for reading and writing CSV and TSV files. This feature was sponsored by the [library of the University of Antwerp](https://www.uantwerpen.be/en/library/). Read the [CSV documentation](https://github.com/benhoyt/goawk/blob/master/csv.md).
|
||||||
|
|
||||||
|
You can also read one of the articles I've written about GoAWK:
|
||||||
|
|
||||||
|
* The original article about [how GoAWK works and performs](https://benhoyt.com/writings/goawk/)
|
||||||
|
* How I converted the tree-walking interpreter to a [bytecode compiler and virtual machine](https://benhoyt.com/writings/goawk-compiler-vm/)
|
||||||
|
* A description of why and how I added [CSV support](https://benhoyt.com/writings/goawk-csv/)
|
||||||
|
|
||||||
|
|
||||||
|
## Basic usage
|
||||||
|
|
||||||
|
To use the command-line version, simply use `go install` to install it, and then run it using `goawk` (assuming `~/go/bin` is in your `PATH`):
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ go install github.com/benhoyt/goawk@latest
|
||||||
|
|
||||||
|
$ goawk 'BEGIN { print "foo", 42 }'
|
||||||
|
foo 42
|
||||||
|
|
||||||
|
$ echo 1 2 3 | goawk '{ print $1 + $3 }'
|
||||||
|
4
|
||||||
|
|
||||||
|
# Or use GoAWK's CSV and @"named-field" support:
|
||||||
|
$ echo -e 'name,amount\nBob,17.50\nJill,20\n"Boba Fett",100.00' | \
|
||||||
|
goawk -i csv -H '{ total += @"amount" } END { print total }'
|
||||||
|
137.5
|
||||||
|
```
|
||||||
|
|
||||||
|
On Windows, `"` is the shell quoting character, so use `"` around the entire AWK program on the command line, and use `'` around AWK strings -- this is a non-POSIX extension to make GoAWK easier to use on Windows:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
C:\> goawk "BEGIN { print 'foo', 42 }"
|
||||||
|
foo 42
|
||||||
|
```
|
||||||
|
|
||||||
|
To use it in your Go programs, you can call `interp.Exec()` directly for simple needs:
|
||||||
|
|
||||||
|
```go
|
||||||
|
input := strings.NewReader("foo bar\n\nbaz buz")
|
||||||
|
err := interp.Exec("$0 { print $1 }", " ", input, nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// foo
|
||||||
|
// baz
|
||||||
|
```
|
||||||
|
|
||||||
|
Or you can use the `parser` module and then `interp.ExecProgram()` to control execution, set variables, and so on:
|
||||||
|
|
||||||
|
```go
|
||||||
|
src := "{ print NR, tolower($0) }"
|
||||||
|
input := "A\naB\nAbC"
|
||||||
|
|
||||||
|
prog, err := parser.ParseProgram([]byte(src), nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
config := &interp.Config{
|
||||||
|
Stdin: strings.NewReader(input),
|
||||||
|
Vars: []string{"OFS", ":"},
|
||||||
|
}
|
||||||
|
_, err = interp.ExecProgram(prog, config)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// 1:a
|
||||||
|
// 2:ab
|
||||||
|
// 3:abc
|
||||||
|
```
|
||||||
|
|
||||||
|
If you need to repeat execution of the same program on different inputs, you can call [`interp.New`](https://pkg.go.dev/github.com/benhoyt/goawk/interp#New) once, and then call the returned object's `Execute` method as many times as you need.
|
||||||
|
|
||||||
|
Read the [package documentation](https://pkg.go.dev/github.com/benhoyt/goawk) for more details.
|
||||||
|
|
||||||
|
|
||||||
|
## Differences from AWK
|
||||||
|
|
||||||
|
The intention is for GoAWK to conform to `awk`'s behavior and to the [POSIX AWK spec](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html), but this section describes some areas where it's different.
|
||||||
|
|
||||||
|
Additional features GoAWK has over AWK:
|
||||||
|
|
||||||
|
* It has proper support for CSV and TSV files ([read the documentation](https://github.com/benhoyt/goawk/blob/master/csv.md)).
|
||||||
|
* It supports negative field indexes to access fields from the right, for example, `$-1` refers to the last field.
|
||||||
|
* It's embeddable in your Go programs! You can even call custom Go functions from your AWK scripts.
|
||||||
|
* Most AWK scripts are faster than `awk` and on a par with `gawk`, though usually slower than `mawk`. (See [recent benchmarks](https://benhoyt.com/writings/goawk-compiler-vm/#virtual-machine-results).)
|
||||||
|
* The parser supports `'single-quoted strings'` in addition to `"double-quoted strings"`, primarily to make Windows one-liners easier (the Windows `cmd.exe` shell uses `"` as the quote character).
|
||||||
|
|
||||||
|
Things AWK has over GoAWK:
|
||||||
|
|
||||||
|
* Scripts that use regular expressions are slower than other implementations (unfortunately Go's `regexp` package is relatively slow).
|
||||||
|
* AWK is written by Alfred Aho, Peter Weinberger, and Brian Kernighan.
|
||||||
|
|
||||||
|
|
||||||
|
## Stability
|
||||||
|
|
||||||
|
This project has a good suite of tests, which include my own interpreter tests, the original AWK test suite, and the relevant tests from the Gawk test suite. I've used it a bunch personally, and it's used in the [Benthos](https://github.com/benthosdev/benthos) stream processor as well as by the software team at the library of the University of Antwerp. However, to `err == human`, so please use GoAWK at your own risk. I intend not to change the Go API in a breaking way in any v1.x.y version.
|
||||||
|
|
||||||
|
|
||||||
|
## AWKGo
|
||||||
|
|
||||||
|
The GoAWK repository also includes the creatively-named AWKGo, an AWK-to-Go compiler. This is experimental and is not subject to the stability requirements of GoAWK itself. You can [read more about AWKGo](https://benhoyt.com/writings/awkgo/) or browse the code on the [`awkgo` branch](https://github.com/benhoyt/goawk/tree/awkgo/awkgo).
|
||||||
|
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
GoAWK is licensed under an open source [MIT license](https://github.com/benhoyt/goawk/blob/master/LICENSE.txt).
|
||||||
|
|
||||||
|
|
||||||
|
## The end
|
||||||
|
|
||||||
|
Have fun, and please [contact me](https://benhoyt.com/) if you're using GoAWK or have any feedback!
|
2
src/tool/awk/scripts/benchmark.sh
Executable file
2
src/tool/awk/scripts/benchmark.sh
Executable file
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/sh
|
||||||
|
go test ./interp -bench=. -count=5 > benchmarks_new.txt
|
124
src/tool/awk/scripts/benchmark_awks.py
Executable file
124
src/tool/awk/scripts/benchmark_awks.py
Executable file
|
@ -0,0 +1,124 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Benchmark GoAWK against other AWK versions
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import os.path
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
AWKS = [
|
||||||
|
'./goawk',
|
||||||
|
'./orig', # GoAWK without perf improvements
|
||||||
|
'original-awk',
|
||||||
|
'gawk',
|
||||||
|
'mawk',
|
||||||
|
]
|
||||||
|
NORM_INDEX = AWKS.index('original-awk')
|
||||||
|
TESTS_TO_MEAN = None # By default, calculate the mean of all tests
|
||||||
|
if False:
|
||||||
|
# Only get the mean of these tests because these are the only ones
|
||||||
|
# we show in the GoAWK article.
|
||||||
|
TESTS_TO_MEAN = [
|
||||||
|
'tt.01_print',
|
||||||
|
'tt.02_print_NR_NF',
|
||||||
|
'tt.02a_print_length',
|
||||||
|
'tt.03_sum_length',
|
||||||
|
'tt.03a_sum_field',
|
||||||
|
'tt.04_printf_fields',
|
||||||
|
'tt.05_concat_fields',
|
||||||
|
'tt.06_count_lengths',
|
||||||
|
'tt.07_even_fields',
|
||||||
|
'tt.big_complex_program',
|
||||||
|
'tt.x1_mandelbrot',
|
||||||
|
'tt.x2_sum_loop',
|
||||||
|
]
|
||||||
|
NUM_RUNS = 6
|
||||||
|
MIN_TIME = 0.5
|
||||||
|
PROGRAM_GLOB = 'testdata/tt.*'
|
||||||
|
|
||||||
|
if len(sys.argv) > 1:
|
||||||
|
PROGRAM_GLOB = 'testdata/' + sys.argv[1]
|
||||||
|
|
||||||
|
|
||||||
|
def repeat_file(input_file, repeated_file, n):
    """Write n back-to-back copies of input_file's bytes to repeated_file.

    Both files are opened in binary mode; repeated_file is created or
    truncated. With n == 0 the output file ends up empty.
    """
    with open(input_file, 'rb') as src:
        with open(repeated_file, 'wb') as dst:
            for _ in range(n):
                # Rewind so each pass copies the input from its first byte.
                src.seek(0)
                shutil.copyfileobj(src, dst)
|
||||||
|
|
||||||
|
|
||||||
|
print('Test ', end='')
|
||||||
|
for awk in AWKS:
|
||||||
|
display_awk = os.path.basename(awk)
|
||||||
|
display_awk = display_awk.replace('original-awk', 'awk')
|
||||||
|
print('| {:>8} '.format(display_awk), end='')
|
||||||
|
print()
|
||||||
|
print('-'*28 + ' | --------'*len(AWKS))
|
||||||
|
|
||||||
|
repeats_created = []
|
||||||
|
products = [1] * len(AWKS)
|
||||||
|
num_products = 0
|
||||||
|
programs = sorted(glob.glob(PROGRAM_GLOB))
|
||||||
|
for program in programs:
|
||||||
|
# First do a test run with GoAWK to see roughly how long it takes
|
||||||
|
cmdline = '{} -f {} testdata/foo.td >tt.out'.format(AWKS[0], program)
|
||||||
|
start = time.time()
|
||||||
|
status = subprocess.call(cmdline, shell=True)
|
||||||
|
elapsed = time.time() - start
|
||||||
|
|
||||||
|
# If test run took less than MIN_TIME seconds, scale/repeat input
|
||||||
|
# file accordingly
|
||||||
|
input_file = 'testdata/foo.td'
|
||||||
|
if elapsed < MIN_TIME:
|
||||||
|
multiplier = int(round(MIN_TIME / elapsed))
|
||||||
|
repeated_file = '{}.{}'.format(input_file, multiplier)
|
||||||
|
if not os.path.exists(repeated_file):
|
||||||
|
repeat_file(input_file, repeated_file, multiplier)
|
||||||
|
repeats_created.append(repeated_file)
|
||||||
|
input_file = repeated_file
|
||||||
|
|
||||||
|
# Record time taken to run this test, running each NUM_RUNS times
|
||||||
|
# and taking the minimum elapsed time
|
||||||
|
awk_times = []
|
||||||
|
for awk in AWKS:
|
||||||
|
cmdline = '{} -f {} {} >tt.out'.format(awk, program, input_file)
|
||||||
|
times = []
|
||||||
|
for i in range(NUM_RUNS):
|
||||||
|
start = time.time()
|
||||||
|
status = subprocess.call(cmdline, shell=True)
|
||||||
|
elapsed = time.time() - start
|
||||||
|
times.append(elapsed)
|
||||||
|
if status != 0:
|
||||||
|
print('ERROR status {} from cmd: {}'.format(status, cmdline), file=sys.stderr)
|
||||||
|
min_time = min(sorted(times)[1:])
|
||||||
|
awk_times.append(min_time)
|
||||||
|
|
||||||
|
# Normalize to One True AWK time = 1.0
|
||||||
|
norm_time = awk_times[NORM_INDEX]
|
||||||
|
speeds = [norm_time/t for t in awk_times]
|
||||||
|
test_name = program.split('/')[1]
|
||||||
|
if TESTS_TO_MEAN is None or test_name in TESTS_TO_MEAN:
|
||||||
|
num_products += 1
|
||||||
|
for i in range(len(AWKS)):
|
||||||
|
products[i] *= speeds[i]
|
||||||
|
|
||||||
|
display_name = test_name.split('_')[0] + ' (' + ' '.join(test_name.split('_')[1:]) + ')'
|
||||||
|
print('{:28}'.format(display_name), end='')
|
||||||
|
for i, awk in enumerate(AWKS):
|
||||||
|
print(' | {:8.2f}'.format(speeds[i]), end='')
|
||||||
|
print()
|
||||||
|
|
||||||
|
print('-'*28 + ' | --------'*len(AWKS))
|
||||||
|
print('**Geo mean** ', end='')
|
||||||
|
for i, awk in enumerate(AWKS):
|
||||||
|
print(' | **{:.2f}**'.format(products[i] ** (1.0/num_products)), end='')
|
||||||
|
print()
|
||||||
|
|
||||||
|
# Delete temporary files created
|
||||||
|
os.remove('tt.out')
|
||||||
|
for repeated_file in repeats_created:
|
||||||
|
os.remove(repeated_file)
|
2
src/tool/awk/scripts/benchstat.sh
Executable file
2
src/tool/awk/scripts/benchstat.sh
Executable file
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/sh
|
||||||
|
~/go/bin/benchstat -sort=delta -geomean benchmarks_old.txt benchmarks_new.txt
|
9
src/tool/awk/scripts/csvbench/count.py
Normal file
9
src/tool/awk/scripts/csvbench/count.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
import csv
|
||||||
|
import sys
|
||||||
|
|
||||||
|
lines, fields = 0, 0
|
||||||
|
for row in csv.reader(sys.stdin):
|
||||||
|
lines += 1
|
||||||
|
fields += len(row)
|
||||||
|
|
||||||
|
print(lines, fields)
|
27
src/tool/awk/scripts/csvbench/count/main.go
Normal file
27
src/tool/awk/scripts/csvbench/count/main.go
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// main reads CSV records from standard input until EOF and prints the number
// of records followed by the total number of fields. Any read error other
// than io.EOF terminates the program via log.Fatal.
func main() {
	r := csv.NewReader(bufio.NewReader(os.Stdin))
	var lines, fields int
	for {
		record, err := r.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			log.Fatal(err)
		}
		lines++
		fields += len(record)
	}
	fmt.Println(lines, fields)
}
|
48
src/tool/awk/scripts/csvbench/csvbench.sh
Executable file
48
src/tool/awk/scripts/csvbench/csvbench.sh
Executable file
|
@ -0,0 +1,48 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
echo ===== Writing 1GB - goawk
|
||||||
|
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
|
||||||
|
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
|
||||||
|
time goawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field" }' >/dev/null
|
||||||
|
|
||||||
|
echo ===== Writing 1GB - frawk
|
||||||
|
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
|
||||||
|
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
|
||||||
|
time frawk -o csv 'BEGIN { for (i=0; i<3514073; i++) print i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field", i, "foo", "bob@example.com", "simple,quoted", "quoted string with \" in it", "0123456789", "9876543210", "The quick brown fox jumps over the lazy dog", "", "final field"; }' >/dev/null
|
||||||
|
|
||||||
|
echo ===== Writing 1GB - Python
|
||||||
|
time python3 write.py >/dev/null
|
||||||
|
time python3 write.py >/dev/null
|
||||||
|
time python3 write.py >/dev/null
|
||||||
|
|
||||||
|
echo ===== Writing 1GB - Go
|
||||||
|
go build -o bin/write ./write
|
||||||
|
time ./bin/write >/dev/null
|
||||||
|
time ./bin/write >/dev/null
|
||||||
|
time ./bin/write >/dev/null
|
||||||
|
|
||||||
|
|
||||||
|
./bin/write >count.csv
|
||||||
|
|
||||||
|
echo ===== Reading 1GB - goawk
|
||||||
|
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||||
|
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||||
|
time goawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||||
|
|
||||||
|
echo ===== Reading 1GB - frawk
|
||||||
|
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||||
|
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||||
|
time frawk -i csv '{ w+=NF } END { print NR, w }' <count.csv
|
||||||
|
|
||||||
|
echo ===== Reading 1GB - Python
|
||||||
|
time python3 count.py <count.csv
|
||||||
|
time python3 count.py <count.csv
|
||||||
|
time python3 count.py <count.csv
|
||||||
|
|
||||||
|
echo ===== Reading 1GB - Go
|
||||||
|
go build -o bin/count ./count
|
||||||
|
time ./bin/count <count.csv
|
||||||
|
time ./bin/count <count.csv
|
||||||
|
time ./bin/count <count.csv
|
27
src/tool/awk/scripts/csvbench/write.py
Normal file
27
src/tool/awk/scripts/csvbench/write.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
import csv
|
||||||
|
import sys
|
||||||
|
|
||||||
|
writer = csv.writer(sys.stdout)
|
||||||
|
for i in range(3514073): # will create a ~1GB file
|
||||||
|
writer.writerow([
|
||||||
|
i,
|
||||||
|
"foo",
|
||||||
|
"bob@example.com",
|
||||||
|
"simple,quoted",
|
||||||
|
"quoted string with \" in it",
|
||||||
|
"0123456789",
|
||||||
|
"9876543210",
|
||||||
|
"The quick brown fox jumps over the lazy dog",
|
||||||
|
"",
|
||||||
|
"final field",
|
||||||
|
i,
|
||||||
|
"foo",
|
||||||
|
"bob@example.com",
|
||||||
|
"simple,quoted",
|
||||||
|
"quoted string with \" in it",
|
||||||
|
"0123456789",
|
||||||
|
"9876543210",
|
||||||
|
"The quick brown fox jumps over the lazy dog",
|
||||||
|
"",
|
||||||
|
"final field",
|
||||||
|
])
|
43
src/tool/awk/scripts/csvbench/write/main.go
Normal file
43
src/tool/awk/scripts/csvbench/write/main.go
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// main writes a fixed number of identical 20-field CSV records to standard
// output; only the row counter (fields 1 and 11) varies between records.
// Any write or flush error terminates the program via log.Fatal.
func main() {
	const rows = 3514073 // will create a ~1GB file

	out := csv.NewWriter(os.Stdout)
	for n := 0; n < rows; n++ {
		record := []string{
			strconv.Itoa(n),
			"foo",
			"bob@example.com",
			"simple,quoted",
			"quoted string with \" in it",
			"0123456789",
			"9876543210",
			"The quick brown fox jumps over the lazy dog",
			"",
			"final field",
			strconv.Itoa(n),
			"foo",
			"bob@example.com",
			"simple,quoted",
			"quoted string with \" in it",
			"0123456789",
			"9876543210",
			"The quick brown fox jumps over the lazy dog",
			"",
			"final field",
		}
		if err := out.Write(record); err != nil {
			log.Fatal(err)
		}
	}

	// Flush reports nothing itself; a failure only shows up through Error.
	out.Flush()
	if err := out.Error(); err != nil {
		log.Fatal(err)
	}
}
|
2
src/tool/awk/scripts/fuzz_input.sh
Executable file
2
src/tool/awk/scripts/fuzz_input.sh
Executable file
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/sh
|
||||||
|
go1.18rc1 test ./interp -run=^$ -fuzz=Input -parallel=4
|
2
src/tool/awk/scripts/fuzz_source.sh
Executable file
2
src/tool/awk/scripts/fuzz_source.sh
Executable file
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/sh
|
||||||
|
go1.18rc1 test ./interp -run=^$ -fuzz=Source -parallel=4
|
21
src/tool/awk/scripts/make_binaries.sh
Executable file
21
src/tool/awk/scripts/make_binaries.sh
Executable file
|
@ -0,0 +1,21 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
go build
|
||||||
|
VERSION="$(./goawk -version)"
|
||||||
|
|
||||||
|
GOOS=windows GOARCH=386 go build -ldflags="-w"
|
||||||
|
zip "goawk_${VERSION}_windows_386.zip" goawk.exe README.md csv.md LICENSE.txt
|
||||||
|
GOOS=windows GOARCH=amd64 go build -ldflags="-w"
|
||||||
|
zip "goawk_${VERSION}_windows_amd64.zip" goawk.exe README.md csv.md LICENSE.txt
|
||||||
|
|
||||||
|
GOOS=linux GOARCH=386 go build -ldflags="-w"
|
||||||
|
tar -cvzf "goawk_${VERSION}_linux_386.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||||
|
GOOS=linux GOARCH=amd64 go build -ldflags="-w"
|
||||||
|
tar -cvzf "goawk_${VERSION}_linux_amd64.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||||
|
|
||||||
|
GOOS=darwin GOARCH=amd64 go build -ldflags="-w"
|
||||||
|
tar -cvzf "goawk_${VERSION}_darwin_amd64.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||||
|
GOOS=darwin GOARCH=arm64 go build -ldflags="-w"
|
||||||
|
tar -cvzf "goawk_${VERSION}_darwin_arm64.tar.gz" goawk README.md csv.md LICENSE.txt
|
||||||
|
|
||||||
|
rm -f goawk goawk.exe
|
46
src/tool/awk/testdata/README
vendored
Normal file
46
src/tool/awk/testdata/README
vendored
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
Original README.TESTS from one-true-awk regdir tests directory:
|
||||||
|
---------------------------------------------------------------
|
||||||
|
The archive of test files contains
|
||||||
|
|
||||||
|
- A shell file called REGRESS that controls the testing process.
|
||||||
|
|
||||||
|
- Several shell files called Compare* that control sub-parts
|
||||||
|
of the testing.
|
||||||
|
|
||||||
|
- About 160 small tests called t.* that constitute a random
|
||||||
|
sampling of awk constructions collected over the years.
|
||||||
|
Not organized, but they touch almost everything.
|
||||||
|
|
||||||
|
- About 60 small tests called p.* that come from the first
|
||||||
|
two chapters of The AWK Programming Environment. This is
|
||||||
|
basic stuff -- they have to work.
|
||||||
|
|
||||||
|
These two sets are intended as regression tests, to be sure
|
||||||
|
that a new version produces the same results as a previous one.
|
||||||
|
There are a couple of standard data files used with them,
|
||||||
|
test.data and test.countries, but others would work too.
|
||||||
|
|
||||||
|
- About 20 files called T.* that are self-contained and
|
||||||
|
more systematic tests of specific language features.
|
||||||
|
For example, T.clv tests command-line variable handling.
|
||||||
|
These tests are not regressions -- they compute the right
|
||||||
|
answer by separate means, then compare the awk output.
|
||||||
|
A specific test for each new bug found shows up in at least
|
||||||
|
one of these, most often T.misc. There are about 220 tests
|
||||||
|
total in these files.
|
||||||
|
|
||||||
|
- Two of these files, T.re and T.sub, are systematic tests
|
||||||
|
of the regular expression and substitution code. They express
|
||||||
|
tests in a small language, then generate awk programs that
|
||||||
|
verify behavior.
|
||||||
|
|
||||||
|
- About 20 files called tt.* that are used as timing tests;
|
||||||
|
they use the most common awk constructions in straightforward
|
||||||
|
ways, against a large input file constructed by Compare.tt.
|
||||||
|
|
||||||
|
|
||||||
|
There is undoubtedly more stuff in the archive; it's been
|
||||||
|
collecting for years and may need pruning. Suggestions for
|
||||||
|
improvement, additional tests (especially systematic ones),
|
||||||
|
and the like are all welcome.
|
||||||
|
|
3
src/tool/awk/testdata/csv/1.csv
vendored
Normal file
3
src/tool/awk/testdata/csv/1.csv
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
name,age
|
||||||
|
Bob,42
|
||||||
|
Jill,37
|
|
2
src/tool/awk/testdata/csv/2.csv
vendored
Normal file
2
src/tool/awk/testdata/csv/2.csv
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
age,email,name
|
||||||
|
25,sarah@example.com,Sarah
|
|
2
src/tool/awk/testdata/csv/address5.csv
vendored
Normal file
2
src/tool/awk/testdata/csv/address5.csv
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
name,address_1,address_2,address_3,address_4,address_5
|
||||||
|
Bob Smith,123 Way St,Apt 2B,Township,Cityville,United Plates
|
|
2
src/tool/awk/testdata/csv/fields.csv
vendored
Normal file
2
src/tool/awk/testdata/csv/fields.csv
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
id,name,email
|
||||||
|
1,Bob,b@bob.com
|
|
2563
src/tool/awk/testdata/csv/nz-schools.csv
vendored
Normal file
2563
src/tool/awk/testdata/csv/nz-schools.csv
vendored
Normal file
File diff suppressed because it is too large
Load diff
52
src/tool/awk/testdata/csv/states.csv
vendored
Normal file
52
src/tool/awk/testdata/csv/states.csv
vendored
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
"State","Abbreviation"
|
||||||
|
"Alabama","AL"
|
||||||
|
"Alaska","AK"
|
||||||
|
"Arizona","AZ"
|
||||||
|
"Arkansas","AR"
|
||||||
|
"California","CA"
|
||||||
|
"Colorado","CO"
|
||||||
|
"Connecticut","CT"
|
||||||
|
"Delaware","DE"
|
||||||
|
"District of Columbia","DC"
|
||||||
|
"Florida","FL"
|
||||||
|
"Georgia","GA"
|
||||||
|
"Hawaii","HI"
|
||||||
|
"Idaho","ID"
|
||||||
|
"Illinois","IL"
|
||||||
|
"Indiana","IN"
|
||||||
|
"Iowa","IA"
|
||||||
|
"Kansas","KS"
|
||||||
|
"Kentucky","KY"
|
||||||
|
"Louisiana","LA"
|
||||||
|
"Maine","ME"
|
||||||
|
"Montana","MT"
|
||||||
|
"Nebraska","NE"
|
||||||
|
"Nevada","NV"
|
||||||
|
"New Hampshire","NH"
|
||||||
|
"New Jersey","NJ"
|
||||||
|
"New Mexico","NM"
|
||||||
|
"New York","NY"
|
||||||
|
"North Carolina","NC"
|
||||||
|
"North Dakota","ND"
|
||||||
|
"Ohio","OH"
|
||||||
|
"Oklahoma","OK"
|
||||||
|
"Oregon","OR"
|
||||||
|
"Maryland","MD"
|
||||||
|
"Massachusetts","MA"
|
||||||
|
"Michigan","MI"
|
||||||
|
"Minnesota","MN"
|
||||||
|
"Mississippi","MS"
|
||||||
|
"Missouri","MO"
|
||||||
|
"Pennsylvania","PA"
|
||||||
|
"Rhode Island","RI"
|
||||||
|
"South Carolina","SC"
|
||||||
|
"South Dakota","SD"
|
||||||
|
"Tennessee","TN"
|
||||||
|
"Texas","TX"
|
||||||
|
"Utah","UT"
|
||||||
|
"Vermont","VT"
|
||||||
|
"Virginia","VA"
|
||||||
|
"Washington","WA"
|
||||||
|
"West Virginia","WV"
|
||||||
|
"Wisconsin","WI"
|
||||||
|
"Wyoming","WY"
|
|
53
src/tool/awk/testdata/csv/states.psv
vendored
Normal file
53
src/tool/awk/testdata/csv/states.psv
vendored
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# comment
|
||||||
|
State|Abbreviation
|
||||||
|
Alabama|AL
|
||||||
|
Alaska|AK
|
||||||
|
Arizona|AZ
|
||||||
|
Arkansas|AR
|
||||||
|
California|CA
|
||||||
|
Colorado|CO
|
||||||
|
Connecticut|CT
|
||||||
|
Delaware|DE
|
||||||
|
District of Columbia|DC
|
||||||
|
Florida|FL
|
||||||
|
Georgia|GA
|
||||||
|
Hawaii|HI
|
||||||
|
Idaho|ID
|
||||||
|
Illinois|IL
|
||||||
|
Indiana|IN
|
||||||
|
Iowa|IA
|
||||||
|
Kansas|KS
|
||||||
|
Kentucky|KY
|
||||||
|
Louisiana|LA
|
||||||
|
Maine|ME
|
||||||
|
Montana|MT
|
||||||
|
Nebraska|NE
|
||||||
|
Nevada|NV
|
||||||
|
New Hampshire|NH
|
||||||
|
New Jersey|NJ
|
||||||
|
New Mexico|NM
|
||||||
|
New York|NY
|
||||||
|
North Carolina|NC
|
||||||
|
North Dakota|ND
|
||||||
|
Ohio|OH
|
||||||
|
Oklahoma|OK
|
||||||
|
Oregon|OR
|
||||||
|
Maryland|MD
|
||||||
|
Massachusetts|MA
|
||||||
|
Michigan|MI
|
||||||
|
Minnesota|MN
|
||||||
|
Mississippi|MS
|
||||||
|
Missouri|MO
|
||||||
|
Pennsylvania|PA
|
||||||
|
Rhode Island|RI
|
||||||
|
South Carolina|SC
|
||||||
|
South Dakota|SD
|
||||||
|
Tennessee|TN
|
||||||
|
Texas|TX
|
||||||
|
Utah|UT
|
||||||
|
Vermont|VT
|
||||||
|
Virginia|VA
|
||||||
|
Washington|WA
|
||||||
|
West Virginia|WV
|
||||||
|
Wisconsin|WI
|
||||||
|
Wyoming|WY
|
BIN
src/tool/awk/testdata/echo
vendored
Executable file
BIN
src/tool/awk/testdata/echo
vendored
Executable file
Binary file not shown.
1
src/tool/awk/testdata/filename/10
vendored
Normal file
1
src/tool/awk/testdata/filename/10
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
foo
|
1
src/tool/awk/testdata/filename/10x
vendored
Normal file
1
src/tool/awk/testdata/filename/10x
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
bar
|
37801
src/tool/awk/testdata/foo.td
vendored
Normal file
37801
src/tool/awk/testdata/foo.td
vendored
Normal file
File diff suppressed because it is too large
Load diff
1
src/tool/awk/testdata/g.1
vendored
Normal file
1
src/tool/awk/testdata/g.1
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
ONE
|
1
src/tool/awk/testdata/g.2
vendored
Normal file
1
src/tool/awk/testdata/g.2
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
TWO
|
10
src/tool/awk/testdata/g.3
vendored
Normal file
10
src/tool/awk/testdata/g.3
vendored
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
BEGIN {
|
||||||
|
printf "A=%d, B=%d\n", A, B
|
||||||
|
for (i = 1; i < ARGC; i++) {
|
||||||
|
printf "\tARGV[%d] = %s\n", i, ARGV[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
END {
|
||||||
|
printf "A=%d, B=%d\n", A, B
|
||||||
|
}
|
2
src/tool/awk/testdata/g.4
vendored
Normal file
2
src/tool/awk/testdata/g.4
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
FOUR a
|
||||||
|
FOUR b
|
15
src/tool/awk/testdata/gawk/addcomma.awk
vendored
Normal file
15
src/tool/awk/testdata/gawk/addcomma.awk
vendored
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# addcomma - put commas in numbers
|
||||||
|
# input: a number per line
|
||||||
|
# output: the input number followed by
|
||||||
|
# the number with commas and two decimal places
|
||||||
|
|
||||||
|
{ printf("%-12s %20s\n", $0, addcomma($0)) }
|
||||||
|
|
||||||
|
function addcomma(x, num) {
|
||||||
|
if (x < 0)
|
||||||
|
return "-" addcomma(-x)
|
||||||
|
num = sprintf("%.2f", x) # num is dddddd.dd
|
||||||
|
while (num ~ /[0-9][0-9][0-9][0-9]/)
|
||||||
|
sub(/[0-9][0-9][0-9][,.]/, ",&", num)
|
||||||
|
return num
|
||||||
|
}
|
7
src/tool/awk/testdata/gawk/addcomma.in
vendored
Normal file
7
src/tool/awk/testdata/gawk/addcomma.in
vendored
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
0
|
||||||
|
-1
|
||||||
|
-12.34
|
||||||
|
12345
|
||||||
|
-1234567.89
|
||||||
|
-123.
|
||||||
|
-123456
|
7
src/tool/awk/testdata/gawk/addcomma.ok
vendored
Normal file
7
src/tool/awk/testdata/gawk/addcomma.ok
vendored
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
0 0.00
|
||||||
|
-1 -1.00
|
||||||
|
-12.34 -12.34
|
||||||
|
12345 12,345.00
|
||||||
|
-1234567.89 -1,234,567.89
|
||||||
|
-123. -123.00
|
||||||
|
-123456 -123,456.00
|
1
src/tool/awk/testdata/gawk/anchgsub.awk
vendored
Normal file
1
src/tool/awk/testdata/gawk/anchgsub.awk
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{ gsub(/^[ ]*/, "", $0) ; print }
|
1
src/tool/awk/testdata/gawk/anchgsub.in
vendored
Normal file
1
src/tool/awk/testdata/gawk/anchgsub.in
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
This is a test, this is only a test.
|
1
src/tool/awk/testdata/gawk/anchgsub.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/anchgsub.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
This is a test, this is only a test.
|
33
src/tool/awk/testdata/gawk/anchor.awk
vendored
Normal file
33
src/tool/awk/testdata/gawk/anchor.awk
vendored
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
BEGIN { RS = "" }
|
||||||
|
|
||||||
|
{
|
||||||
|
if (/^A/)
|
||||||
|
print "ok"
|
||||||
|
else
|
||||||
|
print "not ok"
|
||||||
|
|
||||||
|
if (/B$/)
|
||||||
|
print "not ok"
|
||||||
|
else
|
||||||
|
print "ok"
|
||||||
|
|
||||||
|
if (/^C/)
|
||||||
|
print "not ok"
|
||||||
|
else
|
||||||
|
print "ok"
|
||||||
|
|
||||||
|
if (/D$/)
|
||||||
|
print "not ok"
|
||||||
|
else
|
||||||
|
print "ok"
|
||||||
|
|
||||||
|
if (/^E/)
|
||||||
|
print "not ok"
|
||||||
|
else
|
||||||
|
print "ok"
|
||||||
|
|
||||||
|
if (/F$/)
|
||||||
|
print "ok"
|
||||||
|
else
|
||||||
|
print "not ok"
|
||||||
|
}
|
3
src/tool/awk/testdata/gawk/anchor.in
vendored
Normal file
3
src/tool/awk/testdata/gawk/anchor.in
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
A line1 B
|
||||||
|
C line2 D
|
||||||
|
E line3 F
|
6
src/tool/awk/testdata/gawk/anchor.ok
vendored
Normal file
6
src/tool/awk/testdata/gawk/anchor.ok
vendored
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
ok
|
||||||
|
ok
|
||||||
|
ok
|
||||||
|
ok
|
||||||
|
ok
|
||||||
|
ok
|
14
src/tool/awk/testdata/gawk/argarray.awk
vendored
Normal file
14
src/tool/awk/testdata/gawk/argarray.awk
vendored
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
BEGIN {
|
||||||
|
argn = " argument" (ARGC > 1 ? "s" : "")
|
||||||
|
are = ARGC > 1 ? "are" : "is"
|
||||||
|
print "here we have " ARGC argn
|
||||||
|
print "which " are
|
||||||
|
for (x = 0; x < ARGC; x++)
|
||||||
|
print "\t", ARGV[x]
|
||||||
|
print "Environment variable TEST=" ENVIRON["TEST"]
|
||||||
|
print "and the current input file is called \"" FILENAME "\""
|
||||||
|
}
|
||||||
|
|
||||||
|
FNR == 1 {
|
||||||
|
print "in main loop, this input file is known as \"" FILENAME "\""
|
||||||
|
}
|
1
src/tool/awk/testdata/gawk/argarray.in
vendored
Normal file
1
src/tool/awk/testdata/gawk/argarray.in
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
this is a simple test file
|
6
src/tool/awk/testdata/gawk/argarray.ok
vendored
Normal file
6
src/tool/awk/testdata/gawk/argarray.ok
vendored
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
here we have 1 argument
|
||||||
|
which is
|
||||||
|
|
||||||
|
Environment variable TEST=
|
||||||
|
and the current input file is called ""
|
||||||
|
in main loop, this input file is known as "-"
|
19
src/tool/awk/testdata/gawk/arrayind3.awk
vendored
Normal file
19
src/tool/awk/testdata/gawk/arrayind3.awk
vendored
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
BEGIN {
|
||||||
|
# initialize cint arrays
|
||||||
|
pos[0] = 0
|
||||||
|
posout[0] = 0
|
||||||
|
split("00000779770060", f) # f[1] is a strnum
|
||||||
|
pos[f[1]] = 1 # subscripts must be strings!
|
||||||
|
for (x in pos) {
|
||||||
|
# if x is a strnum, then the
|
||||||
|
# x != 0 test may convert it to an integral NUMBER,
|
||||||
|
# and we might lose the unusual string representation
|
||||||
|
# if the cint code is not careful to recognize that this is
|
||||||
|
# actually a string
|
||||||
|
if (x != 0)
|
||||||
|
posout[x] = pos[x]
|
||||||
|
}
|
||||||
|
# which array element is populated?
|
||||||
|
print posout[779770060]
|
||||||
|
print posout["00000779770060"]
|
||||||
|
}
|
2
src/tool/awk/testdata/gawk/arrayind3.ok
vendored
Normal file
2
src/tool/awk/testdata/gawk/arrayind3.ok
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
|
||||||
|
1
|
21
src/tool/awk/testdata/gawk/arrayparm.awk
vendored
Normal file
21
src/tool/awk/testdata/gawk/arrayparm.awk
vendored
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
#
|
||||||
|
# Test program from:
|
||||||
|
#
|
||||||
|
# Date: Tue, 21 Feb 95 16:09:29 EST
|
||||||
|
# From: emory!blackhawk.com!aaron (Aaron Sosnick)
|
||||||
|
#
|
||||||
|
BEGIN {
|
||||||
|
foo[1]=1;
|
||||||
|
foo[2]=2;
|
||||||
|
bug1(foo);
|
||||||
|
}
|
||||||
|
function bug1(i) {
|
||||||
|
for (i in foo) {
|
||||||
|
bug2(i);
|
||||||
|
delete foo[i];
|
||||||
|
print i,1,bot[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
function bug2(arg) {
|
||||||
|
bot[arg]=arg;
|
||||||
|
}
|
1
src/tool/awk/testdata/gawk/arrayparm.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/arrayparm.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
parse error at 10:5: can't pass array "foo" as scalar param
|
67
src/tool/awk/testdata/gawk/arrayprm2.awk
vendored
Normal file
67
src/tool/awk/testdata/gawk/arrayprm2.awk
vendored
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
# From spcecdt@armory.com Wed Apr 30 11:08:48 2003
|
||||||
|
# Return-Path: <spcecdt@armory.com>
|
||||||
|
# Received: from localhost (skeeve [127.0.0.1])
|
||||||
|
# by skeeve.com (8.12.5/8.12.5) with ESMTP id h3U7uZWr015489
|
||||||
|
# for <arnold@localhost>; Wed, 30 Apr 2003 11:08:48 +0300
|
||||||
|
# Received: from actcom.co.il [192.114.47.1]
|
||||||
|
# by localhost with POP3 (fetchmail-5.9.0)
|
||||||
|
# for arnold@localhost (single-drop); Wed, 30 Apr 2003 11:08:48 +0300 (IDT)
|
||||||
|
# Received: by actcom.co.il (mbox arobbins)
|
||||||
|
# (with Cubic Circle's cucipop (v1.31 1998/05/13) Wed Apr 30 11:05:01 2003)
|
||||||
|
# X-From_: spcecdt@armory.com Wed Apr 30 04:06:46 2003
|
||||||
|
# Received: from smtp1.actcom.net.il by actcom.co.il with ESMTP
|
||||||
|
# (8.11.6/actcom-0.2) id h3U16iv04111 for <arobbins@actcom.co.il>;
|
||||||
|
# Wed, 30 Apr 2003 04:06:45 +0300 (EET DST)
|
||||||
|
# (rfc931-sender: mail.actcom.co.il [192.114.47.13])
|
||||||
|
# Received: from f7.net (consort.superb.net [209.61.216.22])
|
||||||
|
# by smtp1.actcom.net.il (8.12.8/8.12.8) with ESMTP id h3U16nEv009589
|
||||||
|
# for <arobbins@actcom.co.il>; Wed, 30 Apr 2003 04:06:50 +0300
|
||||||
|
# Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164])
|
||||||
|
# by f7.net (8.11.7/8.11.6) with ESMTP id h3U16gj29182
|
||||||
|
# for <arnold@skeeve.com>; Tue, 29 Apr 2003 21:06:42 -0400
|
||||||
|
# Received: from monty-python.gnu.org ([199.232.76.173])
|
||||||
|
# by fencepost.gnu.org with esmtp (Exim 4.10)
|
||||||
|
# id 19Ag3W-00029w-00
|
||||||
|
# for bug-gawk@gnu.org; Tue, 29 Apr 2003 21:06:42 -0400
|
||||||
|
# Received: from mail by monty-python.gnu.org with spam-scanned (Exim 4.10.13)
|
||||||
|
# id 19Ag1V-0001AN-00
|
||||||
|
# for bug-gawk@gnu.org; Tue, 29 Apr 2003 21:04:39 -0400
|
||||||
|
# Received: from deepthought.armory.com ([192.122.209.42] helo=armory.com)
|
||||||
|
# by monty-python.gnu.org with smtp (Exim 4.10.13)
|
||||||
|
# id 19Ag1V-0001A3-00
|
||||||
|
# for bug-gawk@gnu.org; Tue, 29 Apr 2003 21:04:37 -0400
|
||||||
|
# Date: Tue, 29 Apr 2003 18:04:35 -0700
|
||||||
|
# From: "John H. DuBois III" <spcecdt@armory.com>
|
||||||
|
# To: bug-gawk@gnu.org
|
||||||
|
# Subject: gawk 3.1.2a bug
|
||||||
|
# Message-ID: <20030430010434.GA4278@armory.com>
|
||||||
|
# Mime-Version: 1.0
|
||||||
|
# Content-Type: text/plain; charset=us-ascii
|
||||||
|
# Content-Disposition: inline
|
||||||
|
# User-Agent: Mutt/1.3.28i
|
||||||
|
# X-Www: http://www.armory.com./~spcecdt/
|
||||||
|
# Sender: spcecdt@armory.com
|
||||||
|
# X-Spam-Status: No, hits=-7.2 required=5.0
|
||||||
|
# tests=SIGNATURE_SHORT_DENSE,SPAM_PHRASE_00_01,USER_AGENT,
|
||||||
|
# USER_AGENT_MUTT
|
||||||
|
# version=2.41
|
||||||
|
# X-Spam-Level:
|
||||||
|
# X-SpamBouncer: 1.4 (10/07/01)
|
||||||
|
# X-SBClass: OK
|
||||||
|
# Status: RO
|
||||||
|
#
|
||||||
|
# gawk-3.1.2a 'BEGIN {foo(bar)};function foo(baz){split("x",baz)}'
|
||||||
|
# gawk-3.1.2a: cmd. line:1: fatal: split: second argument is not an array
|
||||||
|
#
|
||||||
|
# John
|
||||||
|
# --
|
||||||
|
# John DuBois spcecdt@armory.com KC6QKZ/AE http://www.armory.com/~spcecdt/
|
||||||
|
#
|
||||||
|
BEGIN {
|
||||||
|
foo(bar)
|
||||||
|
}
|
||||||
|
|
||||||
|
function foo(baz)
|
||||||
|
{
|
||||||
|
split("x", baz)
|
||||||
|
}
|
0
src/tool/awk/testdata/gawk/arrayprm2.ok
vendored
Normal file
0
src/tool/awk/testdata/gawk/arrayprm2.ok
vendored
Normal file
56
src/tool/awk/testdata/gawk/arrayprm3.awk
vendored
Normal file
56
src/tool/awk/testdata/gawk/arrayprm3.awk
vendored
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
# From spcecdt@armory.com Fri May 2 13:24:46 2003
|
||||||
|
# Return-Path: <spcecdt@armory.com>
|
||||||
|
# Received: from localhost (skeeve [127.0.0.1])
|
||||||
|
# by skeeve.com (8.12.5/8.12.5) with ESMTP id h42AChum021950
|
||||||
|
# for <arnold@localhost>; Fri, 2 May 2003 13:24:46 +0300
|
||||||
|
# Received: from actcom.co.il [192.114.47.1]
|
||||||
|
# by localhost with POP3 (fetchmail-5.9.0)
|
||||||
|
# for arnold@localhost (single-drop); Fri, 02 May 2003 13:24:46 +0300 (IDT)
|
||||||
|
# Received: by actcom.co.il (mbox arobbins)
|
||||||
|
# (with Cubic Circle's cucipop (v1.31 1998/05/13) Fri May 2 13:23:37 2003)
|
||||||
|
# X-From_: spcecdt@armory.com Fri May 2 00:43:51 2003
|
||||||
|
# Received: from smtp1.actcom.net.il by actcom.co.il with ESMTP
|
||||||
|
# (8.11.6/actcom-0.2) id h41Lhm500217 for <arobbins@actcom.co.il>;
|
||||||
|
# Fri, 2 May 2003 00:43:49 +0300 (EET DST)
|
||||||
|
# (rfc931-sender: lmail.actcom.co.il [192.114.47.13])
|
||||||
|
# Received: from f7.net (consort.superb.net [209.61.216.22])
|
||||||
|
# by smtp1.actcom.net.il (8.12.8/8.12.8) with ESMTP id h41LiGcO022817
|
||||||
|
# for <arobbins@actcom.co.il>; Fri, 2 May 2003 00:44:18 +0300
|
||||||
|
# Received: from armory.com (deepthought.armory.com [192.122.209.42])
|
||||||
|
# by f7.net (8.11.7/8.11.6) with SMTP id h41Lhj106516
|
||||||
|
# for <arnold@skeeve.com>; Thu, 1 May 2003 17:43:46 -0400
|
||||||
|
# Date: Thu, 1 May 2003 14:43:45 -0700
|
||||||
|
# From: "John H. DuBois III" <spcecdt@armory.com>
|
||||||
|
# To: Aharon Robbins <arnold@skeeve.com>
|
||||||
|
# Subject: Re: gawk 3.1.2a bug
|
||||||
|
# Message-ID: <20030501214345.GA24615@armory.com>
|
||||||
|
# References: <200305011738.h41Hcg76017565@localhost.localdomain>
|
||||||
|
# Mime-Version: 1.0
|
||||||
|
# Content-Type: text/plain; charset=us-ascii
|
||||||
|
# Content-Disposition: inline
|
||||||
|
# In-Reply-To: <200305011738.h41Hcg76017565@localhost.localdomain>
|
||||||
|
# User-Agent: Mutt/1.3.28i
|
||||||
|
# X-Www: http://www.armory.com./~spcecdt/
|
||||||
|
# Sender: spcecdt@armory.com
|
||||||
|
# X-SpamBouncer: 1.4 (10/07/01)
|
||||||
|
# X-SBClass: OK
|
||||||
|
# Status: RO
|
||||||
|
#
|
||||||
|
# On Thu, May 01, 2003 at 08:38:42PM +0300, Aharon Robbins wrote:
|
||||||
|
# > > That worked, thanks.
|
||||||
|
# >
|
||||||
|
# > Great. Your report motivated me to find everywhere such additional
|
||||||
|
# > code ought to be needed. I think I did so. --Arnold
|
||||||
|
#
|
||||||
|
# Here's another one (perhaps fixed by your additional work):
|
||||||
|
#
|
||||||
|
BEGIN { foo(a) }
|
||||||
|
function foo(a) { bar(a); print "" in a }
|
||||||
|
function bar(a) { a[""]; }
|
||||||
|
#
|
||||||
|
# Prints 1 with gawk-3.1.1; 0 with 3.1.2a.
|
||||||
|
#
|
||||||
|
# John
|
||||||
|
# --
|
||||||
|
# John DuBois spcecdt@armory.com KC6QKZ/AE http://www.armory.com/~spcecdt/
|
||||||
|
#
|
1
src/tool/awk/testdata/gawk/arrayprm3.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/arrayprm3.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
1
|
13
src/tool/awk/testdata/gawk/arrayref.awk
vendored
Normal file
13
src/tool/awk/testdata/gawk/arrayref.awk
vendored
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
BEGIN { # foo[10] = 0 # put this line in and it will work
|
||||||
|
test(foo); print foo[1]
|
||||||
|
test2(foo2); print foo2[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
function test(foo)
|
||||||
|
{
|
||||||
|
test2(foo)
|
||||||
|
}
|
||||||
|
function test2(bar)
|
||||||
|
{
|
||||||
|
bar[1] = 1
|
||||||
|
}
|
2
src/tool/awk/testdata/gawk/arrayref.ok
vendored
Normal file
2
src/tool/awk/testdata/gawk/arrayref.ok
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
1
|
||||||
|
1
|
81
src/tool/awk/testdata/gawk/arrymem1.awk
vendored
Normal file
81
src/tool/awk/testdata/gawk/arrymem1.awk
vendored
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
# From spcecdt@armory.com Thu Jun 14 13:24:32 2001
|
||||||
|
# Received: from mail.actcom.co.il [192.114.47.13]
|
||||||
|
# by localhost with POP3 (fetchmail-5.5.0)
|
||||||
|
# for arnold@localhost (single-drop); Thu, 14 Jun 2001 13:24:32 +0300 (IDT)
|
||||||
|
# Received: by actcom.co.il (mbox arobbins)
|
||||||
|
# (with Cubic Circle's cucipop (v1.31 1998/05/13) Thu Jun 14 13:25:13 2001)
|
||||||
|
# X-From_: spcecdt@armory.com Thu Jun 14 06:34:47 2001
|
||||||
|
# Received: from lmail.actcom.co.il by actcom.co.il with ESMTP
|
||||||
|
# (8.9.1a/actcom-0.2) id GAA29661 for <arobbins@actcom.co.il>;
|
||||||
|
# Thu, 14 Jun 2001 06:34:46 +0300 (EET DST)
|
||||||
|
# (rfc931-sender: lmail.actcom.co.il [192.114.47.13])
|
||||||
|
# Received: from billohost.com (www.billohost.com [209.196.35.10])
|
||||||
|
# by lmail.actcom.co.il (8.11.2/8.11.2) with ESMTP id f5E3YiO27337
|
||||||
|
# for <arobbins@actcom.co.il>; Thu, 14 Jun 2001 06:34:45 +0300
|
||||||
|
# Received: from fencepost.gnu.org (we-refuse-to-spy-on-our-users@fencepost.gnu.org [199.232.76.164])
|
||||||
|
# by billohost.com (8.9.3/8.9.3) with ESMTP id XAA02681
|
||||||
|
# for <arnold@skeeve.com>; Wed, 13 Jun 2001 23:33:57 -0400
|
||||||
|
# Received: from deepthought.armory.com ([192.122.209.42])
|
||||||
|
# by fencepost.gnu.org with smtp (Exim 3.16 #1 (Debian))
|
||||||
|
# id 15ANu2-00005C-00
|
||||||
|
# for <bug-gawk@gnu.org>; Wed, 13 Jun 2001 23:34:38 -0400
|
||||||
|
# Date: Wed, 13 Jun 2001 20:32:42 -0700
|
||||||
|
# From: "John H. DuBois III" <spcecdt@armory.com>
|
||||||
|
# To: bug-gawk@gnu.org
|
||||||
|
# Subject: gawk 3.1.0 bug
|
||||||
|
# Message-ID: <20010613203242.A29975@armory.com>
|
||||||
|
# Mime-Version: 1.0
|
||||||
|
# Content-Type: text/plain; charset=us-ascii
|
||||||
|
# X-Mailer: Mutt 1.0.1i
|
||||||
|
# X-Www: http://www.armory.com./~spcecdt/
|
||||||
|
# Sender: spcecdt@armory.com
|
||||||
|
# Status: RO
|
||||||
|
#
|
||||||
|
# Under SCO OpenServer 5.0.6a using gawk 3.1.0 compiled with gcc 2.95.2, this
|
||||||
|
# program:
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
f1(Procs,b)
|
||||||
|
print "test"
|
||||||
|
}
|
||||||
|
|
||||||
|
function f1(Procs,a) {
|
||||||
|
# a[""]
|
||||||
|
a[""] = "a" # ADR: Give it a value so can trace it
|
||||||
|
f2()
|
||||||
|
}
|
||||||
|
|
||||||
|
function f2() {
|
||||||
|
# b[""]
|
||||||
|
b[""] = "b" # ADR: Give it a value so can trace it
|
||||||
|
}
|
||||||
|
|
||||||
|
# ADR: 1/28/2003: Added this:
|
||||||
|
BEGIN { for (i in b) printf("b[\"%s\"] = \"%s\"\n", i, b[i]) }
|
||||||
|
# END ADR added.
|
||||||
|
|
||||||
|
# gives:
|
||||||
|
#
|
||||||
|
# gawk: ./gtest:5: fatal error: internal error
|
||||||
|
#
|
||||||
|
# and dumps core.
|
||||||
|
#
|
||||||
|
# gdb gives me this stack backtrace:
|
||||||
|
#
|
||||||
|
# #0 0x80019943 in kill () from /usr/lib/libc.so.1
|
||||||
|
# #1 0x8003e754 in abort () from /usr/lib/libc.so.1
|
||||||
|
# #2 0x8062a87 in catchsig (sig=0, code=0) at main.c:947
|
||||||
|
# #3 0x80053a0c in _sigreturn () from /usr/lib/libc.so.1
|
||||||
|
# #4 0x80023d36 in cleanfree () from /usr/lib/libc.so.1
|
||||||
|
# #5 0x80023156 in _real_malloc () from /usr/lib/libc.so.1
|
||||||
|
# #6 0x80023019 in malloc () from /usr/lib/libc.so.1
|
||||||
|
# #7 0x8053b95 in do_print (tree=0x0) at builtin.c:1336
|
||||||
|
# #8 0x806b47c in interpret (tree=0x8084ee4) at eval.c:606
|
||||||
|
# #9 0x806ad8d in interpret (tree=0x8084f0c) at eval.c:384
|
||||||
|
# #10 0x806ad21 in interpret (tree=0x8084f5c) at eval.c:367
|
||||||
|
# #11 0x8061d5b in main (argc=4, argv=0x80478ac) at main.c:506
|
||||||
|
#
|
||||||
|
# John
|
||||||
|
# --
|
||||||
|
# John DuBois spcecdt@armory.com. KC6QKZ/AE http://www.armory.com./~spcecdt/
|
||||||
|
#
|
2
src/tool/awk/testdata/gawk/arrymem1.ok
vendored
Normal file
2
src/tool/awk/testdata/gawk/arrymem1.ok
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
test
|
||||||
|
b[""] = "b"
|
18
src/tool/awk/testdata/gawk/arryref2.awk
vendored
Normal file
18
src/tool/awk/testdata/gawk/arryref2.awk
vendored
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
BEGIN {
|
||||||
|
foo(a)
|
||||||
|
|
||||||
|
for (i in a)
|
||||||
|
print i, a[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
function foo(b)
|
||||||
|
{
|
||||||
|
bar(b)
|
||||||
|
b[2] = "local"
|
||||||
|
}
|
||||||
|
|
||||||
|
function bar(c)
|
||||||
|
{
|
||||||
|
a[3] = "global"
|
||||||
|
c[1] = "local2"
|
||||||
|
}
|
3
src/tool/awk/testdata/gawk/arryref2.ok
vendored
Normal file
3
src/tool/awk/testdata/gawk/arryref2.ok
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
1 local2
|
||||||
|
2 local
|
||||||
|
3 global
|
18
src/tool/awk/testdata/gawk/arryref3.awk
vendored
Normal file
18
src/tool/awk/testdata/gawk/arryref3.awk
vendored
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
BEGIN {
|
||||||
|
foo(a)
|
||||||
|
|
||||||
|
for (i in a)
|
||||||
|
print i, a[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
function foo(b)
|
||||||
|
{
|
||||||
|
a[1] = "global"
|
||||||
|
b[2] = "local"
|
||||||
|
bar(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
function bar(c)
|
||||||
|
{
|
||||||
|
c = 12
|
||||||
|
}
|
1
src/tool/awk/testdata/gawk/arryref3.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/arryref3.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
parse error at 12:2: can't pass array "b" as scalar param
|
17
src/tool/awk/testdata/gawk/arryref4.awk
vendored
Normal file
17
src/tool/awk/testdata/gawk/arryref4.awk
vendored
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
BEGIN {
|
||||||
|
foo(a)
|
||||||
|
|
||||||
|
print a
|
||||||
|
}
|
||||||
|
|
||||||
|
function foo(b)
|
||||||
|
{
|
||||||
|
a = "global"
|
||||||
|
b[2] = "local"
|
||||||
|
# bar(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
function bar(c)
|
||||||
|
{
|
||||||
|
c = 12
|
||||||
|
}
|
1
src/tool/awk/testdata/gawk/arryref4.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/arryref4.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
parse error at 2:2: can't pass scalar "a" as array param
|
17
src/tool/awk/testdata/gawk/arryref5.awk
vendored
Normal file
17
src/tool/awk/testdata/gawk/arryref5.awk
vendored
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
BEGIN {
|
||||||
|
foo(a)
|
||||||
|
|
||||||
|
print a
|
||||||
|
}
|
||||||
|
|
||||||
|
function foo(b)
|
||||||
|
{
|
||||||
|
b[2] = "local"
|
||||||
|
a = "global"
|
||||||
|
# bar(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
function bar(c)
|
||||||
|
{
|
||||||
|
c = 12
|
||||||
|
}
|
1
src/tool/awk/testdata/gawk/arryref5.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/arryref5.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
parse error at 2:2: can't pass scalar "a" as array param
|
16
src/tool/awk/testdata/gawk/arynasty.awk
vendored
Normal file
16
src/tool/awk/testdata/gawk/arynasty.awk
vendored
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
BEGIN {
|
||||||
|
a = 12.153
|
||||||
|
#print "-- stroring test[a]" > "/dev/stderr" ; fflush("/dev/stderr")
|
||||||
|
test[a] = "hi"
|
||||||
|
#print "-- setting CONVFMT" > "/dev/stderr" ; fflush("/dev/stderr")
|
||||||
|
CONVFMT = "%.0f"
|
||||||
|
#print "-- setting a" > "/dev/stderr" ; fflush("/dev/stderr")
|
||||||
|
a = 5
|
||||||
|
#stopme()
|
||||||
|
#print "-- starting loop" > "/dev/stderr" ; fflush("/dev/stderr")
|
||||||
|
for (i in test) {
|
||||||
|
#print("-- i =", i) > "/dev/stderr" ; fflush("/dev/stderr");
|
||||||
|
#printf("-- i = <%s>\n", i) > "/dev/stderr" ; fflush("/dev/stderr");
|
||||||
|
printf ("test[%s] = %s\n", i, test[i])
|
||||||
|
}
|
||||||
|
}
|
1
src/tool/awk/testdata/gawk/arynasty.ok
vendored
Normal file
1
src/tool/awk/testdata/gawk/arynasty.ok
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
test[12.153] = hi
|
9
src/tool/awk/testdata/gawk/aryprm1.awk
vendored
Normal file
9
src/tool/awk/testdata/gawk/aryprm1.awk
vendored
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
function f(a) {
|
||||||
|
if (3 in a)
|
||||||
|
print 7
|
||||||
|
a = 5
|
||||||
|
}
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
f(arr)
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue