some performance optimization in VM

This commit is contained in:
Daniel Kang 2019-01-20 07:41:57 -08:00
parent 88dd0224d5
commit 336fbc30e5
3 changed files with 90 additions and 92 deletions

View file

@ -28,17 +28,19 @@ Tengo is [fast](#benchmark) as it's compiled to bytecode and executed on stack-b
| | fib(35) | fibt(35) | Type |
| :--- | ---: | ---: | :---: |
| Go | `59ms` | `4ms` | Go (native) |
| [**Tengo**](https://github.com/d5/tengo) | `4,809ms` | `5ms` | VM on Go |
| Lua | `1,752ms` | `3ms` | Lua (native) |
| [go-lua](https://github.com/Shopify/go-lua) | `5,236ms` | `5ms` | Lua VM on Go |
| [GopherLua](https://github.com/yuin/gopher-lua) | `5,558ms` | `5ms` | Lua VM on Go |
| Python | `3,132ms` | `28ms` | Python (native) |
| [starlark-go](https://github.com/google/starlark-go) | `16,789ms` | `5ms` | Python-like Interpreter on Go |
| [otto](https://github.com/robertkrimen/otto) | `85,765ms` | `22ms` | JS Interpreter on Go |
| [Anko](https://github.com/mattn/anko) | `99,235ms` | `24ms` | Interpreter on Go |
| Go | `67ms` | `4ms` | Go (native) |
| [**Tengo**](https://github.com/d5/tengo) | `4,390ms` | `5ms` | VM on Go |
| Lua | `1,804ms` | `3ms` | Lua (native) |
| [go-lua](https://github.com/Shopify/go-lua) | `5,114ms` | `4ms` | Lua VM on Go |
| [GopherLua](https://github.com/yuin/gopher-lua) | `5,679ms` | `5ms` | Lua VM on Go |
| Python | `2,853ms` | `25ms` | Python (native) |
| [starlark-go](https://github.com/google/starlark-go) | `16,725ms` | `5ms` | Python-like Interpreter on Go |
| [otto](https://github.com/robertkrimen/otto) | `88,148ms` | `21ms` | JS Interpreter on Go |
| [Anko](https://github.com/mattn/anko) | `107,968ms` | `22ms` | Interpreter on Go |
[fib(35)](https://github.com/d5/tengobench/blob/master/code/fib.tengo) is a function that computes the 35th Fibonacci number, and [fibt(35)](https://github.com/d5/tengobench/blob/master/code/fibtc.tengo) is the [tail-call](https://en.wikipedia.org/wiki/Tail_call) version of the same function. You can see all the code used for this test in [tengobench](https://github.com/d5/tengobench).
[fib(35)](https://github.com/d5/tengobench/blob/master/code/fib.tengo) is a function that computes the 35th Fibonacci number, and [fibt(35)](https://github.com/d5/tengobench/blob/master/code/fibtc.tengo) is the [tail-call](https://en.wikipedia.org/wiki/Tail_call) version of the same function.
_Please note that the **Go** case does not read the source code from a local file, while all other cases do. All shell commands and the source code used in this benchmark are available [here](https://github.com/d5/tengobench)._
## Tengo Syntax in 5 Minutes

View file

@ -176,9 +176,9 @@ func ReadOperands(numOperands []int, ins []byte) (operands []int, offset int) {
for _, width := range numOperands {
switch width {
case 1:
operands = append(operands, int(ReadUint8(ins[offset:])))
operands = append(operands, int(ins[offset]))
case 2:
operands = append(operands, int(ReadUint16(ins[offset:])))
operands = append(operands, int(ins[offset+1])|int(ins[offset])<<8)
}
offset += width
@ -186,13 +186,3 @@ func ReadOperands(numOperands []int, ins []byte) (operands []int, offset int) {
return
}
// ReadUint16 decodes a big-endian uint16 from the first two bytes of b:
// b[0] is the high-order byte and b[1] is the low-order byte.
// It panics if b holds fewer than two bytes.
func ReadUint16(b []byte) uint16 {
	lo := uint16(b[1])
	hi := uint16(b[0])
	return hi<<8 | lo
}
// ReadUint8 returns the first byte of b as a uint8.
// It panics if b is empty.
func ReadUint8(b []byte) uint8 {
	first := b[0] // byte is an alias for uint8, so no conversion is required
	return first
}

View file

@ -39,6 +39,7 @@ type VM struct {
curFrame *Frame
curInsts []byte
curIPLimit int
ip int
aborting int64
}
@ -68,6 +69,7 @@ func NewVM(bytecode *compiler.Bytecode, globals []*objects.Object) *VM {
curFrame: &(frames[0]),
curInsts: frames[0].fn.Instructions,
curIPLimit: len(frames[0].fn.Instructions) - 1,
ip: -1,
}
}
@ -78,17 +80,13 @@ func (v *VM) Abort() {
// Run starts the execution.
func (v *VM) Run() error {
var ip int
for v.ip < v.curIPLimit && (atomic.LoadInt64(&v.aborting) == 0) {
v.ip++
for v.curFrame.ip < v.curIPLimit && (atomic.LoadInt64(&v.aborting) == 0) {
v.curFrame.ip++
ip = v.curFrame.ip
switch compiler.Opcode(v.curInsts[ip]) {
switch compiler.Opcode(v.curInsts[v.ip]) {
case compiler.OpConstant:
cidx := compiler.ReadUint16(v.curInsts[ip+1:])
v.curFrame.ip += 2
cidx := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
if v.sp >= StackSize {
return ErrStackOverflow
@ -438,54 +436,54 @@ func (v *VM) Run() error {
}
case compiler.OpJumpFalsy:
pos := int(compiler.ReadUint16(v.curInsts[ip+1:]))
v.curFrame.ip += 2
pos := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
condition := v.stack[v.sp-1]
v.sp--
if (*condition).IsFalsy() {
v.curFrame.ip = pos - 1
v.ip = pos - 1
}
case compiler.OpAndJump:
pos := int(compiler.ReadUint16(v.curInsts[ip+1:]))
v.curFrame.ip += 2
pos := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
condition := *v.stack[v.sp-1]
if condition.IsFalsy() {
v.curFrame.ip = pos - 1
v.ip = pos - 1
} else {
v.sp--
}
case compiler.OpOrJump:
pos := int(compiler.ReadUint16(v.curInsts[ip+1:]))
v.curFrame.ip += 2
pos := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
condition := *v.stack[v.sp-1]
if !condition.IsFalsy() {
v.curFrame.ip = pos - 1
v.ip = pos - 1
} else {
v.sp--
}
case compiler.OpJump:
pos := int(compiler.ReadUint16(v.curInsts[ip+1:]))
v.curFrame.ip = pos - 1
pos := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip = pos - 1
case compiler.OpSetGlobal:
globalIndex := compiler.ReadUint16(v.curInsts[ip+1:])
v.curFrame.ip += 2
globalIndex := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
v.sp--
v.globals[globalIndex] = v.stack[v.sp]
case compiler.OpSetSelGlobal:
globalIndex := compiler.ReadUint16(v.curInsts[ip+1:])
numSelectors := int(compiler.ReadUint8(v.curInsts[ip+3:]))
v.curFrame.ip += 3
globalIndex := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
numSelectors := int(v.curInsts[v.ip+3])
v.ip += 3
// pop selector outcomes (left to right)
selectors := make([]interface{}, numSelectors, numSelectors)
@ -512,8 +510,8 @@ func (v *VM) Run() error {
}
case compiler.OpGetGlobal:
globalIndex := compiler.ReadUint16(v.curInsts[ip+1:])
v.curFrame.ip += 2
globalIndex := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
val := v.globals[globalIndex]
@ -525,8 +523,8 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpArray:
numElements := int(compiler.ReadUint16(v.curInsts[ip+1:]))
v.curFrame.ip += 2
numElements := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
var elements []objects.Object
for i := v.sp - numElements; i < v.sp; i++ {
@ -544,8 +542,8 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpMap:
numElements := int(compiler.ReadUint16(v.curInsts[ip+1:]))
v.curFrame.ip += 2
numElements := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
kv := make(map[string]objects.Object)
for i := v.sp - numElements; i < v.sp; i += 2 {
@ -804,8 +802,8 @@ func (v *VM) Run() error {
}
case compiler.OpCall:
numArgs := int(compiler.ReadUint8(v.curInsts[ip+1:]))
v.curFrame.ip++
numArgs := int(v.curInsts[v.ip+1])
v.ip++
callee := *v.stack[v.sp-1-numArgs]
@ -848,9 +846,9 @@ func (v *VM) Run() error {
}
case compiler.OpReturnValue:
//numRets := int(compiler.ReadUint8(v.curInsts[ip+1:]))
_ = int(compiler.ReadUint8(v.curInsts[ip+1:]))
v.curFrame.ip++
//numRets := int(compiler.ReadUint8(v.curInsts[v.ip+1:]))
//_ = int64(compiler.ReadUint8(v.curInsts[v.ip+1:]))
v.ip++
// TODO: multi-value return is not fully implemented yet
//var rets []*objects.Object
@ -866,6 +864,7 @@ func (v *VM) Run() error {
v.curFrame = &v.frames[v.framesIndex-1]
v.curInsts = v.curFrame.fn.Instructions
v.curIPLimit = len(v.curInsts) - 1
v.ip = v.curFrame.ip
//v.sp = lastFrame.basePointer - 1
v.sp = lastFrame.basePointer
@ -888,6 +887,7 @@ func (v *VM) Run() error {
v.curFrame = &v.frames[v.framesIndex-1]
v.curInsts = v.curFrame.fn.Instructions
v.curIPLimit = len(v.curInsts) - 1
v.ip = v.curFrame.ip
v.sp = lastFrame.basePointer - 1
@ -899,10 +899,10 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpDefineLocal:
localIndex := compiler.ReadUint8(v.curInsts[ip+1:])
v.curFrame.ip++
localIndex := int(v.curInsts[v.ip+1])
v.ip++
sp := v.curFrame.basePointer + int(localIndex)
sp := v.curFrame.basePointer + localIndex
// local variables can be mutated by other actions
// so always store the copy of popped value
@ -912,10 +912,10 @@ func (v *VM) Run() error {
v.stack[sp] = &val
case compiler.OpSetLocal:
localIndex := compiler.ReadUint8(v.curInsts[ip+1:])
v.curFrame.ip++
localIndex := int(v.curInsts[v.ip+1])
v.ip++
sp := v.curFrame.basePointer + int(localIndex)
sp := v.curFrame.basePointer + localIndex
// update pointee of v.stack[sp] instead of replacing the pointer itself.
// this is needed because there can be free variables referencing the same local variables.
@ -925,10 +925,10 @@ func (v *VM) Run() error {
*v.stack[sp] = *val // also use a copy of popped value
case compiler.OpSetSelLocal:
localIndex := compiler.ReadUint8(v.curInsts[ip+1:])
numSelectors := int(compiler.ReadUint8(v.curInsts[ip+2:]))
v.curFrame.ip += 2
localIndex := int(v.curInsts[v.ip+1])
numSelectors := int(v.curInsts[v.ip+2])
v.ip += 2
// pop selector outcomes (left to right)
selectors := make([]interface{}, numSelectors, numSelectors)
for i := 0; i < numSelectors; i++ {
@ -949,17 +949,17 @@ func (v *VM) Run() error {
val := v.stack[v.sp-1] // no need to copy value here; selectorAssign uses copy of value
v.sp--
sp := v.curFrame.basePointer + int(localIndex)
sp := v.curFrame.basePointer + localIndex
if err := selectorAssign(v.stack[sp], val, selectors); err != nil {
return err
}
case compiler.OpGetLocal:
localIndex := compiler.ReadUint8(v.curInsts[ip+1:])
v.curFrame.ip++
localIndex := int(v.curInsts[v.ip+1])
v.ip++
val := v.stack[v.curFrame.basePointer+int(localIndex)]
val := v.stack[v.curFrame.basePointer+localIndex]
if v.sp >= StackSize {
return ErrStackOverflow
@ -969,8 +969,8 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpGetBuiltin:
builtinIndex := compiler.ReadUint8(v.curInsts[ip+1:])
v.curFrame.ip++
builtinIndex := int(v.curInsts[v.ip+1])
v.ip++
if v.sp >= StackSize {
return ErrStackOverflow
@ -980,17 +980,17 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpClosure:
constIndex := compiler.ReadUint16(v.curInsts[ip+1:])
numFree := compiler.ReadUint8(v.curInsts[ip+3:])
v.curFrame.ip += 3
constIndex := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
numFree := int(v.curInsts[v.ip+3])
v.ip += 3
if err := v.pushClosure(int(constIndex), int(numFree)); err != nil {
if err := v.pushClosure(constIndex, numFree); err != nil {
return err
}
case compiler.OpGetFree:
freeIndex := compiler.ReadUint8(v.curInsts[ip+1:])
v.curFrame.ip++
freeIndex := int(v.curInsts[v.ip+1])
v.ip++
val := v.curFrame.freeVars[freeIndex]
@ -1002,9 +1002,9 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpSetSelFree:
freeIndex := compiler.ReadUint8(v.curInsts[ip+1:])
numSelectors := int(compiler.ReadUint8(v.curInsts[ip+2:]))
v.curFrame.ip += 2
freeIndex := int(v.curInsts[v.ip+1])
numSelectors := int(v.curInsts[v.ip+2])
v.ip += 2
// pop selector outcomes (left to right)
selectors := make([]interface{}, numSelectors, numSelectors)
@ -1031,8 +1031,8 @@ func (v *VM) Run() error {
}
case compiler.OpSetFree:
freeIndex := compiler.ReadUint8(v.curInsts[ip+1:])
v.curFrame.ip++
freeIndex := int(v.curInsts[v.ip+1])
v.ip++
val := v.stack[v.sp-1]
v.sp--
@ -1108,15 +1108,15 @@ func (v *VM) Run() error {
v.sp++
case compiler.OpModule:
cidx := compiler.ReadUint16(v.curInsts[ip+1:])
v.curFrame.ip += 2
cidx := int(v.curInsts[v.ip+2]) | int(v.curInsts[v.ip+1])<<8
v.ip += 2
if err := v.importModule(v.constants[cidx].(*objects.CompiledModule)); err != nil {
return err
}
default:
return fmt.Errorf("unknown opcode: %d", v.curInsts[ip])
return fmt.Errorf("unknown opcode: %d", v.curInsts[v.ip])
}
}
@ -1134,7 +1134,7 @@ func (v *VM) Globals() []*objects.Object {
}
// FrameInfo returns the current function call frame information.
func (v *VM) FrameInfo() (frameIndex int, ip int) {
func (v *VM) FrameInfo() (frameIndex, ip int) {
return v.framesIndex - 1, v.frames[v.framesIndex-1].ip
}
@ -1175,10 +1175,10 @@ func (v *VM) callFunction(fn *objects.CompiledFunction, freeVars []*objects.Obje
// check if this is a tail-call (recursive call right before return)
if fn == v.curFrame.fn { // recursion
nextOp := compiler.Opcode(v.curInsts[v.curFrame.ip+1])
nextOp := compiler.Opcode(v.curInsts[v.ip+1])
if nextOp == compiler.OpReturnValue || // tail call
(nextOp == compiler.OpPop &&
compiler.OpReturn == compiler.Opcode(v.curInsts[v.curFrame.ip+2])) {
compiler.OpReturn == compiler.Opcode(v.curInsts[v.ip+2])) {
// stack before tail-call
//
@ -1204,7 +1204,8 @@ func (v *VM) callFunction(fn *objects.CompiledFunction, freeVars []*objects.Obje
v.stack[v.curFrame.basePointer+p] = v.stack[v.sp-numArgs+p]
}
v.sp -= numArgs + 1
v.curFrame.ip = -1 // reset IP to beginning of the frame
v.ip = -1
//v.curFrame.ip = -1 // reset IP to beginning of the frame
// stack after tail-call
//
@ -1230,12 +1231,17 @@ func (v *VM) callFunction(fn *objects.CompiledFunction, freeVars []*objects.Obje
}
}
// store current ip before call
v.curFrame.ip = v.ip
// update call frame
v.curFrame = &(v.frames[v.framesIndex])
v.curFrame.fn = fn
v.curFrame.freeVars = freeVars
v.curFrame.ip = -1
//v.curFrame.ip = -1
v.curFrame.basePointer = v.sp - numArgs
v.curInsts = fn.Instructions
v.ip = -1
v.curIPLimit = len(v.curInsts) - 1
v.framesIndex++