xgo/stdlib/text_regexp.go

252 lines
5.3 KiB
Go
Raw Normal View History

2019-01-30 06:52:00 +03:00
package stdlib
import (
"regexp"
"github.com/d5/tengo"
2019-01-30 06:52:00 +03:00
)
2019-12-20 22:40:38 +03:00
func makeTextRegexp(re *regexp.Regexp) *tengo.ImmutableMap {
return &tengo.ImmutableMap{
Value: map[string]tengo.Object{
2019-01-30 06:52:00 +03:00
// match(text) => bool
2019-12-20 22:40:38 +03:00
"match": &tengo.UserFunction{
Value: func(args ...tengo.Object) (
ret tengo.Object,
err error,
) {
2019-01-30 06:52:00 +03:00
if len(args) != 1 {
2019-12-20 22:40:38 +03:00
err = tengo.ErrWrongNumArguments
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
s1, ok := tengo.ToString(args[0])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "first",
Expected: "string(compatible)",
Found: args[0].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
if re.MatchString(s1) {
2019-12-20 22:40:38 +03:00
ret = tengo.TrueValue
2019-01-30 06:52:00 +03:00
} else {
2019-12-20 22:40:38 +03:00
ret = tengo.FalseValue
2019-01-30 06:52:00 +03:00
}
return
},
},
// find(text) => array(array({text:,begin:,end:}))/undefined
// find(text, maxCount) => array(array({text:,begin:,end:}))/undefined
2019-12-20 22:40:38 +03:00
"find": &tengo.UserFunction{
Value: func(args ...tengo.Object) (
ret tengo.Object,
err error,
) {
2019-01-30 06:52:00 +03:00
numArgs := len(args)
if numArgs != 1 && numArgs != 2 {
2019-12-20 22:40:38 +03:00
err = tengo.ErrWrongNumArguments
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
s1, ok := tengo.ToString(args[0])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "first",
Expected: "string(compatible)",
Found: args[0].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
if numArgs == 1 {
m := re.FindStringSubmatchIndex(s1)
if m == nil {
2019-12-20 22:40:38 +03:00
ret = tengo.UndefinedValue
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
arr := &tengo.Array{}
2019-01-30 06:52:00 +03:00
for i := 0; i < len(m); i += 2 {
2019-12-20 22:40:38 +03:00
arr.Value = append(arr.Value,
&tengo.ImmutableMap{
Value: map[string]tengo.Object{
"text": &tengo.String{
Value: s1[m[i]:m[i+1]],
},
"begin": &tengo.Int{
Value: int64(m[i]),
},
"end": &tengo.Int{
Value: int64(m[i+1]),
},
}})
2019-01-30 06:52:00 +03:00
}
2019-12-20 22:40:38 +03:00
ret = &tengo.Array{Value: []tengo.Object{arr}}
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
i2, ok := tengo.ToInt(args[1])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "second",
Expected: "int(compatible)",
Found: args[1].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
m := re.FindAllStringSubmatchIndex(s1, i2)
if m == nil {
2019-12-20 22:40:38 +03:00
ret = tengo.UndefinedValue
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
arr := &tengo.Array{}
2019-01-30 06:52:00 +03:00
for _, m := range m {
2019-12-20 22:40:38 +03:00
subMatch := &tengo.Array{}
2019-01-30 06:52:00 +03:00
for i := 0; i < len(m); i += 2 {
2019-12-20 22:40:38 +03:00
subMatch.Value = append(subMatch.Value,
&tengo.ImmutableMap{
Value: map[string]tengo.Object{
"text": &tengo.String{
Value: s1[m[i]:m[i+1]],
},
"begin": &tengo.Int{
Value: int64(m[i]),
},
"end": &tengo.Int{
Value: int64(m[i+1]),
},
}})
2019-01-30 06:52:00 +03:00
}
arr.Value = append(arr.Value, subMatch)
}
ret = arr
return
},
},
// replace(src, repl) => string
2019-12-20 22:40:38 +03:00
"replace": &tengo.UserFunction{
Value: func(args ...tengo.Object) (
ret tengo.Object,
err error,
) {
2019-01-30 06:52:00 +03:00
if len(args) != 2 {
2019-12-20 22:40:38 +03:00
err = tengo.ErrWrongNumArguments
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
s1, ok := tengo.ToString(args[0])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "first",
Expected: "string(compatible)",
Found: args[0].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
s2, ok := tengo.ToString(args[1])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "second",
Expected: "string(compatible)",
Found: args[1].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
s, ok := doTextRegexpReplace(re, s1, s2)
if !ok {
2019-12-20 22:40:38 +03:00
return nil, tengo.ErrStringLimit
}
2019-12-20 22:40:38 +03:00
ret = &tengo.String{Value: s}
2019-01-30 06:52:00 +03:00
return
},
},
// split(text) => array(string)
// split(text, maxCount) => array(string)
2019-12-20 22:40:38 +03:00
"split": &tengo.UserFunction{
Value: func(args ...tengo.Object) (
ret tengo.Object,
err error,
) {
2019-01-30 06:52:00 +03:00
numArgs := len(args)
if numArgs != 1 && numArgs != 2 {
2019-12-20 22:40:38 +03:00
err = tengo.ErrWrongNumArguments
2019-01-30 06:52:00 +03:00
return
}
2019-12-20 22:40:38 +03:00
s1, ok := tengo.ToString(args[0])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "first",
Expected: "string(compatible)",
Found: args[0].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
var i2 = -1
if numArgs > 1 {
2019-12-20 22:40:38 +03:00
i2, ok = tengo.ToInt(args[1])
2019-01-30 06:52:00 +03:00
if !ok {
2019-12-20 22:40:38 +03:00
err = tengo.ErrInvalidArgumentType{
Name: "second",
Expected: "int(compatible)",
Found: args[1].TypeName(),
}
2019-01-30 06:52:00 +03:00
return
}
}
2019-12-20 22:40:38 +03:00
arr := &tengo.Array{}
2019-01-30 06:52:00 +03:00
for _, s := range re.Split(s1, i2) {
2019-12-20 22:40:38 +03:00
arr.Value = append(arr.Value,
&tengo.String{Value: s})
2019-01-30 06:52:00 +03:00
}
ret = arr
return
},
},
},
}
}
// Size-limit checking implementation of regexp.ReplaceAllString.
func doTextRegexpReplace(re *regexp.Regexp, src, repl string) (string, bool) {
idx := 0
out := ""
for _, m := range re.FindAllStringSubmatchIndex(src, -1) {
var exp []byte
exp = re.ExpandString(exp, repl, src, m)
if len(out)+m[0]-idx+len(exp) > tengo.MaxStringLen {
return "", false
}
out += src[idx:m[0]] + string(exp)
idx = m[1]
}
if idx < len(src) {
if len(out)+len(src)-idx > tengo.MaxStringLen {
return "", false
}
out += src[idx:]
}
2019-12-20 22:40:38 +03:00
return out, true
}