mirror of
https://github.com/mjl-/mox.git
synced 2024-12-26 16:33:47 +03:00
Prevent Unicode confusion in passwords by applying PRECIS, and in usernames/email addresses by applying Unicode NFC normalization
An é (e with acute accent) can also be written as e+\u0301, i.e. a plain "e" followed by a combining acute accent. The first form is NFC, the second is NFD. When logging in, we transform usernames (email addresses) to NFC, so both forms are accepted: a client that sends NFD can log in too. For passwords, we apply the PRECIS "OpaqueString" profile, which (despite the name) transforms the value too: Unicode spaces are replaced with ASCII spaces, and the string is also normalized to NFC. PRECIS may reject confusing passwords when you set a password.
This commit is contained in:
parent
8e6fe7459b
commit
c57aeac7f0
99 changed files with 59625 additions and 114 deletions
|
@ -18,11 +18,11 @@ func TestAppend(t *testing.T) {
|
||||||
tc3 := startNoSwitchboard(t)
|
tc3 := startNoSwitchboard(t)
|
||||||
defer tc3.close()
|
defer tc3.close()
|
||||||
|
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("inbox")
|
tc2.client.Select("inbox")
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
tc3.client.Login("mjl@mox.example", "testtest")
|
tc3.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc2.transactf("bad", "append") // Missing params.
|
tc2.transactf("bad", "append") // Missing params.
|
||||||
tc2.transactf("bad", `append inbox`) // Missing message.
|
tc2.transactf("bad", `append inbox`) // Missing message.
|
||||||
|
@ -32,13 +32,13 @@ func TestAppend(t *testing.T) {
|
||||||
tc2.transactf("bad", "append inbox (\\Badflag) {1+}\r\nx") // Unknown flag.
|
tc2.transactf("bad", "append inbox (\\Badflag) {1+}\r\nx") // Unknown flag.
|
||||||
tc2 = startNoSwitchboard(t)
|
tc2 = startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("inbox")
|
tc2.client.Select("inbox")
|
||||||
|
|
||||||
tc2.transactf("bad", "append inbox () \"bad time\" {1+}\r\nx") // Bad time.
|
tc2.transactf("bad", "append inbox () \"bad time\" {1+}\r\nx") // Bad time.
|
||||||
tc2 = startNoSwitchboard(t)
|
tc2 = startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("inbox")
|
tc2.client.Select("inbox")
|
||||||
|
|
||||||
tc2.transactf("no", "append nobox (\\Seen) \" 1-Jan-2022 10:10:00 +0100\" {1}")
|
tc2.transactf("no", "append nobox (\\Seen) \" 1-Jan-2022 10:10:00 +0100\" {1}")
|
||||||
|
@ -81,7 +81,7 @@ func TestAppend(t *testing.T) {
|
||||||
|
|
||||||
tclimit := startArgs(t, false, false, true, true, "limit")
|
tclimit := startArgs(t, false, false, true, true, "limit")
|
||||||
defer tclimit.close()
|
defer tclimit.close()
|
||||||
tclimit.client.Login("limit@mox.example", "testtest")
|
tclimit.client.Login("limit@mox.example", password0)
|
||||||
tclimit.client.Select("inbox")
|
tclimit.client.Select("inbox")
|
||||||
// First message of 1 byte is within limits.
|
// First message of 1 byte is within limits.
|
||||||
tclimit.transactf("ok", "append inbox (\\Seen Label1 $label2) \" 1-Jan-2022 10:10:00 +0100\" {1+}\r\nx")
|
tclimit.transactf("ok", "append inbox (\\Seen Label1 $label2) \" 1-Jan-2022 10:10:00 +0100\" {1+}\r\nx")
|
||||||
|
|
|
@ -12,9 +12,18 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"golang.org/x/text/secure/precis"
|
||||||
|
|
||||||
"github.com/mjl-/mox/scram"
|
"github.com/mjl-/mox/scram"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestAuthenticateLogin(t *testing.T) {
|
||||||
|
// NFD username and PRECIS-cleaned password.
|
||||||
|
tc := start(t)
|
||||||
|
tc.client.Login("mo\u0301x@mox.example", password1)
|
||||||
|
tc.close()
|
||||||
|
}
|
||||||
|
|
||||||
func TestAuthenticatePlain(t *testing.T) {
|
func TestAuthenticatePlain(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
|
|
||||||
|
@ -28,21 +37,26 @@ func TestAuthenticatePlain(t *testing.T) {
|
||||||
tc.xcode("AUTHENTICATIONFAILED")
|
tc.xcode("AUTHENTICATIONFAILED")
|
||||||
tc.transactf("no", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000test")))
|
tc.transactf("no", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000test")))
|
||||||
tc.xcode("AUTHENTICATIONFAILED")
|
tc.xcode("AUTHENTICATIONFAILED")
|
||||||
tc.transactf("no", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000testtesttest")))
|
tc.transactf("no", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000test"+password0)))
|
||||||
tc.xcode("AUTHENTICATIONFAILED")
|
tc.xcode("AUTHENTICATIONFAILED")
|
||||||
tc.transactf("bad", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000")))
|
tc.transactf("bad", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000")))
|
||||||
tc.xcode("")
|
tc.xcode("")
|
||||||
tc.transactf("no", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("other\u0000mjl@mox.example\u0000testtest")))
|
tc.transactf("no", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("other\u0000mjl@mox.example\u0000"+password0)))
|
||||||
tc.xcode("AUTHORIZATIONFAILED")
|
tc.xcode("AUTHORIZATIONFAILED")
|
||||||
tc.transactf("ok", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000testtest")))
|
tc.transactf("ok", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000"+password0)))
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = start(t)
|
tc = start(t)
|
||||||
tc.transactf("ok", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("mjl@mox.example\u0000mjl@mox.example\u0000testtest")))
|
tc.transactf("ok", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("mjl@mox.example\u0000mjl@mox.example\u0000"+password0)))
|
||||||
|
tc.close()
|
||||||
|
|
||||||
|
// NFD username and PRECIS-cleaned password.
|
||||||
|
tc = start(t)
|
||||||
|
tc.transactf("ok", "authenticate plain %s", base64.StdEncoding.EncodeToString([]byte("mo\u0301x@mox.example\u0000mo\u0301x@mox.example\u0000"+password1)))
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = start(t)
|
tc = start(t)
|
||||||
tc.client.AuthenticatePlain("mjl@mox.example", "testtest")
|
tc.client.AuthenticatePlain("mjl@mox.example", password0)
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = start(t)
|
tc = start(t)
|
||||||
|
@ -55,7 +69,7 @@ func TestAuthenticatePlain(t *testing.T) {
|
||||||
|
|
||||||
tc.cmdf("", "authenticate plain")
|
tc.cmdf("", "authenticate plain")
|
||||||
tc.readprefixline("+ ")
|
tc.readprefixline("+ ")
|
||||||
tc.writelinef("%s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000testtest")))
|
tc.writelinef("%s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000"+password0)))
|
||||||
tc.readstatus("ok")
|
tc.readstatus("ok")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +91,7 @@ func TestAuthenticateSCRAMSHA256PLUS(t *testing.T) {
|
||||||
|
|
||||||
func testAuthenticateSCRAM(t *testing.T, tls bool, method string, h func() hash.Hash) {
|
func testAuthenticateSCRAM(t *testing.T, tls bool, method string, h func() hash.Hash) {
|
||||||
tc := startArgs(t, true, tls, true, true, "mjl")
|
tc := startArgs(t, true, tls, true, true, "mjl")
|
||||||
tc.client.AuthenticateSCRAM(method, h, "mjl@mox.example", "testtest")
|
tc.client.AuthenticateSCRAM(method, h, "mjl@mox.example", password0)
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
auth := func(status string, serverFinalError error, username, password string) {
|
auth := func(status string, serverFinalError error, username, password string) {
|
||||||
|
@ -129,11 +143,15 @@ func testAuthenticateSCRAM(t *testing.T, tls bool, method string, h func() hash.
|
||||||
auth("no", scram.ErrInvalidProof, "mjl@mox.example", "badpass")
|
auth("no", scram.ErrInvalidProof, "mjl@mox.example", "badpass")
|
||||||
auth("no", scram.ErrInvalidProof, "mjl@mox.example", "")
|
auth("no", scram.ErrInvalidProof, "mjl@mox.example", "")
|
||||||
// todo: server aborts due to invalid username. we should probably make client continue with fake deterministically generated salt and result in error in the end.
|
// todo: server aborts due to invalid username. we should probably make client continue with fake deterministically generated salt and result in error in the end.
|
||||||
// auth("no", nil, "other@mox.example", "testtest")
|
// auth("no", nil, "other@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("no", "authenticate bogus ")
|
tc.transactf("no", "authenticate bogus ")
|
||||||
tc.transactf("bad", "authenticate %s not base64...", method)
|
tc.transactf("bad", "authenticate %s not base64...", method)
|
||||||
tc.transactf("bad", "authenticate %s %s", method, base64.StdEncoding.EncodeToString([]byte("bad data")))
|
tc.transactf("bad", "authenticate %s %s", method, base64.StdEncoding.EncodeToString([]byte("bad data")))
|
||||||
|
|
||||||
|
// NFD username, with PRECIS-cleaned password.
|
||||||
|
auth("ok", nil, "mo\u0301x@mox.example", password1)
|
||||||
|
|
||||||
tc.close()
|
tc.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,6 +181,10 @@ func TestAuthenticateCRAMMD5(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
chal := xreadContinuation()
|
chal := xreadContinuation()
|
||||||
|
pw, err := precis.OpaqueString.String(password)
|
||||||
|
if err == nil {
|
||||||
|
password = pw
|
||||||
|
}
|
||||||
h := hmac.New(md5.New, []byte(password))
|
h := hmac.New(md5.New, []byte(password))
|
||||||
h.Write([]byte(chal))
|
h.Write([]byte(chal))
|
||||||
resp := fmt.Sprintf("%s %x", username, h.Sum(nil))
|
resp := fmt.Sprintf("%s %x", username, h.Sum(nil))
|
||||||
|
@ -177,9 +199,14 @@ func TestAuthenticateCRAMMD5(t *testing.T) {
|
||||||
|
|
||||||
auth("no", "mjl@mox.example", "badpass")
|
auth("no", "mjl@mox.example", "badpass")
|
||||||
auth("no", "mjl@mox.example", "")
|
auth("no", "mjl@mox.example", "")
|
||||||
auth("no", "other@mox.example", "testtest")
|
auth("no", "other@mox.example", password0)
|
||||||
|
|
||||||
auth("ok", "mjl@mox.example", "testtest")
|
auth("ok", "mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
|
// NFD username, with PRECIS-cleaned password.
|
||||||
|
tc = start(t)
|
||||||
|
auth("ok", "mo\u0301x@mox.example", password1)
|
||||||
|
tc.close()
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ func testCondstoreQresync(t *testing.T, qresync bool) {
|
||||||
capability = "Qresync"
|
capability = "Qresync"
|
||||||
}
|
}
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Enable(capability)
|
tc.client.Enable(capability)
|
||||||
tc.transactf("ok", "Select inbox")
|
tc.transactf("ok", "Select inbox")
|
||||||
tc.xuntaggedOpt(false, imapclient.UntaggedResult{Status: imapclient.OK, RespText: imapclient.RespText{Code: "HIGHESTMODSEQ", CodeArg: imapclient.CodeHighestModSeq(1), More: "x"}})
|
tc.xuntaggedOpt(false, imapclient.UntaggedResult{Status: imapclient.OK, RespText: imapclient.RespText{Code: "HIGHESTMODSEQ", CodeArg: imapclient.CodeHighestModSeq(1), More: "x"}})
|
||||||
|
@ -101,13 +101,13 @@ func testCondstoreQresync(t *testing.T, qresync bool) {
|
||||||
// tc2 is a client without condstore, so no modseq responses.
|
// tc2 is a client without condstore, so no modseq responses.
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("inbox")
|
tc2.client.Select("inbox")
|
||||||
|
|
||||||
// tc3 is a client with condstore, so with modseq responses.
|
// tc3 is a client with condstore, so with modseq responses.
|
||||||
tc3 := startNoSwitchboard(t)
|
tc3 := startNoSwitchboard(t)
|
||||||
defer tc3.close()
|
defer tc3.close()
|
||||||
tc3.client.Login("mjl@mox.example", "testtest")
|
tc3.client.Login("mjl@mox.example", password0)
|
||||||
tc3.client.Enable(capability)
|
tc3.client.Enable(capability)
|
||||||
tc3.client.Select("inbox")
|
tc3.client.Select("inbox")
|
||||||
|
|
||||||
|
@ -357,7 +357,7 @@ func testCondstoreQresync(t *testing.T, qresync bool) {
|
||||||
xtc.close()
|
xtc.close()
|
||||||
store.CheckConsistencyOnClose = true
|
store.CheckConsistencyOnClose = true
|
||||||
}()
|
}()
|
||||||
xtc.client.Login("mjl@mox.example", "testtest")
|
xtc.client.Login("mjl@mox.example", password0)
|
||||||
fn(xtc)
|
fn(xtc)
|
||||||
tagcount++
|
tagcount++
|
||||||
label := fmt.Sprintf("l%d", tagcount)
|
label := fmt.Sprintf("l%d", tagcount)
|
||||||
|
@ -444,13 +444,13 @@ func testCondstoreQresync(t *testing.T, qresync bool) {
|
||||||
// tc2o is a client without condstore, so no modseq responses.
|
// tc2o is a client without condstore, so no modseq responses.
|
||||||
tc2o := startNoSwitchboard(t)
|
tc2o := startNoSwitchboard(t)
|
||||||
defer tc2o.close()
|
defer tc2o.close()
|
||||||
tc2o.client.Login("mjl@mox.example", "testtest")
|
tc2o.client.Login("mjl@mox.example", password0)
|
||||||
tc2o.client.Select("otherbox")
|
tc2o.client.Select("otherbox")
|
||||||
|
|
||||||
// tc3o is a client with condstore, so with modseq responses.
|
// tc3o is a client with condstore, so with modseq responses.
|
||||||
tc3o := startNoSwitchboard(t)
|
tc3o := startNoSwitchboard(t)
|
||||||
defer tc3o.close()
|
defer tc3o.close()
|
||||||
tc3o.client.Login("mjl@mox.example", "testtest")
|
tc3o.client.Login("mjl@mox.example", password0)
|
||||||
tc3o.client.Enable(capability)
|
tc3o.client.Enable(capability)
|
||||||
tc3o.client.Select("otherbox")
|
tc3o.client.Select("otherbox")
|
||||||
|
|
||||||
|
@ -529,7 +529,7 @@ func testQresync(t *testing.T, tc *testconn, clientModseq int64) {
|
||||||
|
|
||||||
// Vanished not allowed without first enabling qresync. ../rfc/7162:1697
|
// Vanished not allowed without first enabling qresync. ../rfc/7162:1697
|
||||||
xtc := startNoSwitchboard(t)
|
xtc := startNoSwitchboard(t)
|
||||||
xtc.client.Login("mjl@mox.example", "testtest")
|
xtc.client.Login("mjl@mox.example", password0)
|
||||||
xtc.transactf("ok", "Select inbox (Condstore)")
|
xtc.transactf("ok", "Select inbox (Condstore)")
|
||||||
xtc.transactf("bad", "Uid Fetch 1:* (Flags) (Changedsince 1 Vanished)")
|
xtc.transactf("bad", "Uid Fetch 1:* (Flags) (Changedsince 1 Vanished)")
|
||||||
// Prevent triggering the consistency checker, we still have modseq/createseq at 0.
|
// Prevent triggering the consistency checker, we still have modseq/createseq at 0.
|
||||||
|
@ -553,7 +553,7 @@ func testQresync(t *testing.T, tc *testconn, clientModseq int64) {
|
||||||
|
|
||||||
// Must enable qresync explicitly before using. ../rfc/7162:1446
|
// Must enable qresync explicitly before using. ../rfc/7162:1446
|
||||||
xtc = startNoSwitchboard(t)
|
xtc = startNoSwitchboard(t)
|
||||||
xtc.client.Login("mjl@mox.example", "testtest")
|
xtc.client.Login("mjl@mox.example", password0)
|
||||||
xtc.transactf("bad", "Select inbox (Qresync 1 0)")
|
xtc.transactf("bad", "Select inbox (Qresync 1 0)")
|
||||||
// Prevent triggering the consistency checker, we still have modseq/createseq at 0.
|
// Prevent triggering the consistency checker, we still have modseq/createseq at 0.
|
||||||
store.CheckConsistencyOnClose = false
|
store.CheckConsistencyOnClose = false
|
||||||
|
|
|
@ -14,10 +14,10 @@ func TestCopy(t *testing.T) {
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
|
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("Trash")
|
tc2.client.Select("Trash")
|
||||||
|
|
||||||
tc.transactf("bad", "copy") // Missing params.
|
tc.transactf("bad", "copy") // Missing params.
|
||||||
|
@ -61,7 +61,7 @@ func TestCopy(t *testing.T) {
|
||||||
|
|
||||||
tclimit := startArgs(t, false, false, true, true, "limit")
|
tclimit := startArgs(t, false, false, true, true, "limit")
|
||||||
defer tclimit.close()
|
defer tclimit.close()
|
||||||
tclimit.client.Login("limit@mox.example", "testtest")
|
tclimit.client.Login("limit@mox.example", password0)
|
||||||
tclimit.client.Select("inbox")
|
tclimit.client.Select("inbox")
|
||||||
// First message of 1 byte is within limits.
|
// First message of 1 byte is within limits.
|
||||||
tclimit.transactf("ok", "append inbox (\\Seen Label1 $label2) \" 1-Jan-2022 10:10:00 +0100\" {1+}\r\nx")
|
tclimit.transactf("ok", "append inbox (\\Seen Label1 $label2) \" 1-Jan-2022 10:10:00 +0100\" {1+}\r\nx")
|
||||||
|
|
|
@ -13,8 +13,8 @@ func TestCreate(t *testing.T) {
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("no", "create inbox") // Already exists and not allowed. ../rfc/9051:1913
|
tc.transactf("no", "create inbox") // Already exists and not allowed. ../rfc/9051:1913
|
||||||
tc.transactf("no", "create Inbox") // Idem.
|
tc.transactf("no", "create Inbox") // Idem.
|
||||||
|
|
|
@ -16,9 +16,9 @@ func TestDelete(t *testing.T) {
|
||||||
tc3 := startNoSwitchboard(t)
|
tc3 := startNoSwitchboard(t)
|
||||||
defer tc3.close()
|
defer tc3.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc3.client.Login("mjl@mox.example", "testtest")
|
tc3.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("bad", "delete") // Missing mailbox.
|
tc.transactf("bad", "delete") // Missing mailbox.
|
||||||
tc.transactf("no", "delete inbox") // Cannot delete inbox.
|
tc.transactf("no", "delete inbox") // Cannot delete inbox.
|
||||||
|
|
|
@ -14,10 +14,10 @@ func TestExpunge(t *testing.T) {
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
|
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("inbox")
|
tc2.client.Select("inbox")
|
||||||
|
|
||||||
tc.transactf("bad", "expunge leftover") // Leftover data.
|
tc.transactf("bad", "expunge leftover") // Leftover data.
|
||||||
|
|
|
@ -12,7 +12,7 @@ func TestFetch(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Enable("imap4rev2")
|
tc.client.Enable("imap4rev2")
|
||||||
received, err := time.Parse(time.RFC3339, "2022-11-16T10:01:00+01:00")
|
received, err := time.Parse(time.RFC3339, "2022-11-16T10:01:00+01:00")
|
||||||
tc.check(err, "parse time")
|
tc.check(err, "parse time")
|
||||||
|
|
|
@ -70,7 +70,7 @@ func FuzzServer(f *testing.F) {
|
||||||
f.Fatalf("open account: %v", err)
|
f.Fatalf("open account: %v", err)
|
||||||
}
|
}
|
||||||
defer acc.Close()
|
defer acc.Close()
|
||||||
err = acc.SetPassword(log, "testtest")
|
err = acc.SetPassword(log, password0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
f.Fatalf("set password: %v", err)
|
f.Fatalf("set password: %v", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,11 +11,11 @@ import (
|
||||||
func TestIdle(t *testing.T) {
|
func TestIdle(t *testing.T) {
|
||||||
tc1 := start(t)
|
tc1 := start(t)
|
||||||
defer tc1.close()
|
defer tc1.close()
|
||||||
tc1.transactf("ok", "login mjl@mox.example testtest")
|
tc1.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
tc2.transactf("ok", "login mjl@mox.example testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc1.transactf("ok", "select inbox")
|
tc1.transactf("ok", "select inbox")
|
||||||
tc2.transactf("ok", "select inbox")
|
tc2.transactf("ok", "select inbox")
|
||||||
|
|
|
@ -11,7 +11,7 @@ func TestListBasic(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
ulist := func(name string, flags ...string) imapclient.UntaggedList {
|
ulist := func(name string, flags ...string) imapclient.UntaggedList {
|
||||||
if len(flags) == 0 {
|
if len(flags) == 0 {
|
||||||
|
@ -61,7 +61,7 @@ func TestListExtended(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
ulist := func(name string, flags ...string) imapclient.UntaggedList {
|
ulist := func(name string, flags ...string) imapclient.UntaggedList {
|
||||||
if len(flags) == 0 {
|
if len(flags) == 0 {
|
||||||
|
|
|
@ -10,7 +10,7 @@ func TestLsub(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("bad", "lsub") // Missing params.
|
tc.transactf("bad", "lsub") // Missing params.
|
||||||
tc.transactf("bad", `lsub ""`) // Missing param.
|
tc.transactf("bad", `lsub ""`) // Missing param.
|
||||||
|
|
|
@ -17,13 +17,13 @@ func TestMove(t *testing.T) {
|
||||||
tc3 := startNoSwitchboard(t)
|
tc3 := startNoSwitchboard(t)
|
||||||
defer tc3.close()
|
defer tc3.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
|
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("Trash")
|
tc2.client.Select("Trash")
|
||||||
|
|
||||||
tc3.client.Login("mjl@mox.example", "testtest")
|
tc3.client.Login("mjl@mox.example", password0)
|
||||||
tc3.client.Select("inbox")
|
tc3.client.Select("inbox")
|
||||||
|
|
||||||
tc.transactf("bad", "move") // Missing params.
|
tc.transactf("bad", "move") // Missing params.
|
||||||
|
|
|
@ -14,8 +14,8 @@ func TestRename(t *testing.T) {
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("bad", "rename") // Missing parameters.
|
tc.transactf("bad", "rename") // Missing parameters.
|
||||||
tc.transactf("bad", "rename x") // Missing destination.
|
tc.transactf("bad", "rename x") // Missing destination.
|
||||||
|
|
|
@ -59,7 +59,7 @@ func (tc *testconn) xesearch(exp imapclient.UntaggedEsearch) {
|
||||||
func TestSearch(t *testing.T) {
|
func TestSearch(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
|
|
||||||
// Add 5 and delete first 4 messages. So UIDs start at 5.
|
// Add 5 and delete first 4 messages. So UIDs start at 5.
|
||||||
|
|
|
@ -21,7 +21,7 @@ func testSelectExamine(t *testing.T, examine bool) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
cmd := "select"
|
cmd := "select"
|
||||||
okcode := "READ-WRITE"
|
okcode := "READ-WRITE"
|
||||||
|
|
|
@ -334,6 +334,9 @@ func startNoSwitchboard(t *testing.T) *testconn {
|
||||||
return startArgs(t, false, false, true, false, "mjl")
|
return startArgs(t, false, false, true, false, "mjl")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const password0 = "te\u0301st \u00a0\u2002\u200a" // NFD and various unicode spaces.
|
||||||
|
const password1 = "tést " // PRECIS normalized, with NFC.
|
||||||
|
|
||||||
func startArgs(t *testing.T, first, isTLS, allowLoginWithoutTLS, setPassword bool, accname string) *testconn {
|
func startArgs(t *testing.T, first, isTLS, allowLoginWithoutTLS, setPassword bool, accname string) *testconn {
|
||||||
limitersInit() // Reset rate limiters.
|
limitersInit() // Reset rate limiters.
|
||||||
|
|
||||||
|
@ -346,7 +349,7 @@ func startArgs(t *testing.T, first, isTLS, allowLoginWithoutTLS, setPassword boo
|
||||||
acc, err := store.OpenAccount(pkglog, accname)
|
acc, err := store.OpenAccount(pkglog, accname)
|
||||||
tcheck(t, err, "open account")
|
tcheck(t, err, "open account")
|
||||||
if setPassword {
|
if setPassword {
|
||||||
err = acc.SetPassword(pkglog, "testtest")
|
err = acc.SetPassword(pkglog, password0)
|
||||||
tcheck(t, err, "set password")
|
tcheck(t, err, "set password")
|
||||||
}
|
}
|
||||||
switchStop := func() {}
|
switchStop := func() {}
|
||||||
|
@ -405,20 +408,20 @@ func TestLogin(t *testing.T) {
|
||||||
tc.transactf("bad", "login too many args")
|
tc.transactf("bad", "login too many args")
|
||||||
tc.transactf("bad", "login") // no args
|
tc.transactf("bad", "login") // no args
|
||||||
tc.transactf("no", "login mjl@mox.example badpass")
|
tc.transactf("no", "login mjl@mox.example badpass")
|
||||||
tc.transactf("no", "login mjl testtest") // must use email, not account
|
tc.transactf("no", `login mjl "%s"`, password0) // must use email, not account
|
||||||
tc.transactf("no", "login mjl@mox.example test")
|
tc.transactf("no", "login mjl@mox.example test")
|
||||||
tc.transactf("no", "login mjl@mox.example testtesttest")
|
tc.transactf("no", "login mjl@mox.example testtesttest")
|
||||||
tc.transactf("no", `login "mjl@mox.example" "testtesttest"`)
|
tc.transactf("no", `login "mjl@mox.example" "testtesttest"`)
|
||||||
tc.transactf("no", "login \"m\xf8x@mox.example\" \"testtesttest\"")
|
tc.transactf("no", "login \"m\xf8x@mox.example\" \"testtesttest\"")
|
||||||
tc.transactf("ok", "login mjl@mox.example testtest")
|
tc.transactf("ok", `login mjl@mox.example "%s"`, password0)
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = start(t)
|
tc = start(t)
|
||||||
tc.transactf("ok", `login "mjl@mox.example" "testtest"`)
|
tc.transactf("ok", `login "mjl@mox.example" "%s"`, password0)
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = start(t)
|
tc = start(t)
|
||||||
tc.transactf("ok", `login "\"\"@mox.example" "testtest"`)
|
tc.transactf("ok", `login "\"\"@mox.example" "%s"`, password0)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.transactf("bad", "logout badarg")
|
tc.transactf("bad", "logout badarg")
|
||||||
|
@ -447,7 +450,7 @@ func TestState(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some commands not allowed when authenticated.
|
// Some commands not allowed when authenticated.
|
||||||
tc.transactf("ok", "login mjl@mox.example testtest")
|
tc.transactf("ok", `login mjl@mox.example "%s"`, password0)
|
||||||
for _, cmd := range append(append([]string{}, notAuthenticated...), selected...) {
|
for _, cmd := range append(append([]string{}, notAuthenticated...), selected...) {
|
||||||
tc.transactf("no", "%s", cmd)
|
tc.transactf("no", "%s", cmd)
|
||||||
}
|
}
|
||||||
|
@ -472,7 +475,7 @@ func TestLiterals(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Create("tmpbox")
|
tc.client.Create("tmpbox")
|
||||||
|
|
||||||
tc.transactf("ok", "rename {6+}\r\ntmpbox {7+}\r\nntmpbox")
|
tc.transactf("ok", "rename {6+}\r\ntmpbox {7+}\r\nntmpbox")
|
||||||
|
@ -495,7 +498,7 @@ func TestLiterals(t *testing.T) {
|
||||||
func TestScenario(t *testing.T) {
|
func TestScenario(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.transactf("ok", "login mjl@mox.example testtest")
|
tc.transactf("ok", `login mjl@mox.example "%s"`, password0)
|
||||||
|
|
||||||
tc.transactf("bad", " missingcommand")
|
tc.transactf("bad", " missingcommand")
|
||||||
|
|
||||||
|
@ -573,7 +576,7 @@ func TestScenario(t *testing.T) {
|
||||||
func TestMailbox(t *testing.T) {
|
func TestMailbox(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
invalid := []string{
|
invalid := []string{
|
||||||
"e\u0301", // é but as e + acute, not unicode-normalized
|
"e\u0301", // é but as e + acute, not unicode-normalized
|
||||||
|
@ -595,11 +598,11 @@ func TestMailbox(t *testing.T) {
|
||||||
func TestMailboxDeleted(t *testing.T) {
|
func TestMailboxDeleted(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.client.Create("testbox")
|
tc.client.Create("testbox")
|
||||||
tc2.client.Select("testbox")
|
tc2.client.Select("testbox")
|
||||||
|
@ -631,7 +634,7 @@ func TestMailboxDeleted(t *testing.T) {
|
||||||
func TestID(t *testing.T) {
|
func TestID(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("ok", "id nil")
|
tc.transactf("ok", "id nil")
|
||||||
tc.xuntagged(imapclient.UntaggedID{"name": "mox", "version": moxvar.Version})
|
tc.xuntagged(imapclient.UntaggedID{"name": "mox", "version": moxvar.Version})
|
||||||
|
@ -645,7 +648,7 @@ func TestID(t *testing.T) {
|
||||||
func TestSequence(t *testing.T) {
|
func TestSequence(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
|
|
||||||
tc.transactf("bad", "fetch * all") // ../rfc/9051:7018
|
tc.transactf("bad", "fetch * all") // ../rfc/9051:7018
|
||||||
|
@ -673,13 +676,13 @@ func TestSequence(t *testing.T) {
|
||||||
func DisabledTestReference(t *testing.T) {
|
func DisabledTestReference(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
tc.client.Append("inbox", nil, nil, []byte(exampleMsg))
|
tc.client.Append("inbox", nil, nil, []byte(exampleMsg))
|
||||||
|
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Select("inbox")
|
tc2.client.Select("inbox")
|
||||||
|
|
||||||
tc.client.StoreFlagsSet("1", true, `\Deleted`)
|
tc.client.StoreFlagsSet("1", true, `\Deleted`)
|
||||||
|
@ -687,7 +690,7 @@ func DisabledTestReference(t *testing.T) {
|
||||||
|
|
||||||
tc3 := startNoSwitchboard(t)
|
tc3 := startNoSwitchboard(t)
|
||||||
defer tc3.close()
|
defer tc3.close()
|
||||||
tc3.client.Login("mjl@mox.example", "testtest")
|
tc3.client.Login("mjl@mox.example", password0)
|
||||||
tc3.transactf("ok", `list "" "inbox" return (status (messages))`)
|
tc3.transactf("ok", `list "" "inbox" return (status (messages))`)
|
||||||
tc3.xuntagged(imapclient.UntaggedList{Separator: '/', Mailbox: "Inbox"}, imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 0}})
|
tc3.xuntagged(imapclient.UntaggedList{Separator: '/', Mailbox: "Inbox"}, imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 0}})
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ func TestStarttls(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
tc.client.Starttls(&tls.Config{InsecureSkipVerify: true})
|
tc.client.Starttls(&tls.Config{InsecureSkipVerify: true})
|
||||||
tc.transactf("bad", "starttls") // TLS already active.
|
tc.transactf("bad", "starttls") // TLS already active.
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = startArgs(t, true, true, false, true, "mjl")
|
tc = startArgs(t, true, true, false, true, "mjl")
|
||||||
|
@ -18,11 +18,11 @@ func TestStarttls(t *testing.T) {
|
||||||
tc.close()
|
tc.close()
|
||||||
|
|
||||||
tc = startArgs(t, true, false, false, true, "mjl")
|
tc = startArgs(t, true, false, false, true, "mjl")
|
||||||
tc.transactf("no", `login "mjl@mox.example" "testtest"`)
|
tc.transactf("no", `login "mjl@mox.example" "%s"`, password0)
|
||||||
tc.xcode("PRIVACYREQUIRED")
|
tc.xcode("PRIVACYREQUIRED")
|
||||||
tc.transactf("no", "authenticate PLAIN %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000testtest")))
|
tc.transactf("no", "authenticate PLAIN %s", base64.StdEncoding.EncodeToString([]byte("\u0000mjl@mox.example\u0000"+password0)))
|
||||||
tc.xcode("PRIVACYREQUIRED")
|
tc.xcode("PRIVACYREQUIRED")
|
||||||
tc.client.Starttls(&tls.Config{InsecureSkipVerify: true})
|
tc.client.Starttls(&tls.Config{InsecureSkipVerify: true})
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.close()
|
tc.close()
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ func TestStatus(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("bad", "status") // Missing param.
|
tc.transactf("bad", "status") // Missing param.
|
||||||
tc.transactf("bad", "status inbox") // Missing param.
|
tc.transactf("bad", "status inbox") // Missing param.
|
||||||
|
|
|
@ -11,7 +11,7 @@ func TestStore(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Enable("imap4rev2")
|
tc.client.Enable("imap4rev2")
|
||||||
|
|
||||||
tc.client.Append("inbox", nil, nil, []byte(exampleMsg))
|
tc.client.Append("inbox", nil, nil, []byte(exampleMsg))
|
||||||
|
|
|
@ -13,8 +13,8 @@ func TestSubscribe(t *testing.T) {
|
||||||
tc2 := startNoSwitchboard(t)
|
tc2 := startNoSwitchboard(t)
|
||||||
defer tc2.close()
|
defer tc2.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc2.client.Login("mjl@mox.example", "testtest")
|
tc2.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("bad", "subscribe") // Missing param.
|
tc.transactf("bad", "subscribe") // Missing param.
|
||||||
tc.transactf("bad", "subscribe ") // Missing param.
|
tc.transactf("bad", "subscribe ") // Missing param.
|
||||||
|
|
|
@ -10,7 +10,7 @@ func TestUnselect(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
tc.client.Select("inbox")
|
tc.client.Select("inbox")
|
||||||
|
|
||||||
tc.transactf("bad", "unselect bogus") // Leftover data.
|
tc.transactf("bad", "unselect bogus") // Leftover data.
|
||||||
|
|
|
@ -8,7 +8,7 @@ func TestUnsubscribe(t *testing.T) {
|
||||||
tc := start(t)
|
tc := start(t)
|
||||||
defer tc.close()
|
defer tc.close()
|
||||||
|
|
||||||
tc.client.Login("mjl@mox.example", "testtest")
|
tc.client.Login("mjl@mox.example", password0)
|
||||||
|
|
||||||
tc.transactf("bad", "unsubscribe") // Missing param.
|
tc.transactf("bad", "unsubscribe") // Missing param.
|
||||||
tc.transactf("bad", "unsubscribe ") // Missing param.
|
tc.transactf("bad", "unsubscribe ") // Missing param.
|
||||||
|
|
3
main.go
3
main.go
|
@ -33,6 +33,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/crypto/bcrypt"
|
"golang.org/x/crypto/bcrypt"
|
||||||
|
"golang.org/x/text/secure/precis"
|
||||||
|
|
||||||
"github.com/mjl-/adns"
|
"github.com/mjl-/adns"
|
||||||
|
|
||||||
|
@ -1246,6 +1247,8 @@ The password is read from stdin. Its bcrypt hash is stored in a file named
|
||||||
}
|
}
|
||||||
|
|
||||||
pw := xreadpassword()
|
pw := xreadpassword()
|
||||||
|
pw, err := precis.OpaqueString.String(pw)
|
||||||
|
xcheckf(err, `checking password with "precis" requirements`)
|
||||||
hash, err := bcrypt.GenerateFromPassword([]byte(pw), bcrypt.DefaultCost)
|
hash, err := bcrypt.GenerateFromPassword([]byte(pw), bcrypt.DefaultCost)
|
||||||
xcheckf(err, "generating hash for password")
|
xcheckf(err, "generating hash for password")
|
||||||
err = os.WriteFile(path, hash, 0660)
|
err = os.WriteFile(path, hash, 0660)
|
||||||
|
|
20
sasl/sasl.go
20
sasl/sasl.go
|
@ -10,6 +10,8 @@ import (
|
||||||
"hash"
|
"hash"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/secure/precis"
|
||||||
|
|
||||||
"github.com/mjl-/mox/scram"
|
"github.com/mjl-/mox/scram"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -52,6 +54,7 @@ var _ Client = (*clientPlain)(nil)
|
||||||
// PLAIN is specified in RFC 4616, The PLAIN Simple Authentication and Security
|
// PLAIN is specified in RFC 4616, The PLAIN Simple Authentication and Security
|
||||||
// Layer (SASL) Mechanism.
|
// Layer (SASL) Mechanism.
|
||||||
func NewClientPlain(username, password string) Client {
|
func NewClientPlain(username, password string) Client {
|
||||||
|
// No "precis" processing, remote can clean password up. ../rfc/8265:679
|
||||||
return &clientPlain{username, password, 0}
|
return &clientPlain{username, password, 0}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,6 +83,7 @@ var _ Client = (*clientLogin)(nil)
|
||||||
//
|
//
|
||||||
// See https://datatracker.ietf.org/doc/html/draft-murchison-sasl-login-00
|
// See https://datatracker.ietf.org/doc/html/draft-murchison-sasl-login-00
|
||||||
func NewClientLogin(username, password string) Client {
|
func NewClientLogin(username, password string) Client {
|
||||||
|
// No "precis" processing, remote can clean password up. ../rfc/8265:679
|
||||||
return &clientLogin{username, password, 0}
|
return &clientLogin{username, password, 0}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,6 +103,17 @@ func (a *clientLogin) Next(fromServer []byte) (toServer []byte, last bool, rerr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cleanup password with precis, like remote should have done. If the password
|
||||||
|
// appears invalid, we'll return the original, there is a chance the server also
|
||||||
|
// doesn't enforce requirements and accepts it. ../rfc/8265:679
|
||||||
|
func precisPassword(password string) string {
|
||||||
|
pw, err := precis.OpaqueString.String(password)
|
||||||
|
if err != nil {
|
||||||
|
return password
|
||||||
|
}
|
||||||
|
return pw
|
||||||
|
}
|
||||||
|
|
||||||
type clientCRAMMD5 struct {
|
type clientCRAMMD5 struct {
|
||||||
Username, Password string
|
Username, Password string
|
||||||
step int
|
step int
|
||||||
|
@ -111,6 +126,7 @@ var _ Client = (*clientCRAMMD5)(nil)
|
||||||
// CRAM-MD5 is specified in RFC 2195, IMAP/POP AUTHorize Extension for Simple
|
// CRAM-MD5 is specified in RFC 2195, IMAP/POP AUTHorize Extension for Simple
|
||||||
// Challenge/Response.
|
// Challenge/Response.
|
||||||
func NewClientCRAMMD5(username, password string) Client {
|
func NewClientCRAMMD5(username, password string) Client {
|
||||||
|
password = precisPassword(password)
|
||||||
return &clientCRAMMD5{username, password, 0}
|
return &clientCRAMMD5{username, password, 0}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,6 +215,7 @@ var _ Client = (*clientSCRAMSHA)(nil)
|
||||||
// SCRAM-SHA-1 is specified in RFC 5802, "Salted Challenge Response Authentication
|
// SCRAM-SHA-1 is specified in RFC 5802, "Salted Challenge Response Authentication
|
||||||
// Mechanism (SCRAM) SASL and GSS-API Mechanisms".
|
// Mechanism (SCRAM) SASL and GSS-API Mechanisms".
|
||||||
func NewClientSCRAMSHA1(username, password string, noServerPlus bool) Client {
|
func NewClientSCRAMSHA1(username, password string, noServerPlus bool) Client {
|
||||||
|
password = precisPassword(password)
|
||||||
return &clientSCRAMSHA{username, password, sha1.New, false, tls.ConnectionState{}, noServerPlus, "SCRAM-SHA-1", 0, nil}
|
return &clientSCRAMSHA{username, password, sha1.New, false, tls.ConnectionState{}, noServerPlus, "SCRAM-SHA-1", 0, nil}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,6 +227,7 @@ func NewClientSCRAMSHA1(username, password string, noServerPlus bool) Client {
|
||||||
// SCRAM-SHA-1-PLUS is specified in RFC 5802, "Salted Challenge Response
|
// SCRAM-SHA-1-PLUS is specified in RFC 5802, "Salted Challenge Response
|
||||||
// Authentication Mechanism (SCRAM) SASL and GSS-API Mechanisms".
|
// Authentication Mechanism (SCRAM) SASL and GSS-API Mechanisms".
|
||||||
func NewClientSCRAMSHA1PLUS(username, password string, cs tls.ConnectionState) Client {
|
func NewClientSCRAMSHA1PLUS(username, password string, cs tls.ConnectionState) Client {
|
||||||
|
password = precisPassword(password)
|
||||||
return &clientSCRAMSHA{username, password, sha1.New, true, cs, false, "SCRAM-SHA-1-PLUS", 0, nil}
|
return &clientSCRAMSHA{username, password, sha1.New, true, cs, false, "SCRAM-SHA-1-PLUS", 0, nil}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,6 +242,7 @@ func NewClientSCRAMSHA1PLUS(username, password string, cs tls.ConnectionState) C
|
||||||
// SCRAM-SHA-256 is specified in RFC 7677, "SCRAM-SHA-256 and SCRAM-SHA-256-PLUS
|
// SCRAM-SHA-256 is specified in RFC 7677, "SCRAM-SHA-256 and SCRAM-SHA-256-PLUS
|
||||||
// Simple Authentication and Security Layer (SASL) Mechanisms".
|
// Simple Authentication and Security Layer (SASL) Mechanisms".
|
||||||
func NewClientSCRAMSHA256(username, password string, noServerPlus bool) Client {
|
func NewClientSCRAMSHA256(username, password string, noServerPlus bool) Client {
|
||||||
|
password = precisPassword(password)
|
||||||
return &clientSCRAMSHA{username, password, sha256.New, false, tls.ConnectionState{}, noServerPlus, "SCRAM-SHA-256", 0, nil}
|
return &clientSCRAMSHA{username, password, sha256.New, false, tls.ConnectionState{}, noServerPlus, "SCRAM-SHA-256", 0, nil}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,6 +254,7 @@ func NewClientSCRAMSHA256(username, password string, noServerPlus bool) Client {
|
||||||
// SCRAM-SHA-256-PLUS is specified in RFC 7677, "SCRAM-SHA-256 and SCRAM-SHA-256-PLUS
|
// SCRAM-SHA-256-PLUS is specified in RFC 7677, "SCRAM-SHA-256 and SCRAM-SHA-256-PLUS
|
||||||
// Simple Authentication and Security Layer (SASL) Mechanisms".
|
// Simple Authentication and Security Layer (SASL) Mechanisms".
|
||||||
func NewClientSCRAMSHA256PLUS(username, password string, cs tls.ConnectionState) Client {
|
func NewClientSCRAMSHA256PLUS(username, password string, cs tls.ConnectionState) Client {
|
||||||
|
password = precisPassword(password)
|
||||||
return &clientSCRAMSHA{username, password, sha256.New, true, cs, false, "SCRAM-SHA-256-PLUS", 0, nil}
|
return &clientSCRAMSHA{username, password, sha256.New, true, cs, false, "SCRAM-SHA-256-PLUS", 0, nil}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/crypto/pbkdf2"
|
"golang.org/x/crypto/pbkdf2"
|
||||||
|
"golang.org/x/text/secure/precis"
|
||||||
"golang.org/x/text/unicode/norm"
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -84,9 +85,20 @@ func MakeRandom() []byte {
|
||||||
return buf
|
return buf
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cleanup password with precis, like remote should have done. If the password
|
||||||
|
// appears invalid, we'll return the original, there is a chance the server also
|
||||||
|
// doesn't enforce requirements and accepts it. ../rfc/8265:679
|
||||||
|
func precisPassword(password string) string {
|
||||||
|
pw, err := precis.OpaqueString.String(password)
|
||||||
|
if err != nil {
|
||||||
|
return password
|
||||||
|
}
|
||||||
|
return pw
|
||||||
|
}
|
||||||
|
|
||||||
// SaltPassword returns a salted password.
|
// SaltPassword returns a salted password.
|
||||||
func SaltPassword(h func() hash.Hash, password string, salt []byte, iterations int) []byte {
|
func SaltPassword(h func() hash.Hash, password string, salt []byte, iterations int) []byte {
|
||||||
password = norm.NFC.String(password)
|
password = precisPassword(password)
|
||||||
return pbkdf2.Key([]byte(password), salt, iterations, h().Size(), h)
|
return pbkdf2.Key([]byte(password), salt, iterations, h().Size(), h)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/exp/maps"
|
"golang.org/x/exp/maps"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||||
|
@ -962,8 +963,6 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
xsmtpUserErrorf(smtp.C503BadCmdSeq, smtp.SeProto5BadCmdOrSeq1, "authentication not allowed during mail transaction")
|
xsmtpUserErrorf(smtp.C503BadCmdSeq, smtp.SeProto5BadCmdOrSeq1, "authentication not allowed during mail transaction")
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo future: we may want to normalize usernames and passwords, see stringprep in ../rfc/4013:38 and possibly newer mechanisms (though they are opt-in and that may not have happened yet).
|
|
||||||
|
|
||||||
// If authentication fails due to missing derived secrets, we don't hold it against
|
// If authentication fails due to missing derived secrets, we don't hold it against
|
||||||
// the connection. There is no way to indicate server support for an authentication
|
// the connection. There is no way to indicate server support for an authentication
|
||||||
// mechanism, but that a mechanism won't work for an account.
|
// mechanism, but that a mechanism won't work for an account.
|
||||||
|
@ -1071,8 +1070,8 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
if len(plain) != 3 {
|
if len(plain) != 3 {
|
||||||
xsmtpUserErrorf(smtp.C501BadParamSyntax, smtp.SeProto5BadParams4, "auth data should have 3 nul-separated tokens, got %d", len(plain))
|
xsmtpUserErrorf(smtp.C501BadParamSyntax, smtp.SeProto5BadParams4, "auth data should have 3 nul-separated tokens, got %d", len(plain))
|
||||||
}
|
}
|
||||||
authz := string(plain[0])
|
authz := norm.NFC.String(string(plain[0]))
|
||||||
authc := string(plain[1])
|
authc := norm.NFC.String(string(plain[1]))
|
||||||
password := string(plain[2])
|
password := string(plain[2])
|
||||||
|
|
||||||
if authz != "" && authz != authc {
|
if authz != "" && authz != authc {
|
||||||
|
@ -1115,6 +1114,7 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
// I-D says maximum length must be 64 bytes. We allow more, for long user names
|
// I-D says maximum length must be 64 bytes. We allow more, for long user names
|
||||||
// (domains).
|
// (domains).
|
||||||
username := string(xreadInitial())
|
username := string(xreadInitial())
|
||||||
|
username = norm.NFC.String(username)
|
||||||
|
|
||||||
// Again, client should ignore the challenge, we send the same as the example in
|
// Again, client should ignore the challenge, we send the same as the example in
|
||||||
// the I-D.
|
// the I-D.
|
||||||
|
@ -1156,7 +1156,7 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
if len(t) != 2 || len(t[1]) != 2*md5.Size {
|
if len(t) != 2 || len(t[1]) != 2*md5.Size {
|
||||||
xsmtpUserErrorf(smtp.C501BadParamSyntax, smtp.SeProto5BadParams4, "malformed cram-md5 response")
|
xsmtpUserErrorf(smtp.C501BadParamSyntax, smtp.SeProto5BadParams4, "malformed cram-md5 response")
|
||||||
}
|
}
|
||||||
addr := t[0]
|
addr := norm.NFC.String(t[0])
|
||||||
c.log.Debug("cram-md5 auth", slog.String("address", addr))
|
c.log.Debug("cram-md5 auth", slog.String("address", addr))
|
||||||
acc, _, err := store.OpenEmail(c.log, addr)
|
acc, _, err := store.OpenEmail(c.log, addr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -1245,13 +1245,14 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
c0 := xreadInitial()
|
c0 := xreadInitial()
|
||||||
ss, err := scram.NewServer(h, c0, cs, channelBindingRequired)
|
ss, err := scram.NewServer(h, c0, cs, channelBindingRequired)
|
||||||
xcheckf(err, "starting scram")
|
xcheckf(err, "starting scram")
|
||||||
c.log.Debug("scram auth", slog.String("authentication", ss.Authentication))
|
authc := norm.NFC.String(ss.Authentication)
|
||||||
acc, _, err := store.OpenEmail(c.log, ss.Authentication)
|
c.log.Debug("scram auth", slog.String("authentication", authc))
|
||||||
|
acc, _, err := store.OpenEmail(c.log, authc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// todo: we could continue scram with a generated salt, deterministically generated
|
// todo: we could continue scram with a generated salt, deterministically generated
|
||||||
// from the username. that way we don't have to store anything but attackers cannot
|
// from the username. that way we don't have to store anything but attackers cannot
|
||||||
// learn if an account exists. same for absent scram saltedpassword below.
|
// learn if an account exists. same for absent scram saltedpassword below.
|
||||||
c.log.Info("failed authentication attempt", slog.String("username", ss.Authentication), slog.Any("remote", c.remoteIP))
|
c.log.Info("failed authentication attempt", slog.String("username", authc), slog.Any("remote", c.remoteIP))
|
||||||
xsmtpUserErrorf(smtp.C454TempAuthFail, smtp.SeSys3Other0, "scram not possible")
|
xsmtpUserErrorf(smtp.C454TempAuthFail, smtp.SeSys3Other0, "scram not possible")
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
|
@ -1268,7 +1269,7 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
err := acc.DB.Read(context.TODO(), func(tx *bstore.Tx) error {
|
err := acc.DB.Read(context.TODO(), func(tx *bstore.Tx) error {
|
||||||
password, err := bstore.QueryTx[store.Password](tx).Get()
|
password, err := bstore.QueryTx[store.Password](tx).Get()
|
||||||
if err == bstore.ErrAbsent {
|
if err == bstore.ErrAbsent {
|
||||||
c.log.Info("failed authentication attempt", slog.String("username", ss.Authentication), slog.Any("remote", c.remoteIP))
|
c.log.Info("failed authentication attempt", slog.String("username", authc), slog.Any("remote", c.remoteIP))
|
||||||
xsmtpUserErrorf(smtp.C535AuthBadCreds, smtp.SePol7AuthBadCreds8, "bad user/pass")
|
xsmtpUserErrorf(smtp.C535AuthBadCreds, smtp.SePol7AuthBadCreds8, "bad user/pass")
|
||||||
}
|
}
|
||||||
xcheckf(err, "fetching credentials")
|
xcheckf(err, "fetching credentials")
|
||||||
|
@ -1282,8 +1283,8 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
}
|
}
|
||||||
if len(xscram.Salt) == 0 || xscram.Iterations == 0 || len(xscram.SaltedPassword) == 0 {
|
if len(xscram.Salt) == 0 || xscram.Iterations == 0 || len(xscram.SaltedPassword) == 0 {
|
||||||
missingDerivedSecrets = true
|
missingDerivedSecrets = true
|
||||||
c.log.Info("scram auth attempt without derived secrets set, save password again to store secrets", slog.String("address", ss.Authentication))
|
c.log.Info("scram auth attempt without derived secrets set, save password again to store secrets", slog.String("address", authc))
|
||||||
c.log.Info("failed authentication attempt", slog.String("username", ss.Authentication), slog.Any("remote", c.remoteIP))
|
c.log.Info("failed authentication attempt", slog.String("username", authc), slog.Any("remote", c.remoteIP))
|
||||||
xsmtpUserErrorf(smtp.C454TempAuthFail, smtp.SeSys3Other0, "scram not possible")
|
xsmtpUserErrorf(smtp.C454TempAuthFail, smtp.SeSys3Other0, "scram not possible")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -1302,7 +1303,7 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
c.readline() // Should be "*" for cancellation.
|
c.readline() // Should be "*" for cancellation.
|
||||||
if errors.Is(err, scram.ErrInvalidProof) {
|
if errors.Is(err, scram.ErrInvalidProof) {
|
||||||
authResult = "badcreds"
|
authResult = "badcreds"
|
||||||
c.log.Info("failed authentication attempt", slog.String("username", ss.Authentication), slog.Any("remote", c.remoteIP))
|
c.log.Info("failed authentication attempt", slog.String("username", authc), slog.Any("remote", c.remoteIP))
|
||||||
xsmtpUserErrorf(smtp.C535AuthBadCreds, smtp.SePol7AuthBadCreds8, "bad credentials")
|
xsmtpUserErrorf(smtp.C535AuthBadCreds, smtp.SePol7AuthBadCreds8, "bad credentials")
|
||||||
}
|
}
|
||||||
xcheckf(err, "server final")
|
xcheckf(err, "server final")
|
||||||
|
@ -1317,7 +1318,7 @@ func (c *conn) cmdAuth(p *parser) {
|
||||||
c.setSlow(false)
|
c.setSlow(false)
|
||||||
c.account = acc
|
c.account = acc
|
||||||
acc = nil // Cancel cleanup.
|
acc = nil // Cancel cleanup.
|
||||||
c.username = ss.Authentication
|
c.username = authc
|
||||||
// ../rfc/4954:276
|
// ../rfc/4954:276
|
||||||
c.writecodeline(smtp.C235AuthSuccess, smtp.SePol7Other0, "nice", nil)
|
c.writecodeline(smtp.C235AuthSuccess, smtp.SePol7Other0, "nice", nil)
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,9 @@ type testserver struct {
|
||||||
tlspkix bool
|
tlspkix bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const password0 = "te\u0301st \u00a0\u2002\u200a" // NFD and various unicode spaces.
|
||||||
|
const password1 = "tést " // PRECIS normalized, with NFC.
|
||||||
|
|
||||||
func newTestServer(t *testing.T, configPath string, resolver dns.Resolver) *testserver {
|
func newTestServer(t *testing.T, configPath string, resolver dns.Resolver) *testserver {
|
||||||
limitersInit() // Reset rate limiters.
|
limitersInit() // Reset rate limiters.
|
||||||
|
|
||||||
|
@ -116,7 +119,7 @@ func newTestServer(t *testing.T, configPath string, resolver dns.Resolver) *test
|
||||||
var err error
|
var err error
|
||||||
ts.acc, err = store.OpenAccount(log, "mjl")
|
ts.acc, err = store.OpenAccount(log, "mjl")
|
||||||
tcheck(t, err, "open account")
|
tcheck(t, err, "open account")
|
||||||
err = ts.acc.SetPassword(log, "testtest")
|
err = ts.acc.SetPassword(log, password0)
|
||||||
tcheck(t, err, "set password")
|
tcheck(t, err, "set password")
|
||||||
ts.switchStop = store.Switchboard()
|
ts.switchStop = store.Switchboard()
|
||||||
err = queue.Init()
|
err = queue.Init()
|
||||||
|
@ -285,9 +288,14 @@ func TestSubmission(t *testing.T) {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
for _, fn := range authfns {
|
for _, fn := range authfns {
|
||||||
testAuth(fn, "mjl@mox.example", "test", &smtpclient.Error{Secode: smtp.SePol7AuthBadCreds8}) // Bad (short) password.
|
testAuth(fn, "mjl@mox.example", "test", &smtpclient.Error{Secode: smtp.SePol7AuthBadCreds8}) // Bad (short) password.
|
||||||
testAuth(fn, "mjl@mox.example", "testtesttest", &smtpclient.Error{Secode: smtp.SePol7AuthBadCreds8}) // Bad password.
|
testAuth(fn, "mjl@mox.example", password0+"test", &smtpclient.Error{Secode: smtp.SePol7AuthBadCreds8}) // Bad password.
|
||||||
testAuth(fn, "mjl@mox.example", "testtest", nil)
|
testAuth(fn, "mjl@mox.example", password0, nil)
|
||||||
|
testAuth(fn, "mjl@mox.example", password1, nil)
|
||||||
|
testAuth(fn, "móx@mox.example", password0, nil)
|
||||||
|
testAuth(fn, "móx@mox.example", password1, nil)
|
||||||
|
testAuth(fn, "mo\u0301x@mox.example", password0, nil)
|
||||||
|
testAuth(fn, "mo\u0301x@mox.example", password1, nil)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -370,12 +378,12 @@ func TestDelivery(t *testing.T) {
|
||||||
ts.run(func(err error, client *smtpclient.Client) {
|
ts.run(func(err error, client *smtpclient.Client) {
|
||||||
recipients := []string{
|
recipients := []string{
|
||||||
"mjl@mox.example",
|
"mjl@mox.example",
|
||||||
"o@mox.example", // ascii o, as configured
|
"o@mox.example", // ascii o, as configured
|
||||||
"\u2126@mox.example", // ohm sign, as configured
|
"\u2126@mox.example", // ohm sign, as configured
|
||||||
"ω@mox.example", // lower-case omega, we match case-insensitively and this is the lowercase of ohm (!)
|
"ω@mox.example", // lower-case omega, we match case-insensitively and this is the lowercase of ohm (!)
|
||||||
"\u03a9@mox.example", // capital omega, also lowercased to omega.
|
"\u03a9@mox.example", // capital omega, also lowercased to omega.
|
||||||
"tést@mox.example", // NFC
|
"móx@mox.example", // NFC
|
||||||
"te\u0301st@mox.example", // not NFC, but normalized as tést@, see https://go.dev/blog/normalization
|
"mo\u0301x@mox.example", // not NFC, but normalized as móx@, see https://go.dev/blog/normalization
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, rcptTo := range recipients {
|
for _, rcptTo := range recipients {
|
||||||
|
@ -1258,7 +1266,7 @@ func TestLimitOutgoing(t *testing.T) {
|
||||||
defer ts.close()
|
defer ts.close()
|
||||||
|
|
||||||
ts.user = "mjl@mox.example"
|
ts.user = "mjl@mox.example"
|
||||||
ts.pass = "testtest"
|
ts.pass = password0
|
||||||
ts.submission = true
|
ts.submission = true
|
||||||
|
|
||||||
err := ts.acc.DB.Insert(ctxbg, &store.Outgoing{Recipient: "a@other.example", Submitted: time.Now().Add(-24*time.Hour - time.Minute)})
|
err := ts.acc.DB.Insert(ctxbg, &store.Outgoing{Recipient: "a@other.example", Submitted: time.Now().Add(-24*time.Hour - time.Minute)})
|
||||||
|
@ -1413,7 +1421,7 @@ func TestDKIMSign(t *testing.T) {
|
||||||
|
|
||||||
ts.submission = true
|
ts.submission = true
|
||||||
ts.user = "mjl@mox.example"
|
ts.user = "mjl@mox.example"
|
||||||
ts.pass = "testtest"
|
ts.pass = password0
|
||||||
|
|
||||||
n := 0
|
n := 0
|
||||||
testSubmit := func(mailFrom, msgFrom string) {
|
testSubmit := func(mailFrom, msgFrom string) {
|
||||||
|
@ -1541,7 +1549,7 @@ func TestRequireTLS(t *testing.T) {
|
||||||
ts.submission = true
|
ts.submission = true
|
||||||
ts.requiretls = true
|
ts.requiretls = true
|
||||||
ts.user = "mjl@mox.example"
|
ts.user = "mjl@mox.example"
|
||||||
ts.pass = "testtest"
|
ts.pass = password0
|
||||||
|
|
||||||
no := false
|
no := false
|
||||||
yes := true
|
yes := true
|
||||||
|
@ -1692,7 +1700,7 @@ func TestFutureRelease(t *testing.T) {
|
||||||
ts := newTestServer(t, filepath.FromSlash("../testdata/smtp/mox.conf"), dns.MockResolver{})
|
ts := newTestServer(t, filepath.FromSlash("../testdata/smtp/mox.conf"), dns.MockResolver{})
|
||||||
ts.tlsmode = smtpclient.TLSSkip
|
ts.tlsmode = smtpclient.TLSSkip
|
||||||
ts.user = "mjl@mox.example"
|
ts.user = "mjl@mox.example"
|
||||||
ts.pass = "testtest"
|
ts.pass = password0
|
||||||
ts.submission = true
|
ts.submission = true
|
||||||
defer ts.close()
|
defer ts.close()
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/crypto/bcrypt"
|
"golang.org/x/crypto/bcrypt"
|
||||||
|
"golang.org/x/text/secure/precis"
|
||||||
"golang.org/x/text/unicode/norm"
|
"golang.org/x/text/unicode/norm"
|
||||||
|
|
||||||
"github.com/mjl-/bstore"
|
"github.com/mjl-/bstore"
|
||||||
|
@ -1473,6 +1474,16 @@ func (a *Account) DeliverMessage(log mlog.Log, tx *bstore.Tx, m *Message, msgFil
|
||||||
// SetPassword saves a new password for this account. This password is used for
|
// SetPassword saves a new password for this account. This password is used for
|
||||||
// IMAP, SMTP (submission) sessions and the HTTP account web page.
|
// IMAP, SMTP (submission) sessions and the HTTP account web page.
|
||||||
func (a *Account) SetPassword(log mlog.Log, password string) error {
|
func (a *Account) SetPassword(log mlog.Log, password string) error {
|
||||||
|
password, err := precis.OpaqueString.String(password)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf(`password not allowed by "precis"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(password) < 8 {
|
||||||
|
// We actually check for bytes...
|
||||||
|
return fmt.Errorf("password must be at least 8 characters long")
|
||||||
|
}
|
||||||
|
|
||||||
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
|
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("generating password hash: %w", err)
|
return fmt.Errorf("generating password hash: %w", err)
|
||||||
|
@ -2080,6 +2091,11 @@ func manageAuthCache() {
|
||||||
//
|
//
|
||||||
// The email address may contain a catchall separator.
|
// The email address may contain a catchall separator.
|
||||||
func OpenEmailAuth(log mlog.Log, email string, password string) (acc *Account, rerr error) {
|
func OpenEmailAuth(log mlog.Log, email string, password string) (acc *Account, rerr error) {
|
||||||
|
password, err := precis.OpaqueString.String(password)
|
||||||
|
if err != nil {
|
||||||
|
return nil, ErrUnknownCredentials
|
||||||
|
}
|
||||||
|
|
||||||
acc, _, rerr = OpenEmail(log, email)
|
acc, _, rerr = OpenEmail(log, email)
|
||||||
if rerr != nil {
|
if rerr != nil {
|
||||||
return
|
return
|
||||||
|
|
1
testdata/imap/domains.conf
vendored
1
testdata/imap/domains.conf
vendored
|
@ -7,6 +7,7 @@ Accounts:
|
||||||
Destinations:
|
Destinations:
|
||||||
mjl@mox.example: nil
|
mjl@mox.example: nil
|
||||||
""@mox.example: nil
|
""@mox.example: nil
|
||||||
|
móx@mox.example: nil
|
||||||
JunkFilter:
|
JunkFilter:
|
||||||
Threshold: 0.95
|
Threshold: 0.95
|
||||||
Params:
|
Params:
|
||||||
|
|
2
testdata/smtp/domains.conf
vendored
2
testdata/smtp/domains.conf
vendored
|
@ -12,7 +12,7 @@ Accounts:
|
||||||
o@mox.example: nil
|
o@mox.example: nil
|
||||||
# ohm sign, \u2126
|
# ohm sign, \u2126
|
||||||
Ω@mox.example: nil
|
Ω@mox.example: nil
|
||||||
tést@mox.example: nil
|
móx@mox.example: nil
|
||||||
JunkFilter:
|
JunkFilter:
|
||||||
Threshold: 0.9
|
Threshold: 0.9
|
||||||
Params:
|
Params:
|
||||||
|
|
1
testdata/webmail/domains.conf
vendored
1
testdata/webmail/domains.conf
vendored
|
@ -18,6 +18,7 @@ Accounts:
|
||||||
Destinations:
|
Destinations:
|
||||||
mjl@mox.example: nil
|
mjl@mox.example: nil
|
||||||
møx@mox.example: nil
|
møx@mox.example: nil
|
||||||
|
móx@mox.example: nil
|
||||||
RejectsMailbox: Rejects
|
RejectsMailbox: Rejects
|
||||||
JunkFilter:
|
JunkFilter:
|
||||||
Threshold: 0.95
|
Threshold: 0.95
|
||||||
|
|
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
Normal file
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
Normal file
|
@ -0,0 +1,162 @@
|
||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate go run gen.go gen_trieval.go
|
||||||
|
|
||||||
|
// Package cases provides general and language-specific case mappers.
|
||||||
|
package cases // import "golang.org/x/text/cases"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// References:
|
||||||
|
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
|
||||||
|
// - https://www.unicode.org/reports/tr29/
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
|
||||||
|
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
|
||||||
|
// - http://userguide.icu-project.org/transforms/casemappings
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
// - Case folding
|
||||||
|
// - Wide and Narrow?
|
||||||
|
// - Segmenter option for title casing.
|
||||||
|
// - ASCII fast paths
|
||||||
|
// - Encode Soft-Dotted property within trie somehow.
|
||||||
|
|
||||||
|
// A Caser transforms given input to a certain case. It implements
|
||||||
|
// transform.Transformer.
|
||||||
|
//
|
||||||
|
// A Caser may be stateful and should therefore not be shared between
|
||||||
|
// goroutines.
|
||||||
|
type Caser struct {
|
||||||
|
t transform.SpanningTransformer
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytes returns a new byte slice with the result of converting b to the case
|
||||||
|
// form implemented by c.
|
||||||
|
func (c Caser) Bytes(b []byte) []byte {
|
||||||
|
b, _, _ = transform.Bytes(c.t, b)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string with the result of transforming s to the case form
|
||||||
|
// implemented by c.
|
||||||
|
func (c Caser) String(s string) string {
|
||||||
|
s, _, _ = transform.String(c.t, s)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset resets the Caser to be reused for new input after a previous call to
|
||||||
|
// Transform.
|
||||||
|
func (c Caser) Reset() { c.t.Reset() }
|
||||||
|
|
||||||
|
// Transform implements the transform.Transformer interface and transforms the
|
||||||
|
// given input to the case form implemented by c.
|
||||||
|
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
return c.t.Transform(dst, src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span implements the transform.SpanningTransformer interface.
|
||||||
|
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
return c.t.Span(src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upper returns a Caser for language-specific uppercasing.
|
||||||
|
func Upper(t language.Tag, opts ...Option) Caser {
|
||||||
|
return Caser{makeUpper(t, getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lower returns a Caser for language-specific lowercasing.
|
||||||
|
func Lower(t language.Tag, opts ...Option) Caser {
|
||||||
|
return Caser{makeLower(t, getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Title returns a Caser for language-specific title casing. It uses an
|
||||||
|
// approximation of the default Unicode Word Break algorithm.
|
||||||
|
func Title(t language.Tag, opts ...Option) Caser {
|
||||||
|
return Caser{makeTitle(t, getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fold returns a Caser that implements Unicode case folding. The returned Caser
|
||||||
|
// is stateless and safe to use concurrently by multiple goroutines.
|
||||||
|
//
|
||||||
|
// Case folding does not normalize the input and may not preserve a normal form.
|
||||||
|
// Use the collate or search package for more convenient and linguistically
|
||||||
|
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
|
||||||
|
// where security aspects are a concern.
|
||||||
|
func Fold(opts ...Option) Caser {
|
||||||
|
return Caser{makeFold(getOpts(opts...))}
|
||||||
|
}
|
||||||
|
|
||||||
|
// An Option is used to modify the behavior of a Caser.
|
||||||
|
type Option func(o options) options
|
||||||
|
|
||||||
|
// TODO: consider these options to take a boolean as well, like FinalSigma.
|
||||||
|
// The advantage of using this approach is that other providers of a lower-case
|
||||||
|
// algorithm could set different defaults by prefixing a user-provided slice
|
||||||
|
// of options with their own. This is handy, for instance, for the precis
|
||||||
|
// package which would override the default to not handle the Greek final sigma.
|
||||||
|
|
||||||
|
var (
|
||||||
|
// NoLower disables the lowercasing of non-leading letters for a title
|
||||||
|
// caser.
|
||||||
|
NoLower Option = noLower
|
||||||
|
|
||||||
|
// Compact omits mappings in case folding for characters that would grow the
|
||||||
|
// input. (Unimplemented.)
|
||||||
|
Compact Option = compact
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: option to preserve a normal form, if applicable?
|
||||||
|
|
||||||
|
// options collects the settings that Option values toggle.
type options struct {
	noLower bool // set by the NoLower option
	simple  bool // set by the Compact option

	// TODO: segmenter, max ignorable, alternative versions, etc.

	ignoreFinalSigma bool // controlled by HandleFinalSigma
}
|
||||||
|
|
||||||
|
func getOpts(o ...Option) (res options) {
|
||||||
|
for _, f := range o {
|
||||||
|
res = f(res)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func noLower(o options) options {
|
||||||
|
o.noLower = true
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
|
func compact(o options) options {
|
||||||
|
o.simple = true
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
|
// HandleFinalSigma specifies whether the special handling of Greek final sigma
|
||||||
|
// should be enabled. Unicode prescribes handling the Greek final sigma for all
|
||||||
|
// locales, but standards like IDNA and PRECIS override this default.
|
||||||
|
func HandleFinalSigma(enable bool) Option {
|
||||||
|
if enable {
|
||||||
|
return handleFinalSigma
|
||||||
|
}
|
||||||
|
return ignoreFinalSigma
|
||||||
|
}
|
||||||
|
|
||||||
|
func ignoreFinalSigma(o options) options {
|
||||||
|
o.ignoreFinalSigma = true
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleFinalSigma(o options) options {
|
||||||
|
o.ignoreFinalSigma = false
|
||||||
|
return o
|
||||||
|
}
|
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
Normal file
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
Normal file
|
@ -0,0 +1,376 @@
|
||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
import "golang.org/x/text/transform"
|
||||||
|
|
||||||
|
// A context is used for iterating over source bytes, fetching case info and
|
||||||
|
// writing to a destination buffer.
|
||||||
|
//
|
||||||
|
// Casing operations may need more than one rune of context to decide how a rune
|
||||||
|
// should be cased. Casing implementations should call checkpoint on context
|
||||||
|
// whenever it is known to be safe to return the runes processed so far.
|
||||||
|
//
|
||||||
|
// It is recommended for implementations to not allow for more than 30 case
|
||||||
|
// ignorables as lookahead (analogous to the limit in norm) and to use state if
|
||||||
|
// unbounded lookahead is needed for cased runes.
|
||||||
|
type context struct {
|
||||||
|
dst, src []byte
|
||||||
|
atEOF bool
|
||||||
|
|
||||||
|
pDst int // pDst points past the last written rune in dst.
|
||||||
|
pSrc int // pSrc points to the start of the currently scanned rune.
|
||||||
|
|
||||||
|
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
|
||||||
|
nDst, nSrc int
|
||||||
|
err error
|
||||||
|
|
||||||
|
sz int // size of current rune
|
||||||
|
info info // case information of currently scanned rune
|
||||||
|
|
||||||
|
// State preserved across calls to Transform.
|
||||||
|
isMidWord bool // false if next cased letter needs to be title-cased.
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *context) Reset() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ret returns the return values for the Transform method. It checks whether
|
||||||
|
// there were insufficient bytes in src to complete and introduces an error
|
||||||
|
// accordingly, if necessary.
|
||||||
|
func (c *context) ret() (nDst, nSrc int, err error) {
|
||||||
|
if c.err != nil || c.nSrc == len(c.src) {
|
||||||
|
return c.nDst, c.nSrc, c.err
|
||||||
|
}
|
||||||
|
// This point is only reached by mappers if there was no short destination
|
||||||
|
// buffer. This means that the source buffer was exhausted and that c.sz was
|
||||||
|
// set to 0 by next.
|
||||||
|
if c.atEOF && c.pSrc == len(c.src) {
|
||||||
|
return c.pDst, c.pSrc, nil
|
||||||
|
}
|
||||||
|
return c.nDst, c.nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
|
||||||
|
// retSpan returns the return values for the Span method. It checks whether
|
||||||
|
// there were insufficient bytes in src to complete and introduces an error
|
||||||
|
// accordingly, if necessary.
|
||||||
|
func (c *context) retSpan() (n int, err error) {
|
||||||
|
_, nSrc, err := c.ret()
|
||||||
|
return nSrc, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkpoint sets the return value buffer points for Transform to the current
|
||||||
|
// positions.
|
||||||
|
func (c *context) checkpoint() {
|
||||||
|
if c.err == nil {
|
||||||
|
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unreadRune causes the last rune read by next to be reread on the next
|
||||||
|
// invocation of next. Only one unreadRune may be called after a call to next.
|
||||||
|
func (c *context) unreadRune() {
|
||||||
|
c.sz = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *context) next() bool {
|
||||||
|
c.pSrc += c.sz
|
||||||
|
if c.pSrc == len(c.src) || c.err != nil {
|
||||||
|
c.info, c.sz = 0, 0
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
v, sz := trie.lookup(c.src[c.pSrc:])
|
||||||
|
c.info, c.sz = info(v), sz
|
||||||
|
if c.sz == 0 {
|
||||||
|
if c.atEOF {
|
||||||
|
// A zero size means we have an incomplete rune. If we are atEOF,
|
||||||
|
// this means it is an illegal rune, which we will consume one
|
||||||
|
// byte at a time.
|
||||||
|
c.sz = 1
|
||||||
|
} else {
|
||||||
|
c.err = transform.ErrShortSrc
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeBytes adds bytes to dst.
|
||||||
|
func (c *context) writeBytes(b []byte) bool {
|
||||||
|
if len(c.dst)-c.pDst < len(b) {
|
||||||
|
c.err = transform.ErrShortDst
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// This loop is faster than using copy.
|
||||||
|
for _, ch := range b {
|
||||||
|
c.dst[c.pDst] = ch
|
||||||
|
c.pDst++
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeString writes the given string to dst.
|
||||||
|
func (c *context) writeString(s string) bool {
|
||||||
|
if len(c.dst)-c.pDst < len(s) {
|
||||||
|
c.err = transform.ErrShortDst
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// This loop is faster than using copy.
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c.dst[c.pDst] = s[i]
|
||||||
|
c.pDst++
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy writes the current rune to dst.
|
||||||
|
func (c *context) copy() bool {
|
||||||
|
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
|
||||||
|
}
|
||||||
|
|
||||||
|
// copyXOR copies the current rune to dst and modifies it by applying the XOR
|
||||||
|
// pattern of the case info. It is the responsibility of the caller to ensure
|
||||||
|
// that this is a rune with a XOR pattern defined.
|
||||||
|
func (c *context) copyXOR() bool {
|
||||||
|
if !c.copy() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.info&xorIndexBit == 0 {
|
||||||
|
// Fast path for 6-bit XOR pattern, which covers most cases.
|
||||||
|
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
|
||||||
|
} else {
|
||||||
|
// Interpret XOR bits as an index.
|
||||||
|
// TODO: test performance for unrolling this loop. Verify that we have
|
||||||
|
// at least two bytes and at most three.
|
||||||
|
idx := c.info >> xorShift
|
||||||
|
for p := c.pDst - 1; ; p-- {
|
||||||
|
c.dst[p] ^= xorData[idx]
|
||||||
|
idx--
|
||||||
|
if xorData[idx] == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasPrefix returns true if src[pSrc:] starts with the given string.
|
||||||
|
func (c *context) hasPrefix(s string) bool {
|
||||||
|
b := c.src[c.pSrc:]
|
||||||
|
if len(b) < len(s) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, c := range b[:len(s)] {
|
||||||
|
if c != s[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// caseType returns an info with only the case bits, normalized to either
|
||||||
|
// cLower, cUpper, cTitle or cUncased.
|
||||||
|
func (c *context) caseType() info {
|
||||||
|
cm := c.info & 0x7
|
||||||
|
if cm < 4 {
|
||||||
|
return cm
|
||||||
|
}
|
||||||
|
if cm >= cXORCase {
|
||||||
|
// xor the last bit of the rune with the case type bits.
|
||||||
|
b := c.src[c.pSrc+c.sz-1]
|
||||||
|
return info(b&1) ^ cm&0x3
|
||||||
|
}
|
||||||
|
if cm == cIgnorableCased {
|
||||||
|
return cLower
|
||||||
|
}
|
||||||
|
return cUncased
|
||||||
|
}
|
||||||
|
|
||||||
|
// lower writes the lowercase version of the current rune to dst.
|
||||||
|
func lower(c *context) bool {
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
return c.copyXOR()
|
||||||
|
}
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||||
|
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||||
|
return c.writeString(e[offset : offset+nLower])
|
||||||
|
}
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
|
||||||
|
func isLower(c *context) bool {
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// upper writes the uppercase version of the current rune to dst.
|
||||||
|
func upper(c *context) bool {
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
return c.copyXOR()
|
||||||
|
}
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||||
|
// Get length of first special case mapping.
|
||||||
|
n := (e[1] >> lengthBits) & lengthMask
|
||||||
|
if ct == cTitle {
|
||||||
|
// The first special case mapping is for lower. Set n to the second.
|
||||||
|
if n == noChange {
|
||||||
|
n = 0
|
||||||
|
}
|
||||||
|
n, e = e[1]&lengthMask, e[n:]
|
||||||
|
}
|
||||||
|
if n != noChange {
|
||||||
|
return c.writeString(e[offset : offset+n])
|
||||||
|
}
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
|
||||||
|
// isUpper writes the isUppercase version of the current rune to dst.
|
||||||
|
func isUpper(c *context) bool {
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
// Get length of first special case mapping.
|
||||||
|
n := (e[1] >> lengthBits) & lengthMask
|
||||||
|
if ct == cTitle {
|
||||||
|
n = e[1] & lengthMask
|
||||||
|
}
|
||||||
|
if n != noChange {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// title writes the title case version of the current rune to dst.
|
||||||
|
func title(c *context) bool {
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
if ct == cLower {
|
||||||
|
return c.copyXOR()
|
||||||
|
}
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
// Get the exception data.
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||||
|
|
||||||
|
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||||
|
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||||
|
if nFirst != noChange {
|
||||||
|
e = e[nFirst:]
|
||||||
|
}
|
||||||
|
return c.writeString(e[offset : offset+nTitle])
|
||||||
|
}
|
||||||
|
if ct == cLower && nFirst != noChange {
|
||||||
|
// Use the uppercase version instead.
|
||||||
|
return c.writeString(e[offset : offset+nFirst])
|
||||||
|
}
|
||||||
|
// Already in correct case.
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
|
||||||
|
// isTitle reports whether the current rune is in title case.
|
||||||
|
func isTitle(c *context) bool {
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
if ct == cLower {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Get the exception data.
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||||
|
if ct == cLower && nFirst != noChange {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// foldFull writes the foldFull version of the current rune to dst.
|
||||||
|
func foldFull(c *context) bool {
|
||||||
|
if c.info&hasMappingMask == 0 {
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||||
|
return c.copyXOR()
|
||||||
|
}
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
n := e[0] & lengthMask
|
||||||
|
if n == 0 {
|
||||||
|
if ct == cLower {
|
||||||
|
return c.copy()
|
||||||
|
}
|
||||||
|
n = (e[1] >> lengthBits) & lengthMask
|
||||||
|
}
|
||||||
|
return c.writeString(e[2 : 2+n])
|
||||||
|
}
|
||||||
|
|
||||||
|
// isFoldFull reports whether the current run is mapped to foldFull
|
||||||
|
func isFoldFull(c *context) bool {
|
||||||
|
if c.info&hasMappingMask == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
ct := c.caseType()
|
||||||
|
if c.info&exceptionBit == 0 {
|
||||||
|
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
e := exceptions[c.info>>exceptionShift:]
|
||||||
|
n := e[0] & lengthMask
|
||||||
|
if n == 0 && ct == cLower {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
Normal file
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
import "golang.org/x/text/transform"
|
||||||
|
|
||||||
|
// caseFolder is the transformer that makeFold returns; it embeds
// transform.NopResetter.
type caseFolder struct {
	transform.NopResetter
}
|
||||||
|
|
||||||
|
// caseFolder implements the Transformer interface for doing case folding.
|
||||||
|
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() {
|
||||||
|
foldFull(&c)
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isFoldFull(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeFold(o options) transform.SpanningTransformer {
|
||||||
|
// TODO: Special case folding, through option Language, Special/Turkic, or
|
||||||
|
// both.
|
||||||
|
// TODO: Implement Compact options.
|
||||||
|
return &caseFolder{}
|
||||||
|
}
|
61
vendor/golang.org/x/text/cases/icu.go
generated
vendored
Normal file
61
vendor/golang.org/x/text/cases/icu.go
generated
vendored
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build icu
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
// Ideally these functions would be defined in a test file, but go test doesn't
|
||||||
|
// allow CGO in tests. The build tag should ensure either way that these
|
||||||
|
// functions will not end up in the package.
|
||||||
|
|
||||||
|
// TODO: Ensure that the correct ICU version is set.
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo LDFLAGS: -licui18n.57 -licuuc.57
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unicode/ustring.h>
|
||||||
|
#include <unicode/utypes.h>
|
||||||
|
#include <unicode/localpointer.h>
|
||||||
|
#include <unicode/ucasemap.h>
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
|
func doICU(tag, caser, input string) string {
|
||||||
|
err := C.UErrorCode(0)
|
||||||
|
loc := C.CString(tag)
|
||||||
|
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
|
||||||
|
|
||||||
|
buf := make([]byte, len(input)*4)
|
||||||
|
dst := (*C.char)(unsafe.Pointer(&buf[0]))
|
||||||
|
src := C.CString(input)
|
||||||
|
|
||||||
|
cn := C.int32_t(0)
|
||||||
|
|
||||||
|
switch caser {
|
||||||
|
case "fold":
|
||||||
|
cn = C.ucasemap_utf8FoldCase(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
case "lower":
|
||||||
|
cn = C.ucasemap_utf8ToLower(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
case "upper":
|
||||||
|
cn = C.ucasemap_utf8ToUpper(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
case "title":
|
||||||
|
cn = C.ucasemap_utf8ToTitle(cm,
|
||||||
|
dst, C.int32_t(len(buf)),
|
||||||
|
src, C.int32_t(len(input)),
|
||||||
|
&err)
|
||||||
|
}
|
||||||
|
return string(buf[:cn])
|
||||||
|
}
|
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
func (c info) cccVal() info {
|
||||||
|
if c&exceptionBit != 0 {
|
||||||
|
return info(exceptions[c>>exceptionShift]) & cccMask
|
||||||
|
}
|
||||||
|
return c & cccMask
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c info) cccType() info {
|
||||||
|
ccc := c.cccVal()
|
||||||
|
if ccc <= cccZero {
|
||||||
|
return cccZero
|
||||||
|
}
|
||||||
|
return ccc
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Implement full Unicode breaking algorithm:
|
||||||
|
// 1) Implement breaking in separate package.
|
||||||
|
// 2) Use the breaker here.
|
||||||
|
// 3) Compare table size and performance of using the more generic breaker.
|
||||||
|
//
|
||||||
|
// Note that we can extend the current algorithm to be much more accurate. This
|
||||||
|
// only makes sense, though, if the performance and/or space penalty of using
|
||||||
|
// the generic breaker is big. Extra data will only be needed for non-cased
|
||||||
|
// runes, which means there are sufficient bits left in the caseType.
|
||||||
|
// ICU prohibits breaking in such cases as well.
|
||||||
|
|
||||||
|
// For the purpose of title casing we use an approximation of the Unicode Word
|
||||||
|
// Breaking algorithm defined in Annex #29:
|
||||||
|
// https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
|
||||||
|
//
|
||||||
|
// For our approximation, we group the Word Break types into the following
|
||||||
|
// categories, with associated rules:
|
||||||
|
//
|
||||||
|
// 1) Letter:
|
||||||
|
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
|
||||||
|
// Rule: Never break between consecutive runes of this category.
|
||||||
|
//
|
||||||
|
// 2) Mid:
|
||||||
|
// MidLetter, MidNumLet, Single_Quote.
|
||||||
|
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
|
||||||
|
// Me, Cf, Lm or Sk).
|
||||||
|
// Rule: Don't break between Letter and Mid, but break between two Mids.
|
||||||
|
//
|
||||||
|
// 3) Break:
|
||||||
|
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
|
||||||
|
// Other.
|
||||||
|
// These categories should always result in a break between two cased letters.
|
||||||
|
// Rule: Always break.
|
||||||
|
//
|
||||||
|
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
|
||||||
|
// preventing a break between two cased letters. For now we will ignore this
|
||||||
|
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
|
||||||
|
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
|
||||||
|
//
|
||||||
|
// Note 2: the rule for Mid is very approximate, but works in most cases. To
|
||||||
|
// improve, we could store the categories in the trie value and use a FA to
|
||||||
|
// manage breaks. See TODO comment above.
|
||||||
|
//
|
||||||
|
// Note 3: according to the spec, it is possible for the Extend category to
|
||||||
|
// introduce breaks between other categories grouped in Letter. However, this
|
||||||
|
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
|
||||||
|
|
||||||
|
// isBreak returns whether this rune should introduce a break.
|
||||||
|
func (c info) isBreak() bool {
|
||||||
|
return c.cccVal() == cccBreak
|
||||||
|
}
|
||||||
|
|
||||||
|
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
|
||||||
|
// Numeric, ExtendNumLet, or Extend.
|
||||||
|
func (c info) isLetter() bool {
|
||||||
|
ccc := c.cccVal()
|
||||||
|
if ccc == cccZero {
|
||||||
|
return !c.isCaseIgnorable()
|
||||||
|
}
|
||||||
|
return ccc != cccBreak
|
||||||
|
}
|
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
Normal file
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
Normal file
|
@ -0,0 +1,816 @@
|
||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
// This file contains the definitions of case mappings for all supported
|
||||||
|
// languages. The rules for the language-specific tailorings were taken and
|
||||||
|
// modified from the CLDR transform definitions in common/transforms.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal"
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A mapFunc takes a context set to the current rune and writes the mapped
|
||||||
|
// version to the same context. It may advance the context to the next rune. It
|
||||||
|
// returns whether a checkpoint is possible: whether the pDst bytes written to
|
||||||
|
// dst so far won't need changing as we see more source bytes.
|
||||||
|
type mapFunc func(*context) bool
|
||||||
|
|
||||||
|
// A spanFunc takes a context set to the current rune and returns whether this
|
||||||
|
// rune would be altered when written to the output. It may advance the context
|
||||||
|
// to the next rune. It returns whether a checkpoint is possible.
|
||||||
|
type spanFunc func(*context) bool
|
||||||
|
|
||||||
|
// maxIgnorable defines the maximum number of ignorables to consider for
|
||||||
|
// lookahead operations.
|
||||||
|
const maxIgnorable = 30
|
||||||
|
|
||||||
|
// supported lists the language tags for which we have tailorings.
|
||||||
|
const supported = "und af az el lt nl tr"
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
tags := []language.Tag{}
|
||||||
|
for _, s := range strings.Split(supported, " ") {
|
||||||
|
tags = append(tags, language.MustParse(s))
|
||||||
|
}
|
||||||
|
matcher = internal.NewInheritanceMatcher(tags)
|
||||||
|
Supported = language.NewCoverage(tags)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
matcher *internal.InheritanceMatcher
|
||||||
|
|
||||||
|
Supported language.Coverage
|
||||||
|
|
||||||
|
// We keep the following lists separate, instead of having a single per-
|
||||||
|
// language struct, to give the compiler a chance to remove unused code.
|
||||||
|
|
||||||
|
// Some uppercase mappers are stateless, so we can precompute the
|
||||||
|
// Transformers and save a bit on runtime allocations.
|
||||||
|
upperFunc = []struct {
|
||||||
|
upper mapFunc
|
||||||
|
span spanFunc
|
||||||
|
}{
|
||||||
|
{nil, nil}, // und
|
||||||
|
{nil, nil}, // af
|
||||||
|
{aztrUpper(upper), isUpper}, // az
|
||||||
|
{elUpper, noSpan}, // el
|
||||||
|
{ltUpper(upper), noSpan}, // lt
|
||||||
|
{nil, nil}, // nl
|
||||||
|
{aztrUpper(upper), isUpper}, // tr
|
||||||
|
}
|
||||||
|
|
||||||
|
undUpper transform.SpanningTransformer = &undUpperCaser{}
|
||||||
|
undLower transform.SpanningTransformer = &undLowerCaser{}
|
||||||
|
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
|
||||||
|
|
||||||
|
lowerFunc = []mapFunc{
|
||||||
|
nil, // und
|
||||||
|
nil, // af
|
||||||
|
aztrLower, // az
|
||||||
|
nil, // el
|
||||||
|
ltLower, // lt
|
||||||
|
nil, // nl
|
||||||
|
aztrLower, // tr
|
||||||
|
}
|
||||||
|
|
||||||
|
titleInfos = []struct {
|
||||||
|
title mapFunc
|
||||||
|
lower mapFunc
|
||||||
|
titleSpan spanFunc
|
||||||
|
rewrite func(*context)
|
||||||
|
}{
|
||||||
|
{title, lower, isTitle, nil}, // und
|
||||||
|
{title, lower, isTitle, afnlRewrite}, // af
|
||||||
|
{aztrUpper(title), aztrLower, isTitle, nil}, // az
|
||||||
|
{title, lower, isTitle, nil}, // el
|
||||||
|
{ltUpper(title), ltLower, noSpan, nil}, // lt
|
||||||
|
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
|
||||||
|
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
|
||||||
|
_, i, _ := matcher.Match(t)
|
||||||
|
f := upperFunc[i].upper
|
||||||
|
if f == nil {
|
||||||
|
return undUpper
|
||||||
|
}
|
||||||
|
return &simpleCaser{f: f, span: upperFunc[i].span}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
|
||||||
|
_, i, _ := matcher.Match(t)
|
||||||
|
f := lowerFunc[i]
|
||||||
|
if f == nil {
|
||||||
|
if o.ignoreFinalSigma {
|
||||||
|
return undLowerIgnoreSigma
|
||||||
|
}
|
||||||
|
return undLower
|
||||||
|
}
|
||||||
|
if o.ignoreFinalSigma {
|
||||||
|
return &simpleCaser{f: f, span: isLower}
|
||||||
|
}
|
||||||
|
return &lowerCaser{
|
||||||
|
first: f,
|
||||||
|
midWord: finalSigma(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
|
||||||
|
_, i, _ := matcher.Match(t)
|
||||||
|
x := &titleInfos[i]
|
||||||
|
lower := x.lower
|
||||||
|
if o.noLower {
|
||||||
|
lower = (*context).copy
|
||||||
|
} else if !o.ignoreFinalSigma {
|
||||||
|
lower = finalSigma(lower)
|
||||||
|
}
|
||||||
|
return &titleCaser{
|
||||||
|
title: x.title,
|
||||||
|
lower: lower,
|
||||||
|
titleSpan: x.titleSpan,
|
||||||
|
rewrite: x.rewrite,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func noSpan(c *context) bool {
|
||||||
|
c.err = transform.ErrEndOfSpan
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: consider a similar special case for the vast majority lower case. This
|
||||||
|
// is a bit more involved so will require some more precise benchmarking to
|
||||||
|
// justify it.
|
||||||
|
|
||||||
|
type undUpperCaser struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
// undUpperCaser implements the Transformer interface for doing an upper case
|
||||||
|
// mapping for the root locale (und). It eliminates the need for an allocation
|
||||||
|
// as it prevents escaping by not using function pointers.
|
||||||
|
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() {
|
||||||
|
upper(&c)
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isUpper(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
|
||||||
|
// a lower case mapping for the root locale (und) ignoring final sigma
|
||||||
|
// handling. This casing algorithm is used in some performance-critical packages
|
||||||
|
// like secure/precis and x/net/http/idna, which warrants its special-casing.
|
||||||
|
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() && lower(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span implements a generic lower-casing. This is possible as isLower works
|
||||||
|
// for all lowercasing variants. All lowercase variants only vary in how they
|
||||||
|
// transform a non-lowercase letter. They will never change an already lowercase
|
||||||
|
// letter. In addition, there is no state.
|
||||||
|
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isLower(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
type simpleCaser struct {
|
||||||
|
context
|
||||||
|
f mapFunc
|
||||||
|
span spanFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
// simpleCaser implements the Transformer interface for doing a case operation
|
||||||
|
// on a rune-by-rune basis.
|
||||||
|
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
for c.next() && t.f(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && t.span(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// undLowerCaser implements the Transformer interface for doing a lower case
|
||||||
|
// mapping for the root locale (und) ignoring final sigma handling. This casing
|
||||||
|
// algorithm is used in some performance-critical packages like secure/precis
|
||||||
|
// and x/net/http/idna, which warrants its special-casing.
|
||||||
|
type undLowerCaser struct{ transform.NopResetter }
|
||||||
|
|
||||||
|
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
|
||||||
|
for isInterWord := true; c.next(); {
|
||||||
|
if isInterWord {
|
||||||
|
if c.info.isCased() {
|
||||||
|
if !lower(&c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = false
|
||||||
|
} else if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||||
|
if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = true
|
||||||
|
} else if !c.hasPrefix("Σ") {
|
||||||
|
if !lower(&c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else if !finalSigmaBody(&c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
c := context{src: src, atEOF: atEOF}
|
||||||
|
for c.next() && isLower(&c) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// lowerCaser implements the Transformer interface. The default Unicode lower
|
||||||
|
// casing requires different treatment for the first and subsequent characters
|
||||||
|
// of a word, most notably to handle the Greek final Sigma.
|
||||||
|
type lowerCaser struct {
|
||||||
|
undLowerIgnoreSigmaCaser
|
||||||
|
|
||||||
|
context
|
||||||
|
|
||||||
|
first, midWord mapFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
t.context = context{dst: dst, src: src, atEOF: atEOF}
|
||||||
|
c := &t.context
|
||||||
|
|
||||||
|
for isInterWord := true; c.next(); {
|
||||||
|
if isInterWord {
|
||||||
|
if c.info.isCased() {
|
||||||
|
if !t.first(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = false
|
||||||
|
} else if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||||
|
if !c.copy() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
isInterWord = true
|
||||||
|
} else if !t.midWord(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
// titleCaser implements the Transformer interface. Title casing algorithms
|
||||||
|
// distinguish between the first letter of a word and subsequent letters of the
|
||||||
|
// same word. It uses state to avoid requiring a potentially infinite lookahead.
|
||||||
|
type titleCaser struct {
|
||||||
|
context
|
||||||
|
|
||||||
|
// rune mappings used by the actual casing algorithms.
|
||||||
|
title mapFunc
|
||||||
|
lower mapFunc
|
||||||
|
titleSpan spanFunc
|
||||||
|
|
||||||
|
rewrite func(*context)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transform implements the standard Unicode title case algorithm as defined in
|
||||||
|
// Chapter 3 of The Unicode Standard:
|
||||||
|
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
|
||||||
|
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
|
||||||
|
// first cased character F following the word boundary. If F exists, map F to
|
||||||
|
// Titlecase_Mapping(F); then map all characters C between F and the following
|
||||||
|
// word boundary to Lowercase_Mapping(C).
|
||||||
|
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||||
|
c := &t.context
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
p := c.info
|
||||||
|
if t.rewrite != nil {
|
||||||
|
t.rewrite(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
wasMid := p.isMid()
|
||||||
|
// Break out of this loop on failure to ensure we do not modify the
|
||||||
|
// state incorrectly.
|
||||||
|
if p.isCased() {
|
||||||
|
if !c.isMidWord {
|
||||||
|
if !t.title(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.isMidWord = true
|
||||||
|
} else if !t.lower(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else if !c.copy() {
|
||||||
|
break
|
||||||
|
} else if p.isBreak() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
|
||||||
|
// As we save the state of the transformer, it is safe to call
|
||||||
|
// checkpoint after any successful write.
|
||||||
|
if !(c.isMidWord && wasMid) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if wasMid && c.info.isMid() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c.ret()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||||
|
c := &t.context
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
p := c.info
|
||||||
|
if t.rewrite != nil {
|
||||||
|
t.rewrite(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
wasMid := p.isMid()
|
||||||
|
// Break out of this loop on failure to ensure we do not modify the
|
||||||
|
// state incorrectly.
|
||||||
|
if p.isCased() {
|
||||||
|
if !c.isMidWord {
|
||||||
|
if !t.titleSpan(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.isMidWord = true
|
||||||
|
} else if !isLower(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else if p.isBreak() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
// As we save the state of the transformer, it is safe to call
|
||||||
|
// checkpoint after any successful write.
|
||||||
|
if !(c.isMidWord && wasMid) {
|
||||||
|
c.checkpoint()
|
||||||
|
}
|
||||||
|
|
||||||
|
if !c.next() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if wasMid && c.info.isMid() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c.retSpan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// finalSigma adds Greek final Sigma handing to another casing function. It
|
||||||
|
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
|
||||||
|
// case-ignorables and a cased letters.
|
||||||
|
func finalSigma(f mapFunc) mapFunc {
|
||||||
|
return func(c *context) bool {
|
||||||
|
if !c.hasPrefix("Σ") {
|
||||||
|
return f(c)
|
||||||
|
}
|
||||||
|
return finalSigmaBody(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func finalSigmaBody(c *context) bool {
|
||||||
|
// Current rune must be ∑.
|
||||||
|
|
||||||
|
// ::NFD();
|
||||||
|
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
||||||
|
// Σ } [:case-ignorable:]* [:cased:] → σ;
|
||||||
|
// [:cased:] [:case-ignorable:]* { Σ → ς;
|
||||||
|
// ::Any-Lower;
|
||||||
|
// ::NFC();
|
||||||
|
|
||||||
|
p := c.pDst
|
||||||
|
c.writeString("ς")
|
||||||
|
|
||||||
|
// TODO: we should do this here, but right now this will never have an
|
||||||
|
// effect as this is called when the prefix is Sigma, whereas Dutch and
|
||||||
|
// Afrikaans only test for an apostrophe.
|
||||||
|
//
|
||||||
|
// if t.rewrite != nil {
|
||||||
|
// t.rewrite(c)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// We need to do one more iteration after maxIgnorable, as a cased
|
||||||
|
// letter is not an ignorable and may modify the result.
|
||||||
|
wasMid := false
|
||||||
|
for i := 0; i < maxIgnorable+1; i++ {
|
||||||
|
if !c.next() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !c.info.isCaseIgnorable() {
|
||||||
|
// All Midword runes are also case ignorable, so we are
|
||||||
|
// guaranteed to have a letter or word break here. As we are
|
||||||
|
// unreading the run, there is no need to unset c.isMidWord;
|
||||||
|
// the title caser will handle this.
|
||||||
|
if c.info.isCased() {
|
||||||
|
// p+1 is guaranteed to be in bounds: if writing ς was
|
||||||
|
// successful, p+1 will contain the second byte of ς. If not,
|
||||||
|
// this function will have returned after c.next returned false.
|
||||||
|
c.dst[p+1]++ // ς → σ
|
||||||
|
}
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// A case ignorable may also introduce a word break, so we may need
|
||||||
|
// to continue searching even after detecting a break.
|
||||||
|
isMid := c.info.isMid()
|
||||||
|
if (wasMid && isMid) || c.info.isBreak() {
|
||||||
|
c.isMidWord = false
|
||||||
|
}
|
||||||
|
wasMid = isMid
|
||||||
|
c.copy()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// finalSigmaSpan would be the same as isLower.
|
||||||
|
|
||||||
|
// elUpper implements Greek upper casing, which entails removing a predefined
|
||||||
|
// set of non-blocked modifiers. Note that these accents should not be removed
|
||||||
|
// for title casing!
|
||||||
|
// Example: "Οδός" -> "ΟΔΟΣ".
|
||||||
|
func elUpper(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
|
||||||
|
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
|
||||||
|
|
||||||
|
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||||
|
oldPDst := c.pDst
|
||||||
|
if !upper(c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !unicode.Is(unicode.Greek, r) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
i := 0
|
||||||
|
// Take the properties of the uppercased rune that is already written to the
|
||||||
|
// destination. This saves us the trouble of having to uppercase the
|
||||||
|
// decomposed rune again.
|
||||||
|
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
|
||||||
|
// Restore the destination position and process the decomposed rune.
|
||||||
|
r, sz := utf8.DecodeRune(b)
|
||||||
|
if r <= 0xFF { // See A.6.1
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
c.pDst = oldPDst
|
||||||
|
// Insert the first rune and ignore the modifiers. See A.6.2.
|
||||||
|
c.writeBytes(b[:sz])
|
||||||
|
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
|
||||||
|
}
|
||||||
|
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
|
||||||
|
// Above and Iota Subscript
|
||||||
|
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
|
||||||
|
0x0301, // U+0301 COMBINING ACUTE ACCENT
|
||||||
|
0x0304, // U+0304 COMBINING MACRON
|
||||||
|
0x0306, // U+0306 COMBINING BREVE
|
||||||
|
0x0308, // U+0308 COMBINING DIAERESIS
|
||||||
|
0x0313, // U+0313 COMBINING COMMA ABOVE
|
||||||
|
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
|
||||||
|
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
|
||||||
|
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
|
||||||
|
// No-op. Gobble the modifier.
|
||||||
|
|
||||||
|
default:
|
||||||
|
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
|
||||||
|
// We don't need to test for IotaSubscript as the only rune that
|
||||||
|
// qualifies (U+0345) was already excluded in the switch statement
|
||||||
|
// above. See A.4.
|
||||||
|
|
||||||
|
case cccAbove:
|
||||||
|
return c.copy()
|
||||||
|
default:
|
||||||
|
// Some other modifier. We're still allowed to gobble Greek
|
||||||
|
// modifiers after this.
|
||||||
|
c.copy()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == maxIgnorable
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
|
||||||
|
|
||||||
|
func ltLower(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// # Introduce an explicit dot above when lowercasing capital I's and J's
|
||||||
|
// # whenever there are more accents above.
|
||||||
|
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||||||
|
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
|
||||||
|
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
|
||||||
|
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||||
|
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
// ::NFD();
|
||||||
|
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
|
||||||
|
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
|
||||||
|
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
|
||||||
|
// I \u0300 (Ì) → i \u0307 \u0300;
|
||||||
|
// I \u0301 (Í) → i \u0307 \u0301;
|
||||||
|
// I \u0303 (Ĩ) → i \u0307 \u0303;
|
||||||
|
// ::Any-Lower();
|
||||||
|
// ::NFC();
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
|
||||||
|
lower(c)
|
||||||
|
if r != 'I' && r != 'J' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p := norm.NFD.Properties(c.src[c.pSrc:])
|
||||||
|
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
|
||||||
|
// UTF-8 optimization: the decomposition will only have an above
|
||||||
|
// modifier if the last rune of the decomposition is in [U+300-U+311].
|
||||||
|
// In all other cases, a decomposition starting with I is always
|
||||||
|
// an I followed by modifiers that are not cased themselves. See A.2.
|
||||||
|
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
|
||||||
|
if !c.writeBytes(d[:1]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
c.dst[c.pDst-1] += 'a' - 'A' // lower
|
||||||
|
|
||||||
|
// Assumption: modifier never changes on lowercase. See A.1.
|
||||||
|
// Assumption: all modifiers added have CCC = Above. See A.2.3.
|
||||||
|
return c.writeString("\u0307") && c.writeBytes(d[1:])
|
||||||
|
}
|
||||||
|
// In all other cases the additional modifiers will have a CCC
|
||||||
|
// that is less than 230 (Above). We will insert the U+0307, if
|
||||||
|
// needed, after these modifiers so that a string in FCD form
|
||||||
|
// will remain so. See A.2.2.
|
||||||
|
lower(c)
|
||||||
|
i = 1
|
||||||
|
} else {
|
||||||
|
return lower(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch c.info.cccType() {
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
case cccAbove:
|
||||||
|
return c.writeString("\u0307") && c.copy() // See A.1.
|
||||||
|
default:
|
||||||
|
c.copy() // See A.1.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == maxIgnorable
|
||||||
|
}
|
||||||
|
|
||||||
|
// ltLowerSpan would be the same as isLower.
|
||||||
|
|
||||||
|
func ltUpper(f mapFunc) mapFunc {
|
||||||
|
return func(c *context) bool {
|
||||||
|
// Unicode:
|
||||||
|
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||||
|
//
|
||||||
|
// From CLDR:
|
||||||
|
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
|
||||||
|
// # intervening non-230 marks.
|
||||||
|
// ::NFD();
|
||||||
|
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
|
||||||
|
// ::Any-Upper();
|
||||||
|
// ::NFC();
|
||||||
|
|
||||||
|
// TODO: See A.5. A soft-dotted rune never has an exception. This would
|
||||||
|
// allow us to overload the exception bit and encode this property in
|
||||||
|
// info. Need to measure performance impact of this.
|
||||||
|
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||||
|
oldPDst := c.pDst
|
||||||
|
if !f(c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !unicode.Is(unicode.Soft_Dotted, r) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// We don't need to do an NFD normalization, as a soft-dotted rune never
|
||||||
|
// contains U+0307. See A.3.
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch c.info.cccType() {
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
case cccAbove:
|
||||||
|
if c.hasPrefix("\u0307") {
|
||||||
|
// We don't do a full NFC, but rather combine runes for
|
||||||
|
// some of the common cases. (Returning NFC or
|
||||||
|
// preserving normal form is neither a requirement nor
|
||||||
|
// a possibility anyway).
|
||||||
|
if !c.next() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
|
||||||
|
s := ""
|
||||||
|
switch c.src[c.pSrc+1] {
|
||||||
|
case 0x80: // U+0300 COMBINING GRAVE ACCENT
|
||||||
|
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
case 0x81: // U+0301 COMBINING ACUTE ACCENT
|
||||||
|
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
case 0x83: // U+0303 COMBINING TILDE
|
||||||
|
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
case 0x88: // U+0308 COMBINING DIAERESIS
|
||||||
|
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
if s != "" {
|
||||||
|
c.pDst = oldPDst
|
||||||
|
return c.writeString(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c.copy()
|
||||||
|
default:
|
||||||
|
c.copy()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == maxIgnorable
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
|
||||||
|
|
||||||
|
func aztrUpper(f mapFunc) mapFunc {
|
||||||
|
return func(c *context) bool {
|
||||||
|
// i→İ;
|
||||||
|
if c.src[c.pSrc] == 'i' {
|
||||||
|
return c.writeString("İ")
|
||||||
|
}
|
||||||
|
return f(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func aztrLower(c *context) (done bool) {
|
||||||
|
// From CLDR:
|
||||||
|
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||||||
|
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
// İ→i;
|
||||||
|
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||||
|
// # This matches the behavior of the canonically equivalent I-dot_above
|
||||||
|
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
|
||||||
|
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||||
|
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
|
||||||
|
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
|
||||||
|
// I→ı ;
|
||||||
|
// ::Any-Lower();
|
||||||
|
if c.hasPrefix("\u0130") { // İ
|
||||||
|
return c.writeString("i")
|
||||||
|
}
|
||||||
|
if c.src[c.pSrc] != 'I' {
|
||||||
|
return lower(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We ignore the lower-case I for now, but insert it later when we know
|
||||||
|
// which form we need.
|
||||||
|
start := c.pSrc + c.sz
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
Loop:
|
||||||
|
// We check for up to n ignorables before \u0307. As \u0307 is an
|
||||||
|
// ignorable as well, n is maxIgnorable-1.
|
||||||
|
for ; i < maxIgnorable && c.next(); i++ {
|
||||||
|
switch c.info.cccType() {
|
||||||
|
case cccAbove:
|
||||||
|
if c.hasPrefix("\u0307") {
|
||||||
|
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
|
||||||
|
}
|
||||||
|
done = true
|
||||||
|
break Loop
|
||||||
|
case cccZero:
|
||||||
|
c.unreadRune()
|
||||||
|
done = true
|
||||||
|
break Loop
|
||||||
|
default:
|
||||||
|
// We'll write this rune after we know which starter to use.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i == maxIgnorable {
|
||||||
|
done = true
|
||||||
|
}
|
||||||
|
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
|
||||||
|
}
|
||||||
|
|
||||||
|
// aztrLowerSpan would be the same as isLower.
|
||||||
|
|
||||||
|
func nlTitle(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// # Special titlecasing for Dutch initial "ij".
|
||||||
|
// ::Any-Title();
|
||||||
|
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||||
|
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||||
|
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
|
||||||
|
return title(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !c.writeString("I") || !c.next() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
|
||||||
|
return c.writeString("J")
|
||||||
|
}
|
||||||
|
c.unreadRune()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func nlTitleSpan(c *context) bool {
|
||||||
|
// From CLDR:
|
||||||
|
// # Special titlecasing for Dutch initial "ij".
|
||||||
|
// ::Any-Title();
|
||||||
|
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||||
|
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||||
|
if c.src[c.pSrc] != 'I' {
|
||||||
|
return isTitle(c)
|
||||||
|
}
|
||||||
|
if !c.next() || c.src[c.pSrc] == 'j' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c.src[c.pSrc] != 'J' {
|
||||||
|
c.unreadRune()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
|
||||||
|
func afnlRewrite(c *context) {
|
||||||
|
if c.hasPrefix("'") || c.hasPrefix("’") {
|
||||||
|
c.isMidWord = true
|
||||||
|
}
|
||||||
|
}
|
2255
vendor/golang.org/x/text/cases/tables10.0.0.go
generated
vendored
Normal file
2255
vendor/golang.org/x/text/cases/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2316
vendor/golang.org/x/text/cases/tables11.0.0.go
generated
vendored
Normal file
2316
vendor/golang.org/x/text/cases/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2359
vendor/golang.org/x/text/cases/tables12.0.0.go
generated
vendored
Normal file
2359
vendor/golang.org/x/text/cases/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2399
vendor/golang.org/x/text/cases/tables13.0.0.go
generated
vendored
Normal file
2399
vendor/golang.org/x/text/cases/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2527
vendor/golang.org/x/text/cases/tables15.0.0.go
generated
vendored
Normal file
2527
vendor/golang.org/x/text/cases/tables15.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2215
vendor/golang.org/x/text/cases/tables9.0.0.go
generated
vendored
Normal file
2215
vendor/golang.org/x/text/cases/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
217
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
Normal file
217
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
Normal file
|
@ -0,0 +1,217 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package cases
|
||||||
|
|
||||||
|
// This file contains definitions for interpreting the trie value of the case
|
||||||
|
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||||
|
// program and the resultant package. Sharing is achieved by the generator
|
||||||
|
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||||
|
|
||||||
|
// info holds case information for a single rune. It is the value returned
|
||||||
|
// by a trie lookup. Most mapping information can be stored in a single 16-bit
|
||||||
|
// value. If not, for example when a rune is mapped to multiple runes, the value
|
||||||
|
// stores some basic case data and an index into an array with additional data.
|
||||||
|
//
|
||||||
|
// The per-rune values have the following format:
|
||||||
|
//
|
||||||
|
// if (exception) {
|
||||||
|
// 15..4 unsigned exception index
|
||||||
|
// } else {
|
||||||
|
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||||
|
// Only 13..8 are used for XOR patterns.
|
||||||
|
// 7 inverseFold (fold to upper, not to lower)
|
||||||
|
// 6 index: interpret the XOR pattern as an index
|
||||||
|
// or isMid if case mode is cIgnorableUncased.
|
||||||
|
// 5..4 CCC: zero (normal or break), above or other
|
||||||
|
// }
|
||||||
|
// 3 exception: interpret this value as an exception index
|
||||||
|
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||||
|
// 2..0 case mode
|
||||||
|
//
|
||||||
|
// For the non-exceptional cases, a rune must be either uncased, lowercase or
|
||||||
|
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
|
||||||
|
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
|
||||||
|
// least-significant bits of the rune).
|
||||||
|
//
|
||||||
|
// See the definitions below for a more detailed description of the various
|
||||||
|
// bits.
|
||||||
|
type info uint16
|
||||||
|
|
||||||
|
const (
|
||||||
|
casedMask = 0x0003
|
||||||
|
fullCasedMask = 0x0007
|
||||||
|
ignorableMask = 0x0006
|
||||||
|
ignorableValue = 0x0004
|
||||||
|
|
||||||
|
inverseFoldBit = 1 << 7
|
||||||
|
isMidBit = 1 << 6
|
||||||
|
|
||||||
|
exceptionBit = 1 << 3
|
||||||
|
exceptionShift = 4
|
||||||
|
numExceptionBits = 12
|
||||||
|
|
||||||
|
xorIndexBit = 1 << 6
|
||||||
|
xorShift = 8
|
||||||
|
|
||||||
|
// There is no mapping if all xor bits and the exception bit are zero.
|
||||||
|
hasMappingMask = 0xff80 | exceptionBit
|
||||||
|
)
|
||||||
|
|
||||||
|
// The case mode bits encode the case type of a rune. This includes uncased,
|
||||||
|
// title, upper and lower case and case ignorable. (For a definition of these
|
||||||
|
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
|
||||||
|
// cases, a rune can be both cased and case-ignorable. This is encoded by
|
||||||
|
// cIgnorableCased. A rune of this type is always lower case. Some runes are
|
||||||
|
// cased while not having a mapping.
|
||||||
|
//
|
||||||
|
// A common pattern for scripts in the Unicode standard is for upper and lower
|
||||||
|
// case runes to alternate for increasing rune values (e.g. the accented Latin
|
||||||
|
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
|
||||||
|
// characters). We use this property by defining a cXORCase mode, where the case
|
||||||
|
// mode (always upper or lower case) is derived from the rune value. As the XOR
|
||||||
|
// pattern for case mappings is often identical for successive runes, using
|
||||||
|
// cXORCase can result in large series of identical trie values. This, in turn,
|
||||||
|
// allows us to better compress the trie blocks.
|
||||||
|
const (
|
||||||
|
cUncased info = iota // 000
|
||||||
|
cTitle // 001
|
||||||
|
cLower // 010
|
||||||
|
cUpper // 011
|
||||||
|
cIgnorableUncased // 100
|
||||||
|
cIgnorableCased // 101 // lower case if mappings exist
|
||||||
|
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
|
||||||
|
|
||||||
|
maxCaseMode = cUpper
|
||||||
|
)
|
||||||
|
|
||||||
|
func (c info) isCased() bool {
|
||||||
|
return c&casedMask != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c info) isCaseIgnorable() bool {
|
||||||
|
return c&ignorableMask == ignorableValue
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c info) isNotCasedAndNotCaseIgnorable() bool {
|
||||||
|
return c&fullCasedMask == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c info) isCaseIgnorableAndNotCased() bool {
|
||||||
|
return c&fullCasedMask == cIgnorableUncased
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c info) isMid() bool {
|
||||||
|
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
|
||||||
|
}
|
||||||
|
|
||||||
|
// The case mapping implementation will need to know about various Canonical
|
||||||
|
// Combining Class (CCC) values. We encode two of these in the trie value:
|
||||||
|
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
|
||||||
|
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
|
||||||
|
// the rune also has the break category Break (see below).
|
||||||
|
const (
|
||||||
|
cccBreak info = iota << 4
|
||||||
|
cccZero
|
||||||
|
cccAbove
|
||||||
|
cccOther
|
||||||
|
|
||||||
|
cccMask = cccBreak | cccZero | cccAbove | cccOther
|
||||||
|
)
|
||||||
|
|
||||||
|
// Numeric Canonical Combining Class values referenced by the case mapper.
const (
	starter = 0   // CCC zero
	above   = 230 // CCC "above", encoded in the trie as cccAbove
	// NOTE(review): presumably the CCC of the iota subscript; confirm against
	// the Unicode data.
	iotaSubscript = 240
)
|
||||||
|
|
||||||
|
// The exceptions slice holds data that does not fit in a normal info entry.
|
||||||
|
// The entry is pointed to by the exception index in an entry. It has the
|
||||||
|
// following format:
|
||||||
|
//
|
||||||
|
// Header:
|
||||||
|
//
|
||||||
|
// byte 0:
|
||||||
|
// 7..6 unused
|
||||||
|
// 5..4 CCC type (same bits as entry)
|
||||||
|
// 3 unused
|
||||||
|
// 2..0 length of fold
|
||||||
|
//
|
||||||
|
// byte 1:
|
||||||
|
// 7..6 unused
|
||||||
|
// 5..3 length of 1st mapping of case type
|
||||||
|
// 2..0 length of 2nd mapping of case type
|
||||||
|
//
|
||||||
|
// case 1st 2nd
|
||||||
|
// lower -> upper, title
|
||||||
|
// upper -> lower, title
|
||||||
|
// title -> lower, upper
|
||||||
|
//
|
||||||
|
// Lengths with the value 0x7 indicate no value and implies no change.
|
||||||
|
// A length of 0 indicates a mapping to zero-length string.
|
||||||
|
//
|
||||||
|
// Body bytes:
|
||||||
|
//
|
||||||
|
// case folding bytes
|
||||||
|
// lowercase mapping bytes
|
||||||
|
// uppercase mapping bytes
|
||||||
|
// titlecase mapping bytes
|
||||||
|
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||||
|
//
|
||||||
|
// Fallbacks:
|
||||||
|
//
|
||||||
|
// missing fold -> lower
|
||||||
|
// missing title -> upper
|
||||||
|
// all missing -> original rune
|
||||||
|
//
|
||||||
|
// exceptions starts with a dummy byte to enforce that there is no zero index
|
||||||
|
// value.
|
||||||
|
const (
	// lengthMask extracts one length field from an exception header byte.
	lengthMask = 0x07
	// lengthBits is the width, in bits, of one length field.
	lengthBits = 3
	// NOTE(review): the use of noChange is not visible in this part of the
	// file; confirm its meaning at the call sites.
	noChange = 0
)
|
||||||
|
|
||||||
|
// References to generated trie.
|
||||||
|
|
||||||
|
var trie = newCaseTrie(0)
|
||||||
|
|
||||||
|
var sparse = sparseBlocks{
|
||||||
|
values: sparseValues[:],
|
||||||
|
offsets: sparseOffsets[:],
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sparse block lookup code.
|
||||||
|
|
||||||
|
// valueRange is one entry of a sparse block: every byte in [lo, hi] maps to
// value.
type valueRange struct {
	value  uint16
	lo, hi byte
}

// sparseBlocks stores all sparse blocks back to back in values; block n
// occupies values[offsets[n]:offsets[n+1]].
type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}

// lookup binary-searches block n for the range containing b and returns the
// value of that range, or 0 when no range of the block contains b.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
	first, limit := s.offsets[n], s.offsets[n+1]
	for first < limit {
		mid := first + (limit-first)/2
		entry := s.values[mid]
		switch {
		case entry.lo <= b && b <= entry.hi:
			return entry.value
		case b < entry.lo:
			limit = mid
		default:
			first = mid + 1
		}
	}
	return 0
}
|
||||||
|
|
||||||
|
// lastRuneForTesting is the last rune used for testing. Everything after this
|
||||||
|
// is boring.
|
||||||
|
const lastRuneForTesting = rune(0x1FFFF)
|
49
vendor/golang.org/x/text/internal/internal.go
generated
vendored
Normal file
49
vendor/golang.org/x/text/internal/internal.go
generated
vendored
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package internal contains non-exported functionality that is used by
|
||||||
|
// packages in the text repository.
|
||||||
|
package internal // import "golang.org/x/text/internal"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SortTags sorts tags in place.
|
||||||
|
func SortTags(tags []language.Tag) {
|
||||||
|
sort.Sort(sorter(tags))
|
||||||
|
}
|
||||||
|
|
||||||
|
type sorter []language.Tag
|
||||||
|
|
||||||
|
func (s sorter) Len() int {
|
||||||
|
return len(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s sorter) Swap(i, j int) {
|
||||||
|
s[i], s[j] = s[j], s[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s sorter) Less(i, j int) bool {
|
||||||
|
return s[i].String() < s[j].String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// UniqueTags sorts and filters duplicate tags in place and returns a slice with
|
||||||
|
// only unique tags.
|
||||||
|
func UniqueTags(tags []language.Tag) []language.Tag {
|
||||||
|
if len(tags) <= 1 {
|
||||||
|
return tags
|
||||||
|
}
|
||||||
|
SortTags(tags)
|
||||||
|
k := 0
|
||||||
|
for i := 1; i < len(tags); i++ {
|
||||||
|
if tags[k].String() < tags[i].String() {
|
||||||
|
k++
|
||||||
|
tags[k] = tags[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tags[:k+1]
|
||||||
|
}
|
16
vendor/golang.org/x/text/internal/language/common.go
generated
vendored
Normal file
16
vendor/golang.org/x/text/internal/language/common.go
generated
vendored
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
// This file contains code common to the maketables.go and the package code.
|
||||||
|
|
||||||
|
// AliasType is the type of an alias in AliasMap.
type AliasType int8

// Known alias types; AliasTypeUnknown marks the absence of one.
const (
	Deprecated AliasType = 0
	Macro      AliasType = 1
	Legacy     AliasType = 2

	AliasTypeUnknown AliasType = -1
)
|
29
vendor/golang.org/x/text/internal/language/compact.go
generated
vendored
Normal file
29
vendor/golang.org/x/text/internal/language/compact.go
generated
vendored
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
// CompactCoreInfo is a compact integer with the three core tags encoded.
|
||||||
|
type CompactCoreInfo uint32
|
||||||
|
|
||||||
|
// GetCompactCore generates a uint32 value that is guaranteed to be unique for
|
||||||
|
// different language, region, and script values.
|
||||||
|
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
|
||||||
|
if t.LangID > langNoIndexOffset {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
cci |= CompactCoreInfo(t.LangID) << (8 + 12)
|
||||||
|
cci |= CompactCoreInfo(t.ScriptID) << 12
|
||||||
|
cci |= CompactCoreInfo(t.RegionID)
|
||||||
|
return cci, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tag generates a tag from c.
|
||||||
|
func (c CompactCoreInfo) Tag() Tag {
|
||||||
|
return Tag{
|
||||||
|
LangID: Language(c >> 20),
|
||||||
|
RegionID: Region(c & 0x3ff),
|
||||||
|
ScriptID: Script(c>>12) & 0xff,
|
||||||
|
}
|
||||||
|
}
|
61
vendor/golang.org/x/text/internal/language/compact/compact.go
generated
vendored
Normal file
61
vendor/golang.org/x/text/internal/language/compact/compact.go
generated
vendored
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package compact defines a compact representation of language tags.
|
||||||
|
//
|
||||||
|
// Common language tags (at least all for which locale information is defined
|
||||||
|
// in CLDR) are assigned a unique index. Each Tag is associated with such an
|
||||||
|
// ID for selecting language-related resources (such as translations) as well
|
||||||
|
// as one for selecting regional defaults (currency, number formatting, etc.)
|
||||||
|
//
|
||||||
|
// It may want to export this functionality at some point, but at this point
|
||||||
|
// this is only available for use within x/text.
|
||||||
|
package compact // import "golang.org/x/text/internal/language/compact"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ID is an integer identifying a single tag.
|
||||||
|
type ID uint16
|
||||||
|
|
||||||
|
func getCoreIndex(t language.Tag) (id ID, ok bool) {
|
||||||
|
cci, ok := language.GetCompactCore(t)
|
||||||
|
if !ok {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
i := sort.Search(len(coreTags), func(i int) bool {
|
||||||
|
return cci <= coreTags[i]
|
||||||
|
})
|
||||||
|
if i == len(coreTags) || coreTags[i] != cci {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return ID(i), true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parent returns the ID of the parent or the root ID if id is already the root.
|
||||||
|
func (id ID) Parent() ID {
|
||||||
|
return parents[id]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tag converts id to an internal language Tag.
|
||||||
|
func (id ID) Tag() language.Tag {
|
||||||
|
if int(id) >= len(coreTags) {
|
||||||
|
return specialTags[int(id)-len(coreTags)]
|
||||||
|
}
|
||||||
|
return coreTags[id].Tag()
|
||||||
|
}
|
||||||
|
|
||||||
|
var specialTags []language.Tag
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
tags := strings.Split(specialTagsStr, " ")
|
||||||
|
specialTags = make([]language.Tag, len(tags))
|
||||||
|
for i, t := range tags {
|
||||||
|
specialTags[i] = language.MustParse(t)
|
||||||
|
}
|
||||||
|
}
|
260
vendor/golang.org/x/text/internal/language/compact/language.go
generated
vendored
Normal file
260
vendor/golang.org/x/text/internal/language/compact/language.go
generated
vendored
Normal file
|
@ -0,0 +1,260 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate go run gen.go gen_index.go -output tables.go
|
||||||
|
//go:generate go run gen_parents.go
|
||||||
|
|
||||||
|
package compact
|
||||||
|
|
||||||
|
// TODO: Remove above NOTE after:
|
||||||
|
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||||
|
// specific language or locale. All language tag values are guaranteed to be
|
||||||
|
// well-formed.
|
||||||
|
type Tag struct {
|
||||||
|
// NOTE: exported tags will become part of the public API.
|
||||||
|
language ID
|
||||||
|
locale ID
|
||||||
|
full fullTag // always a language.Tag for now.
|
||||||
|
}
|
||||||
|
|
||||||
|
const _und = 0
|
||||||
|
|
||||||
|
type fullTag interface {
|
||||||
|
IsRoot() bool
|
||||||
|
Parent() language.Tag
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make a compact Tag from a fully specified internal language Tag.
|
||||||
|
func Make(t language.Tag) (tag Tag) {
|
||||||
|
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
|
||||||
|
if r, err := language.ParseRegion(region[:2]); err == nil {
|
||||||
|
tFull := t
|
||||||
|
t, _ = t.SetTypeForKey("rg", "")
|
||||||
|
// TODO: should we not consider "va" for the language tag?
|
||||||
|
var exact1, exact2 bool
|
||||||
|
tag.language, exact1 = FromTag(t)
|
||||||
|
t.RegionID = r
|
||||||
|
tag.locale, exact2 = FromTag(t)
|
||||||
|
if !exact1 || !exact2 {
|
||||||
|
tag.full = tFull
|
||||||
|
}
|
||||||
|
return tag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lang, ok := FromTag(t)
|
||||||
|
tag.language = lang
|
||||||
|
tag.locale = lang
|
||||||
|
if !ok {
|
||||||
|
tag.full = t
|
||||||
|
}
|
||||||
|
return tag
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tag returns an internal language Tag version of this tag.
|
||||||
|
func (t Tag) Tag() language.Tag {
|
||||||
|
if t.full != nil {
|
||||||
|
return t.full.(language.Tag)
|
||||||
|
}
|
||||||
|
tag := t.language.Tag()
|
||||||
|
if t.language != t.locale {
|
||||||
|
loc := t.locale.Tag()
|
||||||
|
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
|
||||||
|
}
|
||||||
|
return tag
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsCompact reports whether this tag is fully defined in terms of ID.
|
||||||
|
func (t *Tag) IsCompact() bool {
|
||||||
|
return t.full == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MayHaveVariants reports whether a tag may have variants. If it returns false
|
||||||
|
// it is guaranteed the tag does not have variants.
|
||||||
|
func (t Tag) MayHaveVariants() bool {
|
||||||
|
return t.full != nil || int(t.language) >= len(coreTags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MayHaveExtensions reports whether a tag may have extensions. If it returns
|
||||||
|
// false it is guaranteed the tag does not have them.
|
||||||
|
func (t Tag) MayHaveExtensions() bool {
|
||||||
|
return t.full != nil ||
|
||||||
|
int(t.language) >= len(coreTags) ||
|
||||||
|
t.language != t.locale
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsRoot returns true if t is equal to language "und".
|
||||||
|
func (t Tag) IsRoot() bool {
|
||||||
|
if t.full != nil {
|
||||||
|
return t.full.IsRoot()
|
||||||
|
}
|
||||||
|
return t.language == _und
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||||
|
// specific language are substituted with fields from the parent language.
|
||||||
|
// The parent for a language may change for newer versions of CLDR.
|
||||||
|
func (t Tag) Parent() Tag {
|
||||||
|
if t.full != nil {
|
||||||
|
return Make(t.full.Parent())
|
||||||
|
}
|
||||||
|
if t.language != t.locale {
|
||||||
|
// Simulate stripping -u-rg-xxxxxx
|
||||||
|
return Tag{language: t.language, locale: t.language}
|
||||||
|
}
|
||||||
|
// TODO: use parent lookup table once cycle from internal package is
|
||||||
|
// removed. Probably by internalizing the table and declaring this fast
|
||||||
|
// enough.
|
||||||
|
// lang := compactID(internal.Parent(uint16(t.language)))
|
||||||
|
lang, _ := FromTag(t.language.Tag().Parent())
|
||||||
|
return Tag{language: lang, locale: lang}
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextToken returns token t and the rest of the string.
//
// The first byte of s is skipped as the separator that introduces the token.
// t is the text up to, but not including, the next "-", and tail is the
// remainder starting at that "-". tail is empty when no further "-" follows.
// An empty s yields two empty strings instead of panicking.
func nextToken(s string) (t, tail string) {
	if s == "" {
		return "", ""
	}
	p := strings.Index(s[1:], "-")
	if p == -1 {
		return s[1:], ""
	}
	p++ // convert the index within s[1:] into an index within s
	return s[1:p], s[p:]
}
|
||||||
|
|
||||||
|
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
|
||||||
|
// for which data exists in the text repository.The index will change over time
|
||||||
|
// and should not be stored in persistent storage. If t does not match a compact
|
||||||
|
// index, exact will be false and the compact index will be returned for the
|
||||||
|
// first match after repeatedly taking the Parent of t.
|
||||||
|
func LanguageID(t Tag) (id ID, exact bool) {
|
||||||
|
return t.language, t.full == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegionalID returns the ID for the regional variant of this tag. This index is
|
||||||
|
// used to indicate region-specific overrides, such as default currency, default
|
||||||
|
// calendar and week data, default time cycle, and default measurement system
|
||||||
|
// and unit preferences.
|
||||||
|
//
|
||||||
|
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
|
||||||
|
// settings for currency, number formatting, etc. The CompactIndex for this tag
|
||||||
|
// will be that for en-GB, while the RegionalID will be the one corresponding to
|
||||||
|
// en-US.
|
||||||
|
func RegionalID(t Tag) (id ID, exact bool) {
|
||||||
|
return t.locale, t.full == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// LanguageTag returns t stripped of regional variant indicators.
|
||||||
|
//
|
||||||
|
// At the moment this means it is stripped of a regional and variant subtag "rg"
|
||||||
|
// and "va" in the "u" extension.
|
||||||
|
func (t Tag) LanguageTag() Tag {
|
||||||
|
if t.full == nil {
|
||||||
|
return Tag{language: t.language, locale: t.language}
|
||||||
|
}
|
||||||
|
tt := t.Tag()
|
||||||
|
tt.SetTypeForKey("rg", "")
|
||||||
|
tt.SetTypeForKey("va", "")
|
||||||
|
return Make(tt)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegionalTag returns the regional variant of the tag.
|
||||||
|
//
|
||||||
|
// At the moment this means that the region is set from the regional subtag
|
||||||
|
// "rg" in the "u" extension.
|
||||||
|
func (t Tag) RegionalTag() Tag {
|
||||||
|
rt := Tag{language: t.locale, locale: t.locale}
|
||||||
|
if t.full == nil {
|
||||||
|
return rt
|
||||||
|
}
|
||||||
|
b := language.Builder{}
|
||||||
|
tag := t.Tag()
|
||||||
|
// tag, _ = tag.SetTypeForKey("rg", "")
|
||||||
|
b.SetTag(t.locale.Tag())
|
||||||
|
if v := tag.Variants(); v != "" {
|
||||||
|
for _, v := range strings.Split(v, "-") {
|
||||||
|
b.AddVariant(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, e := range tag.Extensions() {
|
||||||
|
b.AddExt(e)
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromTag reports closest matching ID for an internal language Tag.
|
||||||
|
func FromTag(t language.Tag) (id ID, exact bool) {
|
||||||
|
// TODO: perhaps give more frequent tags a lower index.
|
||||||
|
// TODO: we could make the indexes stable. This will excluded some
|
||||||
|
// possibilities for optimization, so don't do this quite yet.
|
||||||
|
exact = true
|
||||||
|
|
||||||
|
b, s, r := t.Raw()
|
||||||
|
if t.HasString() {
|
||||||
|
if t.IsPrivateUse() {
|
||||||
|
// We have no entries for user-defined tags.
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
hasExtra := false
|
||||||
|
if t.HasVariants() {
|
||||||
|
if t.HasExtensions() {
|
||||||
|
build := language.Builder{}
|
||||||
|
build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
|
||||||
|
build.AddVariant(t.Variants())
|
||||||
|
exact = false
|
||||||
|
t = build.Make()
|
||||||
|
}
|
||||||
|
hasExtra = true
|
||||||
|
} else if _, ok := t.Extension('u'); ok {
|
||||||
|
// TODO: va may mean something else. Consider not considering it.
|
||||||
|
// Strip all but the 'va' entry.
|
||||||
|
old := t
|
||||||
|
variant := t.TypeForKey("va")
|
||||||
|
t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
|
||||||
|
if variant != "" {
|
||||||
|
t, _ = t.SetTypeForKey("va", variant)
|
||||||
|
hasExtra = true
|
||||||
|
}
|
||||||
|
exact = old == t
|
||||||
|
} else {
|
||||||
|
exact = false
|
||||||
|
}
|
||||||
|
if hasExtra {
|
||||||
|
// We have some variants.
|
||||||
|
for i, s := range specialTags {
|
||||||
|
if s == t {
|
||||||
|
return ID(i + len(coreTags)), exact
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exact = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if x, ok := getCoreIndex(t); ok {
|
||||||
|
return x, exact
|
||||||
|
}
|
||||||
|
exact = false
|
||||||
|
if r != 0 && s == 0 {
|
||||||
|
// Deal with cases where an extra script is inserted for the region.
|
||||||
|
t, _ := t.Maximize()
|
||||||
|
if x, ok := getCoreIndex(t); ok {
|
||||||
|
return x, exact
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for t = t.Parent(); t != root; t = t.Parent() {
|
||||||
|
// No variants specified: just compare core components.
|
||||||
|
// The key has the form lllssrrr, where l, s, and r are nibbles for
|
||||||
|
// respectively the langID, scriptID, and regionID.
|
||||||
|
if x, ok := getCoreIndex(t); ok {
|
||||||
|
return x, exact
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, exact
|
||||||
|
}
|
||||||
|
|
||||||
|
var root = language.Tag{}
|
120
vendor/golang.org/x/text/internal/language/compact/parents.go
generated
vendored
Normal file
120
vendor/golang.org/x/text/internal/language/compact/parents.go
generated
vendored
Normal file
|
@ -0,0 +1,120 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package compact
|
||||||
|
|
||||||
|
// parents maps a compact index of a tag to the compact index of the parent of
|
||||||
|
// this tag.
|
||||||
|
var parents = []ID{ // 775 elements
|
||||||
|
// Entry 0 - 3F
|
||||||
|
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
|
||||||
|
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||||
|
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||||
|
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||||
|
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
|
||||||
|
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
|
||||||
|
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
|
||||||
|
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
|
||||||
|
// Entry 40 - 7F
|
||||||
|
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
|
||||||
|
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
|
||||||
|
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
|
||||||
|
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
|
||||||
|
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
|
||||||
|
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
|
||||||
|
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
|
||||||
|
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
|
||||||
|
// Entry 80 - BF
|
||||||
|
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
|
||||||
|
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
|
||||||
|
// Entry C0 - FF
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
|
||||||
|
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||||
|
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
|
||||||
|
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
|
||||||
|
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
|
||||||
|
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
|
||||||
|
// Entry 100 - 13F
|
||||||
|
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
|
||||||
|
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
|
||||||
|
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
|
||||||
|
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
|
||||||
|
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||||
|
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||||
|
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||||
|
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||||
|
// Entry 140 - 17F
|
||||||
|
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||||
|
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||||
|
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
|
||||||
|
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
|
||||||
|
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
|
||||||
|
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
|
||||||
|
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
|
||||||
|
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
|
||||||
|
// Entry 180 - 1BF
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
|
||||||
|
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
|
||||||
|
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
|
||||||
|
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
|
||||||
|
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
|
||||||
|
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
|
||||||
|
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
|
||||||
|
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
|
||||||
|
// Entry 1C0 - 1FF
|
||||||
|
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
|
||||||
|
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
|
||||||
|
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
|
||||||
|
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
|
||||||
|
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
|
||||||
|
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
|
||||||
|
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
|
||||||
|
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
|
||||||
|
// Entry 200 - 23F
|
||||||
|
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
|
||||||
|
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
|
||||||
|
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
|
||||||
|
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
|
||||||
|
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
|
||||||
|
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
|
||||||
|
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
|
||||||
|
// Entry 240 - 27F
|
||||||
|
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
|
||||||
|
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
|
||||||
|
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
|
||||||
|
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
|
||||||
|
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
|
||||||
|
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
|
||||||
|
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
|
||||||
|
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
|
||||||
|
// Entry 280 - 2BF
|
||||||
|
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
|
||||||
|
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
|
||||||
|
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
|
||||||
|
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
|
||||||
|
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
|
||||||
|
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
|
||||||
|
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
|
||||||
|
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
|
||||||
|
// Entry 2C0 - 2FF
|
||||||
|
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
|
||||||
|
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
|
||||||
|
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
|
||||||
|
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
|
||||||
|
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
|
||||||
|
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
|
||||||
|
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
|
||||||
|
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
|
||||||
|
// Entry 300 - 33F
|
||||||
|
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
|
||||||
|
} // Size: 1574 bytes
|
||||||
|
|
||||||
|
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
|
1015
vendor/golang.org/x/text/internal/language/compact/tables.go
generated
vendored
Normal file
1015
vendor/golang.org/x/text/internal/language/compact/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
91
vendor/golang.org/x/text/internal/language/compact/tags.go
generated
vendored
Normal file
91
vendor/golang.org/x/text/internal/language/compact/tags.go
generated
vendored
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package compact
|
||||||
|
|
||||||
|
var (
|
||||||
|
und = Tag{}
|
||||||
|
|
||||||
|
Und Tag = Tag{}
|
||||||
|
|
||||||
|
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
|
||||||
|
Amharic Tag = Tag{language: amIndex, locale: amIndex}
|
||||||
|
Arabic Tag = Tag{language: arIndex, locale: arIndex}
|
||||||
|
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
|
||||||
|
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
|
||||||
|
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
|
||||||
|
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
|
||||||
|
Catalan Tag = Tag{language: caIndex, locale: caIndex}
|
||||||
|
Czech Tag = Tag{language: csIndex, locale: csIndex}
|
||||||
|
Danish Tag = Tag{language: daIndex, locale: daIndex}
|
||||||
|
German Tag = Tag{language: deIndex, locale: deIndex}
|
||||||
|
Greek Tag = Tag{language: elIndex, locale: elIndex}
|
||||||
|
English Tag = Tag{language: enIndex, locale: enIndex}
|
||||||
|
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
|
||||||
|
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
|
||||||
|
Spanish Tag = Tag{language: esIndex, locale: esIndex}
|
||||||
|
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
|
||||||
|
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
|
||||||
|
Estonian Tag = Tag{language: etIndex, locale: etIndex}
|
||||||
|
Persian Tag = Tag{language: faIndex, locale: faIndex}
|
||||||
|
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
|
||||||
|
Filipino Tag = Tag{language: filIndex, locale: filIndex}
|
||||||
|
French Tag = Tag{language: frIndex, locale: frIndex}
|
||||||
|
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
|
||||||
|
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
|
||||||
|
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
|
||||||
|
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
|
||||||
|
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
|
||||||
|
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
|
||||||
|
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
|
||||||
|
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
|
||||||
|
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
|
||||||
|
Italian Tag = Tag{language: itIndex, locale: itIndex}
|
||||||
|
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
|
||||||
|
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
|
||||||
|
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
|
||||||
|
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
|
||||||
|
Kannada Tag = Tag{language: knIndex, locale: knIndex}
|
||||||
|
Korean Tag = Tag{language: koIndex, locale: koIndex}
|
||||||
|
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
|
||||||
|
Lao Tag = Tag{language: loIndex, locale: loIndex}
|
||||||
|
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
|
||||||
|
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
|
||||||
|
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
|
||||||
|
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
|
||||||
|
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
|
||||||
|
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
|
||||||
|
Malay Tag = Tag{language: msIndex, locale: msIndex}
|
||||||
|
Burmese Tag = Tag{language: myIndex, locale: myIndex}
|
||||||
|
Nepali Tag = Tag{language: neIndex, locale: neIndex}
|
||||||
|
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
|
||||||
|
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
|
||||||
|
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
|
||||||
|
Polish Tag = Tag{language: plIndex, locale: plIndex}
|
||||||
|
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
|
||||||
|
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
|
||||||
|
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
|
||||||
|
Romanian Tag = Tag{language: roIndex, locale: roIndex}
|
||||||
|
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
|
||||||
|
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
|
||||||
|
Slovak Tag = Tag{language: skIndex, locale: skIndex}
|
||||||
|
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
|
||||||
|
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
|
||||||
|
Serbian Tag = Tag{language: srIndex, locale: srIndex}
|
||||||
|
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
|
||||||
|
Swedish Tag = Tag{language: svIndex, locale: svIndex}
|
||||||
|
Swahili Tag = Tag{language: swIndex, locale: swIndex}
|
||||||
|
Tamil Tag = Tag{language: taIndex, locale: taIndex}
|
||||||
|
Telugu Tag = Tag{language: teIndex, locale: teIndex}
|
||||||
|
Thai Tag = Tag{language: thIndex, locale: thIndex}
|
||||||
|
Turkish Tag = Tag{language: trIndex, locale: trIndex}
|
||||||
|
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
|
||||||
|
Urdu Tag = Tag{language: urIndex, locale: urIndex}
|
||||||
|
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
|
||||||
|
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
|
||||||
|
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
|
||||||
|
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
|
||||||
|
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
|
||||||
|
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
|
||||||
|
)
|
167
vendor/golang.org/x/text/internal/language/compose.go
generated
vendored
Normal file
167
vendor/golang.org/x/text/internal/language/compose.go
generated
vendored
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
// Copyright 2018 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A Builder allows constructing a Tag from individual components.
|
||||||
|
// Its main user is Compose in the top-level language package.
|
||||||
|
type Builder struct {
|
||||||
|
Tag Tag
|
||||||
|
|
||||||
|
private string // the x extension
|
||||||
|
variants []string
|
||||||
|
extensions []string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make returns a new Tag from the current settings.
|
||||||
|
func (b *Builder) Make() Tag {
|
||||||
|
t := b.Tag
|
||||||
|
|
||||||
|
if len(b.extensions) > 0 || len(b.variants) > 0 {
|
||||||
|
sort.Sort(sortVariants(b.variants))
|
||||||
|
sort.Strings(b.extensions)
|
||||||
|
|
||||||
|
if b.private != "" {
|
||||||
|
b.extensions = append(b.extensions, b.private)
|
||||||
|
}
|
||||||
|
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
|
||||||
|
buf := make([]byte, n)
|
||||||
|
p := t.genCoreBytes(buf)
|
||||||
|
t.pVariant = byte(p)
|
||||||
|
p += appendTokens(buf[p:], b.variants...)
|
||||||
|
t.pExt = uint16(p)
|
||||||
|
p += appendTokens(buf[p:], b.extensions...)
|
||||||
|
t.str = string(buf[:p])
|
||||||
|
// We may not always need to remake the string, but when or when not
|
||||||
|
// to do so is rather tricky.
|
||||||
|
scan := makeScanner(buf[:p])
|
||||||
|
t, _ = parse(&scan, "")
|
||||||
|
return t
|
||||||
|
|
||||||
|
} else if b.private != "" {
|
||||||
|
t.str = b.private
|
||||||
|
t.RemakeString()
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetTag copies all the settings from a given Tag. Any previously set values
|
||||||
|
// are discarded.
|
||||||
|
func (b *Builder) SetTag(t Tag) {
|
||||||
|
b.Tag.LangID = t.LangID
|
||||||
|
b.Tag.RegionID = t.RegionID
|
||||||
|
b.Tag.ScriptID = t.ScriptID
|
||||||
|
// TODO: optimize
|
||||||
|
b.variants = b.variants[:0]
|
||||||
|
if variants := t.Variants(); variants != "" {
|
||||||
|
for _, vr := range strings.Split(variants[1:], "-") {
|
||||||
|
b.variants = append(b.variants, vr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.extensions, b.private = b.extensions[:0], ""
|
||||||
|
for _, e := range t.Extensions() {
|
||||||
|
b.AddExt(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddExt adds extension e to the tag. e must be a valid extension as returned
|
||||||
|
// by Tag.Extension. If the extension already exists, it will be discarded,
|
||||||
|
// except for a -u extension, where non-existing key-type pairs will added.
|
||||||
|
func (b *Builder) AddExt(e string) {
|
||||||
|
if e[0] == 'x' {
|
||||||
|
if b.private == "" {
|
||||||
|
b.private = e
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for i, s := range b.extensions {
|
||||||
|
if s[0] == e[0] {
|
||||||
|
if e[0] == 'u' {
|
||||||
|
b.extensions[i] += e[1:]
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.extensions = append(b.extensions, e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetExt sets the extension e to the tag. e must be a valid extension as
|
||||||
|
// returned by Tag.Extension. If the extension already exists, it will be
|
||||||
|
// overwritten, except for a -u extension, where the individual key-type pairs
|
||||||
|
// will be set.
|
||||||
|
func (b *Builder) SetExt(e string) {
|
||||||
|
if e[0] == 'x' {
|
||||||
|
b.private = e
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for i, s := range b.extensions {
|
||||||
|
if s[0] == e[0] {
|
||||||
|
if e[0] == 'u' {
|
||||||
|
b.extensions[i] = e + s[1:]
|
||||||
|
} else {
|
||||||
|
b.extensions[i] = e
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.extensions = append(b.extensions, e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddVariant adds any number of variants.
|
||||||
|
func (b *Builder) AddVariant(v ...string) {
|
||||||
|
for _, v := range v {
|
||||||
|
if v != "" {
|
||||||
|
b.variants = append(b.variants, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearVariants removes any variants previously added, including those
|
||||||
|
// copied from a Tag in SetTag.
|
||||||
|
func (b *Builder) ClearVariants() {
|
||||||
|
b.variants = b.variants[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearExtensions removes any extensions previously added, including those
|
||||||
|
// copied from a Tag in SetTag.
|
||||||
|
func (b *Builder) ClearExtensions() {
|
||||||
|
b.private = ""
|
||||||
|
b.extensions = b.extensions[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// tokenLen returns the number of bytes needed to write all tokens when each
// one is preceded by a single separator byte.
func tokenLen(token ...string) (n int) {
	n = len(token) // one separator per token
	for i := range token {
		n += len(token[i])
	}
	return n
}
|
||||||
|
|
||||||
|
// appendTokens writes each token into b preceded by '-' and returns the total
// number of bytes the tokens occupy. b must be large enough to hold them
// (see tokenLen).
func appendTokens(b []byte, token ...string) int {
	n := 0
	for _, tok := range token {
		b[n] = '-'
		n++
		copy(b[n:], tok)
		n += len(tok)
	}
	return n
}
|
||||||
|
|
||||||
|
type sortVariants []string
|
||||||
|
|
||||||
|
func (s sortVariants) Len() int {
|
||||||
|
return len(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s sortVariants) Swap(i, j int) {
|
||||||
|
s[j], s[i] = s[i], s[j]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s sortVariants) Less(i, j int) bool {
|
||||||
|
return variantIndex[s[i]] < variantIndex[s[j]]
|
||||||
|
}
|
28
vendor/golang.org/x/text/internal/language/coverage.go
generated
vendored
Normal file
28
vendor/golang.org/x/text/internal/language/coverage.go
generated
vendored
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
// BaseLanguages returns the list of all supported base languages. It generates
|
||||||
|
// the list by traversing the internal structures.
|
||||||
|
func BaseLanguages() []Language {
|
||||||
|
base := make([]Language, 0, NumLanguages)
|
||||||
|
for i := 0; i < langNoIndexOffset; i++ {
|
||||||
|
// We included "und" already for the value 0.
|
||||||
|
if i != nonCanonicalUnd {
|
||||||
|
base = append(base, Language(i))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i := langNoIndexOffset
|
||||||
|
for _, v := range langNoIndex {
|
||||||
|
for k := 0; k < 8; k++ {
|
||||||
|
if v&1 == 1 {
|
||||||
|
base = append(base, Language(i))
|
||||||
|
}
|
||||||
|
v >>= 1
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return base
|
||||||
|
}
|
627
vendor/golang.org/x/text/internal/language/language.go
generated
vendored
Normal file
627
vendor/golang.org/x/text/internal/language/language.go
generated
vendored
Normal file
|
@ -0,0 +1,627 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate go run gen.go gen_common.go -output tables.go
|
||||||
|
|
||||||
|
package language // import "golang.org/x/text/internal/language"
|
||||||
|
|
||||||
|
// TODO: Remove above NOTE after:
|
||||||
|
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
	// extensions: max lang (3) + script (4) + max reg (3) + 2 dashes.
	maxCoreSize = 3 + 4 + 3 + 2

	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
	// is large enough to hold at least 99% of the BCP 47 tags.
	max99thPercentileSize = 32

	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
	// key-type pair: len("-u-") (3) + key (2) + dash (1) + max value (8).
	maxSimpleUExtensionSize = 3 + 2 + 1 + 8
)
|
||||||
|
|
||||||
|
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||||
|
// specific language or locale. All language tag values are guaranteed to be
|
||||||
|
// well-formed. The zero value of Tag is Und.
|
||||||
|
type Tag struct {
|
||||||
|
// TODO: the following fields have the form TagTypeID. This name is chosen
|
||||||
|
// to allow refactoring the public package without conflicting with its
|
||||||
|
// Base, Script, and Region methods. Once the transition is fully completed
|
||||||
|
// the ID can be stripped from the name.
|
||||||
|
|
||||||
|
LangID Language
|
||||||
|
RegionID Region
|
||||||
|
// TODO: we will soon run out of positions for ScriptID. Idea: instead of
|
||||||
|
// storing lang, region, and ScriptID codes, store only the compact index and
|
||||||
|
// have a lookup table from this code to its expansion. This greatly speeds
|
||||||
|
// up table lookup, speed up common variant cases.
|
||||||
|
// This will also immediately free up 3 extra bytes. Also, the pVariant
|
||||||
|
// field can now be moved to the lookup table, as the compact index uniquely
|
||||||
|
// determines the offset of a possible variant.
|
||||||
|
ScriptID Script
|
||||||
|
pVariant byte // offset in str, includes preceding '-'
|
||||||
|
pExt uint16 // offset of first extension, includes preceding '-'
|
||||||
|
|
||||||
|
// str is the string representation of the Tag. It will only be used if the
|
||||||
|
// tag has variants or extensions.
|
||||||
|
str string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make is a convenience wrapper for Parse that omits the error.
|
||||||
|
// In case of an error, a sensible default is returned.
|
||||||
|
func Make(s string) Tag {
|
||||||
|
t, _ := Parse(s)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raw returns the raw base language, script and region, without making an
|
||||||
|
// attempt to infer their values.
|
||||||
|
// TODO: consider removing
|
||||||
|
func (t Tag) Raw() (b Language, s Script, r Region) {
|
||||||
|
return t.LangID, t.ScriptID, t.RegionID
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalTags compares language, script and region subtags only.
|
||||||
|
func (t Tag) equalTags(a Tag) bool {
|
||||||
|
return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsRoot returns true if t is equal to language "und".
|
||||||
|
func (t Tag) IsRoot() bool {
|
||||||
|
if int(t.pVariant) < len(t.str) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return t.equalTags(Und)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
|
||||||
|
// tag.
|
||||||
|
func (t Tag) IsPrivateUse() bool {
|
||||||
|
return t.str != "" && t.pVariant == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemakeString is used to update t.str in case lang, script or region changed.
|
||||||
|
// It is assumed that pExt and pVariant still point to the start of the
|
||||||
|
// respective parts.
|
||||||
|
func (t *Tag) RemakeString() {
|
||||||
|
if t.str == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
extra := t.str[t.pVariant:]
|
||||||
|
if t.pVariant > 0 {
|
||||||
|
extra = extra[1:]
|
||||||
|
}
|
||||||
|
if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
|
||||||
|
t.str = extra
|
||||||
|
t.pVariant = 0
|
||||||
|
t.pExt = 0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
|
||||||
|
b := buf[:t.genCoreBytes(buf[:])]
|
||||||
|
if extra != "" {
|
||||||
|
diff := len(b) - int(t.pVariant)
|
||||||
|
b = append(b, '-')
|
||||||
|
b = append(b, extra...)
|
||||||
|
t.pVariant = uint8(int(t.pVariant) + diff)
|
||||||
|
t.pExt = uint16(int(t.pExt) + diff)
|
||||||
|
} else {
|
||||||
|
t.pVariant = uint8(len(b))
|
||||||
|
t.pExt = uint16(len(b))
|
||||||
|
}
|
||||||
|
t.str = string(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// genCoreBytes writes a string for the base languages, script and region tags
|
||||||
|
// to the given buffer and returns the number of bytes written. It will never
|
||||||
|
// write more than maxCoreSize bytes.
|
||||||
|
func (t *Tag) genCoreBytes(buf []byte) int {
|
||||||
|
n := t.LangID.StringToBuf(buf[:])
|
||||||
|
if t.ScriptID != 0 {
|
||||||
|
n += copy(buf[n:], "-")
|
||||||
|
n += copy(buf[n:], t.ScriptID.String())
|
||||||
|
}
|
||||||
|
if t.RegionID != 0 {
|
||||||
|
n += copy(buf[n:], "-")
|
||||||
|
n += copy(buf[n:], t.RegionID.String())
|
||||||
|
}
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the canonical string representation of the language tag.
|
||||||
|
func (t Tag) String() string {
|
||||||
|
if t.str != "" {
|
||||||
|
return t.str
|
||||||
|
}
|
||||||
|
if t.ScriptID == 0 && t.RegionID == 0 {
|
||||||
|
return t.LangID.String()
|
||||||
|
}
|
||||||
|
buf := [maxCoreSize]byte{}
|
||||||
|
return string(buf[:t.genCoreBytes(buf[:])])
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalText implements encoding.TextMarshaler.
|
||||||
|
func (t Tag) MarshalText() (text []byte, err error) {
|
||||||
|
if t.str != "" {
|
||||||
|
text = append(text, t.str...)
|
||||||
|
} else if t.ScriptID == 0 && t.RegionID == 0 {
|
||||||
|
text = append(text, t.LangID.String()...)
|
||||||
|
} else {
|
||||||
|
buf := [maxCoreSize]byte{}
|
||||||
|
text = buf[:t.genCoreBytes(buf[:])]
|
||||||
|
}
|
||||||
|
return text, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||||
|
func (t *Tag) UnmarshalText(text []byte) error {
|
||||||
|
tag, err := Parse(string(text))
|
||||||
|
*t = tag
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variants returns the part of the tag holding all variants or the empty string
|
||||||
|
// if there are no variants defined.
|
||||||
|
func (t Tag) Variants() string {
|
||||||
|
if t.pVariant == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return t.str[t.pVariant:t.pExt]
|
||||||
|
}
|
||||||
|
|
||||||
|
// VariantOrPrivateUseTags returns variants or private use tags.
|
||||||
|
func (t Tag) VariantOrPrivateUseTags() string {
|
||||||
|
if t.pExt > 0 {
|
||||||
|
return t.str[t.pVariant:t.pExt]
|
||||||
|
}
|
||||||
|
return t.str[t.pVariant:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasString reports whether this tag defines more than just the raw
|
||||||
|
// components.
|
||||||
|
func (t Tag) HasString() bool {
|
||||||
|
return t.str != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||||
|
// specific language are substituted with fields from the parent language.
|
||||||
|
// The parent for a language may change for newer versions of CLDR.
|
||||||
|
func (t Tag) Parent() Tag {
|
||||||
|
if t.str != "" {
|
||||||
|
// Strip the variants and extensions.
|
||||||
|
b, s, r := t.Raw()
|
||||||
|
t = Tag{LangID: b, ScriptID: s, RegionID: r}
|
||||||
|
if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
|
||||||
|
base, _ := addTags(Tag{LangID: t.LangID})
|
||||||
|
if base.ScriptID == t.ScriptID {
|
||||||
|
return Tag{LangID: t.LangID}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
if t.LangID != 0 {
|
||||||
|
if t.RegionID != 0 {
|
||||||
|
maxScript := t.ScriptID
|
||||||
|
if maxScript == 0 {
|
||||||
|
max, _ := addTags(t)
|
||||||
|
maxScript = max.ScriptID
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range parents {
|
||||||
|
if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
|
||||||
|
for _, r := range parents[i].fromRegion {
|
||||||
|
if Region(r) == t.RegionID {
|
||||||
|
return Tag{
|
||||||
|
LangID: t.LangID,
|
||||||
|
ScriptID: Script(parents[i].script),
|
||||||
|
RegionID: Region(parents[i].toRegion),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip the script if it is the default one.
|
||||||
|
base, _ := addTags(Tag{LangID: t.LangID})
|
||||||
|
if base.ScriptID != maxScript {
|
||||||
|
return Tag{LangID: t.LangID, ScriptID: maxScript}
|
||||||
|
}
|
||||||
|
return Tag{LangID: t.LangID}
|
||||||
|
} else if t.ScriptID != 0 {
|
||||||
|
// The parent for an base-script pair with a non-default script is
|
||||||
|
// "und" instead of the base language.
|
||||||
|
base, _ := addTags(Tag{LangID: t.LangID})
|
||||||
|
if base.ScriptID != t.ScriptID {
|
||||||
|
return Und
|
||||||
|
}
|
||||||
|
return Tag{LangID: t.LangID}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Und
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseExtension parses s as an extension and returns it on success.
|
||||||
|
func ParseExtension(s string) (ext string, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
ext = ""
|
||||||
|
err = ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
scan := makeScannerString(s)
|
||||||
|
var end int
|
||||||
|
if n := len(scan.token); n != 1 {
|
||||||
|
return "", ErrSyntax
|
||||||
|
}
|
||||||
|
scan.toLower(0, len(scan.b))
|
||||||
|
end = parseExtension(&scan)
|
||||||
|
if end != len(s) {
|
||||||
|
return "", ErrSyntax
|
||||||
|
}
|
||||||
|
return string(scan.b), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasVariants reports whether t has variants.
|
||||||
|
func (t Tag) HasVariants() bool {
|
||||||
|
return uint16(t.pVariant) < t.pExt
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasExtensions reports whether t has extensions.
|
||||||
|
func (t Tag) HasExtensions() bool {
|
||||||
|
return int(t.pExt) < len(t.str)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extension returns the extension of type x for tag t. It will return
|
||||||
|
// false for ok if t does not have the requested extension. The returned
|
||||||
|
// extension will be invalid in this case.
|
||||||
|
func (t Tag) Extension(x byte) (ext string, ok bool) {
|
||||||
|
for i := int(t.pExt); i < len(t.str)-1; {
|
||||||
|
var ext string
|
||||||
|
i, ext = getExtension(t.str, i)
|
||||||
|
if ext[0] == x {
|
||||||
|
return ext, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extensions returns all extensions of t.
|
||||||
|
func (t Tag) Extensions() []string {
|
||||||
|
e := []string{}
|
||||||
|
for i := int(t.pExt); i < len(t.str)-1; {
|
||||||
|
var ext string
|
||||||
|
i, ext = getExtension(t.str, i)
|
||||||
|
e = append(e, ext)
|
||||||
|
}
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
|
// TypeForKey returns the type associated with the given key, where key and type
|
||||||
|
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||||
|
//
|
||||||
|
// If there are multiple types associated with a key, only the first will be
|
||||||
|
// returned. If there is no type associated with a key, it returns the empty
|
||||||
|
// string.
|
||||||
|
func (t Tag) TypeForKey(key string) string {
|
||||||
|
if _, start, end, _ := t.findTypeForKey(key); end != start {
|
||||||
|
s := t.str[start:end]
|
||||||
|
if p := strings.IndexByte(s, '-'); p >= 0 {
|
||||||
|
s = s[:p]
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// errPrivateUse is returned by SetTypeForKey when the tag is a private use tag.
var errPrivateUse = errors.New("cannot set a key on a private use tag")

// errInvalidArguments is returned by SetTypeForKey when the key or the value
// has an unacceptable length.
var errInvalidArguments = errors.New("invalid key or type")
|
||||||
|
|
||||||
|
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||||
|
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
// An empty value removes an existing pair with the same key.
|
||||||
|
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||||
|
if t.IsPrivateUse() {
|
||||||
|
return t, errPrivateUse
|
||||||
|
}
|
||||||
|
if len(key) != 2 {
|
||||||
|
return t, errInvalidArguments
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove the setting if value is "".
|
||||||
|
if value == "" {
|
||||||
|
start, sep, end, _ := t.findTypeForKey(key)
|
||||||
|
if start != sep {
|
||||||
|
// Remove a possible empty extension.
|
||||||
|
switch {
|
||||||
|
case t.str[start-2] != '-': // has previous elements.
|
||||||
|
case end == len(t.str), // end of string
|
||||||
|
end+2 < len(t.str) && t.str[end+2] == '-': // end of extension
|
||||||
|
start -= 2
|
||||||
|
}
|
||||||
|
if start == int(t.pVariant) && end == len(t.str) {
|
||||||
|
t.str = ""
|
||||||
|
t.pVariant, t.pExt = 0, 0
|
||||||
|
} else {
|
||||||
|
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(value) < 3 || len(value) > 8 {
|
||||||
|
return t, errInvalidArguments
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
buf [maxCoreSize + maxSimpleUExtensionSize]byte
|
||||||
|
uStart int // start of the -u extension.
|
||||||
|
)
|
||||||
|
|
||||||
|
// Generate the tag string if needed.
|
||||||
|
if t.str == "" {
|
||||||
|
uStart = t.genCoreBytes(buf[:])
|
||||||
|
buf[uStart] = '-'
|
||||||
|
uStart++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create new key-type pair and parse it to verify.
|
||||||
|
b := buf[uStart:]
|
||||||
|
copy(b, "u-")
|
||||||
|
copy(b[2:], key)
|
||||||
|
b[4] = '-'
|
||||||
|
b = b[:5+copy(b[5:], value)]
|
||||||
|
scan := makeScanner(b)
|
||||||
|
if parseExtensions(&scan); scan.err != nil {
|
||||||
|
return t, scan.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assemble the replacement string.
|
||||||
|
if t.str == "" {
|
||||||
|
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
|
||||||
|
t.str = string(buf[:uStart+len(b)])
|
||||||
|
} else {
|
||||||
|
s := t.str
|
||||||
|
start, sep, end, hasExt := t.findTypeForKey(key)
|
||||||
|
if start == sep {
|
||||||
|
if hasExt {
|
||||||
|
b = b[2:]
|
||||||
|
}
|
||||||
|
t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:])
|
||||||
|
} else {
|
||||||
|
t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// findTypeForKey returns the start and end position for the type corresponding
|
||||||
|
// to key or the point at which to insert the key-value pair if the type
|
||||||
|
// wasn't found. The hasExt return value reports whether an -u extension was present.
|
||||||
|
// Note: the extensions are typically very small and are likely to contain
|
||||||
|
// only one key-type pair.
|
||||||
|
func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
|
||||||
|
p := int(t.pExt)
|
||||||
|
if len(key) != 2 || p == len(t.str) || p == 0 {
|
||||||
|
return p, p, p, false
|
||||||
|
}
|
||||||
|
s := t.str
|
||||||
|
|
||||||
|
// Find the correct extension.
|
||||||
|
for p++; s[p] != 'u'; p++ {
|
||||||
|
if s[p] > 'u' {
|
||||||
|
p--
|
||||||
|
return p, p, p, false
|
||||||
|
}
|
||||||
|
if p = nextExtension(s, p); p == len(s) {
|
||||||
|
return len(s), len(s), len(s), false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Proceed to the hyphen following the extension name.
|
||||||
|
p++
|
||||||
|
|
||||||
|
// curKey is the key currently being processed.
|
||||||
|
curKey := ""
|
||||||
|
|
||||||
|
// Iterate over keys until we get the end of a section.
|
||||||
|
for {
|
||||||
|
end = p
|
||||||
|
for p++; p < len(s) && s[p] != '-'; p++ {
|
||||||
|
}
|
||||||
|
n := p - end - 1
|
||||||
|
if n <= 2 && curKey == key {
|
||||||
|
if sep < end {
|
||||||
|
sep++
|
||||||
|
}
|
||||||
|
return start, sep, end, true
|
||||||
|
}
|
||||||
|
switch n {
|
||||||
|
case 0, // invalid string
|
||||||
|
1: // next extension
|
||||||
|
return end, end, end, true
|
||||||
|
case 2:
|
||||||
|
// next key
|
||||||
|
curKey = s[end+1 : p]
|
||||||
|
if curKey > key {
|
||||||
|
return end, end, end, true
|
||||||
|
}
|
||||||
|
start = end
|
||||||
|
sep = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||||
|
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||||
|
// or another error if another error occurred.
|
||||||
|
func ParseBase(s string) (l Language, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
l = 0
|
||||||
|
err = ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if n := len(s); n < 2 || 3 < n {
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
var buf [3]byte
|
||||||
|
return getLangID(buf[:copy(buf[:], s)])
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseScript parses a 4-letter ISO 15924 code.
|
||||||
|
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||||
|
// or another error if another error occurred.
|
||||||
|
func ParseScript(s string) (scr Script, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
scr = 0
|
||||||
|
err = ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if len(s) != 4 {
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
var buf [4]byte
|
||||||
|
return getScriptID(script, buf[:copy(buf[:], s)])
|
||||||
|
}
|
||||||
|
|
||||||
|
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||||
|
// It returns an error if r is not a valid code.
|
||||||
|
func EncodeM49(r int) (Region, error) {
|
||||||
|
return getRegionM49(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||||
|
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||||
|
// or another error if another error occurred.
|
||||||
|
func ParseRegion(s string) (r Region, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
r = 0
|
||||||
|
err = ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if n := len(s); n < 2 || 3 < n {
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
var buf [3]byte
|
||||||
|
return getRegionID(buf[:copy(buf[:], s)])
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsCountry returns whether this region is a country or autonomous area. This
|
||||||
|
// includes non-standard definitions from CLDR.
|
||||||
|
func (r Region) IsCountry() bool {
|
||||||
|
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsGroup returns whether this region defines a collection of regions. This
|
||||||
|
// includes non-standard definitions from CLDR.
|
||||||
|
func (r Region) IsGroup() bool {
|
||||||
|
if r == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return int(regionInclusion[r]) < len(regionContainment)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contains returns whether Region c is contained by Region r. It returns true
|
||||||
|
// if c == r.
|
||||||
|
func (r Region) Contains(c Region) bool {
|
||||||
|
if r == c {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
g := regionInclusion[r]
|
||||||
|
if g >= nRegionGroups {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
m := regionContainment[g]
|
||||||
|
|
||||||
|
d := regionInclusion[c]
|
||||||
|
b := regionInclusionBits[d]
|
||||||
|
|
||||||
|
// A contained country may belong to multiple disjoint groups. Matching any
|
||||||
|
// of these indicates containment. If the contained region is a group, it
|
||||||
|
// must strictly be a subset.
|
||||||
|
if d >= nRegionGroups {
|
||||||
|
return b&m != 0
|
||||||
|
}
|
||||||
|
return b&^m == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
var errNoTLD = errors.New("language: region is not a valid ccTLD")
|
||||||
|
|
||||||
|
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||||
|
// In all other cases it returns either the region itself or an error.
|
||||||
|
//
|
||||||
|
// This method may return an error for a region for which there exists a
|
||||||
|
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||||
|
// region will already be canonicalized it was obtained from a Tag that was
|
||||||
|
// obtained using any of the default methods.
|
||||||
|
func (r Region) TLD() (Region, error) {
|
||||||
|
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
|
||||||
|
// difference between ISO 3166-1 and IANA ccTLD.
|
||||||
|
if r == _GB {
|
||||||
|
r = _UK
|
||||||
|
}
|
||||||
|
if (r.typ() & ccTLD) == 0 {
|
||||||
|
return 0, errNoTLD
|
||||||
|
}
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Canonicalize returns the region or a possible replacement if the region is
|
||||||
|
// deprecated. It will not return a replacement for deprecated regions that
|
||||||
|
// are split into multiple regions.
|
||||||
|
func (r Region) Canonicalize() Region {
|
||||||
|
if cr := normRegion(r); cr != 0 {
|
||||||
|
return cr
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
	// ID is the value stored for the variant in variantIndex.
	ID uint8
	// str is the lower-cased variant subtag; it is what String returns.
	str string
}
|
||||||
|
|
||||||
|
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||||
|
// a valid variant.
|
||||||
|
func ParseVariant(s string) (v Variant, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
v = Variant{}
|
||||||
|
err = ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
s = strings.ToLower(s)
|
||||||
|
if id, ok := variantIndex[s]; ok {
|
||||||
|
return Variant{id, s}, nil
|
||||||
|
}
|
||||||
|
return Variant{}, NewValueError([]byte(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the string representation of the variant.
|
||||||
|
func (v Variant) String() string {
|
||||||
|
return v.str
|
||||||
|
}
|
412
vendor/golang.org/x/text/internal/language/lookup.go
generated
vendored
Normal file
412
vendor/golang.org/x/text/internal/language/lookup.go
generated
vendored
Normal file
|
@ -0,0 +1,412 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/tag"
|
||||||
|
)
|
||||||
|
|
||||||
|
// findIndex tries to find the given tag in idx and returns a standardized error
|
||||||
|
// if it could not be found.
|
||||||
|
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
|
||||||
|
if !tag.FixCase(form, key) {
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
i := idx.Index(key)
|
||||||
|
if i == -1 {
|
||||||
|
return 0, NewValueError(key)
|
||||||
|
}
|
||||||
|
return i, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// searchUint returns the smallest index i in imap for which imap[i] >= key,
// or len(imap) if there is none. It binary-searches, so imap is expected to
// be sorted in ascending order.
func searchUint(imap []uint16, key uint16) int {
	atOrAbove := func(i int) bool {
		return imap[i] >= key
	}
	return sort.Search(len(imap), atOrAbove)
}
|
||||||
|
|
||||||
|
// Language is a compact numeric identifier for a base language subtag.
// The zero value is the unspecified language, which String renders as "und".
type Language uint16
|
||||||
|
|
||||||
|
// getLangID returns the langID of s if s is a canonical subtag
|
||||||
|
// or langUnknown if s is not a canonical subtag.
|
||||||
|
func getLangID(s []byte) (Language, error) {
|
||||||
|
if len(s) == 2 {
|
||||||
|
return getLangISO2(s)
|
||||||
|
}
|
||||||
|
return getLangISO3(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO language normalization as well as the AliasMaps could be moved to the
|
||||||
|
// higher level package, but it is a bit tricky to separate the generation.
|
||||||
|
|
||||||
|
func (id Language) Canonicalize() (Language, AliasType) {
|
||||||
|
return normLang(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// normLang returns the mapped langID of id according to mapping m.
|
||||||
|
func normLang(id Language) (Language, AliasType) {
|
||||||
|
k := sort.Search(len(AliasMap), func(i int) bool {
|
||||||
|
return AliasMap[i].From >= uint16(id)
|
||||||
|
})
|
||||||
|
if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
|
||||||
|
return Language(AliasMap[k].To), AliasTypes[k]
|
||||||
|
}
|
||||||
|
return id, AliasTypeUnknown
|
||||||
|
}
|
||||||
|
|
||||||
|
// getLangISO2 returns the langID for the given 2-letter ISO language code
|
||||||
|
// or unknownLang if this does not exist.
|
||||||
|
func getLangISO2(s []byte) (Language, error) {
|
||||||
|
if !tag.FixCase("zz", s) {
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
|
||||||
|
return Language(i), nil
|
||||||
|
}
|
||||||
|
return 0, NewValueError(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
const base = 'z' - 'a' + 1
|
||||||
|
|
||||||
|
func strToInt(s []byte) uint {
|
||||||
|
v := uint(0)
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
v *= base
|
||||||
|
v += uint(s[i] - 'a')
|
||||||
|
}
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
// converts the given integer to the original ASCII string passed to strToInt.
|
||||||
|
// len(s) must match the number of characters obtained.
|
||||||
|
func intToStr(v uint, s []byte) {
|
||||||
|
for i := len(s) - 1; i >= 0; i-- {
|
||||||
|
s[i] = byte(v%base) + 'a'
|
||||||
|
v /= base
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getLangISO3 returns the langID for the given 3-letter ISO language code
|
||||||
|
// or unknownLang if this does not exist.
|
||||||
|
func getLangISO3(s []byte) (Language, error) {
|
||||||
|
if tag.FixCase("und", s) {
|
||||||
|
// first try to match canonical 3-letter entries
|
||||||
|
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
|
||||||
|
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
|
||||||
|
// We treat "und" as special and always translate it to "unspecified".
|
||||||
|
// Note that ZZ and Zzzz are private use and are not treated as
|
||||||
|
// unspecified by default.
|
||||||
|
id := Language(i)
|
||||||
|
if id == nonCanonicalUnd {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
return id, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i := altLangISO3.Index(s); i != -1 {
|
||||||
|
return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
|
||||||
|
}
|
||||||
|
n := strToInt(s)
|
||||||
|
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
|
||||||
|
return Language(n) + langNoIndexOffset, nil
|
||||||
|
}
|
||||||
|
// Check for non-canonical uses of ISO3.
|
||||||
|
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
|
||||||
|
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||||
|
return Language(i), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, NewValueError(s)
|
||||||
|
}
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
|
||||||
|
// StringToBuf writes the string to b and returns the number of bytes
|
||||||
|
// written. cap(b) must be >= 3.
|
||||||
|
func (id Language) StringToBuf(b []byte) int {
|
||||||
|
if id >= langNoIndexOffset {
|
||||||
|
intToStr(uint(id)-langNoIndexOffset, b[:3])
|
||||||
|
return 3
|
||||||
|
} else if id == 0 {
|
||||||
|
return copy(b, "und")
|
||||||
|
}
|
||||||
|
l := lang[id<<2:]
|
||||||
|
if l[3] == 0 {
|
||||||
|
return copy(b, l[:3])
|
||||||
|
}
|
||||||
|
return copy(b, l[:2])
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the BCP 47 representation of the langID.
|
||||||
|
// Use b as variable name, instead of id, to ensure the variable
|
||||||
|
// used is consistent with that of Base in which this type is embedded.
|
||||||
|
func (b Language) String() string {
|
||||||
|
if b == 0 {
|
||||||
|
return "und"
|
||||||
|
} else if b >= langNoIndexOffset {
|
||||||
|
b -= langNoIndexOffset
|
||||||
|
buf := [3]byte{}
|
||||||
|
intToStr(uint(b), buf[:])
|
||||||
|
return string(buf[:])
|
||||||
|
}
|
||||||
|
l := lang.Elem(int(b))
|
||||||
|
if l[3] == 0 {
|
||||||
|
return l[:3]
|
||||||
|
}
|
||||||
|
return l[:2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// ISO3 returns the ISO 639-3 language code.
|
||||||
|
func (b Language) ISO3() string {
|
||||||
|
if b == 0 || b >= langNoIndexOffset {
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
l := lang.Elem(int(b))
|
||||||
|
if l[3] == 0 {
|
||||||
|
return l[:3]
|
||||||
|
} else if l[2] == 0 {
|
||||||
|
return altLangISO3.Elem(int(l[3]))[:3]
|
||||||
|
}
|
||||||
|
// This allocation will only happen for 3-letter ISO codes
|
||||||
|
// that are non-canonical BCP 47 language identifiers.
|
||||||
|
return l[0:1] + l[2:4]
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||||
|
func (b Language) IsPrivateUse() bool {
|
||||||
|
return langPrivateStart <= b && b <= langPrivateEnd
|
||||||
|
}
|
||||||
|
|
||||||
|
// SuppressScript returns the script marked as SuppressScript in the IANA
|
||||||
|
// language tag repository, or 0 if there is no such script.
|
||||||
|
func (b Language) SuppressScript() Script {
|
||||||
|
if b < langNoIndexOffset {
|
||||||
|
return Script(suppressScript[b])
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Region is a compact numeric identifier for a region subtag.
// The zero value is the unspecified region, which String renders as "ZZ".
type Region uint16
|
||||||
|
|
||||||
|
// getRegionID returns the region id for s if s is a valid 2-letter region code
|
||||||
|
// or unknownRegion.
|
||||||
|
func getRegionID(s []byte) (Region, error) {
|
||||||
|
if len(s) == 3 {
|
||||||
|
if isAlpha(s[0]) {
|
||||||
|
return getRegionISO3(s)
|
||||||
|
}
|
||||||
|
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
|
||||||
|
return getRegionM49(int(i))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return getRegionISO2(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
|
||||||
|
// or unknownRegion if this does not exist.
|
||||||
|
func getRegionISO2(s []byte) (Region, error) {
|
||||||
|
i, err := findIndex(regionISO, s, "ZZ")
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return Region(i) + isoRegionOffset, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
|
||||||
|
// or unknownRegion if this does not exist.
|
||||||
|
func getRegionISO3(s []byte) (Region, error) {
|
||||||
|
if tag.FixCase("ZZZ", s) {
|
||||||
|
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
|
||||||
|
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||||
|
return Region(i) + isoRegionOffset, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := 0; i < len(altRegionISO3); i += 3 {
|
||||||
|
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
|
||||||
|
return Region(altRegionIDs[i/3]), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, NewValueError(s)
|
||||||
|
}
|
||||||
|
return 0, ErrSyntax
|
||||||
|
}
|
||||||
|
|
||||||
|
func getRegionM49(n int) (Region, error) {
|
||||||
|
if 0 < n && n <= 999 {
|
||||||
|
const (
|
||||||
|
searchBits = 7
|
||||||
|
regionBits = 9
|
||||||
|
regionMask = 1<<regionBits - 1
|
||||||
|
)
|
||||||
|
idx := n >> searchBits
|
||||||
|
buf := fromM49[m49Index[idx]:m49Index[idx+1]]
|
||||||
|
val := uint16(n) << regionBits // we rely on bits shifting out
|
||||||
|
i := sort.Search(len(buf), func(i int) bool {
|
||||||
|
return buf[i] >= val
|
||||||
|
})
|
||||||
|
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
|
||||||
|
return Region(r & regionMask), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var e ValueError
|
||||||
|
fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
|
||||||
|
return 0, e
|
||||||
|
}
|
||||||
|
|
||||||
|
// normRegion returns a region if r is deprecated or 0 otherwise.
|
||||||
|
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
|
||||||
|
// TODO: consider mapping split up regions to new most populous one (like CLDR).
|
||||||
|
func normRegion(r Region) Region {
|
||||||
|
m := regionOldMap
|
||||||
|
k := sort.Search(len(m), func(i int) bool {
|
||||||
|
return m[i].From >= uint16(r)
|
||||||
|
})
|
||||||
|
if k < len(m) && m[k].From == uint16(r) {
|
||||||
|
return Region(m[k].To)
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
iso3166UserAssigned = 1 << iota
|
||||||
|
ccTLD
|
||||||
|
bcp47Region
|
||||||
|
)
|
||||||
|
|
||||||
|
func (r Region) typ() byte {
|
||||||
|
return regionTypes[r]
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the BCP 47 representation for the region.
|
||||||
|
// It returns "ZZ" for an unspecified region.
|
||||||
|
func (r Region) String() string {
|
||||||
|
if r < isoRegionOffset {
|
||||||
|
if r == 0 {
|
||||||
|
return "ZZ"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%03d", r.M49())
|
||||||
|
}
|
||||||
|
r -= isoRegionOffset
|
||||||
|
return regionISO.Elem(int(r))[:2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// ISO3 returns the 3-letter ISO code of r.
|
||||||
|
// Note that not all regions have a 3-letter ISO code.
|
||||||
|
// In such cases this method returns "ZZZ".
|
||||||
|
func (r Region) ISO3() string {
|
||||||
|
if r < isoRegionOffset {
|
||||||
|
return "ZZZ"
|
||||||
|
}
|
||||||
|
r -= isoRegionOffset
|
||||||
|
reg := regionISO.Elem(int(r))
|
||||||
|
switch reg[2] {
|
||||||
|
case 0:
|
||||||
|
return altRegionISO3[reg[3]:][:3]
|
||||||
|
case ' ':
|
||||||
|
return "ZZZ"
|
||||||
|
}
|
||||||
|
return reg[0:1] + reg[2:4]
|
||||||
|
}
|
||||||
|
|
||||||
|
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||||
|
// is not defined for r.
|
||||||
|
func (r Region) M49() int {
|
||||||
|
return int(m49[r])
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||||
|
// may include private-use tags that are assigned by CLDR and used in this
|
||||||
|
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||||
|
func (r Region) IsPrivateUse() bool {
|
||||||
|
return r.typ()&iso3166UserAssigned != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Script is a compact numeric identifier for a script subtag.
// The zero value is the unspecified script, which String renders as "Zzzz".
type Script uint16
|
||||||
|
|
||||||
|
// getScriptID returns the script id for string s. It assumes that s
|
||||||
|
// is of the format [A-Z][a-z]{3}.
|
||||||
|
func getScriptID(idx tag.Index, s []byte) (Script, error) {
|
||||||
|
i, err := findIndex(idx, s, "Zzzz")
|
||||||
|
return Script(i), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the script code in title case.
|
||||||
|
// It returns "Zzzz" for an unspecified script.
|
||||||
|
func (s Script) String() string {
|
||||||
|
if s == 0 {
|
||||||
|
return "Zzzz"
|
||||||
|
}
|
||||||
|
return script.Elem(int(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||||
|
func (s Script) IsPrivateUse() bool {
|
||||||
|
return _Qaaa <= s && s <= _Qabx
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
maxAltTaglen = len("en-US-POSIX")
|
||||||
|
maxLen = maxAltTaglen
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
|
||||||
|
// their base language or index to more elaborate tag.
|
||||||
|
grandfatheredMap = map[[maxLen]byte]int16{
|
||||||
|
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
|
||||||
|
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
|
||||||
|
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
|
||||||
|
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
|
||||||
|
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
|
||||||
|
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
|
||||||
|
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
|
||||||
|
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
|
||||||
|
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
|
||||||
|
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
|
||||||
|
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
|
||||||
|
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
|
||||||
|
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
|
||||||
|
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
|
||||||
|
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
|
||||||
|
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
|
||||||
|
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
|
||||||
|
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
|
||||||
|
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
|
||||||
|
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
|
||||||
|
|
||||||
|
// Grandfathered tags with no modern replacement will be converted as
|
||||||
|
// follows:
|
||||||
|
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
|
||||||
|
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
|
||||||
|
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
|
||||||
|
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
|
||||||
|
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
|
||||||
|
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
|
||||||
|
|
||||||
|
// CLDR-specific tag.
|
||||||
|
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
|
||||||
|
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX"
|
||||||
|
}
|
||||||
|
|
||||||
|
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
|
||||||
|
|
||||||
|
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
|
||||||
|
)
|
||||||
|
|
||||||
|
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
|
||||||
|
if v, ok := grandfatheredMap[s]; ok {
|
||||||
|
if v < 0 {
|
||||||
|
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
|
||||||
|
}
|
||||||
|
t.LangID = Language(v)
|
||||||
|
return t, true
|
||||||
|
}
|
||||||
|
return t, false
|
||||||
|
}
|
226
vendor/golang.org/x/text/internal/language/match.go
generated
vendored
Normal file
226
vendor/golang.org/x/text/internal/language/match.go
generated
vendored
Normal file
|
@ -0,0 +1,226 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
|
||||||
|
// scriptRegionFlags is a set of bit flags.
// NOTE(review): presumably the type of the flags field that the likely-subtag
// tables test against isList, scriptInFrom and regionInFrom; confirm against
// the generated tables.
type scriptRegionFlags uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
isList = 1 << iota
|
||||||
|
scriptInFrom
|
||||||
|
regionInFrom
|
||||||
|
)
|
||||||
|
|
||||||
|
func (t *Tag) setUndefinedLang(id Language) {
|
||||||
|
if t.LangID == 0 {
|
||||||
|
t.LangID = id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tag) setUndefinedScript(id Script) {
|
||||||
|
if t.ScriptID == 0 {
|
||||||
|
t.ScriptID = id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tag) setUndefinedRegion(id Region) {
|
||||||
|
if t.RegionID == 0 || t.RegionID.Contains(id) {
|
||||||
|
t.RegionID = id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrMissingLikelyTagsData indicates no information was available
|
||||||
|
// to compute likely values of missing tags.
|
||||||
|
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||||
|
|
||||||
|
// addLikelySubtags sets subtags to their most likely value, given the locale.
|
||||||
|
// In most cases this means setting fields for unknown values, but in some
|
||||||
|
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
|
||||||
|
// if the given locale cannot be expanded.
|
||||||
|
func (t Tag) addLikelySubtags() (Tag, error) {
|
||||||
|
id, err := addTags(t)
|
||||||
|
if err != nil {
|
||||||
|
return t, err
|
||||||
|
} else if id.equalTags(t) {
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
id.RemakeString()
|
||||||
|
return id, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// specializeRegion attempts to specialize a group region.
|
||||||
|
func specializeRegion(t *Tag) bool {
|
||||||
|
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||||
|
x := likelyRegionGroup[i]
|
||||||
|
if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
|
||||||
|
t.RegionID = Region(x.region)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maximize returns a new tag with missing tags filled in.
|
||||||
|
func (t Tag) Maximize() (Tag, error) {
|
||||||
|
return addTags(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func addTags(t Tag) (Tag, error) {
|
||||||
|
// We leave private use identifiers alone.
|
||||||
|
if t.IsPrivateUse() {
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
if t.ScriptID != 0 && t.RegionID != 0 {
|
||||||
|
if t.LangID != 0 {
|
||||||
|
// already fully specified
|
||||||
|
specializeRegion(&t)
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
// Search matches for und-script-region. Note that for these cases
|
||||||
|
// region will never be a group so there is no need to check for this.
|
||||||
|
list := likelyRegion[t.RegionID : t.RegionID+1]
|
||||||
|
if x := list[0]; x.flags&isList != 0 {
|
||||||
|
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
|
||||||
|
}
|
||||||
|
for _, x := range list {
|
||||||
|
// Deviating from the spec. See match_test.go for details.
|
||||||
|
if Script(x.script) == t.ScriptID {
|
||||||
|
t.setUndefinedLang(Language(x.lang))
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if t.LangID != 0 {
|
||||||
|
// Search matches for lang-script and lang-region, where lang != und.
|
||||||
|
if t.LangID < langNoIndexOffset {
|
||||||
|
x := likelyLang[t.LangID]
|
||||||
|
if x.flags&isList != 0 {
|
||||||
|
list := likelyLangList[x.region : x.region+uint16(x.script)]
|
||||||
|
if t.ScriptID != 0 {
|
||||||
|
for _, x := range list {
|
||||||
|
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
|
||||||
|
t.setUndefinedRegion(Region(x.region))
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if t.RegionID != 0 {
|
||||||
|
count := 0
|
||||||
|
goodScript := true
|
||||||
|
tt := t
|
||||||
|
for _, x := range list {
|
||||||
|
// We visit all entries for which the script was not
|
||||||
|
// defined, including the ones where the region was not
|
||||||
|
// defined. This allows for proper disambiguation within
|
||||||
|
// regions.
|
||||||
|
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
|
||||||
|
tt.RegionID = Region(x.region)
|
||||||
|
tt.setUndefinedScript(Script(x.script))
|
||||||
|
goodScript = goodScript && tt.ScriptID == Script(x.script)
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if count == 1 {
|
||||||
|
return tt, nil
|
||||||
|
}
|
||||||
|
// Even if we fail to find a unique Region, we might have
|
||||||
|
// an unambiguous script.
|
||||||
|
if goodScript {
|
||||||
|
t.ScriptID = tt.ScriptID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Search matches for und-script.
|
||||||
|
if t.ScriptID != 0 {
|
||||||
|
x := likelyScript[t.ScriptID]
|
||||||
|
if x.region != 0 {
|
||||||
|
t.setUndefinedRegion(Region(x.region))
|
||||||
|
t.setUndefinedLang(Language(x.lang))
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Search matches for und-region. If und-script-region exists, it would
|
||||||
|
// have been found earlier.
|
||||||
|
if t.RegionID != 0 {
|
||||||
|
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||||
|
x := likelyRegionGroup[i]
|
||||||
|
if x.region != 0 {
|
||||||
|
t.setUndefinedLang(Language(x.lang))
|
||||||
|
t.setUndefinedScript(Script(x.script))
|
||||||
|
t.RegionID = Region(x.region)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
x := likelyRegion[t.RegionID]
|
||||||
|
if x.flags&isList != 0 {
|
||||||
|
x = likelyRegionList[x.lang]
|
||||||
|
}
|
||||||
|
if x.script != 0 && x.flags != scriptInFrom {
|
||||||
|
t.setUndefinedLang(Language(x.lang))
|
||||||
|
t.setUndefinedScript(Script(x.script))
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search matches for lang.
|
||||||
|
if t.LangID < langNoIndexOffset {
|
||||||
|
x := likelyLang[t.LangID]
|
||||||
|
if x.flags&isList != 0 {
|
||||||
|
x = likelyLangList[x.region]
|
||||||
|
}
|
||||||
|
if x.region != 0 {
|
||||||
|
t.setUndefinedScript(Script(x.script))
|
||||||
|
t.setUndefinedRegion(Region(x.region))
|
||||||
|
}
|
||||||
|
specializeRegion(&t)
|
||||||
|
if t.LangID == 0 {
|
||||||
|
t.LangID = _en // default language
|
||||||
|
}
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
return t, ErrMissingLikelyTagsData
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tag) setTagsFrom(id Tag) {
|
||||||
|
t.LangID = id.LangID
|
||||||
|
t.ScriptID = id.ScriptID
|
||||||
|
t.RegionID = id.RegionID
|
||||||
|
}
|
||||||
|
|
||||||
|
// minimize removes the region or script subtags from t such that
|
||||||
|
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
|
||||||
|
func (t Tag) minimize() (Tag, error) {
|
||||||
|
t, err := minimizeTags(t)
|
||||||
|
if err != nil {
|
||||||
|
return t, err
|
||||||
|
}
|
||||||
|
t.RemakeString()
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// minimizeTags mimics the behavior of the ICU 51 C implementation.
|
||||||
|
func minimizeTags(t Tag) (Tag, error) {
|
||||||
|
if t.equalTags(Und) {
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
max, err := addTags(t)
|
||||||
|
if err != nil {
|
||||||
|
return t, err
|
||||||
|
}
|
||||||
|
for _, id := range [...]Tag{
|
||||||
|
{LangID: t.LangID},
|
||||||
|
{LangID: t.LangID, RegionID: t.RegionID},
|
||||||
|
{LangID: t.LangID, ScriptID: t.ScriptID},
|
||||||
|
} {
|
||||||
|
if x, err := addTags(id); err == nil && max.equalTags(x) {
|
||||||
|
t.setTagsFrom(id)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t, nil
|
||||||
|
}
|
608
vendor/golang.org/x/text/internal/language/parse.go
generated
vendored
Normal file
608
vendor/golang.org/x/text/internal/language/parse.go
generated
vendored
Normal file
|
@ -0,0 +1,608 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/tag"
|
||||||
|
)
|
||||||
|
|
||||||
|
// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit; the test is simply whether b sorts
// after '9'.
func isAlpha(b byte) bool {
	return !(b <= '9')
}
|
||||||
|
|
||||||
|
// isAlphaNum returns true if the string contains only ASCII letters or digits.
// An empty input is considered alphanumeric.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		switch {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// ErrSyntax is returned by any of the parsing functions when the
|
||||||
|
// input is not well-formed, according to BCP 47.
|
||||||
|
// TODO: return the position at which the syntax error occurred?
|
||||||
|
var ErrSyntax = errors.New("language: tag is not well-formed")
|
||||||
|
|
||||||
|
// ErrDuplicateKey is returned when a tag contains the same key twice with
|
||||||
|
// different values in the -u section.
|
||||||
|
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
|
||||||
|
|
||||||
|
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	// v holds the offending subtag, zero-padded; subtags longer than the
	// array are truncated when the error is created.
	v [8]byte
}
|
||||||
|
|
||||||
|
// NewValueError creates a new ValueError.
|
||||||
|
func NewValueError(tag []byte) ValueError {
|
||||||
|
var e ValueError
|
||||||
|
copy(e.v[:], tag)
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e ValueError) tag() []byte {
|
||||||
|
n := bytes.IndexByte(e.v[:], 0)
|
||||||
|
if n == -1 {
|
||||||
|
n = 8
|
||||||
|
}
|
||||||
|
return e.v[:n]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error implements the error interface.
|
||||||
|
func (e ValueError) Error() string {
|
||||||
|
return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subtag returns the subtag for which the error occurred.
|
||||||
|
func (e ValueError) Subtag() string {
|
||||||
|
return string(e.tag())
|
||||||
|
}
|
||||||
|
|
||||||
|
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
|
||||||
|
type scanner struct {
|
||||||
|
b []byte
|
||||||
|
bytes [max99thPercentileSize]byte
|
||||||
|
token []byte
|
||||||
|
start int // start position of the current token
|
||||||
|
end int // end position of the current token
|
||||||
|
next int // next point for scan
|
||||||
|
err error
|
||||||
|
done bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeScannerString(s string) scanner {
|
||||||
|
scan := scanner{}
|
||||||
|
if len(s) <= len(scan.bytes) {
|
||||||
|
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
||||||
|
} else {
|
||||||
|
scan.b = []byte(s)
|
||||||
|
}
|
||||||
|
scan.init()
|
||||||
|
return scan
|
||||||
|
}
|
||||||
|
|
||||||
|
// makeScanner returns a scanner using b as the input buffer.
|
||||||
|
// b is not copied and may be modified by the scanner routines.
|
||||||
|
func makeScanner(b []byte) scanner {
|
||||||
|
scan := scanner{b: b}
|
||||||
|
scan.init()
|
||||||
|
return scan
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *scanner) init() {
|
||||||
|
for i, c := range s.b {
|
||||||
|
if c == '_' {
|
||||||
|
s.b[i] = '-'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.scan()
|
||||||
|
}
|
||||||
|
|
||||||
|
// restToLower converts the string between start and end to lower case.
|
||||||
|
func (s *scanner) toLower(start, end int) {
|
||||||
|
for i := start; i < end; i++ {
|
||||||
|
c := s.b[i]
|
||||||
|
if 'A' <= c && c <= 'Z' {
|
||||||
|
s.b[i] += 'a' - 'A'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *scanner) setError(e error) {
|
||||||
|
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
|
||||||
|
s.err = e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// resizeRange shrinks or grows the array at position oldStart such that
|
||||||
|
// a new string of size newSize can fit between oldStart and oldEnd.
|
||||||
|
// Sets the scan point to after the resized range.
|
||||||
|
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
|
||||||
|
s.start = oldStart
|
||||||
|
if end := oldStart + newSize; end != oldEnd {
|
||||||
|
diff := end - oldEnd
|
||||||
|
var b []byte
|
||||||
|
if n := len(s.b) + diff; n > cap(s.b) {
|
||||||
|
b = make([]byte, n)
|
||||||
|
copy(b, s.b[:oldStart])
|
||||||
|
} else {
|
||||||
|
b = s.b[:n]
|
||||||
|
}
|
||||||
|
copy(b[end:], s.b[oldEnd:])
|
||||||
|
s.b = b
|
||||||
|
s.next = end + (s.next - s.end)
|
||||||
|
s.end = end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// replace replaces the current token with repl.
|
||||||
|
func (s *scanner) replace(repl string) {
|
||||||
|
s.resizeRange(s.start, s.end, len(repl))
|
||||||
|
copy(s.b[s.start:], repl)
|
||||||
|
}
|
||||||
|
|
||||||
|
// gobble removes the current token from the input.
|
||||||
|
// Caller must call scan after calling gobble.
|
||||||
|
func (s *scanner) gobble(e error) {
|
||||||
|
s.setError(e)
|
||||||
|
if s.start == 0 {
|
||||||
|
s.b = s.b[:+copy(s.b, s.b[s.next:])]
|
||||||
|
s.end = 0
|
||||||
|
} else {
|
||||||
|
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
|
||||||
|
s.end = s.start - 1
|
||||||
|
}
|
||||||
|
s.next = s.start
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteRange removes the given range from s.b before the current token.
|
||||||
|
func (s *scanner) deleteRange(start, end int) {
|
||||||
|
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
|
||||||
|
diff := end - start
|
||||||
|
s.next -= diff
|
||||||
|
s.start -= diff
|
||||||
|
s.end -= diff
|
||||||
|
}
|
||||||
|
|
||||||
|
// scan parses the next token of a BCP 47 string. Tokens that are larger
|
||||||
|
// than 8 characters or include non-alphanumeric characters result in an error
|
||||||
|
// and are gobbled and removed from the output.
|
||||||
|
// It returns the end position of the last token consumed.
|
||||||
|
func (s *scanner) scan() (end int) {
|
||||||
|
end = s.end
|
||||||
|
s.token = nil
|
||||||
|
for s.start = s.next; s.next < len(s.b); {
|
||||||
|
i := bytes.IndexByte(s.b[s.next:], '-')
|
||||||
|
if i == -1 {
|
||||||
|
s.end = len(s.b)
|
||||||
|
s.next = len(s.b)
|
||||||
|
i = s.end - s.start
|
||||||
|
} else {
|
||||||
|
s.end = s.next + i
|
||||||
|
s.next = s.end + 1
|
||||||
|
}
|
||||||
|
token := s.b[s.start:s.end]
|
||||||
|
if i < 1 || i > 8 || !isAlphaNum(token) {
|
||||||
|
s.gobble(ErrSyntax)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.token = token
|
||||||
|
return end
|
||||||
|
}
|
||||||
|
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
|
||||||
|
s.setError(ErrSyntax)
|
||||||
|
s.b = s.b[:len(s.b)-1]
|
||||||
|
}
|
||||||
|
s.done = true
|
||||||
|
return end
|
||||||
|
}
|
||||||
|
|
||||||
|
// acceptMinSize parses multiple tokens of the given size or greater.
|
||||||
|
// It returns the end position of the last token consumed.
|
||||||
|
func (s *scanner) acceptMinSize(min int) (end int) {
|
||||||
|
end = s.end
|
||||||
|
s.scan()
|
||||||
|
for ; len(s.token) >= min; s.scan() {
|
||||||
|
end = s.end
|
||||||
|
}
|
||||||
|
return end
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||||
|
// failed it returns an error and any part of the tag that could be parsed.
|
||||||
|
// If parsing succeeded but an unknown value was found, it returns
|
||||||
|
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||||
|
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||||
|
// and extensions to this standard defined in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
func Parse(s string) (t Tag, err error) {
|
||||||
|
// TODO: consider supporting old-style locale key-value pairs.
|
||||||
|
if s == "" {
|
||||||
|
return Und, ErrSyntax
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
t = Und
|
||||||
|
err = ErrSyntax
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
if len(s) <= maxAltTaglen {
|
||||||
|
b := [maxAltTaglen]byte{}
|
||||||
|
for i, c := range s {
|
||||||
|
// Generating invalid UTF-8 is okay as it won't match.
|
||||||
|
if 'A' <= c && c <= 'Z' {
|
||||||
|
c += 'a' - 'A'
|
||||||
|
} else if c == '_' {
|
||||||
|
c = '-'
|
||||||
|
}
|
||||||
|
b[i] = byte(c)
|
||||||
|
}
|
||||||
|
if t, ok := grandfathered(b); ok {
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scan := makeScannerString(s)
|
||||||
|
return parse(&scan, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func parse(scan *scanner, s string) (t Tag, err error) {
|
||||||
|
t = Und
|
||||||
|
var end int
|
||||||
|
if n := len(scan.token); n <= 1 {
|
||||||
|
scan.toLower(0, len(scan.b))
|
||||||
|
if n == 0 || scan.token[0] != 'x' {
|
||||||
|
return t, ErrSyntax
|
||||||
|
}
|
||||||
|
end = parseExtensions(scan)
|
||||||
|
} else if n >= 4 {
|
||||||
|
return Und, ErrSyntax
|
||||||
|
} else { // the usual case
|
||||||
|
t, end = parseTag(scan, true)
|
||||||
|
if n := len(scan.token); n == 1 {
|
||||||
|
t.pExt = uint16(end)
|
||||||
|
end = parseExtensions(scan)
|
||||||
|
} else if end < len(scan.b) {
|
||||||
|
scan.setError(ErrSyntax)
|
||||||
|
scan.b = scan.b[:end]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if int(t.pVariant) < len(scan.b) {
|
||||||
|
if end < len(s) {
|
||||||
|
s = s[:end]
|
||||||
|
}
|
||||||
|
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
|
||||||
|
t.str = s
|
||||||
|
} else {
|
||||||
|
t.str = string(scan.b)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
t.pVariant, t.pExt = 0, 0
|
||||||
|
}
|
||||||
|
return t, scan.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseTag parses language, script, region and variants.
|
||||||
|
// It returns a Tag and the end position in the input that was parsed.
|
||||||
|
// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
|
||||||
|
func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
|
||||||
|
var e error
|
||||||
|
// TODO: set an error if an unknown lang, script or region is encountered.
|
||||||
|
t.LangID, e = getLangID(scan.token)
|
||||||
|
scan.setError(e)
|
||||||
|
scan.replace(t.LangID.String())
|
||||||
|
langStart := scan.start
|
||||||
|
end = scan.scan()
|
||||||
|
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
|
||||||
|
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
|
||||||
|
// to a tag of the form <extlang>.
|
||||||
|
if doNorm {
|
||||||
|
lang, e := getLangID(scan.token)
|
||||||
|
if lang != 0 {
|
||||||
|
t.LangID = lang
|
||||||
|
langStr := lang.String()
|
||||||
|
copy(scan.b[langStart:], langStr)
|
||||||
|
scan.b[langStart+len(langStr)] = '-'
|
||||||
|
scan.start = langStart + len(langStr) + 1
|
||||||
|
}
|
||||||
|
scan.gobble(e)
|
||||||
|
}
|
||||||
|
end = scan.scan()
|
||||||
|
}
|
||||||
|
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
|
||||||
|
t.ScriptID, e = getScriptID(script, scan.token)
|
||||||
|
if t.ScriptID == 0 {
|
||||||
|
scan.gobble(e)
|
||||||
|
}
|
||||||
|
end = scan.scan()
|
||||||
|
}
|
||||||
|
if n := len(scan.token); n >= 2 && n <= 3 {
|
||||||
|
t.RegionID, e = getRegionID(scan.token)
|
||||||
|
if t.RegionID == 0 {
|
||||||
|
scan.gobble(e)
|
||||||
|
} else {
|
||||||
|
scan.replace(t.RegionID.String())
|
||||||
|
}
|
||||||
|
end = scan.scan()
|
||||||
|
}
|
||||||
|
scan.toLower(scan.start, len(scan.b))
|
||||||
|
t.pVariant = byte(end)
|
||||||
|
end = parseVariants(scan, end, t)
|
||||||
|
t.pExt = uint16(end)
|
||||||
|
return t, end
|
||||||
|
}
|
||||||
|
|
||||||
|
// separator is the subtag delimiter passed to bytes.Join when re-joining
// tokens.
var separator = []byte{'-'}
|
||||||
|
|
||||||
|
// parseVariants scans tokens as long as each token is a valid variant string.
|
||||||
|
// Duplicate variants are removed.
|
||||||
|
func parseVariants(scan *scanner, end int, t Tag) int {
|
||||||
|
start := scan.start
|
||||||
|
varIDBuf := [4]uint8{}
|
||||||
|
variantBuf := [4][]byte{}
|
||||||
|
varID := varIDBuf[:0]
|
||||||
|
variant := variantBuf[:0]
|
||||||
|
last := -1
|
||||||
|
needSort := false
|
||||||
|
for ; len(scan.token) >= 4; scan.scan() {
|
||||||
|
// TODO: measure the impact of needing this conversion and redesign
|
||||||
|
// the data structure if there is an issue.
|
||||||
|
v, ok := variantIndex[string(scan.token)]
|
||||||
|
if !ok {
|
||||||
|
// unknown variant
|
||||||
|
// TODO: allow user-defined variants?
|
||||||
|
scan.gobble(NewValueError(scan.token))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
varID = append(varID, v)
|
||||||
|
variant = append(variant, scan.token)
|
||||||
|
if !needSort {
|
||||||
|
if last < int(v) {
|
||||||
|
last = int(v)
|
||||||
|
} else {
|
||||||
|
needSort = true
|
||||||
|
// There is no legal combinations of more than 7 variants
|
||||||
|
// (and this is by no means a useful sequence).
|
||||||
|
const maxVariants = 8
|
||||||
|
if len(varID) > maxVariants {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
end = scan.end
|
||||||
|
}
|
||||||
|
if needSort {
|
||||||
|
sort.Sort(variantsSort{varID, variant})
|
||||||
|
k, l := 0, -1
|
||||||
|
for i, v := range varID {
|
||||||
|
w := int(v)
|
||||||
|
if l == w {
|
||||||
|
// Remove duplicates.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
varID[k] = varID[i]
|
||||||
|
variant[k] = variant[i]
|
||||||
|
k++
|
||||||
|
l = w
|
||||||
|
}
|
||||||
|
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
|
||||||
|
end = start - 1
|
||||||
|
} else {
|
||||||
|
scan.resizeRange(start, end, len(str))
|
||||||
|
copy(scan.b[scan.start:], str)
|
||||||
|
end = scan.end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return end
|
||||||
|
}
|
||||||
|
|
||||||
|
// variantsSort implements sort.Interface over two parallel slices: it
// orders by the variant IDs in i and keeps the matching entries of v
// aligned with them.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len reports the number of variant IDs.
func (s variantsSort) Len() int { return len(s.i) }

// Swap exchanges elements a and b in both slices.
func (s variantsSort) Swap(a, b int) {
	ids, vals := s.i, s.v
	ids[a], ids[b] = ids[b], ids[a]
	vals[a], vals[b] = vals[b], vals[a]
}

// Less reports whether the ID at a sorts before the ID at b.
func (s variantsSort) Less(a, b int) bool { return s.i[b] > s.i[a] }
|
||||||
|
|
||||||
|
// bytesSort implements sort.Interface for byte slices, ordering them by
// their first n bytes only. Every element must be at least n bytes long.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len reports the number of slices.
func (b bytesSort) Len() int { return len(b.b) }

// Swap exchanges the slices at i and j.
func (b bytesSort) Swap(i, j int) {
	items := b.b
	items[i], items[j] = items[j], items[i]
}

// Less reports whether the first n bytes of element i sort before those of
// element j.
func (b bytesSort) Less(i, j int) bool {
	for k := 0; k < b.n; k++ {
		if x, y := b.b[i][k], b.b[j][k]; x != y {
			return x < y
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// parseExtensions parses and normalizes the extensions in the buffer.
|
||||||
|
// It returns the last position of scan.b that is part of any extension.
|
||||||
|
// It also trims scan.b to remove excess parts accordingly.
|
||||||
|
func parseExtensions(scan *scanner) int {
|
||||||
|
start := scan.start
|
||||||
|
exts := [][]byte{}
|
||||||
|
private := []byte{}
|
||||||
|
end := scan.end
|
||||||
|
for len(scan.token) == 1 {
|
||||||
|
extStart := scan.start
|
||||||
|
ext := scan.token[0]
|
||||||
|
end = parseExtension(scan)
|
||||||
|
extension := scan.b[extStart:end]
|
||||||
|
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
|
||||||
|
scan.setError(ErrSyntax)
|
||||||
|
end = extStart
|
||||||
|
continue
|
||||||
|
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
|
||||||
|
scan.b = scan.b[:end]
|
||||||
|
return end
|
||||||
|
} else if ext == 'x' {
|
||||||
|
private = extension
|
||||||
|
break
|
||||||
|
}
|
||||||
|
exts = append(exts, extension)
|
||||||
|
}
|
||||||
|
sort.Sort(bytesSort{exts, 1})
|
||||||
|
if len(private) > 0 {
|
||||||
|
exts = append(exts, private)
|
||||||
|
}
|
||||||
|
scan.b = scan.b[:start]
|
||||||
|
if len(exts) > 0 {
|
||||||
|
scan.b = append(scan.b, bytes.Join(exts, separator)...)
|
||||||
|
} else if start > 0 {
|
||||||
|
// Strip trailing '-'.
|
||||||
|
scan.b = scan.b[:start-1]
|
||||||
|
}
|
||||||
|
return end
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseExtension parses a single extension and returns the position of
|
||||||
|
// the extension end.
|
||||||
|
func parseExtension(scan *scanner) int {
|
||||||
|
start, end := scan.start, scan.end
|
||||||
|
switch scan.token[0] {
|
||||||
|
case 'u': // https://www.ietf.org/rfc/rfc6067.txt
|
||||||
|
attrStart := end
|
||||||
|
scan.scan()
|
||||||
|
for last := []byte{}; len(scan.token) > 2; scan.scan() {
|
||||||
|
if bytes.Compare(scan.token, last) != -1 {
|
||||||
|
// Attributes are unsorted. Start over from scratch.
|
||||||
|
p := attrStart + 1
|
||||||
|
scan.next = p
|
||||||
|
attrs := [][]byte{}
|
||||||
|
for scan.scan(); len(scan.token) > 2; scan.scan() {
|
||||||
|
attrs = append(attrs, scan.token)
|
||||||
|
end = scan.end
|
||||||
|
}
|
||||||
|
sort.Sort(bytesSort{attrs, 3})
|
||||||
|
copy(scan.b[p:], bytes.Join(attrs, separator))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
last = scan.token
|
||||||
|
end = scan.end
|
||||||
|
}
|
||||||
|
// Scan key-type sequences. A key is of length 2 and may be followed
|
||||||
|
// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
|
||||||
|
var last, key []byte
|
||||||
|
for attrEnd := end; len(scan.token) == 2; last = key {
|
||||||
|
key = scan.token
|
||||||
|
end = scan.end
|
||||||
|
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
|
||||||
|
end = scan.end
|
||||||
|
}
|
||||||
|
// TODO: check key value validity
|
||||||
|
if bytes.Compare(key, last) != 1 || scan.err != nil {
|
||||||
|
// We have an invalid key or the keys are not sorted.
|
||||||
|
// Start scanning keys from scratch and reorder.
|
||||||
|
p := attrEnd + 1
|
||||||
|
scan.next = p
|
||||||
|
keys := [][]byte{}
|
||||||
|
for scan.scan(); len(scan.token) == 2; {
|
||||||
|
keyStart := scan.start
|
||||||
|
end = scan.end
|
||||||
|
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
|
||||||
|
end = scan.end
|
||||||
|
}
|
||||||
|
keys = append(keys, scan.b[keyStart:end])
|
||||||
|
}
|
||||||
|
sort.Stable(bytesSort{keys, 2})
|
||||||
|
if n := len(keys); n > 0 {
|
||||||
|
k := 0
|
||||||
|
for i := 1; i < n; i++ {
|
||||||
|
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
|
||||||
|
k++
|
||||||
|
keys[k] = keys[i]
|
||||||
|
} else if !bytes.Equal(keys[k], keys[i]) {
|
||||||
|
scan.setError(ErrDuplicateKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
keys = keys[:k+1]
|
||||||
|
}
|
||||||
|
reordered := bytes.Join(keys, separator)
|
||||||
|
if e := p + len(reordered); e < end {
|
||||||
|
scan.deleteRange(e, end)
|
||||||
|
end = e
|
||||||
|
}
|
||||||
|
copy(scan.b[p:], reordered)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 't': // https://www.ietf.org/rfc/rfc6497.txt
|
||||||
|
scan.scan()
|
||||||
|
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
|
||||||
|
_, end = parseTag(scan, false)
|
||||||
|
scan.toLower(start, end)
|
||||||
|
}
|
||||||
|
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
|
||||||
|
end = scan.acceptMinSize(3)
|
||||||
|
}
|
||||||
|
case 'x':
|
||||||
|
end = scan.acceptMinSize(1)
|
||||||
|
default:
|
||||||
|
end = scan.acceptMinSize(2)
|
||||||
|
}
|
||||||
|
return end
|
||||||
|
}
|
||||||
|
|
||||||
|
// getExtension returns the name, body and end position of the extension.
|
||||||
|
func getExtension(s string, p int) (end int, ext string) {
|
||||||
|
if s[p] == '-' {
|
||||||
|
p++
|
||||||
|
}
|
||||||
|
if s[p] == 'x' {
|
||||||
|
return len(s), s[p:]
|
||||||
|
}
|
||||||
|
end = nextExtension(s, p)
|
||||||
|
return end, s[p:end]
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the position of the
// pattern's first '-', or len(s) if there is none.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		if s[p] != '-' {
			p++
			continue
		}
		if s[p+2] == '-' {
			return p
		}
		p += 3
	}
	return len(s)
}
|
3494
vendor/golang.org/x/text/internal/language/tables.go
generated
vendored
Normal file
3494
vendor/golang.org/x/text/internal/language/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
48
vendor/golang.org/x/text/internal/language/tags.go
generated
vendored
Normal file
48
vendor/golang.org/x/text/internal/language/tags.go
generated
vendored
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||||
|
// It simplifies safe initialization of Tag values.
|
||||||
|
func MustParse(s string) Tag {
|
||||||
|
t, err := Parse(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||||
|
// It simplifies safe initialization of Base values.
|
||||||
|
func MustParseBase(s string) Language {
|
||||||
|
b, err := ParseBase(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||||
|
// parsed. It simplifies safe initialization of Script values.
|
||||||
|
func MustParseScript(s string) Script {
|
||||||
|
scr, err := ParseScript(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return scr
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||||
|
// parsed. It simplifies safe initialization of Region values.
|
||||||
|
func MustParseRegion(s string) Region {
|
||||||
|
r, err := ParseRegion(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// Und is the root language. It is the zero value of Tag.
|
||||||
|
var Und Tag
|
67
vendor/golang.org/x/text/internal/match.go
generated
vendored
Normal file
67
vendor/golang.org/x/text/internal/match.go
generated
vendored
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package internal
|
||||||
|
|
||||||
|
// This file contains matchers that implement CLDR inheritance.
|
||||||
|
//
|
||||||
|
// See https://unicode.org/reports/tr35/#Locale_Inheritance.
|
||||||
|
//
|
||||||
|
// Some of the inheritance described in this document is already handled by
|
||||||
|
// the cldr package.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: consider if (some of the) matching algorithm needs to be public after
|
||||||
|
// getting some feel about what is generic and what is specific.
|
||||||
|
|
||||||
|
// NewInheritanceMatcher returns a matcher that matches based on the inheritance
|
||||||
|
// chain.
|
||||||
|
//
|
||||||
|
// The matcher uses canonicalization and the parent relationship to find a
|
||||||
|
// match. The resulting match will always be either Und or a language with the
|
||||||
|
// same language and script as the requested language. It will not match
|
||||||
|
// languages for which there is understood to be mutual or one-directional
|
||||||
|
// intelligibility.
|
||||||
|
//
|
||||||
|
// A Match will indicate an Exact match if the language matches after
|
||||||
|
// canonicalization and High if the matched tag is a parent.
|
||||||
|
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
|
||||||
|
tags := &InheritanceMatcher{make(map[language.Tag]int)}
|
||||||
|
for i, tag := range t {
|
||||||
|
ct, err := language.All.Canonicalize(tag)
|
||||||
|
if err != nil {
|
||||||
|
ct = tag
|
||||||
|
}
|
||||||
|
tags.index[ct] = i
|
||||||
|
}
|
||||||
|
return tags
|
||||||
|
}
|
||||||
|
|
||||||
|
type InheritanceMatcher struct {
|
||||||
|
index map[language.Tag]int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
|
||||||
|
for _, t := range want {
|
||||||
|
ct, err := language.All.Canonicalize(t)
|
||||||
|
if err != nil {
|
||||||
|
ct = t
|
||||||
|
}
|
||||||
|
conf := language.Exact
|
||||||
|
for {
|
||||||
|
if index, ok := m.index[ct]; ok {
|
||||||
|
return ct, index, conf
|
||||||
|
}
|
||||||
|
if ct == language.Und {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
ct = ct.Parent()
|
||||||
|
conf = language.High
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return language.Und, 0, language.No
|
||||||
|
}
|
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
Normal file
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package tag contains functionality handling tags and related data.
|
||||||
|
package tag // import "golang.org/x/text/internal/tag"
|
||||||
|
|
||||||
|
import "sort"
|
||||||
|
|
||||||
|
// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string

// Elem returns the element data at the given index.
func (s Index) Elem(x int) string {
	return string(s[x*4 : x*4+4])
}

// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes from the start of the 4-byte entries will be
// considered for the search and the first match in Index will be returned.
//
// The entries must be sorted. A key that sorts after every entry yields -1.
func (s Index) Index(key []byte) int {
	n := len(key)
	// search the index of the first entry with an equal or higher value than
	// key in s.
	index := sort.Search(len(s)/4, func(i int) bool {
		return cmp(s[i*4:i*4+n], key) != -1
	})
	i := index * 4
	// sort.Search returns len(s)/4 when no entry is >= key. Guard the slice
	// so that this case reports "not found" instead of panicking when the
	// index does not end in a sentinel entry.
	if i+n > len(s) || cmp(s[i:i+n], key) != 0 {
		return -1
	}
	return index
}

// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
	if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
		return x
	}
	return -1
}

// cmp returns an integer comparing a and b lexicographically: -1 if a < b,
// 1 if a > b and 0 if they are equal. A proper prefix sorts first.
func cmp(a Index, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i, c := range b[:n] {
		switch {
		case a[i] > c:
			return 1
		case a[i] < c:
			return -1
		}
	}
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return 1
	}
	return 0
}

// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
	return cmp(Index(a), b)
}
|
||||||
|
|
||||||
|
// FixCase reformats b, in place, to the same pattern of cases as form:
// positions where form holds a byte <= 'Z' become upper case, all others
// lower case. It returns false if b is malformed, that is, if its length
// differs from form or it contains a byte that is not an ASCII letter.
// Bytes before a malformed one may already have been rewritten.
func FixCase(form string, b []byte) bool {
	if len(form) != len(b) {
		return false
	}
	const caseDelta = 'z' - 'Z'
	for i := range b {
		c := b[i]
		wantUpper := form[i] <= 'Z'
		switch {
		case wantUpper && c >= 'a':
			c -= caseDelta
		case !wantUpper && c <= 'Z':
			c += caseDelta
		}
		lo, hi := byte('a'), byte('z')
		if wantUpper {
			lo, hi = 'A', 'Z'
		}
		if c < lo || c > hi {
			return false
		}
		b[i] = c
	}
	return true
}
|
187
vendor/golang.org/x/text/language/coverage.go
generated
vendored
Normal file
187
vendor/golang.org/x/text/language/coverage.go
generated
vendored
Normal file
|
@ -0,0 +1,187 @@
|
||||||
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// The Coverage interface is used to define the level of coverage of an
|
||||||
|
// internationalization service. Note that not all types are supported by all
|
||||||
|
// services. As lists may be generated on the fly, it is recommended that users
|
||||||
|
// of a Coverage cache the results.
|
||||||
|
type Coverage interface {
|
||||||
|
// Tags returns the list of supported tags.
|
||||||
|
Tags() []Tag
|
||||||
|
|
||||||
|
// BaseLanguages returns the list of supported base languages.
|
||||||
|
BaseLanguages() []Base
|
||||||
|
|
||||||
|
// Scripts returns the list of supported scripts.
|
||||||
|
Scripts() []Script
|
||||||
|
|
||||||
|
// Regions returns the list of supported regions.
|
||||||
|
Regions() []Region
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Supported defines a Coverage that lists all supported subtags. Tags
|
||||||
|
// always returns nil.
|
||||||
|
Supported Coverage = allSubtags{}
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
// - Support Variants, numbering systems.
|
||||||
|
// - CLDR coverage levels.
|
||||||
|
// - Set of common tags defined in this package.
|
||||||
|
|
||||||
|
type allSubtags struct{}
|
||||||
|
|
||||||
|
// Regions returns the list of supported regions. As all regions are in a
|
||||||
|
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||||
|
// The "undefined" region is not returned.
|
||||||
|
func (s allSubtags) Regions() []Region {
|
||||||
|
reg := make([]Region, language.NumRegions)
|
||||||
|
for i := range reg {
|
||||||
|
reg[i] = Region{language.Region(i + 1)}
|
||||||
|
}
|
||||||
|
return reg
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scripts returns the list of supported scripts. As all scripts are in a
|
||||||
|
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||||
|
// The "undefined" script is not returned.
|
||||||
|
func (s allSubtags) Scripts() []Script {
|
||||||
|
scr := make([]Script, language.NumScripts)
|
||||||
|
for i := range scr {
|
||||||
|
scr[i] = Script{language.Script(i + 1)}
|
||||||
|
}
|
||||||
|
return scr
|
||||||
|
}
|
||||||
|
|
||||||
|
// BaseLanguages returns the list of all supported base languages. It generates
|
||||||
|
// the list by traversing the internal structures.
|
||||||
|
func (s allSubtags) BaseLanguages() []Base {
|
||||||
|
bs := language.BaseLanguages()
|
||||||
|
base := make([]Base, len(bs))
|
||||||
|
for i, b := range bs {
|
||||||
|
base[i] = Base{b}
|
||||||
|
}
|
||||||
|
return base
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tags always returns nil.
|
||||||
|
func (s allSubtags) Tags() []Tag {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// coverage is used by NewCoverage which is used as a convenient way for
|
||||||
|
// creating Coverage implementations for partially defined data. Very often a
|
||||||
|
// package will only need to define a subset of slices. coverage provides a
|
||||||
|
// convenient way to do this. Moreover, packages using NewCoverage, instead of
|
||||||
|
// their own implementation, will not break if later new slice types are added.
|
||||||
|
type coverage struct {
|
||||||
|
tags func() []Tag
|
||||||
|
bases func() []Base
|
||||||
|
scripts func() []Script
|
||||||
|
regions func() []Region
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *coverage) Tags() []Tag {
|
||||||
|
if s.tags == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s.tags()
|
||||||
|
}
|
||||||
|
|
||||||
|
// bases implements sort.Interface and is used to sort base languages.
|
||||||
|
type bases []Base
|
||||||
|
|
||||||
|
func (b bases) Len() int {
|
||||||
|
return len(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b bases) Swap(i, j int) {
|
||||||
|
b[i], b[j] = b[j], b[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b bases) Less(i, j int) bool {
|
||||||
|
return b[i].langID < b[j].langID
|
||||||
|
}
|
||||||
|
|
||||||
|
// BaseLanguages returns the result from calling s.bases if it is specified or
|
||||||
|
// otherwise derives the set of supported base languages from tags.
|
||||||
|
func (s *coverage) BaseLanguages() []Base {
|
||||||
|
if s.bases == nil {
|
||||||
|
tags := s.Tags()
|
||||||
|
if len(tags) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
a := make([]Base, len(tags))
|
||||||
|
for i, t := range tags {
|
||||||
|
a[i] = Base{language.Language(t.lang())}
|
||||||
|
}
|
||||||
|
sort.Sort(bases(a))
|
||||||
|
k := 0
|
||||||
|
for i := 1; i < len(a); i++ {
|
||||||
|
if a[k] != a[i] {
|
||||||
|
k++
|
||||||
|
a[k] = a[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a[:k+1]
|
||||||
|
}
|
||||||
|
return s.bases()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *coverage) Scripts() []Script {
|
||||||
|
if s.scripts == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s.scripts()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *coverage) Regions() []Region {
|
||||||
|
if s.regions == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s.regions()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCoverage returns a Coverage for the given lists. It is typically used by
|
||||||
|
// packages providing internationalization services to define their level of
|
||||||
|
// coverage. A list may be of type []T or func() []T, where T is either Tag,
|
||||||
|
// Base, Script or Region. The returned Coverage derives the value for Bases
|
||||||
|
// from Tags if no func or slice for []Base is specified. For other unspecified
|
||||||
|
// types the returned Coverage will return nil for the respective methods.
|
||||||
|
func NewCoverage(list ...interface{}) Coverage {
|
||||||
|
s := &coverage{}
|
||||||
|
for _, x := range list {
|
||||||
|
switch v := x.(type) {
|
||||||
|
case func() []Base:
|
||||||
|
s.bases = v
|
||||||
|
case func() []Script:
|
||||||
|
s.scripts = v
|
||||||
|
case func() []Region:
|
||||||
|
s.regions = v
|
||||||
|
case func() []Tag:
|
||||||
|
s.tags = v
|
||||||
|
case []Base:
|
||||||
|
s.bases = func() []Base { return v }
|
||||||
|
case []Script:
|
||||||
|
s.scripts = func() []Script { return v }
|
||||||
|
case []Region:
|
||||||
|
s.regions = func() []Region { return v }
|
||||||
|
case []Tag:
|
||||||
|
s.tags = func() []Tag { return v }
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("language: unsupported set type %T", v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
98
vendor/golang.org/x/text/language/doc.go
generated
vendored
Normal file
98
vendor/golang.org/x/text/language/doc.go
generated
vendored
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
// Copyright 2017 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package language implements BCP 47 language tags and related functionality.
|
||||||
|
//
|
||||||
|
// The most important function of package language is to match a list of
|
||||||
|
// user-preferred languages to a list of supported languages.
|
||||||
|
// It alleviates the developer of dealing with the complexity of this process
|
||||||
|
// and provides the user with the best experience
|
||||||
|
// (see https://blog.golang.org/matchlang).
|
||||||
|
//
|
||||||
|
// # Matching preferred against supported languages
|
||||||
|
//
|
||||||
|
// A Matcher for an application that supports English, Australian English,
|
||||||
|
// Danish, and standard Mandarin can be created as follows:
|
||||||
|
//
|
||||||
|
// var matcher = language.NewMatcher([]language.Tag{
|
||||||
|
// language.English, // The first language is used as fallback.
|
||||||
|
// language.MustParse("en-AU"),
|
||||||
|
// language.Danish,
|
||||||
|
// language.Chinese,
|
||||||
|
// })
|
||||||
|
//
|
||||||
|
// This list of supported languages is typically implied by the languages for
|
||||||
|
// which there exists translations of the user interface.
|
||||||
|
//
|
||||||
|
// User-preferred languages usually come as a comma-separated list of BCP 47
|
||||||
|
// language tags.
|
||||||
|
// The MatchStrings function finds the best match for such strings:
|
||||||
|
//
|
||||||
|
// handler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
// lang, _ := r.Cookie("lang")
|
||||||
|
// accept := r.Header.Get("Accept-Language")
|
||||||
|
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
|
||||||
|
//
|
||||||
|
// // tag should now be used for the initialization of any
|
||||||
|
// // locale-specific service.
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// The Matcher's Match method can be used to match Tags directly.
|
||||||
|
//
|
||||||
|
// Matchers are aware of the intricacies of equivalence between languages, such
|
||||||
|
// as deprecated subtags, legacy tags, macro languages, mutual
|
||||||
|
// intelligibility between scripts and languages, and transparently passing
|
||||||
|
// BCP 47 user configuration.
|
||||||
|
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
|
||||||
|
// and will know that Cantonese ("yue") is a good match for "zh-HK".
|
||||||
|
//
|
||||||
|
// # Using match results
|
||||||
|
//
|
||||||
|
// To guarantee a consistent user experience it is important to
|
||||||
|
// use the same language tag for the selection of any locale-specific services.
|
||||||
|
// For example, it is utterly confusing to substitute spelled-out numbers
|
||||||
|
// or dates in one language in text of another language.
|
||||||
|
// More subtly confusing is using the wrong sorting order or casing
|
||||||
|
// algorithm for a certain language.
|
||||||
|
//
|
||||||
|
// All the packages in x/text that provide locale-specific services
|
||||||
|
// (e.g. collate, cases) should be initialized with the tag that was
|
||||||
|
// obtained at the start of an interaction with the user.
|
||||||
|
//
|
||||||
|
// Note that the Tag returned by Match and MatchStrings may differ from any
|
||||||
|
// of the supported languages, as it may contain carried over settings from
|
||||||
|
// the user tags.
|
||||||
|
// This may be inconvenient when your application has some additional
|
||||||
|
// locale-specific data for your supported languages.
|
||||||
|
// Match and MatchStrings both return the index of the matched supported tag
|
||||||
|
// to simplify associating such data with the matched tag.
|
||||||
|
//
|
||||||
|
// # Canonicalization
|
||||||
|
//
|
||||||
|
// If one uses the Matcher to compare languages one does not need to
|
||||||
|
// worry about canonicalization.
|
||||||
|
//
|
||||||
|
// The meaning of a Tag varies per application. The language package
|
||||||
|
// therefore delays canonicalization and preserves information as much
|
||||||
|
// as possible. The Matcher, however, will always take into account that
|
||||||
|
// two different tags may represent the same language.
|
||||||
|
//
|
||||||
|
// By default, only legacy and deprecated tags are converted into their
|
||||||
|
// canonical equivalent. All other information is preserved. This approach makes
|
||||||
|
// the confidence scores more accurate and allows matchers to distinguish
|
||||||
|
// between variants that are otherwise lost.
|
||||||
|
//
|
||||||
|
// As a consequence, two tags that should be treated as identical according to
|
||||||
|
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
|
||||||
|
// Matcher handles such distinctions, though, and is aware of the
|
||||||
|
// equivalence relations. The CanonType type can be used to alter the
|
||||||
|
// canonicalization form.
|
||||||
|
//
|
||||||
|
// # References
|
||||||
|
//
|
||||||
|
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
|
||||||
|
package language // import "golang.org/x/text/language"
|
||||||
|
|
||||||
|
// TODO: explanation on how to match languages for your own locale-specific
|
||||||
|
// service.
|
605
vendor/golang.org/x/text/language/language.go
generated
vendored
Normal file
605
vendor/golang.org/x/text/language/language.go
generated
vendored
Normal file
|
@ -0,0 +1,605 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate go run gen.go -output tables.go
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
// TODO: Remove above NOTE after:
|
||||||
|
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/language"
|
||||||
|
"golang.org/x/text/internal/language/compact"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||||
|
// specific language or locale. All language tag values are guaranteed to be
|
||||||
|
// well-formed.
|
||||||
|
type Tag compact.Tag
|
||||||
|
|
||||||
|
func makeTag(t language.Tag) (tag Tag) {
|
||||||
|
return Tag(compact.Make(t))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tag) tag() language.Tag {
|
||||||
|
return (*compact.Tag)(t).Tag()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tag) isCompact() bool {
|
||||||
|
return (*compact.Tag)(t).IsCompact()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: improve performance.
|
||||||
|
func (t *Tag) lang() language.Language { return t.tag().LangID }
|
||||||
|
func (t *Tag) region() language.Region { return t.tag().RegionID }
|
||||||
|
func (t *Tag) script() language.Script { return t.tag().ScriptID }
|
||||||
|
|
||||||
|
// Make is a convenience wrapper for Parse that omits the error.
|
||||||
|
// In case of an error, a sensible default is returned.
|
||||||
|
func Make(s string) Tag {
|
||||||
|
return Default.Make(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make is a convenience wrapper for c.Parse that omits the error.
|
||||||
|
// In case of an error, a sensible default is returned.
|
||||||
|
func (c CanonType) Make(s string) Tag {
|
||||||
|
t, _ := c.Parse(s)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raw returns the raw base language, script and region, without making an
|
||||||
|
// attempt to infer their values.
|
||||||
|
func (t Tag) Raw() (b Base, s Script, r Region) {
|
||||||
|
tt := t.tag()
|
||||||
|
return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsRoot returns true if t is equal to language "und".
|
||||||
|
func (t Tag) IsRoot() bool {
|
||||||
|
return compact.Tag(t).IsRoot()
|
||||||
|
}
|
||||||
|
|
||||||
|
// CanonType can be used to enable or disable various types of canonicalization.
type CanonType int

// Individual canonicalization flags. Each flag occupies its own bit, so flags
// can be combined with the | operator.
const (
	// Replace deprecated base languages with their preferred replacements.
	DeprecatedBase CanonType = 1 << iota
	// Replace deprecated scripts with their preferred replacements.
	DeprecatedScript
	// Replace deprecated regions with their preferred replacements.
	DeprecatedRegion
	// Remove redundant scripts.
	SuppressScript
	// Normalize legacy encodings. This includes legacy languages defined in
	// CLDR as well as bibliographic codes defined in ISO-639.
	Legacy
	// Map the dominant language of a macro language group to the macro
	// language subtag. For example cmn -> zh.
	Macro
	// The CLDR flag should be used if full compatibility with CLDR is
	// required. There are a few cases where language.Tag may differ from
	// CLDR. To follow all of CLDR's suggestions, use All|CLDR.
	CLDR
)

// Predefined combinations of the canonicalization flags.
const (
	// Raw can be used to Compose or Parse without Canonicalization.
	Raw CanonType = 0

	// Replace all deprecated tags with their preferred replacements.
	Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion

	// All canonicalizations recommended by BCP 47.
	BCP47 = Deprecated | SuppressScript

	// All canonicalizations.
	All = BCP47 | Legacy | Macro

	// Default is the canonicalization used by Parse, Make and Compose. To
	// preserve as much information as possible, canonicalizations that remove
	// potentially valuable information are not included. The Matcher is
	// designed to recognize similar tags that would be the same if
	// they were canonicalized using All.
	Default = Deprecated | Legacy

	// canonLang is the set of flags that affect the base language.
	canonLang = DeprecatedBase | Legacy | Macro

	// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
)
|
||||||
|
|
||||||
|
// canonicalize returns the canonicalized equivalent of the tag and
|
||||||
|
// whether there was any change.
|
||||||
|
func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
|
||||||
|
if c == Raw {
|
||||||
|
return t, false
|
||||||
|
}
|
||||||
|
changed := false
|
||||||
|
if c&SuppressScript != 0 {
|
||||||
|
if t.LangID.SuppressScript() == t.ScriptID {
|
||||||
|
t.ScriptID = 0
|
||||||
|
changed = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if c&canonLang != 0 {
|
||||||
|
for {
|
||||||
|
if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
|
||||||
|
switch aliasType {
|
||||||
|
case language.Legacy:
|
||||||
|
if c&Legacy != 0 {
|
||||||
|
if t.LangID == _sh && t.ScriptID == 0 {
|
||||||
|
t.ScriptID = _Latn
|
||||||
|
}
|
||||||
|
t.LangID = l
|
||||||
|
changed = true
|
||||||
|
}
|
||||||
|
case language.Macro:
|
||||||
|
if c&Macro != 0 {
|
||||||
|
// We deviate here from CLDR. The mapping "nb" -> "no"
|
||||||
|
// qualifies as a typical Macro language mapping. However,
|
||||||
|
// for legacy reasons, CLDR maps "no", the macro language
|
||||||
|
// code for Norwegian, to the dominant variant "nb". This
|
||||||
|
// change is currently under consideration for CLDR as well.
|
||||||
|
// See https://unicode.org/cldr/trac/ticket/2698 and also
|
||||||
|
// https://unicode.org/cldr/trac/ticket/1790 for some of the
|
||||||
|
// practical implications. TODO: this check could be removed
|
||||||
|
// if CLDR adopts this change.
|
||||||
|
if c&CLDR == 0 || t.LangID != _nb {
|
||||||
|
changed = true
|
||||||
|
t.LangID = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case language.Deprecated:
|
||||||
|
if c&DeprecatedBase != 0 {
|
||||||
|
if t.LangID == _mo && t.RegionID == 0 {
|
||||||
|
t.RegionID = _MD
|
||||||
|
}
|
||||||
|
t.LangID = l
|
||||||
|
changed = true
|
||||||
|
// Other canonicalization types may still apply.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
|
||||||
|
t.LangID = _nb
|
||||||
|
changed = true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if c&DeprecatedScript != 0 {
|
||||||
|
if t.ScriptID == _Qaai {
|
||||||
|
changed = true
|
||||||
|
t.ScriptID = _Zinh
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if c&DeprecatedRegion != 0 {
|
||||||
|
if r := t.RegionID.Canonicalize(); r != t.RegionID {
|
||||||
|
changed = true
|
||||||
|
t.RegionID = r
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t, changed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Canonicalize returns the canonicalized equivalent of the tag.
|
||||||
|
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
|
||||||
|
// First try fast path.
|
||||||
|
if t.isCompact() {
|
||||||
|
if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// It is unlikely that one will canonicalize a tag after matching. So do
|
||||||
|
// a slow but simple approach here.
|
||||||
|
if tag, changed := canonicalize(c, t.tag()); changed {
|
||||||
|
tag.RemakeString()
|
||||||
|
return makeTag(tag), nil
|
||||||
|
}
|
||||||
|
return t, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in Cyrillic or Latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int

const (
	No    Confidence = iota // full confidence that there was no match
	Low                     // most likely value picked out of a set of alternatives
	High                    // value is generally assumed to be the correct match
	Exact                   // exact match or explicitly specified value
)

// confName holds the display name of each Confidence level, indexed by value.
var confName = []string{"No", "Low", "High", "Exact"}

// String returns the name of the confidence level ("No", "Low", "High" or
// "Exact"). For a value outside the defined range it returns "Unknown"
// instead of panicking on an out-of-range index.
func (c Confidence) String() string {
	if c < 0 || int(c) >= len(confName) {
		return "Unknown"
	}
	return confName[c]
}
|
||||||
|
|
||||||
|
// String returns the canonical string representation of the language tag.
|
||||||
|
func (t Tag) String() string {
|
||||||
|
return t.tag().String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalText implements encoding.TextMarshaler.
|
||||||
|
func (t Tag) MarshalText() (text []byte, err error) {
|
||||||
|
return t.tag().MarshalText()
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||||
|
func (t *Tag) UnmarshalText(text []byte) error {
|
||||||
|
var tag language.Tag
|
||||||
|
err := tag.UnmarshalText(text)
|
||||||
|
*t = makeTag(tag)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Base returns the base language of the language tag. If the base language is
|
||||||
|
// unspecified, an attempt will be made to infer it from the context.
|
||||||
|
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||||
|
func (t Tag) Base() (Base, Confidence) {
|
||||||
|
if b := t.lang(); b != 0 {
|
||||||
|
return Base{b}, Exact
|
||||||
|
}
|
||||||
|
tt := t.tag()
|
||||||
|
c := High
|
||||||
|
if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
|
||||||
|
c = Low
|
||||||
|
}
|
||||||
|
if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
|
||||||
|
return Base{tag.LangID}, c
|
||||||
|
}
|
||||||
|
return Base{0}, No
|
||||||
|
}
|
||||||
|
|
||||||
|
// Script infers the script for the language tag. If it was not explicitly given, it will infer
|
||||||
|
// a most likely candidate.
|
||||||
|
// If more than one script is commonly used for a language, the most likely one
|
||||||
|
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
|
||||||
|
// for Serbian.
|
||||||
|
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
|
||||||
|
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
|
||||||
|
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
|
||||||
|
// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
|
||||||
|
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
|
||||||
|
// Note that an inferred script is never guaranteed to be the correct one. Latin is
|
||||||
|
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
|
||||||
|
// in the past. Also, the script that is commonly used may change over time.
|
||||||
|
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||||
|
func (t Tag) Script() (Script, Confidence) {
|
||||||
|
if scr := t.script(); scr != 0 {
|
||||||
|
return Script{scr}, Exact
|
||||||
|
}
|
||||||
|
tt := t.tag()
|
||||||
|
sc, c := language.Script(_Zzzz), No
|
||||||
|
if scr := tt.LangID.SuppressScript(); scr != 0 {
|
||||||
|
// Note: it is not always the case that a language with a suppress
|
||||||
|
// script value is only written in one script (e.g. kk, ms, pa).
|
||||||
|
if tt.RegionID == 0 {
|
||||||
|
return Script{scr}, High
|
||||||
|
}
|
||||||
|
sc, c = scr, High
|
||||||
|
}
|
||||||
|
if tag, err := tt.Maximize(); err == nil {
|
||||||
|
if tag.ScriptID != sc {
|
||||||
|
sc, c = tag.ScriptID, Low
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||||
|
if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
|
||||||
|
sc, c = tag.ScriptID, Low
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Script{sc}, c
|
||||||
|
}
|
||||||
|
|
||||||
|
// Region returns the region for the language tag. If it was not explicitly given, it will
|
||||||
|
// infer a most likely candidate from the context.
|
||||||
|
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||||
|
func (t Tag) Region() (Region, Confidence) {
|
||||||
|
if r := t.region(); r != 0 {
|
||||||
|
return Region{r}, Exact
|
||||||
|
}
|
||||||
|
tt := t.tag()
|
||||||
|
if tt, err := tt.Maximize(); err == nil {
|
||||||
|
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
|
||||||
|
}
|
||||||
|
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||||
|
if tag, err := tt.Maximize(); err == nil {
|
||||||
|
return Region{tag.RegionID}, Low
|
||||||
|
}
|
||||||
|
return Region{_ZZ}, No // TODO: return world instead of undetermined?
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variants returns the variants specified explicitly for this language tag.
|
||||||
|
// or nil if no variant was specified.
|
||||||
|
func (t Tag) Variants() []Variant {
|
||||||
|
if !compact.Tag(t).MayHaveVariants() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
v := []Variant{}
|
||||||
|
x, str := "", t.tag().Variants()
|
||||||
|
for str != "" {
|
||||||
|
x, str = nextToken(str)
|
||||||
|
v = append(v, Variant{x})
|
||||||
|
}
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||||
|
// specific language are substituted with fields from the parent language.
|
||||||
|
// The parent for a language may change for newer versions of CLDR.
|
||||||
|
//
|
||||||
|
// Parent returns a tag for a less specific language that is mutually
|
||||||
|
// intelligible or Und if there is no such language. This may not be the same as
|
||||||
|
// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
|
||||||
|
// is "zh-Hant", and the parent of "zh-Hant" is "und".
|
||||||
|
func (t Tag) Parent() Tag {
|
||||||
|
return Tag(compact.Tag(t).Parent())
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextToken splits the first token off s. The first byte of s is treated as
// a separator and skipped; the token t runs from there up to, but not
// including, the next "-". tail is the remainder of s and still starts with
// that "-", so it can be fed straight back into nextToken. If there is no
// further "-", tail is empty. An empty s yields two empty strings rather than
// panicking on the slice expression.
func nextToken(s string) (t, tail string) {
	if s == "" {
		return "", ""
	}
	p := strings.IndexByte(s[1:], '-')
	if p == -1 {
		return s[1:], ""
	}
	// p is relative to s[1:]; shift it so that it indexes into s.
	p++
	return s[1:p], s[p:]
}
|
||||||
|
|
||||||
|
// Extension is a single BCP 47 extension.
|
||||||
|
type Extension struct {
|
||||||
|
s string
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the string representation of the extension, including the
|
||||||
|
// type tag.
|
||||||
|
func (e Extension) String() string {
|
||||||
|
return e.s
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseExtension parses s as an extension and returns it on success.
|
||||||
|
func ParseExtension(s string) (e Extension, err error) {
|
||||||
|
ext, err := language.ParseExtension(s)
|
||||||
|
return Extension{ext}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type returns the one-byte extension type of e. It returns 0 for the zero
|
||||||
|
// exception.
|
||||||
|
func (e Extension) Type() byte {
|
||||||
|
if e.s == "" {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return e.s[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tokens returns the list of tokens of e.
|
||||||
|
func (e Extension) Tokens() []string {
|
||||||
|
return strings.Split(e.s, "-")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extension returns the extension of type x for tag t. It will return
|
||||||
|
// false for ok if t does not have the requested extension. The returned
|
||||||
|
// extension will be invalid in this case.
|
||||||
|
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
|
||||||
|
if !compact.Tag(t).MayHaveExtensions() {
|
||||||
|
return Extension{}, false
|
||||||
|
}
|
||||||
|
e, ok := t.tag().Extension(x)
|
||||||
|
return Extension{e}, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extensions returns all extensions of t.
|
||||||
|
func (t Tag) Extensions() []Extension {
|
||||||
|
if !compact.Tag(t).MayHaveExtensions() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
e := []Extension{}
|
||||||
|
for _, ext := range t.tag().Extensions() {
|
||||||
|
e = append(e, Extension{ext})
|
||||||
|
}
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
|
// TypeForKey returns the type associated with the given key, where key and type
|
||||||
|
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||||
|
//
|
||||||
|
// If there are multiple types associated with a key, only the first will be
|
||||||
|
// returned. If there is no type associated with a key, it returns the empty
|
||||||
|
// string.
|
||||||
|
func (t Tag) TypeForKey(key string) string {
|
||||||
|
if !compact.Tag(t).MayHaveExtensions() {
|
||||||
|
if key != "rg" && key != "va" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t.tag().TypeForKey(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||||
|
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
// An empty value removes an existing pair with the same key.
|
||||||
|
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||||
|
tt, err := t.tag().SetTypeForKey(key, value)
|
||||||
|
return makeTag(tt), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// NumCompactTags is the number of compact tags. The maximum tag is
|
||||||
|
// NumCompactTags-1.
|
||||||
|
const NumCompactTags = compact.NumCompactTags
|
||||||
|
|
||||||
|
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
|
||||||
|
// for which data exists in the text repository.The index will change over time
|
||||||
|
// and should not be stored in persistent storage. If t does not match a compact
|
||||||
|
// index, exact will be false and the compact index will be returned for the
|
||||||
|
// first match after repeatedly taking the Parent of t.
|
||||||
|
func CompactIndex(t Tag) (index int, exact bool) {
|
||||||
|
id, exact := compact.LanguageID(compact.Tag(t))
|
||||||
|
return int(id), exact
|
||||||
|
}
|
||||||
|
|
||||||
|
// root is the zero value of the internal language.Tag.
var root language.Tag
|
||||||
|
|
||||||
|
// Base is an ISO 639 language code, used for encoding the base language
|
||||||
|
// of a language tag.
|
||||||
|
type Base struct {
|
||||||
|
langID language.Language
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||||
|
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||||
|
// or another error if another error occurred.
|
||||||
|
func ParseBase(s string) (Base, error) {
|
||||||
|
l, err := language.ParseBase(s)
|
||||||
|
return Base{l}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the BCP 47 representation of the base language.
|
||||||
|
func (b Base) String() string {
|
||||||
|
return b.langID.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ISO3 returns the ISO 639-3 language code.
|
||||||
|
func (b Base) ISO3() string {
|
||||||
|
return b.langID.ISO3()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||||
|
func (b Base) IsPrivateUse() bool {
|
||||||
|
return b.langID.IsPrivateUse()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Script is a 4-letter ISO 15924 code for representing scripts.
|
||||||
|
// It is idiomatically represented in title case.
|
||||||
|
type Script struct {
|
||||||
|
scriptID language.Script
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseScript parses a 4-letter ISO 15924 code.
|
||||||
|
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||||
|
// or another error if another error occurred.
|
||||||
|
func ParseScript(s string) (Script, error) {
|
||||||
|
sc, err := language.ParseScript(s)
|
||||||
|
return Script{sc}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the script code in title case.
|
||||||
|
// It returns "Zzzz" for an unspecified script.
|
||||||
|
func (s Script) String() string {
|
||||||
|
return s.scriptID.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||||
|
func (s Script) IsPrivateUse() bool {
|
||||||
|
return s.scriptID.IsPrivateUse()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
|
||||||
|
type Region struct {
|
||||||
|
regionID language.Region
|
||||||
|
}
|
||||||
|
|
||||||
|
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||||
|
// It returns an error if r is not a valid code.
|
||||||
|
func EncodeM49(r int) (Region, error) {
|
||||||
|
rid, err := language.EncodeM49(r)
|
||||||
|
return Region{rid}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||||
|
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||||
|
// or another error if another error occurred.
|
||||||
|
func ParseRegion(s string) (Region, error) {
|
||||||
|
r, err := language.ParseRegion(s)
|
||||||
|
return Region{r}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the BCP 47 representation for the region.
|
||||||
|
// It returns "ZZ" for an unspecified region.
|
||||||
|
func (r Region) String() string {
|
||||||
|
return r.regionID.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ISO3 returns the 3-letter ISO code of r.
|
||||||
|
// Note that not all regions have a 3-letter ISO code.
|
||||||
|
// In such cases this method returns "ZZZ".
|
||||||
|
func (r Region) ISO3() string {
|
||||||
|
return r.regionID.ISO3()
|
||||||
|
}
|
||||||
|
|
||||||
|
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||||
|
// is not defined for r.
|
||||||
|
func (r Region) M49() int {
|
||||||
|
return r.regionID.M49()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||||
|
// may include private-use tags that are assigned by CLDR and used in this
|
||||||
|
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||||
|
func (r Region) IsPrivateUse() bool {
|
||||||
|
return r.regionID.IsPrivateUse()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsCountry returns whether this region is a country or autonomous area. This
|
||||||
|
// includes non-standard definitions from CLDR.
|
||||||
|
func (r Region) IsCountry() bool {
|
||||||
|
return r.regionID.IsCountry()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsGroup returns whether this region defines a collection of regions. This
|
||||||
|
// includes non-standard definitions from CLDR.
|
||||||
|
func (r Region) IsGroup() bool {
|
||||||
|
return r.regionID.IsGroup()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contains returns whether Region c is contained by Region r. It returns true
|
||||||
|
// if c == r.
|
||||||
|
func (r Region) Contains(c Region) bool {
|
||||||
|
return r.regionID.Contains(c.regionID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||||
|
// In all other cases it returns either the region itself or an error.
|
||||||
|
//
|
||||||
|
// This method may return an error for a region for which there exists a
|
||||||
|
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||||
|
// region will already be canonicalized it was obtained from a Tag that was
|
||||||
|
// obtained using any of the default methods.
|
||||||
|
func (r Region) TLD() (Region, error) {
|
||||||
|
tld, err := r.regionID.TLD()
|
||||||
|
return Region{tld}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Canonicalize returns the region or a possible replacement if the region is
|
||||||
|
// deprecated. It will not return a replacement for deprecated regions that
|
||||||
|
// are split into multiple regions.
|
||||||
|
func (r Region) Canonicalize() Region {
|
||||||
|
return Region{r.regionID.Canonicalize()}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||||
|
type Variant struct {
|
||||||
|
variant string
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||||
|
// a valid variant.
|
||||||
|
func ParseVariant(s string) (Variant, error) {
|
||||||
|
v, err := language.ParseVariant(s)
|
||||||
|
return Variant{v.String()}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the string representation of the variant.
|
||||||
|
func (v Variant) String() string {
|
||||||
|
return v.variant
|
||||||
|
}
|
735
vendor/golang.org/x/text/language/match.go
generated
vendored
Normal file
735
vendor/golang.org/x/text/language/match.go
generated
vendored
Normal file
|
@ -0,0 +1,735 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A MatchOption configures a Matcher. Options are functions that are applied
// to the matcher while it is being constructed.
type MatchOption func(*matcher)
|
||||||
|
|
||||||
|
// PreferSameScript will, in the absence of a match, result in the first
|
||||||
|
// preferred tag with the same script as a supported tag to match this supported
|
||||||
|
// tag. The default is currently true, but this may change in the future.
|
||||||
|
func PreferSameScript(preferSame bool) MatchOption {
|
||||||
|
return func(m *matcher) { m.preferSameScript = preferSame }
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
|
||||||
|
// There doesn't seem to be too much need for multiple types.
|
||||||
|
// Making it a concrete type allows MatchStrings to be a method, which will
|
||||||
|
// improve its discoverability.
|
||||||
|
|
||||||
|
// MatchStrings parses and matches the given strings until one of them matches
|
||||||
|
// the language in the Matcher. A string may be an Accept-Language header as
|
||||||
|
// handled by ParseAcceptLanguage. The default language is returned if no
|
||||||
|
// other language matched.
|
||||||
|
func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
|
||||||
|
for _, accept := range lang {
|
||||||
|
desired, _, err := ParseAcceptLanguage(accept)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if tag, index, conf := m.Match(desired...); conf != No {
|
||||||
|
return tag, index
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tag, index, _ = m.Match()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matcher is the interface that wraps the Match method.
//
// Match returns the best match for any of the given tags, along with
// a unique index associated with the returned tag and a confidence
// score.
type Matcher interface {
	// Match takes the desired tags t and returns the selected tag, its
	// index, and the confidence c of the match.
	Match(t ...Tag) (tag Tag, index int, c Confidence)
}
|
||||||
|
|
||||||
|
// Comprehends reports the confidence score for a speaker of a given language
|
||||||
|
// to being able to comprehend the written form of an alternative language.
|
||||||
|
func Comprehends(speaker, alternative Tag) Confidence {
|
||||||
|
_, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
|
||||||
|
// against a list of supported tags based on written intelligibility, closeness
|
||||||
|
// of dialect, equivalence of subtags and various other rules. It is initialized
|
||||||
|
// with the list of supported tags. The first element is used as the default
|
||||||
|
// value in case no match is found.
|
||||||
|
//
|
||||||
|
// Its Match method matches the first of the given Tags to reach a certain
|
||||||
|
// confidence threshold. The tags passed to Match should therefore be specified
|
||||||
|
// in order of preference. Extensions are ignored for matching.
|
||||||
|
//
|
||||||
|
// The index returned by the Match method corresponds to the index of the
|
||||||
|
// matched tag in t, but is augmented with the Unicode extension ('u')of the
|
||||||
|
// corresponding preferred tag. This allows user locale options to be passed
|
||||||
|
// transparently.
|
||||||
|
func NewMatcher(t []Tag, options ...MatchOption) Matcher {
|
||||||
|
return newMatcher(t, options)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
|
||||||
|
var tt language.Tag
|
||||||
|
match, w, c := m.getBest(want...)
|
||||||
|
if match != nil {
|
||||||
|
tt, index = match.tag, match.index
|
||||||
|
} else {
|
||||||
|
// TODO: this should be an option
|
||||||
|
tt = m.default_.tag
|
||||||
|
if m.preferSameScript {
|
||||||
|
outer:
|
||||||
|
for _, w := range want {
|
||||||
|
script, _ := w.Script()
|
||||||
|
if script.scriptID == 0 {
|
||||||
|
// Don't do anything if there is no script, such as with
|
||||||
|
// private subtags.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for i, h := range m.supported {
|
||||||
|
if script.scriptID == h.maxScript {
|
||||||
|
tt, index = h.tag, i
|
||||||
|
break outer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO: select first language tag based on script.
|
||||||
|
}
|
||||||
|
if w.RegionID != tt.RegionID && w.RegionID != 0 {
|
||||||
|
if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
|
||||||
|
tt.RegionID = w.RegionID
|
||||||
|
tt.RemakeString()
|
||||||
|
} else if r := w.RegionID.String(); len(r) == 2 {
|
||||||
|
// TODO: also filter macro and deprecated.
|
||||||
|
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Copy options from the user-provided tag into the result tag. This is hard
|
||||||
|
// to do after the fact, so we do it here.
|
||||||
|
// TODO: add in alternative variants to -u-va-.
|
||||||
|
// TODO: add preferred region to -u-rg-.
|
||||||
|
if e := w.Extensions(); len(e) > 0 {
|
||||||
|
b := language.Builder{}
|
||||||
|
b.SetTag(tt)
|
||||||
|
for _, e := range e {
|
||||||
|
b.AddExt(e)
|
||||||
|
}
|
||||||
|
tt = b.Make()
|
||||||
|
}
|
||||||
|
return makeTag(tt), index, c
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||||
|
|
||||||
|
// func (t *Tag) setTagsFrom(id Tag) {
|
||||||
|
// t.LangID = id.LangID
|
||||||
|
// t.ScriptID = id.ScriptID
|
||||||
|
// t.RegionID = id.RegionID
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Tag Matching
|
||||||
|
// CLDR defines an algorithm for finding the best match between two sets of language
|
||||||
|
// tags. The basic algorithm defines how to score a possible match and then find
|
||||||
|
// the match with the best score
|
||||||
|
// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
|
||||||
|
// Using scoring has several disadvantages. The scoring obfuscates the importance of
|
||||||
|
// the various factors considered, making the algorithm harder to understand. Using
|
||||||
|
// scoring also requires the full score to be computed for each pair of tags.
|
||||||
|
//
|
||||||
|
// We will use a different algorithm which aims to have the following properties:
|
||||||
|
// - clarity on the precedence of the various selection factors, and
|
||||||
|
// - improved performance by allowing early termination of a comparison.
|
||||||
|
//
|
||||||
|
// Matching algorithm (overview)
|
||||||
|
// Input:
|
||||||
|
// - supported: a set of supported tags
|
||||||
|
// - default: the default tag to return in case there is no match
|
||||||
|
// - desired: list of desired tags, ordered by preference, starting with
|
||||||
|
// the most-preferred.
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// 1) Set the best match to the lowest confidence level
|
||||||
|
// 2) For each tag in "desired":
|
||||||
|
// a) For each tag in "supported":
|
||||||
|
// 1) compute the match between the two tags.
|
||||||
|
// 2) if the match is better than the previous best match, replace it
|
||||||
|
// with the new match. (see next section)
|
||||||
|
// b) if the current best match is Exact and pin is true the result will be
|
||||||
|
// frozen to the language found thus far, although better matches may
|
||||||
|
// still be found for the same language.
|
||||||
|
// 3) If the best match so far is below a certain threshold, return "default".
|
||||||
|
//
|
||||||
|
// Ranking:
|
||||||
|
// We use two phases to determine whether one pair of tags are a better match
|
||||||
|
// than another pair of tags. First, we determine a rough confidence level. If the
|
||||||
|
// levels are different, the one with the highest confidence wins.
|
||||||
|
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
|
||||||
|
// rules.
|
||||||
|
//
|
||||||
|
// The confidence level of matching a pair of tags is determined by finding the
|
||||||
|
// lowest confidence level of any matches of the corresponding subtags (the
|
||||||
|
// result is deemed as good as its weakest link).
|
||||||
|
// We define the following levels:
|
||||||
|
// Exact - An exact match of a subtag, before adding likely subtags.
|
||||||
|
// MaxExact - An exact match of a subtag, after adding likely subtags.
|
||||||
|
// [See Note 2].
|
||||||
|
// High - High level of mutual intelligibility between different subtag
|
||||||
|
// variants.
|
||||||
|
// Low - Low level of mutual intelligibility between different subtag
|
||||||
|
// variants.
|
||||||
|
// No - No mutual intelligibility.
|
||||||
|
//
|
||||||
|
// The following levels can occur for each type of subtag:
|
||||||
|
// Base: Exact, MaxExact, High, Low, No
|
||||||
|
// Script: Exact, MaxExact [see Note 3], Low, No
|
||||||
|
// Region: Exact, MaxExact, High
|
||||||
|
// Variant: Exact, High
|
||||||
|
// Private: Exact, No
|
||||||
|
//
|
||||||
|
// Any result with a confidence level of Low or higher is deemed a possible match.
|
||||||
|
// Once a desired tag matches any of the supported tags with a level of MaxExact
|
||||||
|
// or higher, the next desired tag is not considered (see Step 2.b).
|
||||||
|
// Note that CLDR provides languageMatching data that defines close equivalence
|
||||||
|
// classes for base languages, scripts and regions.
|
||||||
|
//
|
||||||
|
// Tie-breaking
|
||||||
|
// If we get the same confidence level for two matches, we apply a sequence of
|
||||||
|
// tie-breaking rules. The first that succeeds defines the result. The rules are
|
||||||
|
// applied in the following order.
|
||||||
|
// 1) Original language was defined and was identical.
|
||||||
|
// 2) Original region was defined and was identical.
|
||||||
|
// 3) Distance between two maximized regions was the smallest.
|
||||||
|
// 4) Original script was defined and was identical.
|
||||||
|
// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
|
||||||
|
// If there is still no winner after these rules are applied, the first match
|
||||||
|
// found wins.
|
||||||
|
//
|
||||||
|
// Notes:
|
||||||
|
// [2] In practice, as matching of Exact is done in a separate phase from
|
||||||
|
// matching the other levels, we reuse the Exact level to mean MaxExact in
|
||||||
|
// the second phase. As a consequence, we only need the levels defined by
|
||||||
|
// the Confidence type. The MaxExact confidence level is mapped to High in
|
||||||
|
// the public API.
|
||||||
|
// [3] We do not differentiate between maximized script values that were derived
|
||||||
|
// from suppressScript versus most likely tag data. We determined that in
|
||||||
|
// ranking the two, one ranks just after the other. Moreover, the two cannot
|
||||||
|
// occur concurrently. As a consequence, they are identical for practical
|
||||||
|
// purposes.
|
||||||
|
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
|
||||||
|
// the MaxExact level to allow iw vs he to still be a closer match than
|
||||||
|
// en-AU vs en-US, for example.
|
||||||
|
// [5] In CLDR a locale inherits fields that are unspecified for this locale
|
||||||
|
// from its parent. Therefore, if a locale is a parent of another locale,
|
||||||
|
// it is a strong measure for closeness, especially when no other tie
|
||||||
|
// breaker rule applies. One could also argue it is inconsistent, for
|
||||||
|
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
|
||||||
|
// though its parent is pt-PT according to the inheritance rules.
|
||||||
|
//
|
||||||
|
// Implementation Details:
|
||||||
|
// There are several performance considerations worth pointing out. Most notably,
|
||||||
|
// we preprocess as much as possible (within reason) at the time of creation of a
|
||||||
|
// matcher. This includes:
|
||||||
|
// - creating a per-language map, which includes data for the raw base language
|
||||||
|
// and its canonicalized variant (if applicable),
|
||||||
|
// - expanding entries for the equivalence classes defined in CLDR's
|
||||||
|
// languageMatch data.
|
||||||
|
// The per-language map ensures that typically only a very small number of tags
|
||||||
|
// need to be considered. The pre-expansion of canonicalized subtags and
|
||||||
|
// equivalence classes reduces the amount of map lookups that need to be done at
|
||||||
|
// runtime.
|
||||||
|
|
||||||
|
// matcher keeps a set of supported language tags, indexed by language.
type matcher struct {
	// default_ is the tag returned by Match when nothing matches. newMatcher
	// sets it to the first entry indexed under the language of the first
	// supported tag, or to an empty haveTag when no tags are supported.
	default_ *haveTag
	// supported holds one haveTag per supported tag, in the order given.
	supported []*haveTag
	// index maps a language to the candidate tags to consider for it.
	index map[language.Language]*matchHeader
	// passSettings is not referenced in this part of the file.
	// NOTE(review): confirm whether it is still used anywhere.
	passSettings bool
	// preferSameScript enables the same-script fallback in Match; newMatcher
	// defaults it to true.
	preferSameScript bool
}
|
||||||
|
|
||||||
|
// matchHeader has the lists of tags for exact matches and matches based on
// maximized and canonicalized tags for a given language.
type matchHeader struct {
	// haveTags are the candidate tags for this language, in insertion order.
	haveTags []*haveTag
	// original is set once addIfNew has been called on this header with
	// exact == true. newMatcher only derives equivalence entries from headers
	// that have it set.
	original bool
}
|
||||||
|
|
||||||
|
// haveTag holds a supported Tag and its maximized script and region. The maximized
// or canonicalized language is not stored as it is not needed during matching.
//
// Field order matters: makeHaveTag builds values with a positional literal.
type haveTag struct {
	// tag is the supported tag as supplied by the caller.
	tag language.Tag

	// index of this tag in the original list of supported tags.
	index int

	// conf is the maximum confidence that can result from matching this haveTag.
	// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
	conf Confidence

	// Maximized region and script.
	maxRegion language.Region
	maxScript language.Script

	// altScript may be checked as an alternative match to maxScript. If altScript
	// matches, the confidence level for this match is Low. Theoretically there
	// could be multiple alternative scripts. This does not occur in practice.
	altScript language.Script

	// nextMax is the index of the next haveTag with the same maximized tags.
	// Zero means there is no further entry in the chain.
	nextMax uint16
}
|
||||||
|
|
||||||
|
func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
|
||||||
|
max := tag
|
||||||
|
if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
|
||||||
|
max, _ = canonicalize(All, max)
|
||||||
|
max, _ = max.Maximize()
|
||||||
|
max.RemakeString()
|
||||||
|
}
|
||||||
|
return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
|
||||||
|
}
|
||||||
|
|
||||||
|
// altScript returns an alternative script that may match the given script with
|
||||||
|
// a low confidence. At the moment, the langMatch data allows for at most one
|
||||||
|
// script to map to another and we rely on this to keep the code simple.
|
||||||
|
func altScript(l language.Language, s language.Script) language.Script {
|
||||||
|
for _, alt := range matchScript {
|
||||||
|
// TODO: also match cases where language is not the same.
|
||||||
|
if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
|
||||||
|
language.Script(alt.haveScript) == s {
|
||||||
|
return language.Script(alt.wantScript)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
|
||||||
|
// Tags that have the same maximized values are linked by index.
|
||||||
|
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
|
||||||
|
h.original = h.original || exact
|
||||||
|
// Don't add new exact matches.
|
||||||
|
for _, v := range h.haveTags {
|
||||||
|
if equalsRest(v.tag, n.tag) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Allow duplicate maximized tags, but create a linked list to allow quickly
|
||||||
|
// comparing the equivalents and bail out.
|
||||||
|
for i, v := range h.haveTags {
|
||||||
|
if v.maxScript == n.maxScript &&
|
||||||
|
v.maxRegion == n.maxRegion &&
|
||||||
|
v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
|
||||||
|
for h.haveTags[i].nextMax != 0 {
|
||||||
|
i = int(h.haveTags[i].nextMax)
|
||||||
|
}
|
||||||
|
h.haveTags[i].nextMax = uint16(len(h.haveTags))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.haveTags = append(h.haveTags, &n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// header returns the matchHeader for the given language. It creates one if
|
||||||
|
// it doesn't already exist.
|
||||||
|
func (m *matcher) header(l language.Language) *matchHeader {
|
||||||
|
if h := m.index[l]; h != nil {
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
h := &matchHeader{}
|
||||||
|
m.index[l] = h
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
|
func toConf(d uint8) Confidence {
|
||||||
|
if d <= 10 {
|
||||||
|
return High
|
||||||
|
}
|
||||||
|
if d < 30 {
|
||||||
|
return Low
|
||||||
|
}
|
||||||
|
return No
|
||||||
|
}
|
||||||
|
|
||||||
|
// newMatcher builds an index for the given supported tags and returns it as
|
||||||
|
// a matcher. It also expands the index by considering various equivalence classes
|
||||||
|
// for a given tag.
|
||||||
|
func newMatcher(supported []Tag, options []MatchOption) *matcher {
|
||||||
|
m := &matcher{
|
||||||
|
index: make(map[language.Language]*matchHeader),
|
||||||
|
preferSameScript: true,
|
||||||
|
}
|
||||||
|
for _, o := range options {
|
||||||
|
o(m)
|
||||||
|
}
|
||||||
|
if len(supported) == 0 {
|
||||||
|
m.default_ = &haveTag{}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
// Add supported languages to the index. Add exact matches first to give
|
||||||
|
// them precedence.
|
||||||
|
for i, tag := range supported {
|
||||||
|
tt := tag.tag()
|
||||||
|
pair, _ := makeHaveTag(tt, i)
|
||||||
|
m.header(tt.LangID).addIfNew(pair, true)
|
||||||
|
m.supported = append(m.supported, &pair)
|
||||||
|
}
|
||||||
|
m.default_ = m.header(supported[0].lang()).haveTags[0]
|
||||||
|
// Keep these in two different loops to support the case that two equivalent
|
||||||
|
// languages are distinguished, such as iw and he.
|
||||||
|
for i, tag := range supported {
|
||||||
|
tt := tag.tag()
|
||||||
|
pair, max := makeHaveTag(tt, i)
|
||||||
|
if max != tt.LangID {
|
||||||
|
m.header(max).addIfNew(pair, true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// update is used to add indexes in the map for equivalent languages.
|
||||||
|
// update will only add entries to original indexes, thus not computing any
|
||||||
|
// transitive relations.
|
||||||
|
update := func(want, have uint16, conf Confidence) {
|
||||||
|
if hh := m.index[language.Language(have)]; hh != nil {
|
||||||
|
if !hh.original {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
hw := m.header(language.Language(want))
|
||||||
|
for _, ht := range hh.haveTags {
|
||||||
|
v := *ht
|
||||||
|
if conf < v.conf {
|
||||||
|
v.conf = conf
|
||||||
|
}
|
||||||
|
v.nextMax = 0 // this value needs to be recomputed
|
||||||
|
if v.altScript != 0 {
|
||||||
|
v.altScript = altScript(language.Language(want), v.maxScript)
|
||||||
|
}
|
||||||
|
hw.addIfNew(v, conf == Exact && hh.original)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add entries for languages with mutual intelligibility as defined by CLDR's
|
||||||
|
// languageMatch data.
|
||||||
|
for _, ml := range matchLang {
|
||||||
|
update(ml.want, ml.have, toConf(ml.distance))
|
||||||
|
if !ml.oneway {
|
||||||
|
update(ml.have, ml.want, toConf(ml.distance))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add entries for possible canonicalizations. This is an optimization to
|
||||||
|
// ensure that only one map lookup needs to be done at runtime per desired tag.
|
||||||
|
// First we match deprecated equivalents. If they are perfect equivalents
|
||||||
|
// (their canonicalization simply substitutes a different language code, but
|
||||||
|
// nothing else), the match confidence is Exact, otherwise it is High.
|
||||||
|
for i, lm := range language.AliasMap {
|
||||||
|
// If deprecated codes match and there is no fiddling with the script
|
||||||
|
// or region, we consider it an exact match.
|
||||||
|
conf := Exact
|
||||||
|
if language.AliasTypes[i] != language.Macro {
|
||||||
|
if !isExactEquivalent(language.Language(lm.From)) {
|
||||||
|
conf = High
|
||||||
|
}
|
||||||
|
update(lm.To, lm.From, conf)
|
||||||
|
}
|
||||||
|
update(lm.From, lm.To, conf)
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// getBest gets the best matching tag in m for any of the given tags, taking into
|
||||||
|
// account the order of preference of the given tags.
|
||||||
|
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
|
||||||
|
best := bestMatch{}
|
||||||
|
for i, ww := range want {
|
||||||
|
w := ww.tag()
|
||||||
|
var max language.Tag
|
||||||
|
// Check for exact match first.
|
||||||
|
h := m.index[w.LangID]
|
||||||
|
if w.LangID != 0 {
|
||||||
|
if h == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Base language is defined.
|
||||||
|
max, _ = canonicalize(Legacy|Deprecated|Macro, w)
|
||||||
|
// A region that is added through canonicalization is stronger than
|
||||||
|
// a maximized region: set it in the original (e.g. mo -> ro-MD).
|
||||||
|
if w.RegionID != max.RegionID {
|
||||||
|
w.RegionID = max.RegionID
|
||||||
|
}
|
||||||
|
// TODO: should we do the same for scripts?
|
||||||
|
// See test case: en, sr, nl ; sh ; sr
|
||||||
|
max, _ = max.Maximize()
|
||||||
|
} else {
|
||||||
|
// Base language is not defined.
|
||||||
|
if h != nil {
|
||||||
|
for i := range h.haveTags {
|
||||||
|
have := h.haveTags[i]
|
||||||
|
if equalsRest(have.tag, w) {
|
||||||
|
return have, w, Exact
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if w.ScriptID == 0 && w.RegionID == 0 {
|
||||||
|
// We skip all tags matching und for approximate matching, including
|
||||||
|
// private tags.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
max, _ = w.Maximize()
|
||||||
|
if h = m.index[max.LangID]; h == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pin := true
|
||||||
|
for _, t := range want[i+1:] {
|
||||||
|
if w.LangID == t.lang() {
|
||||||
|
pin = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check for match based on maximized tag.
|
||||||
|
for i := range h.haveTags {
|
||||||
|
have := h.haveTags[i]
|
||||||
|
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||||
|
if best.conf == Exact {
|
||||||
|
for have.nextMax != 0 {
|
||||||
|
have = h.haveTags[have.nextMax]
|
||||||
|
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||||
|
}
|
||||||
|
return best.have, best.want, best.conf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if best.conf <= No {
|
||||||
|
if len(want) != 0 {
|
||||||
|
return nil, want[0].tag(), No
|
||||||
|
}
|
||||||
|
return nil, language.Tag{}, No
|
||||||
|
}
|
||||||
|
return best.have, best.want, best.conf
|
||||||
|
}
|
||||||
|
|
||||||
|
// bestMatch accumulates the best match so far.
type bestMatch struct {
	// have is the supported tag of the current best match; want is the
	// desired tag it was matched against and conf the confidence reached.
	have *haveTag
	want language.Tag
	conf Confidence
	// pinnedRegion is the maximized region of the desired tag that produced
	// the current best match.
	pinnedRegion language.Region
	// pinLanguage, once set, makes update ignore desired tags with a
	// language different from want's.
	pinLanguage bool
	// sameRegionGroup records the "same group" result of regionGroupDist for
	// the current best match.
	sameRegionGroup bool
	// Cached results from applying tie-breaking rules.
	origLang     bool
	origReg      bool
	paradigmReg  bool
	regGroupDist uint8
	origScript   bool
}
|
||||||
|
|
||||||
|
// update updates the existing best match if the new pair is considered to be a
// better match. To determine if the given pair is a better match, it first
// computes the rough confidence level. If this surpasses the current match, it
// will replace it and update the tie-breaker rule cache. If there is a tie, it
// proceeds with applying a series of tie-breaker rules. If there is no
// conclusive winner after applying the tie-breaker rules, it leaves the current
// match as the preferred match.
//
// If pin is true and have and tag are a strong match, it will henceforth only
// consider matches for this language. This corresponds to the idea that most
// users have a strong preference for the first defined language. A user can
// still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
// be false.
//
// have is the supported candidate, tag the desired tag, and maxScript and
// maxRegion the script and region of the maximized desired tag.
func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
	// Bail if the maximum attainable confidence is below that of the current best match.
	c := have.conf
	if c < m.conf {
		return
	}
	// Don't change the language once we already have found an exact match.
	if m.pinLanguage && tag.LangID != m.want.LangID {
		return
	}
	// Pin the region group if we are comparing tags for the same language.
	if tag.LangID == m.want.LangID && m.sameRegionGroup {
		_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
		if !sameGroup {
			return
		}
	}
	if c == Exact && have.maxScript == maxScript {
		// If there is another language and then another entry of this language,
		// don't pin anything, otherwise pin the language.
		m.pinLanguage = pin
	}
	if equalsRest(have.tag, tag) {
		// Script, region and variants are identical: keep c unchanged.
	} else if have.maxScript != maxScript {
		// There is usually very little comprehension between different scripts.
		// In a few cases there may still be Low comprehension. This possibility
		// is pre-computed and stored in have.altScript.
		if Low < m.conf || have.altScript != maxScript {
			return
		}
		c = Low
	} else if have.maxRegion != maxRegion {
		if High < c {
			// There is usually a small difference between languages across regions.
			c = High
		}
	}

	// We store the results of the computations of the tie-breaker rules along
	// with the best match. There is no need to do the checks once we determine
	// we have a winner, but we do still need to do the tie-breaker computations.
	// We use "beaten" to keep track if we still need to do the checks.
	beaten := false // true if the new pair defeats the current one.
	if c != m.conf {
		if c < m.conf {
			return
		}
		beaten = true
	}

	// Tie-breaker rules:
	// We prefer if the pre-maximized language was specified and identical.
	origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
	if !beaten && m.origLang != origLang {
		if m.origLang {
			return
		}
		beaten = true
	}

	// We prefer if the pre-maximized region was specified and identical.
	origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
	if !beaten && m.origReg != origReg {
		if m.origReg {
			return
		}
		beaten = true
	}

	// Next we prefer the candidate with the smaller region group distance.
	regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
	if !beaten && m.regGroupDist != regGroupDist {
		if regGroupDist > m.regGroupDist {
			return
		}
		beaten = true
	}

	// Next we prefer a candidate whose maximized region is a paradigm locale
	// for the desired language.
	paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
	if !beaten && m.paradigmReg != paradigmReg {
		if !paradigmReg {
			return
		}
		beaten = true
	}

	// Next we prefer if the pre-maximized script was specified and identical.
	origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
	if !beaten && m.origScript != origScript {
		if m.origScript {
			return
		}
		beaten = true
	}

	// Update m to the newly found best match.
	if beaten {
		m.have = have
		m.want = tag
		m.conf = c
		m.pinnedRegion = maxRegion
		m.sameRegionGroup = sameGroup
		m.origLang = origLang
		m.origReg = origReg
		m.paradigmReg = paradigmReg
		m.origScript = origScript
		m.regGroupDist = regGroupDist
	}
}
|
||||||
|
|
||||||
|
func isParadigmLocale(lang language.Language, r language.Region) bool {
|
||||||
|
for _, e := range paradigmLocales {
|
||||||
|
if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// regionGroupDist computes the distance between two regions based on their
|
||||||
|
// CLDR grouping.
|
||||||
|
func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
|
||||||
|
const defaultDistance = 4
|
||||||
|
|
||||||
|
aGroup := uint(regionToGroups[a]) << 1
|
||||||
|
bGroup := uint(regionToGroups[b]) << 1
|
||||||
|
for _, ri := range matchRegion {
|
||||||
|
if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
|
||||||
|
group := uint(1 << (ri.group &^ 0x80))
|
||||||
|
if 0x80&ri.group == 0 {
|
||||||
|
if aGroup&bGroup&group != 0 { // Both regions are in the group.
|
||||||
|
return ri.distance, ri.distance == defaultDistance
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
|
||||||
|
return ri.distance, ri.distance == defaultDistance
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return defaultDistance, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalsRest compares everything except the language.
|
||||||
|
func equalsRest(a, b language.Tag) bool {
|
||||||
|
// TODO: don't include extensions in this comparison. To do this efficiently,
|
||||||
|
// though, we should handle private tags separately.
|
||||||
|
return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
|
||||||
|
}
|
||||||
|
|
||||||
|
// isExactEquivalent returns true if canonicalizing the language will not alter
|
||||||
|
// the script or region of a tag.
|
||||||
|
func isExactEquivalent(l language.Language) bool {
|
||||||
|
for _, o := range notEquivalent {
|
||||||
|
if o == l {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// notEquivalent lists the languages for which canonicalization may alter the
// script or region. It is populated by init and consulted by
// isExactEquivalent.
var notEquivalent []language.Language
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Create a list of all languages for which canonicalization may alter the
|
||||||
|
// script or region.
|
||||||
|
for _, lm := range language.AliasMap {
|
||||||
|
tag := language.Tag{LangID: language.Language(lm.From)}
|
||||||
|
if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
|
||||||
|
notEquivalent = append(notEquivalent, language.Language(lm.From))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Maximize undefined regions of paradigm locales.
|
||||||
|
for i, v := range paradigmLocales {
|
||||||
|
t := language.Tag{LangID: language.Language(v[0])}
|
||||||
|
max, _ := t.Maximize()
|
||||||
|
if v[1] == 0 {
|
||||||
|
paradigmLocales[i][1] = uint16(max.RegionID)
|
||||||
|
}
|
||||||
|
if v[2] == 0 {
|
||||||
|
paradigmLocales[i][2] = uint16(max.RegionID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
256
vendor/golang.org/x/text/language/parse.go
generated
vendored
Normal file
256
vendor/golang.org/x/text/language/parse.go
generated
vendored
Normal file
|
@ -0,0 +1,256 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/internal/language"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ValueError is the error returned by the parsing functions for input that
// is well-formed but contains a subtag that is not recognized as a valid
// value.
type ValueError interface {
	error

	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
|
||||||
|
|
||||||
|
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||||
|
// failed it returns an error and any part of the tag that could be parsed.
|
||||||
|
// If parsing succeeded but an unknown value was found, it returns
|
||||||
|
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||||
|
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||||
|
// and extensions to this standard defined in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
// The resulting tag is canonicalized using the default canonicalization type.
|
||||||
|
func Parse(s string) (t Tag, err error) {
|
||||||
|
return Default.Parse(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||||
|
// failed it returns an error and any part of the tag that could be parsed.
|
||||||
|
// If parsing succeeded but an unknown value was found, it returns
|
||||||
|
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||||
|
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||||
|
// and extensions to this standard defined in
|
||||||
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||||
|
// The resulting tag is canonicalized using the canonicalization type c.
|
||||||
|
func (c CanonType) Parse(s string) (t Tag, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
t = Tag{}
|
||||||
|
err = language.ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
tt, err := language.Parse(s)
|
||||||
|
if err != nil {
|
||||||
|
return makeTag(tt), err
|
||||||
|
}
|
||||||
|
tt, changed := canonicalize(c, tt)
|
||||||
|
if changed {
|
||||||
|
tt.RemakeString()
|
||||||
|
}
|
||||||
|
return makeTag(tt), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||||
|
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||||
|
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||||
|
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||||
|
// accumulated, but if two extensions of the same type are passed, the latter
|
||||||
|
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||||
|
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||||
|
// values and typically only makes sense as the first argument. The resulting
|
||||||
|
// tag is returned after canonicalizing using the Default CanonType. If one or
|
||||||
|
// more errors are encountered, one of the errors is returned.
|
||||||
|
func Compose(part ...interface{}) (t Tag, err error) {
|
||||||
|
return Default.Compose(part...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||||
|
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||||
|
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||||
|
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||||
|
// accumulated, but if two extensions of the same type are passed, the latter
|
||||||
|
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||||
|
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||||
|
// values and typically only makes sense as the first argument. The resulting
|
||||||
|
// tag is returned after canonicalizing using CanonType c. If one or more errors
|
||||||
|
// are encountered, one of the errors is returned.
|
||||||
|
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
t = Tag{}
|
||||||
|
err = language.ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
var b language.Builder
|
||||||
|
if err = update(&b, part...); err != nil {
|
||||||
|
return und, err
|
||||||
|
}
|
||||||
|
b.Tag, _ = canonicalize(c, b.Tag)
|
||||||
|
return makeTag(b.Make()), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// errInvalidArgument is the error update records when it is handed a Variant
// or Extension whose string value is empty.
var errInvalidArgument = errors.New("invalid Extension or Variant")
|
||||||
|
|
||||||
|
func update(b *language.Builder, part ...interface{}) (err error) {
|
||||||
|
for _, x := range part {
|
||||||
|
switch v := x.(type) {
|
||||||
|
case Tag:
|
||||||
|
b.SetTag(v.tag())
|
||||||
|
case Base:
|
||||||
|
b.Tag.LangID = v.langID
|
||||||
|
case Script:
|
||||||
|
b.Tag.ScriptID = v.scriptID
|
||||||
|
case Region:
|
||||||
|
b.Tag.RegionID = v.regionID
|
||||||
|
case Variant:
|
||||||
|
if v.variant == "" {
|
||||||
|
err = errInvalidArgument
|
||||||
|
break
|
||||||
|
}
|
||||||
|
b.AddVariant(v.variant)
|
||||||
|
case Extension:
|
||||||
|
if v.s == "" {
|
||||||
|
err = errInvalidArgument
|
||||||
|
break
|
||||||
|
}
|
||||||
|
b.SetExt(v.s)
|
||||||
|
case []Variant:
|
||||||
|
b.ClearVariants()
|
||||||
|
for _, v := range v {
|
||||||
|
b.AddVariant(v.variant)
|
||||||
|
}
|
||||||
|
case []Extension:
|
||||||
|
b.ClearExtensions()
|
||||||
|
for _, e := range v {
|
||||||
|
b.SetExt(e.s)
|
||||||
|
}
|
||||||
|
// TODO: support parsing of raw strings based on morphology or just extensions?
|
||||||
|
case error:
|
||||||
|
if v != nil {
|
||||||
|
err = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Errors returned by ParseAcceptLanguage.
var (
	// errInvalidWeight reports a malformed quality weight.
	errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")

	// errTagListTooLarge reports input containing more than 1000 '-'
	// characters.
	errTagListTooLarge = errors.New("tag list exceeds max length")
)
|
||||||
|
|
||||||
|
// ParseAcceptLanguage parses the contents of an Accept-Language header as
|
||||||
|
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
||||||
|
// a list of corresponding quality weights. It is more permissive than RFC 2616
|
||||||
|
// and may return non-nil slices even if the input is not valid.
|
||||||
|
// The Tags will be sorted by highest weight first and then by first occurrence.
|
||||||
|
// Tags with a weight of zero will be dropped. An error will be returned if the
|
||||||
|
// input could not be parsed.
|
||||||
|
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
tag = nil
|
||||||
|
q = nil
|
||||||
|
err = language.ErrSyntax
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if strings.Count(s, "-") > 1000 {
|
||||||
|
return nil, nil, errTagListTooLarge
|
||||||
|
}
|
||||||
|
|
||||||
|
var entry string
|
||||||
|
for s != "" {
|
||||||
|
if entry, s = split(s, ','); entry == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
entry, weight := split(entry, ';')
|
||||||
|
|
||||||
|
// Scan the language.
|
||||||
|
t, err := Parse(entry)
|
||||||
|
if err != nil {
|
||||||
|
id, ok := acceptFallback[entry]
|
||||||
|
if !ok {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
t = makeTag(language.Tag{LangID: id})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan the optional weight.
|
||||||
|
w := 1.0
|
||||||
|
if weight != "" {
|
||||||
|
weight = consume(weight, 'q')
|
||||||
|
weight = consume(weight, '=')
|
||||||
|
// consume returns the empty string when a token could not be
|
||||||
|
// consumed, resulting in an error for ParseFloat.
|
||||||
|
if w, err = strconv.ParseFloat(weight, 32); err != nil {
|
||||||
|
return nil, nil, errInvalidWeight
|
||||||
|
}
|
||||||
|
// Drop tags with a quality weight of 0.
|
||||||
|
if w <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tag = append(tag, t)
|
||||||
|
q = append(q, float32(w))
|
||||||
|
}
|
||||||
|
sort.Stable(&tagSort{tag, q})
|
||||||
|
return tag, q, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// consume strips the leading byte c from s and returns the remainder with
// surrounding white space trimmed. It returns the empty string when s does
// not start with c.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
|
||||||
|
|
||||||
|
// split cuts s at the first occurrence of the byte c and returns the text
// before and after it, each with surrounding white space trimmed. When c does
// not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
|
||||||
|
|
||||||
|
// Add hack mapping to deal with a small number of cases that occur
|
||||||
|
// in Accept-Language (with reasonable frequency).
|
||||||
|
var acceptFallback = map[string]language.Language{
|
||||||
|
"english": _en,
|
||||||
|
"deutsch": _de,
|
||||||
|
"italian": _it,
|
||||||
|
"french": _fr,
|
||||||
|
"*": _mul, // defined in the spec to match all languages.
|
||||||
|
}
|
||||||
|
|
||||||
|
type tagSort struct {
|
||||||
|
tag []Tag
|
||||||
|
q []float32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *tagSort) Len() int {
|
||||||
|
return len(s.q)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *tagSort) Less(i, j int) bool {
|
||||||
|
return s.q[i] > s.q[j]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *tagSort) Swap(i, j int) {
|
||||||
|
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
|
||||||
|
s.q[i], s.q[j] = s.q[j], s.q[i]
|
||||||
|
}
|
298
vendor/golang.org/x/text/language/tables.go
generated
vendored
Normal file
298
vendor/golang.org/x/text/language/tables.go
generated
vendored
Normal file
|
@ -0,0 +1,298 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
// CLDRVersion is the CLDR version from which the tables in this package are derived.
|
||||||
|
const CLDRVersion = "32"
|
||||||
|
|
||||||
|
const (
|
||||||
|
_de = 269
|
||||||
|
_en = 313
|
||||||
|
_fr = 350
|
||||||
|
_it = 505
|
||||||
|
_mo = 784
|
||||||
|
_no = 879
|
||||||
|
_nb = 839
|
||||||
|
_pt = 960
|
||||||
|
_sh = 1031
|
||||||
|
_mul = 806
|
||||||
|
_und = 0
|
||||||
|
)
|
||||||
|
const (
|
||||||
|
_001 = 1
|
||||||
|
_419 = 31
|
||||||
|
_BR = 65
|
||||||
|
_CA = 73
|
||||||
|
_ES = 111
|
||||||
|
_GB = 124
|
||||||
|
_MD = 189
|
||||||
|
_PT = 239
|
||||||
|
_UK = 307
|
||||||
|
_US = 310
|
||||||
|
_ZZ = 358
|
||||||
|
_XA = 324
|
||||||
|
_XC = 326
|
||||||
|
_XK = 334
|
||||||
|
)
|
||||||
|
const (
|
||||||
|
_Latn = 91
|
||||||
|
_Hani = 57
|
||||||
|
_Hans = 59
|
||||||
|
_Hant = 60
|
||||||
|
_Qaaa = 149
|
||||||
|
_Qaai = 157
|
||||||
|
_Qabx = 198
|
||||||
|
_Zinh = 255
|
||||||
|
_Zyyy = 260
|
||||||
|
_Zzzz = 261
|
||||||
|
)
|
||||||
|
|
||||||
|
var regionToGroups = []uint8{ // 359 elements
|
||||||
|
// Entry 0 - 3F
|
||||||
|
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||||
|
0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
|
||||||
|
0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
|
||||||
|
// Entry 40 - 7F
|
||||||
|
0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x04,
|
||||||
|
0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||||
|
0x08, 0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04,
|
||||||
|
// Entry 80 - BF
|
||||||
|
0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00,
|
||||||
|
0x00, 0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00,
|
||||||
|
0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00,
|
||||||
|
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04,
|
||||||
|
// Entry C0 - FF
|
||||||
|
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
|
||||||
|
0x01, 0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// Entry 100 - 13F
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||||
|
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04,
|
||||||
|
0x00, 0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00,
|
||||||
|
// Entry 140 - 17F
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
} // Size: 383 bytes
|
||||||
|
|
||||||
|
var paradigmLocales = [][3]uint16{ // 3 elements
|
||||||
|
0: [3]uint16{0x139, 0x0, 0x7c},
|
||||||
|
1: [3]uint16{0x13e, 0x0, 0x1f},
|
||||||
|
2: [3]uint16{0x3c0, 0x41, 0xef},
|
||||||
|
} // Size: 42 bytes
|
||||||
|
|
||||||
|
type mutualIntelligibility struct {
|
||||||
|
want uint16
|
||||||
|
have uint16
|
||||||
|
distance uint8
|
||||||
|
oneway bool
|
||||||
|
}
|
||||||
|
type scriptIntelligibility struct {
|
||||||
|
wantLang uint16
|
||||||
|
haveLang uint16
|
||||||
|
wantScript uint8
|
||||||
|
haveScript uint8
|
||||||
|
distance uint8
|
||||||
|
}
|
||||||
|
type regionIntelligibility struct {
|
||||||
|
lang uint16
|
||||||
|
script uint8
|
||||||
|
group uint8
|
||||||
|
distance uint8
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchLang holds pairs of langIDs of base languages that are typically
|
||||||
|
// mutually intelligible. Each pair is associated with a confidence and
|
||||||
|
// whether the intelligibility goes one or both ways.
|
||||||
|
var matchLang = []mutualIntelligibility{ // 113 elements
|
||||||
|
0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
|
||||||
|
1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
|
||||||
|
2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
|
||||||
|
3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
|
||||||
|
4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
|
||||||
|
5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
|
||||||
|
6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
|
||||||
|
7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
|
||||||
|
8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
|
||||||
|
9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
|
||||||
|
12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
|
||||||
|
14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
|
||||||
|
18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
|
||||||
|
21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
|
||||||
|
23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
|
||||||
|
29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
|
||||||
|
30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
|
||||||
|
31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
|
||||||
|
32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
|
||||||
|
36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
|
||||||
|
37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
|
||||||
|
38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||||
|
46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
|
||||||
|
53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
|
||||||
|
55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
|
||||||
|
64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
|
||||||
|
67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
|
||||||
|
68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
|
||||||
|
72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
|
||||||
|
73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
|
||||||
|
83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
|
||||||
|
84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
|
||||||
|
87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||||
|
94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
|
||||||
|
106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
|
||||||
|
108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||||
|
109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
|
||||||
|
} // Size: 702 bytes
|
||||||
|
|
||||||
|
// matchScript holds pairs of scriptIDs where readers of one script
|
||||||
|
// can typically also read the other. Each is associated with a confidence.
|
||||||
|
var matchScript = []scriptIntelligibility{ // 26 elements
|
||||||
|
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x5b, haveScript: 0x20, distance: 0x5},
|
||||||
|
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x20, haveScript: 0x5b, distance: 0x5},
|
||||||
|
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
|
||||||
|
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x5b, distance: 0xa},
|
||||||
|
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x20, distance: 0xa},
|
||||||
|
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2e, haveScript: 0x5b, distance: 0xa},
|
||||||
|
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x5b, distance: 0xa},
|
||||||
|
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x53, haveScript: 0x5b, distance: 0xa},
|
||||||
|
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x58, haveScript: 0x5b, distance: 0xa},
|
||||||
|
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6f, haveScript: 0x5b, distance: 0xa},
|
||||||
|
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x76, haveScript: 0x5b, distance: 0xa},
|
||||||
|
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x22, haveScript: 0x5b, distance: 0xa},
|
||||||
|
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x83, haveScript: 0x5b, distance: 0xa},
|
||||||
|
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5b, distance: 0xa},
|
||||||
|
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
|
||||||
|
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
|
||||||
|
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd6, haveScript: 0x5b, distance: 0xa},
|
||||||
|
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5b, distance: 0xa},
|
||||||
|
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe9, haveScript: 0x5b, distance: 0xa},
|
||||||
|
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5b, distance: 0xa},
|
||||||
|
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
|
||||||
|
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
|
||||||
|
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
|
||||||
|
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3e, haveScript: 0x5b, distance: 0xa},
|
||||||
|
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3b, haveScript: 0x3c, distance: 0xf},
|
||||||
|
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3c, haveScript: 0x3b, distance: 0x13},
|
||||||
|
} // Size: 232 bytes
|
||||||
|
|
||||||
|
var matchRegion = []regionIntelligibility{ // 15 elements
|
||||||
|
0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
|
||||||
|
1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
|
||||||
|
2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
|
||||||
|
3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
|
||||||
|
4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
|
||||||
|
5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
|
||||||
|
6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
|
||||||
|
7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
|
||||||
|
8: {lang: 0x529, script: 0x3c, group: 0x2, distance: 0x4},
|
||||||
|
9: {lang: 0x529, script: 0x3c, group: 0x82, distance: 0x4},
|
||||||
|
10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
|
||||||
|
11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
|
||||||
|
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
|
||||||
|
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
|
||||||
|
14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5},
|
||||||
|
} // Size: 114 bytes
|
||||||
|
|
||||||
|
// Total table size 1473 bytes (1KiB); checksum: 7BB90B5C
|
145
vendor/golang.org/x/text/language/tags.go
generated
vendored
Normal file
145
vendor/golang.org/x/text/language/tags.go
generated
vendored
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package language
|
||||||
|
|
||||||
|
import "golang.org/x/text/internal/language/compact"
|
||||||
|
|
||||||
|
// TODO: Various sets of commonly used tags and regions.
|
||||||
|
|
||||||
|
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||||
|
// It simplifies safe initialization of Tag values.
|
||||||
|
func MustParse(s string) Tag {
|
||||||
|
t, err := Parse(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||||
|
// It simplifies safe initialization of Tag values.
|
||||||
|
func (c CanonType) MustParse(s string) Tag {
|
||||||
|
t, err := c.Parse(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||||
|
// It simplifies safe initialization of Base values.
|
||||||
|
func MustParseBase(s string) Base {
|
||||||
|
b, err := ParseBase(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||||
|
// parsed. It simplifies safe initialization of Script values.
|
||||||
|
func MustParseScript(s string) Script {
|
||||||
|
scr, err := ParseScript(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return scr
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||||
|
// parsed. It simplifies safe initialization of Region values.
|
||||||
|
func MustParseRegion(s string) Region {
|
||||||
|
r, err := ParseRegion(s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// Predefined tags. und and Und are the zero Tag; every other entry is the
// identically named value from the internal compact package converted to Tag.
var (
	// und is the unexported zero Tag.
	und = Tag{}

	// Und is the exported zero Tag.
	Und Tag = Tag{}

	Afrikaans Tag = Tag(compact.Afrikaans)
	Amharic Tag = Tag(compact.Amharic)
	Arabic Tag = Tag(compact.Arabic)
	ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
	Azerbaijani Tag = Tag(compact.Azerbaijani)
	Bulgarian Tag = Tag(compact.Bulgarian)
	Bengali Tag = Tag(compact.Bengali)
	Catalan Tag = Tag(compact.Catalan)
	Czech Tag = Tag(compact.Czech)
	Danish Tag = Tag(compact.Danish)
	German Tag = Tag(compact.German)
	Greek Tag = Tag(compact.Greek)
	English Tag = Tag(compact.English)
	AmericanEnglish Tag = Tag(compact.AmericanEnglish)
	BritishEnglish Tag = Tag(compact.BritishEnglish)
	Spanish Tag = Tag(compact.Spanish)
	EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
	LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
	Estonian Tag = Tag(compact.Estonian)
	Persian Tag = Tag(compact.Persian)
	Finnish Tag = Tag(compact.Finnish)
	Filipino Tag = Tag(compact.Filipino)
	French Tag = Tag(compact.French)
	CanadianFrench Tag = Tag(compact.CanadianFrench)
	Gujarati Tag = Tag(compact.Gujarati)
	Hebrew Tag = Tag(compact.Hebrew)
	Hindi Tag = Tag(compact.Hindi)
	Croatian Tag = Tag(compact.Croatian)
	Hungarian Tag = Tag(compact.Hungarian)
	Armenian Tag = Tag(compact.Armenian)
	Indonesian Tag = Tag(compact.Indonesian)
	Icelandic Tag = Tag(compact.Icelandic)
	Italian Tag = Tag(compact.Italian)
	Japanese Tag = Tag(compact.Japanese)
	Georgian Tag = Tag(compact.Georgian)
	Kazakh Tag = Tag(compact.Kazakh)
	Khmer Tag = Tag(compact.Khmer)
	Kannada Tag = Tag(compact.Kannada)
	Korean Tag = Tag(compact.Korean)
	Kirghiz Tag = Tag(compact.Kirghiz)
	Lao Tag = Tag(compact.Lao)
	Lithuanian Tag = Tag(compact.Lithuanian)
	Latvian Tag = Tag(compact.Latvian)
	Macedonian Tag = Tag(compact.Macedonian)
	Malayalam Tag = Tag(compact.Malayalam)
	Mongolian Tag = Tag(compact.Mongolian)
	Marathi Tag = Tag(compact.Marathi)
	Malay Tag = Tag(compact.Malay)
	Burmese Tag = Tag(compact.Burmese)
	Nepali Tag = Tag(compact.Nepali)
	Dutch Tag = Tag(compact.Dutch)
	Norwegian Tag = Tag(compact.Norwegian)
	Punjabi Tag = Tag(compact.Punjabi)
	Polish Tag = Tag(compact.Polish)
	Portuguese Tag = Tag(compact.Portuguese)
	BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
	EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
	Romanian Tag = Tag(compact.Romanian)
	Russian Tag = Tag(compact.Russian)
	Sinhala Tag = Tag(compact.Sinhala)
	Slovak Tag = Tag(compact.Slovak)
	Slovenian Tag = Tag(compact.Slovenian)
	Albanian Tag = Tag(compact.Albanian)
	Serbian Tag = Tag(compact.Serbian)
	SerbianLatin Tag = Tag(compact.SerbianLatin)
	Swedish Tag = Tag(compact.Swedish)
	Swahili Tag = Tag(compact.Swahili)
	Tamil Tag = Tag(compact.Tamil)
	Telugu Tag = Tag(compact.Telugu)
	Thai Tag = Tag(compact.Thai)
	Turkish Tag = Tag(compact.Turkish)
	Ukrainian Tag = Tag(compact.Ukrainian)
	Urdu Tag = Tag(compact.Urdu)
	Uzbek Tag = Tag(compact.Uzbek)
	Vietnamese Tag = Tag(compact.Vietnamese)
	Chinese Tag = Tag(compact.Chinese)
	SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
	TraditionalChinese Tag = Tag(compact.TraditionalChinese)
	Zulu Tag = Tag(compact.Zulu)
)
|
36
vendor/golang.org/x/text/secure/precis/class.go
generated
vendored
Normal file
36
vendor/golang.org/x/text/secure/precis/class.go
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: Add contextual character rules from Appendix A of RFC5892.
|
||||||
|
|
||||||
|
// A class is a set of characters that match certain derived properties. The
// PRECIS framework defines two classes: The Freeform class and the Identifier
// class. The freeform class should be used for profiles where expressiveness is
// prioritized over safety such as nicknames or passwords. The identifier class
// should be used for profiles where safety is the first priority such as
// addressable network labels and usernames.
type class struct {
	// validFrom is the lowest derived-property value that counts as a member
	// of the class: a rune belongs when its property is >= validFrom.
	validFrom property
}
|
||||||
|
|
||||||
|
// Contains satisfies the runes.Set interface and returns whether the given rune
|
||||||
|
// is a member of the class.
|
||||||
|
func (c class) Contains(r rune) bool {
|
||||||
|
b := make([]byte, 4)
|
||||||
|
n := utf8.EncodeRune(b, r)
|
||||||
|
|
||||||
|
trieval, _ := dpTrie.lookup(b[:n])
|
||||||
|
return c.validFrom <= property(trieval)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The two string classes used by profiles. They differ only in the minimum
// derived property a rune must have to be a member.
var (
	// identifier accepts runes whose property is at least pValid.
	identifier = &class{validFrom: pValid}
	// freeform accepts runes whose property is at least idDisOrFreePVal.
	freeform = &class{validFrom: idDisOrFreePVal}
)
|
139
vendor/golang.org/x/text/secure/precis/context.go
generated
vendored
Normal file
139
vendor/golang.org/x/text/secure/precis/context.go
generated
vendored
Normal file
|
@ -0,0 +1,139 @@
|
||||||
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
|
||||||
|
// This file contains tables and code related to context rules.
|
||||||
|
|
||||||
|
// catBitmap is a bit set describing which contextual categories have been
// seen while checking a string; each constant below is one bit.
type catBitmap uint16

const (
	// These bits, once set depending on the current value, are never unset.
	bJapanese catBitmap = 1 << iota
	bArabicIndicDigit
	bExtendedArabicIndicDigit

	// These bits are set on each iteration depending on the current value.
	bJoinStart
	bJoinMid
	bJoinEnd
	bVirama
	bLatinSmallL
	bGreek
	bHebrew

	// These bits indicate which of the permanent bits need to be set at the
	// end of the checks.
	bMustHaveJapn

	// permanent is the set of bits that init adds to every transition's keep
	// mask, so they survive from one rune to the next.
	permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
)
|
||||||
|
|
||||||
|
// finalShift equals the bit position of bMustHaveJapn (bit 10).
// NOTE(review): presumably used to shift the "must have" bits down onto the
// permanent bits for the final check; its use is not visible here, confirm.
const finalShift = 10

// errContext is returned when a contextual rule rejects a rune.
var errContext = errors.New("precis: contextual rule violated")
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Programmatically set these required bits as, manually setting them seems
|
||||||
|
// too error prone.
|
||||||
|
for i, ct := range categoryTransitions {
|
||||||
|
categoryTransitions[i].keep |= permanent
|
||||||
|
categoryTransitions[i].accept |= ct.term
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// categoryTransitions is indexed by rune category and describes how the
// checker's state bitmap changes when a rune of that category is seen, plus an
// optional contextual rule. The checker consults rule only for runes whose
// derived property is below the class's validFrom threshold.
var categoryTransitions = []struct {
	keep catBitmap // mask selecting which bits to keep from the previous state
	set catBitmap // mask for which bits to set for this transition

	// These bitmaps are used for rules that require lookahead.
	// term&accept == term must be true, which is enforced programmatically.
	term catBitmap // bits accepted as termination condition
	accept catBitmap // bits that pass, but not sufficient as termination

	// The rule function cannot take a *context as an argument, as it would
	// cause the context to escape, adding significant overhead.
	rule func(beforeBits catBitmap) (doLookahead bool, err error)
}{
	joiningL: {set: bJoinStart},
	joiningD: {set: bJoinStart | bJoinEnd},
	joiningT: {keep: bJoinStart, set: bJoinMid},
	joiningR: {set: bJoinEnd},
	viramaModifier: {set: bVirama},
	viramaJoinT: {set: bVirama | bJoinMid},
	latinSmallL: {set: bLatinSmallL},
	greek: {set: bGreek},
	greekJoinT: {set: bGreek | bJoinMid},
	hebrew: {set: bHebrew},
	hebrewJoinT: {set: bHebrew | bJoinMid},
	japanese: {set: bJapanese},
	katakanaMiddleDot: {set: bMustHaveJapn},

	// Accepted outright after a virama; otherwise it needs a join start
	// before it and a lookahead that terminates on a join end, passing over
	// join-mid runes.
	zeroWidthNonJoiner: {
		term: bJoinEnd,
		accept: bJoinMid,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bVirama != 0 {
				return false, nil
			}
			if before&bJoinStart == 0 {
				return false, errContext
			}
			return true, nil
		},
	},
	// Only accepted when the preceding state has the virama bit set.
	zeroWidthJoiner: {
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bVirama == 0 {
				err = errContext
			}
			return false, err
		},
	},
	// Needs a Latin small l before it and, via lookahead, one after it.
	middleDot: {
		term: bLatinSmallL,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bLatinSmallL == 0 {
				return false, errContext
			}
			return true, nil
		},
	},
	// No requirement on what precedes; lookahead must terminate on Greek.
	greekLowerNumeralSign: {
		set: bGreek,
		term: bGreek,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			return true, nil
		},
	},
	// Only accepted when the preceding state has the Hebrew bit set.
	hebrewPreceding: {
		set: bHebrew,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bHebrew == 0 {
				err = errContext
			}
			return false, err
		},
	},
	// Rejected once an extended Arabic-Indic digit has been seen.
	arabicIndicDigit: {
		set: bArabicIndicDigit,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bExtendedArabicIndicDigit != 0 {
				err = errContext
			}
			return false, err
		},
	},
	// Rejected once an Arabic-Indic digit has been seen.
	extendedArabicIndicDigit: {
		set: bExtendedArabicIndicDigit,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bArabicIndicDigit != 0 {
				err = errContext
			}
			return false, err
		},
	},
}
|
14
vendor/golang.org/x/text/secure/precis/doc.go
generated
vendored
Normal file
14
vendor/golang.org/x/text/secure/precis/doc.go
generated
vendored
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package precis contains types and functions for the preparation,
|
||||||
|
// enforcement, and comparison of internationalized strings ("PRECIS") as
|
||||||
|
// defined in RFC 8264. It also contains several pre-defined profiles for
|
||||||
|
// passwords, nicknames, and usernames as defined in RFC 8265 and RFC 8266.
|
||||||
|
//
|
||||||
|
// BE ADVISED: This package is under construction and the API may change in
|
||||||
|
// backwards incompatible ways and without notice.
|
||||||
|
package precis // import "golang.org/x/text/secure/precis"
|
||||||
|
|
||||||
|
//go:generate go run gen.go gen_trieval.go
|
72
vendor/golang.org/x/text/secure/precis/nickname.go
generated
vendored
Normal file
72
vendor/golang.org/x/text/secure/precis/nickname.go
generated
vendored
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// nickAdditionalMapping is a transformer implementing the additional mapping
// rule of the Nickname profile (RFC 8266 §2.1). It carries state between
// Transform calls.
type nickAdditionalMapping struct {
	// TODO: This transformer needs to be stateless somehow…
	// notStart is true once at least one non-space rune has been written.
	notStart bool
	// prevSpace is true when the most recently consumed rune was a space (Zs).
	prevSpace bool
}
|
||||||
|
|
||||||
|
func (t *nickAdditionalMapping) Reset() {
|
||||||
|
t.prevSpace = false
|
||||||
|
t.notStart = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
// RFC 8266 §2.1. Rules
|
||||||
|
//
|
||||||
|
// 2. Additional Mapping Rule: The additional mapping rule consists of
|
||||||
|
// the following sub-rules.
|
||||||
|
//
|
||||||
|
// a. Map any instances of non-ASCII space to SPACE (U+0020); a
|
||||||
|
// non-ASCII space is any Unicode code point having a general
|
||||||
|
// category of "Zs", naturally with the exception of SPACE
|
||||||
|
// (U+0020). (The inclusion of only ASCII space prevents
|
||||||
|
// confusion with various non-ASCII space code points, many of
|
||||||
|
// which are difficult to reproduce across different input
|
||||||
|
// methods.)
|
||||||
|
//
|
||||||
|
// b. Remove any instances of the ASCII space character at the
|
||||||
|
// beginning or end of a nickname (e.g., "stpeter " is mapped to
|
||||||
|
// "stpeter").
|
||||||
|
//
|
||||||
|
// c. Map interior sequences of more than one ASCII space character
|
||||||
|
// to a single ASCII space character (e.g., "St Peter" is
|
||||||
|
// mapped to "St Peter").
|
||||||
|
for nSrc < len(src) {
|
||||||
|
r, size := utf8.DecodeRune(src[nSrc:])
|
||||||
|
if size == 0 { // Incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1
|
||||||
|
}
|
||||||
|
if unicode.Is(unicode.Zs, r) {
|
||||||
|
t.prevSpace = true
|
||||||
|
} else {
|
||||||
|
if t.prevSpace && t.notStart {
|
||||||
|
dst[nDst] = ' '
|
||||||
|
nDst += 1
|
||||||
|
}
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
nDst += size
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
t.prevSpace = false
|
||||||
|
t.notStart = true
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
157
vendor/golang.org/x/text/secure/precis/options.go
generated
vendored
Normal file
157
vendor/golang.org/x/text/secure/precis/options.go
generated
vendored
Normal file
|
@ -0,0 +1,157 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/text/cases"
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
"golang.org/x/text/runes"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// An Option is used to define the behavior and rules of a Profile.
type Option func(*options)

// options collects the settings selected by the Option values passed to a
// profile constructor.
type options struct {
	// Preparation options
	foldWidth bool // apply width folding

	// Enforcement options
	asciiLower bool // lowercase A-Z directly on the ASCII fast path
	cases transform.SpanningTransformer // case mapping rule; nil means none
	disallow runes.Set // extra disallowed runes; nil means none
	norm transform.SpanningTransformer // normalization rule; getOpts defaults it to NFC
	additional []func() transform.SpanningTransformer // factories for additional mapping transformers
	width transform.SpanningTransformer // NOTE(review): not referenced in the code visible here
	disallowEmpty bool // reject an empty result
	bidiRule bool // apply the Bidi Rule
	repeat bool // apply the rules four times instead of once

	// Comparison options
	ignorecase bool // case insensitive comparison
}
|
||||||
|
|
||||||
|
func getOpts(o ...Option) (res options) {
|
||||||
|
for _, f := range o {
|
||||||
|
f(&res)
|
||||||
|
}
|
||||||
|
// Using a SpanningTransformer, instead of norm.Form prevents an allocation
|
||||||
|
// down the road.
|
||||||
|
if res.norm == nil {
|
||||||
|
res.norm = norm.NFC
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exported options that take no arguments. Each one sets a single flag on the
// profile's options.
var (
	// The IgnoreCase option causes the profile to perform a case insensitive
	// comparison during the PRECIS comparison step.
	IgnoreCase Option = ignoreCase

	// The FoldWidth option causes the profile to map non-canonical wide and
	// narrow variants to their decomposition mapping. This is useful for
	// profiles that are based on the identifier class which would otherwise
	// disallow such characters.
	FoldWidth Option = foldWidth

	// The DisallowEmpty option causes the enforcement step to return an error if
	// the resulting string would be empty.
	DisallowEmpty Option = disallowEmpty

	// The BidiRule option causes the Bidi Rule defined in RFC 5893 to be
	// applied.
	BidiRule Option = bidiRule
)
|
||||||
|
|
||||||
|
// Implementations of the argument-less options. Each sets one boolean field.
var (
	// ignoreCase enables case insensitive comparison.
	ignoreCase = func(o *options) {
		o.ignorecase = true
	}
	// foldWidth enables width folding.
	foldWidth = func(o *options) {
		o.foldWidth = true
	}
	// disallowEmpty makes enforcement reject an empty result.
	disallowEmpty = func(o *options) {
		o.disallowEmpty = true
	}
	// bidiRule enables the Bidi Rule check.
	bidiRule = func(o *options) {
		o.bidiRule = true
	}
	// repeat makes the rules be applied repeatedly (four rounds).
	repeat = func(o *options) {
		o.repeat = true
	}
)
|
||||||
|
|
||||||
|
// TODO: move this logic to package transform
|
||||||
|
|
||||||
|
// spanWrap adapts a plain transform.Transformer to the SpanningTransformer
// interface by adding a Span method that never spans anything.
type spanWrap struct{ transform.Transformer }

// Span always returns 0 and transform.ErrEndOfSpan, which tells the caller
// that all of src has to go through Transform.
func (s spanWrap) Span(src []byte, atEOF bool) (n int, err error) {
	return 0, transform.ErrEndOfSpan
}
|
||||||
|
|
||||||
|
// TODO: allow different types? For instance:
|
||||||
|
// func() transform.Transformer
|
||||||
|
// func() transform.SpanningTransformer
|
||||||
|
// func([]byte) bool // validation only
|
||||||
|
//
|
||||||
|
// Also, would be great if we could detect if a transformer is reentrant.
|
||||||
|
|
||||||
|
// The AdditionalMapping option defines the additional mapping rule for the
|
||||||
|
// Profile by applying Transformer's in sequence.
|
||||||
|
func AdditionalMapping(t ...func() transform.Transformer) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
for _, f := range t {
|
||||||
|
sf := func() transform.SpanningTransformer {
|
||||||
|
return f().(transform.SpanningTransformer)
|
||||||
|
}
|
||||||
|
if _, ok := f().(transform.SpanningTransformer); !ok {
|
||||||
|
sf = func() transform.SpanningTransformer {
|
||||||
|
return spanWrap{f()}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
o.additional = append(o.additional, sf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Norm option defines a Profile's normalization rule. Defaults to NFC.
|
||||||
|
func Norm(f norm.Form) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.norm = f
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The FoldCase option defines a Profile's case mapping rule. Options can be
|
||||||
|
// provided to determine the type of case folding used.
|
||||||
|
func FoldCase(opts ...cases.Option) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.asciiLower = true
|
||||||
|
o.cases = cases.Fold(opts...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The LowerCase option defines a Profile's case mapping rule. Options can be
|
||||||
|
// provided to determine the type of case folding used.
|
||||||
|
func LowerCase(opts ...cases.Option) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.asciiLower = true
|
||||||
|
if len(opts) == 0 {
|
||||||
|
o.cases = cases.Lower(language.Und, cases.HandleFinalSigma(false))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = append([]cases.Option{cases.HandleFinalSigma(false)}, opts...)
|
||||||
|
o.cases = cases.Lower(language.Und, opts...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Disallow option further restricts a Profile's allowed characters beyond
|
||||||
|
// what is disallowed by the underlying string class.
|
||||||
|
func Disallow(set runes.Set) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.disallow = set
|
||||||
|
}
|
||||||
|
}
|
412
vendor/golang.org/x/text/secure/precis/profile.go
generated
vendored
Normal file
412
vendor/golang.org/x/text/secure/precis/profile.go
generated
vendored
Normal file
|
@ -0,0 +1,412 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/cases"
|
||||||
|
"golang.org/x/text/language"
|
||||||
|
"golang.org/x/text/runes"
|
||||||
|
"golang.org/x/text/secure/bidirule"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/width"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// errDisallowedRune is returned when the input contains a rune that the
	// profile does not allow.
	errDisallowedRune = errors.New("precis: disallowed rune encountered")
)

// dpTrie is the derived properties trie used to look up the property and
// category bits of a UTF-8 encoded rune.
var dpTrie = newDerivedPropertiesTrie(0)
|
||||||
|
|
||||||
|
// A Profile represents a set of rules for normalizing and validating strings in
// the PRECIS framework.
type Profile struct {
	// options holds the rules selected when the profile was created.
	options
	// class is the underlying string class (identifier or freeform).
	class *class
}
|
||||||
|
|
||||||
|
// NewIdentifier creates a new PRECIS profile based on the Identifier string
|
||||||
|
// class. Profiles created from this class are suitable for use where safety is
|
||||||
|
// prioritized over expressiveness like network identifiers, user accounts, chat
|
||||||
|
// rooms, and file names.
|
||||||
|
func NewIdentifier(opts ...Option) *Profile {
|
||||||
|
return &Profile{
|
||||||
|
options: getOpts(opts...),
|
||||||
|
class: identifier,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFreeform creates a new PRECIS profile based on the Freeform string class.
|
||||||
|
// Profiles created from this class are suitable for use where expressiveness is
|
||||||
|
// prioritized over safety like passwords, and display-elements such as
|
||||||
|
// nicknames in a chat room.
|
||||||
|
func NewFreeform(opts ...Option) *Profile {
|
||||||
|
return &Profile{
|
||||||
|
options: getOpts(opts...),
|
||||||
|
class: freeform,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRestrictedProfile creates a new PRECIS profile based on an existing
|
||||||
|
// profile.
|
||||||
|
// If the parent profile already had the Disallow option set, the new rule
|
||||||
|
// overrides the parents rule.
|
||||||
|
func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile {
|
||||||
|
p := *parent
|
||||||
|
Disallow(disallow)(&p.options)
|
||||||
|
return &p
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTransformer creates a new transform.Transformer that performs the PRECIS
|
||||||
|
// preparation and enforcement steps on the given UTF-8 encoded bytes.
|
||||||
|
func (p *Profile) NewTransformer() *Transformer {
|
||||||
|
var ts []transform.Transformer
|
||||||
|
|
||||||
|
// These transforms are applied in the order defined in
|
||||||
|
// https://tools.ietf.org/html/rfc7564#section-7
|
||||||
|
|
||||||
|
// RFC 8266 §2.1:
|
||||||
|
//
|
||||||
|
// Implementation experience has shown that applying the rules for the
|
||||||
|
// Nickname profile is not an idempotent procedure for all code points.
|
||||||
|
// Therefore, an implementation SHOULD apply the rules repeatedly until
|
||||||
|
// the output string is stable; if the output string does not stabilize
|
||||||
|
// after reapplying the rules three (3) additional times after the first
|
||||||
|
// application, the implementation SHOULD terminate application of the
|
||||||
|
// rules and reject the input string as invalid.
|
||||||
|
//
|
||||||
|
// There is no known string that will change indefinitely, so repeat 4 times
|
||||||
|
// and rely on the Span method to keep things relatively performant.
|
||||||
|
r := 1
|
||||||
|
if p.options.repeat {
|
||||||
|
r = 4
|
||||||
|
}
|
||||||
|
for ; r > 0; r-- {
|
||||||
|
if p.options.foldWidth {
|
||||||
|
ts = append(ts, width.Fold)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range p.options.additional {
|
||||||
|
ts = append(ts, f())
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.options.cases != nil {
|
||||||
|
ts = append(ts, p.options.cases)
|
||||||
|
}
|
||||||
|
|
||||||
|
ts = append(ts, p.options.norm)
|
||||||
|
|
||||||
|
if p.options.bidiRule {
|
||||||
|
ts = append(ts, bidirule.New())
|
||||||
|
}
|
||||||
|
|
||||||
|
ts = append(ts, &checker{p: p, allowed: p.Allowed()})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Add the disallow empty rule with a dummy transformer?
|
||||||
|
|
||||||
|
return &Transformer{transform.Chain(ts...)}
|
||||||
|
}
|
||||||
|
|
||||||
|
// errEmptyString is returned by enforcement when the DisallowEmpty option is
// set and the result is empty.
var errEmptyString = errors.New("precis: transformation resulted in empty string")
|
||||||
|
|
||||||
|
// buffers holds the working state for one enforcement run: the current data
// and two scratch buffers that apply alternates between.
type buffers struct {
	// src is the current data. It starts as the caller's input and points
	// into buf after a transformer has rewritten it.
	src []byte
	// buf are the two scratch buffers, allocated on first use.
	buf [2][]byte
	// next counts the rewrites done so far; its lowest bit selects the
	// scratch buffer for the next rewrite.
	next int
}
|
||||||
|
|
||||||
|
// apply runs t over b.src. When t.Span reports anything other than
// transform.ErrEndOfSpan (including nil, meaning the whole input is already
// in the desired form) that value is returned and b is left untouched.
// Otherwise the spanned prefix is copied into the next scratch buffer, the
// remainder is transformed and appended to it, and b.src is pointed at the
// result.
func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
	n, err := t.Span(b.src, true)
	if err != transform.ErrEndOfSpan {
		return err
	}
	// Alternate between the two scratch buffers so that the destination
	// never aliases the current source.
	x := b.next & 1
	if b.buf[x] == nil {
		// Initial capacity: input length plus a quarter plus 8 bytes.
		b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
	}
	span := append(b.buf[x][:0], b.src[:n]...)
	b.src, _, err = transform.Append(t, span, b.src[n:])
	// transform.Append may have grown the slice; keep the grown buffer.
	b.buf[x] = b.src
	b.next++
	return err
}
|
||||||
|
|
||||||
|
// Pre-allocate transformers when possible. In some cases this avoids allocation.
var (
	// foldWidthT is the width folding transformer.
	foldWidthT transform.SpanningTransformer = width.Fold
	// lowerCaseT lowercases using the undetermined language with final sigma
	// handling switched off.
	lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
)
|
||||||
|
|
||||||
|
// TODO: make this a method on profile.
|
||||||
|
|
||||||
|
func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
|
||||||
|
b.src = src
|
||||||
|
|
||||||
|
ascii := true
|
||||||
|
for _, c := range src {
|
||||||
|
if c >= utf8.RuneSelf {
|
||||||
|
ascii = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ASCII fast path.
|
||||||
|
if ascii {
|
||||||
|
for _, f := range p.options.additional {
|
||||||
|
if err = b.apply(f()); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case p.options.asciiLower || (comparing && p.options.ignorecase):
|
||||||
|
for i, c := range b.src {
|
||||||
|
if 'A' <= c && c <= 'Z' {
|
||||||
|
b.src[i] = c ^ 1<<5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case p.options.cases != nil:
|
||||||
|
b.apply(p.options.cases)
|
||||||
|
}
|
||||||
|
c := checker{p: p}
|
||||||
|
if _, err := c.span(b.src, true); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if p.disallow != nil {
|
||||||
|
for _, c := range b.src {
|
||||||
|
if p.disallow.Contains(rune(c)) {
|
||||||
|
return nil, errDisallowedRune
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.options.disallowEmpty && len(b.src) == 0 {
|
||||||
|
return nil, errEmptyString
|
||||||
|
}
|
||||||
|
return b.src, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// These transforms are applied in the order defined in
|
||||||
|
// https://tools.ietf.org/html/rfc8264#section-7
|
||||||
|
|
||||||
|
r := 1
|
||||||
|
if p.options.repeat {
|
||||||
|
r = 4
|
||||||
|
}
|
||||||
|
for ; r > 0; r-- {
|
||||||
|
// TODO: allow different width transforms options.
|
||||||
|
if p.options.foldWidth || (p.options.ignorecase && comparing) {
|
||||||
|
b.apply(foldWidthT)
|
||||||
|
}
|
||||||
|
for _, f := range p.options.additional {
|
||||||
|
if err = b.apply(f()); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.options.cases != nil {
|
||||||
|
b.apply(p.options.cases)
|
||||||
|
}
|
||||||
|
if comparing && p.options.ignorecase {
|
||||||
|
b.apply(lowerCaseT)
|
||||||
|
}
|
||||||
|
b.apply(p.norm)
|
||||||
|
if p.options.bidiRule && !bidirule.Valid(b.src) {
|
||||||
|
return nil, bidirule.ErrInvalid
|
||||||
|
}
|
||||||
|
c := checker{p: p}
|
||||||
|
if _, err := c.span(b.src, true); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if p.disallow != nil {
|
||||||
|
for i := 0; i < len(b.src); {
|
||||||
|
r, size := utf8.DecodeRune(b.src[i:])
|
||||||
|
if p.disallow.Contains(r) {
|
||||||
|
return nil, errDisallowedRune
|
||||||
|
}
|
||||||
|
i += size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.options.disallowEmpty && len(b.src) == 0 {
|
||||||
|
return nil, errEmptyString
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.src, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append appends the result of applying p to src writing the result to dst.
|
||||||
|
// It returns an error if the input string is invalid.
|
||||||
|
func (p *Profile) Append(dst, src []byte) ([]byte, error) {
|
||||||
|
var buf buffers
|
||||||
|
b, err := buf.enforce(p, src, false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return append(dst, b...), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
|
||||||
|
var buf buffers
|
||||||
|
b, err := buf.enforce(p, b, key)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if buf.next == 0 {
|
||||||
|
c := make([]byte, len(b))
|
||||||
|
copy(c, b)
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytes returns a new byte slice with the result of applying the profile to b.
|
||||||
|
func (p *Profile) Bytes(b []byte) ([]byte, error) {
|
||||||
|
return processBytes(p, b, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// AppendCompareKey appends the result of applying p to src (including any
|
||||||
|
// optional rules to make strings comparable or useful in a map key such as
|
||||||
|
// applying lowercasing) writing the result to dst. It returns an error if the
|
||||||
|
// input string is invalid.
|
||||||
|
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
|
||||||
|
var buf buffers
|
||||||
|
b, err := buf.enforce(p, src, true)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return append(dst, b...), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func processString(p *Profile, s string, key bool) (string, error) {
|
||||||
|
var buf buffers
|
||||||
|
b, err := buf.enforce(p, []byte(s), key)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string with the result of applying the profile to s.
|
||||||
|
func (p *Profile) String(s string) (string, error) {
|
||||||
|
return processString(p, s, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CompareKey returns a string that can be used for comparison, hashing, or
|
||||||
|
// collation.
|
||||||
|
func (p *Profile) CompareKey(s string) (string, error) {
|
||||||
|
return processString(p, s, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare enforces both strings, and then compares them for bit-string identity
|
||||||
|
// (byte-for-byte equality). If either string cannot be enforced, the comparison
|
||||||
|
// is false.
|
||||||
|
func (p *Profile) Compare(a, b string) bool {
|
||||||
|
var buf buffers
|
||||||
|
|
||||||
|
akey, err := buf.enforce(p, []byte(a), true)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = buffers{}
|
||||||
|
bkey, err := buf.enforce(p, []byte(b), true)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytes.Equal(akey, bkey)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allowed returns a runes.Set containing every rune that is a member of the
|
||||||
|
// underlying profile's string class and not disallowed by any profile specific
|
||||||
|
// rules.
|
||||||
|
func (p *Profile) Allowed() runes.Set {
|
||||||
|
if p.options.disallow != nil {
|
||||||
|
return runes.Predicate(func(r rune) bool {
|
||||||
|
return p.class.Contains(r) && !p.options.disallow.Contains(r)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return p.class
|
||||||
|
}
|
||||||
|
|
||||||
|
type checker struct {
|
||||||
|
p *Profile
|
||||||
|
allowed runes.Set
|
||||||
|
|
||||||
|
beforeBits catBitmap
|
||||||
|
termBits catBitmap
|
||||||
|
acceptBits catBitmap
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *checker) Reset() {
|
||||||
|
c.beforeBits = 0
|
||||||
|
c.termBits = 0
|
||||||
|
c.acceptBits = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
e, sz := dpTrie.lookup(src[n:])
|
||||||
|
d := categoryTransitions[category(e&catMask)]
|
||||||
|
if sz == 0 {
|
||||||
|
if !atEOF {
|
||||||
|
return n, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
return n, errDisallowedRune
|
||||||
|
}
|
||||||
|
doLookAhead := false
|
||||||
|
if property(e) < c.p.class.validFrom {
|
||||||
|
if d.rule == nil {
|
||||||
|
return n, errDisallowedRune
|
||||||
|
}
|
||||||
|
doLookAhead, err = d.rule(c.beforeBits)
|
||||||
|
if err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.beforeBits &= d.keep
|
||||||
|
c.beforeBits |= d.set
|
||||||
|
if c.termBits != 0 {
|
||||||
|
// We are currently in an unterminated lookahead.
|
||||||
|
if c.beforeBits&c.termBits != 0 {
|
||||||
|
c.termBits = 0
|
||||||
|
c.acceptBits = 0
|
||||||
|
} else if c.beforeBits&c.acceptBits == 0 {
|
||||||
|
// Invalid continuation of the unterminated lookahead sequence.
|
||||||
|
return n, errContext
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if doLookAhead {
|
||||||
|
if c.termBits != 0 {
|
||||||
|
// A previous lookahead run has not been terminated yet.
|
||||||
|
return n, errContext
|
||||||
|
}
|
||||||
|
c.termBits = d.term
|
||||||
|
c.acceptBits = d.accept
|
||||||
|
}
|
||||||
|
n += sz
|
||||||
|
}
|
||||||
|
if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
|
||||||
|
err = errContext
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: we may get rid of this transform if transform.Chain understands
|
||||||
|
// something like a Spanner interface.
|
||||||
|
func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
short := false
|
||||||
|
if len(dst) < len(src) {
|
||||||
|
src = src[:len(dst)]
|
||||||
|
atEOF = false
|
||||||
|
short = true
|
||||||
|
}
|
||||||
|
nSrc, err = c.span(src, atEOF)
|
||||||
|
nDst = copy(dst, src[:nSrc])
|
||||||
|
if short && (err == transform.ErrShortSrc || err == nil) {
|
||||||
|
err = transform.ErrShortDst
|
||||||
|
}
|
||||||
|
return nDst, nSrc, err
|
||||||
|
}
|
78
vendor/golang.org/x/text/secure/precis/profiles.go
generated
vendored
Normal file
78
vendor/golang.org/x/text/secure/precis/profiles.go
generated
vendored
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode"
|
||||||
|
|
||||||
|
"golang.org/x/text/runes"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Implements the Nickname profile specified in RFC 8266.
|
||||||
|
Nickname *Profile = nickname
|
||||||
|
|
||||||
|
// Implements the UsernameCaseMapped profile specified in RFC 8265.
|
||||||
|
UsernameCaseMapped *Profile = usernameCaseMap
|
||||||
|
|
||||||
|
// Implements the UsernameCasePreserved profile specified in RFC 8265.
|
||||||
|
UsernameCasePreserved *Profile = usernameNoCaseMap
|
||||||
|
|
||||||
|
// Implements the OpaqueString profile defined in RFC 8265 for passwords and
|
||||||
|
// other secure labels.
|
||||||
|
OpaqueString *Profile = opaquestring
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
nickname = &Profile{
|
||||||
|
options: getOpts(
|
||||||
|
AdditionalMapping(func() transform.Transformer {
|
||||||
|
return &nickAdditionalMapping{}
|
||||||
|
}),
|
||||||
|
IgnoreCase,
|
||||||
|
Norm(norm.NFKC),
|
||||||
|
DisallowEmpty,
|
||||||
|
repeat,
|
||||||
|
),
|
||||||
|
class: freeform,
|
||||||
|
}
|
||||||
|
usernameCaseMap = &Profile{
|
||||||
|
options: getOpts(
|
||||||
|
FoldWidth,
|
||||||
|
LowerCase(),
|
||||||
|
Norm(norm.NFC),
|
||||||
|
BidiRule,
|
||||||
|
),
|
||||||
|
class: identifier,
|
||||||
|
}
|
||||||
|
usernameNoCaseMap = &Profile{
|
||||||
|
options: getOpts(
|
||||||
|
FoldWidth,
|
||||||
|
Norm(norm.NFC),
|
||||||
|
BidiRule,
|
||||||
|
),
|
||||||
|
class: identifier,
|
||||||
|
}
|
||||||
|
opaquestring = &Profile{
|
||||||
|
options: getOpts(
|
||||||
|
AdditionalMapping(func() transform.Transformer {
|
||||||
|
return mapSpaces
|
||||||
|
}),
|
||||||
|
Norm(norm.NFC),
|
||||||
|
DisallowEmpty,
|
||||||
|
),
|
||||||
|
class: freeform,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
// mapSpaces is a shared value of a runes.Map transformer.
|
||||||
|
var mapSpaces transform.Transformer = runes.Map(func(r rune) rune {
|
||||||
|
if unicode.Is(unicode.Zs, r) {
|
||||||
|
return ' '
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
})
|
3889
vendor/golang.org/x/text/secure/precis/tables10.0.0.go
generated
vendored
Normal file
3889
vendor/golang.org/x/text/secure/precis/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
4016
vendor/golang.org/x/text/secure/precis/tables11.0.0.go
generated
vendored
Normal file
4016
vendor/golang.org/x/text/secure/precis/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
4118
vendor/golang.org/x/text/secure/precis/tables12.0.0.go
generated
vendored
Normal file
4118
vendor/golang.org/x/text/secure/precis/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
4152
vendor/golang.org/x/text/secure/precis/tables13.0.0.go
generated
vendored
Normal file
4152
vendor/golang.org/x/text/secure/precis/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
4315
vendor/golang.org/x/text/secure/precis/tables15.0.0.go
generated
vendored
Normal file
4315
vendor/golang.org/x/text/secure/precis/tables15.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
3790
vendor/golang.org/x/text/secure/precis/tables9.0.0.go
generated
vendored
Normal file
3790
vendor/golang.org/x/text/secure/precis/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
32
vendor/golang.org/x/text/secure/precis/transformer.go
generated
vendored
Normal file
32
vendor/golang.org/x/text/secure/precis/transformer.go
generated
vendored
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
import "golang.org/x/text/transform"
|
||||||
|
|
||||||
|
// Transformer implements the transform.Transformer interface.
|
||||||
|
type Transformer struct {
|
||||||
|
t transform.Transformer
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset implements the transform.Transformer interface.
|
||||||
|
func (t Transformer) Reset() { t.t.Reset() }
|
||||||
|
|
||||||
|
// Transform implements the transform.Transformer interface.
|
||||||
|
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
return t.t.Transform(dst, src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytes returns a new byte slice with the result of applying t to b.
|
||||||
|
func (t Transformer) Bytes(b []byte) []byte {
|
||||||
|
b, _, _ = transform.Bytes(t, b)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string with the result of applying t to s.
|
||||||
|
func (t Transformer) String(s string) string {
|
||||||
|
s, _, _ = transform.String(t, s)
|
||||||
|
return s
|
||||||
|
}
|
64
vendor/golang.org/x/text/secure/precis/trieval.go
generated
vendored
Normal file
64
vendor/golang.org/x/text/secure/precis/trieval.go
generated
vendored
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package precis
|
||||||
|
|
||||||
|
// entry is the entry of a trie table
|
||||||
|
// 7..6 property (unassigned, disallowed, maybe, valid)
|
||||||
|
// 5..0 category
|
||||||
|
type entry uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
propShift = 6
|
||||||
|
propMask = 0xc0
|
||||||
|
catMask = 0x3f
|
||||||
|
)
|
||||||
|
|
||||||
|
func (e entry) property() property { return property(e & propMask) }
|
||||||
|
func (e entry) category() category { return category(e & catMask) }
|
||||||
|
|
||||||
|
type property uint8
|
||||||
|
|
||||||
|
// The order of these constants matter. A Profile may consider runes to be
|
||||||
|
// allowed either from pValid or idDisOrFreePVal.
|
||||||
|
const (
|
||||||
|
unassigned property = iota << propShift
|
||||||
|
disallowed
|
||||||
|
idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
|
||||||
|
pValid
|
||||||
|
)
|
||||||
|
|
||||||
|
// compute permutations of all properties and specialCategories.
|
||||||
|
type category uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
other category = iota
|
||||||
|
|
||||||
|
// Special rune types
|
||||||
|
joiningL
|
||||||
|
joiningD
|
||||||
|
joiningT
|
||||||
|
joiningR
|
||||||
|
viramaModifier
|
||||||
|
viramaJoinT // Virama + JoiningT
|
||||||
|
latinSmallL // U+006c
|
||||||
|
greek
|
||||||
|
greekJoinT // Greek + JoiningT
|
||||||
|
hebrew
|
||||||
|
hebrewJoinT // Hebrew + JoiningT
|
||||||
|
japanese // hirigana, katakana, han
|
||||||
|
|
||||||
|
// Special rune types associated with contextual rules defined in
|
||||||
|
// https://tools.ietf.org/html/rfc5892#appendix-A.
|
||||||
|
// ContextO
|
||||||
|
zeroWidthNonJoiner // rule 1
|
||||||
|
zeroWidthJoiner // rule 2
|
||||||
|
// ContextJ
|
||||||
|
middleDot // rule 3
|
||||||
|
greekLowerNumeralSign // rule 4
|
||||||
|
hebrewPreceding // rule 5 and 6
|
||||||
|
katakanaMiddleDot // rule 7
|
||||||
|
arabicIndicDigit // rule 8
|
||||||
|
extendedArabicIndicDigit // rule 9
|
||||||
|
|
||||||
|
numCategories
|
||||||
|
)
|
28
vendor/golang.org/x/text/width/kind_string.go
generated
vendored
Normal file
28
vendor/golang.org/x/text/width/kind_string.go
generated
vendored
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
// Code generated by "stringer -type=Kind"; DO NOT EDIT.
|
||||||
|
|
||||||
|
package width
|
||||||
|
|
||||||
|
import "strconv"
|
||||||
|
|
||||||
|
func _() {
|
||||||
|
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||||
|
// Re-run the stringer command to generate them again.
|
||||||
|
var x [1]struct{}
|
||||||
|
_ = x[Neutral-0]
|
||||||
|
_ = x[EastAsianAmbiguous-1]
|
||||||
|
_ = x[EastAsianWide-2]
|
||||||
|
_ = x[EastAsianNarrow-3]
|
||||||
|
_ = x[EastAsianFullwidth-4]
|
||||||
|
_ = x[EastAsianHalfwidth-5]
|
||||||
|
}
|
||||||
|
|
||||||
|
const _Kind_name = "NeutralEastAsianAmbiguousEastAsianWideEastAsianNarrowEastAsianFullwidthEastAsianHalfwidth"
|
||||||
|
|
||||||
|
var _Kind_index = [...]uint8{0, 7, 25, 38, 53, 71, 89}
|
||||||
|
|
||||||
|
func (i Kind) String() string {
|
||||||
|
if i < 0 || i >= Kind(len(_Kind_index)-1) {
|
||||||
|
return "Kind(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||||
|
}
|
||||||
|
return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
|
||||||
|
}
|
1328
vendor/golang.org/x/text/width/tables10.0.0.go
generated
vendored
Normal file
1328
vendor/golang.org/x/text/width/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
1340
vendor/golang.org/x/text/width/tables11.0.0.go
generated
vendored
Normal file
1340
vendor/golang.org/x/text/width/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
1360
vendor/golang.org/x/text/width/tables12.0.0.go
generated
vendored
Normal file
1360
vendor/golang.org/x/text/width/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
1361
vendor/golang.org/x/text/width/tables13.0.0.go
generated
vendored
Normal file
1361
vendor/golang.org/x/text/width/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
1367
vendor/golang.org/x/text/width/tables15.0.0.go
generated
vendored
Normal file
1367
vendor/golang.org/x/text/width/tables15.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
1296
vendor/golang.org/x/text/width/tables9.0.0.go
generated
vendored
Normal file
1296
vendor/golang.org/x/text/width/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
239
vendor/golang.org/x/text/width/transform.go
generated
vendored
Normal file
239
vendor/golang.org/x/text/width/transform.go
generated
vendored
Normal file
|
@ -0,0 +1,239 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package width
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
type foldTransform struct {
|
||||||
|
transform.NopResetter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
if src[n] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[n:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
err = transform.ErrShortSrc
|
||||||
|
} else {
|
||||||
|
n = len(src)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if elem(v)&tagNeedsFold != 0 {
|
||||||
|
err = transform.ErrEndOfSpan
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += size
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
for nSrc < len(src) {
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
start, end := nSrc, len(src)
|
||||||
|
if d := len(dst) - nDst; d < end-start {
|
||||||
|
end = nSrc + d
|
||||||
|
}
|
||||||
|
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||||
|
}
|
||||||
|
n := copy(dst[nDst:], src[start:nSrc])
|
||||||
|
if nDst += n; nDst == len(dst) {
|
||||||
|
nSrc = start + n
|
||||||
|
if nSrc == len(src) {
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[nSrc:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1 // gobble 1 byte
|
||||||
|
}
|
||||||
|
if elem(v)&tagNeedsFold == 0 {
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
} else {
|
||||||
|
data := inverseData[byte(v)]
|
||||||
|
if len(dst)-nDst < int(data[0]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
i := 1
|
||||||
|
for end := int(data[0]); i < end; i++ {
|
||||||
|
dst[nDst] = data[i]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type narrowTransform struct {
|
||||||
|
transform.NopResetter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
if src[n] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[n:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
err = transform.ErrShortSrc
|
||||||
|
} else {
|
||||||
|
n = len(src)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||||
|
} else {
|
||||||
|
err = transform.ErrEndOfSpan
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += size
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
for nSrc < len(src) {
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
// ASCII fast path.
|
||||||
|
start, end := nSrc, len(src)
|
||||||
|
if d := len(dst) - nDst; d < end-start {
|
||||||
|
end = nSrc + d
|
||||||
|
}
|
||||||
|
for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
|
||||||
|
}
|
||||||
|
n := copy(dst[nDst:], src[start:nSrc])
|
||||||
|
if nDst += n; nDst == len(dst) {
|
||||||
|
nSrc = start + n
|
||||||
|
if nSrc == len(src) {
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
if src[nSrc] < utf8.RuneSelf {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, size := trie.lookup(src[nSrc:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1 // gobble 1 byte
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
} else {
|
||||||
|
data := inverseData[byte(v)]
|
||||||
|
if len(dst)-nDst < int(data[0]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
i := 1
|
||||||
|
for end := int(data[0]); i < end; i++ {
|
||||||
|
dst[nDst] = data[i]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type wideTransform struct {
|
||||||
|
transform.NopResetter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
for n < len(src) {
|
||||||
|
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||||
|
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||||
|
// not enough to warrant the extra code and complexity.
|
||||||
|
v, size := trie.lookup(src[n:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
err = transform.ErrShortSrc
|
||||||
|
} else {
|
||||||
|
n = len(src)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||||
|
} else {
|
||||||
|
err = transform.ErrEndOfSpan
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += size
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
for nSrc < len(src) {
|
||||||
|
// TODO: Consider ASCII fast path. Special-casing ASCII handling can
|
||||||
|
// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
|
||||||
|
// not enough to warrant the extra code and complexity.
|
||||||
|
v, size := trie.lookup(src[nSrc:])
|
||||||
|
if size == 0 { // incomplete UTF-8 encoding
|
||||||
|
if !atEOF {
|
||||||
|
return nDst, nSrc, transform.ErrShortSrc
|
||||||
|
}
|
||||||
|
size = 1 // gobble 1 byte
|
||||||
|
}
|
||||||
|
if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
|
||||||
|
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
nDst += size
|
||||||
|
} else {
|
||||||
|
data := inverseData[byte(v)]
|
||||||
|
if len(dst)-nDst < int(data[0]) {
|
||||||
|
return nDst, nSrc, transform.ErrShortDst
|
||||||
|
}
|
||||||
|
i := 1
|
||||||
|
for end := int(data[0]); i < end; i++ {
|
||||||
|
dst[nDst] = data[i]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
dst[nDst] = data[i] ^ src[nSrc+size-1]
|
||||||
|
nDst++
|
||||||
|
}
|
||||||
|
nSrc += size
|
||||||
|
}
|
||||||
|
return nDst, nSrc, nil
|
||||||
|
}
|
30
vendor/golang.org/x/text/width/trieval.go
generated
vendored
Normal file
30
vendor/golang.org/x/text/width/trieval.go
generated
vendored
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||||
|
|
||||||
|
package width
|
||||||
|
|
||||||
|
// elem is an entry of the width trie. The high byte is used to encode the type
|
||||||
|
// of the rune. The low byte is used to store the index to a mapping entry in
|
||||||
|
// the inverseData array.
|
||||||
|
type elem uint16
|
||||||
|
|
||||||
|
const (
|
||||||
|
tagNeutral elem = iota << typeShift
|
||||||
|
tagAmbiguous
|
||||||
|
tagWide
|
||||||
|
tagNarrow
|
||||||
|
tagFullwidth
|
||||||
|
tagHalfwidth
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
numTypeBits = 3
|
||||||
|
typeShift = 16 - numTypeBits
|
||||||
|
|
||||||
|
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
|
||||||
|
// the Won sign U+20A9.
|
||||||
|
tagNeedsFold = 0x1000
|
||||||
|
|
||||||
|
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
|
||||||
|
// variant.
|
||||||
|
wonSign rune = 0x20A9
|
||||||
|
)
|
206
vendor/golang.org/x/text/width/width.go
generated
vendored
Normal file
206
vendor/golang.org/x/text/width/width.go
generated
vendored
Normal file
|
@ -0,0 +1,206 @@
|
||||||
|
// Copyright 2015 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:generate stringer -type=Kind
|
||||||
|
//go:generate go run gen.go gen_common.go gen_trieval.go
|
||||||
|
|
||||||
|
// Package width provides functionality for handling different widths in text.
|
||||||
|
//
|
||||||
|
// Wide characters behave like ideographs; they tend to allow line breaks after
|
||||||
|
// each character and remain upright in vertical text layout. Narrow characters
|
||||||
|
// are kept together in words or runs that are rotated sideways in vertical text
|
||||||
|
// layout.
|
||||||
|
//
|
||||||
|
// For more information, see https://unicode.org/reports/tr11/.
|
||||||
|
package width // import "golang.org/x/text/width"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
// 1) Reduce table size by compressing blocks.
|
||||||
|
// 2) API proposition for computing display length
|
||||||
|
// (approximation, fixed pitch only).
|
||||||
|
// 3) Implement display length.
|
||||||
|
|
||||||
|
// Kind indicates the type of width property as defined in https://unicode.org/reports/tr11/.
|
||||||
|
type Kind int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Neutral characters do not occur in legacy East Asian character sets.
|
||||||
|
Neutral Kind = iota
|
||||||
|
|
||||||
|
// EastAsianAmbiguous characters that can be sometimes wide and sometimes
|
||||||
|
// narrow and require additional information not contained in the character
|
||||||
|
// code to further resolve their width.
|
||||||
|
EastAsianAmbiguous
|
||||||
|
|
||||||
|
// EastAsianWide characters are wide in their usual form. They occur only in
|
||||||
|
// the context of East Asian typography. These runes may have explicit
|
||||||
|
// halfwidth counterparts.
|
||||||
|
EastAsianWide
|
||||||
|
|
||||||
|
// EastAsianNarrow characters are narrow in their usual form. They often have
|
||||||
|
// fullwidth counterparts.
|
||||||
|
EastAsianNarrow
|
||||||
|
|
||||||
|
// Note: there exist Narrow runes that do not have fullwidth or wide
|
||||||
|
// counterparts, despite what the definition says (e.g. U+27E6).
|
||||||
|
|
||||||
|
// EastAsianFullwidth characters have a compatibility decomposition of type
|
||||||
|
// wide that map to a narrow counterpart.
|
||||||
|
EastAsianFullwidth
|
||||||
|
|
||||||
|
// EastAsianHalfwidth characters have a compatibility decomposition of type
|
||||||
|
// narrow that map to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
|
||||||
|
// SIGN.
|
||||||
|
EastAsianHalfwidth
|
||||||
|
|
||||||
|
// Note: there exist runes that have a halfwidth counterpart but that are
|
||||||
|
// classified as Ambiguous, rather than wide (e.g. U+2190).
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: the generated tries need to return size 1 for invalid runes for the
|
||||||
|
// width to be computed correctly (each byte should render width 1)
|
||||||
|
|
||||||
|
var trie = newWidthTrie(0)
|
||||||
|
|
||||||
|
// Lookup reports the Properties of the first rune in b and the number of bytes
|
||||||
|
// of its UTF-8 encoding.
|
||||||
|
func Lookup(b []byte) (p Properties, size int) {
|
||||||
|
v, sz := trie.lookup(b)
|
||||||
|
return Properties{elem(v), b[sz-1]}, sz
|
||||||
|
}
|
||||||
|
|
||||||
|
// LookupString reports the Properties of the first rune in s and the number of
|
||||||
|
// bytes of its UTF-8 encoding.
|
||||||
|
func LookupString(s string) (p Properties, size int) {
|
||||||
|
v, sz := trie.lookupString(s)
|
||||||
|
return Properties{elem(v), s[sz-1]}, sz
|
||||||
|
}
|
||||||
|
|
||||||
|
// LookupRune reports the Properties of rune r.
|
||||||
|
func LookupRune(r rune) Properties {
|
||||||
|
var buf [4]byte
|
||||||
|
n := utf8.EncodeRune(buf[:], r)
|
||||||
|
v, _ := trie.lookup(buf[:n])
|
||||||
|
last := byte(r)
|
||||||
|
if r >= utf8.RuneSelf {
|
||||||
|
last = 0x80 + byte(r&0x3f)
|
||||||
|
}
|
||||||
|
return Properties{elem(v), last}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Properties provides access to width properties of a rune.
//
// Values are produced by Lookup, LookupString and LookupRune.
|
||||||
|
type Properties struct {
|
||||||
|
// elem is the width-trie value for the rune: its kind is read from the
// high bits (see elem.kind) and its low byte indexes inverseData.
elem elem
|
||||||
|
// last is the final byte of the rune's UTF-8 encoding; Folded, Narrow and
// Wide XOR it into the last byte of the inverseData entry.
last byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e elem) kind() Kind {
|
||||||
|
return Kind(e >> typeShift)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kind returns the Kind of a rune as defined in Unicode TR #11.
|
||||||
|
// See https://unicode.org/reports/tr11/ for more details.
|
||||||
|
func (p Properties) Kind() Kind {
|
||||||
|
return p.elem.kind()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Folded returns the folded variant of a rune or 0 if the rune is canonical.
|
||||||
|
func (p Properties) Folded() rune {
|
||||||
|
if p.elem&tagNeedsFold != 0 {
|
||||||
|
buf := inverseData[byte(p.elem)]
|
||||||
|
buf[buf[0]] ^= p.last
|
||||||
|
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Narrow returns the narrow variant of a rune or 0 if the rune is already
|
||||||
|
// narrow or doesn't have a narrow variant.
|
||||||
|
func (p Properties) Narrow() rune {
|
||||||
|
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
|
||||||
|
buf := inverseData[byte(p.elem)]
|
||||||
|
buf[buf[0]] ^= p.last
|
||||||
|
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wide returns the wide variant of a rune or 0 if the rune is already
|
||||||
|
// wide or doesn't have a wide variant.
|
||||||
|
func (p Properties) Wide() rune {
|
||||||
|
if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
|
||||||
|
buf := inverseData[byte(p.elem)]
|
||||||
|
buf[buf[0]] ^= p.last
|
||||||
|
r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO for Properties:
|
||||||
|
// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
|
||||||
|
// mapping.
|
||||||
|
// - Add width information (including information on non-spacing runes).
|
||||||
|
|
||||||
|
// Transformer implements the transform.Transformer interface.
|
||||||
|
type Transformer struct {
|
||||||
|
t transform.SpanningTransformer
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset implements the transform.Transformer interface.
|
||||||
|
func (t Transformer) Reset() { t.t.Reset() }
|
||||||
|
|
||||||
|
// Transform implements the transform.Transformer interface.
|
||||||
|
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||||
|
return t.t.Transform(dst, src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span implements the transform.SpanningTransformer interface.
|
||||||
|
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
|
||||||
|
return t.t.Span(src, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytes returns a new byte slice with the result of applying t to b.
|
||||||
|
func (t Transformer) Bytes(b []byte) []byte {
|
||||||
|
b, _, _ = transform.Bytes(t, b)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string with the result of applying t to s.
|
||||||
|
func (t Transformer) String(s string) string {
|
||||||
|
s, _, _ = transform.String(t, s)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Fold is a transform that maps all runes to their canonical width.
|
||||||
|
//
|
||||||
|
// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
|
||||||
|
// provide a more generic folding mechanism.
|
||||||
|
Fold Transformer = Transformer{foldTransform{}}
|
||||||
|
|
||||||
|
// Widen is a transform that maps runes to their wide variant, if
|
||||||
|
// available.
|
||||||
|
Widen Transformer = Transformer{wideTransform{}}
|
||||||
|
|
||||||
|
// Narrow is a transform that maps runes to their narrow variant, if
|
||||||
|
// available.
|
||||||
|
Narrow Transformer = Transformer{narrowTransform{}}
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO: Consider the following options:
|
||||||
|
// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
|
||||||
|
// generalized variant of this.
|
||||||
|
// - Consider a wide Won character to be the default width (or some generalized
|
||||||
|
// variant of this).
|
||||||
|
// - Filter the set of characters that gets converted (the preferred approach is
|
||||||
|
// to allow applying filters to transforms).
|
8
vendor/modules.txt
vendored
8
vendor/modules.txt
vendored
|
@ -96,6 +96,7 @@ golang.org/x/sys/unix
|
||||||
golang.org/x/sys/windows
|
golang.org/x/sys/windows
|
||||||
# golang.org/x/text v0.14.0
|
# golang.org/x/text v0.14.0
|
||||||
## explicit; go 1.18
|
## explicit; go 1.18
|
||||||
|
golang.org/x/text/cases
|
||||||
golang.org/x/text/encoding
|
golang.org/x/text/encoding
|
||||||
golang.org/x/text/encoding/charmap
|
golang.org/x/text/encoding/charmap
|
||||||
golang.org/x/text/encoding/ianaindex
|
golang.org/x/text/encoding/ianaindex
|
||||||
|
@ -106,12 +107,19 @@ golang.org/x/text/encoding/korean
|
||||||
golang.org/x/text/encoding/simplifiedchinese
|
golang.org/x/text/encoding/simplifiedchinese
|
||||||
golang.org/x/text/encoding/traditionalchinese
|
golang.org/x/text/encoding/traditionalchinese
|
||||||
golang.org/x/text/encoding/unicode
|
golang.org/x/text/encoding/unicode
|
||||||
|
golang.org/x/text/internal
|
||||||
|
golang.org/x/text/internal/language
|
||||||
|
golang.org/x/text/internal/language/compact
|
||||||
|
golang.org/x/text/internal/tag
|
||||||
golang.org/x/text/internal/utf8internal
|
golang.org/x/text/internal/utf8internal
|
||||||
|
golang.org/x/text/language
|
||||||
golang.org/x/text/runes
|
golang.org/x/text/runes
|
||||||
golang.org/x/text/secure/bidirule
|
golang.org/x/text/secure/bidirule
|
||||||
|
golang.org/x/text/secure/precis
|
||||||
golang.org/x/text/transform
|
golang.org/x/text/transform
|
||||||
golang.org/x/text/unicode/bidi
|
golang.org/x/text/unicode/bidi
|
||||||
golang.org/x/text/unicode/norm
|
golang.org/x/text/unicode/norm
|
||||||
|
golang.org/x/text/width
|
||||||
# golang.org/x/tools v0.19.0
|
# golang.org/x/tools v0.19.0
|
||||||
## explicit; go 1.19
|
## explicit; go 1.19
|
||||||
golang.org/x/tools/go/gcexportdata
|
golang.org/x/tools/go/gcexportdata
|
||||||
|
|
|
@ -11,6 +11,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/crypto/bcrypt"
|
"golang.org/x/crypto/bcrypt"
|
||||||
|
"golang.org/x/text/secure/precis"
|
||||||
|
|
||||||
"github.com/mjl-/mox/mlog"
|
"github.com/mjl-/mox/mlog"
|
||||||
"github.com/mjl-/mox/mox-"
|
"github.com/mjl-/mox/mox-"
|
||||||
|
@ -48,6 +49,11 @@ func (a *adminSessionAuth) login(ctx context.Context, log mlog.Log, username, pa
|
||||||
return false, "", fmt.Errorf("reading password file: %v", err)
|
return false, "", fmt.Errorf("reading password file: %v", err)
|
||||||
}
|
}
|
||||||
passwordhash := strings.TrimSpace(string(buf))
|
passwordhash := strings.TrimSpace(string(buf))
|
||||||
|
// Transform with precis, if valid. ../rfc/8265:679
|
||||||
|
pw, err := precis.OpaqueString.String(password)
|
||||||
|
if err == nil {
|
||||||
|
password = pw
|
||||||
|
}
|
||||||
if err := bcrypt.CompareHashAndPassword([]byte(passwordhash), []byte(password)); err != nil {
|
if err := bcrypt.CompareHashAndPassword([]byte(passwordhash), []byte(password)); err != nil {
|
||||||
return false, "", nil
|
return false, "", nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -266,7 +266,7 @@ func Login(ctx context.Context, log mlog.Log, sessionAuth SessionAuth, kind, coo
|
||||||
// We don't set a max-age. These makes cookies per-session. Browsers are rarely
|
// We don't set a max-age. These makes cookies per-session. Browsers are rarely
|
||||||
// restarted nowadays, and they have "continue where you left off", keeping session
|
// restarted nowadays, and they have "continue where you left off", keeping session
|
||||||
// cookies. Our sessions are only valid for max 1 day. Convenience can come from
|
// cookies. Our sessions are only valid for max 1 day. Convenience can come from
|
||||||
// the browser remember the password.
|
// the browser remembering the password.
|
||||||
})
|
})
|
||||||
// Remove cookie used during login.
|
// Remove cookie used during login.
|
||||||
http.SetCookie(w, &http.Cookie{
|
http.SetCookie(w, &http.Cookie{
|
||||||
|
|
|
@ -59,7 +59,9 @@ func TestAPI(t *testing.T) {
|
||||||
log := mlog.New("webmail", nil)
|
log := mlog.New("webmail", nil)
|
||||||
acc, err := store.OpenAccount(log, "mjl")
|
acc, err := store.OpenAccount(log, "mjl")
|
||||||
tcheck(t, err, "open account")
|
tcheck(t, err, "open account")
|
||||||
err = acc.SetPassword(log, "test1234")
|
const pw0 = "te\u0301st \u00a0\u2002\u200a" // NFD and various unicode spaces.
|
||||||
|
const pw1 = "tést " // PRECIS normalized, with NFC.
|
||||||
|
err = acc.SetPassword(log, pw0)
|
||||||
tcheck(t, err, "set password")
|
tcheck(t, err, "set password")
|
||||||
defer func() {
|
defer func() {
|
||||||
err := acc.Close()
|
err := acc.Close()
|
||||||
|
@ -90,7 +92,7 @@ func TestAPI(t *testing.T) {
|
||||||
loginctx := context.WithValue(ctxbg, requestInfoCtxKey, loginReqInfo)
|
loginctx := context.WithValue(ctxbg, requestInfoCtxKey, loginReqInfo)
|
||||||
|
|
||||||
// Missing login token.
|
// Missing login token.
|
||||||
tneedErrorCode(t, "user:error", func() { api.Login(loginctx, "", "mjl@mox.example", "test1234") })
|
tneedErrorCode(t, "user:error", func() { api.Login(loginctx, "", "mjl@mox.example", pw0) })
|
||||||
|
|
||||||
// Login with loginToken.
|
// Login with loginToken.
|
||||||
loginCookie := &http.Cookie{Name: "webmaillogin"}
|
loginCookie := &http.Cookie{Name: "webmaillogin"}
|
||||||
|
@ -104,7 +106,7 @@ func TestAPI(t *testing.T) {
|
||||||
x := recover()
|
x := recover()
|
||||||
expErr := len(expErrCodes) > 0
|
expErr := len(expErrCodes) > 0
|
||||||
if (x != nil) != expErr {
|
if (x != nil) != expErr {
|
||||||
t.Fatalf("got %v, expected codes %v", x, expErrCodes)
|
t.Fatalf("got %v, expected codes %v, for username %q, password %q", x, expErrCodes, username, password)
|
||||||
}
|
}
|
||||||
if x == nil {
|
if x == nil {
|
||||||
return
|
return
|
||||||
|
@ -117,18 +119,21 @@ func TestAPI(t *testing.T) {
|
||||||
|
|
||||||
api.Login(loginctx, loginCookie.Value, username, password)
|
api.Login(loginctx, loginCookie.Value, username, password)
|
||||||
}
|
}
|
||||||
testLogin("mjl@mox.example", "test1234")
|
testLogin("mjl@mox.example", pw0)
|
||||||
testLogin("mjl@mox.example", "bad", "user:loginFailed")
|
testLogin("mjl@mox.example", pw1)
|
||||||
testLogin("nouser@mox.example", "test1234", "user:loginFailed")
|
testLogin("móx@mox.example", pw1) // NFC username
|
||||||
testLogin("nouser@bad.example", "test1234", "user:loginFailed")
|
testLogin("mo\u0301x@mox.example", pw1) // NFD username
|
||||||
|
testLogin("mjl@mox.example", pw1+" ", "user:loginFailed")
|
||||||
|
testLogin("nouser@mox.example", pw0, "user:loginFailed")
|
||||||
|
testLogin("nouser@bad.example", pw0, "user:loginFailed")
|
||||||
for i := 3; i < 10; i++ {
|
for i := 3; i < 10; i++ {
|
||||||
testLogin("bad@bad.example", "test1234", "user:loginFailed")
|
testLogin("bad@bad.example", pw0, "user:loginFailed")
|
||||||
}
|
}
|
||||||
// Ensure rate limiter is triggered, also for slow tests.
|
// Ensure rate limiter is triggered, also for slow tests.
|
||||||
for i := 0; i < 10; i++ {
|
for i := 0; i < 10; i++ {
|
||||||
testLogin("bad@bad.example", "test1234", "user:loginFailed", "user:error")
|
testLogin("bad@bad.example", pw0, "user:loginFailed", "user:error")
|
||||||
}
|
}
|
||||||
testLogin("bad@bad.example", "test1234", "user:error")
|
testLogin("bad@bad.example", pw0, "user:error")
|
||||||
|
|
||||||
// Context with different IP, for clear rate limit history.
|
// Context with different IP, for clear rate limit history.
|
||||||
reqInfo := requestInfo{"mjl@mox.example", "mjl", "", nil, &http.Request{RemoteAddr: "127.0.0.1:1234"}}
|
reqInfo := requestInfo{"mjl@mox.example", "mjl", "", nil, &http.Request{RemoteAddr: "127.0.0.1:1234"}}
|
||||||
|
|
Loading…
Reference in a new issue