2023-01-30 16:27:06 +03:00
// Package publicsuffix implements a public suffix list to look up the
// organizational domain for a given host name. Organizational domains can be
// registered, one level below a top-level domain.
//
// Example.com has a public suffix ".com", and example.co.uk has a public
// suffix ".co.uk". The organizational domain of sub.example.com is
// example.com, and the organizational domain of sub.example.co.uk is
// example.co.uk.
package publicsuffix
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
2024-02-08 16:49:01 +03:00
"log/slog"
2023-01-30 16:27:06 +03:00
"strings"
_ "embed"
"golang.org/x/net/idna"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mlog"
)
// todo: automatically fetch new lists periodically? compare it with the old one. refuse it if it changed too much, especially if it contains far fewer entries than before.
// labels is a tree of domain labels used to store public suffix rules. It maps
// from utf8 labels to the labels for their subdomains. The key "*" is the
// wildcard label of a rule.
// The end of a rule is marked with an empty string as label.
type labels map [ string ] labels
// List is a public suffix list.
//
// Rules are kept in two label trees that are walked from the rightmost label
// of a domain towards the left.
type List struct {
	// includes holds the regular rules.
	includes labels
	// excludes holds the exception rules, those written with a leading "!".
	excludes labels
}
// publicsuffixList is the builtin list. It is filled during init by parsing
// publicsuffixData, and is used by the package-level Lookup.
var publicsuffixList List
// publicsuffixData holds the contents of public_suffix_list.txt, embedded at
// build time.
//
//go:embed public_suffix_list.txt
var publicsuffixData [ ] byte
func init ( ) {
2023-12-05 15:35:58 +03:00
log := mlog . New ( "publicsuffix" , nil )
l , err := ParseList ( log . Logger , bytes . NewReader ( publicsuffixData ) )
2023-01-30 16:27:06 +03:00
if err != nil {
2023-12-05 15:35:58 +03:00
log . Fatalx ( "parsing public suffix list" , err )
2023-01-30 16:27:06 +03:00
}
publicsuffixList = l
}
// ParseList parses a public suffix list.
// Only the "ICANN DOMAINS" are used.
2023-12-05 15:35:58 +03:00
func ParseList ( elog * slog . Logger , r io . Reader ) ( List , error ) {
log := mlog . New ( "publicsuffix" , elog )
2023-01-30 16:27:06 +03:00
list := List { labels { } , labels { } }
br := bufio . NewReader ( r )
// Only use ICANN domains. ../rfc/7489-eid6729
var icannDomains bool
for {
line , err := br . ReadString ( '\n' )
if line != "" {
line = strings . TrimSpace ( line )
if strings . HasPrefix ( line , "// ===BEGIN ICANN DOMAINS===" ) {
icannDomains = true
continue
} else if strings . HasPrefix ( line , "// ===END ICANN DOMAINS===" ) {
icannDomains = false
continue
} else if line == "" || strings . HasPrefix ( line , "//" ) || ! icannDomains {
continue
}
l := list . includes
var t [ ] string
oline := line
if strings . HasPrefix ( line , "!" ) {
line = line [ 1 : ]
l = list . excludes
t = strings . Split ( line , "." )
if len ( t ) == 1 {
2023-12-05 15:35:58 +03:00
log . Print ( "exclude rule with single label, skipping" , slog . String ( "line" , oline ) )
2023-01-30 16:27:06 +03:00
continue
}
} else {
t = strings . Split ( line , "." )
}
for i := len ( t ) - 1 ; i >= 0 ; i -- {
w := t [ i ]
if w == "" {
2023-12-05 15:35:58 +03:00
log . Print ( "empty label in rule, skipping" , slog . String ( "line" , oline ) )
2023-01-30 16:27:06 +03:00
break
}
if w != "" && w != "*" {
w , err = idna . Lookup . ToUnicode ( w )
if err != nil {
2023-12-05 15:35:58 +03:00
log . Printx ( "invalid label, skipping" , err , slog . String ( "line" , oline ) )
2023-01-30 16:27:06 +03:00
}
}
m , ok := l [ w ]
if ok {
if _ , dup := m [ "" ] ; i == 0 && dup {
2023-12-05 15:35:58 +03:00
log . Print ( "duplicate rule" , slog . String ( "line" , oline ) )
2023-01-30 16:27:06 +03:00
}
l = m
} else {
m = labels { }
l [ w ] = m
l = m
}
}
l [ "" ] = nil // Mark end.
}
if err == io . EOF {
break
}
if err != nil {
return List { } , fmt . Errorf ( "reading public suffix list: %w" , err )
}
}
return list , nil
}
// Lookup calls Lookup on the builtin public suffix list, from
// https://publicsuffix.org/list/.
2023-12-05 15:35:58 +03:00
func Lookup ( ctx context . Context , elog * slog . Logger , domain dns . Domain ) ( orgDomain dns . Domain ) {
return publicsuffixList . Lookup ( ctx , elog , domain )
2023-01-30 16:27:06 +03:00
}
// Lookup returns the organizational domain. If domain is an organizational
// domain, or higher-level, the same domain is returned.
2023-12-05 15:35:58 +03:00
func ( l List ) Lookup ( ctx context . Context , elog * slog . Logger , domain dns . Domain ) ( orgDomain dns . Domain ) {
log := mlog . New ( "publicsuffix" , elog )
2023-01-30 16:27:06 +03:00
defer func ( ) {
2023-12-05 15:35:58 +03:00
log . Debug ( "publicsuffix lookup result" , slog . Any ( "reqdom" , domain ) , slog . Any ( "orgdom" , orgDomain ) )
2023-01-30 16:27:06 +03:00
} ( )
t := strings . Split ( domain . Name ( ) , "." )
var n int
if nexcl , ok := match ( l . excludes , t ) ; ok {
n = nexcl
} else if nincl , ok := match ( l . includes , t ) ; ok {
n = nincl + 1
} else {
n = 2
}
if len ( t ) < n {
return domain
}
name := strings . Join ( t [ len ( t ) - n : ] , "." )
if isASCII ( name ) {
return dns . Domain { ASCII : name }
}
t = strings . Split ( domain . ASCII , "." )
ascii := strings . Join ( t [ len ( t ) - n : ] , "." )
return dns . Domain { ASCII : ascii , Unicode : name }
}
// isASCII returns whether s consists only of bytes below 0x80. The empty
// string is ASCII.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] > 0x7f {
			return false
		}
	}
	return true
}
// match walks the tree l along the labels of t, starting at the last label of
// t and moving to the left. It returns the number of labels of the longest
// rule found, and whether any rule matched.
//
// At each level both the literal label and the wildcard "*" are tried. The
// wildcard result is used only when its remainder covers at least as many
// labels as the whole literal match. A rule that ends at l itself (marked with
// the empty label) matches with zero further labels.
func match(l labels, t []string) (int, bool) {
	_, end := l[""]
	if len(t) == 0 {
		return 0, end
	}

	last := len(t) - 1
	label, rest := t[last], t[:last]

	depth := 0
	if sub, ok := l[label]; ok {
		if d, found := match(sub, rest); found {
			depth = d + 1
		}
	}
	if sub, ok := l["*"]; ok {
		if d, found := match(sub, rest); found && d >= depth {
			depth = d + 1
		}
	}
	return depth, end || depth > 0
}