2019-07-14 19:29:58 +08:00
|
|
|
package trie
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"strings"
|
2024-09-21 21:03:59 +08:00
|
|
|
"unicode"
|
|
|
|
"unicode/utf8"
|
2019-07-14 19:29:58 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
// Special labels and separators understood by the trie.
const (
	// wildcard is the label that stands in for any single label at its
	// position (e.g. the first label of "*.example.com").
	wildcard = "*"

	// dotWildcard is the empty label produced by a leading dot, as in
	// ".example.com"; search falls back to a child with this label when
	// neither the exact label nor wildcard yields a result.
	dotWildcard = ""

	// complexWildcard is the leading label of "+.example.com"; Insert stores
	// such a rule both without its first label and with that label replaced
	// by dotWildcard.
	complexWildcard = "+"

	// domainStep separates the labels of a domain.
	domainStep = "."
)
|
|
|
|
|
2021-10-10 23:44:09 +08:00
|
|
|
// ErrInvalidDomain is returned by Insert when the given domain fails
// validation.
var ErrInvalidDomain error = errors.New("invalid domain")
|
2019-07-14 19:29:58 +08:00
|
|
|
|
2020-05-28 12:13:05 +08:00
|
|
|
// DomainTrie contains the main logic for adding and searching nodes for domain segments.
|
2019-07-14 19:29:58 +08:00
|
|
|
// support wildcard domain (e.g *.google.com)
|
2022-11-02 22:28:18 +08:00
|
|
|
type DomainTrie[T any] struct {
|
2022-04-06 04:25:53 +08:00
|
|
|
root *Node[T]
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|
|
|
|
|
2021-05-19 11:17:35 +08:00
|
|
|
func ValidAndSplitDomain(domain string) ([]string, bool) {
|
2020-04-08 15:45:59 +08:00
|
|
|
if domain != "" && domain[len(domain)-1] == '.' {
|
|
|
|
return nil, false
|
|
|
|
}
|
2024-09-21 21:03:59 +08:00
|
|
|
if domain != "" {
|
|
|
|
if r, _ := utf8.DecodeRuneInString(domain); unicode.IsSpace(r) {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
if r, _ := utf8.DecodeLastRuneInString(domain); unicode.IsSpace(r) {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
}
|
2023-04-20 05:45:22 +00:00
|
|
|
domain = strings.ToLower(domain)
|
2020-04-08 15:45:59 +08:00
|
|
|
parts := strings.Split(domain, domainStep)
|
|
|
|
if len(parts) == 1 {
|
2020-06-07 17:25:51 +08:00
|
|
|
if parts[0] == "" {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
|
|
|
return parts, true
|
2020-04-08 15:45:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, part := range parts[1:] {
|
|
|
|
if part == "" {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return parts, true
|
2019-07-16 00:57:08 +08:00
|
|
|
}
|
|
|
|
|
2019-07-14 19:29:58 +08:00
|
|
|
// Insert adds a node to the trie.
|
|
|
|
// Support
|
|
|
|
// 1. www.example.com
|
|
|
|
// 2. *.example.com
|
|
|
|
// 3. subdomain.*.example.com
|
2020-04-08 15:45:59 +08:00
|
|
|
// 4. .example.com
|
2020-05-28 12:13:05 +08:00
|
|
|
// 5. +.example.com
|
2022-04-06 04:25:53 +08:00
|
|
|
func (t *DomainTrie[T]) Insert(domain string, data T) error {
|
2021-05-19 11:17:35 +08:00
|
|
|
parts, valid := ValidAndSplitDomain(domain)
|
2020-04-08 15:45:59 +08:00
|
|
|
if !valid {
|
2019-07-14 19:29:58 +08:00
|
|
|
return ErrInvalidDomain
|
|
|
|
}
|
|
|
|
|
2020-05-28 12:13:05 +08:00
|
|
|
if parts[0] == complexWildcard {
|
|
|
|
t.insert(parts[1:], data)
|
|
|
|
parts[0] = dotWildcard
|
|
|
|
t.insert(parts, data)
|
|
|
|
} else {
|
|
|
|
t.insert(parts, data)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-04-06 04:25:53 +08:00
|
|
|
func (t *DomainTrie[T]) insert(parts []string, data T) {
|
2019-07-14 19:29:58 +08:00
|
|
|
node := t.root
|
|
|
|
// reverse storage domain part to save space
|
|
|
|
for i := len(parts) - 1; i >= 0; i-- {
|
|
|
|
part := parts[i]
|
2022-11-30 18:50:46 +08:00
|
|
|
node = node.getOrNewChild(part)
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|
|
|
|
|
2022-11-02 22:28:18 +08:00
|
|
|
node.setData(data)
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Search is the most important part of the Trie.
|
|
|
|
// Priority as:
|
|
|
|
// 1. static part
|
|
|
|
// 2. wildcard domain
|
2020-04-08 15:45:59 +08:00
|
|
|
// 2. dot wildcard domain
|
2022-04-06 04:25:53 +08:00
|
|
|
func (t *DomainTrie[T]) Search(domain string) *Node[T] {
|
2021-05-19 11:17:35 +08:00
|
|
|
parts, valid := ValidAndSplitDomain(domain)
|
2020-04-08 15:45:59 +08:00
|
|
|
if !valid || parts[0] == "" {
|
2019-07-14 19:29:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-06-24 18:41:23 +08:00
|
|
|
n := t.search(t.root, parts)
|
2019-07-14 19:29:58 +08:00
|
|
|
|
2022-11-02 22:28:18 +08:00
|
|
|
if n.isEmpty() {
|
2020-06-24 18:41:23 +08:00
|
|
|
return nil
|
|
|
|
}
|
2019-07-14 19:29:58 +08:00
|
|
|
|
2020-06-24 18:41:23 +08:00
|
|
|
return n
|
|
|
|
}
|
2020-04-24 23:49:19 +08:00
|
|
|
|
2022-04-06 04:25:53 +08:00
|
|
|
func (t *DomainTrie[T]) search(node *Node[T], parts []string) *Node[T] {
|
2020-06-24 18:41:23 +08:00
|
|
|
if len(parts) == 0 {
|
|
|
|
return node
|
|
|
|
}
|
2020-04-08 15:45:59 +08:00
|
|
|
|
2020-06-24 18:41:23 +08:00
|
|
|
if c := node.getChild(parts[len(parts)-1]); c != nil {
|
2022-11-02 22:28:18 +08:00
|
|
|
if n := t.search(c, parts[:len(parts)-1]); !n.isEmpty() {
|
2020-06-24 18:41:23 +08:00
|
|
|
return n
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|
2020-04-08 15:45:59 +08:00
|
|
|
}
|
2019-07-14 19:29:58 +08:00
|
|
|
|
2020-06-24 18:41:23 +08:00
|
|
|
if c := node.getChild(wildcard); c != nil {
|
2022-11-02 22:28:18 +08:00
|
|
|
if n := t.search(c, parts[:len(parts)-1]); !n.isEmpty() {
|
2020-06-24 18:41:23 +08:00
|
|
|
return n
|
2020-04-08 15:45:59 +08:00
|
|
|
}
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|
|
|
|
|
2021-03-24 01:00:21 +08:00
|
|
|
return node.getChild(dotWildcard)
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|
|
|
|
|
2022-11-30 19:42:05 +08:00
|
|
|
func (t *DomainTrie[T]) Optimize() {
|
|
|
|
t.root.optimize()
|
2022-11-30 18:50:46 +08:00
|
|
|
}
|
|
|
|
|
2024-07-28 10:07:37 +08:00
|
|
|
func (t *DomainTrie[T]) Foreach(fn func(domain string, data T) bool) {
|
2023-04-01 11:53:39 +08:00
|
|
|
for key, data := range t.root.getChildren() {
|
2024-07-28 10:07:37 +08:00
|
|
|
recursion([]string{key}, data, fn)
|
2024-08-15 07:42:59 +08:00
|
|
|
if !data.isEmpty() {
|
2024-07-28 10:07:37 +08:00
|
|
|
if !fn(joinDomain([]string{key}), data.data) {
|
|
|
|
return
|
|
|
|
}
|
2023-04-20 05:45:22 +00:00
|
|
|
}
|
2023-04-01 11:53:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-08-14 22:38:17 +08:00
|
|
|
func (t *DomainTrie[T]) IsEmpty() bool {
|
2024-08-15 07:42:59 +08:00
|
|
|
if t == nil || t.root == nil {
|
2024-08-14 22:38:17 +08:00
|
|
|
return true
|
|
|
|
}
|
2024-08-15 07:42:59 +08:00
|
|
|
return len(t.root.getChildren()) == 0
|
2024-08-14 22:38:17 +08:00
|
|
|
}
|
|
|
|
|
2024-07-28 10:07:37 +08:00
|
|
|
func recursion[T any](items []string, node *Node[T], fn func(domain string, data T) bool) bool {
|
2023-04-01 11:53:39 +08:00
|
|
|
for key, data := range node.getChildren() {
|
|
|
|
newItems := append([]string{key}, items...)
|
2024-08-15 07:42:59 +08:00
|
|
|
if !data.isEmpty() {
|
2023-04-01 11:53:39 +08:00
|
|
|
domain := joinDomain(newItems)
|
|
|
|
if domain[0] == domainStepByte {
|
|
|
|
domain = complexWildcard + domain
|
|
|
|
}
|
2024-07-28 10:07:37 +08:00
|
|
|
if !fn(domain, data.Data()) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !recursion(newItems, data, fn) {
|
|
|
|
return false
|
2023-04-01 11:53:39 +08:00
|
|
|
}
|
|
|
|
}
|
2024-07-28 10:07:37 +08:00
|
|
|
return true
|
2023-04-01 11:53:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func joinDomain(items []string) string {
|
|
|
|
return strings.Join(items, domainStep)
|
|
|
|
}
|
|
|
|
|
2019-07-14 19:29:58 +08:00
|
|
|
// New returns a new, empty Trie.
|
2022-11-02 22:28:18 +08:00
|
|
|
func New[T any]() *DomainTrie[T] {
|
|
|
|
return &DomainTrie[T]{root: newNode[T]()}
|
2019-07-14 19:29:58 +08:00
|
|
|
}
|