From 676832d14af661db14898e41523dafc5761fbf7d Mon Sep 17 00:00:00 2001 From: MkQtS <81752398+MkQtS@users.noreply.github.com> Date: Thu, 22 Jan 2026 18:46:53 +0800 Subject: [PATCH] Improve value checkers and docs (#3208) * Refactor: improve value checkers * Docs: small improvements [skip ci] --- README.md | 10 +++++----- main.go | 25 +++++++++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 3ee95243..7aba4091 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,9 @@ All data are under `data` directory. Each file in the directory represents a sub # comments include:another-file domain:google.com @attr1 @attr2 +full:analytics.google.com @ads keyword:google -regexp:www\.google\.com$ -full:www.google.com +regexp:^odd[1-7]\.example\.org(\.[a-z]{2})?$ ``` **Syntax:** @@ -106,10 +106,10 @@ full:www.google.com - Comment begins with `#`. It may begin anywhere in the file. The content in the line after `#` is treated as comment and ignored in production. - Subdomain begins with `domain:`, followed by a valid domain name. The prefix `domain:` may be omitted. -- Keyword begins with `keyword:`, followed by a string. -- Regular expression begins with `regexp:`, followed by a valid regular expression (per Golang's standard). - Full domain begins with `full:`, followed by a complete and valid domain name. -- Domain rules (including `domain`, `keyword`, `regexp` and `full`) may have none, one or more attributes. Each attribute begins with `@` and followed by the name of the attribute. Attributes will remain available in final lists and `dlc.dat`. +- Keyword begins with `keyword:`, followed by a substring of a valid domain name. +- Regular expression begins with `regexp:`, followed by a valid regular expression (per Golang's standard). +- Domain rules (including `domain`, `full`, `keyword`, and `regexp`) may have none, one or more attributes. Each attribute begins with `@` and is followed by the name of the attribute. 
Attributes will remain available in final lists and `dlc.dat`. - Domain rules may have none, one or more affiliations, which additionally adds the domain rule into the affiliated target list. Each affiliation begins with `&` and followed by the name of the target list (nomatter whether the target has a dedicated file in data path). This is a method for data management, and will not remain in the final lists or `dlc.dat`. - Inclusion begins with `include:`, followed by the name of another valid domain list. A simple `include:listb` in file `lista` means adding all domain rules of `listb` into `lista`. Inclusions with attributes stands for selective inclusion. `include:listb @attr1 @-attr2` means only adding those domain rules *with* `@attr1` **and** *without* `@attr2`. This is a special type for data management, and will not remain in the final lists or `dlc.dat`. diff --git a/main.go b/main.go index 4611ed18..d63b6bfc 100644 --- a/main.go +++ b/main.go @@ -30,10 +30,10 @@ const ( ) var ( - TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`) - ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`) - AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`) - SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`) + TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`) + DomainChecker = regexp.MustCompile(`^[a-z0-9\.-]+$`) + AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`) + SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`) ) var ( @@ -123,6 +123,8 @@ func parseEntry(line string) (Entry, error) { entry.Type = strings.ToLower(kv[0]) if entry.Type == RuleTypeRegexp { entry.Value = kv[1] + } else if entry.Type == RuleTypeInclude { + entry.Value = strings.ToUpper(kv[1]) } else { entry.Value = strings.ToLower(kv[1]) } @@ -133,12 +135,19 @@ func parseEntry(line string) (Entry, error) { if !TypeChecker.MatchString(entry.Type) { return entry, fmt.Errorf("invalid type: %s", entry.Type) } - if entry.Type == RuleTypeRegexp { + switch entry.Type { + 
case RuleTypeRegexp: if _, err := regexp.Compile(entry.Value); err != nil { return entry, fmt.Errorf("invalid regexp: %s", entry.Value) } - } else if !ValueChecker.MatchString(entry.Value) { - return entry, fmt.Errorf("invalid value: %s", entry.Value) + case RuleTypeInclude: + if !SiteChecker.MatchString(entry.Value) { + return entry, fmt.Errorf("invalid included list name: %s", entry.Value) + } + default: // `full`, `domain` and `keyword` are all (parts of) domains + if !DomainChecker.MatchString(entry.Value) { + return entry, fmt.Errorf("invalid domain: %s", entry.Value) + } } // Parse/Check attributes and affiliations @@ -214,7 +223,7 @@ func parseList(refName string, refList []*Entry) error { if len(entry.Affs) != 0 { return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value) } - inc := &Inclusion{Source: strings.ToUpper(entry.Value)} + inc := &Inclusion{Source: entry.Value} for _, attr := range entry.Attrs { if strings.HasPrefix(attr, "-") { inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character