Improve value checkers and docs (#3208)

* Refactor: improve value checkers

* Docs: small improvements

[skip ci]
This commit is contained in:
MkQtS
2026-01-22 18:46:53 +08:00
committed by GitHub
parent a2f08a142c
commit 676832d14a
2 changed files with 22 additions and 13 deletions

View File

@@ -91,9 +91,9 @@ All data are under `data` directory. Each file in the directory represents a sub
# comments
include:another-file
domain:google.com @attr1 @attr2
full:analytics.google.com @ads
keyword:google
regexp:www\.google\.com$
full:www.google.com
regexp:^odd[1-7]\.example\.org(\.[a-z]{2})?$
```
**Syntax:**
@@ -106,10 +106,10 @@ full:www.google.com
- Comment begins with `#`. It may begin anywhere in the file. The content in the line after `#` is treated as comment and ignored in production.
- Subdomain begins with `domain:`, followed by a valid domain name. The prefix `domain:` may be omitted.
- Keyword begins with `keyword:`, followed by a string.
- Regular expression begins with `regexp:`, followed by a valid regular expression (per Golang's standard).
- Full domain begins with `full:`, followed by a complete and valid domain name.
- Domain rules (including `domain`, `keyword`, `regexp` and `full`) may have none, one or more attributes. Each attribute begins with `@` and followed by the name of the attribute. Attributes will remain available in final lists and `dlc.dat`.
- Keyword begins with `keyword:`, followed by a substring of a valid domain name.
- Regular expression begins with `regexp:`, followed by a valid regular expression (per Golang's standard).
- Domain rules (including `domain`, `full`, `keyword`, and `regexp`) may have none, one or more attributes. Each attribute begins with `@` and is followed by the name of the attribute. Attributes will remain available in final lists and `dlc.dat`.
- Domain rules may have none, one or more affiliations, each of which additionally adds the domain rule to the affiliated target list. Each affiliation begins with `&` and is followed by the name of the target list (no matter whether the target has a dedicated file in the data path). This is a method for data management, and affiliations will not remain in the final lists or `dlc.dat`.
- Inclusion begins with `include:`, followed by the name of another valid domain list. A simple `include:listb` in file `lista` means adding all domain rules of `listb` into `lista`. Inclusions with attributes stand for selective inclusion. `include:listb @attr1 @-attr2` means only adding those domain rules *with* `@attr1` **and** *without* `@attr2`. This is a special type for data management, and will not remain in the final lists or `dlc.dat`.

25
main.go
View File

@@ -30,10 +30,10 @@ const (
)
var (
TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`)
AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)
SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`)
TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
DomainChecker = regexp.MustCompile(`^[a-z0-9\.-]+$`)
AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)
SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`)
)
var (
@@ -123,6 +123,8 @@ func parseEntry(line string) (Entry, error) {
entry.Type = strings.ToLower(kv[0])
if entry.Type == RuleTypeRegexp {
entry.Value = kv[1]
} else if entry.Type == RuleTypeInclude {
entry.Value = strings.ToUpper(kv[1])
} else {
entry.Value = strings.ToLower(kv[1])
}
@@ -133,12 +135,19 @@ func parseEntry(line string) (Entry, error) {
if !TypeChecker.MatchString(entry.Type) {
return entry, fmt.Errorf("invalid type: %s", entry.Type)
}
if entry.Type == RuleTypeRegexp {
switch entry.Type {
case RuleTypeRegexp:
if _, err := regexp.Compile(entry.Value); err != nil {
return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
}
} else if !ValueChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid value: %s", entry.Value)
case RuleTypeInclude:
if !SiteChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid included list name: %s", entry.Value)
}
default: // `full`, `domain` and `keyword` are all (parts of) domains
if !DomainChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid domain: %s", entry.Value)
}
}
// Parse/Check attributes and affiliations
@@ -214,7 +223,7 @@ func parseList(refName string, refList []*Entry) error {
if len(entry.Affs) != 0 {
return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value)
}
inc := &Inclusion{Source: strings.ToUpper(entry.Value)}
inc := &Inclusion{Source: entry.Value}
for _, attr := range entry.Attrs {
if strings.HasPrefix(attr, "-") {
inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character