Refactor main.go (#3119)

* Refactor: reduce the use of strings.TrimSpace

* Refactor: use string attr before toProto

* Refactor parseEntry

- add value/attribute checker(check missing space)
- allow multiple spaces
- sort attributes
- improve readability

* Refactor exportPlainTextList

- remove unnecessary variable
- improve readability

* Remove support for partial include

This reverts e640ac2783

It is problematic and I will implement a new one

* Refactor: promote refMap

* Feat: add support for partial include

- refactor inclusion logic
- add basic deduplication

* Refactor exporting plaintext list

* Feat: add support for affiliation

A domain rule is always added to the list corresponding to the filename
it resides in. Additionally, you can now add affiliations to a domain
rule, and the rule will be added to the list specified by the
affiliation. Each affiliation begins with `&`, followed by the name
of the affiliation.

This helps us reduce the number of data files without compromising
functionality, and avoids writing the same rule in different files.

* Feat: add advanced deduplication for subdomains

only for domain/full subdomains without attr

* Refactor: import and use slices

* Refactor: improve code
This commit is contained in:
MkQtS
2026-01-20 20:58:32 +08:00
committed by GitHub
parent d50e2e1ad7
commit ec95fedc45

512
main.go
View File

@@ -7,7 +7,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"sort" "slices"
"strings" "strings"
router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" router "github.com/v2fly/v2ray-core/v5/app/router/routercommon"
@@ -29,286 +29,307 @@ const (
RuleTypeInclude string = "include" RuleTypeInclude string = "include"
) )
// TypeChecker matches exactly one of the rule type names: domain, full,
// keyword, regexp or include.
var TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)

// ValueChecker matches a non-empty string made only of lowercase ASCII
// letters, digits, '!', '.' and '-'.
var ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`)

// AttrChecker matches a non-empty string made only of lowercase ASCII
// letters, digits, '!' and '-'. Unlike ValueChecker it rejects '.'.
var AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)

// SiteChecker matches a non-empty string made only of uppercase ASCII
// letters, digits, '!' and '-'.
var SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`)
// Package-level tables, all keyed by list name.
var (
	// refMap collects the entries appended under each list name.
	refMap = map[string][]*Entry{}
	// plMap holds one ParsedList per list name.
	plMap = map[string]*ParsedList{}
	// finalMap holds the final entry slice stored for each list name.
	finalMap = map[string][]*Entry{}
	// Used for circular inclusion detection
	cirIncMap = map[string]bool{}
)
type Entry struct { type Entry struct {
Type string Type string
Value string Value string
Attrs []*router.Domain_Attribute Attrs []string
Plain string
Affs []string
} }
type List struct { type Inclusion struct {
Name string Source string
Entry []Entry MustAttrs []string
BanAttrs []string
} }
type ParsedList struct { type ParsedList struct {
Name string Name string
Inclusion map[string]bool Inclusions []*Inclusion
Entry []Entry Entries []*Entry
} }
func (l *ParsedList) toPlainText(listName string) error { func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) {
var entryBytes []byte
for _, entry := range l.Entry {
var attrString string
if entry.Attrs != nil {
for _, attr := range entry.Attrs {
attrString += "@" + attr.GetKey() + ","
}
attrString = strings.TrimRight(":"+attrString, ",")
}
// Entry output format is: type:domain.tld:@attr1,@attr2
entryBytes = append(entryBytes, []byte(entry.Type+":"+entry.Value+attrString+"\n")...)
}
if err := os.WriteFile(filepath.Join(*outputDir, listName+".txt"), entryBytes, 0644); err != nil {
return err
}
return nil
}
func (l *ParsedList) toProto() (*router.GeoSite, error) {
site := &router.GeoSite{ site := &router.GeoSite{
CountryCode: l.Name, CountryCode: listName,
Domain: make([]*router.Domain, 0, len(entries)),
} }
for _, entry := range l.Entry { for _, entry := range entries {
pdomain := &router.Domain{Value: entry.Value}
for _, attr := range entry.Attrs {
pdomain.Attribute = append(pdomain.Attribute, &router.Domain_Attribute{
Key: attr,
TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true},
})
}
switch entry.Type { switch entry.Type {
case RuleTypeDomain: case RuleTypeDomain:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_RootDomain
Type: router.Domain_RootDomain,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeRegexp: case RuleTypeRegexp:
// check regexp validity to avoid runtime error pdomain.Type = router.Domain_Regex
_, err := regexp.Compile(entry.Value)
if err != nil {
return nil, fmt.Errorf("invalid regexp in list %s: %s", l.Name, entry.Value)
}
site.Domain = append(site.Domain, &router.Domain{
Type: router.Domain_Regex,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeKeyword: case RuleTypeKeyword:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_Plain
Type: router.Domain_Plain,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeFullDomain: case RuleTypeFullDomain:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_Full
Type: router.Domain_Full,
Value: entry.Value,
Attribute: entry.Attrs,
})
default:
return nil, fmt.Errorf("unknown domain type: %s", entry.Type)
} }
site.Domain = append(site.Domain, pdomain)
} }
return site, nil return site, nil
} }
func exportPlainTextList(list []string, refName string, pl *ParsedList) { func writePlainList(exportedName string) error {
for _, listName := range list { targetList, exist := finalMap[strings.ToUpper(exportedName)]
if strings.EqualFold(refName, listName) { if !exist || len(targetList) == 0 {
if err := pl.toPlainText(strings.ToLower(refName)); err != nil { return fmt.Errorf("'%s' list does not exist or is empty.", exportedName)
fmt.Println("Failed:", err)
continue
} }
fmt.Printf("'%s' has been generated successfully.\n", listName) file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName) + ".txt"))
if err != nil {
return err
} }
defer file.Close()
w := bufio.NewWriter(file)
for _, entry := range targetList {
fmt.Fprintln(w, entry.Plain)
} }
return w.Flush()
} }
func removeComment(line string) string { func parseEntry(line string) (Entry, error) {
idx := strings.Index(line, "#") var entry Entry
if idx == -1 { parts := strings.Fields(line)
return line
}
return strings.TrimSpace(line[:idx])
}
func parseDomain(domain string, entry *Entry) error { // Parse type and value
kv := strings.Split(domain, ":") rawTypeVal := parts[0]
kv := strings.Split(rawTypeVal, ":")
if len(kv) == 1 { if len(kv) == 1 {
entry.Type = RuleTypeDomain entry.Type = RuleTypeDomain // Default type
entry.Value = strings.ToLower(kv[0]) entry.Value = strings.ToLower(rawTypeVal)
return nil } else if len(kv) == 2 {
}
if len(kv) == 2 {
entry.Type = strings.ToLower(kv[0]) entry.Type = strings.ToLower(kv[0])
if entry.Type == RuleTypeRegexp {
if strings.EqualFold(entry.Type, RuleTypeRegexp) {
entry.Value = kv[1] entry.Value = kv[1]
} else { } else {
entry.Value = strings.ToLower(kv[1]) entry.Value = strings.ToLower(kv[1])
} }
} else {
return nil return entry, fmt.Errorf("invalid format: %s", line)
}
// Check type and value
if !TypeChecker.MatchString(entry.Type) {
return entry, fmt.Errorf("invalid type: %s", entry.Type)
}
if entry.Type == RuleTypeRegexp {
if _, err := regexp.Compile(entry.Value); err != nil {
return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
}
} else if !ValueChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid value: %s", entry.Value)
} }
return fmt.Errorf("invalid format: %s", domain) // Parse/Check attributes and affiliations
} for _, part := range parts[1:] {
if strings.HasPrefix(part, "@") {
func parseAttribute(attr string) (*router.Domain_Attribute, error) { attr := strings.ToLower(part[1:]) // Trim attribute prefix `@` character
var attribute router.Domain_Attribute if !AttrChecker.MatchString(attr) {
if len(attr) == 0 || attr[0] != '@' { return entry, fmt.Errorf("invalid attribute key: %s", attr)
return &attribute, fmt.Errorf("invalid attribute: %s", attr)
}
attribute.Key = strings.ToLower(attr[1:]) // Trim attribute prefix `@` character
attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true}
return &attribute, nil
}
func parseEntry(line string) (Entry, error) {
line = strings.TrimSpace(line)
parts := strings.Split(line, " ")
var entry Entry
if len(parts) == 0 {
return entry, fmt.Errorf("empty entry")
}
if err := parseDomain(parts[0], &entry); err != nil {
return entry, err
}
for i := 1; i < len(parts); i++ {
attr, err := parseAttribute(parts[i])
if err != nil {
return entry, err
} }
entry.Attrs = append(entry.Attrs, attr) entry.Attrs = append(entry.Attrs, attr)
} else if strings.HasPrefix(part, "&") {
aff := strings.ToUpper(part[1:]) // Trim affiliation prefix `&` character
if !SiteChecker.MatchString(aff) {
return entry, fmt.Errorf("invalid affiliation key: %s", aff)
}
entry.Affs = append(entry.Affs, aff)
} else {
return entry, fmt.Errorf("invalid attribute/affiliation: %s", part)
}
}
// Sort attributes
slices.Sort(entry.Attrs)
// Formatted plain entry: type:domain.tld:@attr1,@attr2
entry.Plain = entry.Type + ":" + entry.Value
if len(entry.Attrs) != 0 {
entry.Plain = entry.Plain + ":@" + strings.Join(entry.Attrs, ",@")
} }
return entry, nil return entry, nil
} }
func Load(path string) (*List, error) { func loadData(path string) error {
file, err := os.Open(path) file, err := os.Open(path)
if err != nil { if err != nil {
return nil, err return err
} }
defer file.Close() defer file.Close()
list := &List{ listName := strings.ToUpper(filepath.Base(path))
Name: strings.ToUpper(filepath.Base(path)), if !SiteChecker.MatchString(listName) {
return fmt.Errorf("invalid list name: %s", listName)
} }
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
lineIdx := 0
for scanner.Scan() { for scanner.Scan() {
line := strings.TrimSpace(scanner.Text()) line := scanner.Text()
line = removeComment(line) lineIdx++
if len(line) == 0 { // Remove comments
if idx := strings.Index(line, "#"); idx != -1 {
line = line[:idx]
}
line = strings.TrimSpace(line)
if line == "" {
continue continue
} }
entry, err := parseEntry(line) entry, err := parseEntry(line)
if err != nil { if err != nil {
return nil, err return fmt.Errorf("error in %s at line %d: %v", path, lineIdx, err)
} }
list.Entry = append(list.Entry, entry) refMap[listName] = append(refMap[listName], &entry)
} }
return nil
return list, nil
} }
func isMatchAttr(Attrs []*router.Domain_Attribute, includeKey string) bool { func parseList(refName string, refList []*Entry) error {
isMatch := false pl, _ := plMap[refName]
mustMatch := true if pl == nil {
matchName := includeKey pl = &ParsedList{Name: refName}
if strings.HasPrefix(includeKey, "!") { plMap[refName] = pl
isMatch = true
mustMatch = false
matchName = strings.TrimLeft(includeKey, "!")
} }
for _, entry := range refList {
for _, Attr := range Attrs {
attrName := Attr.Key
if mustMatch {
if matchName == attrName {
isMatch = true
break
}
} else {
if matchName == attrName {
isMatch = false
break
}
}
}
return isMatch
}
func createIncludeAttrEntrys(list *List, matchAttr *router.Domain_Attribute) []Entry {
newEntryList := make([]Entry, 0, len(list.Entry))
matchName := matchAttr.Key
for _, entry := range list.Entry {
matched := isMatchAttr(entry.Attrs, matchName)
if matched {
newEntryList = append(newEntryList, entry)
}
}
return newEntryList
}
func ParseList(list *List, ref map[string]*List) (*ParsedList, error) {
pl := &ParsedList{
Name: list.Name,
Inclusion: make(map[string]bool),
}
entryList := list.Entry
for {
newEntryList := make([]Entry, 0, len(entryList))
hasInclude := false
for _, entry := range entryList {
if entry.Type == RuleTypeInclude { if entry.Type == RuleTypeInclude {
refName := strings.ToUpper(entry.Value) if len(entry.Affs) != 0 {
if entry.Attrs != nil { return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value)
}
inc := &Inclusion{Source: strings.ToUpper(entry.Value)}
for _, attr := range entry.Attrs { for _, attr := range entry.Attrs {
InclusionName := strings.ToUpper(refName + "@" + attr.Key) if strings.HasPrefix(attr, "-") {
if pl.Inclusion[InclusionName] { inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character
continue } else {
inc.MustAttrs = append(inc.MustAttrs, attr)
} }
pl.Inclusion[InclusionName] = true }
pl.Inclusions = append(pl.Inclusions, inc)
} else {
for _, aff := range entry.Affs {
apl, _ := plMap[aff]
if apl == nil {
apl = &ParsedList{Name: aff}
plMap[aff] = apl
}
apl.Entries = append(apl.Entries, entry)
}
pl.Entries = append(pl.Entries, entry)
}
}
return nil
}
refList := ref[refName] func polishList(roughMap *map[string]*Entry) []*Entry {
if refList == nil { finalList := make([]*Entry, 0, len(*roughMap))
return nil, fmt.Errorf("list not found: %s", entry.Value) queuingList := make([]*Entry, 0, len(*roughMap)) // Domain/full entries without attr
} domainsMap := make(map[string]bool)
attrEntrys := createIncludeAttrEntrys(refList, attr) for _, entry := range *roughMap {
if len(attrEntrys) != 0 { switch entry.Type { // Bypass regexp, keyword and "full/domain with attr"
newEntryList = append(newEntryList, attrEntrys...) case RuleTypeRegexp:
} finalList = append(finalList, entry)
} case RuleTypeKeyword:
finalList = append(finalList, entry)
case RuleTypeDomain:
domainsMap[entry.Value] = true
if len(entry.Attrs) != 0 {
finalList = append(finalList, entry)
} else { } else {
InclusionName := refName queuingList = append(queuingList, entry)
if pl.Inclusion[InclusionName] {
continue
} }
pl.Inclusion[InclusionName] = true case RuleTypeFullDomain:
refList := ref[refName] if len(entry.Attrs) != 0 {
if refList == nil { finalList = append(finalList, entry)
return nil, fmt.Errorf("list not found: %s", entry.Value)
}
newEntryList = append(newEntryList, refList.Entry...)
}
hasInclude = true
} else { } else {
newEntryList = append(newEntryList, entry) queuingList = append(queuingList, entry)
} }
} }
entryList = newEntryList }
if !hasInclude { // Remove redundant subdomains for full/domain without attr
for _, qentry := range queuingList {
isRedundant := false
pd := qentry.Value // Parent domain
for {
idx := strings.Index(pd, ".")
if idx == -1 { break }
pd = pd[idx+1:] // Go for next parent
if !strings.Contains(pd, ".") { break } // Not allow tld to be a parent
if domainsMap[pd] {
isRedundant = true
break break
} }
} }
pl.Entry = entryList if !isRedundant {
finalList = append(finalList, qentry)
}
}
// Sort final entries
slices.SortFunc(finalList, func(a, b *Entry) int {
return strings.Compare(a.Plain, b.Plain)
})
return finalList
}
return pl, nil func resolveList(pl *ParsedList) error {
if _, pldone := finalMap[pl.Name]; pldone { return nil }
if cirIncMap[pl.Name] {
return fmt.Errorf("circular inclusion in: %s", pl.Name)
}
cirIncMap[pl.Name] = true
defer delete(cirIncMap, pl.Name)
isMatchAttrFilters := func(entry *Entry, incFilter *Inclusion) bool {
if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 { return true }
if len(entry.Attrs) == 0 { return len(incFilter.MustAttrs) == 0 }
for _, m := range incFilter.MustAttrs {
if !slices.Contains(entry.Attrs, m) { return false }
}
for _, b := range incFilter.BanAttrs {
if slices.Contains(entry.Attrs, b) { return false }
}
return true
}
roughMap := make(map[string]*Entry) // Avoid basic duplicates
for _, dentry := range pl.Entries { // Add direct entries
roughMap[dentry.Plain] = dentry
}
for _, inc := range pl.Inclusions {
incPl, exist := plMap[inc.Source]
if !exist {
return fmt.Errorf("list '%s' includes a non-existent list: '%s'", pl.Name, inc.Source)
}
if err := resolveList(incPl); err != nil {
return err
}
for _, ientry := range finalMap[inc.Source] {
if isMatchAttrFilters(ientry, inc) { // Add included entries
roughMap[ientry.Plain] = ientry
}
}
}
finalMap[pl.Name] = polishList(&roughMap)
return nil
} }
func main() { func main() {
@@ -317,7 +338,7 @@ func main() {
dir := *dataPath dir := *dataPath
fmt.Println("Use domain lists in", dir) fmt.Println("Use domain lists in", dir)
ref := make(map[string]*List) // Generate refMap
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
@@ -325,18 +346,32 @@ func main() {
if info.IsDir() { if info.IsDir() {
return nil return nil
} }
list, err := Load(path) if err := loadData(path); err != nil {
if err != nil {
return err return err
} }
ref[list.Name] = list
return nil return nil
}) })
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to loadData:", err)
os.Exit(1) os.Exit(1)
} }
// Generate plMap
for refName, refList := range refMap {
if err := parseList(refName, refList); err != nil {
fmt.Println("Failed to parseList:", err)
os.Exit(1)
}
}
// Generate finalMap
for _, pl := range plMap {
if err := resolveList(pl); err != nil {
fmt.Println("Failed to resolveList:", err)
os.Exit(1)
}
}
// Create output directory if not exist // Create output directory if not exist
if _, err := os.Stat(*outputDir); os.IsNotExist(err) { if _, err := os.Stat(*outputDir); os.IsNotExist(err) {
if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil { if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil {
@@ -345,55 +380,40 @@ func main() {
} }
} }
protoList := new(router.GeoSiteList) // Export plaintext list
var existList []string if *exportLists != "" {
for refName, list := range ref { exportedListSlice := strings.Split(*exportLists, ",")
pl, err := ParseList(list, ref) for _, exportedList := range exportedListSlice {
if err != nil { if err := writePlainList(exportedList); err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to write list:", err)
os.Exit(1) continue
} }
site, err := pl.toProto() fmt.Printf("list: '%s' has been generated successfully.\n", exportedList)
}
}
// Generate dat file
protoList := new(router.GeoSiteList)
for siteName, siteEntries := range finalMap {
site, err := makeProtoList(siteName, siteEntries)
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed:", err)
os.Exit(1) os.Exit(1)
} }
protoList.Entry = append(protoList.Entry, site) protoList.Entry = append(protoList.Entry, site)
// Flatten and export plaintext list
if *exportLists != "" {
if existList != nil {
exportPlainTextList(existList, refName, pl)
} else {
exportedListSlice := strings.Split(*exportLists, ",")
for _, exportedListName := range exportedListSlice {
fileName := filepath.Join(dir, exportedListName)
_, err := os.Stat(fileName)
if err == nil || os.IsExist(err) {
existList = append(existList, exportedListName)
} else {
fmt.Printf("'%s' list does not exist in '%s' directory.\n", exportedListName, dir)
} }
}
if existList != nil {
exportPlainTextList(existList, refName, pl)
}
}
}
}
// Sort protoList so the marshaled list is reproducible // Sort protoList so the marshaled list is reproducible
sort.SliceStable(protoList.Entry, func(i, j int) bool { slices.SortFunc(protoList.Entry, func(a, b *router.GeoSite) int {
return protoList.Entry[i].CountryCode < protoList.Entry[j].CountryCode return strings.Compare(a.CountryCode, b.CountryCode)
}) })
protoBytes, err := proto.Marshal(protoList) protoBytes, err := proto.Marshal(protoList)
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to marshal:", err)
os.Exit(1) os.Exit(1)
} }
if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil { if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to write output:", err)
os.Exit(1) os.Exit(1)
} else { } else {
fmt.Println(*outputName, "has been generated successfully.") fmt.Println(*outputName, "has been generated successfully.")