From ec95fedc456f2528cdc823c44ea640e05d1ed08d Mon Sep 17 00:00:00 2001 From: MkQtS <81752398+MkQtS@users.noreply.github.com> Date: Tue, 20 Jan 2026 20:58:32 +0800 Subject: [PATCH] Refactor main.go (#3119) * Refactor: reduce the use of strings.TrimSpace * Refactor: use string attr before toProto * Refactor parseEntry - add value/attribute checker (check missing space) - allow multiple spaces - sort attributes - improve readability * Refactor exportPlainTextList - remove unnecessary variable - improve readability * Remove support for partial include This reverts e640ac27834550c9182c486ea180bbd3b0d4ba75 It is problematic and I will implement a new one * Refactor: promote refMap * Feat: add support for partial include - refactor inclusion logic - add basic deduplicate * Refactor exporting plaintext list * Feat: add support for affiliation A domain rule is always added to the list corresponding to the filename it resides in. Additionally, you can now add affiliations to a domain rule, and the rule will be added to the list specified by the affiliation. Each affiliation begins with `&` and is followed by the name of the affiliation. This helps us to reduce the number of data files without compromising functionality, and avoid writing the same rule in different files. 
* Feat: add advanced deduplicate for subdomains only for domain/full subdomains without attr * Refactor: import and use slices * Refactor: improve code --- main.go | 532 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 276 insertions(+), 256 deletions(-) diff --git a/main.go b/main.go index 165bb092..4611ed18 100644 --- a/main.go +++ b/main.go @@ -7,7 +7,7 @@ import ( "os" "path/filepath" "regexp" - "sort" + "slices" "strings" router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" @@ -29,286 +29,307 @@ const ( RuleTypeInclude string = "include" ) +var ( + TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`) + ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`) + AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`) + SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`) +) + +var ( + refMap = make(map[string][]*Entry) + plMap = make(map[string]*ParsedList) + finalMap = make(map[string][]*Entry) + cirIncMap = make(map[string]bool) // Used for circular inclusion detection +) + type Entry struct { Type string Value string - Attrs []*router.Domain_Attribute + Attrs []string + Plain string + Affs []string } -type List struct { - Name string - Entry []Entry +type Inclusion struct { + Source string + MustAttrs []string + BanAttrs []string } type ParsedList struct { - Name string - Inclusion map[string]bool - Entry []Entry + Name string + Inclusions []*Inclusion + Entries []*Entry } -func (l *ParsedList) toPlainText(listName string) error { - var entryBytes []byte - for _, entry := range l.Entry { - var attrString string - if entry.Attrs != nil { - for _, attr := range entry.Attrs { - attrString += "@" + attr.GetKey() + "," - } - attrString = strings.TrimRight(":"+attrString, ",") - } - // Entry output format is: type:domain.tld:@attr1,@attr2 - entryBytes = append(entryBytes, []byte(entry.Type+":"+entry.Value+attrString+"\n")...) 
- } - if err := os.WriteFile(filepath.Join(*outputDir, listName+".txt"), entryBytes, 0644); err != nil { - return err - } - return nil -} - -func (l *ParsedList) toProto() (*router.GeoSite, error) { +func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) { site := &router.GeoSite{ - CountryCode: l.Name, + CountryCode: listName, + Domain: make([]*router.Domain, 0, len(entries)), } - for _, entry := range l.Entry { + for _, entry := range entries { + pdomain := &router.Domain{Value: entry.Value} + for _, attr := range entry.Attrs { + pdomain.Attribute = append(pdomain.Attribute, &router.Domain_Attribute{ + Key: attr, + TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true}, + }) + } + switch entry.Type { case RuleTypeDomain: - site.Domain = append(site.Domain, &router.Domain{ - Type: router.Domain_RootDomain, - Value: entry.Value, - Attribute: entry.Attrs, - }) - + pdomain.Type = router.Domain_RootDomain case RuleTypeRegexp: - // check regexp validity to avoid runtime error - _, err := regexp.Compile(entry.Value) - if err != nil { - return nil, fmt.Errorf("invalid regexp in list %s: %s", l.Name, entry.Value) - } - site.Domain = append(site.Domain, &router.Domain{ - Type: router.Domain_Regex, - Value: entry.Value, - Attribute: entry.Attrs, - }) - + pdomain.Type = router.Domain_Regex case RuleTypeKeyword: - site.Domain = append(site.Domain, &router.Domain{ - Type: router.Domain_Plain, - Value: entry.Value, - Attribute: entry.Attrs, - }) - + pdomain.Type = router.Domain_Plain case RuleTypeFullDomain: - site.Domain = append(site.Domain, &router.Domain{ - Type: router.Domain_Full, - Value: entry.Value, - Attribute: entry.Attrs, - }) - - default: - return nil, fmt.Errorf("unknown domain type: %s", entry.Type) + pdomain.Type = router.Domain_Full } + site.Domain = append(site.Domain, pdomain) } return site, nil } -func exportPlainTextList(list []string, refName string, pl *ParsedList) { - for _, listName := range list { - if 
strings.EqualFold(refName, listName) { - if err := pl.toPlainText(strings.ToLower(refName)); err != nil { - fmt.Println("Failed:", err) - continue - } - fmt.Printf("'%s' has been generated successfully.\n", listName) - } +func writePlainList(exportedName string) error { + targetList, exist := finalMap[strings.ToUpper(exportedName)] + if !exist || len(targetList) == 0 { + return fmt.Errorf("'%s' list does not exist or is empty.", exportedName) } + file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName) + ".txt")) + if err != nil { + return err + } + defer file.Close() + w := bufio.NewWriter(file) + for _, entry := range targetList { + fmt.Fprintln(w, entry.Plain) + } + return w.Flush() } -func removeComment(line string) string { - idx := strings.Index(line, "#") - if idx == -1 { - return line - } - return strings.TrimSpace(line[:idx]) -} +func parseEntry(line string) (Entry, error) { + var entry Entry + parts := strings.Fields(line) -func parseDomain(domain string, entry *Entry) error { - kv := strings.Split(domain, ":") + // Parse type and value + rawTypeVal := parts[0] + kv := strings.Split(rawTypeVal, ":") if len(kv) == 1 { - entry.Type = RuleTypeDomain - entry.Value = strings.ToLower(kv[0]) - return nil - } - - if len(kv) == 2 { + entry.Type = RuleTypeDomain // Default type + entry.Value = strings.ToLower(rawTypeVal) + } else if len(kv) == 2 { entry.Type = strings.ToLower(kv[0]) - - if strings.EqualFold(entry.Type, RuleTypeRegexp) { + if entry.Type == RuleTypeRegexp { entry.Value = kv[1] } else { entry.Value = strings.ToLower(kv[1]) } - - return nil + } else { + return entry, fmt.Errorf("invalid format: %s", line) } - - return fmt.Errorf("invalid format: %s", domain) -} - -func parseAttribute(attr string) (*router.Domain_Attribute, error) { - var attribute router.Domain_Attribute - if len(attr) == 0 || attr[0] != '@' { - return &attribute, fmt.Errorf("invalid attribute: %s", attr) + // Check type and value + if 
!TypeChecker.MatchString(entry.Type) { + return entry, fmt.Errorf("invalid type: %s", entry.Type) } - - attribute.Key = strings.ToLower(attr[1:]) // Trim attribute prefix `@` character - attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true} - return &attribute, nil -} - -func parseEntry(line string) (Entry, error) { - line = strings.TrimSpace(line) - parts := strings.Split(line, " ") - - var entry Entry - if len(parts) == 0 { - return entry, fmt.Errorf("empty entry") - } - - if err := parseDomain(parts[0], &entry); err != nil { - return entry, err - } - - for i := 1; i < len(parts); i++ { - attr, err := parseAttribute(parts[i]) - if err != nil { - return entry, err + if entry.Type == RuleTypeRegexp { + if _, err := regexp.Compile(entry.Value); err != nil { + return entry, fmt.Errorf("invalid regexp: %s", entry.Value) } - entry.Attrs = append(entry.Attrs, attr) + } else if !ValueChecker.MatchString(entry.Value) { + return entry, fmt.Errorf("invalid value: %s", entry.Value) + } + + // Parse/Check attributes and affiliations + for _, part := range parts[1:] { + if strings.HasPrefix(part, "@") { + attr := strings.ToLower(part[1:]) // Trim attribute prefix `@` character + if !AttrChecker.MatchString(attr) { + return entry, fmt.Errorf("invalid attribute key: %s", attr) + } + entry.Attrs = append(entry.Attrs, attr) + } else if strings.HasPrefix(part, "&") { + aff := strings.ToUpper(part[1:]) // Trim affiliation prefix `&` character + if !SiteChecker.MatchString(aff) { + return entry, fmt.Errorf("invalid affiliation key: %s", aff) + } + entry.Affs = append(entry.Affs, aff) + } else { + return entry, fmt.Errorf("invalid attribute/affiliation: %s", part) + } + } + // Sort attributes + slices.Sort(entry.Attrs) + // Formated plain entry: type:domain.tld:@attr1,@attr2 + entry.Plain = entry.Type + ":" + entry.Value + if len(entry.Attrs) != 0 { + entry.Plain = entry.Plain + ":@" + strings.Join(entry.Attrs, ",@") } return entry, nil } -func Load(path string) 
(*List, error) { +func loadData(path string) error { file, err := os.Open(path) if err != nil { - return nil, err + return err } defer file.Close() - list := &List{ - Name: strings.ToUpper(filepath.Base(path)), + listName := strings.ToUpper(filepath.Base(path)) + if !SiteChecker.MatchString(listName) { + return fmt.Errorf("invalid list name: %s", listName) } scanner := bufio.NewScanner(file) + lineIdx := 0 for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - line = removeComment(line) - if len(line) == 0 { + line := scanner.Text() + lineIdx++ + // Remove comments + if idx := strings.Index(line, "#"); idx != -1 { + line = line[:idx] + } + line = strings.TrimSpace(line) + if line == "" { continue } entry, err := parseEntry(line) if err != nil { - return nil, err + return fmt.Errorf("error in %s at line %d: %v", path, lineIdx, err) } - list.Entry = append(list.Entry, entry) + refMap[listName] = append(refMap[listName], &entry) } - - return list, nil + return nil } -func isMatchAttr(Attrs []*router.Domain_Attribute, includeKey string) bool { - isMatch := false - mustMatch := true - matchName := includeKey - if strings.HasPrefix(includeKey, "!") { - isMatch = true - mustMatch = false - matchName = strings.TrimLeft(includeKey, "!") +func parseList(refName string, refList []*Entry) error { + pl, _ := plMap[refName] + if pl == nil { + pl = &ParsedList{Name: refName} + plMap[refName] = pl } - - for _, Attr := range Attrs { - attrName := Attr.Key - if mustMatch { - if matchName == attrName { - isMatch = true - break + for _, entry := range refList { + if entry.Type == RuleTypeInclude { + if len(entry.Affs) != 0 { + return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value) } - } else { - if matchName == attrName { - isMatch = false - break - } - } - } - return isMatch -} - -func createIncludeAttrEntrys(list *List, matchAttr *router.Domain_Attribute) []Entry { - newEntryList := make([]Entry, 0, len(list.Entry)) - matchName := matchAttr.Key - for 
_, entry := range list.Entry { - matched := isMatchAttr(entry.Attrs, matchName) - if matched { - newEntryList = append(newEntryList, entry) - } - } - return newEntryList -} - -func ParseList(list *List, ref map[string]*List) (*ParsedList, error) { - pl := &ParsedList{ - Name: list.Name, - Inclusion: make(map[string]bool), - } - entryList := list.Entry - for { - newEntryList := make([]Entry, 0, len(entryList)) - hasInclude := false - for _, entry := range entryList { - if entry.Type == RuleTypeInclude { - refName := strings.ToUpper(entry.Value) - if entry.Attrs != nil { - for _, attr := range entry.Attrs { - InclusionName := strings.ToUpper(refName + "@" + attr.Key) - if pl.Inclusion[InclusionName] { - continue - } - pl.Inclusion[InclusionName] = true - - refList := ref[refName] - if refList == nil { - return nil, fmt.Errorf("list not found: %s", entry.Value) - } - attrEntrys := createIncludeAttrEntrys(refList, attr) - if len(attrEntrys) != 0 { - newEntryList = append(newEntryList, attrEntrys...) - } - } + inc := &Inclusion{Source: strings.ToUpper(entry.Value)} + for _, attr := range entry.Attrs { + if strings.HasPrefix(attr, "-") { + inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character } else { - InclusionName := refName - if pl.Inclusion[InclusionName] { - continue - } - pl.Inclusion[InclusionName] = true - refList := ref[refName] - if refList == nil { - return nil, fmt.Errorf("list not found: %s", entry.Value) - } - newEntryList = append(newEntryList, refList.Entry...) 
+ inc.MustAttrs = append(inc.MustAttrs, attr) } - hasInclude = true - } else { - newEntryList = append(newEntryList, entry) } - } - entryList = newEntryList - if !hasInclude { - break + pl.Inclusions = append(pl.Inclusions, inc) + } else { + for _, aff := range entry.Affs { + apl, _ := plMap[aff] + if apl == nil { + apl = &ParsedList{Name: aff} + plMap[aff] = apl + } + apl.Entries = append(apl.Entries, entry) + } + pl.Entries = append(pl.Entries, entry) } } - pl.Entry = entryList + return nil +} - return pl, nil +func polishList(roughMap *map[string]*Entry) []*Entry { + finalList := make([]*Entry, 0, len(*roughMap)) + queuingList := make([]*Entry, 0, len(*roughMap)) // Domain/full entries without attr + domainsMap := make(map[string]bool) + for _, entry := range *roughMap { + switch entry.Type { // Bypass regexp, keyword and "full/domain with attr" + case RuleTypeRegexp: + finalList = append(finalList, entry) + case RuleTypeKeyword: + finalList = append(finalList, entry) + case RuleTypeDomain: + domainsMap[entry.Value] = true + if len(entry.Attrs) != 0 { + finalList = append(finalList, entry) + } else { + queuingList = append(queuingList, entry) + } + case RuleTypeFullDomain: + if len(entry.Attrs) != 0 { + finalList = append(finalList, entry) + } else { + queuingList = append(queuingList, entry) + } + } + } + // Remove redundant subdomains for full/domain without attr + for _, qentry := range queuingList { + isRedundant := false + pd := qentry.Value // Parent domain + for { + idx := strings.Index(pd, ".") + if idx == -1 { break } + pd = pd[idx+1:] // Go for next parent + if !strings.Contains(pd, ".") { break } // Not allow tld to be a parent + if domainsMap[pd] { + isRedundant = true + break + } + } + if !isRedundant { + finalList = append(finalList, qentry) + } + } + // Sort final entries + slices.SortFunc(finalList, func(a, b *Entry) int { + return strings.Compare(a.Plain, b.Plain) + }) + return finalList +} + +func resolveList(pl *ParsedList) error { + if _, 
pldone := finalMap[pl.Name]; pldone { return nil } + + if cirIncMap[pl.Name] { + return fmt.Errorf("circular inclusion in: %s", pl.Name) + } + cirIncMap[pl.Name] = true + defer delete(cirIncMap, pl.Name) + + isMatchAttrFilters := func(entry *Entry, incFilter *Inclusion) bool { + if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 { return true } + if len(entry.Attrs) == 0 { return len(incFilter.MustAttrs) == 0 } + + for _, m := range incFilter.MustAttrs { + if !slices.Contains(entry.Attrs, m) { return false } + } + for _, b := range incFilter.BanAttrs { + if slices.Contains(entry.Attrs, b) { return false } + } + return true + } + + roughMap := make(map[string]*Entry) // Avoid basic duplicates + for _, dentry := range pl.Entries { // Add direct entries + roughMap[dentry.Plain] = dentry + } + for _, inc := range pl.Inclusions { + incPl, exist := plMap[inc.Source] + if !exist { + return fmt.Errorf("list '%s' includes a non-existent list: '%s'", pl.Name, inc.Source) + } + if err := resolveList(incPl); err != nil { + return err + } + for _, ientry := range finalMap[inc.Source] { + if isMatchAttrFilters(ientry, inc) { // Add included entries + roughMap[ientry.Plain] = ientry + } + } + } + finalMap[pl.Name] = polishList(&roughMap) + return nil } func main() { @@ -317,7 +338,7 @@ func main() { dir := *dataPath fmt.Println("Use domain lists in", dir) - ref := make(map[string]*List) + // Generate refMap err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -325,18 +346,32 @@ func main() { if info.IsDir() { return nil } - list, err := Load(path) - if err != nil { + if err := loadData(path); err != nil { return err } - ref[list.Name] = list return nil }) if err != nil { - fmt.Println("Failed:", err) + fmt.Println("Failed to loadData:", err) os.Exit(1) } + // Generate plMap + for refName, refList := range refMap { + if err := parseList(refName, refList); err != nil { + fmt.Println("Failed to parseList:", err) 
+ os.Exit(1) + } + } + + // Generate finalMap + for _, pl := range plMap { + if err := resolveList(pl); err != nil { + fmt.Println("Failed to resolveList:", err) + os.Exit(1) + } + } + // Create output directory if not exist if _, err := os.Stat(*outputDir); os.IsNotExist(err) { if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil { @@ -345,55 +380,40 @@ func main() { } } - protoList := new(router.GeoSiteList) - var existList []string - for refName, list := range ref { - pl, err := ParseList(list, ref) - if err != nil { - fmt.Println("Failed:", err) - os.Exit(1) + // Export plaintext list + if *exportLists != "" { + exportedListSlice := strings.Split(*exportLists, ",") + for _, exportedList := range exportedListSlice { + if err := writePlainList(exportedList); err != nil { + fmt.Println("Failed to write list:", err) + continue + } + fmt.Printf("list: '%s' has been generated successfully.\n", exportedList) } - site, err := pl.toProto() + } + + // Generate dat file + protoList := new(router.GeoSiteList) + for siteName, siteEntries := range finalMap { + site, err := makeProtoList(siteName, siteEntries) if err != nil { fmt.Println("Failed:", err) os.Exit(1) } protoList.Entry = append(protoList.Entry, site) - - // Flatten and export plaintext list - if *exportLists != "" { - if existList != nil { - exportPlainTextList(existList, refName, pl) - } else { - exportedListSlice := strings.Split(*exportLists, ",") - for _, exportedListName := range exportedListSlice { - fileName := filepath.Join(dir, exportedListName) - _, err := os.Stat(fileName) - if err == nil || os.IsExist(err) { - existList = append(existList, exportedListName) - } else { - fmt.Printf("'%s' list does not exist in '%s' directory.\n", exportedListName, dir) - } - } - if existList != nil { - exportPlainTextList(existList, refName, pl) - } - } - } } - // Sort protoList so the marshaled list is reproducible - sort.SliceStable(protoList.Entry, func(i, j int) bool { - return protoList.Entry[i].CountryCode < 
protoList.Entry[j].CountryCode + slices.SortFunc(protoList.Entry, func(a, b *router.GeoSite) int { + return strings.Compare(a.CountryCode, b.CountryCode) }) protoBytes, err := proto.Marshal(protoList) if err != nil { - fmt.Println("Failed:", err) + fmt.Println("Failed to marshal:", err) os.Exit(1) } if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil { - fmt.Println("Failed:", err) + fmt.Println("Failed to write output:", err) os.Exit(1) } else { fmt.Println(*outputName, "has been generated successfully.")