Compare commits

...

6 Commits

Author SHA1 Message Date
MkQtS
9d73db400e Feat: add support for partial include 2025-12-31 21:42:35 +08:00
MkQtS
c83bb86d06 Refactor: promote refMap 2025-12-31 20:30:16 +08:00
MkQtS
58f79fa579 Remove support for partial include
This reverts e640ac2783

It is problematic and I will implement a new one
2025-12-31 20:29:54 +08:00
MkQtS
969e6baad8 Refactor exportPlainTextList
- remove unnecessary variable
- improve readability
2025-12-31 20:25:58 +08:00
MkQtS
bbd5b64219 Refactor parseEntry
- add value/attribute checker(check missing space)
- allow multiple spaces
- sort attributes
- improve readability
2025-12-31 20:24:51 +08:00
MkQtS
6b10d69246 Refactor: reduce the use of strings.TrimSpace 2025-12-31 20:14:14 +08:00

336
main.go
View File

@@ -29,6 +29,19 @@ const (
RuleTypeInclude string = "include"
)
// Validators applied by parseEntry to the pieces of a rule line.
var (
// TypeChecker matches exactly one of the rule type names:
// domain, full, keyword, regexp or include.
TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
// ValueChecker matches a non-empty value made only of lowercase ASCII
// letters, digits, '!', '.' and '-'. parseEntry uses it for every rule
// whose type is not regexp.
ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`)
// AttrChecker matches a non-empty attribute key (the text after '@') made
// only of lowercase ASCII letters, digits, '!' and '-'.
AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)
)
// Package-level tables shared by the processing stages in main.
var (
// refMap holds the loaded lists; main stores each one under list.Name.
refMap = make(map[string]*List)
// plMap holds the ParseList result for every refMap entry, under the same key.
plMap = make(map[string]*ParsedList)
// finalMap holds the lists produced by ResolveList, keyed by list name.
finalMap = make(map[string]*List)
// cirIncMap marks the lists currently being resolved by ResolveList.
cirIncMap = make(map[string]bool) // Used for circular inclusion detection
)
type Entry struct {
Type string
Value string
@@ -40,15 +53,21 @@ type List struct {
Entry []Entry
}
type ParsedList struct {
Name string
Inclusion map[string]bool
Entry []Entry
// Inclusion describes one include rule of a list together with the attribute
// filters attached to it.
type Inclusion struct {
// Source is the name of the included list; ParseList stores it upper-cased.
Source string
// MustAttrs are attributes an entry has to carry to be taken over
// (include attributes without a leading '-').
MustAttrs []*router.Domain_Attribute
// BannedAttrs are attributes an entry must not carry to be taken over
// (include attributes written with a leading '-', stored without it).
BannedAttrs []*router.Domain_Attribute
}
func (l *ParsedList) toPlainText(listName string) error {
// ParsedList is a list whose include rules have been separated from its
// ordinary entries by ParseList.
type ParsedList struct {
// Name is copied from the source List.
Name string
// Inclusions holds one item per include rule, in the order encountered.
Inclusions []Inclusion
// Entry holds every rule that is not an include rule.
Entry []Entry
}
func (entryList *List) toPlainText() error {
var entryBytes []byte
for _, entry := range l.Entry {
for _, entry := range entryList.Entry {
var attrString string
if entry.Attrs != nil {
for _, attr := range entry.Attrs {
@@ -59,13 +78,13 @@ func (l *ParsedList) toPlainText(listName string) error {
// Entry output format is: type:domain.tld:@attr1,@attr2
entryBytes = append(entryBytes, []byte(entry.Type+":"+entry.Value+attrString+"\n")...)
}
if err := os.WriteFile(filepath.Join(*outputDir, listName+".txt"), entryBytes, 0644); err != nil {
if err := os.WriteFile(filepath.Join(*outputDir, strings.ToLower(entryList.Name)+".txt"), entryBytes, 0644); err != nil {
return err
}
return nil
}
func (l *ParsedList) toProto() (*router.GeoSite, error) {
func (l *List) toProto() (*router.GeoSite, error) {
site := &router.GeoSite{
CountryCode: l.Name,
}
@@ -79,11 +98,6 @@ func (l *ParsedList) toProto() (*router.GeoSite, error) {
})
case RuleTypeRegexp:
// check regexp validity to avoid runtime error
_, err := regexp.Compile(entry.Value)
if err != nil {
return nil, fmt.Errorf("invalid regexp in list %s: %s", l.Name, entry.Value)
}
site.Domain = append(site.Domain, &router.Domain{
Type: router.Domain_Regex,
Value: entry.Value,
@@ -103,89 +117,73 @@ func (l *ParsedList) toProto() (*router.GeoSite, error) {
Value: entry.Value,
Attribute: entry.Attrs,
})
default:
return nil, fmt.Errorf("unknown domain type: %s", entry.Type)
}
}
return site, nil
}
func exportPlainTextList(list []string, refName string, pl *ParsedList) {
for _, listName := range list {
if strings.EqualFold(refName, listName) {
if err := pl.toPlainText(strings.ToLower(refName)); err != nil {
fmt.Println("Failed:", err)
func exportPlainTextList(exportFiles []string, entryList *List) {
for _, exportfilename := range exportFiles {
if strings.EqualFold(entryList.Name, exportfilename) {
if err := entryList.toPlainText(); err != nil {
fmt.Println("Failed to exportPlainTextList:", err)
continue
}
fmt.Printf("'%s' has been generated successfully.\n", listName)
fmt.Printf("'%s' has been generated successfully.\n", exportfilename)
}
}
}
// removeComment drops everything from the first '#' onward and trims the
// whitespace around what is left. A line that contains no '#' is returned
// unchanged, surrounding whitespace included.
func removeComment(line string) string {
	if pos := strings.IndexByte(line, '#'); pos >= 0 {
		return strings.TrimSpace(line[:pos])
	}
	return line
}
func parseEntry(line string) (Entry, error) {
var entry Entry
parts := strings.Fields(line)
func parseDomain(domain string, entry *Entry) error {
kv := strings.Split(domain, ":")
// Parse/Check type and value
rawTypeVal := parts[0]
kv := strings.Split(rawTypeVal, ":")
if len(kv) == 1 {
entry.Type = RuleTypeDomain
entry.Value = strings.ToLower(kv[0])
return nil
}
if len(kv) == 2 {
entry.Type = RuleTypeDomain // Default type
entry.Value = strings.ToLower(rawTypeVal)
} else if len(kv) == 2 {
entry.Type = strings.ToLower(kv[0])
if strings.EqualFold(entry.Type, RuleTypeRegexp) {
if entry.Type == RuleTypeRegexp {
entry.Value = kv[1]
} else {
entry.Value = strings.ToLower(kv[1])
}
return nil
} else {
return entry, fmt.Errorf("invalid format: %s", line)
}
return fmt.Errorf("invalid format: %s", domain)
}
func parseAttribute(attr string) (*router.Domain_Attribute, error) {
var attribute router.Domain_Attribute
if len(attr) == 0 || attr[0] != '@' {
return &attribute, fmt.Errorf("invalid attribute: %s", attr)
if !TypeChecker.MatchString(entry.Type) {
return entry, fmt.Errorf("invalid type: %s", entry.Type)
}
attribute.Key = strings.ToLower(attr[1:]) // Trim attribute prefix `@` character
attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true}
return &attribute, nil
}
func parseEntry(line string) (Entry, error) {
line = strings.TrimSpace(line)
parts := strings.Split(line, " ")
var entry Entry
if len(parts) == 0 {
return entry, fmt.Errorf("empty entry")
}
if err := parseDomain(parts[0], &entry); err != nil {
return entry, err
}
for i := 1; i < len(parts); i++ {
attr, err := parseAttribute(parts[i])
if err != nil {
return entry, err
if entry.Type == RuleTypeRegexp {
if _, err := regexp.Compile(entry.Value); err != nil {
return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
}
entry.Attrs = append(entry.Attrs, attr)
} else if !ValueChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid value: %s", entry.Value)
}
// Parse/Check attributes
for _, part := range parts[1:] {
if !strings.HasPrefix(part, "@") {
return entry, fmt.Errorf("invalid attribute: %s", part)
}
attrKey := strings.ToLower(part[1:]) // Trim attribute prefix `@` character
if !AttrChecker.MatchString(attrKey) {
return entry, fmt.Errorf("invalid attribute key: %s", attrKey)
}
entry.Attrs = append(entry.Attrs, &router.Domain_Attribute{
Key: attrKey,
TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true},
})
}
// Sort attributes
sort.Slice(entry.Attrs, func(i, j int) bool {
return entry.Attrs[i].Key < entry.Attrs[j].Key
})
return entry, nil
}
@@ -201,9 +199,13 @@ func Load(path string) (*List, error) {
}
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
line = removeComment(line)
if len(line) == 0 {
line := scanner.Text()
// Remove comments
if idx := strings.Index(line, "#"); idx != -1 {
line = line[:idx]
}
line = strings.TrimSpace(line)
if line == "" {
continue
}
entry, err := parseEntry(line)
@@ -216,108 +218,77 @@ func Load(path string) (*List, error) {
return list, nil
}
// isMatchAttr reports whether an attribute set satisfies includeKey.
//
// A plain key is satisfied when some attribute in Attrs has that key. A key
// starting with '!' is a negation: every leading '!' is stripped and the
// result is true only when no attribute has the remaining key.
func isMatchAttr(Attrs []*router.Domain_Attribute, includeKey string) bool {
	negated := strings.HasPrefix(includeKey, "!")
	// TrimLeft is a no-op for keys without a leading '!'.
	wanted := strings.TrimLeft(includeKey, "!")
	for _, candidate := range Attrs {
		if candidate.Key == wanted {
			// Found: a positive key matches, a negated key does not.
			return !negated
		}
	}
	// Not found: a negated key matches, a positive key does not.
	return negated
}
// createIncludeAttrEntrys returns the entries of list whose attributes
// satisfy matchAttr.Key according to isMatchAttr, in their original order.
// The result is never nil; it is empty when nothing matches.
func createIncludeAttrEntrys(list *List, matchAttr *router.Domain_Attribute) []Entry {
	selected := make([]Entry, 0, len(list.Entry))
	key := matchAttr.Key
	for _, candidate := range list.Entry {
		if !isMatchAttr(candidate.Attrs, key) {
			continue
		}
		selected = append(selected, candidate)
	}
	return selected
}
func ParseList(list *List, ref map[string]*List) (*ParsedList, error) {
pl := &ParsedList{
Name: list.Name,
Inclusion: make(map[string]bool),
}
entryList := list.Entry
for {
newEntryList := make([]Entry, 0, len(entryList))
hasInclude := false
for _, entry := range entryList {
if entry.Type == RuleTypeInclude {
refName := strings.ToUpper(entry.Value)
if entry.Attrs != nil {
for _, attr := range entry.Attrs {
InclusionName := strings.ToUpper(refName + "@" + attr.Key)
if pl.Inclusion[InclusionName] {
continue
}
pl.Inclusion[InclusionName] = true
refList := ref[refName]
if refList == nil {
return nil, fmt.Errorf("list not found: %s", entry.Value)
}
attrEntrys := createIncludeAttrEntrys(refList, attr)
if len(attrEntrys) != 0 {
newEntryList = append(newEntryList, attrEntrys...)
}
}
func ParseList(refList *List) (*ParsedList, error) {
pl := &ParsedList{Name: refList.Name}
for _, entry := range refList.Entry {
if entry.Type == RuleTypeInclude {
inc := Inclusion{Source: strings.ToUpper(entry.Value)}
for _, attr := range entry.Attrs {
if strings.HasPrefix(attr.Key, "-") {
inc.BannedAttrs = append(inc.BannedAttrs, &router.Domain_Attribute{
Key: attr.Key[1:], // Trim attribute prefix `-` character
TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true},
})
} else {
InclusionName := refName
if pl.Inclusion[InclusionName] {
continue
}
pl.Inclusion[InclusionName] = true
refList := ref[refName]
if refList == nil {
return nil, fmt.Errorf("list not found: %s", entry.Value)
}
newEntryList = append(newEntryList, refList.Entry...)
inc.MustAttrs = append(inc.MustAttrs, attr)
}
hasInclude = true
} else {
newEntryList = append(newEntryList, entry)
}
}
entryList = newEntryList
if !hasInclude {
break
pl.Inclusions = append(pl.Inclusions, inc)
} else {
pl.Entry = append(pl.Entry, entry)
}
}
pl.Entry = entryList
return pl, nil
}
// isMatchAttrFilters reports whether entry passes the attribute filters of
// incFilter: it must carry every key listed in MustAttrs and none of the keys
// listed in BannedAttrs. With both filters empty every entry passes.
func isMatchAttrFilters(entry Entry, incFilter Inclusion) bool {
	present := make(map[string]struct{}, len(entry.Attrs))
	for _, a := range entry.Attrs {
		present[a.Key] = struct{}{}
	}
	has := func(key string) bool {
		_, ok := present[key]
		return ok
	}

	for _, must := range incFilter.MustAttrs {
		if !has(must.Key) {
			return false
		}
	}
	for _, banned := range incFilter.BannedAttrs {
		if has(banned.Key) {
			return false
		}
	}
	return true
}
// ResolveList flattens pl into a List and stores it in finalMap under pl.Name.
//
// The result holds pl's own entries followed, for each inclusion in order, by
// the entries of the recursively resolved included list that pass
// isMatchAttrFilters. A list already present in finalMap is not resolved
// again. cirIncMap marks the lists on the current resolution path; meeting
// one of them again is reported as a circular inclusion.
//
// It returns an error for a circular inclusion, or when an inclusion names a
// list that has no entry in plMap.
func ResolveList(pl *ParsedList) error {
	// TODO: deduplicate
	if _, done := finalMap[pl.Name]; done {
		return nil
	}

	if cirIncMap[pl.Name] {
		return fmt.Errorf("circular inclusion in: %s", pl.Name)
	}
	cirIncMap[pl.Name] = true
	defer delete(cirIncMap, pl.Name)

	finalList := &List{Name: pl.Name}
	finalList.Entry = append(finalList.Entry, pl.Entry...)

	for _, inc := range pl.Inclusions {
		// A missing key yields a nil *ParsedList; recursing with it would
		// dereference nil and panic, so report the bad include instead.
		incList := plMap[inc.Source]
		if incList == nil {
			return fmt.Errorf("list not found: %s (included by %s)", inc.Source, pl.Name)
		}
		if err := ResolveList(incList); err != nil {
			return err
		}
		for _, entry := range finalMap[inc.Source].Entry {
			if isMatchAttrFilters(entry, inc) {
				finalList.Entry = append(finalList.Entry, entry)
			}
		}
	}

	finalMap[pl.Name] = finalList
	return nil
}
func main() {
flag.Parse()
dir := *dataPath
fmt.Println("Use domain lists in", dir)
ref := make(map[string]*List)
// Generate refMap
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
@@ -329,7 +300,7 @@ func main() {
if err != nil {
return err
}
ref[list.Name] = list
refMap[list.Name] = list
return nil
})
if err != nil {
@@ -337,6 +308,24 @@ func main() {
os.Exit(1)
}
// Generate plMap
for refName, refList := range refMap {
pl, err := ParseList(refList)
if err != nil {
fmt.Println("Failed to ParseList:", err)
os.Exit(1)
}
plMap[refName] = pl
}
// Generate finalMap
for _, pl := range plMap {
if err := ResolveList(pl); err != nil {
fmt.Println("Failed to ResolveList:", err)
os.Exit(1)
}
}
// Create output directory if not exist
if _, err := os.Stat(*outputDir); os.IsNotExist(err) {
if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil {
@@ -347,13 +336,8 @@ func main() {
protoList := new(router.GeoSiteList)
var existList []string
for refName, list := range ref {
pl, err := ParseList(list, ref)
if err != nil {
fmt.Println("Failed:", err)
os.Exit(1)
}
site, err := pl.toProto()
for _, siteEntries := range finalMap {
site, err := siteEntries.toProto()
if err != nil {
fmt.Println("Failed:", err)
os.Exit(1)
@@ -363,7 +347,7 @@ func main() {
// Flatten and export plaintext list
if *exportLists != "" {
if existList != nil {
exportPlainTextList(existList, refName, pl)
exportPlainTextList(existList, siteEntries)
} else {
exportedListSlice := strings.Split(*exportLists, ",")
for _, exportedListName := range exportedListSlice {
@@ -376,7 +360,7 @@ func main() {
}
}
if existList != nil {
exportPlainTextList(existList, refName, pl)
exportPlainTextList(existList, siteEntries)
}
}
}
@@ -389,11 +373,11 @@ func main() {
protoBytes, err := proto.Marshal(protoList)
if err != nil {
fmt.Println("Failed:", err)
fmt.Println("Failed to Marshal:", err)
os.Exit(1)
}
if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil {
fmt.Println("Failed:", err)
fmt.Println("Failed to write output:", err)
os.Exit(1)
} else {
fmt.Println(*outputName, "has been generated successfully.")