// Mirror of https://github.com/v2fly/domain-list-community.git
// Supports exporting all lists to a plain yml via `--exportlists=_all_`.
package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"slices"
	"strings"

	router "github.com/v2fly/v2ray-core/v5/app/router/routercommon"
	"google.golang.org/protobuf/proto"
)

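// Command-line flags. A typical invocation might look like the following
// (an illustrative example, not taken from the repository's docs):
//
//	go run ./ --datapath=./data --outputname=dlc.dat --exportlists=_all_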
var (
	dataPath    = flag.String("datapath", "./data", "Path to your custom 'data' directory")
	outputName  = flag.String("outputname", "dlc.dat", "Name of the generated dat file")
	outputDir   = flag.String("outputdir", "./", "Directory to place all generated files")
	exportLists = flag.String("exportlists", "", "Comma-separated names of lists to be flattened and exported in plaintext format")
)

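// The five rule types below are the prefixes accepted in data files.
// Sample lines in the format parseEntry expects (hypothetical data, for
// illustration only):
//
//	example.org              # implicit "domain:" prefix
//	full:login.example.org
//	keyword:example
//	regexp:^ads[0-9]+\.
//	include:EXAMPLE-LIST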
const (
	RuleTypeDomain     string = "domain"
	RuleTypeFullDomain string = "full"
	RuleTypeKeyword    string = "keyword"
	RuleTypeRegexp     string = "regexp"
	RuleTypeInclude    string = "include"
)

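// Validators for parsed fields: rule values and attributes are lowercase,
// while list names and affiliation keys are uppercase.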
var (
	TypeChecker   = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
	DomainChecker = regexp.MustCompile(`^[a-z0-9\.-]+$`)
	AttrChecker   = regexp.MustCompile(`^[a-z0-9!-]+$`)
	SiteChecker   = regexp.MustCompile(`^[A-Z0-9!-]+$`)
)

var (
	refMap    = make(map[string][]*Entry)    // List name -> raw entries as read from disk
	plMap     = make(map[string]*ParsedList) // List name -> parsed entries and inclusions
	finalMap  = make(map[string][]*Entry)    // List name -> fully resolved, deduplicated entries
	cirIncMap = make(map[string]bool)        // Used for circular inclusion detection
)

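// Entry is one parsed rule. For example (a hypothetical rule), the line
// "full:a.example.org @ads &EXAMPLE" yields Type "full", Value
// "a.example.org", Attrs ["ads"], Affs ["EXAMPLE"], and the canonical
// Plain form "full:a.example.org:@ads".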
type Entry struct {
	Type  string
	Value string
	Attrs []string
	Plain string
	Affs  []string
}

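// Inclusion is one "include:" rule after parsing its attribute filters:
// attributes written as "@attr" must be present on an included entry,
// attributes written as "@-attr" must be absent (see parseList).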
type Inclusion struct {
	Source    string
	MustAttrs []string
	BanAttrs  []string
}

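// ParsedList is a list with its direct entries and its not-yet-resolved
// inclusions of other lists.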
type ParsedList struct {
	Name       string
	Inclusions []*Inclusion
	Entries    []*Entry
}

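// makeProtoList converts the resolved entries of one list into a
// router.GeoSite message; each attribute becomes a boolean Domain_Attribute.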
func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) {
	site := &router.GeoSite{
		CountryCode: listName,
		Domain:      make([]*router.Domain, 0, len(entries)),
	}
	for _, entry := range entries {
		pdomain := &router.Domain{Value: entry.Value}
		for _, attr := range entry.Attrs {
			pdomain.Attribute = append(pdomain.Attribute, &router.Domain_Attribute{
				Key:        attr,
				TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true},
			})
		}

		switch entry.Type {
		case RuleTypeDomain:
			pdomain.Type = router.Domain_RootDomain
		case RuleTypeRegexp:
			pdomain.Type = router.Domain_Regex
		case RuleTypeKeyword:
			pdomain.Type = router.Domain_Plain
		case RuleTypeFullDomain:
			pdomain.Type = router.Domain_Full
		}
		site.Domain = append(site.Domain, pdomain)
	}
	return site, nil
}

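// writePlainAll exports every resolved list into a single YAML file named
// "<outputname>_plain.yml". The emitted shape looks like (illustrative
// values):
//
//	lists:
//	  - name: google
//	    length: 2
//	    rules:
//	      - domain:google.com
//	      - full:www.google.com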
func writePlainAll(siteList []string) error {
	file, err := os.Create(filepath.Join(*outputDir, *outputName+"_plain.yml"))
	if err != nil {
		return err
	}
	defer file.Close()
	w := bufio.NewWriter(file)
	w.WriteString("lists:\n")
	for _, site := range siteList {
		fmt.Fprintf(w, "  - name: %s\n", strings.ToLower(site))
		fmt.Fprintf(w, "    length: %d\n", len(finalMap[site]))
		w.WriteString("    rules:\n")
		for _, entry := range finalMap[site] {
			fmt.Fprintf(w, "      - %s\n", entry.Plain)
		}
	}
	return w.Flush()
}

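// writePlainList exports a single resolved list, one canonical Plain rule
// per line, to "<listname>.txt" in the output directory.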
func writePlainList(exportedName string) error {
	targetList, exist := finalMap[strings.ToUpper(exportedName)]
	if !exist || len(targetList) == 0 {
		return fmt.Errorf("list '%s' does not exist or is empty", exportedName)
	}
	file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName)+".txt"))
	if err != nil {
		return err
	}
	defer file.Close()
	w := bufio.NewWriter(file)
	for _, entry := range targetList {
		fmt.Fprintln(w, entry.Plain)
	}
	return w.Flush()
}

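// parseEntry parses a single cleaned data-file line into an Entry. A line
// is "[type:]value" optionally followed by whitespace-separated
// "@attribute" and "&AFFILIATION" tokens; the type defaults to "domain".
// For instance (a hypothetical line), "example.org @cn" parses to
// domain:example.org with the single attribute "cn".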
func parseEntry(line string) (Entry, error) {
	var entry Entry
	parts := strings.Fields(line)
	if len(parts) == 0 {
		return entry, fmt.Errorf("empty entry: %s", line)
	}

	// Parse type and value
	rawTypeVal := parts[0]
	kv := strings.Split(rawTypeVal, ":") // NB: values themselves must not contain ':'
	if len(kv) == 1 {
		entry.Type = RuleTypeDomain // Default type
		entry.Value = strings.ToLower(rawTypeVal)
	} else if len(kv) == 2 {
		entry.Type = strings.ToLower(kv[0])
		if entry.Type == RuleTypeRegexp {
			entry.Value = kv[1]
		} else if entry.Type == RuleTypeInclude {
			entry.Value = strings.ToUpper(kv[1])
		} else {
			entry.Value = strings.ToLower(kv[1])
		}
	} else {
		return entry, fmt.Errorf("invalid format: %s", line)
	}
	// Check type and value
	if !TypeChecker.MatchString(entry.Type) {
		return entry, fmt.Errorf("invalid type: %s", entry.Type)
	}
	switch entry.Type {
	case RuleTypeRegexp:
		if _, err := regexp.Compile(entry.Value); err != nil {
			return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
		}
	case RuleTypeInclude:
		if !SiteChecker.MatchString(entry.Value) {
			return entry, fmt.Errorf("invalid included list name: %s", entry.Value)
		}
	default: // `full`, `domain` and `keyword` are all (parts of) domains
		if !DomainChecker.MatchString(entry.Value) {
			return entry, fmt.Errorf("invalid domain: %s", entry.Value)
		}
	}

	// Parse/Check attributes and affiliations
	for _, part := range parts[1:] {
		if strings.HasPrefix(part, "@") {
			attr := strings.ToLower(part[1:]) // Trim attribute prefix `@` character
			if !AttrChecker.MatchString(attr) {
				return entry, fmt.Errorf("invalid attribute key: %s", attr)
			}
			entry.Attrs = append(entry.Attrs, attr)
		} else if strings.HasPrefix(part, "&") {
			aff := strings.ToUpper(part[1:]) // Trim affiliation prefix `&` character
			if !SiteChecker.MatchString(aff) {
				return entry, fmt.Errorf("invalid affiliation key: %s", aff)
			}
			entry.Affs = append(entry.Affs, aff)
		} else {
			return entry, fmt.Errorf("invalid attribute/affiliation: %s", part)
		}
	}
	// Sort attributes so the Plain form is canonical
	slices.Sort(entry.Attrs)
	// Formatted plain entry: type:domain.tld:@attr1,@attr2
	entry.Plain = entry.Type + ":" + entry.Value
	if len(entry.Attrs) != 0 {
		entry.Plain = entry.Plain + ":@" + strings.Join(entry.Attrs, ",@")
	}

	return entry, nil
}

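// loadData reads one data file, strips comments and blank lines, and
// appends the parsed entries to refMap under the uppercased file name.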
func loadData(path string) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()

	listName := strings.ToUpper(filepath.Base(path))
	if !SiteChecker.MatchString(listName) {
		return fmt.Errorf("invalid list name: %s", listName)
	}
	scanner := bufio.NewScanner(file)
	lineIdx := 0
	for scanner.Scan() {
		line := scanner.Text()
		lineIdx++
		// Remove comments
		if idx := strings.Index(line, "#"); idx != -1 {
			line = line[:idx]
		}
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		entry, err := parseEntry(line)
		if err != nil {
			return fmt.Errorf("error in %s at line %d: %v", path, lineIdx, err)
		}
		refMap[listName] = append(refMap[listName], &entry)
	}
	return scanner.Err()
}

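// parseList turns the raw entries of one list into its ParsedList: include
// rules become Inclusions (with "@attr"/"@-attr" filters), and every other
// entry is attached to the list itself plus to each affiliated list named
// by an "&AFFILIATION" token.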
func parseList(refName string, refList []*Entry) error {
	pl := plMap[refName]
	if pl == nil {
		pl = &ParsedList{Name: refName}
		plMap[refName] = pl
	}
	for _, entry := range refList {
		if entry.Type == RuleTypeInclude {
			if len(entry.Affs) != 0 {
				return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value)
			}
			inc := &Inclusion{Source: entry.Value}
			for _, attr := range entry.Attrs {
				if strings.HasPrefix(attr, "-") {
					inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character
				} else {
					inc.MustAttrs = append(inc.MustAttrs, attr)
				}
			}
			pl.Inclusions = append(pl.Inclusions, inc)
		} else {
			for _, aff := range entry.Affs {
				apl := plMap[aff]
				if apl == nil {
					apl = &ParsedList{Name: aff}
					plMap[aff] = apl
				}
				apl.Entries = append(apl.Entries, entry)
			}
			pl.Entries = append(pl.Entries, entry)
		}
	}
	return nil
}

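// polishList deduplicates a resolved rule set and drops attribute-free
// full/domain entries whose parent domain is already covered by a domain
// rule; regexp, keyword, and attributed entries always pass through. For
// example (hypothetical rules), given domain:example.org, both
// full:a.example.org and domain:b.example.org are redundant, while
// domain:example.org @cn is kept. The result is sorted by its Plain form.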
func polishList(roughMap map[string]*Entry) []*Entry {
	finalList := make([]*Entry, 0, len(roughMap))
	queuingList := make([]*Entry, 0, len(roughMap)) // Domain/full entries without attr
	domainsMap := make(map[string]bool)
	for _, entry := range roughMap {
		switch entry.Type { // Bypass regexp, keyword and "full/domain with attr"
		case RuleTypeRegexp, RuleTypeKeyword:
			finalList = append(finalList, entry)
		case RuleTypeDomain:
			domainsMap[entry.Value] = true
			if len(entry.Attrs) != 0 {
				finalList = append(finalList, entry)
			} else {
				queuingList = append(queuingList, entry)
			}
		case RuleTypeFullDomain:
			if len(entry.Attrs) != 0 {
				finalList = append(finalList, entry)
			} else {
				queuingList = append(queuingList, entry)
			}
		}
	}
	// Remove redundant subdomains for full/domain without attr
	for _, qentry := range queuingList {
		isRedundant := false
		pd := qentry.Value // To be parent domain
		if qentry.Type == RuleTypeFullDomain {
			pd = "." + pd // So that `domain:example.org` overrides `full:example.org`
		}
		for {
			idx := strings.Index(pd, ".")
			if idx == -1 {
				break
			}
			pd = pd[idx+1:] // Go for next parent
			if !strings.Contains(pd, ".") {
				break // Do not allow a TLD to be a parent
			}
			if domainsMap[pd] {
				isRedundant = true
				break
			}
		}
		if !isRedundant {
			finalList = append(finalList, qentry)
		}
	}
	// Sort final entries
	slices.SortFunc(finalList, func(a, b *Entry) int {
		return strings.Compare(a.Plain, b.Plain)
	})
	return finalList
}

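// resolveList recursively flattens a list into finalMap: direct entries
// first, then the attribute-filtered entries of every included list.
// finalMap doubles as the memo table, and cirIncMap holds the lists on the
// current recursion path so that circular inclusion is reported instead of
// recursing forever.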
func resolveList(pl *ParsedList) error {
	if _, pldone := finalMap[pl.Name]; pldone {
		return nil
	}

	if cirIncMap[pl.Name] {
		return fmt.Errorf("circular inclusion in: %s", pl.Name)
	}
	cirIncMap[pl.Name] = true
	defer delete(cirIncMap, pl.Name)

	isMatchAttrFilters := func(entry *Entry, incFilter *Inclusion) bool {
		if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 {
			return true
		}
		if len(entry.Attrs) == 0 {
			return len(incFilter.MustAttrs) == 0
		}

		for _, m := range incFilter.MustAttrs {
			if !slices.Contains(entry.Attrs, m) {
				return false
			}
		}
		for _, b := range incFilter.BanAttrs {
			if slices.Contains(entry.Attrs, b) {
				return false
			}
		}
		return true
	}

	roughMap := make(map[string]*Entry) // Avoid basic duplicates
	for _, dentry := range pl.Entries { // Add direct entries
		roughMap[dentry.Plain] = dentry
	}
	for _, inc := range pl.Inclusions {
		incPl, exist := plMap[inc.Source]
		if !exist {
			return fmt.Errorf("list '%s' includes a non-existent list: '%s'", pl.Name, inc.Source)
		}
		if err := resolveList(incPl); err != nil {
			return err
		}
		for _, ientry := range finalMap[inc.Source] {
			if isMatchAttrFilters(ientry, inc) { // Add included entries
				roughMap[ientry.Plain] = ientry
			}
		}
	}
	finalMap[pl.Name] = polishList(roughMap)
	return nil
}

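// main wires the pipeline together: walk the data directory into refMap,
// parse it into plMap, resolve everything into finalMap, optionally export
// plaintext lists, and finally marshal the GeoSiteList into the dat file.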
func main() {
	flag.Parse()

	dir := *dataPath
	fmt.Println("Use domain lists in", dir)

	// Generate refMap
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		return loadData(path)
	})
	if err != nil {
		fmt.Println("Failed to loadData:", err)
		os.Exit(1)
	}

	// Generate plMap
	for refName, refList := range refMap {
		if err := parseList(refName, refList); err != nil {
			fmt.Println("Failed to parseList:", err)
			os.Exit(1)
		}
	}

	// Generate finalMap and a sorted list of site names
	siteList := make([]string, 0, len(plMap))
	for _, pl := range plMap {
		siteList = append(siteList, pl.Name)
		if err := resolveList(pl); err != nil {
			fmt.Println("Failed to resolveList:", err)
			os.Exit(1)
		}
	}
	slices.Sort(siteList)

	// Create the output directory if it does not exist
	if _, err := os.Stat(*outputDir); os.IsNotExist(err) {
		if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil {
			fmt.Println("Failed to create output directory:", mkErr)
			os.Exit(1)
		}
	}

	// Export plaintext lists
	if *exportLists != "" {
		for _, exportedList := range strings.Split(*exportLists, ",") {
			if exportedList == "_all_" {
				if err := writePlainAll(siteList); err != nil {
					fmt.Println("Failed to writePlainAll:", err)
					continue
				}
			} else {
				if err := writePlainList(exportedList); err != nil {
					fmt.Println("Failed to write list:", err)
					continue
				}
			}
			fmt.Printf("list: '%s' has been generated successfully.\n", exportedList)
		}
	}

	// Generate dat file
	protoList := new(router.GeoSiteList)
	for _, siteName := range siteList { // So that protoList.Entry is sorted
		site, err := makeProtoList(siteName, finalMap[siteName])
		if err != nil {
			fmt.Println("Failed to makeProtoList:", err)
			os.Exit(1)
		}
		protoList.Entry = append(protoList.Entry, site)
	}

	protoBytes, err := proto.Marshal(protoList)
	if err != nil {
		fmt.Println("Failed to marshal:", err)
		os.Exit(1)
	}
	if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil {
		fmt.Println("Failed to write output:", err)
		os.Exit(1)
	} else {
		fmt.Println(*outputName, "has been generated successfully.")
	}
}