Compare commits

..

10 Commits

Author SHA1 Message Date
MkQtS
aa052c7bd1 Feat: add advanced deduplication for subdomains
only for domain/full without attr
2026-01-11 22:02:46 +08:00
MkQtS
68d291d4ee Feat: add support for affiliation
A domain rule is always added to the list corresponding to the filename
it resides in. Additionally, you can now add affiliations to a domain
rule, and the rule will be added to the list specified by the
affiliation. Each affiliation begins with `&` and is followed by the name
of the affiliation.

For example, you can add a single rule
`youtube.com &youtube &category-entertainment` in file `data/google`.
Then `geosite:google`, `geosite:youtube` and
`geosite:category-entertainment` all contain `[domain:]youtube.com`
(even if files `data/youtube` and `data/category-entertainment` do not
exist).

This helps us to reduce the number of data files without compromising
functionality, and avoid writing the same rule in different files.
2026-01-11 22:02:46 +08:00
MkQtS
951b407d6b Refactor exporting plaintext list 2026-01-11 22:02:46 +08:00
MkQtS
5e6253ec4e Feat: add support for partial include
- refactor inclusion logic
- add basic deduplication
2026-01-11 22:02:46 +08:00
MkQtS
c87c46db9d Refactor: promote refMap 2026-01-11 22:02:46 +08:00
MkQtS
eeccce1d3b Remove support for partial include
This reverts e640ac2783

It is problematic and I will implement a new one
2026-01-11 22:02:46 +08:00
MkQtS
fd7f296305 Refactor exportPlainTextList
- remove unnecessary variable
- improve readability
2026-01-11 22:02:46 +08:00
MkQtS
8cbcc1042b Refactor parseEntry
- add value/attribute checker (check for missing space)
- allow multiple spaces
- sort attributes
- improve readability
2026-01-11 22:02:46 +08:00
MkQtS
431b7f2acb Refactor: use string attr before toProto 2026-01-11 22:02:45 +08:00
MkQtS
d898eef2f0 Refactor: reduce the use of strings.TrimSpace 2026-01-11 22:02:45 +08:00
12 changed files with 313 additions and 320 deletions

View File

@@ -202,7 +202,3 @@ zhugeio.com
# 车来了 # 车来了
atrace.chelaile.net.cn atrace.chelaile.net.cn
logs.chelaile.net.cn logs.chelaile.net.cn
# 航旅纵横
analytics.umetrip.com
sensors.umetrip.com.cn

View File

@@ -1,37 +1,43 @@
# 远程桌面/远程组网 # 远程桌面/远程组网
## 连连控
asklink.com
## EasyTier
easytier.cn
## Oray ## Oray
oray.com oray.com
oray.net oray.net
orayer.com orayer.com
orayimg.com orayimg.com
sunlogin.net sunlogin.net
## 叮当猫脚本管理系统 懒人精灵远程调试
privateapi.xyz
## ToDesk ## ToDesk
todesk.cn todesk.cn
todesk.com todesk.com
## xEdge干将互联
## 连连控
asklink.com
## 叮当猫脚本管理系统 懒人精灵远程调试
privateapi.xyz
# xEdge干将互联
include:xedge include:xedge
# 文档协作 # 文档协作
## 语雀
nlark.com
yuque.com
## 石墨文档
shimo.im
shimonote.com
smgv.cn
## Tower 团队协作 ## Tower 团队协作
tower.im tower.im
## 秀米编辑器 ## 秀米编辑器
tritoninfo.net tritoninfo.net
xiumi.us xiumi.us
xiumius.com xiumius.com
## 石墨文档
shimo.im
shimonote.com
smgv.cn
## 语雀
nlark.com
yuque.com
## 吾道 ## 吾道
woodo.cn woodo.cn

View File

@@ -140,7 +140,6 @@ shellcheck.net
shields.io shields.io
sqlite.org sqlite.org
sublimetext.com sublimetext.com
tampermonkey.net
termius.com termius.com
unpkg.com unpkg.com
videojs.com videojs.com

View File

@@ -104,9 +104,6 @@ bag.itunes.apple.com
bookeeper.itunes.apple.com bookeeper.itunes.apple.com
# Entertainment oriented media # Entertainment oriented media
# This section references the "Source Considerations" page on the Chinese Wikipedia:
# - https://zh.wikipedia.org/wiki/WikiProject:电子游戏/来源考量
# - https://zh.wikipedia.org/wiki/WikiProject:ACG/來源考量
## 4Gamer.net ## 4Gamer.net
4gamer.net 4gamer.net
## 4Gamers ## 4Gamers
@@ -119,28 +116,20 @@ appget.com
appmedia.jp appmedia.jp
## Automaton ## Automaton
automaton-media.com automaton-media.com
## 橙心社
cxacg.com
## 电faminicogamer ## 电faminicogamer
denfaminicogamer.jp denfaminicogamer.jp
## ASCII Media Works ## ASCII Media Works
dengekionline.com dengekionline.com
## E-ROAD ## E-ROAD
eroge-road.com eroge-road.com
## esports.gg
esports.gg
## ファミ通 ## ファミ通
famitsu.com famitsu.com
## GameApps.HK
gameapps.hk
## 遊戲基地 ## 遊戲基地
gamebase.com.tw gamebase.com.tw
## IID, Inc. ## IID, Inc.
gamebusiness.jp gamebusiness.jp
gamespark.jp gamespark.jp
inside-games.jp inside-games.jp
## Game Focus
gamefocus.co.kr
## GameMeca ## GameMeca
gamemeca.com gamemeca.com
## Gameover有機網 ## Gameover有機網
@@ -152,8 +141,6 @@ onlinegamer.jp
greatgame.asia greatgame.asia
## HobbiGame ## HobbiGame
hobbigame.com hobbigame.com
## ゲーム文化保存研究所
igcc.jp
## IGN ## IGN
ign.com ign.com
ignimg.com ignimg.com
@@ -165,30 +152,18 @@ mediaclip.jp
maedahiroyuki.com maedahiroyuki.com
## MANTANWEB ## MANTANWEB
mantan-web.jp mantan-web.jp
## モエデジ
moedigi.com
## Moepedia ## Moepedia
moepedia.net moepedia.net
## マイナビニュース
mynavi.jp
## Openbook阅读通
openbook.org.tw
## QooApp ## QooApp
qoo-app.com qoo-app.com
## Real Sound ## Real Sound
realsound.jp realsound.jp
## れポたま!
repotama.com
## Saiga NAK ## Saiga NAK
saiganak.com saiganak.com
## SQOOL
sqool.net
## The Games Daily ## The Games Daily
tgdaily.co.kr tgdaily.co.kr
## Thisisgame ## Thisisgame
thisisgame.com thisisgame.com
## 玩具人
toy-people.com
## Wanuxi ## Wanuxi
wanuxi.com wanuxi.com
## よろず〜 ## よろず〜

View File

@@ -109,9 +109,6 @@ zhulang.com
zongheng.com zongheng.com
# 娱乐资讯媒体 # 娱乐资讯媒体
# 该部分参考了中文维基百科的「来源考量」:
# - https://zh.wikipedia.org/wiki/WikiProject:电子游戏/来源考量
# - https://zh.wikipedia.org/wiki/WikiProject:ACG/來源考量
include:tgbus include:tgbus
include:vgtime include:vgtime
@@ -146,15 +143,11 @@ ign.com.cn
nadianshi.com nadianshi.com
## 游戏日报 ## 游戏日报
news.yxrb.net news.yxrb.net
## 手谈姬
shoutanjjj.com
## 游戏机实用技术 ## 游戏机实用技术
ucg.cn ucg.cn
## 游戏茶馆 ## 游戏茶馆
youxichaguan.com youxichaguan.com
## 游戏葡萄 ## 游戏葡萄
youxiputao.com youxiputao.com
## 游戏陀螺
youxituoluo.com
## 游研社 ## 游研社
yystv.cn yystv.cn

View File

@@ -112,7 +112,6 @@ hkej.com
hkgpao.com hkgpao.com
hongkongfp.com hongkongfp.com
inmediahk.net inmediahk.net
inquirer.net
inside.com.tw inside.com.tw
itmedia.co.jp itmedia.co.jp
jfengtime.com jfengtime.com
@@ -121,7 +120,6 @@ limedia.tw
localpresshk.com localpresshk.com
ltsports.com.tw ltsports.com.tw
macaodaily.com macaodaily.com
maidonanews.jp
mdnkids.com mdnkids.com
mirrormedia.com.tw mirrormedia.com.tw
mirrormedia.mg mirrormedia.mg

View File

@@ -2,44 +2,33 @@ include:cloudflare-cn
include:cloudflare-ipfs include:cloudflare-ipfs
argotunnel.com argotunnel.com
browser.run
cfargotunnel.com cfargotunnel.com
cfdata.org
cfl.re cfl.re
cloudflare-dns.com cloudflare-dns.com
cloudflare-ech.com cloudflare-ech.com
cloudflare-esni.com cloudflare-esni.com
cloudflare-gateway.com cloudflare-gateway.com
cloudflare-quic.com cloudflare-quic.com
cloudflare-terms-of-service-abuse.com
cloudflare.com cloudflare.com
cloudflare.dev
cloudflare.net cloudflare.net
cloudflare.tv cloudflare.tv
cloudflareaccess.com cloudflareaccess.com
cloudflareapps.com cloudflareapps.com
cloudflarebolt.com cloudflarebolt.com
cloudflarebrowser.com
cloudflarechallenge.com
cloudflareclient.com cloudflareclient.com
cloudflarecp.com
cloudflareinsights.com cloudflareinsights.com
cloudflareok.com cloudflareok.com
cloudflarepartners.com cloudflarepartners.com
cloudflareportal.com cloudflareportal.com
cloudflarepreview.com cloudflarepreview.com
cloudflareregistrar.com cloudflareregistrar.com
cloudflareresearch.com
cloudflareresolve.com cloudflareresolve.com
cloudflaressl.com cloudflaressl.com
cloudflarestatus.com cloudflarestatus.com
cloudflarestorage.com cloudflarestorage.com
cloudflarestream.com cloudflarestream.com
cloudflaresupport.com
cloudflaretest.com cloudflaretest.com
cloudflarewarp.com cloudflarewarp.com
cloudflareworkers.com
encryptedsni.com
every1dns.net every1dns.net
imagedelivery.net imagedelivery.net
isbgpsafeyet.com isbgpsafeyet.com

View File

@@ -230,8 +230,6 @@ xiamenair.com # 厦门航空
12306.cn 12306.cn
95306.cn 95306.cn
ccrgt.com ccrgt.com
## 北京市政交通一卡通
bmac.com.cn
## 车来了 ## 车来了
chelaile.net.cn chelaile.net.cn
## 跨境巴士 ## 跨境巴士

View File

@@ -1,4 +1,3 @@
c.sayhi.360.cn @ads
fenxi.360.cn @ads fenxi.360.cn @ads
fenxi.com @ads fenxi.com @ads
lianmeng.360.cn @ads lianmeng.360.cn @ads

View File

@@ -79,14 +79,3 @@ fymallqa3.com
fymallqa4.com fymallqa4.com
fymallqa7.com fymallqa7.com
fymallqa9.com fymallqa9.com
tdnsdl1.cn
tdnsdl1.com.cn
tdnsdl2.cn
tdnsdl2.com.cn
tdnsdl3.cn
tdnsdl3.com.cn
tdnsdl4.cn
tdnsdl4.com.cn
tdnsdl5.cn
tdnsdl5.com.cn

View File

@@ -1,7 +1,6 @@
ad.browser.qq.com @ads ad.browser.qq.com @ads
ad.qq.com @ads ad.qq.com @ads
ad.qun.qq.com @ads ad.qun.qq.com @ads
ad.tencentmusic.com @ads
ad.weixin.qq.com @ads ad.weixin.qq.com @ads
adfilter.imtt.qq.com @ads adfilter.imtt.qq.com @ads
adnet.qq.com @ads adnet.qq.com @ads
@@ -43,7 +42,6 @@ pmir.3g.qq.com @ads
push.qq.com @ads push.qq.com @ads
qqdata.ab.qq.com @ads qqdata.ab.qq.com @ads
report.qqweb.qq.com @ads report.qqweb.qq.com @ads
report.tencentmusic.com @ads
rmonitor.qq.com @ads rmonitor.qq.com @ads
sngmta.qq.com @ads sngmta.qq.com @ads
stat.y.qq.com @ads stat.y.qq.com @ads

505
main.go
View File

@@ -29,10 +29,31 @@ const (
RuleTypeInclude string = "include" RuleTypeInclude string = "include"
) )
var (
TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`)
AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)
SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`)
)
var (
refMap = make(map[string]*List)
plMap = make(map[string]*ParsedList)
finalMap = make(map[string][]Entry)
cirIncMap = make(map[string]bool) // Used for circular inclusion detection
)
type Entry struct { type Entry struct {
Type string Type string
Value string Value string
Attrs []*router.Domain_Attribute Attrs []string
Affs []string
}
type Inclusion struct {
Source string
MustAttrs []string
BanAttrs []string
} }
type List struct { type List struct {
@@ -42,149 +63,115 @@ type List struct {
type ParsedList struct { type ParsedList struct {
Name string Name string
Inclusion map[string]bool Inclusions []Inclusion
Entry []Entry Entry []Entry
} }
func (l *ParsedList) toPlainText(listName string) error { func makeProtoList(listName string, entries *[]Entry) (*router.GeoSite, error) {
var entryBytes []byte site := &router.GeoSite{
for _, entry := range l.Entry { CountryCode: listName,
var attrString string Domain: make([]*router.Domain, 0, len(*entries)),
if entry.Attrs != nil { }
for _, entry := range *entries {
pdomain := &router.Domain{Value: entry.Value}
for _, attr := range entry.Attrs { for _, attr := range entry.Attrs {
attrString += "@" + attr.GetKey() + "," pdomain.Attribute = append(pdomain.Attribute, &router.Domain_Attribute{
} Key: attr,
attrString = strings.TrimRight(":"+attrString, ",") TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true},
} })
// Entry output format is: type:domain.tld:@attr1,@attr2
entryBytes = append(entryBytes, []byte(entry.Type+":"+entry.Value+attrString+"\n")...)
}
if err := os.WriteFile(filepath.Join(*outputDir, listName+".txt"), entryBytes, 0644); err != nil {
return err
}
return nil
} }
func (l *ParsedList) toProto() (*router.GeoSite, error) {
site := &router.GeoSite{
CountryCode: l.Name,
}
for _, entry := range l.Entry {
switch entry.Type { switch entry.Type {
case RuleTypeDomain: case RuleTypeDomain:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_RootDomain
Type: router.Domain_RootDomain,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeRegexp: case RuleTypeRegexp:
// check regexp validity to avoid runtime error pdomain.Type = router.Domain_Regex
_, err := regexp.Compile(entry.Value)
if err != nil {
return nil, fmt.Errorf("invalid regexp in list %s: %s", l.Name, entry.Value)
}
site.Domain = append(site.Domain, &router.Domain{
Type: router.Domain_Regex,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeKeyword: case RuleTypeKeyword:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_Plain
Type: router.Domain_Plain,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeFullDomain: case RuleTypeFullDomain:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_Full
Type: router.Domain_Full,
Value: entry.Value,
Attribute: entry.Attrs,
})
default:
return nil, fmt.Errorf("unknown domain type: %s", entry.Type)
} }
site.Domain = append(site.Domain, pdomain)
} }
return site, nil return site, nil
} }
func exportPlainTextList(list []string, refName string, pl *ParsedList) { func writePlainList(exportedName string) error {
for _, listName := range list { targetList, exist := finalMap[strings.ToUpper(exportedName)]
if strings.EqualFold(refName, listName) { if !exist || len(targetList) == 0 {
if err := pl.toPlainText(strings.ToLower(refName)); err != nil { return fmt.Errorf("'%s' list does not exist or is empty.", exportedName)
fmt.Println("Failed:", err)
continue
} }
fmt.Printf("'%s' has been generated successfully.\n", listName) file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName) + ".txt"))
if err != nil {
return err
} }
defer file.Close()
w := bufio.NewWriter(file)
for _, entry := range targetList {
// Entry output format is: type:domain.tld:@attr1,@attr2
var attrString string
if entry.Attrs != nil {
attrString = ":@" + strings.Join(entry.Attrs, ",@")
} }
fmt.Fprintln(w, entry.Type + ":" + entry.Value + attrString)
}
return w.Flush()
} }
func removeComment(line string) string { func parseEntry(line string) (Entry, error) {
idx := strings.Index(line, "#") var entry Entry
if idx == -1 { parts := strings.Fields(line)
return line
}
return strings.TrimSpace(line[:idx])
}
func parseDomain(domain string, entry *Entry) error { // Parse type and value
kv := strings.Split(domain, ":") rawTypeVal := parts[0]
kv := strings.Split(rawTypeVal, ":")
if len(kv) == 1 { if len(kv) == 1 {
entry.Type = RuleTypeDomain entry.Type = RuleTypeDomain // Default type
entry.Value = strings.ToLower(kv[0]) entry.Value = strings.ToLower(rawTypeVal)
return nil } else if len(kv) == 2 {
}
if len(kv) == 2 {
entry.Type = strings.ToLower(kv[0]) entry.Type = strings.ToLower(kv[0])
if entry.Type == RuleTypeRegexp {
if strings.EqualFold(entry.Type, RuleTypeRegexp) {
entry.Value = kv[1] entry.Value = kv[1]
} else { } else {
entry.Value = strings.ToLower(kv[1]) entry.Value = strings.ToLower(kv[1])
} }
} else {
return nil return entry, fmt.Errorf("invalid format: %s", line)
}
// Check type and value
if !TypeChecker.MatchString(entry.Type) {
return entry, fmt.Errorf("invalid type: %s", entry.Type)
}
if entry.Type == RuleTypeRegexp {
if _, err := regexp.Compile(entry.Value); err != nil {
return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
}
} else if !ValueChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid value: %s", entry.Value)
} }
return fmt.Errorf("invalid format: %s", domain) // Parse/Check attributes and affiliations
} for _, part := range parts[1:] {
if strings.HasPrefix(part, "@") {
func parseAttribute(attr string) (*router.Domain_Attribute, error) { attr := strings.ToLower(part[1:]) // Trim attribute prefix `@` character
var attribute router.Domain_Attribute if !AttrChecker.MatchString(attr) {
if len(attr) == 0 || attr[0] != '@' { return entry, fmt.Errorf("invalid attribute key: %s", attr)
return &attribute, fmt.Errorf("invalid attribute: %s", attr)
}
attribute.Key = strings.ToLower(attr[1:]) // Trim attribute prefix `@` character
attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true}
return &attribute, nil
}
func parseEntry(line string) (Entry, error) {
line = strings.TrimSpace(line)
parts := strings.Split(line, " ")
var entry Entry
if len(parts) == 0 {
return entry, fmt.Errorf("empty entry")
}
if err := parseDomain(parts[0], &entry); err != nil {
return entry, err
}
for i := 1; i < len(parts); i++ {
attr, err := parseAttribute(parts[i])
if err != nil {
return entry, err
} }
entry.Attrs = append(entry.Attrs, attr) entry.Attrs = append(entry.Attrs, attr)
} else if strings.HasPrefix(part, "&") {
aff := strings.ToUpper(part[1:]) // Trim affiliation prefix `&` character
if !SiteChecker.MatchString(aff) {
return entry, fmt.Errorf("invalid affiliation key: %s", aff)
} }
entry.Affs = append(entry.Affs, aff)
} else {
return entry, fmt.Errorf("invalid attribute/affiliation: %s", part)
}
}
// Sort attributes
sort.Slice(entry.Attrs, func(i, j int) bool {
return entry.Attrs[i] < entry.Attrs[j]
})
return entry, nil return entry, nil
} }
@@ -196,14 +183,20 @@ func Load(path string) (*List, error) {
} }
defer file.Close() defer file.Close()
list := &List{ listName := strings.ToUpper(filepath.Base(path))
Name: strings.ToUpper(filepath.Base(path)), if !SiteChecker.MatchString(listName) {
return nil, fmt.Errorf("invalid list name: %s", listName)
} }
list := &List{Name: listName}
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
for scanner.Scan() { for scanner.Scan() {
line := strings.TrimSpace(scanner.Text()) line := scanner.Text()
line = removeComment(line) // Remove comments
if len(line) == 0 { if idx := strings.Index(line, "#"); idx != -1 {
line = line[:idx]
}
line = strings.TrimSpace(line)
if line == "" {
continue continue
} }
entry, err := parseEntry(line) entry, err := parseEntry(line)
@@ -216,99 +209,158 @@ func Load(path string) (*List, error) {
return list, nil return list, nil
} }
func isMatchAttr(Attrs []*router.Domain_Attribute, includeKey string) bool { func ParseList(refList *List) error {
isMatch := false pl := plMap[refList.Name]
mustMatch := true if pl == nil {
matchName := includeKey pl = &ParsedList{Name: refList.Name}
if strings.HasPrefix(includeKey, "!") { plMap[refList.Name] = pl
isMatch = true
mustMatch = false
matchName = strings.TrimLeft(includeKey, "!")
} }
for _, entry := range refList.Entry {
for _, Attr := range Attrs {
attrName := Attr.Key
if mustMatch {
if matchName == attrName {
isMatch = true
break
}
} else {
if matchName == attrName {
isMatch = false
break
}
}
}
return isMatch
}
func createIncludeAttrEntrys(list *List, matchAttr *router.Domain_Attribute) []Entry {
newEntryList := make([]Entry, 0, len(list.Entry))
matchName := matchAttr.Key
for _, entry := range list.Entry {
matched := isMatchAttr(entry.Attrs, matchName)
if matched {
newEntryList = append(newEntryList, entry)
}
}
return newEntryList
}
func ParseList(list *List, ref map[string]*List) (*ParsedList, error) {
pl := &ParsedList{
Name: list.Name,
Inclusion: make(map[string]bool),
}
entryList := list.Entry
for {
newEntryList := make([]Entry, 0, len(entryList))
hasInclude := false
for _, entry := range entryList {
if entry.Type == RuleTypeInclude { if entry.Type == RuleTypeInclude {
refName := strings.ToUpper(entry.Value) if len(entry.Affs) != 0 {
if entry.Attrs != nil { return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value)
}
inc := Inclusion{Source: strings.ToUpper(entry.Value)}
for _, attr := range entry.Attrs { for _, attr := range entry.Attrs {
InclusionName := strings.ToUpper(refName + "@" + attr.Key) if strings.HasPrefix(attr, "-") {
if pl.Inclusion[InclusionName] { inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character
continue } else {
inc.MustAttrs = append(inc.MustAttrs, attr)
}
}
pl.Inclusions = append(pl.Inclusions, inc)
} else {
if len(entry.Affs) != 0 {
for _, aff := range entry.Affs {
apl := plMap[aff]
if apl == nil {
apl = &ParsedList{Name: aff}
plMap[aff] = apl
}
apl.Entry = append(apl.Entry, entry)
}
}
pl.Entry = append(pl.Entry, entry)
}
}
return nil
} }
pl.Inclusion[InclusionName] = true
refList := ref[refName] func polishList(rl *[]Entry) []Entry {
if refList == nil { // Remove basic duplicates
return nil, fmt.Errorf("list not found: %s", entry.Value) pendingList := make([]Entry, 0, len(*rl)) // Exactly same entries removed
entry2String := func(e Entry) string { // Attributes already sorted
return e.Type + ":" + e.Value + "@" + strings.Join(e.Attrs, "@")
} }
attrEntrys := createIncludeAttrEntrys(refList, attr) bscDupMap := make(map[string]bool)
if len(attrEntrys) != 0 { for _, entry := range *rl {
newEntryList = append(newEntryList, attrEntrys...) if estring := entry2String(entry); !bscDupMap[estring] {
bscDupMap[estring] = true
pendingList = append(pendingList, entry)
} }
} }
finalList := make([]Entry, 0, len(pendingList))
queuingList := make([]Entry, 0, len(pendingList)) // Domain/full entries without attr
domainsMap := make(map[string]bool)
for _, entry := range pendingList {
switch entry.Type { // Bypass regexp, keyword and "full/domain with attr"
case RuleTypeRegexp:
finalList = append(finalList, entry)
case RuleTypeKeyword:
finalList = append(finalList, entry)
case RuleTypeDomain:
domainsMap[entry.Value] = true
if len(entry.Attrs) != 0 {
finalList = append(finalList, entry)
} else { } else {
InclusionName := refName queuingList = append(queuingList, entry)
if pl.Inclusion[InclusionName] {
continue
} }
pl.Inclusion[InclusionName] = true case RuleTypeFullDomain:
refList := ref[refName] if len(entry.Attrs) != 0 {
if refList == nil { finalList = append(finalList, entry)
return nil, fmt.Errorf("list not found: %s", entry.Value)
}
newEntryList = append(newEntryList, refList.Entry...)
}
hasInclude = true
} else { } else {
newEntryList = append(newEntryList, entry) queuingList = append(queuingList, entry)
} }
} }
entryList = newEntryList }
if !hasInclude {
// Remove redundant subdomains for full/domain without attr
for _, qentry := range queuingList {
parts := strings.Split(qentry.Value, ".")
isRedundant := false
for i := 1; i < len(parts) - 1 ; i++ {
// Not check parent for level2 "name.tld" domain / tld will not become a parent
parentdomain := strings.Join(parts[i:], ".")
if domainsMap[parentdomain] {
isRedundant = true
break break
} }
} }
pl.Entry = entryList if !isRedundant {
finalList = append(finalList, qentry)
}
}
return pl, nil // Sort final entries
sort.Slice(finalList, func(i, j int) bool {
if finalList[i].Type != finalList[j].Type {
return finalList[i].Type < finalList[j].Type
}
if finalList[i].Value != finalList[j].Value {
return finalList[i].Value < finalList[j].Value
}
// Ideally, the comparison here will not be triggered by source data
return strings.Join(finalList[i].Attrs, ",") < strings.Join(finalList[j].Attrs, ",")
})
return finalList
}
func ResolveList(pl *ParsedList) error {
if _, pldone := finalMap[pl.Name]; pldone { return nil }
if cirIncMap[pl.Name] {
return fmt.Errorf("circular inclusion in: %s", pl.Name)
}
cirIncMap[pl.Name] = true
defer delete(cirIncMap, pl.Name)
isMatchAttrFilters := func(entry Entry, incFilter Inclusion) bool {
if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 { return true }
if len(entry.Attrs) == 0 { return len(incFilter.MustAttrs) == 0 }
attrMap := make(map[string]bool)
for _, attr := range entry.Attrs {
attrMap[attr] = true
}
for _, m := range incFilter.MustAttrs {
if !attrMap[m] { return false }
}
for _, b := range incFilter.BanAttrs {
if attrMap[b] { return false }
}
return true
}
var roughList []Entry
roughList = append(roughList, pl.Entry...)
for _, inc := range pl.Inclusions {
incPl, exist := plMap[inc.Source]
if !exist {
return fmt.Errorf("list '%s' includes a non-existent list: '%s'", pl.Name, inc.Source)
}
if err := ResolveList(incPl); err != nil {
return err
}
for _, ientry := range finalMap[inc.Source] {
if isMatchAttrFilters(ientry, inc) {
roughList = append(roughList, ientry)
}
}
}
finalMap[pl.Name] = polishList(&roughList)
return nil
} }
func main() { func main() {
@@ -317,7 +369,7 @@ func main() {
dir := *dataPath dir := *dataPath
fmt.Println("Use domain lists in", dir) fmt.Println("Use domain lists in", dir)
ref := make(map[string]*List) // Generate refMap
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
@@ -329,7 +381,7 @@ func main() {
if err != nil { if err != nil {
return err return err
} }
ref[list.Name] = list refMap[list.Name] = list
return nil return nil
}) })
if err != nil { if err != nil {
@@ -337,6 +389,22 @@ func main() {
os.Exit(1) os.Exit(1)
} }
// Generate plMap
for _, refList := range refMap {
if err := ParseList(refList); err != nil {
fmt.Println("Failed to ParseList:", err)
os.Exit(1)
}
}
// Generate finalMap
for _, pl := range plMap {
if err := ResolveList(pl); err != nil {
fmt.Println("Failed to ResolveList:", err)
os.Exit(1)
}
}
// Create output directory if not exist // Create output directory if not exist
if _, err := os.Stat(*outputDir); os.IsNotExist(err) { if _, err := os.Stat(*outputDir); os.IsNotExist(err) {
if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil { if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil {
@@ -345,43 +413,28 @@ func main() {
} }
} }
protoList := new(router.GeoSiteList) // Export plaintext list
var existList []string if *exportLists != "" {
for refName, list := range ref { exportedListSlice := strings.Split(*exportLists, ",")
pl, err := ParseList(list, ref) for _, exportedList := range exportedListSlice {
if err != nil { if err := writePlainList(exportedList); err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to write list:", err)
os.Exit(1) continue
} }
site, err := pl.toProto() fmt.Printf("list: '%s' has been generated successfully.\n", exportedList)
}
}
// Generate dat file
protoList := new(router.GeoSiteList)
for siteName, siteEntries := range finalMap {
site, err := makeProtoList(siteName, &siteEntries)
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed:", err)
os.Exit(1) os.Exit(1)
} }
protoList.Entry = append(protoList.Entry, site) protoList.Entry = append(protoList.Entry, site)
// Flatten and export plaintext list
if *exportLists != "" {
if existList != nil {
exportPlainTextList(existList, refName, pl)
} else {
exportedListSlice := strings.Split(*exportLists, ",")
for _, exportedListName := range exportedListSlice {
fileName := filepath.Join(dir, exportedListName)
_, err := os.Stat(fileName)
if err == nil || os.IsExist(err) {
existList = append(existList, exportedListName)
} else {
fmt.Printf("'%s' list does not exist in '%s' directory.\n", exportedListName, dir)
} }
}
if existList != nil {
exportPlainTextList(existList, refName, pl)
}
}
}
}
// Sort protoList so the marshaled list is reproducible // Sort protoList so the marshaled list is reproducible
sort.SliceStable(protoList.Entry, func(i, j int) bool { sort.SliceStable(protoList.Entry, func(i, j int) bool {
return protoList.Entry[i].CountryCode < protoList.Entry[j].CountryCode return protoList.Entry[i].CountryCode < protoList.Entry[j].CountryCode
@@ -389,11 +442,11 @@ func main() {
protoBytes, err := proto.Marshal(protoList) protoBytes, err := proto.Marshal(protoList)
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to marshal:", err)
os.Exit(1) os.Exit(1)
} }
if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil { if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to write output:", err)
os.Exit(1) os.Exit(1)
} else { } else {
fmt.Println(*outputName, "has been generated successfully.") fmt.Println(*outputName, "has been generated successfully.")