Compare commits

...

19 Commits

Author SHA1 Message Date
MkQtS
aa052c7bd1 Feat: add advanced deduplicate for subdomains
only for domain/full without attr
2026-01-11 22:02:46 +08:00
MkQtS
68d291d4ee Feat: add support for affiliation
A domain rule is always added to the list corresponding to the filename
it resides in. Additionally, you can now add affiliations to a domain
rule, and the rule will be added to the list specified by the
affiliation. Each affiliation begins with `&` and is followed by the name
of the affiliation.

For example, you can add a single rule
`youtube.com &youtube &category-entertainment` in file `data/google`.
Then `geosite:google`, `geosite:youtube` and
`geosite:category-entertainment` all contain `[domain:]youtube.com`
(even if files `data/youtube` and `data/category-entertainment` do not
exist).

This helps us to reduce the number of data files without compromising
functionality, and to avoid writing the same rule in different files.
2026-01-11 22:02:46 +08:00
MkQtS
951b407d6b Refactor exporting plaintext list 2026-01-11 22:02:46 +08:00
MkQtS
5e6253ec4e Feat: add support for partial include
- refactor inclusion logic
- add basic deduplicate
2026-01-11 22:02:46 +08:00
MkQtS
c87c46db9d Refactor: promote refMap 2026-01-11 22:02:46 +08:00
MkQtS
eeccce1d3b Remove support for partial include
This reverts e640ac2783

It is problematic and I will implement a new one
2026-01-11 22:02:46 +08:00
MkQtS
fd7f296305 Refactor exportPlainTextList
- remove unnecessary variable
- improve readability
2026-01-11 22:02:46 +08:00
MkQtS
8cbcc1042b Refactor parseEntry
- add value/attribute checker (checks for a missing space)
- allow multiple spaces
- sort attributes
- improve readability
2026-01-11 22:02:46 +08:00
MkQtS
431b7f2acb Refactor: use string attr before toProto 2026-01-11 22:02:45 +08:00
MkQtS
d898eef2f0 Refactor: reduce the use of strings.TrimSpace 2026-01-11 22:02:45 +08:00
深鸣
8786ff74f0 Add more ad domains (#3164)
* Add more ad domains

* Merge some ad domains into their corresponding main rules

---------

Co-authored-by: MkQtS <81752398+MkQtS@users.noreply.github.com>
2026-01-11 22:01:15 +08:00
MkQtS
cc8a866863 Add more cn domains (#3163)
* chinamobile: add more domains

* chinaunicom: add wosms.cn

* zhangtao: add more domains

* category-media-cn: add more domains
2026-01-10 22:30:40 +08:00
深鸣
593e467448 tencent-ads: add more domains (#3162) 2026-01-10 22:28:26 +08:00
深鸣
76b32b86b8 geolocation-cn: add more domains (#3161) 2026-01-10 16:48:26 +08:00
深鸣
1150b420a4 xiaomi-ads: add tracker.xiaomixiaoai.com (#3160) 2026-01-10 14:57:56 +08:00
Stephanos Komnenos
2a0fb64efd category-pt: add nexushd.org (#3158) 2026-01-10 14:56:52 +08:00
深鸣
c05ce9952d xiaomi-ads: add stats.music.xiaomi.com (#3159) 2026-01-10 11:15:23 +08:00
xizi
67389b37cc Add schoopia (#3156)
Add domains for 翻转校园
2026-01-09 09:54:32 +08:00
TripleA
39431a9e8f google-deepmind: add missing subdomain for Google AI Labs (#3157) 2026-01-09 09:47:54 +08:00
24 changed files with 364 additions and 252 deletions

View File

@@ -1,3 +1,4 @@
advertising.apple.com @ads
api-adservices.apple.com @ads api-adservices.apple.com @ads
iadsdk.apple.com @ads iadsdk.apple.com @ads
iad.apple.com @ads iad.apple.com @ads

View File

@@ -1,11 +1,14 @@
include:fqnovel-ads include:fqnovel-ads
ad.toutiao.com @ads
analytics.tiktok.com @ads
bds.snssdk.com @ads bds.snssdk.com @ads
byteadverts.com @ads byteadverts.com @ads
ctobsnssdk.com @ads ctobsnssdk.com @ads
dig.bdurl.net @ads dig.bdurl.net @ads
extlog.snssdk.com @ads extlog.snssdk.com @ads
i.snssdk.com @ads i.snssdk.com @ads
log.zijieapi.com @ads
mcs.snssdk.com @ads mcs.snssdk.com @ads
pangolin-dsp-toutiao-b.com @ads pangolin-dsp-toutiao-b.com @ads
pangolin-dsp-toutiao.com @ads pangolin-dsp-toutiao.com @ads

View File

@@ -12,12 +12,12 @@ include:atom-data-ads
include:baidu-ads include:baidu-ads
include:bytedance-ads include:bytedance-ads
include:category-ads-ir include:category-ads-ir
include:cctv @ads
include:clearbit-ads include:clearbit-ads
include:dmm-ads include:dmm-ads
include:duolingo-ads include:duolingo-ads
include:emogi-ads include:emogi-ads
include:flurry-ads include:flurry-ads
include:github-ads
include:google-ads include:google-ads
include:growingio-ads include:growingio-ads
include:hiido-ads include:hiido-ads
@@ -38,7 +38,7 @@ include:newrelic-ads
include:ogury-ads include:ogury-ads
include:ookla-speedtest-ads include:ookla-speedtest-ads
include:openx-ads include:openx-ads
include:picacg-ads include:picacg @ads
include:pocoiq-ads include:pocoiq-ads
include:pubmatic-ads include:pubmatic-ads
include:qihoo360-ads include:qihoo360-ads
@@ -52,6 +52,7 @@ include:tagtic-ads
include:tappx-ads include:tappx-ads
include:television-ads include:television-ads
include:tencent-ads include:tencent-ads
include:tendcloud @ads
include:uberads-ads include:uberads-ads
include:umeng-ads include:umeng-ads
include:unity-ads include:unity-ads
@@ -59,7 +60,7 @@ include:xhamster-ads
include:xiaomi-ads include:xiaomi-ads
include:ximalaya-ads include:ximalaya-ads
include:yahoo-ads include:yahoo-ads
include:zhihu-ads include:zhihu @ads
# Other domains for ads serving # Other domains for ads serving
51.la 51.la
@@ -73,6 +74,7 @@ adbutter.net
addthisedge.com addthisedge.com
ads.trafficjunky.net ads.trafficjunky.net
ads.wteam.xyz ads.wteam.xyz
adservice.sigmob.cn
adtechus.com adtechus.com
adtrue.com adtrue.com
adxprtz.com adxprtz.com
@@ -196,3 +198,7 @@ reyun.com
zhugeapi.com zhugeapi.com
zhugeapi.net zhugeapi.net
zhugeio.com zhugeio.com
# 车来了
atrace.chelaile.net.cn
logs.chelaile.net.cn

View File

@@ -5,6 +5,7 @@
ad-delivery.net @ads ad-delivery.net @ads
adinplay.com @ads adinplay.com @ads
adnxs.com @ads adnxs.com @ads
adview.cn @ads
ads.trafficjunky.net @ads ads.trafficjunky.net @ads
advertserve.com @ads advertserve.com @ads
casalemedia.com @ads casalemedia.com @ads
@@ -18,6 +19,7 @@ mfadsrvr.com @ads
mgid.com @ads mgid.com @ads
ns1p.net @ads ns1p.net @ads
pubmatic.com @ads pubmatic.com @ads
sigmob.com @ads
snapads.com @ads snapads.com @ads
spotxchange.com @ads spotxchange.com @ads
unimhk.com @ads unimhk.com @ads

View File

@@ -5,6 +5,7 @@ include:applysquare
include:hugecore include:hugecore
include:hujiang include:hujiang
include:koolearn include:koolearn
include:schoopia
include:shanbay include:shanbay
include:xueersi include:xueersi
include:yuanfudao include:yuanfudao

View File

@@ -16,6 +16,8 @@ include:ynet
21jingji.com 21jingji.com
# 站长网 # 站长网
admin5.com admin5.com
# 安徽新闻
anhuinews.com
# 锋潮科技 # 锋潮科技
anzhuo.cn anzhuo.cn
# 鞭牛士 # 鞭牛士
@@ -55,6 +57,8 @@ cztv.com
cztvcdn.com cztvcdn.com
cztvcloud.com cztvcloud.com
zjstv.com zjstv.com
# 读特新闻
dutenews.com
# 大智慧 # 大智慧
dzh.com.cn dzh.com.cn
gw.com.cn gw.com.cn
@@ -169,6 +173,9 @@ ssimg.cn
stockstar.com stockstar.com
# 证券时报网 # 证券时报网
stcn.com stcn.com
# 深圳报业集团
sznews.com
szpgm.com
# 大公网/大公报 # 大公网/大公报
takungpao.com takungpao.com
# TechWeb # TechWeb

View File

@@ -141,6 +141,9 @@ lemonhd.org
# 备胎 # 备胎
beitai.pt beitai.pt
# NexusHD
nexushd.org
bitpt.cn bitpt.cn
hdarea.club hdarea.club
hdchina.org hdchina.org

View File

@@ -39,3 +39,5 @@ zggbdszt.cn
zggbdszt.com.cn zggbdszt.com.cn
zggbdszt.net.cn zggbdszt.net.cn
zygbdszt.net.cn zygbdszt.net.cn
ad.cctv.com @ads

View File

@@ -13,11 +13,18 @@ migucloud.com
migufun.com migufun.com
miguvideo.com miguvideo.com
# 中移在线
cmcc-cs.cn
online-cmcc.cn
# 中移互联 # 中移互联
andfx.cn andfx.cn
andfx.net andfx.net
cmicapm.com
cmicrwx.cn cmicrwx.cn
cmicvip.cn
cmpassport.com cmpassport.com
cytxl.com.cn
fetion-portal.com fetion-portal.com
fetionpic.com fetionpic.com
mmarket.com mmarket.com

View File

@@ -23,3 +23,4 @@ wo.com.cn
wo116114.com wo116114.com
wocloud.com.cn wocloud.com.cn
woread.com.cn woread.com.cn
wosms.cn

View File

@@ -230,8 +230,12 @@ xiamenair.com # 厦门航空
12306.cn 12306.cn
95306.cn 95306.cn
ccrgt.com ccrgt.com
## 车来了
chelaile.net.cn
## 跨境巴士 ## 跨境巴士
kuajing84.com kuajing84.com
## 掌上公交
mygolbs.com
## 宁停车 ## 宁停车
ningtingche.com ningtingche.com
## 航旅纵横 ## 航旅纵横

View File

@@ -32,6 +32,7 @@ jules.google.com
# Google AI Labs # Google AI Labs
labs.google labs.google
labs.google.com
aisandbox-pa.googleapis.com aisandbox-pa.googleapis.com
# Android Studio Gemini Code Assist # Android Studio Gemini Code Assist

View File

@@ -1,3 +1,5 @@
include:github-ads
browser.events.data.msn.cn @ads browser.events.data.msn.cn @ads
browser.events.data.msn.com @ads browser.events.data.msn.com @ads
clarity.ms @ads clarity.ms @ads

View File

@@ -1,5 +1,3 @@
include:picacg-ads
bikaa.xyz bikaa.xyz
bikac.xyz bikac.xyz
bikaios.xyz bikaios.xyz
@@ -14,3 +12,6 @@ wikawika.xyz
# Image Resource Domain like `img.diwodiwo.xyz` `s3.diwodiwo.xyz` `storage.diwodiwo.xyz` `storage-b.diwodiwo.xyz` # Image Resource Domain like `img.diwodiwo.xyz` `s3.diwodiwo.xyz` `storage.diwodiwo.xyz` `storage-b.diwodiwo.xyz`
diwodiwo.xyz diwodiwo.xyz
# Ad Domain
full:ad-channel.diwodiwo.xyz @ads
full:ad-display.diwodiwo.xyz @ads

View File

@@ -1,2 +0,0 @@
full:ad-channel.diwodiwo.xyz @ads
full:ad-display.diwodiwo.xyz @ads

4
data/schoopia Normal file
View File

@@ -0,0 +1,4 @@
# 翻转校园 河南昱荣教育科技有限公司 豫ICP备2021021028号
schoopia.com
# 同学派 翻转校园app内置服务
tongxuepie.com

View File

@@ -1,4 +1,5 @@
ad.hpplay.cn @ads ad.hpplay.cn @ads
adc.hpplay.cn @ads
adeng.hpplay.cn @ads adeng.hpplay.cn @ads
ads-uo.api.leiniao.com @ads ads-uo.api.leiniao.com @ads
ads-ut.api.leiniao.com @ads ads-ut.api.leiniao.com @ads

View File

@@ -33,18 +33,23 @@ isdspeed.qq.com @ads
log.tbs.qq.com @ads log.tbs.qq.com @ads
mdt.qq.com @ads mdt.qq.com @ads
monitor.music.qq.com @ads monitor.music.qq.com @ads
monitor.uu.qq.com @ads
mpush.qq.com @ads mpush.qq.com @ads
mtrace.qq.com @ads mtrace.qq.com @ads
pgdt.gtimg.cn @ads pgdt.gtimg.cn @ads
pingjs.qq.com @ads pingjs.qq.com @ads
pmir.3g.qq.com @ads
push.qq.com @ads push.qq.com @ads
qqdata.ab.qq.com @ads
report.qqweb.qq.com @ads report.qqweb.qq.com @ads
rmonitor.qq.com @ads
sngmta.qq.com @ads sngmta.qq.com @ads
stat.y.qq.com @ads stat.y.qq.com @ads
tajs.qq.com @ads tajs.qq.com @ads
tcss.qq.com @ads tcss.qq.com @ads
tmead.y.qq.com @ads tmead.y.qq.com @ads
tmeadcomm.y.qq.com @ads tmeadcomm.y.qq.com @ads
tpstelemetry.tencent.com @ads
trace.qq.com @ads trace.qq.com @ads
ugdtimg.com @ads ugdtimg.com @ads
wit.qq.com @ads wit.qq.com @ads

View File

@@ -2,3 +2,5 @@ tendcloud.com
talkingdata.com talkingdata.com
talkinggame.com talkinggame.com
cpatrk.net cpatrk.net
tdsdk.cpatrk.net @ads

View File

@@ -14,6 +14,8 @@ logupdate.avlyun.sec.miui.com @ads
misc.in.duokanbox.com @ads misc.in.duokanbox.com @ads
sentry.d.mi.com @ads sentry.d.mi.com @ads
sentry.d.xiaomi.net @ads sentry.d.xiaomi.net @ads
stats.music.xiaomi.com @ads
tjqonline.cn @ads tjqonline.cn @ads
tracker.ai.xiaomi.com @ads tracker.ai.xiaomi.com @ads
tracker.xiaomixiaoai.com @ads
tracking.miui.com @ads tracking.miui.com @ads

View File

@@ -1,3 +1,10 @@
# 掌淘
mob.com mob.com
dutils.com sharesdk.cn
# 游昆
accuratead.cn accuratead.cn
dutils.com
hiaiabc.com
mobsdks.com
yksdks.com

View File

@@ -1,4 +1,5 @@
include:zhihu-ads
zhihu.com zhihu.com
zhimg.com zhimg.com
crash2.zhihu.com @ads
zhihu-web-analytics.zhihu.com @ads

View File

@@ -1,2 +0,0 @@
crash2.zhihu.com @ads
zhihu-web-analytics.zhihu.com @ads

533
main.go
View File

@@ -29,10 +29,31 @@ const (
RuleTypeInclude string = "include" RuleTypeInclude string = "include"
) )
var (
TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
ValueChecker = regexp.MustCompile(`^[a-z0-9!\.-]+$`)
AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)
SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`)
)
var (
refMap = make(map[string]*List)
plMap = make(map[string]*ParsedList)
finalMap = make(map[string][]Entry)
cirIncMap = make(map[string]bool) // Used for circular inclusion detection
)
type Entry struct { type Entry struct {
Type string Type string
Value string Value string
Attrs []*router.Domain_Attribute Attrs []string
Affs []string
}
type Inclusion struct {
Source string
MustAttrs []string
BanAttrs []string
} }
type List struct { type List struct {
@@ -41,151 +62,117 @@ type List struct {
} }
type ParsedList struct { type ParsedList struct {
Name string Name string
Inclusion map[string]bool Inclusions []Inclusion
Entry []Entry Entry []Entry
} }
func (l *ParsedList) toPlainText(listName string) error { func makeProtoList(listName string, entries *[]Entry) (*router.GeoSite, error) {
var entryBytes []byte
for _, entry := range l.Entry {
var attrString string
if entry.Attrs != nil {
for _, attr := range entry.Attrs {
attrString += "@" + attr.GetKey() + ","
}
attrString = strings.TrimRight(":"+attrString, ",")
}
// Entry output format is: type:domain.tld:@attr1,@attr2
entryBytes = append(entryBytes, []byte(entry.Type+":"+entry.Value+attrString+"\n")...)
}
if err := os.WriteFile(filepath.Join(*outputDir, listName+".txt"), entryBytes, 0644); err != nil {
return err
}
return nil
}
func (l *ParsedList) toProto() (*router.GeoSite, error) {
site := &router.GeoSite{ site := &router.GeoSite{
CountryCode: l.Name, CountryCode: listName,
Domain: make([]*router.Domain, 0, len(*entries)),
} }
for _, entry := range l.Entry { for _, entry := range *entries {
pdomain := &router.Domain{Value: entry.Value}
for _, attr := range entry.Attrs {
pdomain.Attribute = append(pdomain.Attribute, &router.Domain_Attribute{
Key: attr,
TypedValue: &router.Domain_Attribute_BoolValue{BoolValue: true},
})
}
switch entry.Type { switch entry.Type {
case RuleTypeDomain: case RuleTypeDomain:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_RootDomain
Type: router.Domain_RootDomain,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeRegexp: case RuleTypeRegexp:
// check regexp validity to avoid runtime error pdomain.Type = router.Domain_Regex
_, err := regexp.Compile(entry.Value)
if err != nil {
return nil, fmt.Errorf("invalid regexp in list %s: %s", l.Name, entry.Value)
}
site.Domain = append(site.Domain, &router.Domain{
Type: router.Domain_Regex,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeKeyword: case RuleTypeKeyword:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_Plain
Type: router.Domain_Plain,
Value: entry.Value,
Attribute: entry.Attrs,
})
case RuleTypeFullDomain: case RuleTypeFullDomain:
site.Domain = append(site.Domain, &router.Domain{ pdomain.Type = router.Domain_Full
Type: router.Domain_Full,
Value: entry.Value,
Attribute: entry.Attrs,
})
default:
return nil, fmt.Errorf("unknown domain type: %s", entry.Type)
} }
site.Domain = append(site.Domain, pdomain)
} }
return site, nil return site, nil
} }
func exportPlainTextList(list []string, refName string, pl *ParsedList) { func writePlainList(exportedName string) error {
for _, listName := range list { targetList, exist := finalMap[strings.ToUpper(exportedName)]
if strings.EqualFold(refName, listName) { if !exist || len(targetList) == 0 {
if err := pl.toPlainText(strings.ToLower(refName)); err != nil { return fmt.Errorf("'%s' list does not exist or is empty.", exportedName)
fmt.Println("Failed:", err) }
continue file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName) + ".txt"))
} if err != nil {
fmt.Printf("'%s' has been generated successfully.\n", listName) return err
}
defer file.Close()
w := bufio.NewWriter(file)
for _, entry := range targetList {
// Entry output format is: type:domain.tld:@attr1,@attr2
var attrString string
if entry.Attrs != nil {
attrString = ":@" + strings.Join(entry.Attrs, ",@")
} }
fmt.Fprintln(w, entry.Type + ":" + entry.Value + attrString)
} }
return w.Flush()
} }
func removeComment(line string) string { func parseEntry(line string) (Entry, error) {
idx := strings.Index(line, "#") var entry Entry
if idx == -1 { parts := strings.Fields(line)
return line
}
return strings.TrimSpace(line[:idx])
}
func parseDomain(domain string, entry *Entry) error { // Parse type and value
kv := strings.Split(domain, ":") rawTypeVal := parts[0]
kv := strings.Split(rawTypeVal, ":")
if len(kv) == 1 { if len(kv) == 1 {
entry.Type = RuleTypeDomain entry.Type = RuleTypeDomain // Default type
entry.Value = strings.ToLower(kv[0]) entry.Value = strings.ToLower(rawTypeVal)
return nil } else if len(kv) == 2 {
}
if len(kv) == 2 {
entry.Type = strings.ToLower(kv[0]) entry.Type = strings.ToLower(kv[0])
if entry.Type == RuleTypeRegexp {
if strings.EqualFold(entry.Type, RuleTypeRegexp) {
entry.Value = kv[1] entry.Value = kv[1]
} else { } else {
entry.Value = strings.ToLower(kv[1]) entry.Value = strings.ToLower(kv[1])
} }
} else {
return nil return entry, fmt.Errorf("invalid format: %s", line)
} }
// Check type and value
return fmt.Errorf("invalid format: %s", domain) if !TypeChecker.MatchString(entry.Type) {
} return entry, fmt.Errorf("invalid type: %s", entry.Type)
func parseAttribute(attr string) (*router.Domain_Attribute, error) {
var attribute router.Domain_Attribute
if len(attr) == 0 || attr[0] != '@' {
return &attribute, fmt.Errorf("invalid attribute: %s", attr)
} }
if entry.Type == RuleTypeRegexp {
attribute.Key = strings.ToLower(attr[1:]) // Trim attribute prefix `@` character if _, err := regexp.Compile(entry.Value); err != nil {
attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true} return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
return &attribute, nil
}
func parseEntry(line string) (Entry, error) {
line = strings.TrimSpace(line)
parts := strings.Split(line, " ")
var entry Entry
if len(parts) == 0 {
return entry, fmt.Errorf("empty entry")
}
if err := parseDomain(parts[0], &entry); err != nil {
return entry, err
}
for i := 1; i < len(parts); i++ {
attr, err := parseAttribute(parts[i])
if err != nil {
return entry, err
} }
entry.Attrs = append(entry.Attrs, attr) } else if !ValueChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid value: %s", entry.Value)
} }
// Parse/Check attributes and affiliations
for _, part := range parts[1:] {
if strings.HasPrefix(part, "@") {
attr := strings.ToLower(part[1:]) // Trim attribute prefix `@` character
if !AttrChecker.MatchString(attr) {
return entry, fmt.Errorf("invalid attribute key: %s", attr)
}
entry.Attrs = append(entry.Attrs, attr)
} else if strings.HasPrefix(part, "&") {
aff := strings.ToUpper(part[1:]) // Trim affiliation prefix `&` character
if !SiteChecker.MatchString(aff) {
return entry, fmt.Errorf("invalid affiliation key: %s", aff)
}
entry.Affs = append(entry.Affs, aff)
} else {
return entry, fmt.Errorf("invalid attribute/affiliation: %s", part)
}
}
// Sort attributes
sort.Slice(entry.Attrs, func(i, j int) bool {
return entry.Attrs[i] < entry.Attrs[j]
})
return entry, nil return entry, nil
} }
@@ -196,14 +183,20 @@ func Load(path string) (*List, error) {
} }
defer file.Close() defer file.Close()
list := &List{ listName := strings.ToUpper(filepath.Base(path))
Name: strings.ToUpper(filepath.Base(path)), if !SiteChecker.MatchString(listName) {
return nil, fmt.Errorf("invalid list name: %s", listName)
} }
list := &List{Name: listName}
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
for scanner.Scan() { for scanner.Scan() {
line := strings.TrimSpace(scanner.Text()) line := scanner.Text()
line = removeComment(line) // Remove comments
if len(line) == 0 { if idx := strings.Index(line, "#"); idx != -1 {
line = line[:idx]
}
line = strings.TrimSpace(line)
if line == "" {
continue continue
} }
entry, err := parseEntry(line) entry, err := parseEntry(line)
@@ -216,99 +209,158 @@ func Load(path string) (*List, error) {
return list, nil return list, nil
} }
func isMatchAttr(Attrs []*router.Domain_Attribute, includeKey string) bool { func ParseList(refList *List) error {
isMatch := false pl := plMap[refList.Name]
mustMatch := true if pl == nil {
matchName := includeKey pl = &ParsedList{Name: refList.Name}
if strings.HasPrefix(includeKey, "!") { plMap[refList.Name] = pl
isMatch = true
mustMatch = false
matchName = strings.TrimLeft(includeKey, "!")
} }
for _, entry := range refList.Entry {
for _, Attr := range Attrs { if entry.Type == RuleTypeInclude {
attrName := Attr.Key if len(entry.Affs) != 0 {
if mustMatch { return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value)
if matchName == attrName {
isMatch = true
break
} }
} else { inc := Inclusion{Source: strings.ToUpper(entry.Value)}
if matchName == attrName { for _, attr := range entry.Attrs {
isMatch = false if strings.HasPrefix(attr, "-") {
break inc.BanAttrs = append(inc.BanAttrs, attr[1:]) // Trim attribute prefix `-` character
}
}
}
return isMatch
}
func createIncludeAttrEntrys(list *List, matchAttr *router.Domain_Attribute) []Entry {
newEntryList := make([]Entry, 0, len(list.Entry))
matchName := matchAttr.Key
for _, entry := range list.Entry {
matched := isMatchAttr(entry.Attrs, matchName)
if matched {
newEntryList = append(newEntryList, entry)
}
}
return newEntryList
}
func ParseList(list *List, ref map[string]*List) (*ParsedList, error) {
pl := &ParsedList{
Name: list.Name,
Inclusion: make(map[string]bool),
}
entryList := list.Entry
for {
newEntryList := make([]Entry, 0, len(entryList))
hasInclude := false
for _, entry := range entryList {
if entry.Type == RuleTypeInclude {
refName := strings.ToUpper(entry.Value)
if entry.Attrs != nil {
for _, attr := range entry.Attrs {
InclusionName := strings.ToUpper(refName + "@" + attr.Key)
if pl.Inclusion[InclusionName] {
continue
}
pl.Inclusion[InclusionName] = true
refList := ref[refName]
if refList == nil {
return nil, fmt.Errorf("list not found: %s", entry.Value)
}
attrEntrys := createIncludeAttrEntrys(refList, attr)
if len(attrEntrys) != 0 {
newEntryList = append(newEntryList, attrEntrys...)
}
}
} else { } else {
InclusionName := refName inc.MustAttrs = append(inc.MustAttrs, attr)
if pl.Inclusion[InclusionName] {
continue
}
pl.Inclusion[InclusionName] = true
refList := ref[refName]
if refList == nil {
return nil, fmt.Errorf("list not found: %s", entry.Value)
}
newEntryList = append(newEntryList, refList.Entry...)
} }
hasInclude = true
} else {
newEntryList = append(newEntryList, entry)
} }
} pl.Inclusions = append(pl.Inclusions, inc)
entryList = newEntryList } else {
if !hasInclude { if len(entry.Affs) != 0 {
break for _, aff := range entry.Affs {
apl := plMap[aff]
if apl == nil {
apl = &ParsedList{Name: aff}
plMap[aff] = apl
}
apl.Entry = append(apl.Entry, entry)
}
}
pl.Entry = append(pl.Entry, entry)
} }
} }
pl.Entry = entryList return nil
}
return pl, nil func polishList(rl *[]Entry) []Entry {
// Remove basic duplicates
pendingList := make([]Entry, 0, len(*rl)) // Exactly same entries removed
entry2String := func(e Entry) string { // Attributes already sorted
return e.Type + ":" + e.Value + "@" + strings.Join(e.Attrs, "@")
}
bscDupMap := make(map[string]bool)
for _, entry := range *rl {
if estring := entry2String(entry); !bscDupMap[estring] {
bscDupMap[estring] = true
pendingList = append(pendingList, entry)
}
}
finalList := make([]Entry, 0, len(pendingList))
queuingList := make([]Entry, 0, len(pendingList)) // Domain/full entries without attr
domainsMap := make(map[string]bool)
for _, entry := range pendingList {
switch entry.Type { // Bypass regexp, keyword and "full/domain with attr"
case RuleTypeRegexp:
finalList = append(finalList, entry)
case RuleTypeKeyword:
finalList = append(finalList, entry)
case RuleTypeDomain:
domainsMap[entry.Value] = true
if len(entry.Attrs) != 0 {
finalList = append(finalList, entry)
} else {
queuingList = append(queuingList, entry)
}
case RuleTypeFullDomain:
if len(entry.Attrs) != 0 {
finalList = append(finalList, entry)
} else {
queuingList = append(queuingList, entry)
}
}
}
// Remove redundant subdomains for full/domain without attr
for _, qentry := range queuingList {
parts := strings.Split(qentry.Value, ".")
isRedundant := false
for i := 1; i < len(parts) - 1 ; i++ {
// A two-label "name.tld" domain has no parent to check, and a bare TLD is never treated as a parent
parentdomain := strings.Join(parts[i:], ".")
if domainsMap[parentdomain] {
isRedundant = true
break
}
}
if !isRedundant {
finalList = append(finalList, qentry)
}
}
// Sort final entries
sort.Slice(finalList, func(i, j int) bool {
if finalList[i].Type != finalList[j].Type {
return finalList[i].Type < finalList[j].Type
}
if finalList[i].Value != finalList[j].Value {
return finalList[i].Value < finalList[j].Value
}
// Ideally, the comparison here will not be triggered by source data
return strings.Join(finalList[i].Attrs, ",") < strings.Join(finalList[j].Attrs, ",")
})
return finalList
}
func ResolveList(pl *ParsedList) error {
if _, pldone := finalMap[pl.Name]; pldone { return nil }
if cirIncMap[pl.Name] {
return fmt.Errorf("circular inclusion in: %s", pl.Name)
}
cirIncMap[pl.Name] = true
defer delete(cirIncMap, pl.Name)
isMatchAttrFilters := func(entry Entry, incFilter Inclusion) bool {
if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 { return true }
if len(entry.Attrs) == 0 { return len(incFilter.MustAttrs) == 0 }
attrMap := make(map[string]bool)
for _, attr := range entry.Attrs {
attrMap[attr] = true
}
for _, m := range incFilter.MustAttrs {
if !attrMap[m] { return false }
}
for _, b := range incFilter.BanAttrs {
if attrMap[b] { return false }
}
return true
}
var roughList []Entry
roughList = append(roughList, pl.Entry...)
for _, inc := range pl.Inclusions {
incPl, exist := plMap[inc.Source]
if !exist {
return fmt.Errorf("list '%s' includes a non-existent list: '%s'", pl.Name, inc.Source)
}
if err := ResolveList(incPl); err != nil {
return err
}
for _, ientry := range finalMap[inc.Source] {
if isMatchAttrFilters(ientry, inc) {
roughList = append(roughList, ientry)
}
}
}
finalMap[pl.Name] = polishList(&roughList)
return nil
} }
func main() { func main() {
@@ -317,7 +369,7 @@ func main() {
dir := *dataPath dir := *dataPath
fmt.Println("Use domain lists in", dir) fmt.Println("Use domain lists in", dir)
ref := make(map[string]*List) // Generate refMap
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
@@ -329,7 +381,7 @@ func main() {
if err != nil { if err != nil {
return err return err
} }
ref[list.Name] = list refMap[list.Name] = list
return nil return nil
}) })
if err != nil { if err != nil {
@@ -337,6 +389,22 @@ func main() {
os.Exit(1) os.Exit(1)
} }
// Generate plMap
for _, refList := range refMap {
if err := ParseList(refList); err != nil {
fmt.Println("Failed to ParseList:", err)
os.Exit(1)
}
}
// Generate finalMap
for _, pl := range plMap {
if err := ResolveList(pl); err != nil {
fmt.Println("Failed to ResolveList:", err)
os.Exit(1)
}
}
// Create output directory if not exist // Create output directory if not exist
if _, err := os.Stat(*outputDir); os.IsNotExist(err) { if _, err := os.Stat(*outputDir); os.IsNotExist(err) {
if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil { if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil {
@@ -345,43 +413,28 @@ func main() {
} }
} }
protoList := new(router.GeoSiteList) // Export plaintext list
var existList []string if *exportLists != "" {
for refName, list := range ref { exportedListSlice := strings.Split(*exportLists, ",")
pl, err := ParseList(list, ref) for _, exportedList := range exportedListSlice {
if err != nil { if err := writePlainList(exportedList); err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to write list:", err)
os.Exit(1) continue
}
fmt.Printf("list: '%s' has been generated successfully.\n", exportedList)
} }
site, err := pl.toProto() }
// Generate dat file
protoList := new(router.GeoSiteList)
for siteName, siteEntries := range finalMap {
site, err := makeProtoList(siteName, &siteEntries)
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed:", err)
os.Exit(1) os.Exit(1)
} }
protoList.Entry = append(protoList.Entry, site) protoList.Entry = append(protoList.Entry, site)
// Flatten and export plaintext list
if *exportLists != "" {
if existList != nil {
exportPlainTextList(existList, refName, pl)
} else {
exportedListSlice := strings.Split(*exportLists, ",")
for _, exportedListName := range exportedListSlice {
fileName := filepath.Join(dir, exportedListName)
_, err := os.Stat(fileName)
if err == nil || os.IsExist(err) {
existList = append(existList, exportedListName)
} else {
fmt.Printf("'%s' list does not exist in '%s' directory.\n", exportedListName, dir)
}
}
if existList != nil {
exportPlainTextList(existList, refName, pl)
}
}
}
} }
// Sort protoList so the marshaled list is reproducible // Sort protoList so the marshaled list is reproducible
sort.SliceStable(protoList.Entry, func(i, j int) bool { sort.SliceStable(protoList.Entry, func(i, j int) bool {
return protoList.Entry[i].CountryCode < protoList.Entry[j].CountryCode return protoList.Entry[i].CountryCode < protoList.Entry[j].CountryCode
@@ -389,11 +442,11 @@ func main() {
protoBytes, err := proto.Marshal(protoList) protoBytes, err := proto.Marshal(protoList)
if err != nil { if err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to marshal:", err)
os.Exit(1) os.Exit(1)
} }
if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil { if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil {
fmt.Println("Failed:", err) fmt.Println("Failed to write output:", err)
os.Exit(1) os.Exit(1)
} else { } else {
fmt.Println(*outputName, "has been generated successfully.") fmt.Println(*outputName, "has been generated successfully.")