Compare commits

...

15 Commits

Author SHA1 Message Date
fernvenue
b117cf851f Add packages.microsoft.com to microsoft-dev. (#3234) 2026-02-01 11:58:25 +08:00
jinqiang zhang
0b6606758d add louisvuitton (#3233) 2026-01-31 18:04:21 +08:00
Blackteahamburger
fcf9c67d83 category-education-cn: add zjzs.net (#3232) 2026-01-30 19:20:41 +08:00
MkQtS
56e0b47c73 Clean up ad lists (#3231)
* category-ads-all: include adjust

* category-ads-all: include clearbit

* category-ads-all: include ogury

* category-ads-all: include openx

* category-ads-all: include pubmatic

and remove pubmatic-ads

* category-ads-all: include segment

* category-ads-all: include supersonic

* geolocation-cn: remove the inclusion of umeng

it's included in alibaba

* add unitychina

* remove unity-ads

use unity@ads or unitychina@ads instead
2026-01-30 12:10:37 +08:00
Signaliks
4f45866be4 Update cloudflare (#3229) 2026-01-29 13:34:52 +08:00
sergeevms
40d763daca Update atlassian (#3228)
* Update atlassian

* Supplement and sort

data source: https://support.atlassian.com/organization-administration/docs/ip-addresses-and-domains-for-atlassian-cloud-products/

---------

Co-authored-by: MkQtS <81752398+MkQtS@users.noreply.github.com>
2026-01-29 13:33:48 +08:00
MkQtS
6c91898557 Cleanup ad lists (#3227)
Merge ad lists containing too few rules.

merged/removed lists:

adcolony-ads applovin-ads atom-data-ads emogi-ads flurry-ads
growingio-ads hiido-ads hotjar-ads inner-active-ads mopub-ads
mxplayer-ads newrelic-ads pocoiq-ads tagtic-ads tappx-ads uberads-ads
2026-01-28 17:43:54 +08:00
MkQtS
91da593233 apple: add aod-ssl.itunes.apple.com with cn attr (#3226) 2026-01-28 16:51:48 +08:00
TripleA
9f1c6b6922 Add Bohemia Interactive and Battleye domains (#3223) 2026-01-28 16:41:32 +08:00
MkQtS
b3bae7de8f Update category-ads (#3222)
* remove ads attr from openaicom.imgix.net

imgix.net is serving for pictures, not ads/tracking

* category-ads: include more ad domains
2026-01-28 13:07:34 +08:00
Jinzhe Zeng
4e9b28f951 add crixet.com to openai (#3221)
Crixet has been acquired by OpenAI, per https://crixet.com
2026-01-28 11:49:57 +08:00
xiyao
3c0a538219 samsung: add ospserver.net (#3219)
Samsung OneUI update server
2026-01-27 16:52:46 +08:00
MkQtS
2160230ef9 terabox: add more domains (#3218) 2026-01-27 15:24:47 +08:00
MkQtS
5c38f34456 Add cmd/datdump/main.go (#3213)
* Feat: add a new datdump tool

* Refactor: address code review comments

* Refactor: remove export all from main program

use datdump instead

* Refactor: allow spaces in exportlists

e.g. `--exportlists="lista, listb"`

* all: cleanup

* apply review suggestion

---------

Co-authored-by: database64128 <free122448@hotmail.com>
2026-01-24 23:11:35 +08:00
Zeehan2005
8e62b9b541 Enhance README with additional attribute @cn details (#3212)
Expanded the explanation of attributes in the README to include domains available in China mainland.

[skip ci]
2026-01-24 16:05:37 +08:00
49 changed files with 418 additions and 189 deletions

View File

@@ -36,7 +36,8 @@ jobs:
- name: Build dlc.dat and plain lists
run: |
cd code || exit 1
go run ./ --outputdir=../ --exportlists=_all_,category-ads-all,tld-cn,cn,tld-\!cn,geolocation-\!cn,apple,icloud
go run ./ --outputdir=../ --exportlists=category-ads-all,tld-cn,cn,tld-\!cn,geolocation-\!cn,apple,icloud
go run ./cmd/datdump/main.go --inputdata=../dlc.dat --outputdir=../ --exportlists=_all_
cd ../ && rm -rf code
- name: Generate dlc.dat sha256 hash

2
.gitignore vendored
View File

@@ -8,5 +8,5 @@
dlc.dat
# Exported plaintext lists.
dlc.dat_plain.yml
/*.yml
/*.txt

View File

@@ -141,7 +141,7 @@ Theoretically any string can be used as the name, as long as it is a valid file
### Attributes
Attribute is useful for sub-group of domains, especially for filtering purpose. For example, the list of `google` domains may contains its main domains, as well as domains that serve ads. The ads domains may be marked by attribute `@ads`, and can be used as `geosite:google@ads` in V2Ray routing.
Attributes are useful for marking sub-groups of domains, especially for filtering purposes. For example, the `google` list may contain its main domains as well as domains that serve ads. The ads domains may be marked with the attribute `@ads`, and can be used as `geosite:google@ads` in V2Ray routing. Domains and services that originate from outside mainland China but have access points in mainland China may be marked with the attribute `@cn`.
## Contribution guideline

164
cmd/datdump/main.go Normal file
View File

@@ -0,0 +1,164 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/v2fly/domain-list-community/internal/dlc"
router "github.com/v2fly/v2ray-core/v5/app/router/routercommon"
"google.golang.org/protobuf/proto"
)
// Command-line flags of the datdump tool.
var (
	// exportLists is a comma-separated selection of lists to export.
	exportLists = flag.String("exportlists", "", "Lists to be exported, separated by ',' (empty for _all_)")
	// inputData is the path of the geosite dat file to read.
	inputData = flag.String("inputdata", "dlc.dat", "Name of the geosite dat file")
	// outputDir is the directory that receives every exported file.
	outputDir = flag.String("outputdir", "./", "Directory to place all generated files")
)
type (
	// DomainRule is a single rule of a list in plaintext form.
	DomainRule struct {
		Type  string   // rule type identifier (the part before the first ':' in the rendered form)
		Value string   // rule value
		Attrs []string // attribute keys attached to the rule
	}

	// DomainList is a named collection of rules.
	DomainList struct {
		Name  string
		Rules []DomainRule
	}
)

// domain2String renders the rule as "type:value", followed by
// ":@attr1,@attr2,..." when the rule carries at least one attribute.
func (r *DomainRule) domain2String() string {
	base := r.Type + ":" + r.Value
	if len(r.Attrs) == 0 {
		return base
	}
	return base + ":@" + strings.Join(r.Attrs, ",@")
}
// loadGeosite reads the geosite dat file at path and converts every entry
// into a DomainList.
//
// It returns the lists in the order they are stored in the file, together
// with an index of the same lists keyed by their upper-cased name; the map
// values point into the returned slice. An error is returned when the file
// cannot be read, cannot be unmarshaled as a GeoSiteList, or contains a rule
// whose type is not one of the four known kinds.
func loadGeosite(path string) ([]DomainList, map[string]*DomainList, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to read geosite file: %w", err)
	}
	geosites := &router.GeoSiteList{}
	if err := proto.Unmarshal(raw, geosites); err != nil {
		return nil, nil, fmt.Errorf("failed to unmarshal: %w", err)
	}

	total := len(geosites.Entry)
	lists := make([]DomainList, total)
	byName := make(map[string]*DomainList, total)
	for idx, site := range geosites.Entry {
		// Fill the slot in place so the index can hold a stable pointer to it.
		list := &lists[idx]
		list.Name = strings.ToUpper(site.CountryCode)
		list.Rules = make([]DomainRule, 0, len(site.Domain))
		for _, d := range site.Domain {
			var ruleType string
			switch d.Type {
			case router.Domain_Full:
				ruleType = dlc.RuleTypeFullDomain
			case router.Domain_Plain:
				ruleType = dlc.RuleTypeKeyword
			case router.Domain_Regex:
				ruleType = dlc.RuleTypeRegexp
			case router.Domain_RootDomain:
				ruleType = dlc.RuleTypeDomain
			default:
				return nil, nil, fmt.Errorf("invalid rule type: %+v", d.Type)
			}
			// Attrs stays nil for rules without attributes.
			var attrs []string
			for _, a := range d.Attribute {
				attrs = append(attrs, a.Key)
			}
			list.Rules = append(list.Rules, DomainRule{
				Type:  ruleType,
				Value: d.Value,
				Attrs: attrs,
			})
		}
		byName[list.Name] = list
	}
	return lists, byName, nil
}
// exportSite writes the rules of one list to "<name>.yml" inside the output
// directory, as a YAML mapping from name to a sequence of quoted rules.
//
// The list is looked up case-insensitively (name is upper-cased for the
// lookup), while the file name and the YAML key use name exactly as given.
// An error is returned when the list does not exist, has no rules, or when
// the file cannot be created, written or closed.
func exportSite(name string, domainListByName map[string]*DomainList) (err error) {
	domainList, ok := domainListByName[strings.ToUpper(name)]
	if !ok {
		return fmt.Errorf("list '%s' does not exist", name)
	}
	if len(domainList.Rules) == 0 {
		return fmt.Errorf("list '%s' is empty", name)
	}
	file, err := os.Create(filepath.Join(*outputDir, name+".yml"))
	if err != nil {
		return err
	}
	// A failing Close on a file we just wrote can mean lost data, so report
	// it unless an earlier error is already being returned.
	defer func() {
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()
	w := bufio.NewWriter(file)
	// Write errors are sticky in bufio.Writer and are reported by Flush.
	fmt.Fprintf(w, "%s:\n", name)
	for _, domain := range domainList.Rules {
		fmt.Fprintf(w, " - %q\n", domain.domain2String())
	}
	return w.Flush()
}
// exportAll writes every list in domainLists to a single YAML file named
// filename inside the output directory.
//
// The document has one top-level key, "lists", holding a sequence of
// mappings with the lower-cased list name, its rule count ("length") and its
// quoted rules. An error is returned when the file cannot be created,
// written or closed.
func exportAll(filename string, domainLists []DomainList) (err error) {
	file, err := os.Create(filepath.Join(*outputDir, filename))
	if err != nil {
		return err
	}
	// A failing Close on a file we just wrote can mean lost data, so report
	// it unless an earlier error is already being returned.
	defer func() {
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()
	w := bufio.NewWriter(file)
	// Write errors are sticky in bufio.Writer and are reported by Flush.
	w.WriteString("lists:\n")
	for _, domainList := range domainLists {
		// "length" and "rules" must line up with "name" (the column after
		// "- "), and the rule items must be nested deeper than "rules",
		// otherwise the output is not a valid YAML document.
		fmt.Fprintf(w, "  - name: %s\n", strings.ToLower(domainList.Name))
		fmt.Fprintf(w, "    length: %d\n", len(domainList.Rules))
		w.WriteString("    rules:\n")
		for _, domain := range domainList.Rules {
			fmt.Fprintf(w, "      - %q\n", domain.domain2String())
		}
	}
	return w.Flush()
}
// main loads the geosite dat file named by -inputdata and exports the lists
// selected with -exportlists into -outputdir.
//
// The selection is split on ',' and each item is trimmed, so spaces around
// names are allowed; an empty selection means "_all_". A failed export is
// reported and skipped without changing the exit status; only a failure to
// create the output directory or to load the dat file terminates the program
// with status 1.
func main() {
	flag.Parse()

	// Create output directory if not exist
	_, statErr := os.Stat(*outputDir)
	if os.IsNotExist(statErr) {
		if err := os.MkdirAll(*outputDir, 0755); err != nil {
			fmt.Println("Failed to create output directory:", err)
			os.Exit(1)
		}
	}

	fmt.Printf("Loading %s...\n", *inputData)
	domainLists, domainListByName, err := loadGeosite(*inputData)
	if err != nil {
		fmt.Println("Failed to loadGeosite:", err)
		os.Exit(1)
	}

	// Collect the requested list names, dropping blank items.
	var targets []string
	for field := range strings.SplitSeq(*exportLists, ",") {
		field = strings.TrimSpace(field)
		if field == "" {
			continue
		}
		targets = append(targets, field)
	}
	if len(targets) == 0 {
		targets = append(targets, "_all_")
	}

	for _, target := range targets {
		switch {
		case strings.EqualFold(target, "_all_"):
			plainName := filepath.Base(*inputData) + "_plain.yml"
			if err := exportAll(plainName, domainLists); err != nil {
				fmt.Println("Failed to exportAll:", err)
				continue
			}
		default:
			if err := exportSite(target, domainListByName); err != nil {
				fmt.Println("Failed to exportSite:", err)
				continue
			}
		}
		fmt.Printf("list: '%s' has been exported successfully.\n", target)
	}
}

View File

@@ -1 +0,0 @@
adcolony.com @ads

View File

@@ -1,4 +1,4 @@
adjust.com @ads
adjust.net.in @ads
adjust.io @ads
adjust.net.in @ads
adjust.world @ads

View File

@@ -756,6 +756,7 @@ full:amp-api-edge.apps.apple.com @cn
full:amp-api-search-edge.apps.apple.com @cn
full:amp-api.apps.apple.com @cn
full:amp-api.music.apple.com @cn
full:aod-ssl.itunes.apple.com @cn
full:aod.itunes.apple.com @cn
full:api-edge.apps.apple.com @cn
full:apptrailers.itunes.apple.com @cn

View File

@@ -1,2 +0,0 @@
applovin.com @ads
applvn.com @ads

View File

@@ -1,7 +1,11 @@
include:trello
atl-paas.net
atlassian-dev.net
atlassian.com
atlassian.net
bitbucket.io
bitbucket.org
jira.com
ss-inf.net
statuspage.io
include:trello

View File

@@ -1,3 +0,0 @@
atom-data.io @ads
analytics-data.io @ads
ironbeast.io @ads

9
data/bohemia Normal file
View File

@@ -0,0 +1,9 @@
arma3.com
armaplatform.com
bistudio.com
bohemia.net
dayz.com
makearmanotwar.com
silicagame.com
vigorgame.com
ylands.com

View File

@@ -1,29 +1,21 @@
# This file contains domains that are clearly serving ads
include:acfun-ads
include:adcolony-ads
include:adjust-ads
include:adobe-ads
include:alibaba-ads
include:amazon-ads
include:apple-ads
include:applovin-ads
include:atom-data-ads
include:baidu-ads
include:bytedance-ads
include:category-ads-ir
include:cctv @ads
include:clearbit-ads
include:disney @ads
include:dmm-ads
include:duolingo-ads
include:emogi-ads
include:flurry-ads
include:gamersky @ads
include:google-ads
include:growingio-ads
include:hiido-ads
include:hotjar-ads
include:hetzner @ads
include:hunantv-ads
include:inner-active-ads
include:iqiyi-ads
include:jd-ads
include:kuaishou-ads
@@ -31,30 +23,25 @@ include:kugou-ads
include:letv-ads
include:meta-ads
include:microsoft-ads
include:mopub-ads
include:mxplayer-ads
include:netease-ads
include:newrelic-ads
include:ogury-ads
include:ookla-speedtest-ads
include:openx-ads
include:openai @ads
include:picacg @ads
include:pocoiq-ads
include:pubmatic-ads
include:pikpak @ads
include:pixiv @ads
include:qihoo360-ads
include:segment-ads
include:samsung @ads
include:sina-ads
include:snap @ads
include:sohu-ads
include:spotify-ads
include:supersonic-ads
include:tagtic-ads
include:tappx-ads
include:television-ads
include:tencent-ads
include:tendcloud @ads
include:uberads-ads
include:twitter @ads
include:umeng-ads
include:unity-ads
include:unity @ads
include:unitychina @ads
include:xhamster-ads
include:xiaomi-ads
include:ximalaya-ads
@@ -77,26 +64,32 @@ adservice.sigmob.cn
adtechus.com
adtrue.com
adxprtz.com
assets.growingio.com
cdn.advertserve.com
cdn.banclip.com
cfts1tifqr.com
contentabc.com
cretgate.com
data.flurry.com
decide.mixpanel.com
emogi.com
ero-advertising.com
eroadvertising.com
evt.mxplay.com
exoclick.com
exosrv.com
go2.global
gozendata.com
gzads.com
gz-data.com
gzads.com
img-bss.csdn.net
imglnkc.com
imglnkd.com
inner-active.mobi
innovid.com
jads.co
jl3.yjaxa.top
js-agent.newrelic.com
juicyads.com
kepler-37b.com
leanplum.com
@@ -104,22 +97,26 @@ lqc006.com
moat.com
moatads.com
mobwithad.com
mopub.com
onesignal.com
realsrv.com
s4yxaqyq95.com
shhs-ydd8x2.yjrmss.cn
ssp.api.tappx.com
static.hotjar.com
static.javhd.com
tm-banners.gamingadult.com
trafficfactory.biz
tsyndicate.com
uberads.com
wwads.cn
# 36Kr
adx.36kr.com
# 12306
ad.12306.cn
# 36Kr
adx.36kr.com
# AdHub
hubcloud.com.cn
@@ -130,6 +127,10 @@ beizi.biz
click.ali213.net
pbmp.ali213.net
# AppLovin
applovin.com
applvn.com
# Caixin
# regexp:^pinggai\d\.caixin\.com$
full:pinggai0.caixin.com
@@ -147,12 +148,29 @@ full:pinggai9.caixin.com
adq.chinaso.com
stat.chinaso.com
# hiido
mlog.hiido.com
ylog.hiido.com
# Httpool
toboads.com
# ironSource Atom
analytics-data.io
atom-data.io
ironbeast.io
# pocoiq
cdn.pocoiq.cn
oct.pocoiq.cn
# Qiniu
dn-growing.qbox.me
# tagtic
g1.tagtic.cn
xy-log.tagtic.cn
# UNI Marketing
ad.unimhk.com

View File

@@ -1,30 +1,34 @@
# This file contains domains of all ads providers, including both the domains that serve ads and the domains of the providers themselves.
include:category-ads
include:adjust
include:clearbit
include:ogury
include:openx
include:pubmatic
include:segment
include:supersonic
include:taboola
1rx.io @ads
7box.vip @ads
ad-delivery.net @ads
adcolony.com @ads
adinplay.com @ads
adnxs.com @ads
adview.cn @ads
ads.trafficjunky.net @ads
advertserve.com @ads
adview.cn @ads
casalemedia.com @ads
contextual.media.net @ads
cpmstar.com @ads
demdex.net @ads
httpool.com @ads
lijit.com @ads
1rx.io @ads
mfadsrvr.com @ads
mgid.com @ads
ns1p.net @ads
pubmatic.com @ads
sigmob.com @ads
snapads.com @ads
spotxchange.com @ads
unimhk.com @ads
upapi.net @ads
include:taboola
include:category-ads

View File

@@ -48,6 +48,7 @@ include:kakao
include:kaspersky
include:lg
include:logitech
include:louisvuitton
include:mailru-group
include:meta
include:microsoft

View File

@@ -18,6 +18,7 @@ include:segmentfault
include:sxl
include:tencent-dev
include:ubuntukylin
include:unitychina
jinrishici.com
openvela.com

View File

@@ -142,3 +142,5 @@ ystbds.com
zhan.com
# 智慧树
zhihuishu.com
# 浙江省教育考试院
zjzs.net

View File

@@ -2,6 +2,9 @@ include:playcover
include:fflogs
include:trackernetwork
# Anti-Cheat
battleye.com
# Android Emulator
bluestacks.com
ldmnq.com @cn
@@ -16,5 +19,5 @@ prts.plus
heavenlywind.cc @cn
poi.moe
# Steam++ / Watt Toolkit
steampp.net @cn

View File

@@ -1,6 +1,7 @@
include:2kgames
include:blizzard
include:bluearchive
include:bohemia
include:curseforge
include:cygames
include:ea

View File

@@ -41,6 +41,9 @@ cloudflarewarp.com
cloudflareworkers.com
encryptedsni.com
every1dns.net
foundationdns.com
foundationdns.net
foundationdns.org
imagedelivery.net
isbgpsafeyet.com
one.one.one

View File

@@ -1 +0,0 @@
emogi.com @ads

View File

@@ -1 +0,0 @@
data.flurry.com @ads

View File

@@ -24,7 +24,6 @@ include:category-social-media-cn
# Advertisement & Analytics
include:getui
include:jiguang
include:umeng
# 神策数据
sensorsdata.cn

View File

@@ -1 +0,0 @@
assets.growingio.com @ads

View File

@@ -1,2 +0,0 @@
mlog.hiido.com @ads
ylog.hiido.com @ads

View File

@@ -1 +0,0 @@
static.hotjar.com @ads

View File

@@ -1 +0,0 @@
inner-active.mobi @ads

5
data/louisvuitton Normal file
View File

@@ -0,0 +1,5 @@
louisvuitton.cn @cn
louisvuitton.com
lvcampaign.com @cn
full:tp.louisvuitton.com @cn

View File

@@ -60,6 +60,7 @@ full:default.exp-tas.com
full:developer.microsoft.com
full:download.visualstudio.microsoft.com
full:dtlgalleryint.cloudapp.net
full:packages.microsoft.com
full:poshtestgallery.cloudapp.net
full:psg-int-centralus.cloudapp.net
full:psg-int-eastus.cloudapp.net

View File

@@ -1 +0,0 @@
mopub.com @ads

View File

@@ -1 +0,0 @@
evt.mxplay.com @ads

View File

@@ -1 +0,0 @@
js-agent.newrelic.com @ads

View File

@@ -1,3 +1,3 @@
ogury.co @ads
ogury.com @ads
presage.io @ads
ogury.co @ads

View File

@@ -1,6 +1,7 @@
# Main domain
chatgpt.com
chat.com
chatgpt.com
crixet.com
oaistatic.com
oaiusercontent.com
openai.com
@@ -10,13 +11,13 @@ sora.com
openai.com.cdn.cloudflare.net
full:openaiapi-site.azureedge.net
full:openaicom-api-bdcpf8c6d2e9atf6.z01.azurefd.net
full:openaicom.imgix.net
full:openaicomproductionae4b.blob.core.windows.net
full:production-openaicom-storage.azureedge.net
regexp:^chatgpt-async-webps-prod-\S+-\d+\.webpubsub\.azure\.com$
# tracking
full:o33249.ingest.sentry.io @ads
full:openaicom.imgix.net @ads
full:browser-intake-datadoghq.com @ads
# Advanced Voice

View File

@@ -1,2 +0,0 @@
cdn.pocoiq.cn @ads
oct.pocoiq.cn @ads

View File

@@ -2,5 +2,3 @@
pubmatic.com
pubmatic.co.jp
include:pubmatic-ads

View File

@@ -1 +0,0 @@
ads.pubmatic.com @ads

View File

@@ -8,6 +8,7 @@ galaxyappstore.com
galaxymobile.jp
game-platform.net
knoxemm.com
ospserver.net
samsung.com
samsungads.com @ads
samsungapps.com

View File

@@ -1,4 +1,5 @@
ssacdn.com @ads
supersonic.com @ads
supersonicads.com @ads
ssacdn.com @ads
supersonicads-a.akamaihd.net @ads

View File

@@ -1,2 +0,0 @@
g1.tagtic.cn @ads
xy-log.tagtic.cn @ads

View File

@@ -1 +0,0 @@
ssp.api.tappx.com @ads

View File

@@ -1,2 +1,7 @@
1024terabox.com
bestclouddrive.com
freeterabox.com
nephobox.com
terabox.com
terabox1024.com
teraboxcdn.com

View File

@@ -1 +0,0 @@
uberads.com @ads

View File

@@ -1,4 +1,6 @@
unity.com
unity3d.com
include:unity-ads
# Ads/tracking
iads.unity3d.com @ads
unityads.unity3d.com @ads

View File

@@ -1,6 +1,11 @@
# 优三缔 / 优美缔 / 团结引擎
u3d.cn
unity.cn
unitychina.cn
# Ads/tracking
ads.unitychina.cn @ads
splash-ads.cdn.unity.cn @ads
splash-ads.unitychina.cn @ads
unityads.unity.cn @ads
unityads.unity3d.com @ads
unityads.unitychina.cn @ads

9
internal/dlc/dlc.go Normal file
View File

@@ -0,0 +1,9 @@
package dlc
// Rule type identifiers. In data files a rule may be prefixed with one of
// these followed by ':' (for example "full:..." or "include:...").
const (
	// RuleTypeDomain is the identifier "domain".
	RuleTypeDomain string = "domain"
	// RuleTypeFullDomain is the identifier "full".
	RuleTypeFullDomain string = "full"
	// RuleTypeInclude is the identifier "include".
	RuleTypeInclude string = "include"
	// RuleTypeKeyword is the identifier "keyword".
	RuleTypeKeyword string = "keyword"
	// RuleTypeRegexp is the identifier "regexp".
	RuleTypeRegexp string = "regexp"
)

241
main.go
View File

@@ -10,6 +10,7 @@ import (
"slices"
"strings"
"github.com/v2fly/domain-list-community/internal/dlc"
router "github.com/v2fly/v2ray-core/v5/app/router/routercommon"
"google.golang.org/protobuf/proto"
)
@@ -21,21 +22,6 @@ var (
exportLists = flag.String("exportlists", "", "Lists to be flattened and exported in plaintext format, separated by ',' comma")
)
const (
RuleTypeDomain string = "domain"
RuleTypeFullDomain string = "full"
RuleTypeKeyword string = "keyword"
RuleTypeRegexp string = "regexp"
RuleTypeInclude string = "include"
)
var (
TypeChecker = regexp.MustCompile(`^(domain|full|keyword|regexp|include)$`)
DomainChecker = regexp.MustCompile(`^[a-z0-9\.-]+$`)
AttrChecker = regexp.MustCompile(`^[a-z0-9!-]+$`)
SiteChecker = regexp.MustCompile(`^[A-Z0-9!-]+$`)
)
var (
refMap = make(map[string][]*Entry)
plMap = make(map[string]*ParsedList)
@@ -66,7 +52,7 @@ type ParsedList struct {
func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) {
site := &router.GeoSite{
CountryCode: listName,
Domain: make([]*router.Domain, 0, len(entries)),
Domain: make([]*router.Domain, 0, len(entries)),
}
for _, entry := range entries {
pdomain := &router.Domain{Value: entry.Value}
@@ -78,13 +64,13 @@ func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) {
}
switch entry.Type {
case RuleTypeDomain:
case dlc.RuleTypeDomain:
pdomain.Type = router.Domain_RootDomain
case RuleTypeRegexp:
case dlc.RuleTypeRegexp:
pdomain.Type = router.Domain_Regex
case RuleTypeKeyword:
case dlc.RuleTypeKeyword:
pdomain.Type = router.Domain_Plain
case RuleTypeFullDomain:
case dlc.RuleTypeFullDomain:
pdomain.Type = router.Domain_Full
}
site.Domain = append(site.Domain, pdomain)
@@ -92,31 +78,12 @@ func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) {
return site, nil
}
func writePlainAll(siteList *[]string) error {
file, err := os.Create(filepath.Join(*outputDir, *outputName + "_plain.yml"))
if err != nil {
return err
}
defer file.Close()
w := bufio.NewWriter(file)
w.WriteString("lists:\n")
for _, site := range *siteList {
fmt.Fprintf(w, " - name: %s\n", strings.ToLower(site))
fmt.Fprintf(w, " length: %d\n", len(finalMap[site]))
w.WriteString(" rules:\n")
for _, entry := range finalMap[site] {
fmt.Fprintf(w, " - %s\n", entry.Plain)
}
}
return w.Flush()
}
func writePlainList(exportedName string) error {
targetList, exist := finalMap[strings.ToUpper(exportedName)]
if !exist || len(targetList) == 0 {
return fmt.Errorf("'%s' list does not exist or is empty.", exportedName)
return fmt.Errorf("list %q does not exist or is empty.", exportedName)
}
file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName) + ".txt"))
file, err := os.Create(filepath.Join(*outputDir, strings.ToLower(exportedName)+".txt"))
if err != nil {
return err
}
@@ -131,41 +98,43 @@ func writePlainList(exportedName string) error {
func parseEntry(line string) (Entry, error) {
var entry Entry
parts := strings.Fields(line)
if len(parts) == 0 {
return entry, fmt.Errorf("empty line: %q", line)
}
// Parse type and value
rawTypeVal := parts[0]
kv := strings.Split(rawTypeVal, ":")
if len(kv) == 1 {
entry.Type = RuleTypeDomain // Default type
entry.Value = strings.ToLower(rawTypeVal)
} else if len(kv) == 2 {
entry.Type = strings.ToLower(kv[0])
if entry.Type == RuleTypeRegexp {
entry.Value = kv[1]
} else if entry.Type == RuleTypeInclude {
entry.Value = strings.ToUpper(kv[1])
} else {
entry.Value = strings.ToLower(kv[1])
v := parts[0]
colonIndex := strings.Index(v, ":")
if colonIndex == -1 {
entry.Type = dlc.RuleTypeDomain // Default type
entry.Value = strings.ToLower(v)
if !validateDomainChars(entry.Value) {
return entry, fmt.Errorf("invalid domain: %q", entry.Value)
}
} else {
return entry, fmt.Errorf("invalid format: %s", line)
}
// Check type and value
if !TypeChecker.MatchString(entry.Type) {
return entry, fmt.Errorf("invalid type: %s", entry.Type)
}
switch entry.Type {
case RuleTypeRegexp:
if _, err := regexp.Compile(entry.Value); err != nil {
return entry, fmt.Errorf("invalid regexp: %s", entry.Value)
}
case RuleTypeInclude:
if !SiteChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid included list name: %s", entry.Value)
}
default: // `full`, `domain` and `keyword` are all (parts of) domains
if !DomainChecker.MatchString(entry.Value) {
return entry, fmt.Errorf("invalid domain: %s", entry.Value)
typ := strings.ToLower(v[:colonIndex])
val := v[colonIndex+1:]
switch typ {
case dlc.RuleTypeRegexp:
if _, err := regexp.Compile(val); err != nil {
return entry, fmt.Errorf("invalid regexp %q: %w", val, err)
}
entry.Type = dlc.RuleTypeRegexp
entry.Value = val
case dlc.RuleTypeInclude:
entry.Type = dlc.RuleTypeInclude
entry.Value = strings.ToUpper(val)
if !validateSiteName(entry.Value) {
return entry, fmt.Errorf("invalid include list name: %q", entry.Value)
}
case dlc.RuleTypeDomain, dlc.RuleTypeFullDomain, dlc.RuleTypeKeyword:
entry.Type = typ
entry.Value = strings.ToLower(val)
if !validateDomainChars(entry.Value) {
return entry, fmt.Errorf("invalid domain: %q", entry.Value)
}
default:
return entry, fmt.Errorf("invalid type: %q", typ)
}
}
@@ -173,18 +142,18 @@ func parseEntry(line string) (Entry, error) {
for _, part := range parts[1:] {
if strings.HasPrefix(part, "@") {
attr := strings.ToLower(part[1:]) // Trim attribute prefix `@` character
if !AttrChecker.MatchString(attr) {
return entry, fmt.Errorf("invalid attribute key: %s", attr)
if !validateAttrChars(attr) {
return entry, fmt.Errorf("invalid attribute: %q", attr)
}
entry.Attrs = append(entry.Attrs, attr)
} else if strings.HasPrefix(part, "&") {
aff := strings.ToUpper(part[1:]) // Trim affiliation prefix `&` character
if !SiteChecker.MatchString(aff) {
return entry, fmt.Errorf("invalid affiliation key: %s", aff)
if !validateSiteName(aff) {
return entry, fmt.Errorf("invalid affiliation: %q", aff)
}
entry.Affs = append(entry.Affs, aff)
} else {
return entry, fmt.Errorf("invalid attribute/affiliation: %s", part)
return entry, fmt.Errorf("invalid attribute/affiliation: %q", part)
}
}
// Sort attributes
@@ -198,6 +167,39 @@ func parseEntry(line string) (Entry, error) {
return entry, nil
}
// validateDomainChars reports whether domain is non-empty and made up only
// of lowercase ASCII letters, digits, '.' and '-'.
//
// The empty string is rejected: a rule such as "full:" carries no domain at
// all and must not pass validation.
func validateDomainChars(domain string) bool {
	if domain == "" {
		return false
	}
	for i := 0; i < len(domain); i++ {
		switch c := domain[i]; {
		case c >= 'a' && c <= 'z', c >= '0' && c <= '9', c == '.', c == '-':
			// allowed character
		default:
			return false
		}
	}
	return true
}
// validateAttrChars reports whether attr is non-empty and made up only of
// lowercase ASCII letters, digits, '!' and '-'.
//
// The empty string is rejected: a bare "@" in a rule line leaves an empty
// attribute name, which must not pass validation.
func validateAttrChars(attr string) bool {
	if attr == "" {
		return false
	}
	for i := 0; i < len(attr); i++ {
		switch c := attr[i]; {
		case c >= 'a' && c <= 'z', c >= '0' && c <= '9', c == '!', c == '-':
			// allowed character
		default:
			return false
		}
	}
	return true
}
// validateSiteName reports whether name is non-empty and made up only of
// uppercase ASCII letters, digits, '!' and '-'.
//
// The empty string is rejected: "include:" with nothing after the colon, or
// a bare "&", leaves an empty list name, which must not pass validation.
func validateSiteName(name string) bool {
	if name == "" {
		return false
	}
	for i := 0; i < len(name); i++ {
		switch c := name[i]; {
		case c >= 'A' && c <= 'Z', c >= '0' && c <= '9', c == '!', c == '-':
			// allowed character
		default:
			return false
		}
	}
	return true
}
func loadData(path string) error {
file, err := os.Open(path)
if err != nil {
@@ -206,7 +208,7 @@ func loadData(path string) error {
defer file.Close()
listName := strings.ToUpper(filepath.Base(path))
if !SiteChecker.MatchString(listName) {
if !validateSiteName(listName) {
return fmt.Errorf("invalid list name: %s", listName)
}
scanner := bufio.NewScanner(file)
@@ -238,7 +240,7 @@ func parseList(refName string, refList []*Entry) error {
plMap[refName] = pl
}
for _, entry := range refList {
if entry.Type == RuleTypeInclude {
if entry.Type == dlc.RuleTypeInclude {
if len(entry.Affs) != 0 {
return fmt.Errorf("affiliation is not allowed for include:%s", entry.Value)
}
@@ -272,18 +274,18 @@ func polishList(roughMap *map[string]*Entry) []*Entry {
domainsMap := make(map[string]bool)
for _, entry := range *roughMap {
switch entry.Type { // Bypass regexp, keyword and "full/domain with attr"
case RuleTypeRegexp:
case dlc.RuleTypeRegexp:
finalList = append(finalList, entry)
case RuleTypeKeyword:
case dlc.RuleTypeKeyword:
finalList = append(finalList, entry)
case RuleTypeDomain:
case dlc.RuleTypeDomain:
domainsMap[entry.Value] = true
if len(entry.Attrs) != 0 {
finalList = append(finalList, entry)
} else {
queuingList = append(queuingList, entry)
}
case RuleTypeFullDomain:
case dlc.RuleTypeFullDomain:
if len(entry.Attrs) != 0 {
finalList = append(finalList, entry)
} else {
@@ -295,14 +297,18 @@ func polishList(roughMap *map[string]*Entry) []*Entry {
for _, qentry := range queuingList {
isRedundant := false
pd := qentry.Value // To be parent domain
if qentry.Type == RuleTypeFullDomain {
if qentry.Type == dlc.RuleTypeFullDomain {
pd = "." + pd // So that `domain:example.org` overrides `full:example.org`
}
for {
idx := strings.Index(pd, ".")
if idx == -1 { break }
if idx == -1 {
break
}
pd = pd[idx+1:] // Go for next parent
if !strings.Contains(pd, ".") { break } // Not allow tld to be a parent
if !strings.Contains(pd, ".") {
break
} // Not allow tld to be a parent
if domainsMap[pd] {
isRedundant = true
break
@@ -320,7 +326,9 @@ func polishList(roughMap *map[string]*Entry) []*Entry {
}
func resolveList(pl *ParsedList) error {
if _, pldone := finalMap[pl.Name]; pldone { return nil }
if _, pldone := finalMap[pl.Name]; pldone {
return nil
}
if cirIncMap[pl.Name] {
return fmt.Errorf("circular inclusion in: %s", pl.Name)
@@ -329,14 +337,22 @@ func resolveList(pl *ParsedList) error {
defer delete(cirIncMap, pl.Name)
isMatchAttrFilters := func(entry *Entry, incFilter *Inclusion) bool {
if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 { return true }
if len(entry.Attrs) == 0 { return len(incFilter.MustAttrs) == 0 }
if len(incFilter.MustAttrs) == 0 && len(incFilter.BanAttrs) == 0 {
return true
}
if len(entry.Attrs) == 0 {
return len(incFilter.MustAttrs) == 0
}
for _, m := range incFilter.MustAttrs {
if !slices.Contains(entry.Attrs, m) { return false }
if !slices.Contains(entry.Attrs, m) {
return false
}
}
for _, b := range incFilter.BanAttrs {
if slices.Contains(entry.Attrs, b) { return false }
if slices.Contains(entry.Attrs, b) {
return false
}
}
return true
}
@@ -348,7 +364,7 @@ func resolveList(pl *ParsedList) error {
for _, inc := range pl.Inclusions {
incPl, exist := plMap[inc.Source]
if !exist {
return fmt.Errorf("list '%s' includes a non-existent list: '%s'", pl.Name, inc.Source)
return fmt.Errorf("list %q includes a non-existent list: %q", pl.Name, inc.Source)
}
if err := resolveList(incPl); err != nil {
return err
@@ -395,54 +411,51 @@ func main() {
}
}
// Generate finalMap and sorted list of site names
siteList := make([]string, 0 ,len(plMap))
// Generate finalMap
for _, pl := range plMap {
siteList = append(siteList, pl.Name)
if err := resolveList(pl); err != nil {
fmt.Println("Failed to resolveList:", err)
os.Exit(1)
}
}
slices.Sort(siteList)
// Create output directory if not exist
if _, err := os.Stat(*outputDir); os.IsNotExist(err) {
if mkErr := os.MkdirAll(*outputDir, 0755); mkErr != nil {
fmt.Println("Failed:", mkErr)
fmt.Println("Failed to create output directory:", mkErr)
os.Exit(1)
}
}
// Export plaintext list
if *exportLists != "" {
exportedListSlice := strings.Split(*exportLists, ",")
for _, exportedList := range exportedListSlice {
if exportedList == "_all_" {
if err := writePlainAll(&siteList); err != nil {
fmt.Println("Failed to writePlainAll:", err)
continue
}
} else {
if err := writePlainList(exportedList); err != nil {
fmt.Println("Failed to write list:", err)
continue
}
}
fmt.Printf("list: '%s' has been generated successfully.\n", exportedList)
var exportListSlice []string
for raw := range strings.SplitSeq(*exportLists, ",") {
if trimmed := strings.TrimSpace(raw); trimmed != "" {
exportListSlice = append(exportListSlice, trimmed)
}
}
for _, exportList := range exportListSlice {
if err := writePlainList(exportList); err != nil {
fmt.Println("Failed to write list:", err)
continue
}
fmt.Printf("list %q has been generated successfully.\n", exportList)
}
// Generate dat file
protoList := new(router.GeoSiteList)
for _, siteName := range siteList { // So that protoList.Entry is sorted
site, err := makeProtoList(siteName, finalMap[siteName])
for siteName, siteEntries := range finalMap {
site, err := makeProtoList(siteName, siteEntries)
if err != nil {
fmt.Println("Failed to makeProtoList:", err)
os.Exit(1)
}
protoList.Entry = append(protoList.Entry, site)
}
// Sort protoList so the marshaled list is reproducible
slices.SortFunc(protoList.Entry, func(a, b *router.GeoSite) int {
return strings.Compare(a.CountryCode, b.CountryCode)
})
protoBytes, err := proto.Marshal(protoList)
if err != nil {