Compare commits

...

22 Commits

Author SHA1 Message Date
MkQtS
aa76e186cb Add more cn domains (#3380)
* weiphone: add weiphone.net

* category-education-cn: add more domains

* category-media-cn: add more domains

* geolocation-cn: add more domains
2026-03-22 15:43:41 +08:00
fdrvn
c9348f1db4 category-ip-geo-detect: add myip.wtf (#3379) 2026-03-22 12:28:34 +08:00
深鸣
d62599c8db category-acg: add bestdori.com (#3376) 2026-03-20 13:25:24 +08:00
MkQtS
fab6275217 Update README.md
Add tips for generating custom dat files.

[skip ci]
2026-03-20 11:02:07 +08:00
MkQtS
4c7afec5a9 category-electronic-cn: remove non-cn domains (#3375)
They were added in `espressif` and included in `geolocation-!cn`.
2026-03-20 10:53:04 +08:00
Jarl-Penguin
330c30eb23 category-ip-geo-detect: Add ip.hetzner.com (#3374)
Signed-off-by: Jarl-Penguin <jarlpenguin@outlook.com>
2026-03-20 10:47:36 +08:00
inf
f34f22819e category-dev: add ziglang.org (#3373) 2026-03-20 10:46:09 +08:00
ir0nmand0
baa1409cfb category-entertainment-ru: add beeline.tv (#3372)
Beeline TV (beeline.tv) is a Russian streaming service by VEON (Beeline).
Movies, TV series, and live TV channels for Russian-speaking audience.

Subdomains (covered by domain match): web-prod, rest, images, static, video.
External deps (mediavitrina.ru, vimpelcom.ru) already in category-ru.

Co-authored-by: Dima Dudukin <dima.dudukin.dev@gmail.com>
2026-03-19 18:12:53 +08:00
MkQtS
a22d247c5a qcloud: comment out useless regexp rules (#3371)
Overridden by other domain type rules, but cannot be optimized automatically.

They are actually useless and only affect performance.
2026-03-19 12:43:29 +08:00
yobarerukoto
d311bbe50b geolocation-cn: add gzyowin.com (#3369) 2026-03-18 19:29:43 +08:00
MkQtS
1db558b165 main.go: support to generate multiple custom dats (#3367)
This allows to remove any unwanted lists without modifying the domains
data, and you can generate multiple custom v2ray dat files in a single
command.

As long as the source data is consistent, any list that remains in the trimmed
dat contains the same rules as the same list in the full dat.

Use the new option `datprofile` to specify the config json file path.
`outputname` will be ignored when `datprofile` is set.

Co-authored-by: database64128 <free122448@hotmail.com>
2026-03-18 18:32:05 +08:00
Konstantin
9ee0757263 Add Tilda domains (#3368)
* add tilda

* tilda: add to category-dev
2026-03-18 18:05:54 +08:00
MkQtS
714a061ba3 main.go: improve codes (#3366)
* main.go: improve codes

* main.go: add parseInclusion

- separate from parseEntry
- not allow affiliation for inclusion
2026-03-18 15:58:47 +08:00
TripleA
5ff8142411 Update category-speedtest (#3365) 2026-03-18 00:01:38 +08:00
MkQtS
becbd7a8ad wordpress: add wordpress.net (#3364) 2026-03-16 19:05:25 +08:00
xchacha20-poly1305
cd2d66eb72 baidu: Add xdrtc.com (#3362) 2026-03-15 21:55:53 +08:00
MkQtS
4c4ad053ef nvidia: add nvidia.custhelp.com 2026-03-15 14:51:57 +08:00
MkQtS
6544f6d3a6 oracle: add custhelp.com 2026-03-15 14:51:57 +08:00
MkQtS
673a70c380 category-ai-!cn: add spicywriter.com (#3360) 2026-03-15 14:32:27 +08:00
TripleA
be078767c4 Update category-ip-geo-detect (#3359) 2026-03-15 14:21:52 +08:00
深鸣
15fde0da4b Add more cn domains (#3358) 2026-03-15 14:20:57 +08:00
pover0k
5dd4779425 nodeseek: add nodeget.com (#3357) 2026-03-15 14:16:38 +08:00
22 changed files with 355 additions and 133 deletions

6
.gitignore vendored
View File

@@ -4,9 +4,9 @@
/domain-list-community /domain-list-community
/domain-list-community.exe /domain-list-community.exe
# Generated dat file. # Generated dat files.
dlc.dat /*.dat
# Exported plaintext lists. # Exported plaintext lists.
/*.yml
/*.txt /*.txt
/*.yml

View File

@@ -86,6 +86,8 @@ Each file in the `data` directory can be used as a rule in this format: `geosite
Run `go run ./ --help` for more usage information. Run `go run ./ --help` for more usage information.
For anyone who wants to generate custom `.dat` files, you may read [#3370](https://github.com/v2fly/domain-list-community/discussions/3370).
## Structure of data ## Structure of data
All data are under `data` directory. Each file in the directory represents a sub-list of domains, named by the file name. File content is in the following format. All data are under `data` directory. Each file in the directory represents a sub-list of domains, named by the file name. File content is in the following format.
@@ -105,7 +107,7 @@ regexp:^odd[1-7]\.example\.org(\.[a-z]{2})?$
> Adding new `regexp` and `keyword` rules is discouraged because it is easy to use them incorrectly, and proxy software cannot efficiently match these types of rules. > Adding new `regexp` and `keyword` rules is discouraged because it is easy to use them incorrectly, and proxy software cannot efficiently match these types of rules.
> [!NOTE] > [!NOTE]
> The following types of rules are **NOT** fully compatible with the ones that defined by user in V2Ray config file. Do **Not** copy and paste directly. > The following types of rules are **NOT** fully compatible with the ones that defined by user in V2Ray config file. Do **NOT** copy and paste directly.
- Comment begins with `#`. It may begin anywhere in the file. The content in the line after `#` is treated as comment and ignored in production. - Comment begins with `#`. It may begin anywhere in the file. The content in the line after `#` is treated as comment and ignored in production.
- Subdomain begins with `domain:`, followed by a valid domain name. The prefix `domain:` may be omitted. - Subdomain begins with `domain:`, followed by a valid domain name. The prefix `domain:` may be omitted.
@@ -114,7 +116,7 @@ regexp:^odd[1-7]\.example\.org(\.[a-z]{2})?$
- Regular expression begins with `regexp:`, followed by a valid regular expression (per Golang's standard). - Regular expression begins with `regexp:`, followed by a valid regular expression (per Golang's standard).
- Domain rules (including `domain`, `full`, `keyword`, and `regexp`) may have none, one or more attributes. Each attribute begins with `@` and followed by the name of the attribute. Attributes will remain available in final lists and `dlc.dat`. - Domain rules (including `domain`, `full`, `keyword`, and `regexp`) may have none, one or more attributes. Each attribute begins with `@` and followed by the name of the attribute. Attributes will remain available in final lists and `dlc.dat`.
- Domain rules may have none, one or more affiliations, which additionally adds the domain rule into the affiliated target list. Each affiliation begins with `&` and followed by the name of the target list (no matter whether the target has a dedicated file in data path). This is a method for data management, and will not remain in the final lists or `dlc.dat`. - Domain rules may have none, one or more affiliations, which additionally adds the domain rule into the affiliated target list. Each affiliation begins with `&` and followed by the name of the target list (no matter whether the target has a dedicated file in data path). This is a method for data management, and will not remain in the final lists or `dlc.dat`.
- Inclusion begins with `include:`, followed by the name of another valid domain list. A simple `include:listb` in file `lista` means adding all domain rules of `listb` into `lista`. Inclusions with attributes stands for selective inclusion. `include:listb @attr1 @-attr2` means only adding those domain rules *with* `@attr1` **and** *without* `@attr2`. This is a special type for data management, and will not remain in the final lists or `dlc.dat`. - Inclusion begins with `include:`, followed by the name of another valid domain list. `include:listb` in file `lista` means adding all domain rules of `listb` into `lista`. Inclusions with attributes stand for selective inclusion. `include:listb @attr1 @-attr2` means only adding those domain rules *with* `@attr1` **and** *without* `@attr2`. This is a special type for data management, and will not remain in the final lists or `dlc.dat`.
## How it works ## How it works

View File

@@ -64,6 +64,7 @@ shifen.com
smartapps.cn smartapps.cn
tieba.com tieba.com
tiebaimg.com tiebaimg.com
xdrtc.com
xianfae.com xianfae.com
xiaodutv.com xiaodutv.com
yoojia.com yoojia.com

View File

@@ -1,4 +1,5 @@
# Game # Game
bestdori.com
colorfulstage.com colorfulstage.com
# Idol # Idol

View File

@@ -46,3 +46,4 @@ mistral.ai
openart.ai openart.ai
openclaw.ai openclaw.ai
openrouter.ai openrouter.ai
spicywriter.com

View File

@@ -54,6 +54,7 @@ include:stackexchange
include:strikingly include:strikingly
include:termux include:termux
include:thelinuxfoundation include:thelinuxfoundation
include:tilda
include:unity include:unity
include:v8 include:v8
@@ -152,4 +153,5 @@ wireshark.org
x.org x.org
xposed.info xposed.info
yarnpkg.com yarnpkg.com
ziglang.org
zsh.org zsh.org

View File

@@ -10,6 +10,8 @@ include:shanbay
include:xueersi include:xueersi
include:yuanfudao include:yuanfudao
edu.cn
# 雨课堂 # 雨课堂
include:yuketang include:yuketang
## 雨豆课堂 ## 雨豆课堂
@@ -19,8 +21,6 @@ yushiyan.net
## 学堂在线 ## 学堂在线
xuetangx.com xuetangx.com
edu.cn
# 国家智慧教育公共服务平台 # 国家智慧教育公共服务平台
cbern.com.cn cbern.com.cn
smartedu.cn smartedu.cn
@@ -91,6 +91,7 @@ eoffcn.com
jinrongren.net jinrongren.net
offcn.com offcn.com
zggqzp.com zggqzp.com
zgjsks.com
zgsydw.com zgsydw.com
# 高考100 # 高考100
gk100.com gk100.com
@@ -105,6 +106,9 @@ xir.cn
# 日语测试 # 日语测试
j-test.com j-test.com
nattest-china.com nattest-china.com
# 菁优网
jyeoo.com
jyeoo.net
# 课播云校 # 课播云校
keboyunxiao.com keboyunxiao.com
# 课堂派 # 课堂派
@@ -132,6 +136,8 @@ oldboyedu.com
pigai.org pigai.org
# 公考知识库 # 公考知识库
saduck.top saduck.top
# 外研在线
unipus.cn
# 未来云校 # 未来云校
weilaiyunxiao.com weilaiyunxiao.com
# 北京嘉瑞新创教育咨询有限公司 # 北京嘉瑞新创教育咨询有限公司

View File

@@ -17,11 +17,10 @@ bouffalolab.com
cxmt.com cxmt.com
# 乐鑫信息科技 # 乐鑫信息科技
#include:espressif
esp8266.cn esp8266.cn
esp8266.com
esp8266.com.cn esp8266.com.cn
espressif.cn espressif.cn
espressif.com
espressif.com.cn espressif.com.cn
# 华秋电子 # 华秋电子

View File

@@ -57,10 +57,14 @@ anitabi.cn
# 暴风影音 # 暴风影音
baofeng.com baofeng.com
baofeng.net baofeng.net
# 街机地图
bemanicn.com
# 布咕阅读 # 布咕阅读
bgwxc.com bgwxc.com
# B站空降助手 # B站空降助手
bsbsb.top bsbsb.top
# CdkeyNoGap
cdkeynogap.com
# 动漫之家 #703 # 动漫之家 #703
dmzj.com dmzj.com
muwai.com muwai.com

View File

@@ -11,6 +11,7 @@ include:okko
include:wink include:wink
24h.tv 24h.tv
amediateka.ru amediateka.ru
beeline.tv
ivi.ru ivi.ru
premier.one premier.one
smotreshka.tv smotreshka.tv

View File

@@ -97,6 +97,7 @@ myexternalip.com
myip.com myip.com
myip.ms myip.ms
myip.ru myip.ru
myip.wtf
myipaddress.com myipaddress.com
myiplookup.com myiplookup.com
mylocation.org mylocation.org
@@ -131,7 +132,10 @@ wtfismyip.com
# Subdomains/internal api used for ip-geo-detect # Subdomains/internal api used for ip-geo-detect
full:checkip.amazonaws.com full:checkip.amazonaws.com
full:ipv4-check-perf.radar.cloudflare.com
full:ipv6-check-perf.radar.cloudflare.com
geoip.noc.gov.ru geoip.noc.gov.ru
ip.hetzner.com
ip.mail.ru ip.mail.ru
ip.nic.ru ip.nic.ru
ip.tyk.nu ip.tyk.nu

View File

@@ -135,6 +135,10 @@ kksmg.com
statickksmg.com statickksmg.com
# 晚点 # 晚点
latepost.com latepost.com
# 雷科技
leikeji.com
# 雷峰网
leiphone.com
# 今日观察网 # 今日观察网
miercn.com miercn.com
# 快科技 # 快科技
@@ -206,14 +210,21 @@ techweb.com.cn
thecover.cn thecover.cn
# 澎湃新闻 # 澎湃新闻
thepaper.cn thepaper.cn
# 潮新闻/浙江日报
8531.cn
tidenews.com.cn
# 人人都是产品经理 # 人人都是产品经理
woshipm.com woshipm.com
# 新华日报
xhby.net
# 西陆网 # 西陆网
xilu.com xilu.com
# 新快报 # 新快报
xkb.com.cn xkb.com.cn
# 第一财经 # 第一财经
yicai.com yicai.com
# 扬子晚报
yzwb.net
# ZEALER # ZEALER
zealer.com zealer.com
# 知产力 # 知产力

View File

@@ -3,33 +3,62 @@ include:openspeedtest
cnspeedtest.cn @cn cnspeedtest.cn @cn
fast.com fast.com
fastspeedtest.com
linkmeter.net
measurementlab.net measurementlab.net
meter.net
nperf.com nperf.com
openspeedtest.ru
speed.cloudflare.com speed.cloudflare.com
speed.dler.io speed.dler.io
speed.ee
speed.hinet.net speed.hinet.net
speed.nccu.edu.tw speed.nccu.edu.tw
speed.neu6.edu.cn @cn speed.neu6.edu.cn @cn
speed.nju.edu.cn @cn
speed.nuaa.edu.cn @cn speed.nuaa.edu.cn @cn
speed.qlu.edu.cn @cn speed.qlu.edu.cn @cn
speed.ujs.edu.cn @cn speed.ujs.edu.cn @cn
speed6.ujs.edu.cn @cn
speed2.hinet.net speed2.hinet.net
speed5.ntu.edu.tw speed5.ntu.edu.tw
speed6.ujs.edu.cn @cn
speedcheck.org
speedgeo.net
speedof.me
speedtest.cesnet.cz
speedtest.ch
speedtest.citylink.pro
speedtest.cn @cn speedtest.cn @cn
speedtest.co.za
speedtest.de
speedtest.dno-it.ru
speedtest.frontier.com
speedtest.im speedtest.im
speedtest.mail.ru
speedtest.mfcyun.com @cn speedtest.mfcyun.com @cn
speedtest.net.in
speedtest.net.ua
speedtest.net.uk
speedtest.org
speedtest.rt.ru
speedtest.ru
speedtest.shaw.ca
speedtest.shu.edu.cn @cn speedtest.shu.edu.cn @cn
speedtest6.shu.edu.cn @cn speedtest.su
speedtest.uz
speedtest.volia.com
speedtest.xaut.edu.cn @cn speedtest.xaut.edu.cn @cn
speedtest.xfinity.com speedtest.xfinity.com
speedtestcustom.com speedtest.xyz
test.ustc.edu.cn @cn speedtest24.ru
test6.ustc.edu.cn @cn speedtest6.shu.edu.cn @cn
test.nju.edu.cn @cn test.nju.edu.cn @cn
test.ustc.edu.cn @cn
test6.nju.edu.cn @cn test6.nju.edu.cn @cn
speed.nju.edu.cn @cn test6.ustc.edu.cn @cn
testmy.net testmy.net
testmyspeed.com
testskorosti.ru
xnfz.seu.edu.cn @cn xnfz.seu.edu.cn @cn
full:hk-global-bgp.hkg.speedtest.yecaoyun.com @!cn full:hk-global-bgp.hkg.speedtest.yecaoyun.com @!cn

View File

@@ -222,8 +222,12 @@ mingxuan.store
gov.cn gov.cn
## 中国气象局 ## 中国气象局
cma.cn cma.cn
## 政务和公益机构域名注册管理中心
conac.cn
## 中央气象台 ## 中央气象台
nmc.cn nmc.cn
## 中国互联网联合辟谣平台
piyao.org.cn
# Public transportation # Public transportation
## 中国国际航空 ## 中国国际航空
@@ -321,10 +325,14 @@ webterren.com
## 国学大师网 ## 国学大师网
guoxuedashi.com guoxuedashi.com
guoxuemi.com guoxuemi.com
## 志愿汇
zyh365.com
## 温州市图书馆 ## 温州市图书馆
wzlib.cn wzlib.cn
## 中国大百科全书
zgbk.com
## 浙江图书馆
zjlib.cn
## 志愿汇
zyh365.com
# Services & Softwares # Services & Softwares
include:category-ai-cn include:category-ai-cn
@@ -1112,8 +1120,6 @@ lascn.net
lawxp.com lawxp.com
leangoo.com leangoo.com
leanote.com leanote.com
leikeji.com
leiphone.com
leiting.com leiting.com
leju.com leju.com
leturich.org leturich.org
@@ -1394,7 +1400,6 @@ weand.com
weavatar.com weavatar.com
weicaifu.com weicaifu.com
weilaicaijing.com weilaicaijing.com
weiphone.net
weixing.com weixing.com
weiyangx.com weiyangx.com
welltrend-edu.com welltrend-edu.com
@@ -1523,7 +1528,6 @@ zastatic.com
zczj.com zczj.com
zdfans.com zdfans.com
zgjm.org zgjm.org
zgjsks.com
zglxw.com zglxw.com
zgzcw.com zgzcw.com
zhanbuba.com zhanbuba.com
@@ -1621,3 +1625,6 @@ ao-x.ac.cn
# 万集科技 京ICP备18036282号-2 # 万集科技 京ICP备18036282号-2
wanji.net.cn wanji.net.cn
# 广州市雅望互联网服务有限公司
gzyowin.com

View File

@@ -1,5 +1,6 @@
22112211.xyz 22112211.xyz
deepflood.com deepflood.com
nodeget.com
nodeimage.com nodeimage.com
nodequality.com nodequality.com
nodeseek.com nodeseek.com

View File

@@ -59,6 +59,7 @@ tegrazone.com
tegrazone.jp tegrazone.jp
tegrazone.kr tegrazone.kr
full:nvidia.custhelp.com
full:nvidia.tt.omtrdc.net full:nvidia.tt.omtrdc.net
# NVIDIA 文件下载服务器中国镜像 # NVIDIA 文件下载服务器中国镜像

View File

@@ -1,10 +1,14 @@
# All .oracle domains # All .oracle domains
oracle oracle
include:addthis
include:java
ateam-oracle.com ateam-oracle.com
bronto.com bronto.com
covid19-rx.org covid19-rx.org
covid19rx.org covid19rx.org
custhelp.com
oracle.com oracle.com
oraclecloud.com oraclecloud.com
oraclefoundation.org oraclefoundation.org
@@ -12,6 +16,3 @@ oracleimg.com
oracleinfinity.io oracleinfinity.io
sun.com sun.com
virtualbox.org virtualbox.org
include:addthis
include:java

View File

@@ -258,13 +258,14 @@ tdnsv14.net
tdnsv15.net tdnsv15.net
# myqcloud inside mainland China # myqcloud inside mainland China
regexp:\.(.+-)?ap-beijing(-.+)?\.myqcloud\.com$ #北京 # overrided by myqcloud.com
regexp:\.(.+-)?ap-nanjing(-.+)?\.myqcloud\.com$ #南京 #regexp:\.(.+-)?ap-beijing(-.+)?\.myqcloud\.com$ #北京
regexp:\.(.+-)?ap-shanghai(-.+)?\.myqcloud\.com$ #上海 #regexp:\.(.+-)?ap-nanjing(-.+)?\.myqcloud\.com$ #南京
regexp:\.(.+-)?ap-guangzhou(-.+)?\.myqcloud\.com$ #广州 #regexp:\.(.+-)?ap-shanghai(-.+)?\.myqcloud\.com$ #上海
regexp:\.(.+-)?ap-chengdu(-.+)?\.myqcloud\.com$ #成都 #regexp:\.(.+-)?ap-guangzhou(-.+)?\.myqcloud\.com$ #广州
regexp:\.(.+-)?ap-chongqing(-.+)?\.myqcloud\.com$ #重庆 #regexp:\.(.+-)?ap-chengdu(-.+)?\.myqcloud\.com$ #成都
regexp:\.(.+-)?ap-shenzhen(-.+)?\.myqcloud\.com$ #深圳 #regexp:\.(.+-)?ap-chongqing(-.+)?\.myqcloud\.com$ #重庆
#regexp:\.(.+-)?ap-shenzhen(-.+)?\.myqcloud\.com$ #深圳
# COS 使用到的非中国大陆的地域与可用区,参见 https://cloud.tencent.com/document/product/436/6224 # COS 使用到的非中国大陆的地域与可用区,参见 https://cloud.tencent.com/document/product/436/6224
ap-hongkong.myqcloud.com @!cn #中国香港 ap-hongkong.myqcloud.com @!cn #中国香港
@@ -282,13 +283,14 @@ eu-frankfurt.myqcloud.com @!cn #法兰克福
eu-moscow.myqcloud.com @!cn #莫斯科 eu-moscow.myqcloud.com @!cn #莫斯科
# tencentcos inside mainland China # tencentcos inside mainland China
regexp:\.(.+-)?ap-beijing(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #北京 # overrided by tencentcos.cn, tencentcos.com, tencentcos.com.cn
regexp:\.(.+-)?ap-nanjing(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #南京 #regexp:\.(.+-)?ap-beijing(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #北京
regexp:\.(.+-)?ap-shanghai(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #上海 #regexp:\.(.+-)?ap-nanjing(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #南京
regexp:\.(.+-)?ap-guangzhou(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #广州 #regexp:\.(.+-)?ap-shanghai(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #上海
regexp:\.(.+-)?ap-chengdu(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #成都 #regexp:\.(.+-)?ap-guangzhou(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #广州
regexp:\.(.+-)?ap-chongqing(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #重庆 #regexp:\.(.+-)?ap-chengdu(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #成都
regexp:\.(.+-)?ap-shenzhen(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #深圳 #regexp:\.(.+-)?ap-chongqing(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #重庆
#regexp:\.(.+-)?ap-shenzhen(-.+)?\.tencentcos\.(cn|com(\.cn)?)$ #深圳
# tencentcos outside mainland China # tencentcos outside mainland China
# regexp:.+\.ap-hongkong\.tencentcos\.(cn|com(\.cn)?)$ @!cn #中国香港 # regexp:.+\.ap-hongkong\.tencentcos\.(cn|com(\.cn)?)$ @!cn #中国香港

5
data/tilda Normal file
View File

@@ -0,0 +1,5 @@
tilda.cc
tilda.ru
tilda.ws
tildaapi.com
tildacdn.com

View File

@@ -1,3 +1,4 @@
feng.com feng.com
fengimg.com fengimg.com
weiphone.net
wfdata.club wfdata.club

View File

@@ -1,6 +1,7 @@
videopress.com videopress.com
w.org w.org
wordpress.com wordpress.com
wordpress.net
wordpress.org wordpress.org
wordpress.tv wordpress.tv
wp-themes.com wp-themes.com

332
main.go
View File

@@ -2,6 +2,7 @@ package main
import ( import (
"bufio" "bufio"
"encoding/json"
"flag" "flag"
"fmt" "fmt"
"os" "os"
@@ -19,6 +20,7 @@ var (
dataPath = flag.String("datapath", "./data", "Path to your custom 'data' directory") dataPath = flag.String("datapath", "./data", "Path to your custom 'data' directory")
outputName = flag.String("outputname", "dlc.dat", "Name of the generated dat file") outputName = flag.String("outputname", "dlc.dat", "Name of the generated dat file")
outputDir = flag.String("outputdir", "./", "Directory to place all generated files") outputDir = flag.String("outputdir", "./", "Directory to place all generated files")
datProfile = flag.String("datprofile", "", "Path of config file used to assemble custom dats")
exportLists = flag.String("exportlists", "", "Lists to be flattened and exported in plaintext format, separated by ',' comma") exportLists = flag.String("exportlists", "", "Lists to be flattened and exported in plaintext format, separated by ',' comma")
) )
@@ -47,7 +49,24 @@ type Processor struct {
cirIncMap map[string]bool cirIncMap map[string]bool
} }
func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) { type GeoSites struct {
Sites []*router.GeoSite
SiteIdx map[string]int
}
type DatTask struct {
Name string `json:"name"`
Mode string `json:"mode"`
Lists []string `json:"lists"`
}
const (
ModeAll string = "all"
ModeAllowlist string = "allowlist"
ModeDenylist string = "denylist"
)
func makeProtoList(listName string, entries []*Entry) *router.GeoSite {
site := &router.GeoSite{ site := &router.GeoSite{
CountryCode: listName, CountryCode: listName,
Domain: make([]*router.Domain, 0, len(entries)), Domain: make([]*router.Domain, 0, len(entries)),
@@ -73,7 +92,91 @@ func makeProtoList(listName string, entries []*Entry) (*router.GeoSite, error) {
} }
site.Domain = append(site.Domain, pdomain) site.Domain = append(site.Domain, pdomain)
} }
return site, nil return site
}
// loadTasks reads the JSON file at path and decodes it into a slice of
// DatTask values.
//
// Every decoded task must have a non-empty Name and a Mode equal to one of
// ModeAll, ModeAllowlist or ModeDenylist; the first task that violates this
// aborts loading with an error naming the task index. Errors from opening the
// file are returned unchanged, decoding errors are wrapped.
func loadTasks(path string) ([]DatTask, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	var tasks []DatTask
	if decErr := json.NewDecoder(file).Decode(&tasks); decErr != nil {
		return nil, fmt.Errorf("failed to decode json: %w", decErr)
	}

	for idx := range tasks {
		task := &tasks[idx]
		if task.Name == "" {
			return nil, fmt.Errorf("task[%d]: name is required", idx)
		}
		known := task.Mode == ModeAll || task.Mode == ModeAllowlist || task.Mode == ModeDenylist
		if !known {
			return nil, fmt.Errorf("task[%d] %q: invalid mode %q", idx, task.Name, task.Mode)
		}
	}
	return tasks, nil
}
func (gs *GeoSites) assembleDat(task DatTask) error {
datFileName := strings.ToLower(filepath.Base(task.Name))
geoSiteList := new(router.GeoSiteList)
switch task.Mode {
case ModeAll:
geoSiteList.Entry = gs.Sites
case ModeAllowlist:
allowedIdxes := make([]int, 0, len(task.Lists))
for _, list := range task.Lists {
if idx, ok := gs.SiteIdx[strings.ToUpper(list)]; ok {
allowedIdxes = append(allowedIdxes, idx)
} else {
return fmt.Errorf("list %q not found for allowlist task", list)
}
}
slices.Sort(allowedIdxes)
allowedlen := len(allowedIdxes)
if allowedlen == 0 {
return fmt.Errorf("allowlist needs at least one valid list")
}
geoSiteList.Entry = make([]*router.GeoSite, allowedlen)
for i, idx := range allowedIdxes {
geoSiteList.Entry[i] = gs.Sites[idx]
}
case ModeDenylist:
deniedMap := make(map[int]bool, len(task.Lists))
for _, list := range task.Lists {
if idx, ok := gs.SiteIdx[strings.ToUpper(list)]; ok {
deniedMap[idx] = true
} else {
fmt.Printf("[Warn] list %q not found in denylist task %q", list, task.Name)
}
}
deniedlen := len(deniedMap)
if deniedlen == 0 {
fmt.Printf("[Warn] nothing to deny in task %q", task.Name)
geoSiteList.Entry = gs.Sites
} else {
geoSiteList.Entry = make([]*router.GeoSite, 0, len(gs.Sites)-deniedlen)
for i, site := range gs.Sites {
if !deniedMap[i] {
geoSiteList.Entry = append(geoSiteList.Entry, site)
}
}
}
}
protoBytes, err := proto.Marshal(geoSiteList)
if err != nil {
return fmt.Errorf("failed to marshal: %w", err)
}
if err := os.WriteFile(filepath.Join(*outputDir, datFileName), protoBytes, 0644); err != nil {
return fmt.Errorf("failed to write file %q: %w", datFileName, err)
}
fmt.Printf("dat %q has been generated successfully\n", datFileName)
return nil
} }
func writePlainList(listname string, entries []*Entry) error { func writePlainList(listname string, entries []*Entry) error {
@@ -89,46 +192,28 @@ func writePlainList(listname string, entries []*Entry) error {
return w.Flush() return w.Flush()
} }
func parseEntry(line string) (*Entry, []string, error) { func parseEntry(typ, rule string) (*Entry, []string, error) {
entry := new(Entry) entry := &Entry{Type: typ}
parts := strings.Fields(line) parts := strings.Fields(rule)
if len(parts) == 0 { if len(parts) == 0 {
return entry, nil, fmt.Errorf("empty line") return entry, nil, fmt.Errorf("empty domain rule")
} }
// Parse value
// Parse type and value switch entry.Type {
typ, val, isTypeSpecified := strings.Cut(parts[0], ":") case dlc.RuleTypeRegexp:
typ = strings.ToLower(typ) if _, err := regexp.Compile(parts[0]); err != nil {
if !isTypeSpecified { // Default RuleType return entry, nil, fmt.Errorf("invalid regexp %q: %w", parts[0], err)
if !validateDomainChars(typ) {
return entry, nil, fmt.Errorf("invalid domain: %q", typ)
} }
entry.Type = dlc.RuleTypeDomain entry.Value = parts[0]
entry.Value = typ case dlc.RuleTypeDomain, dlc.RuleTypeFullDomain, dlc.RuleTypeKeyword:
} else { entry.Value = strings.ToLower(parts[0])
switch typ { if !validateDomainChars(entry.Value) {
case dlc.RuleTypeRegexp: return entry, nil, fmt.Errorf("invalid domain: %q", entry.Value)
if _, err := regexp.Compile(val); err != nil {
return entry, nil, fmt.Errorf("invalid regexp %q: %w", val, err)
}
entry.Type = dlc.RuleTypeRegexp
entry.Value = val
case dlc.RuleTypeInclude:
entry.Type = dlc.RuleTypeInclude
entry.Value = strings.ToUpper(val)
if !validateSiteName(entry.Value) {
return entry, nil, fmt.Errorf("invalid included list name: %q", entry.Value)
}
case dlc.RuleTypeDomain, dlc.RuleTypeFullDomain, dlc.RuleTypeKeyword:
entry.Type = typ
entry.Value = strings.ToLower(val)
if !validateDomainChars(entry.Value) {
return entry, nil, fmt.Errorf("invalid domain: %q", entry.Value)
}
default:
return entry, nil, fmt.Errorf("invalid type: %q", typ)
} }
default:
return entry, nil, fmt.Errorf("unknown rule type: %q", entry.Type)
} }
plen := len(entry.Type) + len(entry.Value) + 1
// Parse attributes and affiliations // Parse attributes and affiliations
var affs []string var affs []string
@@ -140,6 +225,7 @@ func parseEntry(line string) (*Entry, []string, error) {
return entry, affs, fmt.Errorf("invalid attribute: %q", attr) return entry, affs, fmt.Errorf("invalid attribute: %q", attr)
} }
entry.Attrs = append(entry.Attrs, attr) entry.Attrs = append(entry.Attrs, attr)
plen += 2 + len(attr)
case '&': case '&':
aff := strings.ToUpper(part[1:]) aff := strings.ToUpper(part[1:])
if !validateSiteName(aff) { if !validateSiteName(aff) {
@@ -147,33 +233,70 @@ func parseEntry(line string) (*Entry, []string, error) {
} }
affs = append(affs, aff) affs = append(affs, aff)
default: default:
return entry, affs, fmt.Errorf("invalid attribute/affiliation: %q", part) return entry, affs, fmt.Errorf("unknown field: %q", part)
} }
} }
if entry.Type != dlc.RuleTypeInclude { slices.Sort(entry.Attrs) // Sort attributes
slices.Sort(entry.Attrs) // Sort attributes // Formated plain entry: type:domain.tld:@attr1,@attr2
// Formated plain entry: type:domain.tld:@attr1,@attr2 var plain strings.Builder
var plain strings.Builder plain.Grow(plen)
plain.Grow(len(entry.Type) + len(entry.Value) + 10) plain.WriteString(entry.Type)
plain.WriteString(entry.Type) plain.WriteByte(':')
plain.WriteByte(':') plain.WriteString(entry.Value)
plain.WriteString(entry.Value) for i, attr := range entry.Attrs {
for i, attr := range entry.Attrs { if i == 0 {
if i == 0 { plain.WriteByte(':')
plain.WriteByte(':') } else {
} else { plain.WriteByte(',')
plain.WriteByte(',')
}
plain.WriteByte('@')
plain.WriteString(attr)
} }
entry.Plain = plain.String() plain.WriteByte('@')
plain.WriteString(attr)
} }
entry.Plain = plain.String()
return entry, affs, nil return entry, affs, nil
} }
// parseInclusion parses the text that follows the "include:" prefix of a data
// line into an Inclusion.
//
// The first whitespace-separated field is the name of the included list; it is
// upper-cased and checked with validateSiteName. Every following field must be
// an attribute filter: "@attr" is appended to MustAttrs and "@-attr" is
// appended to BanAttrs, with attribute names lower-cased and checked with
// validateAttrChars. An "&" affiliation or any other field is rejected.
//
// An empty rule returns a nil Inclusion and an error. Any later validation
// failure returns the partially filled Inclusion together with the error.
func parseInclusion(rule string) (*Inclusion, error) {
	parts := strings.Fields(rule)
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty inclusion")
	}
	inc := &Inclusion{Source: strings.ToUpper(parts[0])}
	if !validateSiteName(inc.Source) {
		return inc, fmt.Errorf("invalid included list name: %q", inc.Source)
	}

	// Parse attributes
	for _, part := range parts[1:] {
		// part is never empty: strings.Fields does not yield empty fields.
		switch part[0] {
		case '@':
			attr := strings.ToLower(part[1:])
			if attr == "" {
				// A bare "@" carries no name; reject it here because the
				// attr[0] check below would panic on an empty string.
				return inc, fmt.Errorf("invalid must attribute: %q", attr)
			}
			if attr[0] == '-' {
				battr := attr[1:]
				if !validateAttrChars(battr) {
					return inc, fmt.Errorf("invalid ban attribute: %q", battr)
				}
				inc.BanAttrs = append(inc.BanAttrs, battr)
			} else {
				if !validateAttrChars(attr) {
					return inc, fmt.Errorf("invalid must attribute: %q", attr)
				}
				inc.MustAttrs = append(inc.MustAttrs, attr)
			}
		case '&':
			return inc, fmt.Errorf("affiliation is not allowed for inclusion")
		default:
			return inc, fmt.Errorf("unknown field: %q", part)
		}
	}
	return inc, nil
}
func validateDomainChars(domain string) bool { func validateDomainChars(domain string) bool {
if domain == "" {
return false
}
for i := range domain { for i := range domain {
c := domain[i] c := domain[i]
if (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '.' || c == '-' { if (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '.' || c == '-' {
@@ -185,9 +308,12 @@ func validateDomainChars(domain string) bool {
} }
func validateAttrChars(attr string) bool { func validateAttrChars(attr string) bool {
if attr == "" {
return false
}
for i := range attr { for i := range attr {
c := attr[i] c := attr[i]
if (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '!' || c == '-' { if (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '!' {
continue continue
} }
return false return false
@@ -196,6 +322,9 @@ func validateAttrChars(attr string) bool {
} }
func validateSiteName(name string) bool { func validateSiteName(name string) bool {
if name == "" {
return false
}
for i := range name { for i := range name {
c := name[i] c := name[i]
if (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '!' || c == '-' { if (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '!' || c == '-' {
@@ -232,26 +361,23 @@ func (p *Processor) loadData(listName string, path string) error {
if line == "" { if line == "" {
continue continue
} }
entry, affs, err := parseEntry(line) typ, rule, isTypeSpecified := strings.Cut(line, ":")
if err != nil { if !isTypeSpecified { // Default RuleType
return fmt.Errorf("error in %q at line %d: %w", path, lineIdx, err) typ, rule = dlc.RuleTypeDomain, typ
} else {
typ = strings.ToLower(typ)
} }
if typ == dlc.RuleTypeInclude {
if entry.Type == dlc.RuleTypeInclude { inc, err := parseInclusion(rule)
inc := &Inclusion{Source: entry.Value} if err != nil {
for _, attr := range entry.Attrs { return fmt.Errorf("error in %q at line %d: %w", path, lineIdx, err)
if attr[0] == '-' {
inc.BanAttrs = append(inc.BanAttrs, attr[1:])
} else {
inc.MustAttrs = append(inc.MustAttrs, attr)
}
}
for _, aff := range affs {
apl := p.getOrCreateParsedList(aff)
apl.Inclusions = append(apl.Inclusions, inc)
} }
pl.Inclusions = append(pl.Inclusions, inc) pl.Inclusions = append(pl.Inclusions, inc)
} else { } else {
entry, affs, err := parseEntry(typ, rule)
if err != nil {
return fmt.Errorf("error in %q at line %d: %w", path, lineIdx, err)
}
for _, aff := range affs { for _, aff := range affs {
apl := p.getOrCreateParsedList(aff) apl := p.getOrCreateParsedList(aff)
apl.Entries = append(apl.Entries, entry) apl.Entries = append(apl.Entries, entry)
@@ -259,7 +385,7 @@ func (p *Processor) loadData(listName string, path string) error {
pl.Entries = append(pl.Entries, entry) pl.Entries = append(pl.Entries, entry)
} }
} }
return nil return scanner.Err()
} }
func isMatchAttrFilters(entry *Entry, incFilter *Inclusion) bool { func isMatchAttrFilters(entry *Entry, incFilter *Inclusion) bool {
@@ -360,6 +486,9 @@ func (p *Processor) resolveList(plname string) error {
} }
} }
} }
if len(roughMap) == 0 {
return fmt.Errorf("empty list")
}
p.finalMap[plname] = polishList(roughMap) p.finalMap[plname] = polishList(roughMap)
return nil return nil
} }
@@ -387,13 +516,15 @@ func run() error {
return fmt.Errorf("failed to loadData: %w", err) return fmt.Errorf("failed to loadData: %w", err)
} }
// Generate finalMap // Generate finalMap
processor.finalMap = make(map[string][]*Entry, len(processor.plMap)) sitesCount := len(processor.plMap)
processor.finalMap = make(map[string][]*Entry, sitesCount)
processor.cirIncMap = make(map[string]bool) processor.cirIncMap = make(map[string]bool)
for plname := range processor.plMap { for plname := range processor.plMap {
if err := processor.resolveList(plname); err != nil { if err := processor.resolveList(plname); err != nil {
return fmt.Errorf("failed to resolveList %q: %w", plname, err) return fmt.Errorf("failed to resolveList %q: %w", plname, err)
} }
} }
processor.plMap = nil
// Make sure output directory exists // Make sure output directory exists
if err := os.MkdirAll(*outputDir, 0755); err != nil { if err := os.MkdirAll(*outputDir, 0755); err != nil {
@@ -403,47 +534,58 @@ func run() error {
for rawEpList := range strings.SplitSeq(*exportLists, ",") { for rawEpList := range strings.SplitSeq(*exportLists, ",") {
if epList := strings.TrimSpace(rawEpList); epList != "" { if epList := strings.TrimSpace(rawEpList); epList != "" {
entries, exist := processor.finalMap[strings.ToUpper(epList)] entries, exist := processor.finalMap[strings.ToUpper(epList)]
if !exist || len(entries) == 0 { if !exist {
fmt.Printf("list %q does not exist or is empty\n", epList) fmt.Printf("[Warn] list %q does not exist\n", epList)
continue continue
} }
if err := writePlainList(epList, entries); err != nil { if err := writePlainList(epList, entries); err != nil {
fmt.Printf("failed to write list %q: %v\n", epList, err) fmt.Printf("[Error] failed to write list %q: %v\n", epList, err)
continue continue
} }
fmt.Printf("list %q has been generated successfully.\n", epList) fmt.Printf("list %q has been generated successfully\n", epList)
} }
} }
// Generate dat file // Generate proto sites
protoList := new(router.GeoSiteList) gs := &GeoSites{
for siteName, siteEntries := range processor.finalMap { Sites: make([]*router.GeoSite, 0, sitesCount),
site, err := makeProtoList(siteName, siteEntries) SiteIdx: make(map[string]int, sitesCount),
if err != nil {
return fmt.Errorf("failed to makeProtoList %q: %w", siteName, err)
}
protoList.Entry = append(protoList.Entry, site)
} }
// Sort protoList so the marshaled list is reproducible for siteName, siteEntries := range processor.finalMap {
slices.SortFunc(protoList.Entry, func(a, b *router.GeoSite) int { gs.Sites = append(gs.Sites, makeProtoList(siteName, siteEntries))
}
processor = nil
// Sort proto sites so the generated file is reproducible
slices.SortFunc(gs.Sites, func(a, b *router.GeoSite) int {
return strings.Compare(a.CountryCode, b.CountryCode) return strings.Compare(a.CountryCode, b.CountryCode)
}) })
for i := range sitesCount {
gs.SiteIdx[gs.Sites[i].CountryCode] = i
}
protoBytes, err := proto.Marshal(protoList) // Load tasks and generate dat files
if err != nil { var tasks []DatTask
return fmt.Errorf("failed to marshal: %w", err) if *datProfile == "" {
tasks = []DatTask{{Name: *outputName, Mode: ModeAll}}
} else {
var err error
tasks, err = loadTasks(*datProfile)
if err != nil {
return fmt.Errorf("failed to loadTasks %q: %v", *datProfile, err)
}
} }
if err := os.WriteFile(filepath.Join(*outputDir, *outputName), protoBytes, 0644); err != nil { for _, task := range tasks {
return fmt.Errorf("failed to write output: %w", err) if err := gs.assembleDat(task); err != nil {
fmt.Printf("[Error] failed to assembleDat %q: %v", task.Name, err)
}
} }
fmt.Printf("%q has been generated successfully.\n", *outputName)
return nil return nil
} }
func main() { func main() {
flag.Parse() flag.Parse()
if err := run(); err != nil { if err := run(); err != nil {
fmt.Printf("Fatal error: %v\n", err) fmt.Printf("[Fatal] critical error: %v\n", err)
os.Exit(1) os.Exit(1)
} }
} }