Unbound: rebuild unbound blacklist download

o move all templates into one standard ini file containing all required info for the blacklists
o add syslog output (in unbound log view) about processing and error handling
o validate "whitelist" regexp entries before usage
o lock process while performing download task
o don't try to download in parallel, the gain is relatively small and adds complexity.
o remove last unboundplus action, migrate cron jobs if they exist
This commit is contained in:
Ad Schellevis 2020-09-29 16:46:55 +02:00
parent 5f108f698e
commit f16b67232c
12 changed files with 263 additions and 231 deletions

View File

@ -44,10 +44,9 @@ class ServiceController extends ApiMutableServiceControllerBase
/**
 * Reload the Unbound templates through configd and trigger the "unbound dnsbl" action.
 *
 * NOTE(review): this span is rendered from a diff without +/- markers, so two
 * "template reload" calls and two "unbound dnsbl" calls appear back to back.
 * Presumably only the second pair (no arguments passed to the action) is meant
 * to remain - confirm against the repository.
 *
 * @return array the configd response, stored under the "status" key
 */
public function dnsblAction()
{
$this->sessionClose();
$mdl = new Dnsbl();
$backend = new Backend();
// variant 1: reload the OPNsense/Unbound templates and pass the selected list type(s) to the action
$backend->configdRun('template reload OPNsense/Unbound');
$response = $backend->configdpRun('unbound dnsbl', array((string)$mdl->type));
// variant 2: reload using the OPNsense/Unbound/* pattern and run the action without arguments
$backend->configdRun('template reload OPNsense/Unbound/*');
$response = $backend->configdpRun('unbound dnsbl');
return array("status" => $response);
}
}

View File

@ -1,6 +1,6 @@
<model>
<mount>//OPNsense/cron</mount>
<version>1.0.1</version>
<version>1.0.2</version>
<description>Cron settings</description>
<items>
<jobs>

View File

@ -0,0 +1,48 @@
<?php
/**
* Copyright (C) 2020 Deciso B.V.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
namespace OPNsense\Cron\Migrations;
use OPNsense\Base\BaseModelMigration;
/**
 * Migrate cron jobs that still reference the "unboundplus dnsbl" action so
 * they call "unbound dnsbl" instead.
 */
class M1_0_2 extends BaseModelMigration
{
    /**
     * Rewrite the command of every cron job that uses the old action name.
     * Jobs with any other command are left untouched.
     *
     * @param mixed $model the cron model being migrated; its jobs are reached
     *                     through $model->jobs->job->iterateItems()
     */
    public function run($model)
    {
        foreach ($model->jobs->job->iterateItems() as $node) {
            // Compare the string value strictly instead of relying on the loose
            // object-to-string comparison performed by "==".
            if ((string)$node->command === 'unboundplus dnsbl') {
                $node->command = 'unbound dnsbl';
            }
        }
    }
}

View File

@ -1,204 +0,0 @@
#!/usr/local/bin/python3
# DNS BL script
# Copyright (c) 2020 Petr Kejval <petr.kejval6@gmail.com>
# Downloads blacklisted domains from user specified URLs and "compile" them into unbound.conf compatible file
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
import re, urllib3, threading, subprocess
# Matches either a hosts-file style line ("127.0.0.1 <domain>" / "0.0.0.0 <domain>",
# domain captured in group 2) or a line starting with a bare domain (group 3).
re_blacklist = re.compile(r'(^127\.0\.0\.1[\s]+|^0\.0\.0\.0[\s]+)([0-9a-z_.-]+)(?:\s|$)|^([0-9a-z_.-]+)(?:\s|$)', re.I)
re_whitelist = re.compile(r'$^') # default - match nothing
# Domains collected from all downloaded lists (filled by add_to_blacklist).
blacklist = set()
# URLs to download (filled by load_blacklists and load_predefined_lists).
urls = set()
# Shortcode -> download URL of the predefined lists; load_predefined_lists()
# looks up the shortcodes it reads from dnsbl.inc in this table.
predefined_lists = {
"aa": "https://adaway.org/hosts.txt",
"ag": "https://justdomains.github.io/blocklists/lists/adguarddns-justdomains.txt",
"bla0": "https://blocklistproject.github.io/Lists/abuse.txt",
"bla": "https://blocklistproject.github.io/Lists/ads.txt",
"blc": "https://blocklistproject.github.io/Lists/crypto.txt",
"bld": "https://blocklistproject.github.io/Lists/drugs.txt",
"blf0": "https://blocklistproject.github.io/Lists/facebook.txt",
"blf": "https://blocklistproject.github.io/Lists/fraud.txt",
"blg": "https://blocklistproject.github.io/Lists/gambling.txt",
"blm": "https://blocklistproject.github.io/Lists/malware.txt",
"blp": "https://blocklistproject.github.io/Lists/phishing.txt",
"blp0": "https://blocklistproject.github.io/Lists/piracy.txt",
"blp1": "https://blocklistproject.github.io/Lists/porn.txt",
"blr": "https://blocklistproject.github.io/Lists/ransomware.txt",
"blr0": "https://blocklistproject.github.io/Lists/redirect.txt",
"bls": "https://blocklistproject.github.io/Lists/scam.txt",
"blt": "https://blocklistproject.github.io/Lists/tiktok.txt",
"blt0": "https://blocklistproject.github.io/Lists/torrent.txt",
"blt1": "https://blocklistproject.github.io/Lists/tracking.txt",
"bly": "https://blocklistproject.github.io/Lists/youtube.txt",
"el": "https://justdomains.github.io/blocklists/lists/easylist-justdomains.txt",
"ep": "https://justdomains.github.io/blocklists/lists/easyprivacy-justdomains.txt",
"nc": "https://justdomains.github.io/blocklists/lists/nocoin-justdomains.txt",
"pa": "https://raw.githubusercontent.com/chadmayfield/my-pihole-blocklists/master/lists/pi_blocklist_porn_all.list",
"pt": "https://raw.githubusercontent.com/chadmayfield/pihole-blocklists/master/lists/pi_blocklist_porn_top1m.list",
"sa": "https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt",
"sb": "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts",
"st": "https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt",
"ws": "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt",
"wsu": "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/update.txt",
"wse": "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/extra.txt",
"yy": "http://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&mimetype=plaintext"
}
def add_to_blacklist(domain):
    """Record `domain` in the global blacklist set unless the whitelist regex matches it."""
    if re_whitelist.match(domain) is None:
        blacklist.add(domain)
def parse_line(line):
    """Extract the domain from one downloaded line and hand it to add_to_blacklist().

    Literal backslash-t / backslash-r sequences are normalised first, then the
    line is matched against re_blacklist. The domain is taken from group 2
    (hosts-file style line) or, failing that, from group 3 (bare domain).
    Lines that do not match are ignored.
    """
    cleaned = line.replace('\\t', " ").replace('\\r', "")
    hit = re_blacklist.match(cleaned)
    if not hit:
        return
    domain = hit.group(2) if hit.group(2) is not None else hit.group(3)
    if domain is not None:
        add_to_blacklist(domain)
def process_url(url):
    """Download one blacklist and feed each of its lines to parse_line().

    Download and parse errors are printed and otherwise ignored, so a single
    failing list does not stop the remaining downloads.

    :param url: location of the blacklist to fetch
    """
    print(f"Processing BL items from: {url}")
    try:
        http = urllib3.PoolManager(timeout=5.0)
        r = http.request('GET', url, retries=2)
        if r.status == 200:
            # Decode the payload rather than taking str() of the bytes object:
            # the repr form prefixes the first line with "b'" (so it never
            # matched) and leaves a stray quote behind as the last "line".
            for line in r.data.decode('utf-8', errors='replace').splitlines():
                parse_line(line)
        else:
            print(f"Skipping {url}: unexpected HTTP status {r.status}")
    except Exception as e:
        print(str(e))
def save_config_file():
    """Write the collected domains to /var/unbound/etc/dnsbl.conf.

    Each domain becomes a `local-data` A record pointing at 0.0.0.0 below a
    `server:` header; without domains the file is left empty. Any error while
    writing is printed and ends the process with exit status 1.
    """
    print(f"Saving {len(blacklist)} blacklisted domains into dnsbl.conf")
    try:
        with open("/var/unbound/etc/dnsbl.conf", 'w') as conf:
            if len(blacklist) != 0:
                conf.write('server:\n')
                for domain in blacklist:
                    conf.write('local-data: "' + str(domain) + ' A 0.0.0.0"\n')
            else:
                # No domains found or DNSBL is disabled
                conf.write("")
    except Exception as e:
        print(str(e))
        exit(1)
def load_list(path, separator=None):
    """Read the file at `path` into a set of unique strings.

    With a separator every line is split on it and each piece is stored; with
    separator=None each whole line is stored. Newline characters are removed
    from the stored values. Errors are printed and whatever was collected so
    far (possibly nothing) is returned.

    :param path: file to read
    :param separator: optional string to split each line on
    :return: set of strings
    """
    items = set()
    try:
        with open(path, 'r') as handle:
            for raw in handle.readlines():
                pieces = [raw] if separator == None else raw.split(separator)
                for piece in pieces:
                    items.add(piece.replace('\n', ''))
    except Exception as e:
        print(str(e))
    return items
def load_whitelist():
    """Build the global whitelist regex from whitelist.inc plus two built-in patterns.

    Every pattern is validated on its own: empty entries are dropped (an empty
    alternative would match, and therefore whitelist, every domain) and
    patterns that fail to compile are reported and skipped instead of
    discarding the whole whitelist. If the combined expression still cannot be
    compiled, the error is printed and re_whitelist keeps its previous value.
    """
    global re_whitelist
    print("Loading whitelist")
    candidates = load_list('/var/unbound/etc/whitelist.inc', ',')
    candidates.add(r'.*localhost$')
    candidates.add(r'^(?![a-zA-Z\d]).*') # Exclude domains NOT starting with alphanumeric char
    valid = []
    for pattern in sorted(candidates):
        pattern = pattern.strip()
        if not pattern:
            continue
        try:
            re.compile(pattern, re.I)
        except re.error as e:
            print(f"Skipping invalid whitelist pattern {pattern!r}: {e}")
            continue
        valid.append(pattern)
    print(f"Loaded {len(valid)} whitelist items")
    try:
        re_whitelist = re.compile('|'.join(valid), re.I)
    except Exception as e:
        print(f"Whitelist regex compile failed: {str(e)}")
def load_blacklists():
    """Load the user defined blacklist URLs from lists.inc into the global `urls` set.

    Surrounding whitespace is stripped and empty entries (blank lines, trailing
    separators) are dropped so they are not treated as download locations.
    """
    global urls
    print("Loading blacklists URLs")
    entries = load_list('/var/unbound/etc/lists.inc', ',')
    urls = {entry.strip() for entry in entries if entry.strip()}
    print(f"Loaded {len(urls)} blacklists URLs")
def load_predefined_lists():
    """Add the URLs of the predefined lists selected in dnsbl.inc to the global `urls` set.

    Every line of the form key="code1,code2" is evaluated; lines without '='
    (for example blank lines) are skipped instead of raising. Shortcodes that
    are not present in predefined_lists are reported and ignored.
    """
    global urls
    print("Loading predefined lists URLs")
    types = set()
    for line in load_list('/var/unbound/etc/dnsbl.inc'):
        _key, found, value = str(line).partition('=')
        if not found:
            continue
        for code in value.replace('"', '').replace('\n', '').split(','):
            code = code.strip()
            if code:
                types.add(code)
    print(f"Loaded {len(types)} predefined blacklists URLs")
    for code in types:
        if code in predefined_lists:
            urls.add(predefined_lists[code])
        else:
            print(f"Unknown predefined list: {code}")
if __name__ == "__main__":
    # Collect the whitelist and all download URLs from the generated config files
    for prepare in (load_whitelist, load_blacklists, load_predefined_lists):
        prepare()

    # One worker thread per URL; wait for all of them before writing the result
    workers = []
    for url in urls:
        workers.append(threading.Thread(target=process_url, args=(url,)))
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    save_config_file()
    print("Restarting unbound service")
    subprocess.Popen(["pluginctl", "-s", "unbound", "restart"])
    exit(0)

View File

@ -0,0 +1,144 @@
#!/usr/local/bin/python3
"""
Copyright (c) 2020 Ad Schellevis <ad@opnsense.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import codecs
import fcntl
import os
import re
import sys
import syslog
import tempfile
import time
from configparser import ConfigParser

import requests
def uri_reader(uri):
    """Stream a remote text file and yield it line by line (without the newline).

    The body is read in 1024 byte chunks. The unterminated tail of every chunk
    is kept back and prepended to the next one, so a line is only yielded once
    it is complete. A final line without a trailing newline is yielded at the
    end of the stream.

    Connection errors and non-2xx responses are reported to syslog and produce
    no lines.

    :param uri: location of the file to download
    :return: generator of str
    """
    req_opts = {
        'url': uri,
        'timeout': 120,
        'stream': True
    }
    try:
        req = requests.get(**req_opts)
    except Exception as e:
        syslog.syslog(syslog.LOG_ERR,'blacklist download : unable to download file from %s (error : %s)' % (uri, e))
        return

    try:
        if req.status_code >= 200 and req.status_code <= 299:
            req.raw.decode_content = True
            # Incremental decoding keeps a multi-byte character that straddles
            # a chunk boundary intact; undecodable bytes are replaced rather
            # than aborting the download.
            decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
            pending = ''
            while True:
                chunk = req.raw.read(1024)
                if not chunk:
                    pending += decoder.decode(b'', final=True)
                    if pending:
                        yield pending
                    break
                parts = (pending + decoder.decode(chunk)).split('\n')
                # The last element is always the not yet terminated remainder
                # (an empty string when the chunk ended on a newline).
                pending = parts.pop()
                for part in parts:
                    yield part
        else:
            syslog.syslog(syslog.LOG_ERR,
                'blacklist download : unable to download file from %s (status_code: %d)' % (uri, req.status_code)
            )
    finally:
        # streamed responses keep their connection until explicitly closed
        req.close()
if __name__ == '__main__':
    # check for a running download process, this may take a while so it's better to check...
    try:
        lck = open('/tmp/unbound-download_blacklists.tmp', 'w+')
        fcntl.flock(lck, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # already running, exit status 99
        sys.exit(99)

    startup_time = time.time()
    syslog.openlog('unbound', logoption=syslog.LOG_DAEMON, facility=syslog.LOG_LOCAL4)
    blacklist_items = set()
    # default when the configuration has no [exclude] section: '$^' matches nothing
    whitelist_pattern = re.compile('$^')
    if os.path.exists('/var/unbound/etc/blacklists.conf'):
        # interpolation is disabled, urls and regular expressions may contain a literal '%'
        cnf = ConfigParser(interpolation=None)
        cnf.read('/var/unbound/etc/blacklists.conf')
        # exclude (white) lists, compile to regex to be used to filter blacklist entries
        if cnf.has_section('exclude'):
            exclude_list = set()
            for exclude_item in cnf['exclude']:
                exclude_pattern = cnf['exclude'][exclude_item]
                if not exclude_pattern:
                    # an empty alternative would match (and so exclude) every domain
                    continue
                try:
                    re.compile(exclude_pattern, re.IGNORECASE)
                    exclude_list.add(exclude_pattern)
                except re.error:
                    syslog.syslog(syslog.LOG_ERR,
                        'blacklist download : skip invalid whitelist exclude pattern "%s" (%s)' % (
                            exclude_item, exclude_pattern
                        )
                    )
            if not exclude_list:
                exclude_list.add('$^')

            wp = '|'.join(exclude_list)
            whitelist_pattern = re.compile(wp, re.IGNORECASE)
            syslog.syslog(syslog.LOG_NOTICE, 'blacklist download : exclude domains matching %s' % wp)

        # fetch all blacklists
        if cnf.has_section('blacklists'):
            for blacklist in cnf['blacklists']:
                file_stats = {'uri': cnf['blacklists'][blacklist], 'skip' : 0, 'blacklist': 0, 'lines' :0}
                for line in uri_reader(cnf['blacklists'][blacklist]):
                    file_stats['lines'] += 1
                    # cut line into parts before comment marker (if any)
                    tmp = line.split('#')[0].split()
                    # use the last field that is not a sinkhole address; a line
                    # consisting of addresses only carries no domain at all
                    entry = None
                    while tmp:
                        candidate = tmp.pop(-1)
                        if candidate not in ['127.0.0.1', '0.0.0.0']:
                            entry = candidate
                            break
                    if entry:
                        if whitelist_pattern.match(entry):
                            file_stats['skip'] += 1
                        else:
                            file_stats['blacklist'] += 1
                            blacklist_items.add(entry)

                syslog.syslog(
                    syslog.LOG_NOTICE,
                    'blacklist download %(uri)s (lines: %(lines)d exclude: %(skip)d black: %(blacklist)d)' % file_stats
                )

    # write out results
    with open("/var/unbound/etc/dnsbl.conf", 'w') as unbound_outf:
        if blacklist_items:
            unbound_outf.write('server:\n')
            for entry in blacklist_items:
                unbound_outf.write("local-data: \"%s A 0.0.0.0\"\n" % entry)

    syslog.syslog(syslog.LOG_NOTICE, "blacklist download done in %0.2f seconds (%d records)" % (
        time.time() - startup_time, len(blacklist_items)
    ))

View File

@ -53,7 +53,7 @@ type:script
message:Start Unbound
[dnsbl]
command:/usr/local/opnsense/scripts/unbound/dnsbl.py
command:/usr/local/opnsense/scripts/unbound/download_blacklists.py && /usr/local/sbin/pluginctl unbound restart
parameters:
type:script
message:fetching and applying DNSBLs

View File

@ -1,10 +0,0 @@
###################################################################
# Compatibility layer, to be removed in a future version
# - keeps old plugin cron jobs functional
###################################################################
[dnsbl]
command:/usr/local/opnsense/scripts/unbound/dnsbl.py
parameters:
type:script
message:fetching and applying DNSBLs
description: Download Unbound DNSBLs and restart

View File

@ -1,6 +1,4 @@
root.min.hints:/var/unbound/root.hints
dnsbl.inc:/var/unbound/etc/dnsbl.inc
whitelist.inc:/var/unbound/etc/whitelist.inc
miscellaneous.conf:/var/unbound/etc/miscellaneous.conf
dot.conf:/var/unbound/etc/dot.conf
lists.inc:/var/unbound/etc/lists.inc
blacklists.conf:/var/unbound/etc/blacklists.conf

View File

@ -0,0 +1,66 @@
{# Shortcode -> download URL of the predefined blacklists; the shortcodes selected in dnsbl.type are looked up here. -#}
{%
set predefined = {
"aa": "https://adaway.org/hosts.txt",
"ag": "https://justdomains.github.io/blocklists/lists/adguarddns-justdomains.txt",
"bla0": "https://blocklistproject.github.io/Lists/abuse.txt",
"bla": "https://blocklistproject.github.io/Lists/ads.txt",
"blc": "https://blocklistproject.github.io/Lists/crypto.txt",
"bld": "https://blocklistproject.github.io/Lists/drugs.txt",
"blf0": "https://blocklistproject.github.io/Lists/facebook.txt",
"blf": "https://blocklistproject.github.io/Lists/fraud.txt",
"blg": "https://blocklistproject.github.io/Lists/gambling.txt",
"blm": "https://blocklistproject.github.io/Lists/malware.txt",
"blp": "https://blocklistproject.github.io/Lists/phishing.txt",
"blp0": "https://blocklistproject.github.io/Lists/piracy.txt",
"blp1": "https://blocklistproject.github.io/Lists/porn.txt",
"blr": "https://blocklistproject.github.io/Lists/ransomware.txt",
"blr0": "https://blocklistproject.github.io/Lists/redirect.txt",
"bls": "https://blocklistproject.github.io/Lists/scam.txt",
"blt": "https://blocklistproject.github.io/Lists/tiktok.txt",
"blt0": "https://blocklistproject.github.io/Lists/torrent.txt",
"blt1": "https://blocklistproject.github.io/Lists/tracking.txt",
"bly": "https://blocklistproject.github.io/Lists/youtube.txt",
"el": "https://justdomains.github.io/blocklists/lists/easylist-justdomains.txt",
"ep": "https://justdomains.github.io/blocklists/lists/easyprivacy-justdomains.txt",
"nc": "https://justdomains.github.io/blocklists/lists/nocoin-justdomains.txt",
"pa": "https://raw.githubusercontent.com/chadmayfield/my-pihole-blocklists/master/lists/pi_blocklist_porn_all.list",
"pt": "https://raw.githubusercontent.com/chadmayfield/pihole-blocklists/master/lists/pi_blocklist_porn_top1m.list",
"sa": "https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt",
"sb": "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts",
"st": "https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt",
"ws": "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt",
"wsu": "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/update.txt",
"wse": "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/extra.txt",
"yy": "http://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&mimetype=plaintext"
}
%}
{# Sections are only emitted while dnsbl is enabled: [blacklists] lists the download locations, [exclude] the whitelist patterns. -#}
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.enabled') %}
[blacklists]
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.type') %}
{% for shortcode in OPNsense.unboundplus.dnsbl.type.split(',') %}
{% if shortcode in predefined %}
predefined_{{ shortcode }}={{ predefined[shortcode] }}
{% else %}
#{{ shortcode }}=##not_found##
{% endif %}
{% endfor %}
{% endif %}
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.lists') %}
{% for uri in OPNsense.unboundplus.dnsbl.lists.split(',') %}
{% if uri|trim %}
custom_{{loop.index}}={{ uri|trim }}
{% endif %}
{% endfor %}
{% endif %}
[exclude]
# exclude localhost entries
default_pattern_1=.*localhost$
# exclude non domain entries
default_pattern_2=^(?![a-zA-Z\d]).*
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.whitelists')%}
# user defined
{% for pattern in OPNsense.unboundplus.dnsbl.whitelists.split(',') %}
{% if pattern|trim %}
custom_pattern_{{loop.index}}={{ pattern|trim }}
{% endif %}
{% endfor %}
{% endif %}
{% endif %}

View File

@ -1,3 +0,0 @@
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.enabled') and not helpers.empty('OPNsense.unboundplus.dnsbl.type') %}
unbound_dnsbl="{{ OPNsense.unboundplus.dnsbl.type }}"
{% endif %}

View File

@ -1,3 +0,0 @@
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.enabled') and not helpers.empty('OPNsense.unboundplus.dnsbl.lists') %}
{{ OPNsense.unboundplus.dnsbl.lists|default("") }}
{% endif %}

View File

@ -1,3 +0,0 @@
{% if not helpers.empty('OPNsense.unboundplus.dnsbl.enabled') and not helpers.empty('OPNsense.unboundplus.dnsbl.whitelists')%}
{{ OPNsense.unboundplus.dnsbl.whitelists|default("") }}
{% endif %}