mirror of
https://github.com/lucaspalomodevelop/core.git
synced 2026-03-15 00:54:41 +00:00
Unbound: blocklists: allow scripts to extend blocklist functionality (#6350)
This commit is contained in:
parent
5fa74bfafa
commit
09f40f0e33
2
plist
2
plist
@ -976,6 +976,8 @@
|
||||
/usr/local/opnsense/scripts/system/temperature.sh
|
||||
/usr/local/opnsense/scripts/system/trigger_config_changed_events.py
|
||||
/usr/local/opnsense/scripts/unbound/blocklists.py
|
||||
/usr/local/opnsense/scripts/unbound/blocklists/__init__.py
|
||||
/usr/local/opnsense/scripts/unbound/blocklists/default_bl.py
|
||||
/usr/local/opnsense/scripts/unbound/cache.sh
|
||||
/usr/local/opnsense/scripts/unbound/check.sh
|
||||
/usr/local/opnsense/scripts/unbound/logger.py
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/usr/local/bin/python3
|
||||
|
||||
"""
|
||||
Copyright (c) 2020 Ad Schellevis <ad@opnsense.org>
|
||||
Copyright (c) 2020-2023 Ad Schellevis <ad@opnsense.org>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -26,215 +26,8 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import syslog
|
||||
import tempfile
|
||||
import time
|
||||
import fcntl
|
||||
from configparser import ConfigParser
|
||||
import requests
|
||||
import ujson
|
||||
|
||||
def uri_reader(uri):
    """Stream the blocklist at *uri* and yield it line by line.

    Network or HTTP errors are logged to syslog and end the generator
    early, so callers simply observe fewer lines.
    """
    req_opts = {
        'url': uri,
        'timeout': 5,
        'stream': True  # read in chunks, lists can be large
    }
    try:
        req = requests.get(**req_opts)
    except Exception as e:
        syslog.syslog(syslog.LOG_ERR,'blocklist download : unable to download file from %s (error : %s)' % (uri, e))
        return

    if 200 <= req.status_code <= 299:
        req.raw.decode_content = True
        prev_chop = ''
        while True:
            try:
                chop = req.raw.read(1024).decode()
                if not chop:
                    # flush a trailing line that had no newline terminator
                    if prev_chop:
                        yield prev_chop
                    break
                parts = (prev_chop + chop).split('\n')
                # split('\n') never yields '\n' itself, so the last element
                # is either '' (chunk ended on a newline) or a partial line;
                # keep it for the next read. (The previous comparison
                # `parts[-1] != "\n"` was always true and hence dead code.)
                prev_chop = parts.pop()
                for part in parts:
                    yield part
            except Exception as e:
                syslog.syslog(syslog.LOG_ERR,'blocklist download : error reading file from %s (error : %s)' % (uri, e))
                return
    else:
        syslog.syslog(syslog.LOG_ERR,
            'blocklist download : unable to download file from %s (status_code: %d)' % (uri, req.status_code)
        )
|
||||
from blocklists import BlocklistParser
|
||||
|
||||
if __name__ == '__main__':
    # All heavy lifting (single-instance locking, downloading, caching,
    # merging handler output and atomically publishing
    # /var/unbound/data/dnsbl.json) lives in the pluggable handler
    # framework in the blocklists/ package; this script only delegates.
    bl = BlocklistParser()
    bl.update_blocklist()
|
||||
|
||||
265
src/opnsense/scripts/unbound/blocklists/__init__.py
Executable file
265
src/opnsense/scripts/unbound/blocklists/__init__.py
Executable file
@ -0,0 +1,265 @@
|
||||
#!/usr/local/bin/python3
|
||||
|
||||
"""
|
||||
Copyright (c) 2023 Deciso B.V.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
||||
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
import syslog
|
||||
import re
|
||||
import glob
|
||||
import importlib
|
||||
import sys
|
||||
import fcntl
|
||||
import ujson
|
||||
import time
|
||||
import hashlib
|
||||
from configparser import ConfigParser
|
||||
|
||||
class BaseBlocklistHandler:
    """Base class for pluggable Unbound blocklist handlers.

    A handler loads its own configuration file and produces a dictionary of
    blocked domains (get_blocklist) plus optional static settings
    (get_config). BlocklistParser discovers every subclass in this package
    and merges their results by priority.
    """

    def __init__(self, config=None):
        # path to this handler's ini-style configuration file (may be None)
        self.config = config
        # parsed ConfigParser instance, stays None while no config is loaded
        self.cnf = None
        # merge priority used by BlocklistParser; higher values win collisions
        self.priority = 0

        # location of the blocklist file consumed by Unbound
        self.cur_bl_location = '/var/unbound/data/dnsbl.json'

        # rough syntactic check for a domain name (incl. punycode labels)
        self.domain_pattern = re.compile(
            r'^(([\da-zA-Z_])([_\w-]{,62})\.){,127}(([\da-zA-Z])[_\w-]{,61})'
            r'?([\da-zA-Z]\.((xn\-\-[a-zA-Z\d]+)|([a-zA-Z\d]{2,})))$'
        )

        self._load_config()

    def get_config(self):
        """
        Get statically defined configuration options.
        Derived classes return a dict; the base implementation returns None.
        """
        pass

    def get_blocklist(self):
        """
        Overridden by derived classes to produce a formatted blocklist. Returns a dictionary
        with domains as keys and a dictionary of metadata as values
        """
        pass

    def _load_config(self):
        """
        Parse the configuration file into self.cnf when it exists;
        otherwise self.cnf remains None.
        """
        # also guard against config=None: os.path.exists(None) would raise
        if self.config and os.path.exists(self.config):
            self.cnf = ConfigParser()
            self.cnf.read(self.config)

    def _blocklists_in_config(self):
        """
        Generator for derived classes to iterate over configured blocklist urls.
        Assumes <shortcode>=<url> configuration formatting, yields (url, shortcode).
        """
        if self.cnf and self.cnf.has_section('blocklists'):
            for blocklist in self.cnf['blocklists']:
                list_type = blocklist.split('_', 1)
                # keys starting with "custom" get the generic "Custom" label
                bl_shortcode = 'Custom' if list_type[0] == 'custom' else list_type[1]
                yield (self.cnf['blocklists'][blocklist], bl_shortcode)

    def _domains_in_blocklist(self, blocklist):
        """
        Generator for derived classes to iterate over downloaded domains.
        """
        for line in self._blocklist_reader(blocklist):
            # cut line into parts before comment marker (if any)
            tmp = line.split('#')[0].split()
            entry = None
            while tmp:
                # hosts-file style lists put the sink address first and the
                # domain last, so walk backwards past known sink addresses
                entry = tmp.pop(-1)
                if entry not in ['127.0.0.1', '0.0.0.0']:
                    break
            if entry:
                # NOTE(review): a line holding only a sink address still yields
                # that address; callers filter it out via domain_pattern
                yield entry

    def _blocklist_reader(self, uri):
        """
        Decides whether a blocklist can be read from a cached file or
        needs to be downloaded. Yields (unformatted) domains either way
        """
        total_lines = 0
        from_cache = False
        # cache key: md5 of the uri (not security sensitive, just a filename)
        h = hashlib.md5(uri.encode()).hexdigest()
        cache_loc = '/tmp/bl_cache/'
        if os.path.exists(cache_loc):
            filep = cache_loc + h
            if os.path.exists(filep):
                fstat = os.stat(filep).st_ctime
                if (time.time() - fstat) < 72000: # 20 hours, a bit under the recommended cron time
                    from_cache = True
                    # cached lines keep their trailing newline; consumers
                    # tokenize with split() so that is harmless
                    for line in open(filep):
                        total_lines += 1
                        yield line

        if not from_cache:
            os.makedirs(cache_loc, exist_ok=True)
            # NOTE(review): an aborted download leaves a truncated cache file
            # that will be served for up to 20 hours -- confirm acceptable
            with open(cache_loc + h, 'w') as outf:
                for line in self._uri_reader(uri):
                    outf.write(line + '\n')
                    total_lines += 1
                    yield line

        syslog.syslog(
            syslog.LOG_NOTICE, 'blocklist download: %d total lines %s for %s' %
            (total_lines, 'from cache' if from_cache else 'downloaded', uri)
        )

    def _uri_reader(self, uri):
        """
        Takes a URI and yields domain entries. Network/HTTP errors are logged
        and terminate the generator early.
        """
        req_opts = {
            'url': uri,
            'timeout': 5,
            'stream': True
        }
        try:
            req = requests.get(**req_opts)
        except Exception as e:
            syslog.syslog(syslog.LOG_ERR,'blocklist download : unable to download file from %s (error : %s)' % (uri, e))
            return

        if 200 <= req.status_code <= 299:
            req.raw.decode_content = True
            prev_chop = ''
            while True:
                try:
                    chop = req.raw.read(1024).decode()
                    if not chop:
                        # flush a trailing line without newline terminator
                        if prev_chop:
                            yield prev_chop
                        break
                    parts = (prev_chop + chop).split('\n')
                    # the last element is either '' (chunk ended on a newline)
                    # or a partial line; keep it for the next read. (The old
                    # `parts[-1] != "\n"` comparison was always true.)
                    prev_chop = parts.pop()
                    for part in parts:
                        yield part
                except Exception as e:
                    syslog.syslog(syslog.LOG_ERR,'blocklist download : error reading file from %s (error : %s)' % (uri, e))
                    return
        else:
            syslog.syslog(syslog.LOG_ERR,
                'blocklist download : unable to download file from %s (status_code: %d)' % (uri, req.status_code)
            )
|
||||
|
||||
class BlocklistParser:
    """Discovers every BaseBlocklistHandler subclass in this package and
    merges their blocklists into /var/unbound/data/dnsbl.json."""

    def __init__(self):
        # check for a running download process, this may take a while so it's better to check...
        try:
            # keep the lock file on self: a plain local would be garbage
            # collected when __init__ returns, closing the file and silently
            # releasing the flock while the (long) update is still running
            self._lck = open('/tmp/unbound-download_blocklists.tmp', 'w+')
            fcntl.flock(self._lck, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError:
            # already running, exit status 99
            sys.exit(99)

        syslog.openlog('unbound', facility=syslog.LOG_LOCAL4)
        self.handlers = list()
        self._register_handlers()
        self.startup_time = time.time()

    def _register_handlers(self):
        """Import every module in this package and instantiate each
        BaseBlocklistHandler subclass found in it."""
        handlers = list()
        for filename in glob.glob("%s/*.py" % os.path.dirname(__file__)):
            importlib.import_module(".%s" % os.path.splitext(os.path.basename(filename))[0], __name__)

        for module_name in dir(sys.modules[__name__]):
            for attribute_name in dir(getattr(sys.modules[__name__], module_name)):
                cls = getattr(getattr(sys.modules[__name__], module_name), attribute_name)
                if isinstance(cls, type) and issubclass(cls, BaseBlocklistHandler)\
                        and cls not in (BaseBlocklistHandler,):
                    handlers.append(cls())
        self.handlers = handlers

    def _get_config(self):
        """Return the first non-empty config dict offered by a handler,
        or None when no handler contributes one."""
        for handler in self.handlers:
            cfg = handler.get_config()
            if cfg:
                return cfg

    def _merge_results(self, blocklists):
        """
        Take output of all the handlers and merge based on each handlers' priority.
        The default handler has highest priority
        """
        if not blocklists:
            # no handlers registered / nothing produced
            return {}
        if len(blocklists) == 1:
            return next(iter(blocklists.values()))

        # sort on the dict key (priority), highest first; that handler's
        # entries win any collision below
        # NOTE(review): handlers sharing a priority overwrite each other in
        # update_blocklist() before we ever get here -- confirm intended
        blocklists = dict(sorted(blocklists.items(), reverse=True))
        first = next(iter(blocklists.values()))
        for bl in list(blocklists.values())[1:]:
            for key, value in bl.items():
                if key not in first:
                    # no collision, merge
                    first[key] = value
                else:
                    # a handler with a lower priority has provided a policy
                    # on a domain that already exists in the blocklist,
                    # add it for debugging purposes
                    first[key].setdefault('collisions', []).append(value)

        return first

    def update_blocklist(self):
        """Collect, merge and atomically publish the blocklist for Unbound."""
        blocklists = {}
        merged = {}
        for handler in self.handlers:
            blocklists[handler.priority] = handler.get_blocklist()

        merged['data'] = self._merge_results(blocklists)
        # guard: _get_config() returns None when no handler offers settings,
        # which would make the has_wildcards assignment below raise
        merged['config'] = self._get_config() or {}

        # check if there are wildcards in the dataset
        has_wildcards = False
        for item in merged['data']:
            if merged['data'][item].get('wildcard') == True:
                has_wildcards = True
                break
        merged['config']['has_wildcards'] = has_wildcards

        # write out results
        if not os.path.exists('/var/unbound/data'):
            os.makedirs('/var/unbound/data')
        with open("/var/unbound/data/dnsbl.json.new", 'w') as unbound_outf:
            if merged:
                ujson.dump(merged, unbound_outf)

        # atomically replace the current dnsbl so unbound can pick up on it
        os.replace('/var/unbound/data/dnsbl.json.new', '/var/unbound/data/dnsbl.json')

        syslog.syslog(syslog.LOG_NOTICE, "blocklist parsing done in %0.2f seconds (%d records)" % (
            time.time() - self.startup_time, len(merged['data'])
        ))
|
||||
112
src/opnsense/scripts/unbound/blocklists/default_bl.py
Executable file
112
src/opnsense/scripts/unbound/blocklists/default_bl.py
Executable file
@ -0,0 +1,112 @@
|
||||
#!/usr/local/bin/python3
|
||||
|
||||
"""
|
||||
Copyright (c) 2023 Deciso B.V.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
||||
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
import syslog
|
||||
import re
|
||||
import os
|
||||
import ujson
|
||||
from . import BaseBlocklistHandler
|
||||
|
||||
class DefaultBlocklistHandler(BaseBlocklistHandler):
    """Built-in handler for the stock OPNsense blocklist configuration
    in /tmp/unbound-blocklists.conf."""

    def __init__(self):
        super().__init__('/tmp/unbound-blocklists.conf')
        # highest priority: wins collisions against third-party handlers
        self.priority = 100
        # compiled regex of user-defined whitelist (exclude) patterns
        self._whitelist_pattern = self._get_excludes()

    def get_config(self):
        """Return destination address / rcode overrides from [settings]."""
        cfg = {}
        if self.cnf and self.cnf.has_section('settings'):
            if self.cnf.has_option('settings', 'address'):
                cfg['dst_addr'] = self.cnf.get('settings', 'address')
            if self.cnf.has_option('settings', 'rcode'):
                cfg['rcode'] = self.cnf.get('settings', 'rcode')
        return cfg

    def get_blocklist(self):
        """Download every configured blocklist and fold in the manual
        [include] entries; returns {domain: metadata} for the merger."""
        result = {}
        for blocklist, bl_shortcode in self._blocklists_in_config():
            per_file_stats = {'uri': blocklist, 'skip': 0, 'blocklist': 0}
            for entry in self._domains_in_blocklist(blocklist):
                # NOTE(review): matching uses the lowered copy but the
                # original-cased entry is stored -- confirm intended
                domain = entry.lower()
                if self._whitelist_pattern.match(entry):
                    per_file_stats['skip'] += 1
                else:
                    if self.domain_pattern.match(domain):
                        per_file_stats['blocklist'] += 1
                        if entry in result:
                            # duplicate domain, signify in dataset for debugging purposes
                            if 'duplicates' in result[entry]:
                                result[entry]['duplicates'] += ',%s' % bl_shortcode
                            else:
                                result[entry]['duplicates'] = '%s' % bl_shortcode
                        else:
                            result[entry] = {'bl': bl_shortcode, 'wildcard': False}
                    else:
                        per_file_stats['skip'] += 1
            syslog.syslog(
                syslog.LOG_NOTICE,
                'blocklist: %(uri)s (exclude: %(skip)d block: %(blocklist)d)' % per_file_stats
            )

        if self.cnf and self.cnf.has_section('include'):
            for key, value in self.cnf['include'].items():
                if key.startswith('custom'):
                    entry = value.rstrip().lower()
                    if not self._whitelist_pattern.match(entry):
                        if self.domain_pattern.match(entry):
                            result[entry] = {'bl': 'Manual', 'wildcard': False}
                elif key.startswith('wildcard'):
                    # assumes wildcard values are stored as the bare domain
                    # (no leading '*.') -- TODO confirm against the writer
                    if self.domain_pattern.match(value):
                        # do not apply whitelist to wildcard domains
                        result[value] = {'bl': 'Manual', 'wildcard': True}

        return result

    def _get_excludes(self):
        """Compile the user-defined [exclude] patterns into a single
        (case-insensitive) whitelist regex; invalid patterns are skipped."""
        whitelist_pattern = re.compile('$^') # match nothing
        # guard with `self.cnf and`, mirroring the other methods: the config
        # file may not exist yet, leaving self.cnf as None, and calling
        # has_section() on it would raise AttributeError in the constructor
        if self.cnf and self.cnf.has_section('exclude'):
            exclude_list = set()
            for exclude_item in self.cnf['exclude']:
                pattern = self.cnf['exclude'][exclude_item]
                try:
                    re.compile(pattern, re.IGNORECASE)
                    exclude_list.add(pattern)
                except re.error:
                    syslog.syslog(syslog.LOG_ERR,
                        'blocklist download : skip invalid whitelist exclude pattern "%s" (%s)' % (
                            exclude_item, pattern
                        )
                    )
            if not exclude_list:
                # every pattern was invalid: fall back to match-nothing
                exclude_list.add('$^')

            wp = '|'.join(exclude_list)
            whitelist_pattern = re.compile(wp, re.IGNORECASE)
            syslog.syslog(syslog.LOG_NOTICE, 'blocklist download : exclude domains matching %s' % wp)

        return whitelist_pattern
|
||||
Loading…
x
Reference in New Issue
Block a user