diff --git a/plist b/plist index 03439d8b3..0394cad27 100644 --- a/plist +++ b/plist @@ -976,6 +976,8 @@ /usr/local/opnsense/scripts/system/temperature.sh /usr/local/opnsense/scripts/system/trigger_config_changed_events.py /usr/local/opnsense/scripts/unbound/blocklists.py +/usr/local/opnsense/scripts/unbound/blocklists/__init__.py +/usr/local/opnsense/scripts/unbound/blocklists/default_bl.py /usr/local/opnsense/scripts/unbound/cache.sh /usr/local/opnsense/scripts/unbound/check.sh /usr/local/opnsense/scripts/unbound/logger.py diff --git a/src/opnsense/scripts/unbound/blocklists.py b/src/opnsense/scripts/unbound/blocklists.py index e50b6edfd..d55f8391f 100755 --- a/src/opnsense/scripts/unbound/blocklists.py +++ b/src/opnsense/scripts/unbound/blocklists.py @@ -1,7 +1,7 @@ #!/usr/local/bin/python3 """ - Copyright (c) 2020 Ad Schellevis + Copyright (c) 2020-2023 Ad Schellevis All rights reserved. Redistribution and use in source and binary forms, with or without @@ -26,215 +26,8 @@ POSSIBILITY OF SUCH DAMAGE. 
""" -import os -import sys -import re -import syslog -import tempfile -import time -import fcntl -from configparser import ConfigParser -import requests -import ujson - -def uri_reader(uri): - req_opts = { - 'url': uri, - 'timeout': 5, - 'stream': True - } - try: - req = requests.get(**req_opts) - except Exception as e: - syslog.syslog(syslog.LOG_ERR,'blocklist download : unable to download file from %s (error : %s)' % (uri, e)) - return - - if req.status_code >= 200 and req.status_code <= 299: - req.raw.decode_content = True - prev_chop = '' - while True: - try: - chop = req.raw.read(1024).decode() - if not chop: - if prev_chop: - yield prev_chop - break - else: - parts = (prev_chop + chop).split('\n') - if parts[-1] != "\n": - prev_chop = parts.pop() - else: - prev_chop = '' - for part in parts: - yield part - except Exception as e: - syslog.syslog(syslog.LOG_ERR,'blocklist download : error reading file from %s (error : %s)' % (uri, e)) - return - - else: - syslog.syslog(syslog.LOG_ERR, - 'blocklist download : unable to download file from %s (status_code: %d)' % (uri, req.status_code) - ) +from blocklists import BlocklistParser if __name__ == '__main__': - # check for a running download process, this may take a while so it's better to check... 
- try: - lck = open('/tmp/unbound-download_blocklists.tmp', 'w+') - fcntl.flock(lck, fcntl.LOCK_EX | fcntl.LOCK_NB) - except IOError: - # already running, exit status 99 - sys.exit(99) - - domain_pattern = re.compile( - r'^(([\da-zA-Z_])([_\w-]{,62})\.){,127}(([\da-zA-Z])[_\w-]{,61})' - r'?([\da-zA-Z]\.((xn\-\-[a-zA-Z\d]+)|([a-zA-Z\d]{2,})))$' - ) - destination_address = '0.0.0.0' - rcode = 'NOERROR' - - startup_time = time.time() - syslog.openlog('unbound', facility=syslog.LOG_LOCAL4) - blocklist_items = { - 'data': {}, - 'config': {} - } - skip_download = False - if os.path.exists('/tmp/unbound-blocklists.conf'): - cnf = ConfigParser() - cnf.read('/tmp/unbound-blocklists.conf') - - cnf_cache = ConfigParser() - if os.path.exists('/tmp/unbound-blocklists.conf.cache'): - cnf_cache.read('/tmp/unbound-blocklists.conf.cache') - else: - cnf_cache.read('/tmp/unbound-blocklists.conf') - - if cnf.sections() and cnf_cache.sections(): - # get the difference between the old and new configuration, there won't be any - # if we're starting up, so it will proceed as normal. 
- diff_cnf = {d: set(map(tuple, v.items())) for d,v in cnf._sections.items()} - diff_cnf_cache = {d: set(map(tuple, v.items())) for d,v in cnf_cache._sections.items()} - diffs_added = {header: diff_cnf[header] - diff_cnf_cache[header] for header, _ in diff_cnf.items()} - diffs_removed = {header: diff_cnf_cache[header] - diff_cnf[header] for header, _ in diff_cnf.items()} - - # we can only skip download if the include option has changed, but it must proceed - # if any other option has changed - if (diffs_added['include'] or diffs_removed['include']): - skip_download = True - for (a, r) in zip(diffs_added, diffs_removed): - if (a != 'include' and r != 'include') and (diffs_added[a] or diffs_removed[r]): - skip_download = False - - if cnf.has_section('exclude'): - exclude_list = set() - for exclude_item in cnf['exclude']: - pattern = cnf['exclude'][exclude_item] - try: - re.compile(pattern, re.IGNORECASE) - exclude_list.add(pattern) - except re.error: - syslog.syslog(syslog.LOG_ERR, - 'blocklist download : skip invalid whitelist exclude pattern "%s" (%s)' % ( - exclude_item, pattern - ) - ) - if not exclude_list: - exclude_list.add('$^') - - wp = '|'.join(exclude_list) - whitelist_pattern = re.compile(wp, re.IGNORECASE) - syslog.syslog(syslog.LOG_NOTICE, 'blocklist download : exclude domains matching %s' % wp) - - if cnf.has_section('settings'): - if cnf.has_option('settings', 'address'): - blocklist_items['config']['dst_addr'] = cnf.get('settings', 'address') - if cnf.has_option('settings', 'rcode'): - blocklist_items['config']['rcode'] = cnf.get('settings', 'rcode') - - if not skip_download: - # fetch all blocklists, will replace the existing file used by Unbound - if cnf.has_section('blocklists'): - for blocklist in cnf['blocklists']: - list_type = blocklist.split('_', 1) - bl_shortcode = 'Custom' if list_type[0] == 'custom' else list_type[1] - file_stats = {'uri': cnf['blocklists'][blocklist], 'skip' : 0, 'blocklist': 0, 'lines' :0} - for line in 
uri_reader(cnf['blocklists'][blocklist]): - file_stats['lines'] += 1 - # cut line into parts before comment marker (if any) - tmp = line.split('#')[0].split() - entry = None - while tmp: - entry = tmp.pop(-1) - if entry not in ['127.0.0.1', '0.0.0.0']: - break - if entry: - domain = entry.lower() - if whitelist_pattern.match(entry): - file_stats['skip'] += 1 - else: - if domain_pattern.match(domain): - file_stats['blocklist'] += 1 - blocklist_items['data'][entry] = {'bl': bl_shortcode, 'wildcard': False} - else: - file_stats['skip'] += 1 - - syslog.syslog( - syslog.LOG_NOTICE, - 'blocklist download %(uri)s (lines: %(lines)d exclude: %(skip)d block: %(blocklist)d)' % file_stats - ) - - # after a download, always apply exact custom matches on top of it - if cnf.has_section('include'): - for item in cnf['include']: - entry = cnf['include'][item].rstrip().lower() - if not whitelist_pattern.match(entry): - if domain_pattern.match(entry): - blocklist_items['data'][entry] = {'bl': 'Manual','wildcard': False} - if '*' in entry: - blocklist_items['data'][entry.replace('*.', '')] = {'bl': 'Manual', 'wildcard': True} - - else: - # only modify the existing list, administrate on added and removed exact custom matches - syslog.syslog(syslog.LOG_NOTICE, 'blocklist: skip download') - if os.path.exists('/var/unbound/data/dnsbl.json'): - blocklist_items = ujson.load(open('/var/unbound/data/dnsbl.json', 'r')) - if diffs_removed['include']: - for item in diffs_removed['include']: - entry = item[1].replace('*.', '').rstrip().lower() - # include entry may have been overridden by the whitelist, so use pop() - blocklist_items['data'].pop(entry, None) - - if diffs_added['include']: - for item in diffs_added['include']: - entry = item[1].rstrip().lower() - if not whitelist_pattern.match(entry): - if domain_pattern.match(entry): - blocklist_items['data'][entry] = {'bl': 'Manual', 'wildcard': False} - if '*' in entry: - blocklist_items['data'][entry.replace('*.', '')] = {'bl': 'Manual', 
'wildcard': True} - - # check if there are wildcards in the dataset - has_wildcards = False - for item in blocklist_items['data']: - if blocklist_items['data'][item]['wildcard'] == True: - has_wildcards = True - break - blocklist_items['config']['has_wildcards'] = has_wildcards - - with open('/tmp/unbound-blocklists.conf.cache', 'w') as cache_config: - # cache the current config so we can diff on it the next time - cnf.write(cache_config) - - # write out results - if not os.path.exists('/var/unbound/data'): - os.makedirs('/var/unbound/data') - with open("/var/unbound/data/dnsbl.json.new", 'w') as unbound_outf: - if blocklist_items: - ujson.dump(blocklist_items, unbound_outf) - - # atomically replace the current dnsbl so unbound can pick up on it - os.replace('/var/unbound/data/dnsbl.json.new', '/var/unbound/data/dnsbl.json') - - syslog.syslog(syslog.LOG_NOTICE, "blocklist download done in %0.2f seconds (%d records)" % ( - time.time() - startup_time, len(blocklist_items['data']) - )) + bl = BlocklistParser() + bl.update_blocklist() diff --git a/src/opnsense/scripts/unbound/blocklists/__init__.py b/src/opnsense/scripts/unbound/blocklists/__init__.py new file mode 100755 index 000000000..d472089a4 --- /dev/null +++ b/src/opnsense/scripts/unbound/blocklists/__init__.py @@ -0,0 +1,265 @@ +#!/usr/local/bin/python3 + +""" + Copyright (c) 2023 Deciso B.V. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
import codecs
import fcntl
import glob
import hashlib
import importlib
import os
import re
import sys
import syslog
import time
from configparser import ConfigParser


class BaseBlocklistHandler:
    """
    Base class for blocklist handlers. A handler reads its own configuration file and
    produces a dictionary of domains to block (get_blocklist) plus optional static
    settings (get_config). Helpers to download, cache and tokenize remote lists are
    provided for derived classes.
    """
    # directory holding the downloaded lists, one file per source uri
    cache_location = '/tmp/bl_cache/'
    # maximum age of a cached list in seconds: 20 hours, a bit under the recommended cron time
    cache_ttl = 72000
    # sink addresses used in hosts formatted lists, these are never a domain themselves
    sink_addresses = ('127.0.0.1', '0.0.0.0')

    def __init__(self, config=None):
        """
        :param config: filename of the (ini style) configuration of this handler, None when
                       the handler has no configuration file
        """
        self.config = config
        self.cnf = None
        self.priority = 0

        self.cur_bl_location = '/var/unbound/data/dnsbl.json'

        self.domain_pattern = re.compile(
            r'^(([\da-zA-Z_])([_\w-]{,62})\.){,127}(([\da-zA-Z])[_\w-]{,61})'
            r'?([\da-zA-Z]\.((xn\-\-[a-zA-Z\d]+)|([a-zA-Z\d]{2,})))$'
        )

        self._load_config()

    def get_config(self):
        """
        Get statically defined configuration options. The base implementation has none
        and returns None.
        """
        return None

    def get_blocklist(self):
        """
        Overridden by derived classes to produce a formatted blocklist. Returns a dictionary
        with domains as keys and a dictionary of metadata as values. The base implementation
        returns None.
        """
        return None

    def _load_config(self):
        """
        Load the configuration file into self.cnf. self.cnf stays None when no filename was
        given or the file does not exist.
        """
        # guard against config=None, os.path.exists() raises a TypeError on it
        if self.config and os.path.exists(self.config):
            self.cnf = ConfigParser()
            self.cnf.read(self.config)

    def _blocklists_in_config(self):
        """
        Generator for derived classes to iterate over configured blocklist urls.
        Assumes <type>_<shortcode>=<url> configuration formatting, yields (url, shortcode).
        """
        if self.cnf is None or not self.cnf.has_section('blocklists'):
            return
        for key, uri in self.cnf['blocklists'].items():
            list_type, _, shortcode = key.partition('_')
            if list_type == 'custom':
                yield (uri, 'Custom')
            else:
                # keys without a "<type>_" prefix are reported under their own name
                yield (uri, shortcode or key)

    def _domains_in_blocklist(self, blocklist):
        """
        Generator for derived classes to iterate over downloaded domains.
        :param blocklist: uri of the list
        """
        for line in self._blocklist_reader(blocklist):
            # cut line into parts before comment marker (if any)
            tokens = line.split('#')[0].split()
            # hosts files read "<sink address> <domain>", plain lists only carry the domain:
            # use the right-most token which is not a sink address
            for token in reversed(tokens):
                if token not in self.sink_addresses:
                    yield token
                    break

    def _cache_is_fresh(self, filename):
        """
        :return: True when the cache file exists and is younger than cache_ttl
        """
        try:
            return (time.time() - os.stat(filename).st_ctime) < self.cache_ttl
        except OSError:
            return False

    def _blocklist_reader(self, uri):
        """
        Decides whether a blocklist can be read from a cached file or
        needs to be downloaded. Yields (unformatted) lines either way, without line ending.
        """
        total_lines = 0
        cache_file = os.path.join(self.cache_location, hashlib.md5(uri.encode()).hexdigest())
        from_cache = self._cache_is_fresh(cache_file)
        if from_cache:
            with open(cache_file, encoding='utf-8', errors='replace') as inf:
                for line in inf:
                    total_lines += 1
                    yield line.rstrip('\n')
        else:
            os.makedirs(self.cache_location, exist_ok=True)
            # download into a temporary file, so an empty, failed or abandoned download
            # never ends up being served from cache for the next cache_ttl seconds
            tmp_file = '%s.%d.tmp' % (cache_file, os.getpid())
            complete = False
            self._download_failed = False
            try:
                with open(tmp_file, 'w', encoding='utf-8') as outf:
                    for line in self._uri_reader(uri):
                        outf.write(line + '\n')
                        total_lines += 1
                        yield line
                complete = total_lines > 0 and not getattr(self, '_download_failed', False)
                if complete:
                    os.replace(tmp_file, cache_file)
            finally:
                if not complete and os.path.exists(tmp_file):
                    os.remove(tmp_file)

        syslog.syslog(
            syslog.LOG_NOTICE, 'blocklist download: %d total lines %s for %s' %
            (total_lines, 'from cache' if from_cache else 'downloaded', uri)
        )

    @staticmethod
    def _lines_from_chunks(chunks):
        """
        Convert an iterable of byte chunks into text lines (split on newline, line ending
        removed). Chunk boundaries may fall anywhere, including inside a multi-byte
        character; undecodable bytes are replaced instead of aborting the stream.
        """
        decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
        pending = ''
        for chunk in chunks:
            parts = (pending + decoder.decode(chunk)).split('\n')
            # the last part is an unfinished line (or empty when the chunk ended on a newline)
            pending = parts.pop()
            yield from parts
        pending += decoder.decode(b'', final=True)
        if pending:
            yield pending

    def _uri_reader(self, uri):
        """
        Takes a URI and yields its content line by line. Errors are logged to syslog and
        end the stream, self._download_failed tells if the last download was cut short.
        """
        # third-party module, only needed when a list can not be served from cache
        import requests

        self._download_failed = False
        try:
            req = requests.get(url=uri, timeout=5, stream=True)
        except Exception as e:
            self._download_failed = True
            syslog.syslog(syslog.LOG_ERR,'blocklist download : unable to download file from %s (error : %s)' % (uri, e))
            return

        try:
            if 200 <= req.status_code <= 299:
                req.raw.decode_content = True
                try:
                    yield from self._lines_from_chunks(iter(lambda: req.raw.read(1024), b''))
                except Exception as e:
                    self._download_failed = True
                    syslog.syslog(syslog.LOG_ERR,'blocklist download : error reading file from %s (error : %s)' % (uri, e))
            else:
                self._download_failed = True
                syslog.syslog(syslog.LOG_ERR,
                    'blocklist download : unable to download file from %s (status_code: %d)' % (uri, req.status_code)
                )
        finally:
            # hand the connection back, also when the consumer stops reading early
            req.close()


class BlocklistParser:
    """
    Collects the blocklists of all registered handlers, merges them by handler priority and
    writes the result for unbound to pick up.
    """
    # lock file guarding against concurrent runs
    lock_file = '/tmp/unbound-download_blocklists.tmp'
    # resulting blocklist, replaced atomically
    output_file = '/var/unbound/data/dnsbl.json'

    def __init__(self):
        # check for a running download process, this may take a while so it's better to check...
        self._lock = None
        self._acquire_lock()

        syslog.openlog('unbound', facility=syslog.LOG_LOCAL4)
        self.handlers = list()
        self._register_handlers()
        self.startup_time = time.time()

    def _acquire_lock(self):
        """
        Take an exclusive, non blocking lock on lock_file, exits with status 99 when another
        process holds it. The handle is kept on the instance: the lock is released as soon
        as the file object is closed or garbage collected.
        """
        handle = None
        try:
            handle = open(self.lock_file, 'w+')
            fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            if handle is not None:
                handle.close()
            # already running, exit status 99
            sys.exit(99)
        self._lock = handle

    def _register_handlers(self):
        """
        Import all modules in this package and instantiate every BaseBlocklistHandler
        subclass found in them (once per class).
        """
        handlers = list()
        seen = set()
        for filename in sorted(glob.glob("%s/*.py" % os.path.dirname(__file__))):
            module_name = os.path.splitext(os.path.basename(filename))[0]
            if module_name.startswith('__'):
                # don't import the package itself a second time as "<package>.__init__"
                continue
            module = importlib.import_module(".%s" % module_name, __name__)
            for cls in list(vars(module).values()):
                if isinstance(cls, type) and issubclass(cls, BaseBlocklistHandler) \
                        and cls is not BaseBlocklistHandler and cls not in seen:
                    seen.add(cls)
                    handlers.append(cls())
        self.handlers = handlers

    def _get_config(self):
        """
        :return: configuration of the first handler offering one, empty dictionary when none does
        """
        for handler in self.handlers:
            cfg = handler.get_config()
            if cfg:
                return cfg
        return {}

    def _merge_results(self, blocklists):
        """
        Take output of all the handlers and merge based on each handlers' priority.
        The default handler has highest priority
        :param blocklists: dictionary of handler output keyed by priority
        :return: merged dictionary, empty when there's nothing to merge
        """
        merged = None
        for priority in sorted(blocklists, reverse=True):
            current = blocklists[priority]
            if not current:
                # handler without data (None or empty)
                continue
            if merged is None:
                merged = current
                continue
            for key, value in current.items():
                if key not in merged:
                    # no collision, merge
                    merged[key] = value
                else:
                    # a handler with a lower priority has provided a policy
                    # on a domain that already exists in the blocklist,
                    # add it for debugging purposes
                    merged[key].setdefault('collisions', []).append(value)

        return merged if merged is not None else {}

    def _collect_blocklists(self):
        """
        :return: output of all handlers keyed by priority, handlers sharing a priority are
                 combined (first registered wins on a duplicate domain)
        """
        blocklists = {}
        for handler in self.handlers:
            result = handler.get_blocklist() or {}
            if handler.priority in blocklists:
                for domain, policy in result.items():
                    blocklists[handler.priority].setdefault(domain, policy)
            else:
                blocklists[handler.priority] = result
        return blocklists

    def _build_dataset(self):
        """
        :return: dictionary with the merged blocklist ('data') and its settings ('config')
        """
        data = self._merge_results(self._collect_blocklists())
        config = self._get_config()
        # check if there are wildcards in the dataset
        config['has_wildcards'] = any(policy.get('wildcard') is True for policy in data.values())
        return {'data': data, 'config': config}

    def update_blocklist(self):
        """
        Build the merged blocklist and atomically replace the one used by unbound.
        """
        # third-party serialiser, only needed when writing the result
        import ujson

        merged = self._build_dataset()

        # write out results
        os.makedirs(os.path.dirname(self.output_file), exist_ok=True)
        new_file = '%s.new' % self.output_file
        with open(new_file, 'w') as unbound_outf:
            ujson.dump(merged, unbound_outf)

        # atomically replace the current dnsbl so unbound can pick up on it
        os.replace(new_file, self.output_file)

        syslog.syslog(syslog.LOG_NOTICE, "blocklist parsing done in %0.2f seconds (%d records)" % (
            time.time() - self.startup_time, len(merged['data'])
        ))
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" + +import syslog +import re +import os +import ujson +from . 
from . import BaseBlocklistHandler


class DefaultBlocklistHandler(BaseBlocklistHandler):
    """
    Handler for the blocklists configured in /tmp/unbound-blocklists.conf: downloads the
    selected lists, drops domains matching the exclude (whitelist) patterns and adds the
    manually included domains.
    """
    def __init__(self):
        super().__init__('/tmp/unbound-blocklists.conf')
        self.priority = 100
        self._whitelist_pattern = self._get_excludes()

    def get_config(self):
        """
        :return: dictionary with the optional 'dst_addr' and 'rcode' settings, empty when
                 not configured
        """
        cfg = {}
        if self.cnf is not None and self.cnf.has_section('settings'):
            if self.cnf.has_option('settings', 'address'):
                cfg['dst_addr'] = self.cnf.get('settings', 'address')
            if self.cnf.has_option('settings', 'rcode'):
                cfg['rcode'] = self.cnf.get('settings', 'rcode')
        return cfg

    def get_blocklist(self):
        """
        :return: dictionary keyed by (lowercase) domain holding the source list ('bl'),
                 the 'wildcard' flag and, when offered by more than one list, 'duplicates'
        """
        result = {}
        for blocklist, bl_shortcode in self._blocklists_in_config():
            per_file_stats = {'uri': blocklist, 'skip': 0, 'blocklist': 0}
            for entry in self._domains_in_blocklist(blocklist):
                # domain names are case insensitive, validate and store the lowercase form
                # so a name can't end up in the set twice under different spellings
                domain = entry.lower()
                if self._whitelist_pattern.match(domain) or not self.domain_pattern.match(domain):
                    per_file_stats['skip'] += 1
                    continue
                per_file_stats['blocklist'] += 1
                if domain in result:
                    # duplicate domain, signify in dataset for debugging purposes
                    if 'duplicates' in result[domain]:
                        result[domain]['duplicates'] += ',%s' % bl_shortcode
                    else:
                        result[domain]['duplicates'] = '%s' % bl_shortcode
                else:
                    result[domain] = {'bl': bl_shortcode, 'wildcard': False}
            syslog.syslog(
                syslog.LOG_NOTICE,
                'blocklist: %(uri)s (exclude: %(skip)d block: %(blocklist)d)' % per_file_stats
            )

        if self.cnf is not None and self.cnf.has_section('include'):
            for key, value in self.cnf['include'].items():
                entry = value.strip().lower()
                if not self.domain_pattern.match(entry):
                    continue
                if key.startswith('custom'):
                    if not self._whitelist_pattern.match(entry):
                        result[entry] = {'bl': 'Manual', 'wildcard': False}
                elif key.startswith('wildcard'):
                    # do not apply whitelist to wildcard domains
                    result[entry] = {'bl': 'Manual', 'wildcard': True}

        return result

    def _get_excludes(self):
        """
        Combine the configured exclude patterns into a single case insensitive expression.
        :return: compiled pattern, matching nothing when there is no (valid) exclude
        """
        whitelist_pattern = re.compile('$^')  # match nothing
        # self.cnf is None when the configuration file does not exist
        if self.cnf is None or not self.cnf.has_section('exclude'):
            return whitelist_pattern

        exclude_list = set()
        for exclude_item, pattern in self.cnf['exclude'].items():
            try:
                re.compile(pattern, re.IGNORECASE)
            except re.error:
                syslog.syslog(syslog.LOG_ERR,
                    'blocklist download : skip invalid whitelist exclude pattern "%s" (%s)' % (
                        exclude_item, pattern
                    )
                )
                continue
            exclude_list.add(pattern)
        if not exclude_list:
            exclude_list.add('$^')

        # sorted to keep the expression (and the log line) stable between runs
        wp = '|'.join(sorted(exclude_list))
        try:
            whitelist_pattern = re.compile(wp, re.IGNORECASE)
        except re.error as e:
            # patterns which are valid on their own may still fail when combined
            syslog.syslog(syslog.LOG_ERR,
                'blocklist download : unable to combine exclude patterns, excludes ignored (%s)' % e
            )
            return whitelist_pattern
        syslog.syslog(syslog.LOG_NOTICE, 'blocklist download : exclude domains matching %s' % wp)

        return whitelist_pattern