From b4e72bbeffa2931c746107f2817bbc7b558a6b3e Mon Sep 17 00:00:00 2001 From: Stephan de Wit Date: Wed, 1 Mar 2023 08:31:13 +0100 Subject: [PATCH] Unbound: Blocklists: slight restructure of inherited code. No functional changes --- .../scripts/unbound/blocklists/__init__.py | 61 +++++-------------- .../scripts/unbound/blocklists/default_bl.py | 45 +++++++++++++- 2 files changed, 58 insertions(+), 48 deletions(-) diff --git a/src/opnsense/scripts/unbound/blocklists/__init__.py b/src/opnsense/scripts/unbound/blocklists/__init__.py index 0498ca2fb..23775ec9e 100755 --- a/src/opnsense/scripts/unbound/blocklists/__init__.py +++ b/src/opnsense/scripts/unbound/blocklists/__init__.py @@ -36,7 +36,6 @@ import sys import fcntl import ujson import time -import hashlib from configparser import ConfigParser class BaseBlocklistHandler: @@ -68,29 +67,29 @@ class BaseBlocklistHandler: """ pass + def _blocklist_reader(self, uri): + """ + Used by a derived class to define a caching and/or download routine. + """ + pass + + def _blocklists_in_config(self): + """ + Generator for derived classes to iterate over configured blocklists. + """ + pass + def _load_config(self): """ - Load a configuration. If a cached version exists, will also provide a dictionary - containing the differences between the two. + Load a configuration file. """ if os.path.exists(self.config): self.cnf = ConfigParser() self.cnf.read(self.config) - def _blocklists_in_config(self): - """ - Generator for derived classes to iterate over configured blocklist urls. - Assumes = configuration formatting - """ - if self.cnf and self.cnf.has_section('blocklists'): - for blocklist in self.cnf['blocklists']: - list_type = blocklist.split('_', 1) - bl_shortcode = 'Custom' if list_type[0] == 'custom' else list_type[1] - yield (self.cnf['blocklists'][blocklist], bl_shortcode) - def _domains_in_blocklist(self, blocklist): """ - Generator for derived classes to iterate over downloaded domains. + Generator for derived classes to iterate over cached/downloaded domains. """ for line in self._blocklist_reader(blocklist): # cut line into parts before comment marker (if any) @@ -103,38 +102,6 @@ class BaseBlocklistHandler: if entry: yield entry - def _blocklist_reader(self, uri): - """ - Decides whether a blocklist can be read from a cached file or - needs to be downloaded. Yields (unformatted) domains either way - """ - total_lines = 0 - from_cache = False - h = hashlib.md5(uri.encode()).hexdigest() - cache_loc = '/tmp/bl_cache/' - if os.path.exists(cache_loc): - filep = cache_loc + h - if os.path.exists(filep): - fstat = os.stat(filep).st_ctime - if (time.time() - fstat) < self.cache_ttl: # 20 hours, a bit under the recommended cron time - from_cache = True - for line in open(filep): - total_lines += 1 - yield line - - if not from_cache: - os.makedirs(cache_loc, exist_ok=True) - with open(cache_loc + h, 'w') as outf: - for line in self._uri_reader(uri): - outf.write(line + '\n') - total_lines += 1 - yield line - - syslog.syslog( - syslog.LOG_NOTICE, 'blocklist download: %d total lines %s for %s' % - (total_lines, 'from cache' if from_cache else 'downloaded', uri) - ) - def _uri_reader(self, uri): """ Takes a URI and yields domain entries. diff --git a/src/opnsense/scripts/unbound/blocklists/default_bl.py b/src/opnsense/scripts/unbound/blocklists/default_bl.py index 42689d257..9fa80e939 100755 --- a/src/opnsense/scripts/unbound/blocklists/default_bl.py +++ b/src/opnsense/scripts/unbound/blocklists/default_bl.py @@ -29,7 +29,8 @@ import syslog import re import os -import ujson +import hashlib +import time from . import BaseBlocklistHandler class DefaultBlocklistHandler(BaseBlocklistHandler): @@ -87,6 +88,48 @@ class DefaultBlocklistHandler(BaseBlocklistHandler): return result + def _blocklists_in_config(self): + """ + Generator for derived classes to iterate over configured blocklists. + """ + if self.cnf and self.cnf.has_section('blocklists'): + for blocklist in self.cnf['blocklists']: + list_type = blocklist.split('_', 1) + bl_shortcode = 'Custom' if list_type[0] == 'custom' else list_type[1] + yield (self.cnf['blocklists'][blocklist], bl_shortcode) + + def _blocklist_reader(self, uri): + """ + Decides whether a blocklist can be read from a cached file or + needs to be downloaded. Yields (unformatted) domains either way + """ + total_lines = 0 + from_cache = False + h = hashlib.md5(uri.encode()).hexdigest() + cache_loc = '/tmp/bl_cache/' + if os.path.exists(cache_loc): + filep = cache_loc + h + if os.path.exists(filep): + fstat = os.stat(filep).st_ctime + if (time.time() - fstat) < self.cache_ttl: # 20 hours, a bit under the recommended cron time + from_cache = True + for line in open(filep): + total_lines += 1 + yield line + + if not from_cache: + os.makedirs(cache_loc, exist_ok=True) + with open(cache_loc + h, 'w') as outf: + for line in self._uri_reader(uri): + outf.write(line + '\n') + total_lines += 1 + yield line + + syslog.syslog( + syslog.LOG_NOTICE, 'blocklist download: %d total lines %s for %s' % + (total_lines, 'from cache' if from_cache else 'downloaded', uri) + ) + def _get_excludes(self): whitelist_pattern = re.compile('$^') # match nothing if self.cnf.has_section('exclude'):