diff --git a/src/opnsense/scripts/proxy/fetchACLs.py b/src/opnsense/scripts/proxy/fetchACLs.py index 0f9fe6820..b6add428c 100755 --- a/src/opnsense/scripts/proxy/fetchACLs.py +++ b/src/opnsense/scripts/proxy/fetchACLs.py @@ -28,6 +28,8 @@ import urllib2 import os +import json +import glob import os.path import tarfile import gzip @@ -61,8 +63,9 @@ class ACLDownload(object): syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s'%self._url) self._source_data = None - def pre_process(self): - """ pre process downloaded data, handle compression + def get_files(self): + """ process downloaded data, handle compression + :return: iterator filename, content """ if self._source_data is not None: # handle compressed data @@ -71,49 +74,43 @@ class ACLDownload(object): # source is in tar.gz format, extract all into a single string try: tf = tarfile.open(fileobj=StringIO.StringIO(self._source_data)) - target_data = [] for tf_file in tf.getmembers(): if tf_file.isfile(): - target_data.append(tf.extractfile(tf_file).read()) - self._target_data = ''.join(target_data) + yield tf_file.name, tf.extractfile(tf_file).read() except IOError as e: syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e)) elif len(self._url) > 4 and self._url[-3:] == '.gz': # source is in .gz format unpack try: gf = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(self._source_data)) - self._target_data = gf.read() + yield os.path.basename(self._url), gf.read() except IOError as e: syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e)) elif len(self._url) > 5 and self._url[-4:] == '.zip': # source is in .zip format, extract all into a single string - target_data = [] + target_data = dict() with zipfile.ZipFile(StringIO.StringIO(self._source_data), mode='r', compression=zipfile.ZIP_DEFLATED) as zf: for item in zf.infolist(): - target_data.append(zf.read(item)) - self._target_data = ''.join(target_data) + if item.file_size > 0: + yield item.filename, zf.read(item) + self._target_data = target_data else: - self._target_data = self._source_data + yield os.path.basename(self._url), self._source_data def download(self): + """ download / unpack ACL + :return: iterator filename, type, content + """ self.fetch() - self.pre_process() - - def is_valid(self): - """ did this ACL download successful - """ - if self._target_data is not None: - return True - else: - return False - - def get_data(self): - """ retrieve data - """ - # XXX: maybe some postprocessing is needed here, all will be used with a squid dstdom_regex tag - return self._target_data + for filename, filedata in self.get_files(): + for line in filedata.split('\n'): + if line.find('/') > -1: + file_type = 'url' + else: + file_type = 'domain' + yield filename, file_type, line # parse OPNsense external ACLs config @@ -121,6 +118,10 @@ if os.path.exists(acl_config_fn): # create acl directory (if new) if not os.path.exists(acl_target_dir): os.mkdir(acl_target_dir) + else: + # remove index files + for filename in glob.glob('%s/*.index'%acl_target_dir): + os.remove(filename) # read config and download per section cnf = ConfigParser() cnf.read(acl_config_fn) @@ -129,22 +130,42 @@ if os.path.exists(acl_config_fn): if cnf.has_option(section,'enabled'): # if enabled fetch file target_filename = acl_target_dir+'/'+section - if cnf.get(section,'enabled')=='1': - if cnf.has_option(section,'url'): + if cnf.has_option(section,'url'): + # define targets + targets = {'domain': {'filename': target_filename, 'handle' : None}, + 'url': {'filename': '%s.url'%target_filename, 'handle': None}} + + # download file + if cnf.get(section,'enabled') == '1': + # only generate files if enabled, otherwise dump empty files download_url = cnf.get(section,'url') acl = ACLDownload(download_url, acl_max_timeout) - acl.download() - if acl.is_valid(): - output_data = acl.get_data() - with open(target_filename, "wb") as code: - code.write(output_data) - elif not os.path.isfile(target_filename): - # if there's no file available, create an empty one (otherwise leave the last download). - with open(target_filename, "wb") as code: - code.write("") - # if disabled or not 1 try to remove old file - elif cnf.get(section,'enabled')!='1': - try: - os.remove(acl_target_dir+'/'+section) - except OSError: - pass + all_filenames = list() + for filename, filetype, line in acl.download(): + if filename not in all_filenames: + all_filenames.append(filename) + if filetype in targets and targets[filetype]['handle'] is None: + targets[filetype]['handle'] = open(targets[filetype]['filename'], 'wb') + if filetype in targets: + targets[filetype]['handle'].write('%s\n'%line) + # save index to disc + with open('%s.index'%target_filename,'wb') as idx_out: + index_data = dict() + for filename in all_filenames: + if len(filename.split('/')) > 3: + index_key = '/'.join(filename.split('/')[1:-1]) + if index_key not in index_data: + index_data[index_key] = index_key + idx_out.write(json.dumps(index_data)) + # cleanup + for filetype in targets: + if targets[filetype]['handle'] is not None: + targets[filetype]['handle'].close() + elif cnf.get(section,'enabled') != '1': + if os.path.isfile(targets[filetype]['filename']): + # disabled, remove previous data + os.remove(targets[filetype]['filename']) + elif not os.path.isfile(targets[filetype]['filename']): + # no data fetched and no file available, create new empty file + with open(targets[filetype]['filename'], 'wb') as target_out: + target_out.write("")