(proxy) optimize acl download, split domains/urls and add index file
commit 932e2d6948 (parent 2a1afe9ec3)
@@ -28,6 +28,8 @@
 import urllib2
 import os
+import json
+import glob
 import os.path
 import tarfile
 import gzip
@@ -61,8 +63,9 @@ class ACLDownload(object):
             syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s'%self._url)
             self._source_data = None

-    def pre_process(self):
-        """ pre process downloaded data, handle compression
+    def get_files(self):
+        """ process downloaded data, handle compression
+        :return: iterator filename, content
         """
         if self._source_data is not None:
             # handle compressed data
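The core of the change is visible in this hunk: `pre_process()`, which flattened every download into a single `self._target_data` string, becomes the generator `get_files()`, which yields one `(filename, content)` pair per extracted file so the caller can route each file separately. A minimal sketch of that pattern follows; the helper name and flow are illustrative, not the committed implementation:

# Sketch of the generator pattern introduced above (hypothetical names).
# Yielding per archive member keeps filenames available for indexing and
# avoids building one large concatenated string in memory.
import tarfile
import StringIO


def iter_archive_files(url, source_data):
    """ :return: iterator of (filename, content) pairs """
    if url.endswith('.tar.gz') or url.endswith('.tgz'):
        tf = tarfile.open(fileobj=StringIO.StringIO(source_data))
        for tf_file in tf.getmembers():
            if tf_file.isfile():
                # one member at a time, archive path preserved
                yield tf_file.name, tf.extractfile(tf_file).read()
    else:
        # plain payload: fall back to the basename of the url
        yield url.split('/')[-1], source_data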
@@ -71,49 +74,43 @@ class ACLDownload(object):
                 # source is in tar.gz format, extract all into a single string
                 try:
                     tf = tarfile.open(fileobj=StringIO.StringIO(self._source_data))
-                    target_data = []
                     for tf_file in tf.getmembers():
                         if tf_file.isfile():
-                            target_data.append(tf.extractfile(tf_file).read())
-                    self._target_data = ''.join(target_data)
+                            yield tf_file.name, tf.extractfile(tf_file).read()
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e))
             elif len(self._url) > 4 and self._url[-3:] == '.gz':
                 # source is in .gz format unpack
                 try:
                     gf = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(self._source_data))
-                    self._target_data = gf.read()
+                    yield os.path.basename(self._url), gf.read()
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e))
             elif len(self._url) > 5 and self._url[-4:] == '.zip':
                 # source is in .zip format, extract all into a single string
-                target_data = []
+                target_data = dict()
                 with zipfile.ZipFile(StringIO.StringIO(self._source_data),
                                      mode='r',
                                      compression=zipfile.ZIP_DEFLATED) as zf:
                     for item in zf.infolist():
-                        target_data.append(zf.read(item))
-                self._target_data = ''.join(target_data)
+                        if item.file_size > 0:
+                            yield item.filename, zf.read(item)
+                self._target_data = target_data
             else:
-                self._target_data = self._source_data
+                yield os.path.basename(self._url), self._source_data

     def download(self):
         """ download / unpack ACL
+        :return: iterator filename, type, content
         """
         self.fetch()
-        self.pre_process()
-
-    def is_valid(self):
-        """ did this ACL download successful
-        """
-        if self._target_data is not None:
-            return True
-        else:
-            return False
-
-    def get_data(self):
-        """ retrieve data
-        """
-        # XXX: maybe some postprocessing is needed here, all will be used with a squid dstdom_regex tag
-        return self._target_data
+        for filename, filedata in self.get_files():
+            for line in filedata.split('\n'):
+                if line.find('/') > -1:
+                    file_type = 'url'
+                else:
+                    file_type = 'domain'
+                yield filename, file_type, line


 # parse OPNsense external ACLs config
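With this hunk `download()` is itself a generator: it fetches, unpacks, and classifies every line on the fly, tagging entries that contain a '/' as 'url' and everything else as 'domain' so they can later be written to separate squid ACL files. A two-line sketch of the heuristic with worked examples (the helper name is illustrative, not part of the commit):

def classify(line):
    # entries with a path component are url patterns, bare names are domains
    return 'url' if line.find('/') > -1 else 'domain'

assert classify('ads.example.com') == 'domain'
assert classify('example.com/banners/') == 'url'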
@@ -121,6 +118,10 @@ if os.path.exists(acl_config_fn):
     # create acl directory (if new)
     if not os.path.exists(acl_target_dir):
         os.mkdir(acl_target_dir)
+    else:
+        # remove index files
+        for filename in glob.glob('%s/*.index'%acl_target_dir):
+            os.remove(filename)
     # read config and download per section
     cnf = ConfigParser()
     cnf.read(acl_config_fn)
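On re-runs this clears any `*.index` files left behind by ACLs that were removed from the config; indexes for still-configured ACLs are rewritten further down. A compressed sketch of the branch, assuming an illustrative target directory:

import glob
import os

acl_target_dir = '/usr/local/etc/squid/acl'   # illustrative location
# '%s/*.index' matches only the generated index files, never the
# '<section>' and '<section>.url' payload files written later on
for filename in glob.glob('%s/*.index' % acl_target_dir):
    os.remove(filename)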
@@ -129,22 +130,42 @@ if os.path.exists(acl_config_fn):
         if cnf.has_option(section,'enabled'):
             # if enabled fetch file
             target_filename = acl_target_dir+'/'+section
-            if cnf.get(section,'enabled')=='1':
-                if cnf.has_option(section,'url'):
+            if cnf.has_option(section,'url'):
+                # define targets
+                targets = {'domain': {'filename': target_filename, 'handle' : None},
+                           'url': {'filename': '%s.url'%target_filename, 'handle': None}}
+
                 # download file
+                if cnf.get(section,'enabled') == '1':
+                    # only generate files if enabled, otherwise dump empty files
                     download_url = cnf.get(section,'url')
                     acl = ACLDownload(download_url, acl_max_timeout)
-                    acl.download()
-                    if acl.is_valid():
-                        output_data = acl.get_data()
-                        with open(target_filename, "wb") as code:
-                            code.write(output_data)
-                    elif not os.path.isfile(target_filename):
-                        # if there's no file available, create an empty one (otherwise leave the last download).
-                        with open(target_filename, "wb") as code:
-                            code.write("")
-            # if disabled or not 1 try to remove old file
-            elif cnf.get(section,'enabled')!='1':
-                try:
-                    os.remove(acl_target_dir+'/'+section)
-                except OSError:
-                    pass
+                    all_filenames = list()
+                    for filename, filetype, line in acl.download():
+                        if filename not in all_filenames:
+                            all_filenames.append(filename)
+                        if filetype in targets and targets[filetype]['handle'] is None:
+                            targets[filetype]['handle'] = open(targets[filetype]['filename'], 'wb')
+                        if filetype in targets:
+                            targets[filetype]['handle'].write('%s\n'%line)
+                    # save index to disc
+                    with open('%s.index'%target_filename,'wb') as idx_out:
+                        index_data = dict()
+                        for filename in all_filenames:
+                            if len(filename.split('/')) > 3:
+                                index_key = '/'.join(filename.split('/')[1:-1])
+                                if index_key not in index_data:
+                                    index_data[index_key] = index_key
+                        idx_out.write(json.dumps(index_data))
+                # cleanup
+                for filetype in targets:
+                    if targets[filetype]['handle'] is not None:
+                        targets[filetype]['handle'].close()
+                    elif cnf.get(section,'enabled') != '1':
+                        if os.path.isfile(targets[filetype]['filename']):
+                            # disabled, remove previous data
+                            os.remove(targets[filetype]['filename'])
+                    elif not os.path.isfile(targets[filetype]['filename']):
+                        # no data fetched and no file available, create new empty file
+                        with open(targets[filetype]['filename'], 'wb') as target_out:
+                            target_out.write("")
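The new `<section>.index` file records which nested categories an archive contained, keyed by the archive path minus its first and last components; only entries at least four path components deep qualify (`len(filename.split('/')) > 3`). A worked example with hypothetical member names (shallalist-style paths, not taken from the commit):

import json

# hypothetical archive member names as yielded by get_files()
all_filenames = ['shallalist/BL/porn/urls',
                 'shallalist/BL/porn/domains',
                 'shallalist/COPYRIGHT']

index_data = dict()
for filename in all_filenames:
    if len(filename.split('/')) > 3:                     # 4+ components only
        index_key = '/'.join(filename.split('/')[1:-1])  # strip root dir and file name
        if index_key not in index_data:
            index_data[index_key] = index_key

print json.dumps(index_data)  # {"BL/porn": "BL/porn"}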