logging: split formats from queryLog.py implementation. for https://github.com/opnsense/core/pull/3941

This commit is contained in:
Ad Schellevis 2020-02-18 13:07:17 +01:00
parent 6db615b5bb
commit e202940403
4 changed files with 243 additions and 37 deletions

View File

@ -0,0 +1,91 @@
"""
Copyright (c) 2020 Ad Schellevis <ad@opnsense.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import os
import glob
import importlib
import sys
class BaseLogFormat:
    """ Base class for log format handlers.

        The base itself never matches a line; concrete formats override
        match(), timestamp() and line() and set their own priority.
    """

    def __init__(self, filename):
        """ Store the log filename and start at the default priority (255)
        """
        self._priority = 255
        self._filename = filename

    @property
    def name(self):
        """ Name of the concrete handler class
        """
        return type(self).__name__

    @property
    def prio(self):
        """ Priority of this handler, lower means more important
        """
        return self._priority

    @staticmethod
    def match(line):
        """ Tell whether this format applies to the provided line (never for the base)
        """
        return False

    @staticmethod
    def timestamp(line):
        """ Extract the timestamp from line, the base has nothing to extract
        """
        return None

    @staticmethod
    def line(line):
        """ Return line without its timestamp, the base hands it back unchanged
        """
        return line
class FormatContainer:
    """ Collection of all available log format handlers for a single log file
    """

    def __init__(self, filename):
        """ Instantiate every discoverable handler for filename
        """
        self._filename = filename
        self._handlers = []
        self._register()

    def _register(self):
        """ Import all sibling modules and collect their handlers, ordered by priority
        """
        # import every python file located next to this one, except the package initialiser
        for path in glob.glob("%s/*.py" % os.path.dirname(__file__)):
            if path.endswith('__init__.py'):
                continue
            stem = os.path.splitext(os.path.basename(path))[0]
            importlib.import_module(".%s" % stem, "logformats")

        # inspect the attributes of everything the package exposes and
        # instantiate each class deriving from (but not being) BaseLogFormat
        package = sys.modules['logformats']
        found = []
        for member_name in dir(package):
            member = getattr(package, member_name)
            for attribute_name in dir(member):
                candidate = getattr(member, attribute_name)
                if not isinstance(candidate, type):
                    continue
                if issubclass(candidate, BaseLogFormat) and candidate != BaseLogFormat:
                    found.append(candidate(self._filename))

        # lowest priority value first (stable for equal priorities)
        found.sort(key=lambda handler: handler.prio)
        self._handlers = found

    def get_format(self, line):
        """ Return the first registered handler whose match() accepts line, None when there is none
        """
        return next((handler for handler in self._handlers if handler.match(line)), None)

View File

@ -0,0 +1,70 @@
"""
Copyright (c) 2020 Ad Schellevis <ad@opnsense.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import re
import datetime
from . import BaseLogFormat
# Bracketed timestamp such as "[18/Feb/2020:13:07:17 +0100]", allowed anywhere in the line.
# The UTC offset may carry either sign, so "-0500" style offsets are recognised too.
squid_ext_timeformat = r'.*(\[\d{1,2}/[A-Za-z]{3}/\d{4}:\d{1,2}:\d{1,2}:\d{1,2} [+-]\d{4}\]).*'
# Timestamp such as "2020/02/18 13:07:17", only accepted at the very start of the line.
squid_timeformat = r'^(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}).*'
class SquidLogFormat(BaseLogFormat):
    """ Handler for squid log lines starting with a "YYYY/MM/DD HH:MM:SS" timestamp
    """

    def __init__(self, filename):
        """ Register for filename with priority 100
        """
        super(SquidLogFormat, self).__init__(filename)
        self._priority = 100

    def match(self, line):
        """ Applies when the log filename contains "squid" and line starts with the timestamp
        """
        return 'squid' in self._filename and re.match(squid_timeformat, line) is not None

    @staticmethod
    def timestamp(line):
        """ Return the leading timestamp of line as ISO 8601 string

            Only call this for lines accepted by match(); a line without the
            timestamp raises AttributeError.
        """
        tmp = re.match(squid_timeformat, line)
        grp = tmp.group(1)
        return datetime.datetime.strptime(grp, "%Y/%m/%d %H:%M:%S").isoformat()

    @staticmethod
    def line(line):
        """ Return line without its leading timestamp, surrounding whitespace stripped
        """
        tmp = re.match(squid_timeformat, line)
        if tmp is None:
            # no recognisable timestamp, keep the historic fixed-width cut
            return line[19:].strip()
        # cut at the real end of the timestamp: the pattern also accepts fields which
        # are not zero padded, in which case the timestamp is shorter than 19 characters
        return line[tmp.end(1):].strip()
class SquidExtLogFormat(BaseLogFormat):
    """ Handler for squid log lines carrying a bracketed "[DD/Mon/YYYY:HH:MM:SS +ZZZZ]" timestamp
    """

    def __init__(self, filename):
        """ Register for filename with priority 120
        """
        super().__init__(filename)
        self._priority = 120

    def match(self, line):
        """ Applies when the log filename contains "squid" and line holds the bracketed timestamp
        """
        if 'squid' not in self._filename:
            return False
        return re.match(squid_ext_timeformat, line) is not None

    @staticmethod
    def timestamp(line):
        """ Return the bracketed timestamp of line as ISO 8601 string (the offset is not applied)
        """
        bracketed = re.match(squid_ext_timeformat, line).group(1)
        # skip the opening "[" and keep the part in front of the whitespace separated offset
        date_part = bracketed[1:].split()[0]
        return datetime.datetime.strptime(date_part, "%d/%b/%Y:%H:%M:%S").isoformat()

    @staticmethod
    def line(line):
        """ Return line with every occurrence of the matched timestamp removed
        """
        bracketed = re.match(squid_ext_timeformat, line).group(1)
        return line.replace(bracketed, '')

View File

@ -0,0 +1,73 @@
"""
Copyright (c) 2020 Ad Schellevis <ad@opnsense.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import re
import datetime
from . import BaseLogFormat
class SysLogFormat(BaseLogFormat):
    """ Handler for syslog style lines starting with "Mon DD HH:MM:SS hostname ..."
    """

    def __init__(self, filename):
        """ Register for filename with priority 1 and remember the current time,
            which is used as reference to guess the (missing) year of log entries
        """
        super(SysLogFormat, self).__init__(filename)
        self._priority = 1
        self._startup_timestamp = datetime.datetime.now()

    @staticmethod
    def match(line):
        """ Applies when characters 7..14 of line look like a HH:MM:SS time
        """
        return len(line) > 15 and \
            re.match(r'(?:[01]\d|2[0123]):(?:[012345]\d):(?:[012345]\d)', line[7:15]) is not None

    def timestamp(self, line):
        """ Return the timestamp of line as ISO 8601 string

            The log line carries no year, so the most recent year is chosen for which
            the date exists and does not lie after the moment this handler was created.
            Raises ValueError when the first 15 characters are not a valid timestamp.
        """
        stamp = line[0:15]
        newest_year = self._startup_timestamp.year
        # Parse together with an explicit year: without one strptime() assumes 1900,
        # which is not a leap year, so every "Feb 29" entry would raise ValueError.
        # Leap days are at most 8 years apart, hence the search window of 9 years.
        for year in range(newest_year, newest_year - 9, -1):
            try:
                ts = datetime.datetime.strptime("%d %s" % (year, stamp), "%Y %b %d %H:%M:%S")
            except ValueError:
                # date does not exist in this year (Feb 29) or stamp is invalid altogether
                continue
            if ts <= self._startup_timestamp:
                return ts.isoformat()
            # in the future: likely previous year, (month for this year not reached yet)
        raise ValueError("time data %r does not match format %r" % (stamp, "%b %d %H:%M:%S"))

    @staticmethod
    def line(line):
        """ Return the log message without timestamp and hostname
        """
        # strip timestamp from log line
        response = line[16:]
        # strip hostname from log line (everything up to and including the first space)
        return response[response.find(' ')+1:].strip()
class SysLogFormatEpoch(BaseLogFormat):
    """ Handler for log lines starting with an epoch timestamp like "1582027637.12"
    """

    def __init__(self, filename):
        """ Register for filename with priority 2
        """
        super().__init__(filename)
        self._priority = 2

    @staticmethod
    def match(line):
        """ Applies when line starts with 10 digits, a dot and 2 more digits
        """
        if len(line) <= 15:
            return False
        # looks like an epoch
        seconds, separator, fraction = line[0:10], line[10], line[11:13]
        return seconds.isdigit() and separator == '.' and fraction.isdigit()

    @staticmethod
    def timestamp(line):
        """ Convert the first 13 characters of line (epoch seconds) to an ISO 8601 string
        """
        epoch = float(line[0:13])
        return datetime.datetime.fromtimestamp(epoch).isoformat()

    @staticmethod
    def line(line):
        """ Return everything from position 14 onwards, surrounding whitespace stripped
        """
        remainder = line[14:]
        return remainder.strip()

View File

@ -1,7 +1,7 @@
#!/usr/local/bin/python3
"""
Copyright (c) 2019 Ad Schellevis <ad@opnsense.org>
Copyright (c) 2019-2020 Ad Schellevis <ad@opnsense.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -36,11 +36,10 @@ import re
import sre_constants
import ujson
import datetime
from logformats import FormatContainer
sys.path.insert(0, "/usr/local/opnsense/site-python")
from log_helper import reverse_log_reader, fetch_clog
import argparse
squid_ext_timeformat = r'.*(\[\d{1,2}/[A-Za-z]{3}/\d{4}:\d{1,2}:\d{1,2}:\d{1,2} \+\d{4}\]).*'
squid_timeformat = r'^(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}).*'
if __name__ == '__main__':
# handle parameters
@ -55,7 +54,6 @@ if __name__ == '__main__':
result = {'filters': filter, 'rows': [], 'total_rows': 0, 'origin': os.path.basename(inputargs.filename)}
if inputargs.filename != "":
startup_timestamp = datetime.datetime.now()
if inputargs.module == 'core':
log_filename = "/var/log/%s.log" % os.path.basename(inputargs.filename)
else:
@ -76,6 +74,7 @@ if __name__ == '__main__':
filter_regexp = re.compile('.*')
if os.path.exists(log_filename):
format_container = FormatContainer(log_filename)
try:
filename = fetch_clog(log_filename)
except Exception as e:
@ -85,39 +84,12 @@ if __name__ == '__main__':
result['total_rows'] += 1
if (len(result['rows']) < limit or limit == 0) and result['total_rows'] >= offset:
record['timestamp'] = None
if len(record['line']) > 15 and \
re.match(r'(?:[01]\d|2[0123]):(?:[012345]\d):(?:[012345]\d)', record['line'][7:15]):
# syslog format, strip timestamp and return actual log data
ts = datetime.datetime.strptime(record['line'][0:15], "%b %d %H:%M:%S")
ts = ts.replace(year=startup_timestamp.year)
if (startup_timestamp - ts).days < 0:
# likely previous year, (month for this year not reached yet)
ts = ts.replace(year=ts.year - 1)
record['timestamp'] = ts.isoformat()
# strip timestamp from log line
record['line'] = record['line'][16:]
# strip hostname from log line
record['line'] = record['line'][record['line'].find(' ')+1:].strip()
elif len(record['line']) > 15 and record['line'][0:10].isdigit() and \
record['line'][10] == '.' and record['line'][11:13].isdigit():
# looks like an epoch
ts = datetime.datetime.fromtimestamp(float(record['line'][0:13]))
record['timestamp'] = ts.isoformat()
# strip timestamp
record['line'] = record['line'][14:].strip()
elif re.match(squid_ext_timeformat, record['line']):
tmp = re.match(squid_ext_timeformat, record['line'])
grp = tmp.group(1)
ts = datetime.datetime.strptime(grp[1:].split()[0], "%d/%b/%Y:%H:%M:%S")
record['timestamp'] = ts.isoformat()
# strip timestamp
record['line'] = record['line'].replace(grp, '')
elif re.match(squid_timeformat, record['line']):
tmp = re.match(squid_timeformat, record['line'])
grp = tmp.group(1)
ts = datetime.datetime.strptime(grp, "%Y/%m/%d %H:%M:%S")
record['timestamp'] = ts.isoformat()
record['line'] = record['line'][19:].strip()
record['parser'] = None
frmt = format_container.get_format(record['line'])
if frmt:
record['timestamp'] = frmt.timestamp(record['line'])
record['line'] = frmt.line(record['line'])
record['parser'] = frmt.name
result['rows'].append(record)
elif result['total_rows'] > offset + limit:
# do not fetch data until end of file...