Reporting: Unbound DNS: resolve hostnames for clients

This commit is contained in:
Stephan de Wit 2023-01-09 11:12:13 +01:00
parent 0474d2558f
commit 5a3765a0c1
2 changed files with 48 additions and 3 deletions

View File

@ -34,6 +34,8 @@ import time
import datetime
import pandas
import signal
import socket
import duckdb
from collections import deque
sys.path.insert(0, "/usr/local/opnsense/site-python")
from duckdb_helper import DbConnection
@ -47,6 +49,22 @@ class DNSReader:
self.selector = selectors.DefaultSelector()
self.fd = None
self.client_map = {}
self.update_hostname = False
def resolve_ip(self, ip, timeout=0.01):
    """Reverse-resolve *ip* to a hostname, best effort.

    A locally-known host should resolve well under the default 10 ms
    timeout; anything slower or unresolvable yields ``None``.

    :param ip: IP address string, or None
    :param timeout: default socket timeout (seconds) applied during the lookup
    :return: resolved hostname, or None when *ip* is None or the lookup fails
    """
    if ip is None:
        return None
    # NOTE(review): setdefaulttimeout() only applies to newly created
    # sockets; the platform resolver behind gethostbyaddr() may not honor
    # it, so the timeout is best effort — confirm on target platform.
    old = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        host = socket.gethostbyaddr(ip)[0]
    except OSError:
        # OSError covers socket.timeout as well as socket.herror /
        # socket.gaierror raised for unknown or unresolvable addresses;
        # the original only caught socket.timeout and would crash on an
        # address with no reverse record.
        host = None
    finally:
        # Always restore the process-wide default timeout, even on error,
        # so the 10 ms limit does not leak into unrelated sockets.
        socket.setdefaulttimeout(old)
    return host
def _setup_db(self):
with DbConnection('/var/unbound/data/unbound.duckdb', read_only=False) as db:
db.connection.execute("""
@ -67,6 +85,13 @@ class DNSReader:
)
""")
db.connection.execute("""
CREATE TABLE IF NOT EXISTS client (
ipaddr TEXT UNIQUE,
hostname TEXT
);
""")
for size in [600, 300, 60]:
db.connection.execute(
"""
@ -106,7 +131,15 @@ class DNSReader:
if r == '':
return False
self.buffer.append(tuple(r.strip("\n").split()))
q = tuple(r.strip("\n").split())
self.buffer.append(q)
client = q[2]
client_check = (time.time() - self.client_map.get(client, 0)) > 3600
if client_check:
self.client_map[client] = time.time()
syslog.syslog(syslog.LOG_INFO, "Update hostname for client %s" % client)
self.update_hostname = True
# Start a transaction every flush_interval seconds. With regular inserts
# we would also need to limit the amount of queries we buffer before inserting them,
@ -133,6 +166,13 @@ class DNSReader:
# faster than transactional inserts, and doesn't block even under high load.
db.connection.append('query', pandas.DataFrame(list(self.buffer)))
self.buffer.clear()
if self.update_hostname:
host = self.resolve_ip(client)
if host is not None:
try:
db.connection.execute("INSERT INTO client VALUES (?, ?)", [client, host])
except duckdb.ConstraintException:
db.connection.execute("UPDATE client SET hostname=? WHERE ipaddr=?", [host, client])
return True

View File

@ -69,8 +69,10 @@ def handle_rolling(args):
s as start_timestamp,
e as end_timestamp,
GROUP_CONCAT(cl) as clients,
GROUP_CONCAT(COALESCE(resolved.hostname, '')) as hostnames,
GROUP_CONCAT(cnt_cl) as client_totals
FROM grouped
LEFT JOIN client resolved ON cl = resolved.ipaddr
GROUP BY s, e
ORDER BY e
""".format(intv=interval//60, tp=tp)
@ -106,8 +108,11 @@ def handle_rolling(args):
interval = {row[0]: {}}
if row[2]:
tmp = []
counts = row[3].split(',')
hosts = row[3].split(',')
counts = row[4].split(',')
for idx, client in enumerate(row[2].split(',')):
if hosts[idx] != '':
client = hosts[idx]
tmp.append((client, int(counts[idx])))
# sort the list by most active client
tmp.sort(key=itemgetter(1), reverse=True)
@ -245,7 +250,7 @@ if __name__ == '__main__':
subparsers = parser.add_subparsers(dest='command', help='sub-command help')
r_parser = subparsers.add_parser('rolling', help='get rolling aggregate of query data')
r_parser.add_argument('--timeperiod', help='timeperiod in hours. Valid values are [24, 12, 1]', type=int, default=24)
r_parser.add_argument('--interval', help='interval in seconds. valid values are [300, 60]', type=int, default=300)
r_parser.add_argument('--interval', help='interval in seconds. valid values are [600, 300, 60]', type=int, default=300)
r_parser.add_argument('--clients', help='get top 10 client activity instead', action='store_true')
r_parser.set_defaults(func=handle_rolling)