system: change the "monitor" syshook and de-deprecate; closes #8199

We move the gateway recovery into the hook as a user and users
can write their own similar scripts to fetch current status and
inspect and react accordingly.  We do so before filter reload
to avoid excessive reloads in the facility script(s).

What this loses is the ability to get the previous arguments
for statistics, but OTOH it also reduces the risk for spurious
events as we only trigger on state transitions.
This commit is contained in:
Franco Fichtner 2025-02-14 11:08:17 +01:00
parent 11b7ca4d01
commit af235daa43
5 changed files with 54 additions and 38 deletions

2
plist
View File

@ -136,7 +136,7 @@
/usr/local/etc/rc.syshook.d/early/20-backup
/usr/local/etc/rc.syshook.d/early/90-carp
/usr/local/etc/rc.syshook.d/import/20-importer
/usr/local/etc/rc.syshook.d/monitor/10-dpinger
/usr/local/etc/rc.syshook.d/monitor/20-recover
/usr/local/etc/rc.syshook.d/start/10-newwanip
/usr/local/etc/rc.syshook.d/start/20-freebsd
/usr/local/etc/rc.syshook.d/start/25-syslog

View File

@ -36,7 +36,7 @@ require_once 'filter.inc';
exit_on_bootup();
/* when called with an argument we are in gateway switch mode */
$recover_only = !empty($argv[1]);
$monitor_hook = !empty($argv[1]);
/* drop deleted routes */
foreach (glob("/tmp/delete_route_*.todo") as $filename) {
@ -46,19 +46,11 @@ foreach (glob("/tmp/delete_route_*.todo") as $filename) {
unlink($filename);
}
system_routing_configure(true, null, !$recover_only);
/* routing is not allowed to restart monitors when extended hook is requested */
system_routing_configure(true, null, !$monitor_hook);
if ($recover_only) {
$gwnames = [];
foreach (return_gateways_status() as $status) {
if ($status['status'] == 'down') {
/* try to recover monitors stuck in down state ignoring "force_down" */
$gwnames[] = $status['name'];
}
}
plugins_configure('monitor', true, [$gwnames]);
if ($monitor_hook) {
passthru('/usr/local/etc/rc.syshook monitor');
}
filter_configure_sync(true, false);

View File

@ -1,3 +0,0 @@
#!/bin/sh
# XXX stub for monitoring facility, functionality moved to gateway_watcher.php

View File

@ -0,0 +1,43 @@
#!/usr/local/bin/php
<?php
/*
* Copyright (c) 2024 Franco Fichtner <franco@opnsense.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
require_once 'config.inc';
require_once 'util.inc';
require_once 'system.inc';
/*
 * Collect the name of every gateway whose reported status is "down"
 * and pass that list on to the "monitor" plugin facility.
 */
$gwnames = [];

foreach (return_gateways_status() as $status) {
    if ($status['status'] != 'down') {
        /* only gateways reported as down are of interest here */
        continue;
    }

    /* try to recover monitors stuck in down state ignoring "force_down" */
    $gwnames[] = $status['name'];
}

plugins_configure('monitor', true, [$gwnames]);

View File

@ -2,7 +2,7 @@
<?php
/*
* Copyright (C) 2023 Franco Fichtner <franco@opnsense.org>
* Copyright (C) 2023-2025 Franco Fichtner <franco@opnsense.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -64,8 +64,6 @@ while (1) {
continue;
}
$alarm = false;
/* clear known gateways in first step to flush unknown in second step */
$cleanup = $mode;
foreach ($status as $report) {
@ -77,7 +75,7 @@ while (1) {
/* run main watcher pass */
foreach ($status as $report) {
$ralarm = false;
$alarm = false;
if ($report['loss'] == '~') {
/* wait for valid data before triggering an alarm */
@ -98,7 +96,7 @@ while (1) {
if (isset($config['system']['gw_switch_default'])) {
/* only consider down state transition in this case */
if (!empty($mode[$report['name']]) && $mode[$report['name']] != $report['status'] && ($mode[$report['name']] == 'down' || $report['status'] == 'down')) {
$ralarm = true;
$alarm = true;
}
}
@ -109,31 +107,17 @@ while (1) {
/* consider all state transitions as they depend on individual trigger setting */
if (!empty($mode[$report['name']]) && $mode[$report['name']] != $report['status']) {
/* XXX consider trigger conditions later on */
$ralarm = true;
$alarm = true;
break;
}
}
}
}
if ($ralarm) {
/* raise an alarm via the rc.syshook monitor facility */
shell_safe("/usr/local/etc/rc.syshook monitor %s %s %s %s %s %s", [
$report['name'],
$report['monitor'],
$mode[$report['name']] . ' -> ' . $report['status'],
$report['delay'],
$report['stddev'],
$report['loss']
]);
$alarm = true;
}
if ($mode[$report['name']] != $report['status']) {
syslog(LOG_NOTICE, sprintf(
"%s: %s (Addr: %s Alarm: %s RTT: %s RTTd: %s Loss: %s)",
$ralarm ? 'ALERT' : 'MONITOR',
$alarm ? 'ALERT' : 'MONITOR',
$report['name'],
$report['monitor'],
$mode[$report['name']] . ' -> ' . $report['status'],