detect starting wan, plus tons of bug fixes and overall improvements close #1

master
Thomas Lynch 3 years ago
parent 54c9c8ecac
commit f5174f8b4f
  1. 96
      failover.sh

@ -1,26 +1,55 @@
#!/bin/bash
#wan failover script. ping multiple hosts and failover from primary to secondary after too many fails on primary
#how often to send pings (passed to sleep as the main loop delay)
INTERVAL=1
#how many packets to send to each host
PACKETS=1
#how often (in seconds) to re-check primary wan while on secondary wan and primary had failed
RECOVERY_CHECK=300
#reliable hosts for checking are cloudflare, quad9, google
HOSTS=("1.1.1.1" "9.9.9.9" "8.8.8.8")
#NOTE(review): the six assignments below look like the pre-change side of the diff.
#USINGWAN, CHECKWAN, COUNTER and CHANGED are all assigned again just before the main loop,
#and WAN1/WAN2 hold the same values as WAN1_IFACE_NAME/WAN2_IFACE_NAME - confirm and drop.
WAN1=eth0
WAN2=wwan0
USINGWAN=eth0
CHECKWAN=eth0
COUNTER=0
CHANGED=0
#interface and luci names of primary and secondary wan
#the *_IFACE_NAME values are what USINGWAN/CHECKWAN are compared against;
#WAN1_LUCI_NAME is matched against the "interface" option of the uci route sections
WAN1_IFACE_NAME="eth0"
WAN2_IFACE_NAME="wwan0"
WAN1_LUCI_NAME="wan"
WAN2_LUCI_NAME="wanb" #currently unused
#get the current active wan interface, in case the script is started during failover
#Globals (read): WAN1_LUCI_NAME, WAN1_IFACE_NAME, WAN2_IFACE_NAME
#Outputs: prints WAN1_IFACE_NAME when the first route section whose interface is
#         WAN1_LUCI_NAME has metric 0, otherwise prints WAN2_IFACE_NAME
function get_active_wan {
  local ROUTE=0
  local CURRENT_METRIC=""
  local ROUTE_IFACE=""
  # 100 is only a safety cap on how many route sections are scanned
  while [ "$ROUTE" -le 100 ]; do
    # uci reports a missing section through its exit status and prints the
    # "Entry not found" text on stderr, which command substitution does not
    # capture - so the end of the list has to be detected from the status.
    # stderr is silenced on purpose: running off the end is the expected way out.
    if ! ROUTE_IFACE=$(uci get "network.@route[$ROUTE].interface" 2>/dev/null); then
      break
    fi
    if [[ "$ROUTE_IFACE" == "uci: Entry not found" ]]; then
      # kept for builds that might print the message on stdout instead
      break
    elif [[ "$ROUTE_IFACE" == "$WAN1_LUCI_NAME" ]]; then
      # an unreadable metric leaves CURRENT_METRIC empty, i.e. "not primary"
      CURRENT_METRIC=$(uci get "network.@route[$ROUTE].metric" 2>/dev/null)
      break
    fi
    ROUTE=$((ROUTE + 1))
  done
  if [[ "$CURRENT_METRIC" == "0" ]]; then
    #current wan1 metric is 0, so it is set as primary
    printf '%s\n' "$WAN1_IFACE_NAME"
  else
    printf '%s\n' "$WAN2_IFACE_NAME"
  fi
}
#change the primary wan metric
#Arguments: $1 - metric intended for the route section whose interface is $WAN1_LUCI_NAME
#Globals: CHANGED (written), SECONDS, WAN1_LUCI_NAME and USINGWAN (read)
#Visible effects: uci set / uci commit / reload_config on a match, then one logger line
function change_wan_metric() {
#remember when the switch happened; the main loop compares this against $SECONDS
CHANGED=$SECONDS
local ROUTE=0
# 100 just in case this runs away for some reason
local ROUTE_IFACE=""
while [ $ROUTE -le 100 ]
do
#NOTE(review): the next two lines look like the pre-change side of the diff - ROUTE_INTERFACE
#is never assigned in the visible code and this "if" has no matching "fi" in view
local ROUTE_IFACE=`uci get network.@route[$ROUTE].interface`
if [ "$ROUTE_INTERFACE" == "uci: Entry not found" ]; then
#read the interface option of route section number $ROUTE
ROUTE_IFACE=`uci get network.@route[$ROUTE].interface`
#NOTE(review): only stdout is captured here; confirm uci really prints this text on stdout,
#otherwise this check never matches and only the 100 cap ends the loop
if [ "$ROUTE_IFACE" == "uci: Entry not found" ]; then
break
#NOTE(review): the next line also looks like a pre-change line, superseded by the elif after it
elif [ "$ROUTE_INTERFACE" == "wan" ]; then
elif [ "$ROUTE_IFACE" == "$WAN1_LUCI_NAME" ]; then
#NOTE(review): the single quotes stop expansion, so the literal text $1 is stored as the
#metric instead of the function argument; "$1" is presumably what was meant
uci set network.@route[$ROUTE].metric='$1'
uci commit
reload_config
#NOTE(review): the next line is a diff hunk header, not shell; whatever sits between the two hunks is not visible here
@ -28,22 +57,31 @@ function change_wan_metric() {
fi
let ROUTE++
done
#USINGWAN is read as-is, so callers need to set it before calling for this message to be accurate
logger -t failover "`date`: Changed active WAN to $USINGWAN"
}
# Send a one-line snapshot of the failover state through logger, tagged "failover".
# Globals (read): PINGS, COUNTER, CHANGED, SECONDS, USINGWAN, CHECKWAN
debug() {
  local stamp state
  stamp=$(date)
  state="pings:${PINGS}, counter:${COUNTER}, changed:${CHANGED}, seconds:${SECONDS}, usingwan:${USINGWAN}, checkwan:${CHECKWAN}"
  logger -t failover "${stamp}: ${state}"
}
#NOTE(review): this first start message looks like the pre-change line; the message sent after
#detection repeats it with the current wan appended - confirm and drop one of them
logger -t failover "$(date): Failover script started."
#which wan is currently being used/checked
#probe the routes once and reuse the answer, so both variables start out in agreement
#and the uci route scan is not repeated
USINGWAN=$(get_active_wan)
CHECKWAN=$USINGWAN
#how many times all pings failed
COUNTER=0
#when wan was last changed
CHANGED=0
logger -t failover "$(date): Failover script started, current wan $USINGWAN"
while sleep $INTERVAL
do
if [ "$USINGWAN" == "$WAN2" ] && [ $(($SECONDS-$CHANGED)) -gt 300 ]; then
if [ "$USINGWAN" == "$WAN2_IFACE_NAME" ] && [ $(($SECONDS-$CHANGED)) -gt $RECOVERY_CHECK ]; then
debug
# after RECOVERY_CHECK seconds on failover wan2, check main wan. if it fails, counter will go to 1, but since
# wan2 is still fine it should reset to 0 next time and stay on wan2, until the check repeats, and so on.
CHECKWAN=$WAN1
CHANGED=$SECONDS #and reset changed so it just tried once then returns to checking working wan
CHECKWAN=$WAN1_IFACE_NAME
CHANGED=$SECONDS
#NOTE: we don't do a modulo check and instead just reset CHANGED, because pings can take longer than the interval
#during a failure, so a modulo check could potentially take multiples of RECOVERY_CHECK even after primary wan has recovered
fi
PINGS=0
@ -55,12 +93,11 @@ do
fi
done
#debug if any pings fail, just for fun
#debug if any pings fail
if [ $PINGS -lt ${#HOSTS[@]} ]; then
debug
fi
# if all pings failed increase counter
if [ $PINGS -eq 0 ]; then
let COUNTER++
@ -69,31 +106,24 @@ do
fi
# counter failed pings to all hosts multiple times, switch WAN
# NOTE: if both wans fail simultaneously, it will just switch back and forth. but not like it matters, neither would work anyway... lol
if [ $COUNTER -gt 3 ]; then
debug
# NOTE: if both wan fail, it will just switch back and forth. but not like it matters, neither would work anyway... lol
if [ "$USINGWAN" == "$WAN1" ]; then
if [ "$USINGWAN" == "$WAN1_IFACE_NAME" ]; then
#if failed and currently on wan, switch to wanb
USINGWAN=$WAN2_IFACE_NAME
change_wan_metric 2
USINGWAN=$WAN2
CHECKWAN=$USINGWAN
logger -t failover "`date`: Changed active WAN metric to 4G modem!"
elif [ "$USINGWAN" == "$WAN2" ]; then
elif [ "$USINGWAN" == "$WAN2_IFACE_NAME" ]; then
#if failed and currently on wanb, switch back to wan
USINGWAN=$WAN1_IFACE_NAME
change_wan_metric 0
USINGWAN=$WAN1
CHECKWAN=$USINGWAN
logger -t failover "`date`: Changed active WAN metric to Cable connection!"
fi
elif [ $COUNTER -eq 0 ]; then
# counter is successful, if on wan2 and wan1 has recovered return to wan1
if [ "$CHECKWAN" == "$WAN1" ] && [ "$USINGWAN" == "$WAN2" ]; then
if [ "$CHECKWAN" == "$WAN1_IFACE_NAME" ] && [ "$USINGWAN" == "$WAN2_IFACE_NAME" ]; then
#ping(s) successful, if on wan2 and wan1 has recovered return to wan1
debug
#if failed and currently on wanb, switch back to wan
USINGWAN=$WAN1_IFACE_NAME
change_wan_metric 0
USINGWAN=$WAN1
CHECKWAN=$USINGWAN
logger -t failover "`date`: Changed active WAN metric to Cable connection!"
fi
fi

Loading…
Cancel
Save