#!/bin/bash
#wan failover script. ping multiple hosts and failover from primary to secondary after too many fails on primary
# Seconds to sleep between ping rounds (passed to `sleep`).
INTERVAL=1
# Number of packets to send to each host per round.
PACKETS=1
# How often (in seconds) to re-check the primary WAN while running on the
# secondary WAN after the primary has failed.
RECOVERY_CHECK=300
# Reliable hosts used for checking: Cloudflare, Quad9, Google.
HOSTS=("1.1.1.1" "9.9.9.9" "8.8.8.8")
# Result of the most recent ping to each host, indexed like HOSTS.
PING_RESULTS=()
# Interface names and LuCI names of the primary and secondary WAN.
WAN1_IFACE_NAME="eth0"
WAN2_IFACE_NAME="wwan0"
WAN1_LUCI_NAME="wan"
WAN2_LUCI_NAME="wanb" # currently unused
# VPN gateway: the target of the static route whose metric selects the WAN.
VPN_GATEWAY="139.99.201.65"
#######################################
# Print the interface name of the WAN that is currently active, so the script
# picks up the right state when it is started during a failover.
#
# Walks the static routes network.@route[0..100] and looks for the first one
# whose interface is WAN1_LUCI_NAME and whose target is VPN_GATEWAY. If that
# route has metric "0" the primary WAN is considered active; in every other
# case (other metric, no such route) the secondary WAN is reported.
#
# Globals:   WAN1_LUCI_NAME, VPN_GATEWAY, WAN1_IFACE_NAME, WAN2_IFACE_NAME (read)
# Arguments: none
# Outputs:   WAN1_IFACE_NAME or WAN2_IFACE_NAME on stdout
#######################################
function get_active_wan {
  local route=0
  local current_metric=""
  local route_iface=""
  local route_target=""

  while (( route <= 100 )); do
    # uci writes "Entry not found" to stderr, so it never shows up in the
    # captured output; the exit status is what tells us the list has ended.
    if ! route_iface=$(uci -q get "network.@route[${route}].interface"); then
      break
    fi
    # A route without a target simply does not match the VPN gateway.
    route_target=$(uci -q get "network.@route[${route}].target") || route_target=""

    if [[ "$route_iface" == "$WAN1_LUCI_NAME" && "$route_target" == "$VPN_GATEWAY" ]]; then
      current_metric=$(uci -q get "network.@route[${route}].metric") || current_metric=""
      break
    fi
    route=$((route + 1))
  done

  if [[ "$current_metric" == "0" ]]; then
    # The primary WAN route has metric 0, so it is the preferred one.
    printf '%s\n' "$WAN1_IFACE_NAME"
  else
    printf '%s\n' "$WAN2_IFACE_NAME"
  fi
}
#######################################
# Change the metric of the primary WAN's static route and apply it.
#
# Walks network.@route[0..100], and on the first route whose interface is
# WAN1_LUCI_NAME sets its metric to $1, commits the uci changes and calls
# reload_config. Also records the time of the change in CHANGED.
#
# NOTE(review): unlike get_active_wan, this matches on the interface only and
# does not check that the route target is VPN_GATEWAY; confirm that the first
# route on WAN1_LUCI_NAME is always the intended one.
#
# Globals:   WAN1_LUCI_NAME, USINGWAN (read); CHANGED (written)
# Arguments: $1 - new metric for the primary WAN route
# Outputs:   one syslog line (tag "failover") describing the outcome
# Returns:   0 if the metric was changed, 1 if no route matched or uci failed
#######################################
function change_wan_metric() {
  local metric=$1
  local route=0
  local route_iface=""

  CHANGED=$SECONDS

  while (( route <= 100 )); do
    # uci writes "Entry not found" to stderr, so the end of the route list is
    # detected from the exit status rather than from the captured output.
    if ! route_iface=$(uci -q get "network.@route[${route}].interface"); then
      break
    fi

    if [[ "$route_iface" == "$WAN1_LUCI_NAME" ]]; then
      if uci set "network.@route[${route}].metric=${metric}" && uci commit; then
        reload_config
        logger -t failover "$(date): Changed active WAN to $USINGWAN"
        return 0
      fi
      logger -t failover "$(date): Failed to set metric ${metric} on route ${route}"
      return 1
    fi
    route=$((route + 1))
  done

  # Do not claim a change that never happened.
  logger -t failover "$(date): No route on ${WAN1_LUCI_NAME} found, metric not changed"
  return 1
}
#######################################
# Write a one-line snapshot of the failover state to syslog (tag "failover").
#
# The line contains the number of successful pings, each host paired with its
# latest ping result, and the current counter / timing / WAN variables.
#
# Globals:   HOSTS, PING_RESULTS, PINGS, COUNTER, CHANGED, SECONDS,
#            USINGWAN, CHECKWAN (all read)
# Arguments: none
#######################################
function debug {
  local ping_zipper=""
  local i # keep the loop index out of the global scope

  # Pair every recorded result with the host it belongs to: "host result | ".
  for (( i = 0; i < ${#PING_RESULTS[@]}; ++i )); do
    ping_zipper+="${HOSTS[$i]} ${PING_RESULTS[$i]} | "
  done

  logger -t failover "$(date): pings: $PINGS ($ping_zipper), counter: $COUNTER, changed: $CHANGED, seconds: $SECONDS, usingwan: $USINGWAN, checkwan: $CHECKWAN"
}
# USINGWAN is the WAN currently carrying traffic; CHECKWAN is the WAN probed in
# the next ping round. They differ only during a recovery check of the primary.
USINGWAN=$(get_active_wan)
CHECKWAN=$USINGWAN
# Number of consecutive rounds in which every ping failed.
COUNTER=0
# Value of $SECONDS when the WAN was last changed or last recovery-checked.
CHANGED=0

logger -t failover "$(date): Failover script started, current wan $USINGWAN"

while sleep "$INTERVAL"; do
  # While on the secondary WAN, probe the primary once per RECOVERY_CHECK.
  if [[ "$USINGWAN" == "$WAN2_IFACE_NAME" ]] && (( SECONDS - CHANGED > RECOVERY_CHECK )); then
    debug
    CHECKWAN=$WAN1_IFACE_NAME
    CHANGED=$SECONDS
    # NOTE: CHANGED is reset instead of using a modulo check, because pings can
    # take longer than the interval during a failure; with a modulo check it
    # could take multiples of RECOVERY_CHECK to notice the primary recovered.
  fi

  # Ping every host through CHECKWAN and count the fully successful ones.
  PINGS=0
  for (( c = 0; c < ${#HOSTS[@]}; c++ )); do
    # Field 4 of the "... received ..." summary line is taken as the number of
    # received packets; NOTE(review): this matches BusyBox ping's wording
    # ("N packets transmitted, N packets received") - confirm on the target.
    # stderr is discarded on purpose: a down interface is an expected failure.
    RET=$(ping -I "$CHECKWAN" -w 1 -W 1 -c "$PACKETS" "${HOSTS[c]}" 2>/dev/null \
      | awk '/received/ {print $4}')
    PING_RESULTS[c]=$RET
    if [[ "$RET" == "$PACKETS" ]]; then
      PINGS=$((PINGS + 1))
    fi
  done

  # Log the state whenever any ping failed.
  if (( PINGS < ${#HOSTS[@]} )); then
    debug
  fi

  # Count consecutive rounds where all pings failed.
  if (( PINGS == 0 )); then
    COUNTER=$((COUNTER + 1))
  else
    COUNTER=0
  fi

  # All hosts failed several rounds in a row: switch WAN.
  # NOTE: if both WANs fail simultaneously this just switches back and forth,
  # which does not matter since neither would work anyway.
  if (( COUNTER > 3 )); then
    debug
    if [[ "$USINGWAN" == "$WAN1_IFACE_NAME" ]]; then
      # Failed while on the primary WAN: fail over to the secondary.
      USINGWAN=$WAN2_IFACE_NAME
      change_wan_metric 2
    elif [[ "$USINGWAN" == "$WAN2_IFACE_NAME" ]]; then
      # Failed while on the secondary WAN: go back to the primary.
      USINGWAN=$WAN1_IFACE_NAME
      change_wan_metric 0
    fi
    # Give the newly selected WAN a full set of attempts; without this reset a
    # single failed round would flip back immediately, triggering a uci commit
    # and reload_config on every interval while both WANs are down.
    COUNTER=0
  elif (( COUNTER == 0 )); then
    if [[ "$CHECKWAN" == "$WAN1_IFACE_NAME" && "$USINGWAN" == "$WAN2_IFACE_NAME" ]]; then
      # Recovery check succeeded: the primary is back, return to it.
      debug
      USINGWAN=$WAN1_IFACE_NAME
      change_wan_metric 0
    fi
  fi

  # Probe the active WAN again next round, in case this was a recovery check.
  CHECKWAN=$USINGWAN
done