|
|
|
@ -1,26 +1,55 @@ |
|
|
|
|
#!/bin/bash |
|
|
|
|
#wan failover script. ping multiple hosts and failover from primary to secondary after too many fails on primary |
|
|
|
|
|
|
|
|
|
#how often (in seconds) to send pings; used as the sleep between check rounds
INTERVAL=1
#how many packets to send to each host
PACKETS=1
#how often (in seconds) to re-check primary wan while on secondary wan and primary had failed
RECOVERY_CHECK=300
#reliable hosts for checking are cloudflare, quad9, google
HOSTS=("1.1.1.1" "9.9.9.9" "8.8.8.8")

#interface and luci names of primary and secondary wan
WAN1_IFACE_NAME="eth0"
WAN2_IFACE_NAME="wwan0"
WAN1_LUCI_NAME="wan"
WAN2_LUCI_NAME="wanb" #currently unused

#short aliases for the interface names; derived from the names above so the
#two spellings can never disagree when an interface is renamed
WAN1=$WAN1_IFACE_NAME
WAN2=$WAN2_IFACE_NAME

#default state: assume the primary wan is both in use and being checked
USINGWAN=$WAN1_IFACE_NAME
CHECKWAN=$WAN1_IFACE_NAME
#how many times all pings failed
COUNTER=0
#when wan was last changed
CHANGED=0
|
|
|
|
|
|
|
|
|
#get the current active wan interface, in case the script is started during failover
# Globals:   WAN1_LUCI_NAME, WAN1_IFACE_NAME, WAN2_IFACE_NAME (read)
# Outputs:   prints WAN1_IFACE_NAME when the first route bound to
#            WAN1_LUCI_NAME has metric 0, otherwise prints WAN2_IFACE_NAME
function get_active_wan {
  local route=0
  local current_metric=""
  local route_iface=""

  # 100 is only a safety cap; the loop normally stops at the end of the table
  while [ "$route" -le 100 ]; do
    # uci signals a missing section through its exit status and writes the
    # "Entry not found" text to stderr, so the captured output cannot be
    # used to detect the end of the route table
    uci -q get "network.@route[$route]" >/dev/null || break
    route_iface=$(uci -q get "network.@route[$route].interface")
    if [ "$route_iface" == "$WAN1_LUCI_NAME" ]; then
      current_metric=$(uci -q get "network.@route[$route].metric")
      break
    fi
    route=$((route + 1))
  done

  if [ "$current_metric" == "0" ]; then
    #current wan1 metric is 0, so it is set as primary
    printf '%s\n' "$WAN1_IFACE_NAME"
  else
    printf '%s\n' "$WAN2_IFACE_NAME"
  fi
}
|
|
|
|
|
|
|
|
|
#change the primary wan metric
# Arguments: $1 - metric to store on the route bound to WAN1_LUCI_NAME
# Globals:   WAN1_LUCI_NAME, USINGWAN (read); CHANGED (set to $SECONDS)
# Returns:   1 without touching anything when no metric is given
function change_wan_metric() {
  local metric=$1
  local route=0
  local route_iface=""

  if [ -z "$metric" ]; then
    logger -t failover "$(date): change_wan_metric called without a metric, nothing changed"
    return 1
  fi

  CHANGED=$SECONDS
  # 100 just in case this runs away for some reason
  while [ "$route" -le 100 ]; do
    # uci signals a missing section through its exit status (the message goes
    # to stderr), so that is what ends the scan of the route table
    uci -q get "network.@route[$route]" >/dev/null || break
    route_iface=$(uci -q get "network.@route[$route].interface")
    if [ "$route_iface" == "$WAN1_LUCI_NAME" ]; then
      # double quotes so the metric value is expanded rather than stored
      # as the literal text $1
      uci set "network.@route[$route].metric=$metric"
      uci commit
      reload_config
      # stop at the first matching route, the same one get_active_wan reads
      break
    fi
    route=$((route + 1))
  done
  logger -t failover "$(date): Changed active WAN to $USINGWAN"
}
|
|
|
|
|
|
|
|
|
# Log a one-line snapshot of the failover state (PINGS, COUNTER, CHANGED,
# SECONDS, USINGWAN, CHECKWAN) through logger under the tag "failover".
function debug {
  local stamp state
  stamp=$(date)
  state="pings:$PINGS, counter:$COUNTER, changed:$CHANGED, seconds:$SECONDS, usingwan:$USINGWAN, checkwan:$CHECKWAN"
  logger -t failover "${stamp}: ${state}"
}
|
|
|
|
|
|
|
|
|
logger -t failover "$(date): Failover script started."
#which wan is currently being used/checked
# get_active_wan walks the uci route table, so it is queried once and the
# answer reused; this also guarantees both variables start out identical
USINGWAN=$(get_active_wan)
CHECKWAN=$USINGWAN
#how many times all pings failed
COUNTER=0
#when wan was last changed
CHANGED=0

logger -t failover "$(date): Failover script started, current wan $USINGWAN"
|
|
|
|
while sleep $INTERVAL |
|
|
|
|
do |
|
|
|
|
|
|
|
|
|
if [ "$USINGWAN" == "$WAN2" ] && [ $(($SECONDS-$CHANGED)) -gt 300 ]; then |
|
|
|
|
if [ "$USINGWAN" == "$WAN2_IFACE_NAME" ] && [ $(($SECONDS-$CHANGED)) -gt $RECOVERY_CHECK ]; then |
|
|
|
|
debug |
|
|
|
|
# after 5 minutes on failover wan2, check main wan. if it fails counter will go to 1 but since |
|
|
|
|
# wan2 is still fine it should reset to 0 next time and stay on wan2, until the check repeats and so on. |
|
|
|
|
CHECKWAN=$WAN1 |
|
|
|
|
CHANGED=$SECONDS #and reset changed so it just tried once then returns to checking working wan |
|
|
|
|
CHECKWAN=$WAN1_IFACE_NAME |
|
|
|
|
CHANGED=$SECONDS |
|
|
|
|
#NOTE: we don't do a modulo check and instead just reset CHANGED, because pings can take longer than the interval |
|
|
|
|
#during a failure so it can potentially take multiples of RECOVERY_CHECK even after primary wan has recovered |
|
|
|
|
fi |
|
|
|
|
|
|
|
|
|
PINGS=0 |
|
|
|
@ -55,12 +93,11 @@ do |
|
|
|
|
fi |
|
|
|
|
done |
|
|
|
|
|
|
|
|
|
#debug if any pings fail, just for fun |
|
|
|
|
#debug if any pings fail |
|
|
|
|
if [ $PINGS -lt ${#HOSTS[@]} ]; then |
|
|
|
|
debug |
|
|
|
|
fi |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# if all pings failed increase counter |
|
|
|
|
if [ $PINGS -eq 0 ]; then |
|
|
|
|
let COUNTER++ |
|
|
|
@ -69,31 +106,24 @@ do |
|
|
|
|
fi |
|
|
|
|
|
|
|
|
|
# counter failed pings to all hosts multiple times, switch WAN |
|
|
|
|
# NOTE: if both wan fail simultaneously, it will just switch back and forth. but not like it matters, neither would work anyway... lol |
|
|
|
|
if [ $COUNTER -gt 3 ]; then |
|
|
|
|
debug |
|
|
|
|
# NOTE: if both wan fail, it will just switch back and forth. but not like it matters, neither would work anyway... lol |
|
|
|
|
if [ "$USINGWAN" == "$WAN1" ]; then |
|
|
|
|
if [ "$USINGWAN" == "$WAN1_IFACE_NAME" ]; then |
|
|
|
|
#if failed and currently on wan, switch to wanb |
|
|
|
|
USINGWAN=$WAN2_IFACE_NAME |
|
|
|
|
change_wan_metric 2 |
|
|
|
|
USINGWAN=$WAN2 |
|
|
|
|
CHECKWAN=$USINGWAN |
|
|
|
|
logger -t failover "`date`: Changed active WAN metric to 4G modem!" |
|
|
|
|
elif [ "$USINGWAN" == "$WAN2" ]; then |
|
|
|
|
elif [ "$USINGWAN" == "$WAN2_IFACE_NAME" ]; then |
|
|
|
|
#if failed and currently on wanb, switch back to wan |
|
|
|
|
USINGWAN=$WAN1_IFACE_NAME |
|
|
|
|
change_wan_metric 0 |
|
|
|
|
USINGWAN=$WAN1 |
|
|
|
|
CHECKWAN=$USINGWAN |
|
|
|
|
logger -t failover "`date`: Changed active WAN metric to Cable connection!" |
|
|
|
|
fi |
|
|
|
|
elif [ $COUNTER -eq 0 ]; then |
|
|
|
|
# counter is successful, if on wan2 and wan1 has recovered return to wan1 |
|
|
|
|
if [ "$CHECKWAN" == "$WAN1" ] && [ "$USINGWAN" == "$WAN2" ]; then |
|
|
|
|
if [ "$CHECKWAN" == "$WAN1_IFACE_NAME" ] && [ "$USINGWAN" == "$WAN2_IFACE_NAME" ]; then |
|
|
|
|
#ping(s) successful, if on wan2 and wan1 has recovered return to wan1 |
|
|
|
|
debug |
|
|
|
|
#if failed and currently on wanb, switch back to wan |
|
|
|
|
USINGWAN=$WAN1_IFACE_NAME |
|
|
|
|
change_wan_metric 0 |
|
|
|
|
USINGWAN=$WAN1 |
|
|
|
|
CHECKWAN=$USINGWAN |
|
|
|
|
logger -t failover "`date`: Changed active WAN metric to Cable connection!" |
|
|
|
|
fi |
|
|
|
|
fi |
|
|
|
|
|
|
|
|
|