Compare commits

...

31 Commits

Author SHA1 Message Date
bb4e04818b increase timeout when reading data from gathering functions 2024-11-07 10:58:31 +01:00
522861a96e workaround a bug in Bash an re-add missing debug log messages 2024-11-06 10:25:12 +01:00
733cf3e97b use set_oid in set_oid_list 2024-11-04 14:18:05 +01:00
d67917c958 read args to commands synchronous, async is not faster in this case 2024-11-04 13:11:00 +01:00
1aee330230 improve sub-proc handling / killing 2024-11-04 10:40:50 +01:00
1c0b61eae2 change comment about wait function 2024-11-04 10:33:56 +01:00
b37bc30549 just handle rc of wait instead of comparing bash version 2024-11-04 10:27:42 +01:00
e84310932e fix only wait for sub-procs on bash versions 5.1 or above 2024-11-04 10:16:15 +01:00
d5d807c7c3 change log messages 2024-11-02 14:13:12 +01:00
6ec4c02e40 improve handling / killing of hanging background procs 2024-11-02 01:07:11 +01:00
ce2daf9749 start data gathering funcs asynchronously to prevent daemon locks 2024-11-02 00:42:34 +01:00
201456a056 send data and ENDOFDATA to main depending on rc and data availability 2024-11-01 21:56:45 +01:00
add8e44e4f validate reception of ENDOFDATA from data gathering functions 2024-11-01 20:24:14 +01:00
7d4029fd06 prevent time shift when scheduling 2024-10-31 22:25:23 +01:00
3aecda95ba also log the invalid line 2024-10-15 16:36:22 +02:00
48346d02e7 log error if cache receives invalid line 2024-10-15 15:34:03 +02:00
ae0f0aa745 remove test.sh 2024-10-04 16:39:03 +02:00
bec584b42d exit if daemon receives invalid command 2024-10-04 16:37:25 +02:00
75cac1c93b fix issue when receiving partial lines 2024-09-30 15:19:53 +02:00
59dabc3565 read complete line immediately if partial line is received 2024-09-30 14:55:20 +02:00
df9b79f931 skip debug log on partial lines if buffer length is 0 2024-09-30 13:34:26 +02:00
95c8c276ac add debug log for partial received lines 2024-09-30 13:29:07 +02:00
8af91ec36a send error log message to stderr 2024-09-30 12:44:36 +02:00
e8b2555b6b fix typo 2024-09-30 09:08:26 +02:00
b73dda4496 fix log message 2024-09-30 08:59:20 +02:00
79855cfbca set executable bit on snmpd-oid-daemon.sh 2024-09-30 01:01:39 +02:00
c851ff14e0 cleanup 2024-09-30 00:58:31 +02:00
eda4d2725b Change README.md 2024-09-30 00:55:09 +02:00
5bcda90b06 add debug log message to clear_cached_oid function 2024-09-30 00:31:28 +02:00
22b6c9c751 Change README.md 2024-09-29 23:09:25 +02:00
d4679c4c78 Initial commit of snmpd-oid-daemon.sh 2024-09-29 23:08:59 +02:00
2 changed files with 654 additions and 1 deletions

View File

@@ -1,3 +1,23 @@
# snmpd-oid-daemon # snmpd-oid-daemon
A customizable daemon written in Bash to provide custom OIDs with snmpd. A customizable daemon written in Bash to provide custom OIDs to snmpd.
# Requirements
snmpd-oid-daemon runs with Bash version 4.3 or later.
# Installation
* Copy **snmpd-oid-daemon.sh** to your system where it is accessible by snmpd (e.g. to /usr/local/bin).
* Edit **snmpd.conf** and add the following line, replace OID with your custom base OID (e.g. .1.3.6.1.4.1.8072.9999.9999).
```
pass_persist OID /PATH/TO/snmpd-oid-daemon.sh --base-oid OID
```
* Restart snmpd.
# Data gathering functions
* Take a look at the existing functions to learn which ones already exist and how they are implemented.
* Implement your own function or overwrite an existing one in the overload-script if necessary.
* Overwrite the global array DATA_FUNCS in the overload-script to enable/disable functions or change their refresh delay.
# Known issues
* The interface to snmpd does not support type Gauge64. One way around this is to use String instead and convert to int64 on the receiving end.
* The function 'gather_filesum_data' in its current form requires snmpd to run as root which is not always the case. Overload the function (--overload-script) if this is an issue in your case.

633
snmpd-oid-daemon.sh Executable file
View File

@@ -0,0 +1,633 @@
#!/bin/bash
# Resolve the real location of this script (symlinks followed); the default
# overload script is looked up next to it.
SCRIPT_PATH="$(readlink -f -- "${BASH_SOURCE[0]}")"
SCRIPT="$(basename "$SCRIPT_PATH")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
# default config
BASE_OID=".1.3.6.1.4.1.8072.9999.9999"
DEBUG=false
DEBUG_LOG_MARKER=""
LOG_ENABLED=true
# The log tag is the script name without its extension.
LOG_TAG="${SCRIPT%.*}"
OVERLOAD_SCRIPT="${SCRIPT_DIR}/${LOG_TAG}-overload.sh"
#############################################################################################################
#
# COMMAND LINE
#
# Prints the command line help to stdout.
# The option names match what the argument parser accepts: option values are
# passed as a separate argument (e.g. '--base-oid OID'), not as '--opt=value'.
# Globals: SCRIPT, BASE_OID, LOG_TAG (read)
function usage() {
    cat <<EOF
Usage: $SCRIPT [-b BASE_OID] [-d] [-m FILE] [-o FILE] [-n] [-t TAG] [-h]
Mandatory arguments to long options are mandatory for short options too.
  -b, --base-oid BASE_OID     base OID to operate on, default is '$BASE_OID'
  -d, --debug                 enable debug output
  -h, --help                  display this help and exit
  -m, --debug-marker FILE     debug logs will be enabled or disabled during runtime
                              based on the existence of this file
  -o, --overload-script FILE  source file to add or overload data gathering functions
  -n, --no-log                disable logging
  -t, --tag TAG               mark every line to be logged with the specified tag,
                              default is '$LOG_TAG'
EOF
}
# Parses the command line and updates the global configuration variables
# BASE_OID, DEBUG, DEBUG_LOG_MARKER, LOG_ENABLED, OVERLOAD_SCRIPT and LOG_TAG.
# Option values are expected as a separate argument following the option.
# Exits with status 1 (message on stderr) on invalid input and with status 0
# after printing the usage for -h/--help.
function parse_args() {
    local arg
    while (( $# > 0 )); do
        arg=$1
        shift
        case "$arg" in
            -b|--base-oid)
                BASE_OID=${1:-}
                shift
                if ! [[ $BASE_OID =~ ^(\.[0-9]+)+$ ]]; then
                    echo "invalid base OID '$BASE_OID'!" >&2
                    exit 1
                fi
                ;;
            -d|--debug)
                DEBUG=true
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            -m|--debug-marker)
                DEBUG_LOG_MARKER=${1:-}
                shift
                ;;
            -n|--no-log)
                LOG_ENABLED=false
                ;;
            -o|--overload-script)
                OVERLOAD_SCRIPT=${1:-}
                shift
                # The variable name was misspelled here before, which made
                # every use of this option fail.
                if [[ -z $OVERLOAD_SCRIPT ]]; then
                    echo "overload-script is empty!" >&2
                    exit 1
                fi
                ;;
            -t|--tag)
                LOG_TAG=${1:-}
                shift
                if [[ -z $LOG_TAG ]]; then
                    echo "log tag is empty!" >&2
                    exit 1
                fi
                ;;
            *)
                echo "invalid argument '$arg'!" >&2
                usage
                exit 1
                ;;
        esac
    done
    return 0
}
parse_args "$@"
#############################################################################################################
#
# LOGGING
#
# Set up the log sinks. Three file descriptors are allocated and stored in the
# variables logger, logger_err and logger_debug; with logging disabled all of
# them point to /dev/null.
if $LOG_ENABLED; then
# Regular and error messages are fed to two separate logger(1) processes,
# tagged with LOG_TAG and LOG_TAG-error respectively.
exec {logger}> >(logger --id=$$ -t "$LOG_TAG")
exec {logger_err}> >(logger --id=$$ -t "$LOG_TAG"-error)
if $DEBUG; then
# Debug messages share the sink of the regular messages.
logger_debug=$logger
elif [ -n "$DEBUG_LOG_MARKER" ]; then
# Debug messages are forwarded line by line to the regular sink, but only
# while the marker file exists; otherwise they are dropped.
exec {logger_debug}> >(while read -r line; do test -f "$DEBUG_LOG_MARKER" && echo "$line" >&$logger; done)
else
exec {logger_debug}>/dev/null
fi
else
exec {logger}>/dev/null
exec {logger_err}>/dev/null
exec {logger_debug}>/dev/null
fi
# Publish the sinks as the FDs used by the rest of the script: LOG, DEBUGLOG and
# stderr (FD 2). In debug mode with a terminal on stdin, tee additionally copies
# every message to stdout.
if $DEBUG && [ -t 0 ]; then
exec {LOG}> >(tee /dev/fd/$logger)
exec 2> >(tee /dev/fd/$logger_err)
exec {DEBUGLOG}> >(tee /dev/fd/$logger_debug)
else
exec {LOG}>&$logger
exec 2>&$logger_err
exec {DEBUGLOG}>&$logger_debug
fi
#############################################################################################################
#
# DATA GATHERING FUNCTIONS
#
# These functions are called periodically and gather data for OID trees.
# The resulting data is handed over to the cache by calling 'set_oid' or 'set_oid_list' and piping
# their output to 'submit_oids'.
#
# Strips a declaration generated by 'declare -p' and writes only the value to stdout.
# This is done to avoid security issues by using 'eval'. We manually declare the variables again at the
# receiving end.
# E.g. declare -- var="string" => "string"
#      declare -a var=("string") => ("string")
#
# It also works around a bug in Bash prior to version 4.4, which generates wrong declarations for
# arrays and associative arrays.
# E.g. declare -a var='("string")' => ("string")
#      declare -A var='(["0"]="string")' => (["0"]="string")
#
# Arguments: $1 - the declaration (optional); if missing or empty, one line is read from stdin
# Outputs:   the stripped value on stdout
#
function strip_declaration() {
    local decl=${1:-}
    # Quoted test: a declaration passed as argument contains spaces.
    [[ -z $decl ]] && read -r decl
    decl=${decl#declare -}
    # After removing 'declare -' the text starts with the attribute flags.
    local is_list=false
    [[ $decl =~ ^(a|A ) ]] && is_list=true
    # Drop everything up to and including the first '=' (flags and variable name).
    decl=${decl#*=}
    if $is_list && (( BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 4 )); then
        decl=${decl#\'}
        decl=${decl%\'}
    fi
    printf '%s\n' "$decl"
    return 0
}
# Sets type and value for a single OID. Its output should be piped to 'submit_oids'.
# Arguments: $1 - OID
#            $2 - type
#            $3 - value; newlines and carriage returns are removed
# Outputs:   three lines on stdout: OID, type, value
#
set_oid() {
    local oid=${1:-} oid_type=${2:-} value=${3:-}
    # printf with quoted arguments keeps the value intact: no word splitting,
    # no pathname expansion and no special treatment of values like '-n'.
    printf '%s\n' "$oid" "$oid_type" "${value//[$'\n'$'\r']/}"
    return 0
}
# DATA is a table (list of rows). The shell has no nested arrays, so every row is stored as
# the declaration of a list variable as generated by 'declare -p'.
# The variable names used in these declarations do not matter, they get stripped away.
declare -a DATA
# COL_TYPES lists the type of each column. The number of types in COL_TYPES must be equal to
# the number of columns of the rows in DATA.
declare -a COL_TYPES
# Emits type and value for every cell of DATA below the given base OID, using the types from COL_TYPES.
# With a single column type the OIDs are BASE.ROW and only the first cell of each row is emitted;
# otherwise they are BASE.COLUMN.ROW.
# Arguments: $1 - base OID
#            $2 - index of the first row (optional, default 1)
#            $3 - index of the first column (optional, default 1)
# Its output should be piped to 'submit_oids'.
#
function set_oid_list {
    local oid_prefix=$1
    local -i row_no=${2:-1}
    local -i first_col=${3:-1}
    local -i col_no type_no
    local -i single_column=0
    local row_src cell
    (( ${#COL_TYPES[@]} == 1 )) && single_column=1
    for row_src in "${DATA[@]}"; do
        # Re-create the row from its stripped declaration.
        local -a row=$(strip_declaration <<<"$row_src")
        if (( single_column )); then
            set_oid "$oid_prefix.$row_no" "${COL_TYPES[0]}" "${row[0]//[$'\n'$'\r']/}"
        else
            col_no=$first_col
            type_no=0
            for cell in "${row[@]}"; do
                set_oid "$oid_prefix.$col_no.$row_no" "${COL_TYPES[type_no]}" "${cell//[$'\n'$'\r']/}"
                col_no+=1
                type_no+=1
            done
        fi
        row_no+=1
    done
    return 0
}
# Reads OIDs from stdin (three lines per OID: OID, type, value - as written by 'set_oid'),
# combines them in an associative array and writes its stripped declaration to stdout,
# prefixed with 'UPDATE '. Nothing is written if no OID was read.
# Arguments: $1 - base OID to clear first (optional); if given, 'CLEAR <base OID>' is written
#                 before the update
#
function submit_oids() {
    local clear_base_oid=${1:-}
    local oid oid_type value
    local -A oids=()
    local -a type_value
    [[ -n $clear_base_oid ]] && printf 'CLEAR %s\n' "$clear_base_oid"
    while read -r oid; do
        read -r oid_type
        read -r value
        # An empty string is not a valid associative array subscript, skip such entries.
        [[ -z $oid ]] && continue
        type_value=("$oid_type" "$value")
        oids[$oid]=$(declare -p type_value | strip_declaration)
    done
    if (( ${#oids[@]} > 0 )); then
        printf 'UPDATE '
        declare -p oids | strip_declaration
    fi
    return 0
}
# Gathers the state of all device-mapper devices whose UUID starts with 'mpath-'.
# One row per device: name, UUID (without the 'mpath-' prefix), "vendor,model" of a
# running slave, number of slaves and number of slaves that are not 'running'.
# Submits a timestamp (.2.1.0), the number of rows (.2.2.0) and the table (.2.3.1)
# and clears everything below .2 first.
# Globals: DATA, COL_TYPES (written)
#
function gather_multipath_data() {
    DATA=()
    local mp uuid dev_model dev_vendor slave_state_f slave_state
    local -i slave_failed slave_count
    local -a row
    for mp in /sys/devices/virtual/block/dm-*; do
        # Reset first: if the file cannot be opened, read is not executed and
        # the UUID of the previous device would be reused.
        uuid=
        read -r uuid <"$mp/dm/uuid"
        [[ $uuid != mpath-* ]] && continue
        slave_failed=0
        slave_count=0
        dev_model=
        dev_vendor=
        for slave_state_f in "$mp"/slaves/*/device/state; do
            ((slave_count++))
            slave_state=
            read -r slave_state <"$slave_state_f"
            if [[ $slave_state != "running" ]]; then
                ((slave_failed++))
            else
                # vendor and model are located next to the state file
                read -r dev_vendor <"${slave_state_f%state}vendor"
                read -r dev_model <"${slave_state_f%state}model"
            fi
        done
        row=("${mp##*/}" "${uuid#mpath-}" "$dev_vendor,$dev_model" "$slave_count" "$slave_failed")
        DATA+=("$(declare -p row)")
    done
    submit_oids .2 < <(
        set_oid .2.1.0 gauge "$(date +%s)"
        set_oid .2.2.0 gauge "${#DATA[@]}"
        COL_TYPES=(string string string gauge gauge)
        set_oid_list .2.3.1
    )
    return 0
}
# Gathers the amount of available memory from /proc/meminfo: the value of MemAvailable if
# that line exists, otherwise the sum of MemFree and Inactive. The value is 0 if
# /proc/meminfo is not readable.
# Submits a timestamp (.3.1.0) and the value as string (.3.2.0).
#
function gather_meminfo_data() {
    local available_line free_line inactive_line
    local memfree=0
    if [ -r /proc/meminfo ]; then
        if available_line=$(grep MemAvailable: /proc/meminfo); then
            # keep the digits only
            memfree=${available_line//[^0-9]/}
        else
            free_line=$(grep MemFree: /proc/meminfo)
            inactive_line=$(grep Inactive: /proc/meminfo)
            memfree=$((${free_line//[^0-9]/} + ${inactive_line//[^0-9]/}))
        fi
    fi
    submit_oids < <(
        set_oid .3.1.0 gauge "$(date +%s)"
        set_oid .3.2.0 string "$memfree"
    )
    return 0
}
# Counts the processes that are in zombie state.
# Submits a timestamp (.4.1.0) and the number of zombies (.4.2.0).
#
function gather_zombies_data() {
    local zombies
    # Only the 'State:' line is evaluated, otherwise a process whose name contains
    # 'zombie' would be counted too. -h suppresses the file name prefix, which would
    # break the '^' anchor. Errors are discarded on purpose: processes may vanish
    # while their status files are being read.
    zombies=$(grep -h '^State:[[:space:]]*Z' /proc/[0-9]*/status 2>/dev/null | wc -l)
    submit_oids < <(
        set_oid .4.1.0 gauge "$(date +%s)"
        set_oid .4.2.0 gauge "$zombies"
    )
    return 0
}
# Gathers the MII status of all bonding interfaces and their slaves.
# One row per slave: bond name, MII status of the bond, slave name, MII status of the slave.
# Submits a timestamp (.5.1.0), the number of rows (.5.2.0) and the table (.5.3.1)
# and clears everything below .5 first.
# Globals: DATA, COL_TYPES (written)
#
function gather_bonding_data() {
    local bond master_state slaves slave slave_state
    local -a row
    DATA=()
    for bond in /sys/devices/virtual/net/bond*; do
        # Reset first: if a file cannot be opened, read is not executed and the
        # values of the previous bond would be reused.
        master_state=
        slaves=
        read -r master_state <"$bond/bonding/mii_status"
        read -r slaves <"$bond/bonding/slaves"
        # $slaves is a whitespace separated list, word splitting is intended here.
        for slave in $slaves; do
            slave_state=
            read -r slave_state <"$bond/lower_$slave/bonding_slave/mii_status"
            row=("${bond##*/}" "$master_state" "$slave" "$slave_state")
            DATA+=("$(declare -p row)")
        done
    done
    submit_oids .5 < <(
        set_oid .5.1.0 gauge "$(date +%s)"
        set_oid .5.2.0 gauge "${#DATA[@]}"
        COL_TYPES=(string string string string)
        set_oid_list .5.3.1
    )
    return 0
}
# Gathers the SHA1 checksums of a list of files.
# One row per file that sha1sum was able to read: path, checksum.
# Submits a timestamp (.6.1.0), the number of rows (.6.2.0) and the table (.6.3.1)
# and clears everything below .6 first.
# Arguments: $@ - files to check (optional); defaults to /etc/passwd, /etc/shadow,
#                 /etc/group and /root/.ssh/authorized_keys
# Globals:   DATA, COL_TYPES (written)
#
function gather_filesum_data() {
    DATA=()
    local sum path
    local -a row
    local -a files=("$@")
    if (( ${#files[@]} == 0 )); then
        files=(/etc/passwd /etc/shadow /etc/group /root/.ssh/authorized_keys)
    fi
    while read -r sum path; do
        row=("$path" "$sum")
        DATA+=("$(declare -p row)")
    done < <(sha1sum -- "${files[@]}")
    submit_oids .6 < <(
        set_oid .6.1.0 gauge "$(date +%s)"
        set_oid .6.2.0 gauge "${#DATA[@]}"
        COL_TYPES=(string string)
        set_oid_list .6.3.1
    )
    return 0
}
# Schedule of the data gathering functions: function name => refresh delay in seconds
declare -A DATA_FUNCS=()
DATA_FUNCS["gather_multipath_data"]=60
DATA_FUNCS["gather_meminfo_data"]=30
DATA_FUNCS["gather_zombies_data"]=30
DATA_FUNCS["gather_bonding_data"]=30
DATA_FUNCS["gather_filesum_data"]=60
#############################################################################################################
#
# OVERLOAD SCRIPT
#
# Source the file specified in OVERLOAD_SCRIPT to overload data gathering functions.
#
# Source the overload script if it is a readable regular file; a missing or
# unreadable script is only reported to the debug log.
if [[ ! -f "$OVERLOAD_SCRIPT" || ! -r "$OVERLOAD_SCRIPT" ]]; then
    echo "overload script '$OVERLOAD_SCRIPT' does not exist or is not readable" >&$DEBUGLOG
else
    echo "source $OVERLOAD_SCRIPT" >&$LOG
    source "$OVERLOAD_SCRIPT"
fi
#############################################################################################################
#
# MAIN AND ITS HELPER FUNCTIONS
#
# The main logic of the daemon is defined here.
#
# Cache variables for OID data and types, both are indexed by the OID relative to BASE_OID
declare -A OIDDATA
declare -A OIDTYPES
# Removes all elements from OIDDATA and OIDTYPES with an OID starting with base_oid
# followed by a dot. The number of removed OIDs is written to the debug log.
# Arguments: $1 - base OID of the sub-tree to remove
#
function clear_cached_oid() {
    local base_oid=$1
    local oid
    local -i count=0
    for oid in "${!OIDDATA[@]}"; do
        if [[ $oid == "$base_oid".* ]]; then
            # The arguments of unset must be quoted: 'OIDDATA[...]' is a glob pattern
            # and with nullglob enabled an unquoted, non-matching pattern is removed
            # from the command line, so nothing would be unset at all.
            unset "OIDDATA[$oid]"
            unset "OIDTYPES[$oid]"
            count+=1
        fi
    done
    echo "cache: removed $count OIDs" >&$DEBUGLOG
    return 0
}
# Processes the lines written by 'submit_oids' (read from stdin) and applies them to the
# OIDDATA and OIDTYPES arrays:
#   CLEAR <base OID>     - remove the sub-tree from the cache
#   UPDATE <declaration> - store type and value of every OID of the declaration
#   ENDOFDATA            - a gathering function is done; return unless warmup is true
#   STARTUPDONE          - always return
# Any other line is reported on stderr and skipped. The shell exits with status 255
# if stdin cannot be read anymore.
# Arguments: $1 - warmup (optional, default false); if true, keep reading until
#                 STARTUPDONE is received
#
function update_oid_cache() {
    local warmup=${1:-false}
    local msg key
    while true; do
        if ! read -r msg; then
            exit 255
        fi
        echo "cache: received: $msg" >&$DEBUGLOG
        case "$msg" in
            STARTUPDONE)
                break
                ;;
            "ENDOFDATA")
                if ! $warmup; then
                    break
                fi
                ;;
            "CLEAR "?*)
                clear_cached_oid "${msg#CLEAR }"
                ;;
            "UPDATE "?*)
                # Re-create the associative array from its stripped declaration.
                local -A oids=${msg#UPDATE }
                for key in $(printf "%s\n" ${!oids[@]} | sort -V); do
                    # Every element is the stripped declaration of the list (type value).
                    local -a type_value=${oids[$key]}
                    OIDTYPES[$key]=${type_value[0]}
                    OIDDATA[$key]=${type_value[1]}
                    echo "cache: update $key = ${type_value[0]}: ${type_value[1]}" >&$DEBUGLOG
                done
                ;;
            *)
                echo "cache: received invalid line: $msg" >&2
                ;;
        esac
    done
    return 0
}
# Writes every argument as a separate line to stdout (the reply to snmpd) and
# logs it, prefixed with '> ', to the debug log.
# Arguments: $@ - lines to send
#
function snmp_echo() {
    local value
    for value in "$@"; do
        # printf instead of echo: values like '-n' or '-e' must be sent literally.
        printf '> %s\n' "$value" >&$DEBUGLOG
        printf '%s\n' "$value"
    done
    return 0
}
# Converts an absolute OID into an OID relative to BASE_OID and stores it in the
# variable named by the second argument. BASE_OID itself results in '.0'.
# An OID is only accepted if it is equal to BASE_OID or starts with BASE_OID
# followed by a dot; an OID that merely shares a textual prefix with BASE_OID
# (e.g. '.1.35' for base '.1.3') is rejected.
# Arguments: $1 - absolute OID
#            $2 - name of the variable that receives the relative OID
# Returns:   0 on success; 1 if the OID is not part of BASE_OID, in that case
#            NONE is sent to snmpd and the variable is left untouched
#
function req_from_oid() {
    local full_oid=$1
    # A local nameref avoids leaking a global variable to the rest of the script.
    local -n __req_ref=$2
    if [[ $full_oid != "$BASE_OID" && $full_oid != "$BASE_OID".* ]]; then
        echo "$full_oid is not part of our base OID" >&$DEBUGLOG
        snmp_echo NONE
        return 1
    fi
    __req_ref=${full_oid#"$BASE_OID"}
    [[ -z $__req_ref ]] && __req_ref=".0"
    return 0
}
# Sends the cached entry of a relative OID to snmpd: absolute OID, type and value.
# Arguments: $1 - OID relative to BASE_OID
#
function return_oid() {
    local req=$1
    local cached_type=${OIDTYPES[$req]}
    local cached_value=${OIDDATA[$req]}
    snmp_echo "${BASE_OID}${req}" "$cached_type" "$cached_value"
    return 0
}
# Main logic of the daemon.
# Waits until all data gathering functions delivered their data, then serves the
# requests of snmpd in a loop: commands are read from the FD STDIN, replies are written
# to stdout and cache updates are read from stdin whenever data is pending.
# Returns on an empty or invalid command; the shell exits with status 255 if the
# command channel cannot be read anymore.
#
function main() {
    local buf cmd oid req next
    local -i rc
    echo "waiting for all data gathering functions to return data" >&$LOG
    update_oid_cache true
    echo "daemon started (BASE_OID: $BASE_OID)" >&$LOG
    while :; do
        # Apply all pending cache updates before serving the next request.
        while read -t 0; do
            update_oid_cache
        done
        read -r -t 1 -u $STDIN buf
        rc=$?
        if (( rc == 0 )); then
            cmd+=${buf}
        elif (( rc > 128 )); then
            # read timed out
            [[ -z $buf ]] && continue
            echo "< $buf (partial line)" >&$DEBUGLOG
            cmd+=${buf}
            # to work around a bug in Bash prior to version 5.3, check if $cmd contains a complete command first before continuing reading
            # -> bug report: https://lists.gnu.org/archive/html/bug-bash/2024-10/msg00005.html
            # -> bug fix: https://git.savannah.gnu.org/cgit/bash.git/diff/builtins/read.def?h=devel&id=3ed028ccec871bc8d3b198c1681374b1e37df7cd
            [[ "${cmd,,}" =~ ^(ping|set|get|getnext)$ ]] || continue
        else
            exit 255
        fi
        echo "< $cmd" >&$DEBUGLOG
        case "${cmd,,}" in
            ping)
                cmd=""
                snmp_echo PONG
                ;;
            set)
                # we need two args here, 'oid' and 'type_and_value'
                cmd=""
                read -r -u $STDIN buf
                echo "< $buf" >&$DEBUGLOG
                read -r -u $STDIN buf
                echo "< $buf" >&$DEBUGLOG
                snmp_echo not-writable
                ;;
            get)
                cmd=""
                read -r -u $STDIN oid
                echo "< $oid" >&$DEBUGLOG
                if [[ -z $oid ]]; then
                    echo "received empty oid" >&2
                    snmp_echo NONE
                    # The request has been answered, a second reply would
                    # desynchronize the dialog with snmpd.
                    continue
                fi
                req_from_oid "$oid" req || continue
                if [[ ! -v OIDDATA[$req] ]]; then
                    echo "$oid not found" >&$DEBUGLOG
                    snmp_echo NONE
                    continue
                fi
                return_oid "$req"
                ;;
            getnext)
                cmd=""
                read -r -u $STDIN oid
                echo "< $oid" >&$DEBUGLOG
                if [[ -z $oid ]]; then
                    echo "received empty oid" >&2
                    snmp_echo NONE
                    continue
                fi
                req_from_oid "$oid" req || continue
                # Add the requested OID to the sorted list of cached OIDs and pick the line
                # following it. The OID is matched as a fixed string on the whole line (-Fx),
                # its dots must not be interpreted as regex wildcards.
                next=$(printf "%s\n" "${!OIDDATA[@]}" "$req" | sort -V | grep -A1 -Fx -- "$req" | tail -n 1)
                echo "evaluated next candidate: [requested: '$req', next: '$next']" >&$DEBUGLOG
                if [[ -z $next || $next == "$req" ]]; then
                    echo "$oid not found" >&$DEBUGLOG
                    snmp_echo NONE
                    continue
                fi
                return_oid "$next"
                ;;
            "")
                echo "empty command, exiting ..." >&$LOG
                break
                ;;
            *)
                echo "invalid command '$cmd', exiting ..." >&2
                break
                ;;
        esac
    done
    return 0
}
#############################################################################################################
#
# STARTUP
#
# Start the daemon.
# Main is running in a sub-shell, reads commands from stdin and writes results to stdout.
# The main process gathers the data and writes it to the FD DATAIN which is read by main.
#
export PATH=/usr/bin:/bin
# Globs without a match expand to nothing instead of the pattern itself.
shopt -s nullglob
# Duplicate stdin to a new fd that is used in main.
# This is necessary because data updates will be piped to the stdin of main.
exec {STDIN}<&0
# Start main in a sub-shell and create a writable fd to it.
echo "daemon starting (PID: $$)" >&$LOG
exec {DATAIN}> >(main)
main_pid=$!
trap "echo daemon stopped >&$LOG" EXIT
# Scheduler state, all indexed by the name of the data gathering function:
declare -A timetable # time of the next execution (seconds since epoch)
declare -A fdtable   # fd to read the output of the running function from, -1 if not running
declare -A pidtable  # PID of the most recently started run
first_run=true
while :; do
    # Check if main is still alive and exit otherwise.
    ps -p $main_pid >/dev/null || break
    [ -v EPOCHSECONDS ] && now=$EPOCHSECONDS || now=$(date +%s)
    # Start every function that is due, unless its previous run is still active.
    for func in "${!DATA_FUNCS[@]}"; do
        next_update=${timetable[$func]:-$now}
        if (( now >= next_update )); then
            delay=${DATA_FUNCS[$func]}
            $first_run && echo "starting $func (refresh every $delay seconds)" >&$LOG
            fd=${fdtable[$func]:--1}
            if (( fd == -1 )); then
                exec {data_fd}< <($func </dev/null)
                pid=$!
                echo "gather: executed $func (PID $pid, FD $data_fd)" >&$DEBUGLOG
                fdtable[$func]=$data_fd
                pidtable[$func]=$pid
            else
                pid=${pidtable[$func]}
                echo "gather: skip executing $func, it is still running (PID $pid, FD $fd)" >&2
            fi
            # The next execution is based on the scheduled time, not on the current
            # time, to prevent the schedule from drifting.
            ((next_update+=delay))
            timetable[$func]=$next_update
            echo "gather: scheduled next execution of $func at $(date -d @$next_update)" >&$DEBUGLOG
        fi
    done
    # Collect the output of the running functions and forward it to main.
    for func in "${!fdtable[@]}"; do
        fd=${fdtable[$func]}
        (( fd == -1 )) && continue
        if ! $first_run; then
            # Skip functions without pending output. On the first run the output
            # of every function is awaited instead (warmup).
            read -t 0 -u $fd
            (( $? == 0 )) || continue
        fi
        data=$(timeout 5 cat <&$fd)
        rc=$?
        exec {fd}<&-
        fdtable[$func]=-1
        pid=${pidtable[$func]}
        if (( rc == 124 )); then
            echo "gather: timeout receiving data from $func (PID $pid, FD $fd), sending SIGTERM" >&2
            kill -SIGTERM $pid
            sleep 1
            if ps -p $pid >/dev/null; then
                echo "gather: unable to terminate $func (PID $pid, FD $fd), sending SIGKILL" >&2
                kill -SIGKILL $pid
            fi
            continue
        fi
        wait $pid &>/dev/null
        rc=$?
        # the wait function in older Bash versions prior to 5.1 always returns 127 if the
        # sub-process already exited at this point
        (( rc == 127 )) && rc=0
        echo "gather: $func (PID $pid, FD $fd) exited with rc = $rc" >&$DEBUGLOG
        if (( rc == 0 )) && [ -n "$data" ]; then
            echo "gather: sending data to cache" >&$DEBUGLOG
            echo "$data" 1>&$DATAIN
            echo "ENDOFDATA" >&$DATAIN
        fi
    done
    # Emit STARTUPDONE to inform main about data availability from all gathering functions (warmup).
    $first_run && echo STARTUPDONE >&$DATAIN && first_run=false
    sleep 1
done
exit 0