#!/bin/sh
#
#      -*- OpenSAF  -*-
#
# (C) Copyright 2009 The OpenSAF Foundation
# Copyright Ericsson AB 2020 - All Rights Reserved.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
# under the GNU Lesser General Public License Version 2.1, February 1999.
# The complete license can be accessed from the following location:
# http://opensource.org/licenses/lgpl-license.php
# See the Copying file included with the OpenSAF distribution for full
# licensing terms.
#
# Author(s): Emerson Network Power
#

# Uncomment and update if remote fencing is to be used. Also set FMS_USE_REMOTE_FENCING=1 in fmd.conf file.
#export FMS_FENCE_CMD="stonith"
#export FMS_DEVICE_TYPE="external/libvirt"
#export FMS_HYPERVISOR_URI="qemu+tcp://192.168.122.1/system"
#export FMS_FENCE_ACTION="reset"

# Load the OpenSAF directory layout. $libdir is used right below; the other
# directory variables used by this script ($bindir, $pkgsysconfdir, ...) are
# presumably defined there as well.
. /etc/opensaf/osafdir.conf

# Put the OpenSAF libraries first on the library search path. The previous
# value is appended only when it is non-empty: a trailing ':' would create an
# empty path entry, which the dynamic linker treats as the current directory.
export LD_LIBRARY_PATH="$libdir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# NOTE: This script is a customization point between OpenSAF and a target
# system and should be changed according to the needs of such system.

# Node fencing: OpenSAF cannot reboot a node when there is no CLM node to
# PLM EE mapping in the information model. In such cases rebooting has to be
# done through proprietary mechanisms, i.e. not through PLM. The node ID is
# the only piece of information at the disposal of such a mechanism.

# Pull in the optional fmd and nid configuration files when they exist.
if [ -f "$pkgsysconfdir/fmd.conf" ]; then
  . "$pkgsysconfdir/fmd.conf"
fi
if [ -f "$pkgsysconfdir/nid.conf" ]; then
  . "$pkgsysconfdir/nid.conf"
fi

# File holding the node ID of the local node.
NODE_ID_FILE=$pkglocalstatedir/node_id

# Positional arguments:
#   $1 - ID of the node to reboot
#   $2 - EE name of the node to reboot
#   $3 - set to 1 to request a "safe reboot"
node_id=$1
ee_name=$2
safe_reboot=$3

# Run commands through sudo when not superuser. icmd is reset first so that a
# value inherited from the environment is never used as a command prefix, and
# sudo is located with the POSIX 'command -v' instead of the external 'which'.
icmd=""
if [ "$(id -u)" -ne 0 ]; then
  icmd=$(command -v sudo 2> /dev/null)
fi

# Reboot the local node as part of a "safe reboot" request.
# Reads the globals icmd (privilege prefix, may be empty) and bindir.
opensaf_safe_reboot() {
	# Step 1: suspend the IMM node director so that no sync can take place.
	logger -t opensaf_reboot "Stopping IMMND"
	$icmd pkill -STOP osafimmnd

	# Leave time for IMM to be stopped on every node of the cluster.
	sleep 5

	# Step 2: with IMM stopped everywhere it is safe to go down. Have the
	# OpenSAF log server write its pending messages to disk first.
	$bindir/osaflog --flush

	logger -t opensaf_reboot "Rebooting local node using $icmd /sbin/shutdown -r now"
	$icmd /sbin/shutdown -r now
}

## Fence a remote node with the configured fencing command (e.g. stonith).
## Reads FMS_FENCE_CMD, FMS_DEVICE_TYPE, FMS_HYPERVISOR_URI, FMS_FENCE_ACTION
## and ee_name; leaves the command's exit status in retval.
## Exits the script with status 1 when the fencing command fails.
opensaf_reboot_with_remote_fencing() {
	retval=0
	"$FMS_FENCE_CMD" \
		-t "$FMS_DEVICE_TYPE" \
		hostlist="node:$ee_name" \
		hypervisor_uri="$FMS_HYPERVISOR_URI" \
		-T "$FMS_FENCE_ACTION" \
		node || retval=$?

	if [ "$retval" -ne 0 ]; then
		logger -t "opensaf_reboot" "Rebooting remote node $ee_name using $FMS_FENCE_CMD failed, rc: $retval"
		exit 1
	fi
}


# If PLM exists in the system, the reboot is performed using the EE name.
#
# Globals read:
#   ee_name      - DN of the execution environment to restart, for example
#                  safEE=my_linux_os,safHE=64bitmulticore,safDomain=my_domain
#   node_id      - ID of the node that was requested to reboot
#   self_node_id - ID of the local node
#   icmd         - privilege prefix (sudo), empty when already superuser
# Globals written:
#   retval       - exit status of the last immadm invocation
#
# A graceful restart is requested first (admin operation ID 7 is
# SA_PLM_ADMIN_RESTART) and an abrupt restart second. When both fail the
# local node is force-rebooted if it is the target; for any other node the
# script exits with status 1.
opensaf_reboot_with_plm()
{
	immadm -o 7 "$ee_name"
	retval=$?
	if [ "$retval" -eq 0 ]; then
		return 0
	fi

	logger -t "opensaf_reboot" "graceful restart failed for $ee_name: attempting abrupt restart"
	immadm -o 7 -p restartOption:SA_STRING_T:abrupt "$ee_name"
	retval=$?
	if [ "$retval" -eq 0 ]; then
		return 0
	fi

	if [ "$self_node_id" = "$node_id" ]; then
		# Use the same privilege prefix as the rest of the script instead
		# of a hard-coded sudo, which may be missing when running as root.
		$icmd /sbin/reboot -f
	else
		logger -t "opensaf_reboot" "abrupt restart failed for $ee_name: unable to restart remote node"
		exit 1
	fi
}

# Bring the local node down as quickly as possible.
# Reads the global icmd (privilege prefix, may be empty) and leaves the status
# of the last attempted reboot method in ret_code.
quick_local_node_reboot() {
	logger -t "opensaf_reboot" "Do quick local node reboot"

	# Suspend the node directors, then kill the remaining listed services.
	for service in osafamfnd osafimmnd; do
		$icmd pkill -STOP $service
	done
	for service in osafrded osafamfd osafimmd osaflogd osafntfd osafclmd; do
		$icmd pkill -KILL $service
	done

	# First attempt: write 'b' to the kernel sysrq trigger.
	ret_code=0
	$icmd /bin/sh -c "/bin/echo -n 'b' 2> /dev/null > /proc/sysrq-trigger" || ret_code=$?
	[ "$ret_code" -eq 0 ] && return 0

	# Second attempt: forced reboot through systemd, when it is available.
	if [ -x /bin/systemctl ]; then
		ret_code=0
		$icmd /bin/systemctl --force --force reboot || ret_code=$?
		[ "$ret_code" -eq 0 ] && return 0
	fi

	# Last resort.
	$icmd /sbin/reboot -f
}

# The local node ID is read from $NODE_ID_FILE; give up when it is missing.
if [ ! -f "$NODE_ID_FILE" ]; then
	logger -t "opensaf_reboot" "$NODE_ID_FILE doesnt exists,reboot failed "
	exit 1
fi

# The file content is interpreted as a hexadecimal number: any leading
# 0x/0X/0b/0B is dropped, a single 0x prefix is added, and the value is
# converted to decimal so that it can be compared with the node ID argument.
temp_node_id=$(cat "$NODE_ID_FILE")
temp_node_id=$(printf '%s\n' "$temp_node_id" | sed -e 's:^0[bBxX]::' -e 's:^:0x:')
self_node_id=$(printf "%d" "$temp_node_id")

# Locate the TIPC tools with the POSIX 'command -v'; the external 'which' is
# not installed everywhere and its absence would silently disable detection.
tipc=$(command -v tipc 2> /dev/null)
tipc_config=$(command -v tipc-config 2> /dev/null)

# When the 'tipc' tool is installed, the tipc-config shipped in $pkglibdir is
# used instead of any tipc-config found on PATH (presumably a compatibility
# wrapper around 'tipc' - confirm against the installed package).
if [ -x "${tipc}" ]; then
	tipc_config="${pkglibdir}/tipc-config"
fi

unset tipc

# Dispatch on the command-line arguments:
#   - No argument: force a node reboot immediately without log flushing,
#     process terminating or disk un-mounting.
#   - Argument 3 set to 1: "safe reboot" request (CLM cluster reboot);
#     arguments one and two are set but not used.
#   - Otherwise argument 1 is the ID of the node to reboot and argument 2 its
#     EE name (the peer CLM node name when remote fencing is used).
if [ "$#" = 0 ]; then
	quick_local_node_reboot
elif [ "$safe_reboot" = 1 ]; then
	opensaf_safe_reboot
elif [ "$self_node_id" = "$node_id" ] || [ "$node_id" = 0 ]; then
	# A node ID of zero(0) means an order to reboot the local node.
	# node_id is quoted so that an empty argument does not make '[' fail.

	# uncomment the following line if debugging errors that keep restarting the node
	# exit 0

	# If the application is using a different interface for cluster
	# communication, please add your application specific isolation
	# commands here

	logger -t "opensaf_reboot" "Rebooting local node; timeout=$OPENSAF_REBOOT_TIMEOUT"

	# Isolate the node
	if [ "$MDS_TRANSPORT" = "TIPC" ]; then
		"${tipc_config}" -bd="eth:$TIPC_ETH_IF"
	else
		$icmd pkill -STOP osafdtmd
	fi

	# Start a reboot supervision background process. Note that a similar
	# supervision is also done in the opensaf_reboot() function in LEAP.
	# However, that supervision may be stopped by one of the pkill commands
	# below, if it was called from AMF or FM.
	# The appended "0" keeps the numeric test valid when the timeout is unset.
	# printf is used instead of 'echo -n', which is not portable under /bin/sh.
	if [ "${OPENSAF_REBOOT_TIMEOUT}0" -gt "0" ]; then
		(sleep "$OPENSAF_REBOOT_TIMEOUT"; printf 'b' > /proc/sysrq-trigger) &
	fi

	# Stop some important opensaf processes to prevent bad things from happening
	$icmd pkill -STOP osafamfwd
	$icmd pkill -STOP osafamfnd
	$icmd pkill -STOP osafamfd
	$icmd pkill -STOP osaffmd

	# Flush OpenSAF internal log server messages to disk.
	"$bindir/osaflog" --flush

	# Reboot (not shutdown) system WITH file system sync
	$icmd /sbin/reboot -f
elif [ "$FMS_USE_REMOTE_FENCING" = 1 ]; then
	# With remote fencing, argument one is the peer node id and argument two
	# is the peer CLM node name.
	opensaf_reboot_with_remote_fencing
elif [ -n "$ee_name" ]; then
	# Remote node with an EE name: restart it through PLM, but only when it is
	# instantiated (presence state 3) and not locked (admin state 2).
	plm_node_presence_state=$(immlist "$ee_name" | awk '/saPlmEEPresenceState/ {print $3}')
	plm_node_state=$(immlist "$ee_name" | awk '/saPlmEEAdminState/ {print $3}')
	if [ "$plm_node_presence_state" != 3 ]; then
		logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is not in INSTANTIATED state"
	elif [ "$plm_node_state" != 2 ]; then
		# Quoted on purpose: an admin state that could not be read is
		# treated as "not locked", so the restart is still attempted
		# instead of being reported as already locked.
		opensaf_reboot_with_plm
	else
		logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is already in locked state"
	fi
else
	logger -t "opensaf_reboot" "Rebooting remote node in the absence of PLM is outside the scope of OpenSAF"
fi
