A clustering problem on Red Hat Linux AS3
Reference: http://chinaunix.net/jh/4/325828.html
I installed two Red Hat Linux AS3 systems in VMs:
linux1 IP: 10.0.0.230
linux2 IP: 10.0.0.231
After installing clumanager and redhat-config-cluster, I completed the related configuration.
Then I ran: service clumanager start
A moment later the system rebooted. What is going on?
Check the logs first...
Please run "dmesg | tee log" and paste the log file, and also paste your /etc/init.d/clumanager script.
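In case it is easier, the same information can be gathered like this (just a sketch; the output file names and the grep filter are only examples):
dmesg | tee dmesg.log
grep -i clu /var/log/messages | tail -n 50 > clumanager.log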
Here is the error log:
-------------------------------------------------------------------------------------
Aug 2 20:57:55 linux1 clumanager: [1345]: <notice> Starting Red Hat Cluster Manager...
Aug 2 20:57:56 linux1 cludb[1362]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1363]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1364]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1365]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1366]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: No drivers configured for host '10.0.0.230'!
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: Data integrity may be compromised!
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: No drivers configured for host '10.0.0.231'!
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: Data integrity may be compromised!
Aug 2 20:58:12 linux1 cluquorumd[1375]: <notice> Quorum Formed; Starting Service Manager
-------------------------------------------------------------------------------------
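The cludb errors above say the daemon cannot work out which cluster member the local host is, which is presumably derived from the member definitions in /etc/cluster.xml. A quick sanity check, run on both nodes (clustat is the same status tool the init script itself calls; nothing else here is clumanager-specific):
md5sum /etc/cluster.xml    # should be identical on linux1 and linux2
clustat                    # what membership/quorum state does the cluster report?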
Here is the /etc/init.d/clumanager script:
-------------------------------------------------------------------------------------
#!/bin/sh
#
# Copyright (C) 2003 Red Hat, Inc.
#
# This program is Free Software. You may modify and/or redistribute it under
# the terms of the GNU General Public License version 2, or (at your option)
# any later version.
#
# description: Starts and stops Red Hat Cluster Manager
# chkconfig: 2345 99 01
#
# Source function library
. /etc/init.d/functions
# Grab the network config file
. /etc/sysconfig/network
SIMPLE_OPERATION=0
ID="Red Hat Cluster Manager"
CFG_DIR="/etc"
CFG_FILE="$CFG_DIR/cluster.xml"
FW_CHAIN=RH-Firewall-1-INPUT
SVCMGRD="clusvcmgrd"
QUORUMD="cluquorumd"
MEMBD="clumembd"
LOCKD="clulockd"
RMTABD="clurmtabd"
LOG_ERR=3
LOG_WARNING=4
LOG_NOTICE=5
LOG_INFO=6
#
# Only root wants to run this...
#
[ `id -u` = 0 ] || exit 0
#
# If we're not configured, then don't start anything.
#
[ "${NETWORKING}" = "yes" ] || exit 0
[ -f "/etc/cluster.xml" ] || exit 1
#
# Check the cluster configuration file and load the watchdog timer if
# necessary.
# XXX This loads the watchdog timer on ALL machines, even if they do not
# have 'watchdog=yes' specified.
#
watchdog_init()
{
#
# Check the cluster configuration for watchdog support
#
# XXX This creates and loads the watchdog module regardless of whether
# the local member uses it.
#
grep -q -i "watchdog=\"yes\"" $CFG_FILE
if [ $? -eq 1 ]; then
return 0
fi
# Check to ensure we have /dev/watchdog
if ! [ -c /dev/watchdog ]; then
if [ -f /dev/watchdog ]; then
action "Removing invalid /dev/watchdog:" \
rm -f /dev/watchdog
fi
action "Creating /dev/watchdog:" /dev/MAKEDEV watchdog
fi
# Check /etc/modules.conf for "alias watchdog xxxxxx" line; xxxxxx =
# the specific driver (see below) we're dealing with.
# If there is no alias, default to softdog.
_WDT=`grep "alias wdt" /etc/modules.conf | awk '{print $3}'`
if [ -z "$_WDT" ]; then
_PROBE=softdog
_WDT=softdog
else
_PROBE=wdt
fi
# Don't try to load the module a second time.
lsmod | grep -q $_WDT
if [ $? -ne 0 ]; then
action "Loading Watchdog Timer ($_WDT): " modprobe $_PROBE
fi
unset _WDT _PROBE
return 0
}
#
# open a port in our firewall
#
open_port()
{
declare PROTO=$1
declare -i PORT=$2
if [ -z "$1" -o -z "$2" ]; then
echo "usage: $0 <protocol>; <port>;"
return 1
fi
/sbin/iptables -I $FW_CHAIN -m state \
--state NEW -m $PROTO -p $PROTO --dport $PORT -j ACCEPT
}
#
# Close a port in the firewall.
#
close_port()
{
declare -i PORT=$1
while [ 0 ]; do
#
# Grab the rule number so we can delete it.
# - List our table w/ line numbers for each rule.
# - Grab the rule number from column 1 of the first line
#
declare rule=`iptables -L $FW_CHAIN -n --line-numbers | grep $PORT | head -1 | awk {'print $1'}`
[ -z "$rule" ] && break;
iptables -D $FW_CHAIN $rule
done
return 0
}
#
# Read the cluster configuration and open the default and/or configured
# ports.
#
cluster_firewall()
{
declare -i clumembd_addr=34001
declare -i clusvcmgrd_addr=34002
declare -i cluquorumd_addr=34003
declare -i clulockd_addr=34004
declare -i hb_bcast=1228
declare -i hb_mcast=1229
declare port
if ! iptables -L $FW_CHAIN &> /dev/null; then
return 0
fi
port=`cludb -g clumembd%addr`
if [ "$port" != "not found" ]; then
clumembd_addr=$port
fi
port=`cludb -g clusvcmgrd%addr`
if [ "$port" != "not found" ]; then
clusvcmgrd_addr=$port
fi
port=`cludb -g cluquorumd%addr`
if [ "$port" != "not found" ]; then
cluquorumd_addr=$port
fi
port=`cludb -g clulockd%addr`
if [ "$port" != "not found" ]; then
clulockd_addr=$port
fi
port=`cludb -g clumembd%port`
if [ "$port" != "not found" ]; then
hb_bcast=$port
hb_mcast=$((port+1))
fi
case $1 in
start)
open_port udp $hb_bcast || return 1
open_port udp $hb_mcast || return 1
open_port tcp $clumembd_addr || return 1
open_port tcp $clusvcmgrd_addr || return 1
open_port tcp $cluquorumd_addr || return 1
open_port tcp $clulockd_addr || return 1
;;
stop)
close_port $hb_bcast || return 1
close_port $hb_mcast || return 1
close_port $clumembd_addr || return 1
close_port $clusvcmgrd_addr || return 1
close_port $cluquorumd_addr || return 1
close_port $clulockd_addr || return 1
;;
*)
echo "usage: $0 {start|stop}"
return 1
;;
esac
return 0
}
#
# Open firewall ports
#
open_firewall() {
if ! iptables -L $FW_CHAIN &> /dev/null; then
return 0
fi
echo -n "Opening Firewall Ports:"
if ! cluster_firewall start; then
echo_failure
echo
return 1
fi
echo_success
echo
}
#
# Close firewall ports
#
close_firewall() {
if ! iptables -L $FW_CHAIN &> /dev/null; then
return 0
fi
echo -n "Closing Firewall Ports:"
if ! cluster_firewall stop; then
echo_failure
echo
return 1
fi
echo_success
echo
return 0
}
#
# log_and_print <level> <message>
#
log_and_print()
{
if [ -z "$1" -o -z "$2" ]; then
return 1;
fi
/usr/sbin/clulog -p $$ -n "clumanager" -s $1 "$2"
echo $2
return 0;
}
#
# Bring down the cluster on a node.
#
stop_cluster()
{
#
# Sometimes, people stop the cluster before the service manager
# is running - this causes the cluster stop script to hang; since
# the service manager never actually receives the shutdown signal.
# In this case, we need to resend the TERM signal to the quorum
# daemon - so that it can notify everyone properly.
#
while [ 0 ]; do
echo -n $"Initiating shutdown of Quorum Services: "
killproc $QUORUMD -TERM
echo
if [ -n "`pidof $SVCMGRD`" ]; then
echo -n $"Waiting for User Services to stop: "
while [ -n "`pidof $SVCMGRD`" ]; do
sleep 1
done
echo_success
echo
else
echo $"User Services are stopped."
fi
# Ensure all NFS rmtab daemons are dead.
killall $RMTABD &> /dev/null
# Just in case the service manager blew up during shutdown...
killproc $QUORUMD -TERM &> /dev/null
if [ -n "`pidof $QUORUMD`" ]; then
echo -n $"Waiting for Quorum Services to stop: "
while [ -n "`pidof $QUORUMD`" ]; do
sleep 1
if [ -n "`pidof $SVCMGRD`" ]; then
echo_failure
echo
echo "Retrying..."
continue 2
fi
done
echo_success
echo
else
echo $"Quorum Services are stopped."
fi
if [ -n "`pidof $MEMBD`" ]; then
echo -n $"Waiting for Membership Services to stop: "
while [ -n "`pidof $MEMBD`" ]; do
sleep 1
done
echo_success
echo
else
echo $"Membership Services are stopped."
fi
rm -f /var/run/$SVCMGRD.pid
rm -f /var/run/$QUORUMD.pid
for dmn in $MEMBD $LOCKD $SVCMGRD $QUORUMD; do
killall -9 $dmn &> /dev/null
rm -f /var/run/$dmn.pid
done
return 0
done
}
#
# update_status daemon last_return
#
update_status()
{
status $1
local new_status=$?;
local old_status=$2;
if [ -z "$2" ]; then
old_status=$new_status;
fi
if [ $old_status -ne $new_status ]; then
# Return 5 to signal nonuniform statuses
return 5;
fi
return $new_status;
}
case $1 in
start)
SIMPLE_OPERATION=$(/usr/sbin/clugetconfig cluster%simple_operation)
if [ $? -eq 2 ]; then
SIMPLE_OPERATION=0
fi
if [ $SIMPLE_OPERATION -ne 0 ]; then
ID="Red Hat Quorum Services"
fi
log_and_print $LOG_NOTICE "Starting $ID..."
watchdog_init
open_firewall || exit 1
echo -n $"Starting Quorum Daemon: "
daemon $QUORUMD $FLAGS
echo
# To be consistent...
touch /var/lock/subsys/clumanager
;;
restart)
$0 status &> /dev/null
if [ $? -ne 1 ]; then
$0 stop
fi
$0 start
;;
condrestart)
$0 status &> /dev/null
if [ $? -eq 0 ]; then
$0 stop
$0 start
fi
;;
reload)
clulog -p $LOG_NOTICE "Reloading Cluster Configuration."
echo -n $"Reloading Cluster Configuration: "
killproc $SVCMGRD -HUP
rv=$?
echo
exit $rv
;;
status)
update_status $MEMBD
update_status $QUORUMD $?
rv=$?
if [ $SIMPLE_OPERATION -eq 0 ]; then
update_status $LOCKD $rv
rv=$?
update_status $SVCMGRD $rv
svcmgr=$?
#
# Check to see if, when the service manager is dead
# but everything else is running
#
if [ $rv -ne 5 -a $svcmgr -eq 5 ]; then
clustat -Q
# No Quorum + No Service Manager -> OK!
if [ $? -eq 1 ]; then
echo "Note: Service manager is not running because this member"
echo " is not participating in the cluster quorum."
exit 0;
fi
fi
rv=$svcmgr
fi
[ $rv -eq 5 ] && exit 1;
exit $rv
;;
stop)
if [ -n "`pidof $QUORUMD`" ]; then
log_and_print $LOG_NOTICE "Shutting down $ID..."
stop_cluster
elif [ -n "`pidof $MEMBD`" ]; then
log_and_print $LOG_NOTICE "Shutting down $ID..."
echo $"Stopping Cluster Membership Daemon: "
killproc $MEMBD -KILL
echo
fi
close_firewall
rm -f /var/lock/subsys/clumanager
log_and_print $LOG_NOTICE "$ID is stopped."
;;
esac
After the cluster manager starts, does the cluster show this machine as active?
Does the machine reboot while "service clumanager start" is still running, or only after the command finishes and you get the # or $ prompt back?
[quote]Originally posted by "lipeng21cn": After the cluster manager starts, does the cluster show this machine as active?[/quote]
The first time after installation, I started it on linux1: linux1 was shown as active and did not reboot. Then I started it on linux2, and the system rebooted as soon as the start finished. Now the system reboots no matter whether I start clumanager on linux1 or on linux2.
[quote]Originally posted by "lipeng21cn": Does the machine reboot while "service clumanager start" is still running, or only after you get the # or $ prompt back?[/quote]
[root@linux1 qjf]# service clumanager start
Starting Red Hat Cluster Manager...
Loading Watchdog Timer (softdog): [  OK  ]
Opening Firewall Ports: [  OK  ]
Starting Quorum Daemon: [  OK  ]
The reboot happens only after the prompt comes back.
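For what it is worth, the output above shows the softdog watchdog timer being loaded, and the init script only loads it when cluster.xml contains watchdog="yes" for some member. Two quick checks (plain grep/lsmod, nothing clumanager-specific) to confirm what is actually configured:
grep -i 'watchdog' /etc/cluster.xml   # is watchdog="yes" set for any member?
lsmod | grep -E 'softdog|wdt'         # is a watchdog module currently loaded?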
Bump.
I am running into the same problem. Any advice?!
This is probably because you have not initialized your raw devices, and you also need to write the cluster configuration to the raw devices. Take a look at the RHCS documentation on redhat.com.
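Building on that reply, here is a minimal sketch of the raw-device side on AS 3. The partition names /dev/sdb1 and /dev/sdb2 are only placeholders for whatever shared partitions both VMs can actually see; the clumanager-specific step of initializing the shared state and saving the configuration onto it should be taken from the Red Hat Cluster Suite manual (e.g. its shutil / redhat-config-cluster instructions), not from this sketch.
-------------------------------------------------------------------------------------
# /etc/sysconfig/rawdevices -- bind the two shared quorum partitions to raw devices
# (example partition names; both nodes must point at the same shared storage)
/dev/raw/raw1  /dev/sdb1
/dev/raw/raw2  /dev/sdb2

# then, on both nodes:
service rawdevices restart   # re-create the raw bindings
raw -qa                      # verify the bindings exist on both nodes
-------------------------------------------------------------------------------------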