Hi,
I have a two-node HA cluster built on openSUSE 11.1 (x86_64). Below are my cib.xml and ha.cf configurations:

<cib generated="true" admin_epoch="0" have_quorum="true" ignore_dtd="false" num_peers="2" cib_feature_revision="2.0" crm_feature_set="2.0" epoch="357" ccm_transition="4" dc_uuid="10e4502b-1fee-4537-84c4-4a7244022d3a" num_updates="25">
<configuration>
<!-- Cluster-wide options, all held in the single "cib-bootstrap-options" property set. -->
<crm_config>
  <cluster_property_set id="cib-bootstrap-options">
    <attributes>
      <nvpair id="cib-bootstrap-options-dc-version"
              name="dc-version"
              value="2.1.4-node: aa909246edb386137b986c5773344b98c6969999"/>
      <nvpair id="cib-bootstrap-options-last-lrm-refresh"
              name="last-lrm-refresh"
              value="1331042357"/>
      <!-- Default action timeout of 600s (presumably used for actions that set no timeout of their own; confirm). -->
      <nvpair id="cib-bootstrap-options-default-action-timeout"
              name="default-action-timeout"
              value="600s"/>
    </attributes>
  </cluster_property_set>
</crm_config>
<!-- The two cluster members, each listed with its UUID (id) and host name (uname). -->
<nodes>
  <node id="10e4502b-1fee-4537-84c4-4a7244022d3a"
        uname="aaa"
        type="normal"/>
  <node id="84e6cf5e-4719-44dc-b6ac-71ecc3f20e02"
        uname="bbb"
        type="normal"/>
</nodes>
<resources>
<group id="HA">
<!-- Meta attributes for the "HA" resource group. -->
<meta_attributes id="HA_meta_attrs">
  <attributes>
    <nvpair id="HA_metaattr_target_role" name="target_role" value="started"/>
    <!-- Members are both ordered and collocated. -->
    <nvpair id="HA_metaattr_ordered" name="ordered" value="true"/>
    <nvpair id="HA_metaattr_collocated" name="collocated" value="true"/>
    <!-- NOTE(review): a failure stickiness of -INFINITY presumably pushes the group off a node after one failure; confirm. -->
    <nvpair id="HA_metaattr_resource_failure_stickiness" name="resource_failure_stickiness" value="-INFINITY"/>
    <nvpair id="HA_metaattr_resource_stickiness" name="resource_stickiness" value="90000"/>
    <nvpair id="HA_metaattr_is_managed" name="is_managed" value="true"/>
  </attributes>
</meta_attributes>
<!-- "FCRstart": OCF resource of type "rsmres" from the "zzz" provider; it has no instance attributes. -->
<primitive id="FCRstart"
           class="ocf"
           type="rsmres"
           provider="zzz">
  <operations>
    <!-- Recurring monitor: interval 15, timeout 15, start delay 6 (presumably seconds; confirm). -->
    <op id="c4fbe1d9-6784-4c26-ae02-f9e054745579"
        name="monitor"
        interval="15"
        timeout="15"
        start_delay="6"/>
  </operations>
</primitive>
<!-- "IP": IPaddr2 resource from the "heartbeat" provider, configured with address 10.216.213.10 on eth0. -->
<primitive id="IP"
           class="ocf"
           type="IPaddr2"
           provider="heartbeat">
  <instance_attributes id="IP_instance_attrs">
    <attributes>
      <nvpair id="c7723fc5-1cae-4867-ae2f-77d1bc8191c7" name="ip" value="10.216.213.10"/>
      <nvpair id="2b0ad025-160b-446c-ae26-685c5c4590db" name="nic" value="eth0"/>
    </attributes>
  </instance_attributes>
  <operations>
    <!-- Recurring monitor: interval 15, timeout 20, start delay 5 (presumably seconds; confirm). -->
    <op id="84255438-93b1-4478-aaab-580b46dc299b"
        name="monitor"
        description="ip_monitor"
        interval="15"
        timeout="20"
        start_delay="5"/>
  </operations>
</primitive>
<!-- "MySQL": OCF resource of type "mysql" from the "zzz" provider. -->
<primitive id="MySQL" class="ocf" type="mysql" provider="zzz">
  <instance_attributes id="MySQL_instance_attrs">
    <attributes>
      <!-- Paths of the server start-up binary and the configuration file handed to the agent. -->
      <nvpair id="c276eaf6-fc2a-4bc4-84b4-2580d049cef3" name="binary" value="/usr/bin/safe_mysqld"/>
      <nvpair id="dde02449-e5c6-4fa4-8412-48e9eea61ce4" name="config" value="/etc/my.cnf"/>
    </attributes>
  </instance_attributes>
  <operations>
    <!-- Recurring monitor: interval 15, timeout 60, start delay 10 (presumably seconds; confirm). -->
    <op id="bf3dbbb9-8019-44e4-8095-e6f67d1a53f8" name="monitor" description="mysql_monitor" interval="15" timeout="60" start_delay="10"/>
  </operations>
</primitive>
<!-- "JBoss": OCF resource of type "jbossnew" from the "zzz" provider. -->
<primitive id="JBoss"
           class="ocf"
           type="jbossnew"
           provider="zzz">
  <operations>
    <!-- Recurring check: interval 30, timeout 60, start delay 110 (presumably seconds; confirm); restart on failure. -->
    <!-- NOTE(review): this is the only recurring op named "status" rather than "monitor"; verify the agent implements it. -->
    <op id="f888448b-02c2-4026-b03e-07e4e11c91b1"
        name="status"
        description="jboss_status"
        interval="30"
        timeout="60"
        start_delay="110"
        disabled="false"
        role="Started"
        on_fail="restart"/>
  </operations>
  <meta_attributes id="JBoss_meta_attrs">
    <attributes></attributes>
  </meta_attributes>
</primitive>
<!-- "PAMSync": OCF resource of type "rsmsyncpam" from the "zzz" provider. -->
<primitive id="PAMSync"
           class="ocf"
           type="rsmsyncpam"
           provider="zzz">
  <operations>
    <!-- Recurring monitor: interval 15, timeout 15, start delay 10 (presumably seconds; confirm). -->
    <op id="b4126a77-55e2-4254-9c21-1e3e00252be8"
        name="monitor"
        description="pam_monitor"
        interval="15"
        timeout="15"
        start_delay="10"
        disabled="false"
        role="Started"/>
  </operations>
  <meta_attributes id="PAMSync_meta_attrs">
    <attributes></attributes>
  </meta_attributes>
</primitive>
<!-- "RSMResil": last member of the group; it declares no operations and no attributes. -->
<primitive id="RSMResil"
           class="ocf"
           type="rsmresend"
           provider="zzz"/>
</group>
</resources>
<!-- Empty: no constraints are configured for this cluster. -->
<constraints/>
</configuration>
</cib>

ha.cf:
====

# Heartbeat daemon settings (ha.cf) for this cluster.
# NOTE(review): no "node" directives appear in this excerpt; presumably omitted from the paste - confirm.
# Log through syslog facility local0.
logfacility local0
# Heartbeat interval: 500 ms.
keepalive 500ms
# Dead time of 3 (presumably seconds, i.e. six missed keepalives) before a peer is declared dead.
# NOTE(review): this looks tight for a node under heavy load - verify against the Heartbeat documentation.
deadtime 3
# Initial dead time of 60 (presumably seconds, applied right after start-up - confirm).
initdead 60
# Heartbeats are broadcast on interface eth1.
bcast eth1
# Automatic fail-back is switched off.
# NOTE(review): may have no effect while the CRM is enabled below - confirm.
auto_failback off
# Debug level 2.
debug 2
# Logging goes through the logging daemon.
# NOTE(review): presumably takes precedence over "logfacility" above - confirm.
use_logd yes
# The version 2 cluster resource manager (CRM) is enabled.
crm on

Heartbeat version: 2.1.4

My problem is that whenever the CPU load average on the ACTIVE node reaches 6.0 or more, the node gets rebooted (I have tried both "crm on" and "crm respawn"). We generate the load by running heavy queries against MySQL. We also use JBoss as our application server. STONITH is not enabled in cib.xml on either node (it is also commented out in ha.cf). There are no log entries in "/var/log/messages" that indicate any failure or stopping of resources (start/stop/monitor), nor are there any log entries indicating an error. The only pattern I can see is that the reboot occurs only when the system is under high load, and each time the "free -m" command shows only 54 MB of free memory available (I do not know whether this is important or not).
Please let me know if any more input is required.