Well, initially I thought writing the OCF resource agent for Tivoli Storage Manager was the hard part. But as it turns out, it really isn’t. The hard part is getting the resources into the heartbeat agent (or whatever you want to call it). The worst part about it is that the hb_gui is completely worthless if you want to do a configuration without quorum.
First of all, we need to set up the main Linux-HA configuration file (/etc/ha.d/ha.cf). Configuring that is rather simple. For me, as I have two network devices over which both nodes see each other (one is an adapter bond comprising two simple, plain, old 1G copper ports; the other is the 1G fibre cluster port), the configuration looks like this:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| udpport 694
autojoin none
crm true
use_logd on
debug false
coredumps false
auto_failback on
ucast bond0 10.0.0.10
ucast bond0 10.0.0.20
ucast eth2 10.0.0.29
ucast eth2 10.0.0.30
node tsm1
node tsm2
respawn root /usr/lib64/heartbeat/pingd -m 100 -d 5s
ping 10.0.0.1
respawn root /sbin/evmsd
apiauth evms uid=hacluster,root
|
Once the configuration of the service itself is done, one just needs to start the heartbeat daemon on both nodes. Afterwards, we should be able to configure the cluster resources.
I find it much easier to just update the corresponding sections with cibadmin (the man-page really has some good examples). So here are my configuration files for two resource groups (crm_mon doesn’t differentiate between resources and grouped resources; it’ll just show you that you configured two resources).
crm_config.xml:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| <cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair id="cib-bootstrap-options-symmetric-cluster" name="symmetric-cluster" value="true"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="stop"/>
<nvpair id="cib-bootstrap-options-default-resource-stickiness" name="default-resource-stickiness" value="0"/>
<nvpair id="cib-bootstrap-options-default-resource-failure-stickiness" name="default-resource-failure-stickiness" value="0"/>
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="false"/>
<nvpair id="cib-bootstrap-options-stonith-action" name="stonith-action" value="reboot"/>
<nvpair id="cib-bootstrap-options-startup-fencing" name="startup-fencing" value="true"/>
<nvpair id="cib-bootstrap-options-stop-orphan-resources" name="stop-orphan-resources" value="true"/>
<nvpair id="cib-bootstrap-options-stop-orphan-actions" name="stop-orphan-actions" value="true"/>
<nvpair id="cib-bootstrap-options-remove-after-stop" name="remove-after-stop" value="false"/>
<nvpair id="cib-bootstrap-options-short-resource-names" name="short-resource-names" value="true"/>
<nvpair id="cib-bootstrap-options-transition-idle-timeout" name="transition-idle-timeout" value="5min"/>
<nvpair id="cib-bootstrap-options-default-action-timeout" name="default-action-timeout" value="20s"/>
<nvpair id="cib-bootstrap-options-is-managed-default" name="is-managed-default" value="true"/>
<nvpair id="cib-bootstrap-options-cluster-delay" name="cluster-delay" value="60s"/>
<nvpair id="cib-bootstrap-options-pe-error-series-max" name="pe-error-series-max" value="-1"/>
<nvpair id="cib-bootstrap-options-pe-warn-series-max" name="pe-warn-series-max" value="-1"/>
<nvpair id="cib-bootstrap-options-pe-input-series-max" name="pe-input-series-max" value="-1"/>
</attributes>
</cluster_property_set>
|
This section is created by heartbeat on the first startup, so you don’t have to mess with it unless you want to tweak it.
resources.xml:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
| <group id="tsm1_group">
<primitive class="ocf" id="10.0.0.70" provider="heartbeat" type="IPaddr2">
<operations>
<op id="ip_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="ip_tsm1_inst_attr">
<attributes>
<nvpair id="ip_tsm1" name="ip" value="10.0.0.70"/>
<nvpair id="ip_tsm1_bcast" name="broadcast" value="10.0.0.255"/>
<nvpair id="ip_tsm1_device" name="nic" value="eth0"/>
<nvpair id="ip_tsm1_netmask" name="cidr_netmask" value="24"/>
<nvpair id="ip_tsm1_label" name="iflabel" value="tsm1"/>
<nvpair id="ip_tsm1_status" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM1" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_1_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_1_tsm1_inst_attr">
<attributes>
<nvpair id="fs_1_tsm1_dev" name="device" value="/dev/tsm1_server_dir"/>
<nvpair id="fs_1_tsm1_mountpoint" name="directory" value="/tsm/TSM1"/>
<nvpair id="fs_1_tsm1_fstype" name="fstype" value="ext3"/>
<nvpair id="fs_1_tsm1_status" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM1/db" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_2_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_2_tsm1_inst_attr">
<attributes>
<nvpair id="fs_2_tsm1_dev" name="device" value="/dev/tsm1_db"/>
<nvpair id="fs_2_tsm1_mountpoint" name="directory" value="/tsm/TSM1/db"/>
<nvpair id="fs_2_tsm1_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM1/db_mirror" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_3_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_3_tsm1_inst_attr">
<attributes>
<nvpair id="fs_3_tsm1_dev" name="device" value="/dev/tsm1_db_mirror"/>
<nvpair id="fs_3_tsm1_mountpoint" name="directory" value="/tsm/TSM1/db_mirror"/>
<nvpair id="fs_3_tsm1_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM1/log" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_4_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_4_tsm1_inst_attr">
<attributes>
<nvpair id="fs_4_tsm1_dev" name="device" value="/dev/tsm1_log"/>
<nvpair id="fs_4_tsm1_mountpoint" name="directory" value="/tsm/TSM1/log"/>
<nvpair id="fs_4_tsm1_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM1/log_mirror" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_5_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_5_tsm1_inst_attr">
<attributes>
<nvpair id="fs_5_tsm1_dev" name="device" value="/dev/tsm1_log_mirror"/>
<nvpair id="fs_5_tsm1_mountpoint" name="directory" value="/tsm/TSM1/log_mirror"/>
<nvpair id="fs_5_tsm1_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="TSM1: dsmserv" provider="heartbeat" type="dsmserv">
<operations>
<op id="dsmserv_tsm1_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="dsmserv_tsm1_inst_attr">
<attributes>
<nvpair id="dsmserv_tsm1_status" name="target_role" value="started"/>
<nvpair id="dsmserv_tsm1_prefix" name="prefix" value="/tsm"/>
<nvpair id="dsmserv_tsm1_instance" name="instance" value="TSM1"/>
<nvpair id="dsmserv_tsm1_userid" name="id" value="ha_client"/>
<nvpair id="dsmserv_tsm1_password" name="password" value="ha_client"/>
<nvpair id="dsmserv_tsm1_tcpaddress" name="TCPAddress" value="10.0.0.70"/>
<nvpair id="dsmserv_tsm1_tcpport" name="TCPPort" value="1500"/>
</attributes>
</instance_attributes>
</primitive>
</group>
<group id="tsm2_group">
<primitive class="ocf" id="10.0.0.80" provider="heartbeat" type="IPaddr2">
<operations>
<op id="ip_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="ip_tsm2_inst_attr">
<attributes>
<nvpair id="ip_tsm2" name="ip" value="10.0.0.80"/>
<nvpair id="ip_tsm2_bcast" name="broadcast" value="10.0.0.255"/>
<nvpair id="ip_tsm2_device" name="nic" value="eth0"/>
<nvpair id="ip_tsm2_netmask" name="cidr_netmask" value="24"/>
<nvpair id="ip_tsm2_label" name="iflabel" value="tsm2"/>
<nvpair id="ip_tsm2_status" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM2" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_1_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_1_tsm2_inst_attr">
<attributes>
<nvpair id="fs_1_tsm2_dev" name="device" value="/dev/tsm2_server_dir"/>
<nvpair id="fs_1_tsm2_mountpoint" name="directory" value="/tsm/TSM2"/>
<nvpair id="fs_1_tsm2_fstype" name="fstype" value="ext3"/>
<nvpair id="fs_1_tsm2_status" name="target_role" value="started"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM2/db" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_2_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_2_tsm2_inst_attr">
<attributes>
<nvpair id="fs_2_tsm2_dev" name="device" value="/dev/tsm2_db"/>
<nvpair id="fs_2_tsm2_mountpoint" name="directory" value="/tsm/TSM2/db"/>
<nvpair id="fs_2_tsm2_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM2/db_mirror" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_3_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_3_tsm2_inst_attr">
<attributes>
<nvpair id="fs_3_tsm2_dev" name="device" value="/dev/tsm2_db_mirror"/>
<nvpair id="fs_3_tsm2_mountpoint" name="directory" value="/tsm/TSM2/db_mirror"/>
<nvpair id="fs_3_tsm2_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM2/log" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_4_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_4_tsm2_inst_attr">
<attributes>
<nvpair id="fs_4_tsm2_dev" name="device" value="/dev/tsm2_log"/>
<nvpair id="fs_4_tsm2_mountpoint" name="directory" value="/tsm/TSM2/log"/>
<nvpair id="fs_4_tsm2_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="/tsm/TSM2/log_mirror" provider="heartbeat" type="Filesystem">
<operations>
<op id="fs_5_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="fs_5_tsm2_inst_attr">
<attributes>
<nvpair id="fs_5_tsm2_dev" name="device" value="/dev/tsm2_log_mirror"/>
<nvpair id="fs_5_tsm2_mountpoint" name="directory" value="/tsm/TSM2/log_mirror"/>
<nvpair id="fs_5_tsm2_fstype" name="fstype" value="ext2"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="TSM2: dsmserv" provider="heartbeat" type="dsmserv">
<operations>
<op id="dsmserv_tsm2_mon" interval="5s" name="monitor" timeout="5s"/>
</operations>
<instance_attributes id="dsmserv_tsm2_inst_attr">
<attributes>
<nvpair id="dsmserv_tsm2_status" name="target_role" value="started"/>
<nvpair id="dsmserv_tsm2_prefix" name="prefix" value="/tsm"/>
<nvpair id="dsmserv_tsm2_instance" name="instance" value="TSM2"/>
<nvpair id="dsmserv_tsm2_userid" name="id" value="ha_client"/>
<nvpair id="dsmserv_tsm2_password" name="password" value="ha_client"/>
<nvpair id="dsmserv_tsm2_tcpaddress" name="TCPAddress" value="10.0.0.80"/>
<nvpair id="dsmserv_tsm2_tcpport" name="TCPPort" value="1501"/>
<nvpair id="dsmserv_tsm2_max_retries" name="max_retries" value="2"/>
<nvpair id="dsmserv_tsm2_shutdown_timeout" name="shutdown_timeout" value="10"/>
</attributes>
</instance_attributes>
</primitive>
</group>
|
constraints.xml:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| <rsc_location id="tsm1_group_rsc_location" rsc="tsm1_group">
<rule id="tsm1_group:prefered_location:rule" score="100">
<expression id="tsm1_group:prefered_location:rule:expr" attribute="#uname" operation="eq" value="tsm1"/>
</rule>
<rule id="tsm1_group:connected:rule" score="-INFINITY" boolean_op="or">
<expression id="tsm1_group:connected:expr:not_defined" attribute="pingd" operation="not_defined" />
<expression id="tsm1_group:connected:expr:zero" attribute="pingd" operation="lte" value="0" />
</rule>
</rsc_location>
<rsc_location id="tsm2_group_rsc_location" rsc="tsm2_group">
<rule id="tsm2_group:prefered_location:rule" score="100">
<expression id="tsm2_group:prefered_location:rule:expr" attribute="#uname" operation="eq" value="tsm2"/>
</rule>
<rule id="tsm2_group:connected:rule" score="-INFINITY" boolean_op="or">
<expression id="tsm2_group:connected:expr:not_defined" attribute="pingd" operation="not_defined" />
<expression id="tsm2_group:connected:expr:zero" attribute="pingd" operation="lte" value="0" />
</rule>
</rsc_location>
|
The nice thing about resource groups with Linux-HA is that the resources are started in the order in which they are listed in the XML file, and stopped in the reverse of that order.