
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.TreeSet;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FileSystem;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.Abortable;
39  import org.apache.hadoop.hbase.ClusterStatus;
40  import org.apache.hadoop.hbase.HBaseConfiguration;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.testclassification.LargeTests;
47  import org.apache.hadoop.hbase.MiniHBaseCluster;
48  import org.apache.hadoop.hbase.RegionTransition;
49  import org.apache.hadoop.hbase.ServerName;
50  import org.apache.hadoop.hbase.TableName;
51  import org.apache.hadoop.hbase.catalog.CatalogTracker;
52  import org.apache.hadoop.hbase.catalog.MetaEditor;
53  import org.apache.hadoop.hbase.client.HTable;
54  import org.apache.hadoop.hbase.executor.EventType;
55  import org.apache.hadoop.hbase.master.RegionState.State;
56  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
57  import org.apache.hadoop.hbase.protobuf.RequestConverter;
58  import org.apache.hadoop.hbase.regionserver.HRegion;
59  import org.apache.hadoop.hbase.regionserver.HRegionServer;
60  import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
61  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
62  import org.apache.hadoop.hbase.util.Bytes;
63  import org.apache.hadoop.hbase.util.FSTableDescriptors;
64  import org.apache.hadoop.hbase.util.FSUtils;
65  import org.apache.hadoop.hbase.util.JVMClusterUtil;
66  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
67  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
68  import org.apache.hadoop.hbase.util.Threads;
69  import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
70  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
71  import org.apache.hadoop.hbase.zookeeper.ZKTable;
72  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
73  import org.apache.zookeeper.data.Stat;
74  import org.junit.Test;
75  import org.junit.experimental.categories.Category;
76  
77  @Category(LargeTests.class)
78  public class TestMasterFailover {
79    private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
80  
81    /**
82     * Complex test of master failover that tests as many permutations of the
83     * different possible states that regions in transition could be in within ZK.
84     * <p>
85     * This tests the proper handling of these states by the failed-over master
86     * and includes a thorough testing of the timeout code as well.
87     * <p>
88     * Starts with a single master and three regionservers.
89     * <p>
90     * Creates two tables, enabledTable and disabledTable, each containing 5
91     * regions.  The disabledTable is then disabled.
92     * <p>
93     * After reaching steady-state, the master is killed.  We then mock several
94     * states in ZK.
95     * <p>
96     * After mocking them, we will startup a new master which should become the
97     * active master and also detect that it is a failover.  The primary test
98     * passing condition will be that all regions of the enabled table are
99     * assigned and all the regions of the disabled table are not assigned.
100    * <p>
101    * The different scenarios to be tested are below:
102    * <p>
103    * <b>ZK State:  OFFLINE</b>
104    * <p>A node can get into OFFLINE state if</p>
105    * <ul>
106    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
107   * <li>The Master is assigning the region to an RS but has not yet sent the open RPC
108    * </ul>
109    * <p>We will mock the scenarios</p>
110    * <ul>
111    * <li>Master has assigned an enabled region but RS failed so a region is
112    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
112   * <li>This single mock appears to cover both of the above causes</li>
114    * </ul>
115    * <p>
116    * <b>ZK State:  CLOSING</b>
117    * <p>A node can get into CLOSING state if</p>
118    * <ul>
119    * <li>An RS has begun to close a region
120    * </ul>
121    * <p>We will mock the scenarios</p>
122    * <ul>
123    * <li>Region of enabled table was being closed but did not complete
124    * <li>Region of disabled table was being closed but did not complete
125    * </ul>
126    * <p>
127    * <b>ZK State:  CLOSED</b>
128    * <p>A node can get into CLOSED state if</p>
129    * <ul>
130   * <li>An RS has completed closing a region but the master has not yet acknowledged it
131    * </ul>
132    * <p>We will mock the scenarios</p>
133    * <ul>
134    * <li>Region of a table that should be enabled was closed on an RS
135    * <li>Region of a table that should be disabled was closed on an RS
136    * </ul>
137    * <p>
138    * <b>ZK State:  OPENING</b>
139    * <p>A node can get into OPENING state if</p>
140    * <ul>
141    * <li>An RS has begun to open a region
142    * </ul>
143    * <p>We will mock the scenarios</p>
144    * <ul>
145    * <li>RS was opening a region of enabled table but never finishes
146    * </ul>
147    * <p>
148    * <b>ZK State:  OPENED</b>
149    * <p>A node can get into OPENED state if</p>
150    * <ul>
151   * <li>An RS has finished opening a region but the master has not yet acknowledged it
152    * </ul>
153    * <p>We will mock the scenarios</p>
154    * <ul>
155    * <li>Region of a table that should be enabled was opened on an RS
156    * <li>Region of a table that should be disabled was opened on an RS
157    * </ul>
158    * @throws Exception
159    */
160   @Test (timeout=240000)
161   public void testMasterFailoverWithMockedRIT() throws Exception {
162 
163     final int NUM_MASTERS = 1;
164     final int NUM_RS = 3;
165 
166     // Create config to use for this cluster
167     Configuration conf = HBaseConfiguration.create();
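    // ZK-based assignment must be enabled so that the RIT znodes mocked later in this test are what the failed-over master recovers from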
168     conf.setBoolean("hbase.assignment.usezk", true);
169 
170     // Start the cluster
171     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
172     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
173     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
174     log("Cluster started");
175 
176     // Create a ZKW to use in the test
177     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
178 
179     // get all the master threads
180     List<MasterThread> masterThreads = cluster.getMasterThreads();
181     assertEquals(1, masterThreads.size());
182 
183     // only one master thread, let's wait for it to be initialized
184     assertTrue(cluster.waitForActiveAndReadyMaster());
185     HMaster master = masterThreads.get(0).getMaster();
186     assertTrue(master.isActiveMaster());
187     assertTrue(master.isInitialized());
188 
189     // disable load balancing on this master
190     master.balanceSwitch(false);
191 
192     // create two tables in META, each with 10 regions
193     byte [] FAMILY = Bytes.toBytes("family");
194     byte [][] SPLIT_KEYS = new byte [][] {
195         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
196         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
197         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
198         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
199     };
200 
201     byte [] enabledTable = Bytes.toBytes("enabledTable");
202     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
203     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
204 
205     FileSystem filesystem = FileSystem.get(conf);
206     Path rootdir = FSUtils.getRootDir(conf);
207     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
208     // Write the .tableinfo
209     fstd.createTableDescriptor(htdEnabled);
210 
211     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
212     createRegion(hriEnabled, rootdir, conf, htdEnabled);
213 
214     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
215         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
216 
217     TableName disabledTable = TableName.valueOf("disabledTable");
218     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
219     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
220     // Write the .tableinfo
221     fstd.createTableDescriptor(htdDisabled);
222     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
223     createRegion(hriDisabled, rootdir, conf, htdDisabled);
224     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
225         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
226 
227     TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
228     TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
229 
230     log("Regions in hbase:meta and namespace have been created");
231 
232     // at this point we only expect 4 regions to be assigned out
233     // (catalogs and namespace, + 2 merging regions)
234     assertEquals(4, cluster.countServedRegions());
235 
236     // Move merging regions to the same region server
237     AssignmentManager am = master.getAssignmentManager();
238     RegionStates regionStates = am.getRegionStates();
239     List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
240     assertEquals(2, mergingRegions.size());
241     HRegionInfo a = mergingRegions.get(0);
242     HRegionInfo b = mergingRegions.get(1);
243     HRegionInfo newRegion = RegionMergeTransaction.getMergedRegionInfo(a, b);
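    // Keep the merged region's HRegionInfo; it is used later to fake a MERGING znode after the master is killed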
244     ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
245     ServerName serverB = regionStates.getRegionServerOfRegion(b);
246     if (!serverB.equals(mergingServer)) {
247       RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
248       am.balance(plan);
249       assertTrue(am.waitForAssignment(b));
250     }
251 
252     // Let's just assign everything to first RS
253     HRegionServer hrs = cluster.getRegionServer(0);
254     ServerName serverName = hrs.getServerName();
255     HRegionInfo closingRegion = enabledRegions.remove(0);
256     // we'll need some regions to already be assigned out properly on live RS
257     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
258     enabledAndAssignedRegions.add(enabledRegions.remove(0));
259     enabledAndAssignedRegions.add(enabledRegions.remove(0));
260     enabledAndAssignedRegions.add(closingRegion);
261 
262     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
263     disabledAndAssignedRegions.add(disabledRegions.remove(0));
264     disabledAndAssignedRegions.add(disabledRegions.remove(0));
265 
266     // now actually assign them
267     for (HRegionInfo hri : enabledAndAssignedRegions) {
268       master.assignmentManager.addPlan(hri.getEncodedName(),
269           new RegionPlan(hri, null, serverName));
270       master.assignRegion(hri);
271     }
272 
273     for (HRegionInfo hri : disabledAndAssignedRegions) {
274       master.assignmentManager.addPlan(hri.getEncodedName(),
275           new RegionPlan(hri, null, serverName));
276       master.assignRegion(hri);
277     }
278 
279     // wait for no more RIT
280     log("Waiting for assignment to finish");
281     ZKAssign.blockUntilNoRIT(zkw);
282     log("Assignment completed");
283 
284     // Stop the master
285     log("Aborting master");
286     cluster.abortMaster(0);
287     cluster.waitOnMaster(0);
288     log("Master has aborted");
289 
290     /*
291      * Now, let's start mocking up some weird states as described in the method
292      * javadoc.
293      */
294 
295     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
296     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
297 
298     log("Beginning to mock scenarios");
299 
300     // Disable the disabledTable in ZK
301     ZKTable zktable = new ZKTable(zkw);
302     zktable.setDisabledTable(disabledTable);
303 
304     /*
305      *  ZK = OFFLINE
306      */
307 
308     // Region that should be assigned but is not and is in ZK as OFFLINE
309     // Cause: This can happen if the master crashed after creating the znode but before sending the
310     //  request to the region server
311     HRegionInfo region = enabledRegions.remove(0);
312     regionsThatShouldBeOnline.add(region);
313     ZKAssign.createNodeOffline(zkw, region, serverName);
314 
315     /*
316      * ZK = CLOSING
317      */
318     // Cause: Same as offline.
319     regionsThatShouldBeOnline.add(closingRegion);
320     ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
321 
322     /*
323      * ZK = CLOSED
324      */
325 
326     // Region of enabled table closed but not ack
327     //Cause: Master was down while the region server updated the ZK status.
328     region = enabledRegions.remove(0);
329     regionsThatShouldBeOnline.add(region);
330     int version = ZKAssign.createNodeClosing(zkw, region, serverName);
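    // The returned znode version lets transitionNodeClosed check that the node was not modified in between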
331     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
332 
333     // Region of disabled table closed but not ack
334     region = disabledRegions.remove(0);
335     regionsThatShouldBeOffline.add(region);
336     version = ZKAssign.createNodeClosing(zkw, region, serverName);
337     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
338 
339     /*
340      * ZK = OPENED
341      */
342 
343     // Region of enabled table was opened on RS
344     // Cause: same as the OFFLINE case above
345     region = enabledRegions.remove(0);
346     regionsThatShouldBeOnline.add(region);
347     ZKAssign.createNodeOffline(zkw, region, serverName);
348     ProtobufUtil.openRegion(hrs, hrs.getServerName(), region);
349     while (true) {
350       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
351       RegionTransition rt = RegionTransition.parseFrom(bytes);
352       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
353         break;
354       }
355       Thread.sleep(100);
356     }
357 
358     // Region of disabled table was opened on RS
359     // Cause: Master failed while updating the status for this region server.
360     region = disabledRegions.remove(0);
361     regionsThatShouldBeOffline.add(region);
362     ZKAssign.createNodeOffline(zkw, region, serverName);
363     ProtobufUtil.openRegion(hrs, hrs.getServerName(), region);
364     while (true) {
365       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
366       RegionTransition rt = RegionTransition.parseFrom(bytes);
367       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
368         break;
369       }
370       Thread.sleep(100);
371     }
372 
373     /*
374      * ZK = MERGING
375      */
376 
377     // Regions of table of merging regions
378     // Cause: Master was down while merging was going on
379     RegionMergeTransaction.createNodeMerging(
380       zkw, newRegion, mergingServer, a, b);
381 
382     /*
383      * ZK = NONE
384      */
385 
386     /*
387      * DONE MOCKING
388      */
389 
390     log("Done mocking data up in ZK");
391 
392     // Start up a new master
393     log("Starting up a new master");
394     master = cluster.startMaster().getMaster();
395     log("Waiting for master to be ready");
396     cluster.waitForActiveAndReadyMaster();
397     log("Master is ready");
398 
399     // Get new region states since master restarted
400     regionStates = master.getAssignmentManager().getRegionStates();
401     // Merging region should remain merging
402     assertTrue(regionStates.isRegionInState(a, State.MERGING));
403     assertTrue(regionStates.isRegionInState(b, State.MERGING));
404     assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
405     // Now remove the faked merging znode, merging regions should be
406     // offlined automatically, otherwise it is a bug in AM.
407     ZKAssign.deleteNodeFailSilent(zkw, newRegion);
408 
409     // Failover should be completed, now wait for no RIT
410     log("Waiting for no more RIT");
411     ZKAssign.blockUntilNoRIT(zkw);
412     log("No more RIT in ZK, now doing final test verification");
413 
414     // Grab all the regions that are online across RSs
415     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
416     for (JVMClusterUtil.RegionServerThread rst :
417       cluster.getRegionServerThreads()) {
418       onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rst.getRegionServer()));
419     }
420 
421     // Now, everything that should be online should be online
422     for (HRegionInfo hri : regionsThatShouldBeOnline) {
423       assertTrue(onlineRegions.contains(hri));
424     }
425 
426     // Everything that should be offline should not be online
427     for (HRegionInfo hri : regionsThatShouldBeOffline) {
428       if (onlineRegions.contains(hri)) {
429        LOG.debug(hri);
430       }
431       assertFalse(onlineRegions.contains(hri));
432     }
433 
434     log("Done with verification, all passed, shutting down cluster");
435 
436     // Done, shutdown the cluster
437     TEST_UTIL.shutdownMiniCluster();
438   }
439 
440   /**
441    * Complex test of master failover that tests as many permutations of the
442    * different possible states that regions in transition could be in within ZK
443    * pointing to an RS that has died while no master is around to process it.
444    * <p>
445    * This tests the proper handling of these states by the failed-over master
446    * and includes a thorough testing of the timeout code as well.
447    * <p>
448    * Starts with a single master and two regionservers.
449    * <p>
450    * Creates two tables, enabledTable and disabledTable, each containing 5
451    * regions.  The disabledTable is then disabled.
452    * <p>
453    * After reaching steady-state, the master is killed.  We then mock several
454    * states in ZK, and one of the region servers is killed.
455    * <p>
456    * After mocking them and killing an RS, we will startup a new master which
457    * should become the active master and also detect that it is a failover.  The
458    * primary test passing condition will be that all regions of the enabled
459    * table are assigned and all the regions of the disabled table are not
460    * assigned.
461    * <p>
462    * The different scenarios to be tested are below:
463    * <p>
464    * <b>ZK State:  CLOSING</b>
465    * <p>A node can get into CLOSING state if</p>
466    * <ul>
467    * <li>An RS has begun to close a region
468    * </ul>
469    * <p>We will mock the scenarios</p>
470    * <ul>
471    * <li>Region was being closed but the RS died before finishing the close
472    * </ul>
473    * <b>ZK State:  OPENED</b>
474    * <p>A node can get into OPENED state if</p>
475    * <ul>
476    * <li>An RS has finished opening a region but the master has not yet acknowledged it
477    * </ul>
478    * <p>We will mock the scenarios</p>
479    * <ul>
480    * <li>Region of a table that should be enabled was opened by a now-dead RS
481    * <li>Region of a table that should be disabled was opened by a now-dead RS
482    * </ul>
483    * <p>
484    * <b>ZK State:  NONE</b>
485    * <p>A region may have no transition node if</p>
486    * <ul>
487    * <li>The server hosting the region died and no master processed it
488    * </ul>
489    * <p>We will mock the scenarios</p>
490    * <ul>
491    * <li>Region of enabled table was on a dead RS that was not yet processed
492    * <li>Region of disabled table was on a dead RS that was not yet processed
493    * </ul>
494    * @throws Exception
495    */
496   @Test (timeout=180000)
497   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
498 
499     final int NUM_MASTERS = 1;
500     final int NUM_RS = 2;
501 
502     // Create and start the cluster
503     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
504     Configuration conf = TEST_UTIL.getConfiguration();
505     conf.setBoolean("hbase.assignment.usezk", true);
506 
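    // Let the master finish waiting for region servers once a single one has checked in, since the second RS is hard-killed later in this test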
507     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
508     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
509     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
510     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
511     log("Cluster started");
512 
513     // Create a ZKW to use in the test
514     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
515         "unittest", new Abortable() {
516 
517           @Override
518           public void abort(String why, Throwable e) {
519             LOG.error("Fatal ZK Error: " + why, e);
520             org.junit.Assert.assertFalse("Fatal ZK error", true);
521           }
522 
523           @Override
524           public boolean isAborted() {
525             return false;
526           }
527 
528     });
529 
530     // get all the master threads
531     List<MasterThread> masterThreads = cluster.getMasterThreads();
532     assertEquals(1, masterThreads.size());
533 
534     // only one master thread, let's wait for it to be initialized
535     assertTrue(cluster.waitForActiveAndReadyMaster());
536     HMaster master = masterThreads.get(0).getMaster();
537     assertTrue(master.isActiveMaster());
538     assertTrue(master.isInitialized());
539 
540     // disable load balancing on this master
541     master.balanceSwitch(false);
542 
543     // create two tables in META, each with 30 regions
544     byte [] FAMILY = Bytes.toBytes("family");
545     byte[][] SPLIT_KEYS =
546         TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
547 
548     byte [] enabledTable = Bytes.toBytes("enabledTable");
549     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
550     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
551     FileSystem filesystem = FileSystem.get(conf);
552     Path rootdir = FSUtils.getRootDir(conf);
553     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
554     // Write the .tableinfo
555     fstd.createTableDescriptor(htdEnabled);
556     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
557         null, null);
558     createRegion(hriEnabled, rootdir, conf, htdEnabled);
559 
560     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
561         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
562 
563     TableName disabledTable =
564         TableName.valueOf("disabledTable");
565     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
566     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
567     // Write the .tableinfo
568     fstd.createTableDescriptor(htdDisabled);
569     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
570     createRegion(hriDisabled, rootdir, conf, htdDisabled);
571 
572     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
573         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
574 
575     log("Regions in hbase:meta and Namespace have been created");
576 
577     // at this point we only expect 2 regions to be assigned out (catalogs and namespace  )
578     assertEquals(2, cluster.countServedRegions());
579 
580     // The first RS will stay online
581     List<RegionServerThread> regionservers =
582       cluster.getRegionServerThreads();
583     HRegionServer hrs = regionservers.get(0).getRegionServer();
584 
585     // The second RS is going to be hard-killed
586     RegionServerThread hrsDeadThread = regionservers.get(1);
587     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
588     ServerName deadServerName = hrsDead.getServerName();
589 
590     // we'll need some regions to already be assigned out properly on live RS
591     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
592     enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
593     enabledRegions.removeAll(enabledAndAssignedRegions);
594     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
595     disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
596     disabledRegions.removeAll(disabledAndAssignedRegions);
597 
598     // now actually assign them
599     for (HRegionInfo hri : enabledAndAssignedRegions) {
600       master.assignmentManager.addPlan(hri.getEncodedName(),
601           new RegionPlan(hri, null, hrs.getServerName()));
602       master.assignRegion(hri);
603     }
604     for (HRegionInfo hri : disabledAndAssignedRegions) {
605       master.assignmentManager.addPlan(hri.getEncodedName(),
606           new RegionPlan(hri, null, hrs.getServerName()));
607       master.assignRegion(hri);
608     }
609 
610     log("Waiting for assignment to finish");
611     ZKAssign.blockUntilNoRIT(zkw);
612     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
613     log("Assignment completed");
614 
615     assertTrue(" Table must be enabled.", master.getAssignmentManager()
616         .getZKTable().isEnabledTable(TableName.valueOf("enabledTable")));
617     // we also need regions assigned out on the dead server
618     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
619     enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
620     enabledRegions.removeAll(enabledAndOnDeadRegions);
621     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
622     disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
623     disabledRegions.removeAll(disabledAndOnDeadRegions);
624 
625     // set region plan to server to be killed and trigger assign
626     for (HRegionInfo hri : enabledAndOnDeadRegions) {
627       master.assignmentManager.addPlan(hri.getEncodedName(),
628           new RegionPlan(hri, null, deadServerName));
629       master.assignRegion(hri);
630     }
631     for (HRegionInfo hri : disabledAndOnDeadRegions) {
632       master.assignmentManager.addPlan(hri.getEncodedName(),
633           new RegionPlan(hri, null, deadServerName));
634       master.assignRegion(hri);
635     }
636 
637     // wait for no more RIT
638     log("Waiting for assignment to finish");
639     ZKAssign.blockUntilNoRIT(zkw);
640     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
641     log("Assignment completed");
642 
643     // Because master.assignRegion(hri) may fail to assign a region to the specified RS,
644     // we need to make sure that the regions ended up on the expected RS
645     verifyRegionLocation(hrs, enabledAndAssignedRegions);
646     verifyRegionLocation(hrs, disabledAndAssignedRegions);
647     verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
648     verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
649 
650     assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
651       enabledAndAssignedRegions.size() >= 2);
652     assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
653       disabledAndAssignedRegions.size() >= 2);
654     assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
655       enabledAndOnDeadRegions.size() >= 2);
656     assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
657       disabledAndOnDeadRegions.size() >= 2);
658 
659     // Stop the master
660     log("Aborting master");
661     cluster.abortMaster(0);
662     cluster.waitOnMaster(0);
663     log("Master has aborted");
664 
665     /*
666      * Now, let's start mocking up some weird states as described in the method
667      * javadoc.
668      */
669 
670     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
671     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
672 
673     log("Beginning to mock scenarios");
674 
675     // Disable the disabledTable in ZK
676     ZKTable zktable = new ZKTable(zkw);
677     zktable.setDisabledTable(disabledTable);
678 
679     assertTrue(" The enabled table should be identified on master fail over.",
680         zktable.isEnabledTable(TableName.valueOf("enabledTable")));
681 
682     /*
683      * ZK = CLOSING
684      */
685 
686     // Region of enabled table being closed on dead RS but not finished
687     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
688     regionsThatShouldBeOnline.add(region);
689     ZKAssign.createNodeClosing(zkw, region, deadServerName);
690     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
691         region + "\n\n");
692 
693     // Region of disabled table being closed on dead RS but not finished
694     region = disabledAndOnDeadRegions.remove(0);
695     regionsThatShouldBeOffline.add(region);
696     ZKAssign.createNodeClosing(zkw, region, deadServerName);
697     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
698         region + "\n\n");
699 
700     /*
701      * ZK = CLOSED
702      */
703 
704     // Region of enabled on dead server gets closed but not ack'd by master
705     region = enabledAndOnDeadRegions.remove(0);
706     regionsThatShouldBeOnline.add(region);
707     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
708     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
709     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
710         region + "\n\n");
711 
712     // Region of disabled on dead server gets closed but not ack'd by master
713     region = disabledAndOnDeadRegions.remove(0);
714     regionsThatShouldBeOffline.add(region);
715     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
716     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
717     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
718         region + "\n\n");
719 
720     /*
721      * ZK = OPENING
722      */
723 
724     // RS was opening a region of enabled table then died
725     region = enabledRegions.remove(0);
726     regionsThatShouldBeOnline.add(region);
727     ZKAssign.createNodeOffline(zkw, region, deadServerName);
728     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
729     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
730         region + "\n\n");
731 
732     // RS was opening a region of disabled table then died
733     region = disabledRegions.remove(0);
734     regionsThatShouldBeOffline.add(region);
735     ZKAssign.createNodeOffline(zkw, region, deadServerName);
736     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
737     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
738         region + "\n\n");
739 
740     /*
741      * ZK = OPENED
742      */
743 
744     // Region of enabled table was opened on dead RS
745     region = enabledRegions.remove(0);
746     regionsThatShouldBeOnline.add(region);
747     ZKAssign.createNodeOffline(zkw, region, deadServerName);
748     ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
749     while (true) {
750       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
751       RegionTransition rt = RegionTransition.parseFrom(bytes);
752       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
753         break;
754       }
755       Thread.sleep(100);
756     }
757     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
758         region + "\n\n");
759 
760     // Region of disabled table was opened on dead RS
761     region = disabledRegions.remove(0);
762     regionsThatShouldBeOffline.add(region);
763     ZKAssign.createNodeOffline(zkw, region, deadServerName);
764     ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
765     while (true) {
766       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
767       RegionTransition rt = RegionTransition.parseFrom(bytes);
768       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
769         break;
770       }
771       Thread.sleep(100);
772     }
773     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
774         region + "\n\n");
775 
776     /*
777      * ZK = NONE
778      */
779 
780     // Region of enabled table was open at steady-state on dead RS
781     region = enabledRegions.remove(0);
782     regionsThatShouldBeOnline.add(region);
783     ZKAssign.createNodeOffline(zkw, region, deadServerName);
784     ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
785     while (true) {
786       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
787       RegionTransition rt = RegionTransition.parseFrom(bytes);
788       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
789         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
790         LOG.debug("DELETED " + rt);
791         break;
792       }
793       Thread.sleep(100);
794     }
795     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
796         + "\n" + region + "\n\n");
797 
798     // Region of disabled table was open at steady-state on dead RS
799     region = disabledRegions.remove(0);
800     regionsThatShouldBeOffline.add(region);
801     ZKAssign.createNodeOffline(zkw, region, deadServerName);
802     ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region);
803     while (true) {
804       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
805       RegionTransition rt = RegionTransition.parseFrom(bytes);
806       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
807         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
808         break;
809       }
810       Thread.sleep(100);
811     }
812     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
813       + "\n" + region + "\n\n");
814 
815     /*
816      * DONE MOCKING
817      */
818 
819     log("Done mocking data up in ZK");
820 
821     // Kill the RS that had a hard death
822     log("Killing RS " + deadServerName);
823     hrsDead.abort("Killing for unit test");
824     log("RS " + deadServerName + " killed");
825 
826     // Start up a new master.  Wait until regionserver is completely down
827     // before starting new master because of hbase-4511.
828     while (hrsDeadThread.isAlive()) {
829       Threads.sleep(10);
830     }
831     log("Starting up a new master");
832     master = cluster.startMaster().getMaster();
833     log("Waiting for master to be ready");
834     assertTrue(cluster.waitForActiveAndReadyMaster());
835     log("Master is ready");
836 
837     // Wait until SSH processing completed for dead server.
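    // (SSH = ServerShutdownHandler, which re-assigns the regions that were on the dead server)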
838     while (master.getServerManager().areDeadServersInProgress()) {
839       Thread.sleep(10);
840     }
841 
842     // Failover should be completed, now wait for no RIT
843     log("Waiting for no more RIT");
844     ZKAssign.blockUntilNoRIT(zkw);
845     log("No more RIT in ZK");
846     long now = System.currentTimeMillis();
847     long maxTime = 120000;
848     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
849     if (!done) {
850       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
851       LOG.info("rit=" + regionStates.getRegionsInTransition());
852     }
853     long elapsed = System.currentTimeMillis() - now;
854     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
855       elapsed < maxTime);
856     log("No more RIT in RIT map, doing final test verification");
857 
858     // Grab all the regions that are online across RSs
859     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
860     now = System.currentTimeMillis();
861     maxTime = 30000;
862     for (JVMClusterUtil.RegionServerThread rst :
863         cluster.getRegionServerThreads()) {
864       try {
865         HRegionServer rs = rst.getRegionServer();
866         while (!rs.getRegionsInTransitionInRS().isEmpty()) {
867           elapsed = System.currentTimeMillis() - now;
868           assertTrue("Test timed out in getting online regions", elapsed < maxTime);
869           if (rs.isAborted() || rs.isStopped()) {
870             // This region server is stopped, skip it.
871             break;
872           }
873           Thread.sleep(100);
874         }
875         onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs));
876       } catch (RegionServerStoppedException e) {
877         LOG.info("Got RegionServerStoppedException", e);
878       }
879     }
880 
881     // Now, everything that should be online should be online
882     for (HRegionInfo hri : regionsThatShouldBeOnline) {
883       assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
884         onlineRegions.contains(hri));
885     }
886 
887     // Everything that should be offline should not be online
888     for (HRegionInfo hri : regionsThatShouldBeOffline) {
889       assertFalse(onlineRegions.contains(hri));
890     }
891 
892     log("Done with verification, all passed, shutting down cluster");
893 
894     // Done, shutdown the cluster
895     TEST_UTIL.shutdownMiniCluster();
896   }
897 
898   /**
899    * Verify which regions are on the expected region server; regions not online there are removed from the given list
900    */
901   private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
902       throws IOException {
903     List<HRegionInfo> tmpOnlineRegions = ProtobufUtil.getOnlineRegions(hrs);
904     Iterator<HRegionInfo> itr = regions.iterator();
905     while (itr.hasNext()) {
906       HRegionInfo tmp = itr.next();
907       if (!tmpOnlineRegions.contains(tmp)) {
908         itr.remove();
909       }
910     }
911   }
912 
913   HRegion createRegion(final HRegionInfo  hri, final Path rootdir, final Configuration c,
914       final HTableDescriptor htd)
915   throws IOException {
916     HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
917     // The above call to create a region will create an hlog file.  Each
918     // log file create will also create a running thread to do syncing.  We need
919     // to close out this log else we will have a running thread trying to sync
920     // the file system continuously which is ugly when dfs is taken away at the
921     // end of the test.
922     HRegion.closeHRegion(r);
923     return r;
924   }
925 
926   // TODO: Next test to add: permutations where the regions in transition, or the RS
927   //       that is killed, are hosting the ROOT and hbase:meta regions.
928 
929   private void log(String string) {
930     LOG.info("\n\n" + string + " \n\n");
931   }
932 
933   @Test (timeout=180000)
934   public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
935       throws Exception {
936     LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
937     final int NUM_MASTERS = 1;
938     final int NUM_RS = 2;
939 
940     // Start the cluster
941     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
942     Configuration conf = TEST_UTIL.getConfiguration();
943     conf.setInt("hbase.master.info.port", -1);
944     conf.setBoolean("hbase.assignment.usezk", true);
945 
946     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
947     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
948 
949     // Find regionserver carrying meta.
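    // Abort region servers one at a time, stopping once the one hosting hbase:meta is found; 'count' remembers its index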
950     List<RegionServerThread> regionServerThreads =
951       cluster.getRegionServerThreads();
952     int count = -1;
953     HRegion metaRegion = null;
954     for (RegionServerThread regionServerThread : regionServerThreads) {
955       HRegionServer regionServer = regionServerThread.getRegionServer();
956       metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
957       count++;
958       regionServer.abort("");
959       if (null != metaRegion) break;
960     }
961     HRegionServer regionServer = cluster.getRegionServer(count);
962 
963     TEST_UTIL.shutdownMiniHBaseCluster();
964 
965     // Create a ZKW to use in the test
966     ZooKeeperWatcher zkw =
967       HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
968           metaRegion, regionServer.getServerName());
969 
970     LOG.info("Staring cluster for second time");
971     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
972 
973     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
974     while (!master.isInitialized()) {
975       Thread.sleep(100);
976     }
977     // Failover should be completed, now wait for no RIT
978     log("Waiting for no more RIT");
979     ZKAssign.blockUntilNoRIT(zkw);
980 
981     zkw.close();
982     // Stop the cluster
983     TEST_UTIL.shutdownMiniCluster();
984   }
985 
986   /**
987    * This tests that a region in transition in the OFFLINE state gets re-assigned after a master restart
988    */
989   @Test(timeout=240000)
990   public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
991     final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
992     final int NUM_MASTERS = 1;
993     final int NUM_RS = 2;
994 
995     // Create config to use for this cluster
996     Configuration conf = HBaseConfiguration.create();
997     conf.setBoolean("hbase.assignment.usezk", true);
998 
999     // Start the cluster
1000     final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1001     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1002     log("Cluster started");
1003 
1004     TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1005     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1006     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1007     HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1008     ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1009     TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1010 
1011     ServerName dstName = null;
1012     for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1013       if (!tmpServer.equals(serverName)) {
1014         dstName = tmpServer;
1015         break;
1016       }
1017     }
1018     // find a different server
1019     assertTrue(dstName != null);
1020     // shutdown HBase cluster
1021     TEST_UTIL.shutdownMiniHBaseCluster();
1022     // create a RIT node in offline state
1023     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1024     ZKAssign.createNodeOffline(zkw, hri, dstName);
1025     Stat stat = new Stat();
1026     byte[] data =
1027         ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1028     assertTrue(data != null);
1029     RegionTransition rt = RegionTransition.parseFrom(data);
1030     assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1031 
1032     LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1033         + " and dst server=" + dstName);
1034 
1035     // start HBase cluster
1036     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1037 
1038     while (true) {
1039       master = TEST_UTIL.getHBaseCluster().getMaster();
1040       if (master != null && master.isInitialized()) {
1041         ServerManager serverManager = master.getServerManager();
1042         if (!serverManager.areDeadServersInProgress()) {
1043           break;
1044         }
1045       }
1046       Thread.sleep(200);
1047     }
1048 
1049     // verify the region is assigned
1050     master = TEST_UTIL.getHBaseCluster().getMaster();
1051     master.getAssignmentManager().waitForAssignment(hri);
1052     regionStates = master.getAssignmentManager().getRegionStates();
1053     RegionState newState = regionStates.getRegionState(hri);
1054     assertTrue(newState.isOpened());
1055   }
1056   
1057  /**
1058    * Simple test of master failover.
1059    * <p>
1060    * Starts with three masters.  Kills a backup master.  Then kills the active
1061    * master.  Ensures the final master becomes active and we can still contact
1062    * the cluster.
1063    * @throws Exception
1064    */
1065   @Test (timeout=240000)
1066   public void testSimpleMasterFailover() throws Exception {
1067 
1068     final int NUM_MASTERS = 3;
1069     final int NUM_RS = 3;
1070 
1071     // Start the cluster
1072     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1073 
1074     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1075     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1076 
1077     // get all the master threads
1078     List<MasterThread> masterThreads = cluster.getMasterThreads();
1079 
1080     // wait for each to come online
1081     for (MasterThread mt : masterThreads) {
1082       assertTrue(mt.isAlive());
1083     }
1084 
1085     // verify only one is the active master and we have right number
1086     int numActive = 0;
1087     int activeIndex = -1;
1088     ServerName activeName = null;
1089     HMaster active = null;
1090     for (int i = 0; i < masterThreads.size(); i++) {
1091       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1092         numActive++;
1093         activeIndex = i;
1094         active = masterThreads.get(activeIndex).getMaster();
1095         activeName = active.getServerName();
1096       }
1097     }
1098     assertEquals(1, numActive);
1099     assertEquals(NUM_MASTERS, masterThreads.size());
1100     LOG.info("Active master " + activeName);
1101 
1102     // Check that ClusterStatus reports the correct active and backup masters
1103     assertNotNull(active);
1104     ClusterStatus status = active.getClusterStatus();
1105     assertTrue(status.getMaster().equals(activeName));
1106     assertEquals(2, status.getBackupMastersSize());
1107     assertEquals(2, status.getBackupMasters().size());
1108 
1109     // attempt to stop one of the inactive masters
1110     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
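    // (any index other than activeIndex would do; wrap to 1 when the active master is at index 0)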
1111     HMaster master = cluster.getMaster(backupIndex);
1112     LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1113     cluster.stopMaster(backupIndex, false);
1114     cluster.waitOnMaster(backupIndex);
1115 
1116     // Verify still one active master and it's the same
1117     for (int i = 0; i < masterThreads.size(); i++) {
1118       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1119         assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1120         activeIndex = i;
1121         active = masterThreads.get(activeIndex).getMaster();
1122       }
1123     }
1124     assertEquals(1, numActive);
1125     assertEquals(2, masterThreads.size());
1126     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1127     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
1128     assertEquals(3, rsCount);
1129 
1130     // Check that ClusterStatus reports the correct active and backup masters
1131     assertNotNull(active);
1132     status = active.getClusterStatus();
1133     assertTrue(status.getMaster().equals(activeName));
1134     assertEquals(1, status.getBackupMastersSize());
1135     assertEquals(1, status.getBackupMasters().size());
1136 
1137     // kill the active master
1138     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1139     cluster.stopMaster(activeIndex, false);
1140     cluster.waitOnMaster(activeIndex);
1141 
1142     // wait for an active master to show up and be ready
1143     assertTrue(cluster.waitForActiveAndReadyMaster());
1144 
1145     LOG.debug("\n\nVerifying backup master is now active\n");
1146     // should only have one master now
1147     assertEquals(1, masterThreads.size());
1148 
1149     // and it should be active
1150     active = masterThreads.get(0).getMaster();
1151     assertNotNull(active);
1152     status = active.getClusterStatus();
1153     ServerName mastername = status.getMaster();
1154     assertTrue(mastername.equals(active.getServerName()));
1155     assertTrue(active.isActiveMaster());
1156     assertEquals(0, status.getBackupMastersSize());
1157     assertEquals(0, status.getBackupMasters().size());
1158     int rss = status.getServersSize();
1159     LOG.info("Active master " + mastername.getServerName() + " managing " +
1160       rss +  " region servers");
1161     assertEquals(3, rss);
1162 
1163     // Stop the cluster
1164     TEST_UTIL.shutdownMiniCluster();
1165   }
1166 
1167   /**
1168    * Test regions in pending_open/close and failed_open/close states when the master fails over
1169    */
1170   @Test (timeout=180000)
1171   public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1172     final int NUM_MASTERS = 1;
1173     final int NUM_RS = 1;
1174 
1175     // Create config to use for this cluster
1176     Configuration conf = HBaseConfiguration.create();
1177     conf.setBoolean("hbase.assignment.usezk", false);
1178 
1179     // Start the cluster
1180     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1181     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1182     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1183     log("Cluster started");
1184 
1185     // get all the master threads
1186     List<MasterThread> masterThreads = cluster.getMasterThreads();
1187     assertEquals(1, masterThreads.size());
1188 
1189     // only one master thread, let's wait for it to be initialized
1190     assertTrue(cluster.waitForActiveAndReadyMaster());
1191     HMaster master = masterThreads.get(0).getMaster();
1192     assertTrue(master.isActiveMaster());
1193     assertTrue(master.isInitialized());
1194 
1195     // Create a table with a region online
1196     HTable onlineTable = TEST_UTIL.createTable("onlineTable", "family");
1197 
1198     // Create a table in META, so it has a region offline
1199     HTableDescriptor offlineTable = new HTableDescriptor(
1200       TableName.valueOf(Bytes.toBytes("offlineTable")));
1201     offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1202 
1203     FileSystem filesystem = FileSystem.get(conf);
1204     Path rootdir = FSUtils.getRootDir(conf);
1205     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1206     fstd.createTableDescriptor(offlineTable);
1207 
1208     HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1209     createRegion(hriOffline, rootdir, conf, offlineTable);
1210     MetaEditor.addRegionToMeta(master.getCatalogTracker(), hriOffline);
1211 
1212     log("Regions in hbase:meta and namespace have been created");
1213 
1214     // at this point we only expect 3 regions to be assigned out
1215     // (catalogs and namespace, + 1 online region)
1216     assertEquals(3, cluster.countServedRegions());
1217     HRegionInfo hriOnline = onlineTable.getRegionLocation("").getRegionInfo();
1218 
1219     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1220     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
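    // With ZK-less assignment (hbase.assignment.usezk=false above), region states live in the RegionStateStore rather than in RIT znodes, so the odd states below are mocked directly through the state store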
1221 
1222     // Put the online region in pending_close. It is actually already opened.
1223     // This is to simulate that the region close RPC is not sent out before failover
1224     RegionState oldState = regionStates.getRegionState(hriOnline);
1225     RegionState newState = new RegionState(hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1226     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1227 
1228     // Put the offline region in pending_open. It is actually not opened yet.
1229     // This is to simulate that the region open RPC is not sent out before failover
1230     oldState = new RegionState(hriOffline, State.OFFLINE);
1231     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1232     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1233     
1234     HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1235     createRegion(failedClose, rootdir, conf, offlineTable);
1236     MetaEditor.addRegionToMeta(master.getCatalogTracker(), failedClose);
1237     
1238     oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1239     newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1240     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1241     
1242    
1243     HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1244     createRegion(failedOpen, rootdir, conf, offlineTable);
1245     MetaEditor.addRegionToMeta(master.getCatalogTracker(), failedOpen);
1246     
1247     // Simulate a region transitioning to failed open when the region server reports the
1248     // transition as FAILED_OPEN
1249     oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1250     newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1251     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1252     
1253     HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1254     createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1255     MetaEditor.addRegionToMeta(master.getCatalogTracker(), failedOpenNullServer);
1256     
1257     // Simulate a region transitioning to failed open when the master couldn't find a plan for
1258     // the region
1259     oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1260     newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1261     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1262     
1263     
1264 
1265     // Stop the master
1266     log("Aborting master");
1267     cluster.abortMaster(0);
1268     cluster.waitOnMaster(0);
1269     log("Master has aborted");
1270 
1271     // Start up a new master
1272     log("Starting up a new master");
1273     master = cluster.startMaster().getMaster();
1274     log("Waiting for master to be ready");
1275     cluster.waitForActiveAndReadyMaster();
1276     log("Master is ready");
1277 
1278     // Wait till no region in transition any more
1279     master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1280 
1281     // Get new region states since master restarted
1282     regionStates = master.getAssignmentManager().getRegionStates();
1283 
1284     // All of the mocked regions (pending_open, pending_close, failed_open, failed_close) should be online
1285     assertTrue(regionStates.isRegionOnline(hriOffline));
1286     assertTrue(regionStates.isRegionOnline(hriOnline));
1287     assertTrue(regionStates.isRegionOnline(failedClose));
1288     assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1289     assertTrue(regionStates.isRegionOnline(failedOpen));
1290     
1291     log("Done with verification, shutting down cluster");
1292 
1293     // Done, shutdown the cluster
1294     TEST_UTIL.shutdownMiniCluster();
1295   }
1296 
1297   /**
1298    * Test hbase:meta in transition when the master fails over
1299    */
1300   @Test(timeout = 180000)
1301   public void testMetaInTransitionWhenMasterFailover() throws Exception {
1302     final int NUM_MASTERS = 1;
1303     final int NUM_RS = 1;
1304 
1305     // Start the cluster
1306     Configuration conf = HBaseConfiguration.create();
1307     conf.setBoolean("hbase.assignment.usezk", false);
1308     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1309     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1310     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1311     CatalogTracker catalogTracker = new CatalogTracker(HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL),
1312       conf, null);
1313     
1314     log("Cluster started");
1315 
1316     log("Moving meta off the master");
1317     HMaster activeMaster = cluster.getMaster();
1318     HRegionServer rs = cluster.getRegionServer(0);
1319     ServerName metaServerName = cluster.getLiveRegionServerThreads()
1320       .get(0).getRegionServer().getServerName();
1321     activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1322       Bytes.toBytes(metaServerName.getServerName()));
1323     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1324     assertEquals("Meta should be assigned on expected regionserver",
1325       metaServerName, catalogTracker.getMetaLocation());
1326 
1327     // Now kill the master; meta should remain on the RS where we placed it before.
1328     log("Aborting master");
1329     activeMaster.stop("test-kill");
1330     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1331     log("Master has aborted");
1332 
1333     // meta should remain where it was
1334     RegionState metaState =
1335       catalogTracker.getMetaRegionState();
1336     assertEquals("hbase:root should be onlined on RS",
1337       metaState.getServerName(), rs.getServerName());
1338     assertEquals("hbase:root should be onlined on RS",
1339       metaState.getState(), State.OPEN);
1340 
1341     // Start up a new master
1342     log("Starting up a new master");
1343     activeMaster = cluster.startMaster().getMaster();
1344     log("Waiting for master to be ready");
1345     cluster.waitForActiveAndReadyMaster();
1346     log("Master is ready");
1347 
1348     // ensure meta is still deployed on the RS
1349     metaState =
1350         catalogTracker.getMetaRegionState();
1351     assertEquals("hbase:root should be onlined on RS",
1352       metaState.getServerName(), rs.getServerName());
1353     assertEquals("hbase:root should be onlined on RS",
1354       metaState.getState(), State.OPEN);
1355 
1356     // Update meta state to PENDING_OPEN, then kill the master.
1357     // This simulates that the RS successfully deployed the region, but the
1358     // RPC was lost right before the failure.
1359     // The region server should expire (how can that be verified?)
1360     MetaRegionTracker.setMetaLocation(activeMaster.getZooKeeper(),
1361       rs.getServerName(), State.PENDING_OPEN);
1362     HRegion meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1363     rs.removeFromOnlineRegions(meta, null);
1364     meta.close();
1365 
1366     log("Aborting master");
1367     activeMaster.stop("test-kill");
1368     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1369     log("Master has aborted");
1370 
1371     // Start up a new master
1372     log("Starting up a new master");
1373     activeMaster = cluster.startMaster().getMaster();
1374     log("Waiting for master to be ready");
1375     cluster.waitForActiveAndReadyMaster();
1376     log("Master is ready");
1377 
1378     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1379     log("ROOT was assigned");
1380 
1381     metaState =
1382       catalogTracker.getMetaRegionState();
1383     assertEquals("hbase:root should be onlined on RS",
1384       metaState.getServerName(), rs.getServerName());
1385     assertEquals("hbase:root should be onlined on RS",
1386       metaState.getState(), State.OPEN);
1387 
1388     // Update meta state to PENDING_CLOSE, then kill the master.
1389     // This simulates that the RS successfully deployed the region, but the
1390     // RPC was lost right before the failure.
1391     // The region server should expire (how can that be verified?)
1392     MetaRegionTracker.setMetaLocation(activeMaster.getZooKeeper(),
1393       rs.getServerName(), State.PENDING_CLOSE);
1394 
1395     log("Aborting master");
1396     activeMaster.stop("test-kill");
1397     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1398     log("Master has aborted");
1399 
1400     rs.closeRegion(null, RequestConverter.buildCloseRegionRequest(
1401       rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1402 
1403     // Start up a new master
1404     log("Starting up a new master");
1405     activeMaster = cluster.startMaster().getMaster();
1406     log("Waiting for master to be ready");
1407     cluster.waitForActiveAndReadyMaster();
1408     log("Master is ready");
1409 
1410     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1411     log("Meta was assigned");
1412     
1413     rs.closeRegion(null, RequestConverter.buildCloseRegionRequest(
1414       rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1415 
1416     // Set a dummy server to check if master reassigns meta on restart
1417     MetaRegionTracker.setMetaLocation(activeMaster.getZooKeeper(),
1418          ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1419     
1420     log("Aborting master");
1421     activeMaster.stop("test-kill");
1422     
1423     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1424     log("Master has aborted");
1425     
1426     // Start up a new master
1427     log("Starting up a new master");
1428     activeMaster = cluster.startMaster().getMaster();
1429     log("Waiting for master to be ready");
1430     cluster.waitForActiveAndReadyMaster();
1431     log("Master is ready");
1432 
1433     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1434     catalogTracker.verifyMetaRegionLocation(10000);
1435     log("Meta was assigned");
1436     
1437     // Done, shutdown the cluster
1438     TEST_UTIL.shutdownMiniCluster();
1439   }
1440 }
1441