View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.chaos.actions;
20  
21  import java.util.ArrayList;
22  import java.util.LinkedList;
23  import java.util.List;
24  
25  import org.apache.commons.lang.math.RandomUtils;
26  import org.apache.hadoop.hbase.ClusterStatus;
27  import org.apache.hadoop.hbase.ServerName;
28  import org.junit.Assert;
29  
30  /** This action is too specific to put in ChaosMonkey; put it here */
31  public class UnbalanceKillAndRebalanceAction extends Action {
32    /** Fractions of servers to get regions and live and die respectively; from all other
33     * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */
34    private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1;
35    private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1;
36    private static final double HOARD_FRC_OF_REGIONS = 0.8;
37    /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance
38     * and restarting the servers; to make sure these events have time to impact the cluster. */
39    private long waitForUnbalanceMilliSec;
40    private long waitForKillsMilliSec;
41    private long waitAfterBalanceMilliSec;
42  
43    public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance) {
44      super();
45      waitForUnbalanceMilliSec = waitUnbalance;
46      waitForKillsMilliSec = waitKill;
47      waitAfterBalanceMilliSec = waitAfterBalance;
48    }
49  
50    @Override
51    public void perform() throws Exception {
52      ClusterStatus status = this.cluster.getClusterStatus();
53      List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
54      int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size());
55      int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size());
56      Assert.assertTrue((liveCount + deadCount) < victimServers.size());
57      List<ServerName> targetServers = new ArrayList<ServerName>(liveCount);
58      for (int i = 0; i < liveCount + deadCount; ++i) {
59        int victimIx = RandomUtils.nextInt(victimServers.size());
60        targetServers.add(victimServers.remove(victimIx));
61      }
62      unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS);
63      Thread.sleep(waitForUnbalanceMilliSec);
64      for (int i = 0; i < liveCount; ++i) {
65        killRs(targetServers.get(i));
66      }
67      Thread.sleep(waitForKillsMilliSec);
68      forceBalancer();
69      Thread.sleep(waitAfterBalanceMilliSec);
70      for (int i = 0; i < liveCount; ++i) {
71        startRs(targetServers.get(i));
72      }
73    }
74  }