1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mttr;
20
21 import static org.junit.Assert.assertEquals;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.concurrent.Callable;
26 import java.util.concurrent.ExecutorService;
27 import java.util.concurrent.Executors;
28 import java.util.concurrent.Future;
29 import java.util.concurrent.TimeUnit;
30
31 import org.apache.commons.lang.RandomStringUtils;
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
35 import org.apache.hadoop.hbase.ClusterStatus;
36 import org.apache.hadoop.hbase.HColumnDescriptor;
37 import org.apache.hadoop.hbase.HTableDescriptor;
38 import org.apache.hadoop.hbase.IntegrationTestingUtility;
39 import org.apache.hadoop.hbase.testclassification.IntegrationTests;
40 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
41 import org.apache.hadoop.hbase.NamespaceExistException;
42 import org.apache.hadoop.hbase.NamespaceNotFoundException;
43 import org.apache.hadoop.hbase.TableExistsException;
44 import org.apache.hadoop.hbase.TableName;
45 import org.apache.hadoop.hbase.TableNotFoundException;
46 import org.apache.hadoop.hbase.chaos.actions.Action;
47 import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
48 import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
49 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
50 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
51 import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
52 import org.apache.hadoop.hbase.client.HBaseAdmin;
53 import org.apache.hadoop.hbase.client.HTable;
54 import org.apache.hadoop.hbase.client.Put;
55 import org.apache.hadoop.hbase.client.Result;
56 import org.apache.hadoop.hbase.client.ResultScanner;
57 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
58 import org.apache.hadoop.hbase.client.Scan;
59 import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
60 import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
61 import org.apache.hadoop.hbase.ipc.FatalConnectionException;
62 import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
63 import org.apache.hadoop.hbase.security.AccessDeniedException;
64 import org.apache.hadoop.hbase.util.Bytes;
65 import org.apache.hadoop.hbase.util.LoadTestTool;
66 import org.cloudera.htrace.Span;
67 import org.cloudera.htrace.Trace;
68 import org.cloudera.htrace.TraceScope;
69 import org.cloudera.htrace.impl.AlwaysSampler;
70 import org.junit.AfterClass;
71 import org.junit.BeforeClass;
72 import org.junit.Test;
73 import org.junit.experimental.categories.Category;
74
75 import com.google.common.base.Objects;
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116 @Category(IntegrationTests.class)
117 public class IntegrationTestMTTR {
118
119
120
121 private static final byte[] FAMILY = Bytes.toBytes("d");
122 private static final Log LOG = LogFactory.getLog(IntegrationTestMTTR.class);
123 private static long sleepTime;
124 private static final String SLEEP_TIME_KEY = "hbase.IntegrationTestMTTR.sleeptime";
125 private static final long SLEEP_TIME_DEFAULT = 60 * 1000l;
126
127
128
129
130 private static TableName tableName;
131 private static TableName loadTableName;
132
133
134
135
136 private static IntegrationTestingUtility util;
137
138
139
140
141 private static ExecutorService executorService;
142
143
144
145
146 private static Action restartRSAction;
147 private static Action restartMetaAction;
148 private static Action moveRegionAction;
149 private static Action restartMasterAction;
150
151
152
153
154 private static LoadTestTool loadTool;
155
156
157 @BeforeClass
158 public static void setUp() throws Exception {
159
160 if (util == null) {
161 util = new IntegrationTestingUtility();
162 }
163
164
165 util.initializeCluster(3);
166
167
168 loadTool = new LoadTestTool();
169 loadTool.setConf(util.getConfiguration());
170
171
172
173 executorService = Executors.newFixedThreadPool(8);
174
175
176 setupTables();
177
178
179 sleepTime = util.getConfiguration().getLong(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT);
180 setupActions();
181 }
182
183 private static void setupActions() throws IOException {
184
185
186 util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
187
188
189
190 restartRSAction = new RestartRsHoldingTableAction(sleepTime, tableName.getNameAsString());
191
192
193 restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
194
195
196 moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
197 MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName.getNameAsString());
198
199
200 restartMasterAction = new RestartActiveMasterAction(1000);
201
202
203 Action.ActionContext actionContext = new Action.ActionContext(util);
204 restartRSAction.init(actionContext);
205 restartMetaAction.init(actionContext);
206 moveRegionAction.init(actionContext);
207 restartMasterAction.init(actionContext);
208 }
209
210 private static void setupTables() throws IOException {
211
212 tableName = TableName.valueOf(util.getConfiguration()
213 .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR"));
214
215 loadTableName = TableName.valueOf(util.getConfiguration()
216 .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool"));
217
218 if (util.getHBaseAdmin().tableExists(tableName)) {
219 util.deleteTable(tableName);
220 }
221
222 if (util.getHBaseAdmin().tableExists(loadTableName)) {
223 util.deleteTable(loadTableName);
224 }
225
226
227 HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
228
229
230 tableDescriptor.setMaxFileSize(Long.MAX_VALUE);
231
232 HColumnDescriptor descriptor = new HColumnDescriptor(FAMILY);
233 descriptor.setMaxVersions(1);
234 tableDescriptor.addFamily(descriptor);
235 util.getHBaseAdmin().createTable(tableDescriptor);
236
237
238 int ret = loadTool.run(new String[]{"-tn", loadTableName.getNameAsString(), "-init_only"});
239 assertEquals("Failed to initialize LoadTestTool", 0, ret);
240 }
241
242 @AfterClass
243 public static void after() throws IOException {
244
245 util.restoreCluster();
246 util = null;
247
248
249 executorService.shutdown();
250 executorService = null;
251
252
253 moveRegionAction = null;
254 restartMetaAction = null;
255 restartRSAction = null;
256 restartMasterAction = null;
257
258 loadTool = null;
259 }
260
261 @Test
262 public void testRestartRsHoldingTable() throws Exception {
263 run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction");
264 }
265
266 @Test
267 public void testKillRsHoldingMeta() throws Exception {
268 run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
269 }
270
271 @Test
272 public void testMoveRegion() throws Exception {
273 run(new ActionCallable(moveRegionAction), "MoveRegion");
274 }
275
276 @Test
277 public void testRestartMaster() throws Exception {
278 run(new ActionCallable(restartMasterAction), "RestartMaster");
279 }
280
281 public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
282 int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
283 LOG.info("Starting " + testName + " with " + maxIters + " iterations.");
284
285
286 ArrayList<TimingResult> resultPuts = new ArrayList<TimingResult>(maxIters);
287 ArrayList<TimingResult> resultScan = new ArrayList<TimingResult>(maxIters);
288 ArrayList<TimingResult> resultAdmin = new ArrayList<TimingResult>(maxIters);
289 long start = System.nanoTime();
290
291 try {
292
293 for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
294
295 Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
296
297
298 Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
299 Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
300 Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
301
302 Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
303
304 monkeyFuture.get();
305 loadFuture.get();
306
307
308 TimingResult putTime = putFuture.get();
309 TimingResult scanTime = scanFuture.get();
310 TimingResult adminTime = adminFuture.get();
311
312
313 resultPuts.add(putTime);
314 resultScan.add(scanTime);
315 resultAdmin.add(adminTime);
316
317
318 Thread.sleep(5000l);
319 }
320 } catch (Exception e) {
321 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
322 LOG.info(testName + " failed after " + runtimeMs + "ms.", e);
323 throw e;
324 }
325
326 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
327
328 Objects.ToStringHelper helper = Objects.toStringHelper("MTTRResults")
329 .add("putResults", resultPuts)
330 .add("scanResults", resultScan)
331 .add("adminResults", resultAdmin)
332 .add("totalRuntimeMs", runtimeMs)
333 .add("name", testName);
334
335
336 LOG.info(helper.toString());
337 }
338
339
340
341
342
343
344 private static class TimingResult {
345 DescriptiveStatistics stats = new DescriptiveStatistics();
346 ArrayList<Long> traces = new ArrayList<Long>(10);
347
348
349
350
351
352
353 public void addResult(long time, Span span) {
354 stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
355 if (TimeUnit.SECONDS.convert(time, TimeUnit.NANOSECONDS) >= 1) {
356 traces.add(span.getTraceId());
357 }
358 }
359
360 public String toString() {
361 Objects.ToStringHelper helper = Objects.toStringHelper(this)
362 .add("numResults", stats.getN())
363 .add("minTime", stats.getMin())
364 .add("meanTime", stats.getMean())
365 .add("maxTime", stats.getMax())
366 .add("25th", stats.getPercentile(25))
367 .add("50th", stats.getPercentile(50))
368 .add("75th", stats.getPercentile(75))
369 .add("90th", stats.getPercentile(90))
370 .add("95th", stats.getPercentile(95))
371 .add("99th", stats.getPercentile(99))
372 .add("99.9th", stats.getPercentile(99.9))
373 .add("99.99th", stats.getPercentile(99.99))
374 .add("traces", traces);
375 return helper.toString();
376 }
377 }
378
379
380
381
382 static abstract class TimingCallable implements Callable<TimingResult> {
383 protected final Future<?> future;
384
385 public TimingCallable(Future<?> f) {
386 future = f;
387 }
388
389 @Override
390 public TimingResult call() throws Exception {
391 TimingResult result = new TimingResult();
392 final int maxIterations = 10;
393 int numAfterDone = 0;
394 int resetCount = 0;
395
396 while (numAfterDone < maxIterations) {
397 long start = System.nanoTime();
398 TraceScope scope = null;
399 try {
400 scope = Trace.startSpan(getSpanName(), AlwaysSampler.INSTANCE);
401 boolean actionResult = doAction();
402 if (actionResult && future.isDone()) {
403 numAfterDone++;
404 }
405
406
407
408
409
410
411 } catch (AccessDeniedException e) {
412 throw e;
413 } catch (CoprocessorException e) {
414 throw e;
415 } catch (FatalConnectionException e) {
416 throw e;
417 } catch (InvalidFamilyOperationException e) {
418 throw e;
419 } catch (NamespaceExistException e) {
420 throw e;
421 } catch (NamespaceNotFoundException e) {
422 throw e;
423 } catch (NoSuchColumnFamilyException e) {
424 throw e;
425 } catch (TableExistsException e) {
426 throw e;
427 } catch (TableNotFoundException e) {
428 throw e;
429 } catch (RetriesExhaustedException e){
430 throw e;
431
432
433
434
435
436 } catch (Exception e) {
437 resetCount++;
438 if (resetCount < maxIterations) {
439 LOG.info("Non-fatal exception while running " + this.toString()
440 + ". Resetting loop counter", e);
441 numAfterDone = 0;
442 } else {
443 LOG.info("Too many unexpected Exceptions. Aborting.", e);
444 throw e;
445 }
446 } finally {
447 if (scope != null) {
448 scope.close();
449 }
450 }
451 result.addResult(System.nanoTime() - start, scope.getSpan());
452 }
453 return result;
454 }
455
456 protected abstract boolean doAction() throws Exception;
457
458 protected String getSpanName() {
459 return this.getClass().getSimpleName();
460 }
461
462 @Override
463 public String toString() {
464 return this.getSpanName();
465 }
466 }
467
468
469
470
471
472 static class PutCallable extends TimingCallable {
473
474 private final HTable table;
475
476 public PutCallable(Future<?> f) throws IOException {
477 super(f);
478 this.table = new HTable(util.getConfiguration(), tableName);
479 }
480
481 @Override
482 protected boolean doAction() throws Exception {
483 Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
484 p.add(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
485 table.put(p);
486 table.flushCommits();
487 return true;
488 }
489
490 @Override
491 protected String getSpanName() {
492 return "MTTR Put Test";
493 }
494 }
495
496
497
498
499
500 static class ScanCallable extends TimingCallable {
501 private final HTable table;
502
503 public ScanCallable(Future<?> f) throws IOException {
504 super(f);
505 this.table = new HTable(util.getConfiguration(), tableName);
506 }
507
508 @Override
509 protected boolean doAction() throws Exception {
510 ResultScanner rs = null;
511 try {
512 Scan s = new Scan();
513 s.setBatch(2);
514 s.addFamily(FAMILY);
515 s.setFilter(new KeyOnlyFilter());
516 s.setMaxVersions(1);
517
518 rs = table.getScanner(s);
519 Result result = rs.next();
520 return result != null && result.size() > 0;
521 } finally {
522 if (rs != null) {
523 rs.close();
524 }
525 }
526 }
527 @Override
528 protected String getSpanName() {
529 return "MTTR Scan Test";
530 }
531 }
532
533
534
535
536 static class AdminCallable extends TimingCallable {
537
538 public AdminCallable(Future<?> f) throws IOException {
539 super(f);
540 }
541
542 @Override
543 protected boolean doAction() throws Exception {
544 HBaseAdmin admin = null;
545 try {
546 admin = new HBaseAdmin(util.getConfiguration());
547 ClusterStatus status = admin.getClusterStatus();
548 return status != null;
549 } finally {
550 if (admin != null) {
551 admin.close();
552 }
553 }
554 }
555
556 @Override
557 protected String getSpanName() {
558 return "MTTR Admin Test";
559 }
560 }
561
562
563 static class ActionCallable implements Callable<Boolean> {
564 private final Action action;
565
566 public ActionCallable(Action action) {
567 this.action = action;
568 }
569
570 @Override
571 public Boolean call() throws Exception {
572 this.action.perform();
573 return true;
574 }
575 }
576
577
578
579
580
581 public static class LoadCallable implements Callable<Boolean> {
582
583 private final Future<?> future;
584
585 public LoadCallable(Future<?> f) {
586 future = f;
587 }
588
589 @Override
590 public Boolean call() throws Exception {
591 int colsPerKey = 10;
592 int numServers = util.getHBaseClusterInterface().getInitialClusterStatus().getServersSize();
593 int numKeys = numServers * 5000;
594 int writeThreads = 10;
595
596
597
598
599 do {
600 int ret = loadTool.run(new String[]{
601 "-tn", loadTableName.getNameAsString(),
602 "-write", String.format("%d:%d:%d", colsPerKey, 500, writeThreads),
603 "-num_keys", String.valueOf(numKeys),
604 "-skip_init"
605 });
606 assertEquals("Load failed", 0, ret);
607 } while (!future.isDone());
608
609 return true;
610 }
611 }
612 }