1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.apache.commons.lang.time.StopWatch;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.AuthUtil;
38 import org.apache.hadoop.hbase.DoNotRetryIOException;
39 import org.apache.hadoop.hbase.HBaseConfiguration;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HRegionInfo;
42 import org.apache.hadoop.hbase.HTableDescriptor;
43 import org.apache.hadoop.hbase.ServerName;
44 import org.apache.hadoop.hbase.TableName;
45 import org.apache.hadoop.hbase.TableNotEnabledException;
46 import org.apache.hadoop.hbase.TableNotFoundException;
47 import org.apache.hadoop.hbase.client.Get;
48 import org.apache.hadoop.hbase.client.HBaseAdmin;
49 import org.apache.hadoop.hbase.client.HTable;
50 import org.apache.hadoop.hbase.client.ResultScanner;
51 import org.apache.hadoop.hbase.client.Scan;
52 import org.apache.hadoop.util.Tool;
53 import org.apache.hadoop.util.ToolRunner;
54
55
56
57
58
59
60
61
62
63
64
65
66 public final class Canary implements Tool {
67
68 public interface Sink {
69 public void publishReadFailure(HRegionInfo region, Exception e);
70 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
71 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
72 }
73
74
75 public interface ExtendedSink extends Sink {
76 public void publishReadFailure(String table, String server);
77 public void publishReadTiming(String table, String server, long msTime);
78 }
79
80
81
82 public static class StdOutSink implements Sink {
83 @Override
84 public void publishReadFailure(HRegionInfo region, Exception e) {
85 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
86 }
87
88 @Override
89 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
90 LOG.error(String.format("read from region %s column family %s failed",
91 region.getRegionNameAsString(), column.getNameAsString()), e);
92 }
93
94 @Override
95 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
96 LOG.info(String.format("read from region %s column family %s in %dms",
97 region.getRegionNameAsString(), column.getNameAsString(), msTime));
98 }
99 }
100
101 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
102
103 @Override
104 public void publishReadFailure(String table, String server) {
105 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
106 }
107
108 @Override
109 public void publishReadTiming(String table, String server, long msTime) {
110 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
111 table, server, msTime));
112 }
113 }
114
// Process exit codes.
// Bad command line, or a table name passed in region server mode.
private static final int USAGE_EXIT_CODE = 1;
// The monitor failed before it finished initializing (e.g. HBaseAdmin could not be created).
private static final int INIT_ERROR_EXIT_CODE = 2;
// An initialized monitor ran longer than the configured timeout.
private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
// A monitor pass failed after initialization.
private static final int ERROR_EXIT_CODE = 4;

// Pause between passes selected by -daemon, in milliseconds (the value is given to Thread.sleep).
private static final long DEFAULT_INTERVAL = 6000;

// Longest time a single pass may run, in milliseconds, unless overridden with -t.
private static final long DEFAULT_TIMEOUT = 600000;

private static final Log LOG = LogFactory.getLog(Canary.class);

private Configuration conf = null;
// Pause between passes in milliseconds; 0 means run a single pass and return.
private long interval = 0;
private Sink sink = null;

// Set by -e: treat the monitor targets as regular expressions.
private boolean useRegExp;
// Set by -t: per-pass timeout in milliseconds.
private long timeout = DEFAULT_TIMEOUT;
// Set by -f: exit the process as soon as a monitor reports an error.
private boolean failOnError = true;
// Set by -regionserver: targets name region servers instead of tables.
private boolean regionServerMode = false;
134
/**
 * Creates a canary that publishes its results through a {@link RegionServerStdOutSink}.
 */
public Canary() {
  this(new RegionServerStdOutSink());
}
138
/**
 * Creates a canary that publishes its results through the given sink.
 *
 * @param sink receiver of read results; region server mode casts it to {@link ExtendedSink},
 *          so it must implement that interface if -regionserver is used
 */
public Canary(Sink sink) {
  this.sink = sink;
}
142
/**
 * Returns the configuration last supplied through {@link #setConf(Configuration)},
 * or {@code null} if none has been set.
 */
@Override
public Configuration getConf() {
  return conf;
}
147
/**
 * Stores the configuration that monitors created by this canary will use.
 *
 * @param conf the configuration to use
 */
@Override
public void setConf(Configuration conf) {
  this.conf = conf;
}
152
153 @Override
154 public int run(String[] args) throws Exception {
155 int index = -1;
156
157
158 for (int i = 0; i < args.length; i++) {
159 String cmd = args[i];
160
161 if (cmd.startsWith("-")) {
162 if (index >= 0) {
163
164 System.err.println("Invalid command line options");
165 printUsageAndExit();
166 }
167
168 if (cmd.equals("-help")) {
169
170 printUsageAndExit();
171 } else if (cmd.equals("-daemon") && interval == 0) {
172
173 interval = DEFAULT_INTERVAL;
174 } else if (cmd.equals("-interval")) {
175
176 i++;
177
178 if (i == args.length) {
179 System.err.println("-interval needs a numeric value argument.");
180 printUsageAndExit();
181 }
182
183 try {
184 interval = Long.parseLong(args[i]) * 1000;
185 } catch (NumberFormatException e) {
186 System.err.println("-interval needs a numeric value argument.");
187 printUsageAndExit();
188 }
189 } else if(cmd.equals("-regionserver")) {
190 this.regionServerMode = true;
191 } else if (cmd.equals("-e")) {
192 this.useRegExp = true;
193 } else if (cmd.equals("-t")) {
194 i++;
195
196 if (i == args.length) {
197 System.err.println("-t needs a numeric value argument.");
198 printUsageAndExit();
199 }
200
201 try {
202 this.timeout = Long.parseLong(args[i]);
203 } catch (NumberFormatException e) {
204 System.err.println("-t needs a numeric value argument.");
205 printUsageAndExit();
206 }
207
208 } else if (cmd.equals("-f")) {
209 i++;
210
211 if (i == args.length) {
212 System.err
213 .println("-f needs a boolean value argument (true|false).");
214 printUsageAndExit();
215 }
216
217 this.failOnError = Boolean.parseBoolean(args[i]);
218 } else {
219
220 System.err.println(cmd + " options is invalid.");
221 printUsageAndExit();
222 }
223 } else if (index < 0) {
224
225 index = i;
226 }
227 }
228
229
230 Monitor monitor = null;
231 Thread monitorThread = null;
232 long startTime = 0;
233 long currentTimeLength = 0;
234
235 do {
236
237 monitor = this.newMonitor(index, args);
238 monitorThread = new Thread(monitor);
239 startTime = System.currentTimeMillis();
240 monitorThread.start();
241 while (!monitor.isDone()) {
242
243 Thread.sleep(1000);
244
245 if (this.failOnError && monitor.hasError()) {
246 monitorThread.interrupt();
247 if (monitor.initialized) {
248 System.exit(monitor.errorCode);
249 } else {
250 System.exit(INIT_ERROR_EXIT_CODE);
251 }
252 }
253 currentTimeLength = System.currentTimeMillis() - startTime;
254 if (currentTimeLength > this.timeout) {
255 LOG.error("The monitor is running too long (" + currentTimeLength
256 + ") after timeout limit:" + this.timeout
257 + " will be killed itself !!");
258 if (monitor.initialized) {
259 System.exit(TIMEOUT_ERROR_EXIT_CODE);
260 } else {
261 System.exit(INIT_ERROR_EXIT_CODE);
262 }
263 break;
264 }
265 }
266
267 if (this.failOnError && monitor.hasError()) {
268 monitorThread.interrupt();
269 System.exit(monitor.errorCode);
270 }
271
272 Thread.sleep(interval);
273 } while (interval > 0);
274
275 return(monitor.errorCode);
276 }
277
278 private void printUsageAndExit() {
279 System.err.printf(
280 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
281 getClass().getName());
282 System.err.println(" where [opts] are:");
283 System.err.println(" -help Show this help and exit.");
284 System.err.println(" -regionserver replace the table argument to regionserver,");
285 System.err.println(" which means to enable regionserver mode");
286 System.err.println(" -daemon Continuous check at defined intervals.");
287 System.err.println(" -interval <N> Interval between checks (sec)");
288 System.err.println(" -e Use region/regionserver as regular expression");
289 System.err.println(" which means the region/regionserver is regular expression pattern");
290 System.err.println(" -f <B> stop whole program if first error occurs," +
291 " default is true");
292 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
293 System.exit(USAGE_EXIT_CODE);
294 }
295
296
297
298
299
300
301
302
303 public Monitor newMonitor(int index, String[] args) {
304 Monitor monitor = null;
305 String[] monitorTargets = null;
306
307 if(index >= 0) {
308 int length = args.length - index;
309 monitorTargets = new String[length];
310 System.arraycopy(args, index, monitorTargets, 0, length);
311 }
312
313 if(this.regionServerMode) {
314 monitor = new RegionServerMonitor(
315 this.conf,
316 monitorTargets,
317 this.useRegExp,
318 (ExtendedSink)this.sink);
319 } else {
320 monitor = new RegionMonitor(this.conf, monitorTargets, this.useRegExp, this.sink);
321 }
322 return monitor;
323 }
324
325
326 public static abstract class Monitor implements Runnable {
327
328 protected Configuration config;
329 protected HBaseAdmin admin;
330 protected String[] targets;
331 protected boolean useRegExp;
332 protected boolean initialized = false;
333
334 protected boolean done = false;
335 protected int errorCode = 0;
336 protected Sink sink;
337
338 public boolean isDone() {
339 return done;
340 }
341
342 public boolean hasError() {
343 return errorCode != 0;
344 }
345
346 protected Monitor(Configuration config, String[] monitorTargets,
347 boolean useRegExp, Sink sink) {
348 if (null == config)
349 throw new IllegalArgumentException("config shall not be null");
350
351 this.config = config;
352 this.targets = monitorTargets;
353 this.useRegExp = useRegExp;
354 this.sink = sink;
355 }
356
357 public abstract void run();
358
359 protected boolean initAdmin() {
360 if (null == this.admin) {
361 try {
362 this.admin = new HBaseAdmin(config);
363 } catch (Exception e) {
364 LOG.error("Initial HBaseAdmin failed...", e);
365 this.errorCode = INIT_ERROR_EXIT_CODE;
366 }
367 } else if (admin.isAborted()) {
368 LOG.error("HBaseAdmin aborted");
369 this.errorCode = INIT_ERROR_EXIT_CODE;
370 }
371 return !this.hasError();
372 }
373 }
374
375
376 private static class RegionMonitor extends Monitor {
377
378 public RegionMonitor(Configuration config, String[] monitorTargets,
379 boolean useRegExp, Sink sink) {
380 super(config, monitorTargets, useRegExp, sink);
381 }
382
383 @Override
384 public void run() {
385 if(this.initAdmin()) {
386 try {
387 if (this.targets != null && this.targets.length > 0) {
388 String[] tables = generateMonitorTables(this.targets);
389 this.initialized = true;
390 for (String table : tables) {
391 Canary.sniff(admin, sink, table);
392 }
393 } else {
394 sniff();
395 }
396 } catch (Exception e) {
397 LOG.error("Run regionMonitor failed", e);
398 this.errorCode = ERROR_EXIT_CODE;
399 }
400 }
401 this.done = true;
402 }
403
404 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
405 String[] returnTables = null;
406
407 if(this.useRegExp) {
408 Pattern pattern = null;
409 HTableDescriptor[] tds = null;
410 Set<String> tmpTables = new TreeSet<String>();
411 try {
412 for (String monitorTarget : monitorTargets) {
413 pattern = Pattern.compile(monitorTarget);
414 tds = this.admin.listTables(pattern);
415 if (tds != null) {
416 for (HTableDescriptor td : tds) {
417 tmpTables.add(td.getNameAsString());
418 }
419 }
420 }
421 } catch(IOException e) {
422 LOG.error("Communicate with admin failed", e);
423 throw e;
424 }
425
426 if(tmpTables.size() > 0) {
427 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
428 } else {
429 String msg = "No HTable found, tablePattern:"
430 + Arrays.toString(monitorTargets);
431 LOG.error(msg);
432 this.errorCode = INIT_ERROR_EXIT_CODE;
433 throw new TableNotFoundException(msg);
434 }
435 } else {
436 returnTables = monitorTargets;
437 }
438
439 return returnTables;
440 }
441
442
443
444
445 private void sniff() throws Exception {
446 for (HTableDescriptor table : admin.listTables()) {
447 Canary.sniff(admin, sink, table);
448 }
449 }
450
451 }
452
453
454
455
456
457 public static void sniff(final HBaseAdmin admin, TableName tableName) throws Exception {
458 sniff(admin, new StdOutSink(), tableName.getNameAsString());
459 }
460
461
462
463
464
465 private static void sniff(final HBaseAdmin admin, final Sink sink, String tableName)
466 throws Exception {
467 if (admin.isTableAvailable(tableName)) {
468 sniff(admin, sink, admin.getTableDescriptor(tableName.getBytes()));
469 } else {
470 LOG.warn(String.format("Table %s is not available", tableName));
471 }
472 }
473
474
475
476
477 private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc)
478 throws Exception {
479 HTable table = null;
480
481 try {
482 table = new HTable(admin.getConfiguration(), tableDesc.getName());
483 } catch (TableNotFoundException e) {
484 return;
485 }
486
487 try {
488 for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
489 try {
490 sniffRegion(admin, sink, region, table);
491 } catch (Exception e) {
492 sink.publishReadFailure(region, e);
493 LOG.debug("sniffRegion failed", e);
494 }
495 }
496 } finally {
497 table.close();
498 }
499 }
500
501
502
503
504
505 private static void sniffRegion(
506 final HBaseAdmin admin,
507 final Sink sink,
508 HRegionInfo region,
509 HTable table) throws Exception {
510 HTableDescriptor tableDesc = table.getTableDescriptor();
511 byte[] startKey = null;
512 Get get = null;
513 Scan scan = null;
514 ResultScanner rs = null;
515 StopWatch stopWatch = new StopWatch();
516 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
517 stopWatch.reset();
518 startKey = region.getStartKey();
519
520 if (startKey.length > 0) {
521 get = new Get(startKey);
522 get.addFamily(column.getName());
523 } else {
524 scan = new Scan();
525 scan.setCaching(1);
526 scan.addFamily(column.getName());
527 scan.setMaxResultSize(1L);
528 }
529
530 try {
531 if (startKey.length > 0) {
532 stopWatch.start();
533 table.get(get);
534 stopWatch.stop();
535 sink.publishReadTiming(region, column, stopWatch.getTime());
536 } else {
537 stopWatch.start();
538 rs = table.getScanner(scan);
539 stopWatch.stop();
540 sink.publishReadTiming(region, column, stopWatch.getTime());
541 }
542 } catch (Exception e) {
543 sink.publishReadFailure(region, column, e);
544 } finally {
545 if (rs != null) {
546 rs.close();
547 }
548 scan = null;
549 get = null;
550 startKey = null;
551 }
552 }
553 }
554
555 private static class RegionServerMonitor extends Monitor {
556
/**
 * @param config configuration used to create the admin client
 * @param monitorTargets region server names or patterns to monitor, or null for all
 * @param useRegExp whether the targets are regular expressions
 * @param sink receiver of the per-region-server results
 */
public RegionServerMonitor(Configuration config, String[] monitorTargets,
boolean useRegExp, ExtendedSink sink) {
super(config, monitorTargets, useRegExp, sink);
}
561
/**
 * Returns the sink as an {@link ExtendedSink}; the constructor only accepts that type,
 * so the cast matches what was passed in.
 */
private ExtendedSink getSink() {
return (ExtendedSink) this.sink;
}
565
566 @Override
567 public void run() {
568 if (this.initAdmin() && this.checkNoTableNames()) {
569 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
570 this.initialized = true;
571 this.monitorRegionServers(rsAndRMap);
572 }
573 this.done = true;
574 }
575
576 private boolean checkNoTableNames() {
577 List<String> foundTableNames = new ArrayList<String>();
578 TableName[] tableNames = null;
579
580 try {
581 tableNames = this.admin.listTableNames();
582 } catch (IOException e) {
583 LOG.error("Get listTableNames failed", e);
584 this.errorCode = INIT_ERROR_EXIT_CODE;
585 return false;
586 }
587
588 if (this.targets == null || this.targets.length == 0) return true;
589
590 for (String target : this.targets) {
591 for (TableName tableName : tableNames) {
592 if (target.equals(tableName.getNameAsString())) {
593 foundTableNames.add(target);
594 }
595 }
596 }
597
598 if (foundTableNames.size() > 0) {
599 System.err.println("Cannot pass a tablename when using the -regionserver " +
600 "option, tablenames:" + foundTableNames.toString());
601 this.errorCode = USAGE_EXIT_CODE;
602 }
603 return foundTableNames.size() == 0;
604 }
605
606 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
607 String serverName = null;
608 String tableName = null;
609 HRegionInfo region = null;
610 HTable table = null;
611 Get get = null;
612 byte[] startKey = null;
613 Scan scan = null;
614 StopWatch stopWatch = new StopWatch();
615
616 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
617 stopWatch.reset();
618 serverName = entry.getKey();
619
620 region = entry.getValue().get(0);
621 try {
622 tableName = region.getTable().getNameAsString();
623 table = new HTable(this.admin.getConfiguration(), tableName);
624 startKey = region.getStartKey();
625
626 if(startKey.length > 0) {
627 get = new Get(startKey);
628 stopWatch.start();
629 table.get(get);
630 stopWatch.stop();
631 } else {
632 scan = new Scan();
633 scan.setCaching(1);
634 scan.setMaxResultSize(1L);
635 stopWatch.start();
636 table.getScanner(scan);
637 stopWatch.stop();
638 }
639 this.getSink().publishReadTiming(tableName, serverName, stopWatch.getTime());
640 } catch (TableNotFoundException tnfe) {
641
642 } catch (TableNotEnabledException tnee) {
643
644 LOG.debug("The targeted table was disabled. Assuming success.");
645 } catch (DoNotRetryIOException dnrioe) {
646 this.getSink().publishReadFailure(tableName, serverName);
647 LOG.error(dnrioe);
648 } catch (IOException e) {
649 this.getSink().publishReadFailure(tableName, serverName);
650 LOG.error(e);
651 this.errorCode = ERROR_EXIT_CODE;
652 } finally {
653 if (table != null) {
654 try {
655 table.close();
656 } catch (IOException e) {
657 }
658 }
659 scan = null;
660 get = null;
661 startKey = null;
662 }
663 }
664 }
665
666 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
667 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
668 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
669 return regionServerAndRegionsMap;
670 }
671
672 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
673 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
674 HTable table = null;
675 try {
676 HTableDescriptor[] tableDescs = this.admin.listTables();
677 List<HRegionInfo> regions = null;
678 for (HTableDescriptor tableDesc : tableDescs) {
679 table = new HTable(this.admin.getConfiguration(), tableDesc.getName());
680
681 for (Map.Entry<HRegionInfo, ServerName> entry : table
682 .getRegionLocations().entrySet()) {
683 ServerName rs = entry.getValue();
684 String rsName = rs.getHostname();
685 HRegionInfo r = entry.getKey();
686
687 if (rsAndRMap.containsKey(rsName)) {
688 regions = rsAndRMap.get(rsName);
689 } else {
690 regions = new ArrayList<HRegionInfo>();
691 rsAndRMap.put(rsName, regions);
692 }
693 regions.add(r);
694 }
695 table.close();
696 }
697
698 } catch (IOException e) {
699 String msg = "Get HTables info failed";
700 LOG.error(msg, e);
701 this.errorCode = INIT_ERROR_EXIT_CODE;
702 } finally {
703 if (table != null) {
704 try {
705 table.close();
706 } catch (IOException e) {
707 LOG.warn("Close table failed", e);
708 }
709 }
710 }
711
712 return rsAndRMap;
713 }
714
715 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
716 Map<String, List<HRegionInfo>> fullRsAndRMap) {
717
718 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
719
720 if (this.targets != null && this.targets.length > 0) {
721 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
722 Pattern pattern = null;
723 Matcher matcher = null;
724 boolean regExpFound = false;
725 for (String rsName : this.targets) {
726 if (this.useRegExp) {
727 regExpFound = false;
728 pattern = Pattern.compile(rsName);
729 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
730 matcher = pattern.matcher(entry.getKey());
731 if (matcher.matches()) {
732 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
733 regExpFound = true;
734 }
735 }
736 if (!regExpFound) {
737 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
738 }
739 } else {
740 if (fullRsAndRMap.containsKey(rsName)) {
741 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
742 } else {
743 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
744 }
745 }
746 }
747 } else {
748 filteredRsAndRMap = fullRsAndRMap;
749 }
750 return filteredRsAndRMap;
751 }
752 }
753
754 public static void main(String[] args) throws Exception {
755 final Configuration conf = HBaseConfiguration.create();
756 AuthUtil.launchAuthChore(conf);
757 int exitCode = ToolRunner.run(conf, new Canary(), args);
758 System.exit(exitCode);
759 }
760 }