/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.io.PrintWriter;
23 import java.io.StringWriter;
24 import java.net.InetAddress;
25 import java.net.URI;
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.HashMap;
32 import java.util.HashSet;
33 import java.util.Iterator;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.Map.Entry;
37 import java.util.Set;
38 import java.util.SortedMap;
39 import java.util.SortedSet;
40 import java.util.TreeMap;
41 import java.util.TreeSet;
42 import java.util.concurrent.Callable;
43 import java.util.concurrent.ConcurrentSkipListMap;
44 import java.util.concurrent.ExecutionException;
45 import java.util.concurrent.ExecutorService;
46 import java.util.concurrent.Future;
47 import java.util.concurrent.ScheduledThreadPoolExecutor;
48 import java.util.concurrent.atomic.AtomicInteger;
49 import java.util.concurrent.atomic.AtomicBoolean;
50
51 import org.apache.commons.lang.StringUtils;
52 import org.apache.commons.logging.Log;
53 import org.apache.commons.logging.LogFactory;
54 import org.apache.hadoop.hbase.classification.InterfaceAudience;
55 import org.apache.hadoop.hbase.classification.InterfaceStability;
56 import org.apache.hadoop.conf.Configuration;
57 import org.apache.hadoop.conf.Configured;
58 import org.apache.hadoop.fs.FSDataOutputStream;
59 import org.apache.hadoop.fs.FileStatus;
60 import org.apache.hadoop.fs.FileSystem;
61 import org.apache.hadoop.fs.Path;
62 import org.apache.hadoop.fs.permission.FsAction;
63 import org.apache.hadoop.fs.permission.FsPermission;
64 import org.apache.hadoop.hbase.Abortable;
65 import org.apache.hadoop.hbase.Cell;
66 import org.apache.hadoop.hbase.ClusterStatus;
67 import org.apache.hadoop.hbase.HBaseConfiguration;
68 import org.apache.hadoop.hbase.HColumnDescriptor;
69 import org.apache.hadoop.hbase.HConstants;
70 import org.apache.hadoop.hbase.HRegionInfo;
71 import org.apache.hadoop.hbase.HRegionLocation;
72 import org.apache.hadoop.hbase.HTableDescriptor;
73 import org.apache.hadoop.hbase.KeyValue;
74 import org.apache.hadoop.hbase.MasterNotRunningException;
75 import org.apache.hadoop.hbase.ServerName;
76 import org.apache.hadoop.hbase.TableName;
77 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
78 import org.apache.hadoop.hbase.catalog.MetaEditor;
79 import org.apache.hadoop.hbase.client.Delete;
80 import org.apache.hadoop.hbase.client.Get;
81 import org.apache.hadoop.hbase.client.HBaseAdmin;
82 import org.apache.hadoop.hbase.client.HConnectable;
83 import org.apache.hadoop.hbase.client.HConnection;
84 import org.apache.hadoop.hbase.client.HConnectionManager;
85 import org.apache.hadoop.hbase.client.HTable;
86 import org.apache.hadoop.hbase.client.MetaScanner;
87 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
88 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
89 import org.apache.hadoop.hbase.client.Put;
90 import org.apache.hadoop.hbase.client.Result;
91 import org.apache.hadoop.hbase.client.RowMutations;
92 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
93 import org.apache.hadoop.hbase.io.hfile.HFile;
94 import org.apache.hadoop.hbase.master.MasterFileSystem;
95 import org.apache.hadoop.hbase.master.RegionState;
96 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
97 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
98 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
99 import org.apache.hadoop.hbase.regionserver.HRegion;
100 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
101 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
102 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
103 import org.apache.hadoop.hbase.security.UserProvider;
104 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
105 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
106 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
107 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
108 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
109 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
110 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
111 import org.apache.hadoop.hbase.zookeeper.ZKTable;
112 import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
113 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
114 import org.apache.hadoop.hbase.security.AccessDeniedException;
115 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
116 import org.apache.hadoop.io.IOUtils;
117 import org.apache.hadoop.ipc.RemoteException;
118 import org.apache.hadoop.security.UserGroupInformation;
119 import org.apache.hadoop.util.ReflectionUtils;
120 import org.apache.hadoop.util.Tool;
121 import org.apache.hadoop.util.ToolRunner;
122 import org.apache.zookeeper.KeeperException;
123
124 import com.google.common.annotations.VisibleForTesting;
125 import com.google.common.base.Joiner;
126 import com.google.common.base.Preconditions;
127 import com.google.common.collect.Lists;
128 import com.google.common.collect.Multimap;
129 import com.google.common.collect.TreeMultimap;
130 import com.google.protobuf.ServiceException;
131
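/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase.
 *
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers, and the state of data in HDFS (.regioninfo files) all agree. Table
 * integrity checks verify that every possible row key resolves to exactly one
 * region of a table, with no holes or overlaps.
 *
 * The repair works in two phases: first table integrity is repaired against the
 * data in HDFS (see offlineHdfsIntegrityRepair), then region consistency with
 * hbase:meta and the deployed assignments is repaired (see onlineConsistencyRepair).
 *
 * A minimal programmatic sketch, with error handling omitted; in practice the
 * tool is normally driven from the command line through {@link ToolRunner}:
 *
 * <pre>
 *   HBaseFsck fsck = new HBaseFsck(conf);
 *   fsck.connect();                  // acquires the hbck lock and opens connections
 *   int errorCount = fsck.onlineHbck();
 * </pre>
 */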
177 @InterfaceAudience.Public
178 @InterfaceStability.Evolving
179 public class HBaseFsck extends Configured {
180 public static final long DEFAULT_TIME_LAG = 60000;
181 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
182 private static final int MAX_NUM_THREADS = 50;
183 private static boolean rsSupportsOffline = true;
184 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
185 private static final int DEFAULT_MAX_MERGE = 5;
186 private static final String TO_BE_LOADED = "to_be_loaded";
187 private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
188
189
190
191
192
193 private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
194 private ClusterStatus status;
195 private HConnection connection;
196 private HBaseAdmin admin;
197 private HTable meta;
198
199 protected ExecutorService executor;
200 private long startMillis = System.currentTimeMillis();
201 private HFileCorruptionChecker hfcc;
202 private int retcode = 0;
203 private Path HBCK_LOCK_PATH;
204 private FSDataOutputStream hbckOutFd;
205
206
207
208 private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
209
210
211
212
213 private static boolean details = false;
214 private long timelag = DEFAULT_TIME_LAG;
215 private boolean fixAssignments = false;
216 private boolean fixMeta = false;
217 private boolean checkHdfs = true;
218 private boolean fixHdfsHoles = false;
219 private boolean fixHdfsOverlaps = false;
220 private boolean fixHdfsOrphans = false;
221 private boolean fixTableOrphans = false;
222 private boolean fixVersionFile = false;
223 private boolean fixSplitParents = false;
224 private boolean fixReferenceFiles = false;
225 private boolean fixEmptyMetaCells = false;
226 private boolean fixTableLocks = false;
227 private boolean fixTableZNodes = false;
228 private boolean fixAny = false;
229
230
231
232 private Set<TableName> tablesIncluded = new HashSet<TableName>();
233 private int maxMerge = DEFAULT_MAX_MERGE;
234 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
235 private boolean sidelineBigOverlaps = false;
236 private Path sidelineDir = null;
237
238 private boolean rerun = false;
239 private static boolean summary = false;
240 private boolean checkMetaOnly = false;
241 private boolean checkRegionBoundaries = false;
242 private boolean ignorePreCheckPermission = false;
243
244
245
246
247 final private ErrorReporter errors;
248 int fixes = 0;
249
250
251
252
253
254
255 private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
256 private TreeSet<TableName> disabledTables =
257 new TreeSet<TableName>();
258
259 private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
260
261
262
263
264
265
266
267
268
269
270
271 private SortedMap<TableName, TableInfo> tablesInfo =
272 new ConcurrentSkipListMap<TableName, TableInfo>();
273
274
275
276
277 private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
278
279 private Map<TableName, Set<String>> orphanTableDirs =
280 new HashMap<TableName, Set<String>>();
281
282
283
284
285 private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
286
287
288
289
290
291
292
293
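  /**
   * Constructor. Creates its own executor with "hbasefsck.numthreads" daemon
   * threads for parallel work.
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */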
294 public HBaseFsck(Configuration conf) throws MasterNotRunningException,
295 ZooKeeperConnectionException, IOException, ClassNotFoundException {
296 super(conf);
297
298 setConf(HBaseConfiguration.create(getConf()));
299
300 getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
301 errors = getErrorReporter(conf);
302
303 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
304 executor = new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
305 }
306
307
308
309
310
311
312
313
314
315
316
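  /**
   * Constructor.
   *
   * @param conf Configuration object
   * @param exec executor service used for parallel work instead of creating one internally
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */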
317 public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
318 ZooKeeperConnectionException, IOException, ClassNotFoundException {
319 super(conf);
320 errors = getErrorReporter(getConf());
321 this.executor = exec;
322 }
323
324
325
326
327
328
329
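  /**
   * This method maintains an exclusive lock using a file (hbase-hbck.lock) under
   * the HBase temp directory on HDFS.
   *
   * @return FSDataOutputStream for the newly created lock file; null if another
   *   hbck instance already holds the lock (the file is already being created)
   *   or if acquiring it took too long.
   */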
330 private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
331 long start = EnvironmentEdgeManager.currentTimeMillis();
332 try {
333 FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
334 FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
335 HConstants.DATA_FILE_UMASK_KEY);
336 Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
337 fs.mkdirs(tmpDir);
338 HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
339 final FSDataOutputStream out = FSUtils.create(fs, HBCK_LOCK_PATH, defaultPerms, false);
340 out.writeBytes(InetAddress.getLocalHost().toString());
341 out.flush();
342 return out;
343 } catch(RemoteException e) {
344 if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
345 return null;
346 } else {
347 throw e;
348 }
349 } finally {
350 long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
351 if (duration > 30000) {
352 LOG.warn("Took " + duration + " milliseconds to obtain lock");
353
354 return null;
355 }
356 }
357 }
358
359 private void unlockHbck() {
360 if(hbckLockCleanup.compareAndSet(true, false)){
361 IOUtils.closeStream(hbckOutFd);
362 try{
363 FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
364 } catch(IOException ioe) {
365 LOG.warn("Failed to delete " + HBCK_LOCK_PATH);
366 LOG.debug(ioe);
367 }
368 }
369 }
370
371
372
373
374
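  /**
   * To repair region consistency, one must call connect() to get an initial view
   * of the cluster: it acquires the hbck lock file, registers a shutdown hook that
   * releases it, and sets up the cluster connection, admin, hbase:meta table
   * handle, and cluster status.
   *
   * @throws IOException if another hbck instance is running or the connection fails
   */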
375 public void connect() throws IOException {
376
377
378 hbckOutFd = checkAndMarkRunningHbck();
379 if (hbckOutFd == null) {
380 setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance. [If you are sure" +
382 " no other instance is running, delete the lock file " +
383 HBCK_LOCK_PATH + " and rerun the tool]");
384 throw new IOException("Duplicate hbck - Abort");
385 }
386
387
388 hbckLockCleanup.set(true);
389
390
391
392
393 Runtime.getRuntime().addShutdownHook(new Thread() {
394 @Override
395 public void run() {
396 unlockHbck();
397 }
398 });
399 LOG.debug("Launching hbck");
400
401 connection = HConnectionManager.createConnection(getConf());
402 admin = new HBaseAdmin(connection);
403 meta = new HTable(TableName.META_TABLE_NAME, connection);
404 status = admin.getClusterStatus();
405 }
406
407
408
409
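  /**
   * Print a cluster summary (live and dead region servers, master, backup masters,
   * load, regions in transition) and ask each live region server which regions it
   * currently hosts.
   */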
410 private void loadDeployedRegions() throws IOException, InterruptedException {
411
412 Collection<ServerName> regionServers = status.getServers();
413 errors.print("Number of live region servers: " + regionServers.size());
414 if (details) {
415 for (ServerName rsinfo: regionServers) {
416 errors.print(" " + rsinfo.getServerName());
417 }
418 }
419
420
421 Collection<ServerName> deadRegionServers = status.getDeadServerNames();
422 errors.print("Number of dead region servers: " + deadRegionServers.size());
423 if (details) {
424 for (ServerName name: deadRegionServers) {
425 errors.print(" " + name);
426 }
427 }
428
429
430 errors.print("Master: " + status.getMaster());
431
432
433 Collection<ServerName> backupMasters = status.getBackupMasters();
434 errors.print("Number of backup masters: " + backupMasters.size());
435 if (details) {
436 for (ServerName name: backupMasters) {
437 errors.print(" " + name);
438 }
439 }
440
441 errors.print("Average load: " + status.getAverageLoad());
442 errors.print("Number of requests: " + status.getRequestsCount());
443 errors.print("Number of regions: " + status.getRegionsCount());
444
445 Map<String, RegionState> rits = status.getRegionsInTransition();
446 errors.print("Number of regions in transition: " + rits.size());
447 if (details) {
448 for (RegionState state: rits.values()) {
449 errors.print(" " + state.toDescriptiveString());
450 }
451 }
452
453
454 processRegionServers(regionServers);
455 }
456
457
458
459
460 private void clearState() {
461
462 fixes = 0;
463 regionInfoMap.clear();
464 emptyRegionInfoQualifiers.clear();
465 disabledTables.clear();
466 errors.clear();
467 tablesInfo.clear();
468 orphanHdfsDirs.clear();
469 }
470
471
472
473
474
475
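  /**
   * This repair method analyzes HBase data in HDFS and repairs it to satisfy the
   * table integrity rules. HBase does not need to be online for this phase to
   * work. Repairs are retried up to "hbase.hbck.integrityrepair.iterations.max"
   * times while fixes are still being applied.
   */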
476 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
477
478 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
479 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
481
482 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
483 int curIter = 0;
484 do {
485 clearState();
486
487 restoreHdfsIntegrity();
488 curIter++;
489 } while (fixes > 0 && curIter <= maxIterations);
490
491
492
493 if (curIter > 2) {
494 if (curIter == maxIterations) {
495 LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
            + "Table integrity may not be fully repaired!");
497 } else {
498 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
499 }
500 }
501 }
502 }
503
504
505
506
507
508
509
510
511
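  /**
   * This repair method requires the cluster to be online since it contacts region
   * servers and the master. It makes each region's state in HDFS, in hbase:meta,
   * and its deployment on region servers consistent.
   *
   * @return the number of errors found, 0 on success, or a negative value if
   *   hbase:meta itself could not be loaded or is inconsistent
   */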
512 public int onlineConsistencyRepair() throws IOException, KeeperException,
513 InterruptedException {
514 clearState();
515
516
517 loadDeployedRegions();
518
519 recordMetaRegion();
520
521 if (!checkMetaRegion()) {
522 String errorMsg = "hbase:meta table is not consistent. ";
523 if (shouldFixAssignments()) {
524 errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
525 } else {
526 errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
527 }
528 errors.reportError(errorMsg + " Exiting...");
529 return -2;
530 }
531
    LOG.info("Loading regioninfo from the hbase:meta table");
533 boolean success = loadMetaEntries();
534 if (!success) return -1;
535
536
537 reportEmptyMetaCells();
538
539
540 if (shouldFixEmptyMetaCells()) {
541 fixEmptyMetaCells();
542 }
543
544
545 if (!checkMetaOnly) {
546 reportTablesInFlux();
547 }
548
549
550 if (shouldCheckHdfs()) {
551 loadHdfsRegionDirs();
552 loadHdfsRegionInfos();
553 }
554
555
556 loadDisabledTables();
557
558
559 fixOrphanTables();
560
561
562 checkAndFixConsistency();
563
564
565 checkIntegrity();
566 return errors.getErrorList().size();
567 }
568
569
570
571
572
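  /**
   * Contacts the master and runs the full online check and repair sequence:
   * offline HDFS integrity repair, online consistency repair, optional region
   * boundary checks, reference file repair, and table lock and table znode checks.
   *
   * @return 0 on success, non-zero on failure
   */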
573 public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
574
575 errors.print("Version: " + status.getHBaseVersion());
576 offlineHdfsIntegrityRepair();
577
578
579 boolean oldBalancer = admin.setBalancerRunning(false, true);
580 try {
581 onlineConsistencyRepair();
582 }
583 finally {
584 admin.setBalancerRunning(oldBalancer, false);
585 }
586
587 if (checkRegionBoundaries) {
588 checkRegionBoundaries();
589 }
590
591 offlineReferenceFileRepair();
592
593 checkAndFixTableLocks();
594
595
596 checkAndFixOrphanedTableZNodes();
597
598
599 unlockHbck();
600
601
602 printTableSummary(tablesInfo);
603 return errors.summarize();
604 }
605
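  /**
   * Extract the row key from a serialized KeyValue key: the first two bytes
   * encode the row length, followed by the row bytes themselves.
   */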
606 public static byte[] keyOnly (byte[] b) {
607 if (b == null)
608 return b;
609 int rowlength = Bytes.toShort(b, 0);
610 byte[] result = new byte[rowlength];
611 System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
612 return result;
613 }
614
615 private static class RegionBoundariesInformation {
616 public byte [] regionName;
617 public byte [] metaFirstKey;
618 public byte [] metaLastKey;
619 public byte [] storesFirstKey;
620 public byte [] storesLastKey;
621 @Override
622 public String toString () {
623 return "regionName=" + Bytes.toStringBinary(regionName) +
624 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
625 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
626 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
627 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
628 }
629 }
630
631 public void checkRegionBoundaries() {
632 try {
633 ByteArrayComparator comparator = new ByteArrayComparator();
634 List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), false);
635 final RegionBoundariesInformation currentRegionBoundariesInformation =
636 new RegionBoundariesInformation();
637 Path hbaseRoot = FSUtils.getRootDir(getConf());
638 for (HRegionInfo regionInfo : regions) {
639 Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
640 currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
641
642
643 Path path = new Path(tableDir, regionInfo.getEncodedName());
644 FileSystem fs = path.getFileSystem(getConf());
645 FileStatus[] files = fs.listStatus(path);
646
647 byte[] storeFirstKey = null;
648 byte[] storeLastKey = null;
649 for (FileStatus file : files) {
650 String fileName = file.getPath().toString();
651 fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
652 if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
653 FileStatus[] storeFiles = fs.listStatus(file.getPath());
654
655 for (FileStatus storeFile : storeFiles) {
656 HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
657 getConf()), getConf());
658 if ((reader.getFirstKey() != null)
659 && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
660 reader.getFirstKey()) > 0))) {
661 storeFirstKey = reader.getFirstKey();
662 }
663 if ((reader.getLastKey() != null)
664 && ((storeLastKey == null) || (comparator.compare(storeLastKey,
665 reader.getLastKey())) < 0)) {
666 storeLastKey = reader.getLastKey();
667 }
668 reader.close();
669 }
670 }
671 }
672 currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
673 currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
674 currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
675 currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
676 if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
677 currentRegionBoundariesInformation.metaFirstKey = null;
678 if (currentRegionBoundariesInformation.metaLastKey.length == 0)
679 currentRegionBoundariesInformation.metaLastKey = null;
680
681
682
683
684
685
686 boolean valid = true;
687
688 if ((currentRegionBoundariesInformation.storesFirstKey != null)
689 && (currentRegionBoundariesInformation.metaFirstKey != null)) {
690 valid = valid
691 && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
692 currentRegionBoundariesInformation.metaFirstKey) >= 0;
693 }
694
695 if ((currentRegionBoundariesInformation.storesLastKey != null)
696 && (currentRegionBoundariesInformation.metaLastKey != null)) {
697 valid = valid
698 && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
699 currentRegionBoundariesInformation.metaLastKey) < 0;
700 }
701 if (!valid) {
702 errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
703 tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
705 LOG.warn(currentRegionBoundariesInformation);
706 }
707 }
708 } catch (IOException e) {
709 LOG.error(e);
710 }
711 }
712
713
714
715
716 private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
717 for (HbckInfo hi : orphanHdfsDirs) {
718 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
719 adoptHdfsOrphan(hi);
720 }
721 }
722
723
724
725
726
727
728
729
730
731
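  /**
   * Orphaned regions are regions without a .regioninfo file. We "adopt" such an
   * orphan by scanning each of its hfiles for the smallest and largest row keys,
   * creating a new region covering that range, and merging the orphan's data into
   * it. Directories with no data are simply sidelined. The resulting region may
   * violate table integrity, which later phases repair by merging overlapping
   * regions.
   */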
732 @SuppressWarnings("deprecation")
733 private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
734 Path p = hi.getHdfsRegionDir();
735 FileSystem fs = p.getFileSystem(getConf());
736 FileStatus[] dirs = fs.listStatus(p);
737 if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
741 }
742
743 TableName tableName = hi.getTableName();
744 TableInfo tableInfo = tablesInfo.get(tableName);
745 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
746 HTableDescriptor template = tableInfo.getHTD();
747
748
749 Pair<byte[],byte[]> orphanRegionRange = null;
750 for (FileStatus cf : dirs) {
751 String cfName= cf.getPath().getName();
752
753 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
754
755 FileStatus[] hfiles = fs.listStatus(cf.getPath());
756 for (FileStatus hfile : hfiles) {
757 byte[] start, end;
758 HFile.Reader hf = null;
759 try {
760 CacheConfig cacheConf = new CacheConfig(getConf());
761 hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
762 hf.loadFileInfo();
763 KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
764 start = startKv.getRow();
765 KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
766 end = endKv.getRow();
767 } catch (IOException ioe) {
768 LOG.warn("Problem reading orphan file " + hfile + ", skipping");
769 continue;
770 } catch (NullPointerException ioe) {
771 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
772 continue;
773 } finally {
774 if (hf != null) {
775 hf.close();
776 }
777 }
778
779
780 if (orphanRegionRange == null) {
781
782 orphanRegionRange = new Pair<byte[], byte[]>(start, end);
783 } else {
784
785
786
787 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
788 orphanRegionRange.setFirst(start);
789 }
790 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
791 orphanRegionRange.setSecond(end);
792 }
793 }
794 }
795 }
796 if (orphanRegionRange == null) {
797 LOG.warn("No data in dir " + p + ", sidelining data");
798 fixes++;
799 sidelineRegionDir(fs, hi);
800 return;
801 }
802 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
803 Bytes.toString(orphanRegionRange.getSecond()) + ")");
804
805
806 HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond());
807 LOG.info("Creating new region : " + hri);
808 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
809 Path target = region.getRegionFileSystem().getRegionDir();
810
811
812 mergeRegionDirs(target, hi);
813 fixes++;
814 }
815
816
817
818
819
820
821
822
823
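  /**
   * Scan HDFS for all regions, load their .regioninfo files, and check table
   * integrity. If enabled, adopt orphan region directories and then fix holes and
   * overlaps, reloading state between passes.
   *
   * @return number of outstanding integrity errors after the repairs
   */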
824 private int restoreHdfsIntegrity() throws IOException, InterruptedException {
825
826 LOG.info("Loading HBase regioninfo from HDFS...");
827 loadHdfsRegionDirs();
828
829 int errs = errors.getErrorList().size();
830
831 tablesInfo = loadHdfsRegionInfos();
832 checkHdfsIntegrity(false, false);
833
834 if (errors.getErrorList().size() == errs) {
835 LOG.info("No integrity errors. We are done with this phase. Glorious.");
836 return 0;
837 }
838
839 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
840 adoptHdfsOrphans(orphanHdfsDirs);
841
842 }
843
844
845 if (shouldFixHdfsHoles()) {
846 clearState();
847 loadHdfsRegionDirs();
848 tablesInfo = loadHdfsRegionInfos();
849 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
850 }
851
852
853 if (shouldFixHdfsOverlaps()) {
854
855 clearState();
856 loadHdfsRegionDirs();
857 tablesInfo = loadHdfsRegionInfos();
858 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
859 }
860
861 return errors.getErrorList().size();
862 }
863
864
865
866
867
868
869
870
871
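  /**
   * Scan all store file paths under the HBase root dir. For every reference file
   * whose referred-to file is missing, report a LINGERING_REFERENCE_HFILE error
   * and, when reference file fixing is enabled, sideline the lingering reference
   * into the hbck sideline directory.
   */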
872 private void offlineReferenceFileRepair() throws IOException {
873 Configuration conf = getConf();
874 Path hbaseRoot = FSUtils.getRootDir(conf);
875 FileSystem fs = hbaseRoot.getFileSystem(conf);
876 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot);
877 for (Path path: allFiles.values()) {
878 boolean isReference = false;
879 try {
880 isReference = StoreFileInfo.isReference(path);
881 } catch (Throwable t) {
882
883
884
885
886 }
887 if (!isReference) continue;
888
889 Path referredToFile = StoreFileInfo.getReferredToFile(path);
890 if (fs.exists(referredToFile)) continue;
891
892
893 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
894 "Found lingering reference file " + path);
895 if (!shouldFixReferenceFiles()) continue;
896
897
898 boolean success = false;
899 String pathStr = path.toString();
900
901
902
903
904
905 int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
906 for (int i = 0; index > 0 && i < 5; i++) {
907 index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
908 }
909 if (index > 0) {
910 Path rootDir = getSidelineDir();
911 Path dst = new Path(rootDir, pathStr.substring(index + 1));
912 fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
914 + path + " to " + dst);
915 setShouldRerun();
916
917 success = fs.rename(path, dst);
918 }
919 if (!success) {
920 LOG.error("Failed to sideline reference file " + path);
921 }
922 }
923 }
924
925
926
927
928 private void reportEmptyMetaCells() {
929 errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
930 emptyRegionInfoQualifiers.size());
931 if (details) {
932 for (Result r: emptyRegionInfoQualifiers) {
933 errors.print(" " + r);
934 }
935 }
936 }
937
938
939
940
941 private void reportTablesInFlux() {
942 AtomicInteger numSkipped = new AtomicInteger(0);
943 HTableDescriptor[] allTables = getTables(numSkipped);
944 errors.print("Number of Tables: " + allTables.length);
945 if (details) {
946 if (numSkipped.get() > 0) {
947 errors.detail("Number of Tables in flux: " + numSkipped.get());
948 }
949 for (HTableDescriptor td : allTables) {
950 errors.detail(" Table: " + td.getTableName() + "\t" +
951 (td.isReadOnly() ? "ro" : "rw") + "\t" +
952 (td.isMetaRegion() ? "META" : " ") + "\t" +
953 " families: " + td.getFamilies().size());
954 }
955 }
956 }
957
958 public ErrorReporter getErrors() {
959 return errors;
960 }
961
962
963
964
965
966 private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
967 Path regionDir = hbi.getHdfsRegionDir();
968 if (regionDir == null) {
969 LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
970 return;
971 }
972
973 if (hbi.hdfsEntry.hri != null) {
974
975 return;
976 }
977
978 FileSystem fs = FileSystem.get(getConf());
979 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
980 LOG.debug("HRegionInfo read: " + hri.toString());
981 hbi.hdfsEntry.hri = hri;
982 }
983
984
985
986
987
988 public static class RegionRepairException extends IOException {
989 private static final long serialVersionUID = 1L;
990 final IOException ioe;
991 public RegionRepairException(String s, IOException ioe) {
992 super(s);
993 this.ioe = ioe;
994 }
995 }
996
997
998
999
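  /**
   * Populate tablesInfo by reading the .regioninfo file of every region found on
   * HDFS in parallel, grouping regions by table and loading each table's
   * descriptor. Tables whose .tableinfo cannot be read are recorded in
   * orphanTableDirs together with their column families.
   */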
1000 private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1001 throws IOException, InterruptedException {
1002 tablesInfo.clear();
1003
1004 Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1005
1006
1007 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1008 List<Future<Void>> hbiFutures;
1009
1010 for (HbckInfo hbi : hbckInfos) {
1011 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1012 hbis.add(work);
1013 }
1014
1015
1016 hbiFutures = executor.invokeAll(hbis);
1017
1018 for(int i=0; i<hbiFutures.size(); i++) {
1019 WorkItemHdfsRegionInfo work = hbis.get(i);
1020 Future<Void> f = hbiFutures.get(i);
1021 try {
1022 f.get();
1023 } catch(ExecutionException e) {
1024 LOG.warn("Failed to read .regioninfo file for region " +
1025 work.hbi.getRegionNameAsString(), e.getCause());
1026 }
1027 }
1028
1029 Path hbaseRoot = FSUtils.getRootDir(getConf());
1030 FileSystem fs = hbaseRoot.getFileSystem(getConf());
1031
1032 for (HbckInfo hbi: hbckInfos) {
1033
1034 if (hbi.getHdfsHRI() == null) {
1035
1036 continue;
1037 }
1038
1039
1040
1041 TableName tableName = hbi.getTableName();
1042 if (tableName == null) {
1043
1044 LOG.warn("tableName was null for: " + hbi);
1045 continue;
1046 }
1047
1048 TableInfo modTInfo = tablesInfo.get(tableName);
1049 if (modTInfo == null) {
1050
1051 modTInfo = new TableInfo(tableName);
1052 tablesInfo.put(tableName, modTInfo);
1053 try {
1054 HTableDescriptor htd =
1055 FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1056 modTInfo.htds.add(htd);
1057 } catch (IOException ioe) {
1058 if (!orphanTableDirs.containsKey(tableName)) {
1059 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1060
1061 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1062 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1063 Set<String> columns = new HashSet<String>();
1064 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1065 }
1066 }
1067 }
1068 if (!hbi.isSkipChecks()) {
1069 modTInfo.addRegionInfo(hbi);
1070 }
1071 }
1072
1073 loadTableInfosForTablesWithNoRegion();
1074
1075 return tablesInfo;
1076 }
1077
1078
1079
1080
1081
1082
1083
1084
1085 private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1086 Path regionDir = hbi.getHdfsRegionDir();
1087 FileSystem fs = regionDir.getFileSystem(getConf());
1088 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1089 for (FileStatus subdir : subDirs) {
1090 String columnfamily = subdir.getPath().getName();
1091 columns.add(columnfamily);
1092 }
1093 return columns;
1094 }
1095
1096
1097
1098
1099
1100
1101
1102
1103 private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1104 Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnFamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnFamily));
    }
1110 fstd.createTableDescriptor(htd, true);
1111 return true;
1112 }
1113
1114
1115
1116
1117
1118 public void fixEmptyMetaCells() throws IOException {
1119 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1120 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1121 for (Result region : emptyRegionInfoQualifiers) {
1122 deleteMetaRegion(region.getRow());
1123 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1124 }
1125 emptyRegionInfoQualifiers.clear();
1126 }
1127 }
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
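  /**
   * Fix orphan tables by recreating their .tableinfo files: if the table
   * descriptor is still available from the master it is written back as-is;
   * otherwise a default descriptor is fabricated from the column family
   * directories found on disk. A manual re-run of hbck is recommended afterwards.
   */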
1138 public void fixOrphanTables() throws IOException {
1139 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1140
1141 List<TableName> tmpList = new ArrayList<TableName>();
1142 tmpList.addAll(orphanTableDirs.keySet());
1143 HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1144 Iterator<Entry<TableName, Set<String>>> iter =
1145 orphanTableDirs.entrySet().iterator();
1146 int j = 0;
1147 int numFailedCase = 0;
1148 FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1149 while (iter.hasNext()) {
1150 Entry<TableName, Set<String>> entry =
1151 iter.next();
1152 TableName tableName = entry.getKey();
1153 LOG.info("Trying to fix orphan table error: " + tableName);
1154 if (j < htds.length) {
1155 if (tableName.equals(htds[j].getTableName())) {
1156 HTableDescriptor htd = htds[j];
1157 LOG.info("fixing orphan table: " + tableName + " from cache");
1158 fstd.createTableDescriptor(htd, true);
1159 j++;
1160 iter.remove();
1161 }
1162 } else {
1163 if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1164 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
          LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: " + tableName);
1166 iter.remove();
1167 } else {
          LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1169 numFailedCase++;
1170 }
1171 }
1172 fixes++;
1173 }
1174
1175 if (orphanTableDirs.isEmpty()) {
1176
1177
1178 setShouldRerun();
        LOG.warn("Strongly recommend manually re-running hbck after all orphanTableDirs have been fixed");
1180 } else if (numFailedCase > 0) {
1181 LOG.error("Failed to fix " + numFailedCase
1182 + " OrphanTables with default .tableinfo files");
1183 }
1184
1185 }
1186
1187 orphanTableDirs.clear();
1188
1189 }
1190
1191
1192
1193
1194
1195
1196 private HRegion createNewMeta() throws IOException {
1197 Path rootdir = FSUtils.getRootDir(getConf());
1198 Configuration c = getConf();
1199 HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1200 HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1201 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1202 HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
1203 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1204 return meta;
1205 }
1206
1207
1208
1209
1210
1211
1212
1213 private ArrayList<Put> generatePuts(
1214 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1215 ArrayList<Put> puts = new ArrayList<Put>();
1216 boolean hasProblems = false;
1217 for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1218 TableName name = e.getKey();
1219
1220
1221 if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1222 continue;
1223 }
1224
1225 TableInfo ti = e.getValue();
1226 for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1227 .entrySet()) {
1228 Collection<HbckInfo> his = spl.getValue();
1229 int sz = his.size();
1230 if (sz != 1) {
1231
1232 LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1233 + " had " + sz + " regions instead of exactly 1." );
1234 hasProblems = true;
1235 continue;
1236 }
1237
1238
1239 HbckInfo hi = his.iterator().next();
1240 HRegionInfo hri = hi.getHdfsHRI();
1241 Put p = MetaEditor.makePutFromRegionInfo(hri);
1242 puts.add(p);
1243 }
1244 }
1245 return hasProblems ? null : puts;
1246 }
1247
1248
1249
1250
1251 private void suggestFixes(
1252 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1253 for (TableInfo tInfo : tablesInfo.values()) {
1254 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1255 tInfo.checkRegionChain(handler);
1256 }
1257 }
1258
1259
1260
1261
1262
1263
1264
1265
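  /**
   * Rebuild hbase:meta offline from the region metadata found in HDFS: verify
   * table integrity (suggesting fixes until nothing is left to fix), sideline the
   * existing hbase:meta, create a fresh meta region, and repopulate it with puts
   * generated from the HDFS region info.
   *
   * @return true if the rebuild succeeded
   */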
1266 public boolean rebuildMeta(boolean fix) throws IOException,
1267 InterruptedException {
1268
1269
1270
1271
1272
1273 LOG.info("Loading HBase regioninfo from HDFS...");
1274 loadHdfsRegionDirs();
1275
1276 int errs = errors.getErrorList().size();
1277 tablesInfo = loadHdfsRegionInfos();
1278 checkHdfsIntegrity(false, false);
1279
1280
1281 if (errors.getErrorList().size() != errs) {
1282
1283 while(true) {
1284 fixes = 0;
1285 suggestFixes(tablesInfo);
1286 errors.clear();
1287 loadHdfsRegionInfos();
1288 checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1289
1290 int errCount = errors.getErrorList().size();
1291
1292 if (fixes == 0) {
1293 if (errCount > 0) {
1294 return false;
1295 } else {
1296 break;
1297 }
1298 }
1299 }
1300 }
1301
1302
    LOG.info("HDFS regioninfos seem good. Sidelining old hbase:meta");
1304 Path backupDir = sidelineOldMeta();
1305
1306 LOG.info("Creating new hbase:meta");
1307 HRegion meta = createNewMeta();
1308
1309
1310 List<Put> puts = generatePuts(tablesInfo);
1311 if (puts == null) {
1312 LOG.fatal("Problem encountered when creating new hbase:meta entries. " +
1313 "You may need to restore the previously sidelined hbase:meta");
1314 return false;
1315 }
1316 meta.batchMutate(puts.toArray(new Put[puts.size()]));
1317 HRegion.closeHRegion(meta);
1318 LOG.info("Success! hbase:meta table rebuilt.");
1319 LOG.info("Old hbase:meta is moved into " + backupDir);
1320 return true;
1321 }
1322
1323 private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1324 boolean fixOverlaps) throws IOException {
1325 LOG.info("Checking HBase region split map from HDFS data...");
1326 for (TableInfo tInfo : tablesInfo.values()) {
1327 TableIntegrityErrorHandler handler;
1328 if (fixHoles || fixOverlaps) {
1329 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1330 fixHoles, fixOverlaps);
1331 } else {
1332 handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1333 }
1334 if (!tInfo.checkRegionChain(handler)) {
1335
1336 errors.report("Found inconsistency in table " + tInfo.getName());
1337 }
1338 }
1339 return tablesInfo;
1340 }
1341
1342 private Path getSidelineDir() throws IOException {
1343 if (sidelineDir == null) {
1344 Path hbaseDir = FSUtils.getRootDir(getConf());
1345 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1346 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1347 + startMillis);
1348 }
1349 return sidelineDir;
1350 }
1351
1352
1353
1354
1355 Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1356 return sidelineRegionDir(fs, null, hi);
1357 }
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367 Path sidelineRegionDir(FileSystem fs,
1368 String parentDir, HbckInfo hi) throws IOException {
1369 TableName tableName = hi.getTableName();
1370 Path regionDir = hi.getHdfsRegionDir();
1371
1372 if (!fs.exists(regionDir)) {
1373 LOG.warn("No previous " + regionDir + " exists. Continuing.");
1374 return null;
1375 }
1376
1377 Path rootDir = getSidelineDir();
1378 if (parentDir != null) {
1379 rootDir = new Path(rootDir, parentDir);
1380 }
1381 Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1382 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1383 fs.mkdirs(sidelineRegionDir);
1384 boolean success = false;
1385 FileStatus[] cfs = fs.listStatus(regionDir);
1386 if (cfs == null) {
1387 LOG.info("Region dir is empty: " + regionDir);
1388 } else {
1389 for (FileStatus cf : cfs) {
1390 Path src = cf.getPath();
1391 Path dst = new Path(sidelineRegionDir, src.getName());
1392 if (fs.isFile(src)) {
1393
1394 success = fs.rename(src, dst);
1395 if (!success) {
1396 String msg = "Unable to rename file " + src + " to " + dst;
1397 LOG.error(msg);
1398 throw new IOException(msg);
1399 }
1400 continue;
1401 }
1402
1403
1404 fs.mkdirs(dst);
1405
1406 LOG.info("Sidelining files from " + src + " into containing region " + dst);
1407
1408
1409
1410
1411 FileStatus[] hfiles = fs.listStatus(src);
1412 if (hfiles != null && hfiles.length > 0) {
1413 for (FileStatus hfile : hfiles) {
1414 success = fs.rename(hfile.getPath(), dst);
1415 if (!success) {
            String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1417 LOG.error(msg);
1418 throw new IOException(msg);
1419 }
1420 }
1421 }
1422 LOG.debug("Sideline directory contents:");
1423 debugLsr(sidelineRegionDir);
1424 }
1425 }
1426
1427 LOG.info("Removing old region dir: " + regionDir);
1428 success = fs.delete(regionDir, true);
1429 if (!success) {
1430 String msg = "Unable to delete dir " + regionDir;
1431 LOG.error(msg);
1432 throw new IOException(msg);
1433 }
1434 return sidelineRegionDir;
1435 }
1436
1437
1438
1439
1440 void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1441 Path backupHbaseDir) throws IOException {
1442 Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1443 if (fs.exists(tableDir)) {
1444 Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1445 fs.mkdirs(backupTableDir.getParent());
1446 boolean success = fs.rename(tableDir, backupTableDir);
1447 if (!success) {
1448 throw new IOException("Failed to move " + tableName + " from "
1449 + tableDir + " to " + backupTableDir);
1450 }
1451 } else {
1452 LOG.info("No previous " + tableName + " exists. Continuing.");
1453 }
1454 }
1455
1456
1457
1458
1459 Path sidelineOldMeta() throws IOException {
1460
1461 Path hbaseDir = FSUtils.getRootDir(getConf());
1462 FileSystem fs = hbaseDir.getFileSystem(getConf());
1463 Path backupDir = getSidelineDir();
1464 fs.mkdirs(backupDir);
1465
1466 try {
1467 sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1468 } catch (IOException e) {
1469 LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore "
1470 + "try to rename hbase:meta in " + backupDir.getName() + " to "
1471 + hbaseDir.getName() + ".", e);
1472 throw e;
1473 }
1474 return backupDir;
1475 }
1476
1477
1478
1479
1480
1481
1482 private void loadDisabledTables()
1483 throws ZooKeeperConnectionException, IOException {
1484 HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1485 @Override
1486 public Void connect(HConnection connection) throws IOException {
1487 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1488 try {
1489 for (TableName tableName :
1490 ZKTableReadOnly.getDisabledOrDisablingTables(zkw)) {
1491 disabledTables.add(tableName);
1492 }
1493 } catch (KeeperException ke) {
1494 throw new IOException(ke);
1495 } finally {
1496 zkw.close();
1497 }
1498 return null;
1499 }
1500 });
1501 }
1502
1503
1504
1505
1506 private boolean isTableDisabled(HRegionInfo regionInfo) {
1507 return disabledTables.contains(regionInfo.getTable());
1508 }
1509
1510
1511
1512
1513
1514 public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1515 Path rootDir = FSUtils.getRootDir(getConf());
1516 FileSystem fs = rootDir.getFileSystem(getConf());
1517
1518
1519 List<FileStatus> tableDirs = Lists.newArrayList();
1520
1521 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1522
1523 List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1524 for (Path path : paths) {
1525 TableName tableName = FSUtils.getTableName(path);
1526 if ((!checkMetaOnly &&
1527 isTableIncluded(tableName)) ||
1528 tableName.equals(TableName.META_TABLE_NAME)) {
1529 tableDirs.add(fs.getFileStatus(path));
1530 }
1531 }
1532
1533
1534 if (!foundVersionFile) {
1535 errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1536 "Version file does not exist in root dir " + rootDir);
1537 if (shouldFixVersionFile()) {
1538 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1539 + " file.");
1540 setShouldRerun();
1541 FSUtils.setVersion(fs, rootDir, getConf().getInt(
1542 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1543 HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1544 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1545 }
1546 }
1547
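  /**
   * Record the location of the hbase:meta region as found through the cluster
   * connection and ZooKeeper into regionInfoMap.
   *
   * @return true if the location could be recorded, false if it was null or incomplete
   */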
1572 private boolean recordMetaRegion() throws IOException {
1573 HRegionLocation metaLocation = connection.locateRegion(
1574 TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW);
1575
1576
1577 if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1578 metaLocation.getHostname() == null) {
1579 errors.reportError(ERROR_CODE.NULL_META_REGION,
1580 "META region or some of its attributes are null.");
1581 return false;
1582 }
1583 ServerName sn;
1584 try {
1585 sn = getMetaRegionServerName();
1586 } catch (KeeperException e) {
1587 throw new IOException(e);
1588 }
1589 MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
1590 HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1591 if (hbckInfo == null) {
1592 regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1593 } else {
1594 hbckInfo.metaEntry = m;
1595 }
1596 return true;
1597 }
1598
1599 private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1600 return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1601 @Override
1602 public void abort(String why, Throwable e) {
1603 LOG.error(why, e);
1604 System.exit(1);
1605 }
1606
1607 @Override
1608 public boolean isAborted() {
1609 return false;
1610 }
1611
1612 });
1613 }
1614
1615 private ServerName getMetaRegionServerName()
1616 throws IOException, KeeperException {
1617 ZooKeeperWatcher zkw = createZooKeeperWatcher();
1618 ServerName sn = null;
1619 try {
1620 sn = MetaRegionTracker.getMetaRegionLocation(zkw);
1621 } finally {
1622 zkw.close();
1623 }
1624 return sn;
1625 }
1626
1627
1628
1629
1630
1631
1632 void processRegionServers(Collection<ServerName> regionServerList)
1633 throws IOException, InterruptedException {
1634
1635 List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1636 List<Future<Void>> workFutures;
1637
1638
1639 for (ServerName rsinfo: regionServerList) {
1640 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1641 }
1642
1643 workFutures = executor.invokeAll(workItems);
1644
1645 for(int i=0; i<workFutures.size(); i++) {
1646 WorkItemRegion item = workItems.get(i);
1647 Future<Void> f = workFutures.get(i);
1648 try {
1649 f.get();
1650 } catch(ExecutionException e) {
1651 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1652 e.getCause());
1653 }
1654 }
1655 }
1656
1657
1658
1659
1660 private void checkAndFixConsistency()
1661 throws IOException, KeeperException, InterruptedException {
1662 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1663 checkRegionConsistency(e.getKey(), e.getValue());
1664 }
1665 }
1666
1667 private void preCheckPermission() throws IOException, AccessDeniedException {
1668 if (shouldIgnorePreCheckPermission()) {
1669 return;
1670 }
1671
1672 Path hbaseDir = FSUtils.getRootDir(getConf());
1673 FileSystem fs = hbaseDir.getFileSystem(getConf());
1674 UserProvider userProvider = UserProvider.instantiate(getConf());
1675 UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1676 FileStatus[] files = fs.listStatus(hbaseDir);
1677 for (FileStatus file : files) {
1678 try {
1679 FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1680 } catch (AccessDeniedException ace) {
1681 LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1682 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1683 + " does not have write perms to " + file.getPath()
1684 + ". Please rerun hbck as hdfs user " + file.getOwner());
1685 throw ace;
1686 }
1687 }
1688 }
1689
1690
1691
1692
1693 private void deleteMetaRegion(HbckInfo hi) throws IOException {
1694 deleteMetaRegion(hi.metaEntry.getRegionName());
1695 }
1696
1697
1698
1699
1700 private void deleteMetaRegion(byte[] metaKey) throws IOException {
1701 Delete d = new Delete(metaKey);
1702 meta.delete(d);
1703 meta.flushCommits();
1704 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1705 }
1706
1707
1708
1709
1710 private void resetSplitParent(HbckInfo hi) throws IOException {
1711 RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1712 Delete d = new Delete(hi.metaEntry.getRegionName());
1713 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1714 d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1715 mutations.add(d);
1716
1717 HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1718 hri.setOffline(false);
1719 hri.setSplit(false);
1720 Put p = MetaEditor.makePutFromRegionInfo(hri);
1721 mutations.add(p);
1722
1723 meta.mutateRow(mutations);
1724 meta.flushCommits();
1725 LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1726 }
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736 private void offline(byte[] regionName) throws IOException {
1737 String regionString = Bytes.toStringBinary(regionName);
1738 if (!rsSupportsOffline) {
      LOG.warn("Using unassign for region " + regionString
          + " instead of the offline method; you should"
          + " restart HMaster after these repairs");
1742 admin.unassign(regionName, true);
1743 return;
1744 }
1745
1746
1747 try {
1748 LOG.info("Offlining region " + regionString);
1749 admin.offline(regionName);
1750 } catch (IOException ioe) {
1751 String notFoundMsg = "java.lang.NoSuchMethodException: " +
1752 "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1753 if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign for region " + regionString
            + " instead of the offline method; you should"
            + " restart HMaster after these repairs");
1757 rsSupportsOffline = false;
1758 admin.unassign(regionName, true);
1759 return;
1760 }
1761 throw ioe;
1762 }
1763 }
1764
1765 private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
1766 for (OnlineEntry rse : hi.deployedEntries) {
1767 LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
1768 try {
1769 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, rse.hsa, rse.hri);
1770 offline(rse.hri.getRegionName());
1771 } catch (IOException ioe) {
1772 LOG.warn("Got exception when attempting to offline region "
1773 + Bytes.toString(rse.hri.getRegionName()), ioe);
1774 }
1775 }
1776 }
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790 private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
1791 if (hi.metaEntry == null && hi.hdfsEntry == null) {
1792 undeployRegions(hi);
1793 return;
1794 }
1795
1796
1797 Get get = new Get(hi.getRegionName());
1798 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1799 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
1800 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
1801 Result r = meta.get(get);
1802 ServerName serverName = HRegionInfo.getServerName(r);
1803 if (serverName == null) {
      errors.reportError("Unable to close region "
          + hi.getRegionNameAsString() + " because hbase:meta does not "
          + "have a handle to reach it.");
1807 return;
1808 }
1809
1810 HRegionInfo hri = HRegionInfo.getHRegionInfo(r);
1811 if (hri == null) {
1812 LOG.warn("Unable to close region " + hi.getRegionNameAsString()
1813 + " because hbase:meta had invalid or missing "
1814 + HConstants.CATALOG_FAMILY_STR + ":"
1815 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
1816 + " qualifier value.");
1817 return;
1818 }
1819
1820
1821 HBaseFsckRepair.closeRegionSilentlyAndWait(admin, serverName, hri);
1822 }
1823
1824 private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
1825 KeeperException, InterruptedException {
1826
1827 if (shouldFixAssignments()) {
1828 errors.print(msg);
1829 undeployRegions(hbi);
1830 setShouldRerun();
1831 HRegionInfo hri = hbi.getHdfsHRI();
1832 if (hri == null) {
1833 hri = hbi.metaEntry;
1834 }
1835 HBaseFsckRepair.fixUnassigned(admin, hri);
1836 HBaseFsckRepair.waitUntilAssigned(admin, hri);
1837 }
1838 }
1839
1840
1841
1842
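  /**
   * Check a single region for consistency between its entry in hbase:meta, its
   * presence in HDFS, and its deployment on region servers, and apply the enabled
   * fixes for whichever inconsistent combination is found.
   */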
1843 private void checkRegionConsistency(final String key, final HbckInfo hbi)
1844 throws IOException, KeeperException, InterruptedException {
1845 String descriptiveName = hbi.toString();
1846
1847 boolean inMeta = hbi.metaEntry != null;
1848
1849 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
1850 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
1851 boolean isDeployed = !hbi.deployedOn.isEmpty();
1852 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
1853 boolean deploymentMatchesMeta =
1854 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
1855 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
1856 boolean splitParent =
1857 (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
1858 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
1859 boolean recentlyModified = inHdfs &&
1860 hbi.getModTime() + timelag > System.currentTimeMillis();
1861
1862
1863 if (hbi.containsOnlyHdfsEdits()) {
1864 return;
1865 }
1866 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
1867 return;
1868 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
1869 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
          "table that is not deployed");
1871 return;
1872 } else if (recentlyModified) {
1873 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
1874 return;
1875 }
1876
1877 else if (!inMeta && !inHdfs && !isDeployed) {
1878
1879 assert false : "Entry for region with no data";
1880 } else if (!inMeta && !inHdfs && isDeployed) {
1881 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
1882 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
1883 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1884 if (shouldFixAssignments()) {
1885 undeployRegions(hbi);
1886 }
1887
1888 } else if (!inMeta && inHdfs && !isDeployed) {
1889 if (hbi.isMerged()) {
1890
1891
1892 hbi.setSkipChecks(true);
1893 LOG.info("Region " + descriptiveName
          + " was merged recently, its file(s) will be cleaned by CatalogJanitor later");
1895 return;
1896 }
1897 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
1898 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
1899 "or deployed on any region server");
1900
1901 if (shouldFixMeta()) {
1902 if (!hbi.isHdfsRegioninfoPresent()) {
1903 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
1904 + " in table integrity repair phase if -fixHdfsOrphans was" +
1905 " used.");
1906 return;
1907 }
1908
1909 HRegionInfo hri = hbi.getHdfsHRI();
1910 TableInfo tableInfo = tablesInfo.get(hri.getTable());
1911 if (tableInfo.regionsFromMeta.isEmpty()) {
1912 for (HbckInfo h : regionInfoMap.values()) {
1913 if (hri.getTable().equals(h.getTableName())) {
1914 if (h.metaEntry != null) tableInfo.regionsFromMeta
1915 .add((HRegionInfo) h.metaEntry);
1916 }
1917 }
1918 Collections.sort(tableInfo.regionsFromMeta);
1919 }
1920 for (HRegionInfo region : tableInfo.regionsFromMeta) {
1921 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
1922 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
1923 hri.getEndKey()) >= 0)
1924 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
1925 if(region.isSplit() || region.isOffline()) continue;
1926 Path regionDir = hbi.getHdfsRegionDir();
1927 FileSystem fs = regionDir.getFileSystem(getConf());
1928 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
1929 for (Path familyDir : familyDirs) {
1930 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
1931 for (Path referenceFilePath : referenceFilePaths) {
1932 Path parentRegionDir =
1933 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
1934 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
1935 LOG.warn(hri + " start and stop keys are in the range of " + region
1936 + ". The region might not be cleaned up from hdfs when region " + region
1937 + " split failed. Hence deleting from hdfs.");
1938 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
1939 regionDir.getParent(), hri);
1940 return;
1941 }
1942 }
1943 }
1944 }
1945 }
1946
1947 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
1948 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1949
1950 tryAssignmentRepair(hbi, "Trying to reassign region...");
1951 }
1952
1953 } else if (!inMeta && inHdfs && isDeployed) {
1954 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
1955 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
1956 debugLsr(hbi.getHdfsRegionDir());
1957 if (shouldFixMeta()) {
1958 if (!hbi.isHdfsRegioninfoPresent()) {
1959 LOG.error("This should have been repaired in table integrity repair phase");
1960 return;
1961 }
1962
        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
1964 HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
1965
1966 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
1967 }
1968
1969
1970 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
1971 // hbase:meta lists this region as a split parent: if both daughter regions
1972 // are already known to hbck, there is nothing further to check here.
1973 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
1974
1975 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
1976 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
1977 if (infoA != null && infoB != null) {
1978
1979 hbi.setSkipChecks(true);
1980 return;
1981 }
1982 }
1983 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
1984 + descriptiveName + " is a split parent in META, in HDFS, "
1985 + "and not deployed on any region server. This could be transient.");
1986 if (shouldFixSplitParents()) {
1987 setShouldRerun();
1988 resetSplitParent(hbi);
1989 }
1990 } else if (inMeta && !inHdfs && !isDeployed) {
1991 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
1992 + descriptiveName + " found in META, but not in HDFS "
1993 + "or deployed on any region server.");
1994 if (shouldFixMeta()) {
1995 deleteMetaRegion(hbi);
1996 }
1997 } else if (inMeta && !inHdfs && isDeployed) {
1998 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
1999 + " found in META, but not in HDFS, " +
2000 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2001 // The region is listed in hbase:meta and deployed, but its data is gone from
2002 // HDFS: undeploy it and (if -fixMeta) remove the dangling hbase:meta entry.
2003
2004 if (shouldFixAssignments()) {
2005 errors.print("Trying to fix unassigned region...");
2006 undeployRegions(hbi);
2007 }
2008 if (shouldFixMeta()) {
2009 // remove the stale entry from hbase:meta
2010 deleteMetaRegion(hbi);
2011 }
2012 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2013 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2014 + " not deployed on any region server.");
2015 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2016 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2017 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2018 "Region " + descriptiveName + " should not be deployed according " +
2019 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2020 if (shouldFixAssignments()) {
2021 errors.print("Trying to close the region " + descriptiveName);
2022 setShouldRerun();
2023 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2024 }
2025 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2026 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2027 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2028 + " but is multiply assigned to region servers " +
2029 Joiner.on(", ").join(hbi.deployedOn));
2030
2031 if (shouldFixAssignments()) {
2032 errors.print("Trying to fix assignment error...");
2033 setShouldRerun();
2034 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2035 }
2036 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2037 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2038 + descriptiveName + " listed in hbase:meta on region server " +
2039 hbi.metaEntry.regionServer + " but found on region server " +
2040 hbi.deployedOn.get(0));
2041
2042 if (shouldFixAssignments()) {
2043 errors.print("Trying to fix assignment error...");
2044 setShouldRerun();
2045 HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
2046 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2047 }
2048 } else {
2049 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2050 " is in an unforeseen state:" +
2051 " inMeta=" + inMeta +
2052 " inHdfs=" + inHdfs +
2053 " isDeployed=" + isDeployed +
2054 " isMultiplyDeployed=" + isMultiplyDeployed +
2055 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2056 " shouldBeDeployed=" + shouldBeDeployed);
2057 }
2058 }
2059
2060
2061 /**
2062 * Checks table integrity: groups the regions found in hbase:meta by table,
2063 * records where they are deployed, and then checks each table's region
2064 * chain for holes, overlaps and other inconsistencies.
2065 */
2066 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2067 tablesInfo = new TreeMap<TableName,TableInfo> ();
2068 List<HbckInfo> noHDFSRegionInfos = new ArrayList<HbckInfo>();
2069 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2070 for (HbckInfo hbi : regionInfoMap.values()) {
2071
2072 if (hbi.metaEntry == null) {
2073
2074 noHDFSRegionInfos.add(hbi);
2075 Path p = hbi.getHdfsRegionDir();
2076 if (p == null) {
2077 errors.report("No regioninfo in Meta or HDFS. " + hbi);
2078 }
2079
2080
2081 continue;
2082 }
2083 if (hbi.metaEntry.regionServer == null) {
2084 errors.detail("Skipping region because no region server: " + hbi);
2085 continue;
2086 }
2087 if (hbi.metaEntry.isOffline()) {
2088 errors.detail("Skipping region because it is offline: " + hbi);
2089 continue;
2090 }
2091 if (hbi.containsOnlyHdfsEdits()) {
2092 errors.detail("Skipping region because it only contains edits: " + hbi);
2093 continue;
2094 }
2095
2096 // Skip regions that are not deployed on any region server; missing
2097 // deployments are reported by the per-region consistency checks, not here.
2098
2099
2100
2101 if (hbi.deployedOn.size() == 0) continue;
2102
2103
2104 TableName tableName = hbi.metaEntry.getTable();
2105 TableInfo modTInfo = tablesInfo.get(tableName);
2106 if (modTInfo == null) {
2107 modTInfo = new TableInfo(tableName);
2108 }
2109 for (ServerName server : hbi.deployedOn) {
2110 modTInfo.addServer(server);
2111 }
2112
2113 if (!hbi.isSkipChecks()) {
2114 modTInfo.addRegionInfo(hbi);
2115 }
2116
2117 tablesInfo.put(tableName, modTInfo);
2118 }
2119
2120 loadTableInfosForTablesWithNoRegion();
2121
2122 for (TableInfo tInfo : tablesInfo.values()) {
2123 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2124 if (!tInfo.checkRegionChain(handler)) {
2125 errors.report("Found inconsistency in table " + tInfo.getName());
2126 }
2127 }
2128 return tablesInfo;
2129 }
2130
2131 /**
2132 * Loads table info for tables that have a dir in HDFS but no regions in hbase:meta.
2133 */
2134 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2135 Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2136 for (HTableDescriptor htd : allTables.values()) {
2137 if (checkMetaOnly && !htd.isMetaTable()) {
2138 continue;
2139 }
2140
2141 TableName tableName = htd.getTableName();
2142 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2143 TableInfo tableInfo = new TableInfo(tableName);
2144 tableInfo.htds.add(htd);
2145 tablesInfo.put(htd.getTableName(), tableInfo);
2146 }
2147 }
2148 }
2149
2150
2151 // Moves the files of the contained region into targetRegionDir and then
2152 // sidelines the contained region dir. Returns the number of files moved.
2153
2154 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2155 int fileMoves = 0;
2156 String thread = Thread.currentThread().getName();
2157 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2158 debugLsr(contained.getHdfsRegionDir());
2159
2160
2161 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2162 FileStatus[] dirs = null;
2163 try {
2164 dirs = fs.listStatus(contained.getHdfsRegionDir());
2165 } catch (FileNotFoundException fnfe) {
2166 // The region dir may already have been removed (e.g. sidelined by an earlier
2167 // run); only sideline it if it still exists.
2168 if (!fs.exists(contained.getHdfsRegionDir())) {
2169 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2170 + " is missing. Assuming already sidelined or moved.");
2171 } else {
2172 sidelineRegionDir(fs, contained);
2173 }
2174 return fileMoves;
2175 }
2176
2177 if (dirs == null) {
2178 if (!fs.exists(contained.getHdfsRegionDir())) {
2179 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2180 + " already sidelined.");
2181 } else {
2182 sidelineRegionDir(fs, contained);
2183 }
2184 return fileMoves;
2185 }
2186
2187 for (FileStatus cf : dirs) {
2188 Path src = cf.getPath();
2189 Path dst = new Path(targetRegionDir, src.getName());
2190
2191 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2192 // do not move the .regioninfo file of the contained region
2193 continue;
2194 }
2195
2196 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2197 // do not move the region's old log dir
2198 continue;
2199 }
2200
2201 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2202
2203 // Move each file under this family dir into the matching family dir of the
2204 // containing region, one file at a time.
2205
2206 for (FileStatus hfile : fs.listStatus(src)) {
2207 boolean success = fs.rename(hfile.getPath(), dst);
2208 if (success) {
2209 fileMoves++;
2210 }
2211 }
2212 LOG.debug("[" + thread + "] Target region dir contents after move:");
2213 debugLsr(targetRegionDir);
2214 }
2215
2216
2217 sidelineRegionDir(fs, contained);
2218 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2219 getSidelineDir());
2220 debugLsr(contained.getHdfsRegionDir());
2221
2222 return fileMoves;
2223 }
2224
2225
2226 static class WorkItemOverlapMerge implements Callable<Void> {
2227 private TableIntegrityErrorHandler handler;
2228 Collection<HbckInfo> overlapgroup;
2229
2230 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2231 this.handler = handler;
2232 this.overlapgroup = overlapgroup;
2233 }
2234
2235 @Override
2236 public Void call() throws Exception {
2237 handler.handleOverlapGroup(overlapgroup);
2238 return null;
2239 }
2240 };
2241
2242
2243 /**
2244 * Maintains state about a single table: its regions, deployments and problems.
2245 */
2246 public class TableInfo {
2247 TableName tableName;
2248 TreeSet <ServerName> deployedOn;
2249
2250 // regions whose end key sorts before their start key ("backwards" regions)
2251 final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2252
2253 // sidelined big overlapped regions, keyed by their sideline directory
2254 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2255
2256 // region split calculator used to find holes and overlaps in the key space
2257 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2258
2259 // table descriptors found for this table (ideally exactly one)
2260 final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2261
2262 // overlapping regions, grouped by the key that starts each problem group
2263 final Multimap<byte[], HbckInfo> overlapGroups =
2264 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2265
2266 // regions of this table as listed in hbase:meta
2267 final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
2268
2269 TableInfo(TableName name) {
2270 this.tableName = name;
2271 deployedOn = new TreeSet <ServerName>();
2272 }
2273
2274 /**
2275 * @return the table descriptor if exactly one was found for this table, otherwise null
2276 */
2277 private HTableDescriptor getHTD() {
2278 if (htds.size() == 1) {
2279 return (HTableDescriptor)htds.toArray()[0];
2280 } else {
2281 LOG.error("None/Multiple table descriptors found for table '"
2282 + tableName + "' regions: " + htds);
2283 }
2284 return null;
2285 }
2286
2287 public void addRegionInfo(HbckInfo hir) {
2288 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2289
2290 sc.add(hir);
2291 return;
2292 }
2293
2294
2295 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2296 errors.reportError(
2297 ERROR_CODE.REGION_CYCLE,
2298 String.format("The endkey for this region comes before the "
2299 + "startkey, startkey=%s, endkey=%s",
2300 Bytes.toStringBinary(hir.getStartKey()),
2301 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2302 backwards.add(hir);
2303 return;
2304 }
2305
2306
2307 sc.add(hir);
2308 }
2309
2310 public void addServer(ServerName server) {
2311 this.deployedOn.add(server);
2312 }
2313
2314 public TableName getName() {
2315 return tableName;
2316 }
2317
2318 public int getNumRegions() {
2319 return sc.getStarts().size() + backwards.size();
2320 }
2321
2322 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2323 ErrorReporter errors;
2324
2325 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2326 this.errors = errors;
2327 setTableInfo(ti);
2328 }
2329
2330 @Override
2331 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2332 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2333 "First region should start with an empty key. You need to "
2334 + " create a new region and regioninfo in HDFS to plug the hole.",
2335 getTableInfo(), hi);
2336 }
2337
2338 @Override
2339 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2340 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2341 "Last region should end with an empty key. You need to "
2342 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2343 }
2344
2345 @Override
2346 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2347 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2348 "Region has the same start and end key.", getTableInfo(), hi);
2349 }
2350
2351 @Override
2352 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2353 byte[] key = r1.getStartKey();
2354
2355 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2356 "Multiple regions have the same startkey: "
2357 + Bytes.toStringBinary(key), getTableInfo(), r1);
2358 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2359 "Multiple regions have the same startkey: "
2360 + Bytes.toStringBinary(key), getTableInfo(), r2);
2361 }
2362
2363 @Override
2364 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2365 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2366 "There is an overlap in the region chain.",
2367 getTableInfo(), hi1, hi2);
2368 }
2369
2370 @Override
2371 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2372 errors.reportError(
2373 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2374 "There is a hole in the region chain between "
2375 + Bytes.toStringBinary(holeStart) + " and "
2376 + Bytes.toStringBinary(holeStop)
2377 + ". You need to create a new .regioninfo and region "
2378 + "dir in hdfs to plug the hole.");
2379 }
2380 };
2381
2382
2383 /**
2384 * Integrity error handler that repairs problems using the region information
2385 * found in HDFS: it plugs holes in the region chain by creating new empty
2386 * regions on disk, and repairs overlap groups by merging (or sidelining) the
2387 * offending regions.
2388 *
2389 * If the cluster is online, regions are closed and offlined before their
2390 * directories are moved around.
2391 */
2392
2393
2394 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2395 Configuration conf;
2396
2397 boolean fixOverlaps = true;
2398
2399 HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2400 boolean fixHoles, boolean fixOverlaps) {
2401 super(ti, errors);
2402 this.conf = conf;
2403 this.fixOverlaps = fixOverlaps;
2404 // note: the fixHoles argument is accepted but not stored; holes found later are always plugged
2405 }
2406
2407
2408 /**
2409 * The first region of the table is missing from the chain: create a new empty
2410 * region from the empty start key to the first known start key.
2411 */
2412 @Override
2413 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2414 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2415 "First region should start with an empty key. Creating a new " +
2416 "region and regioninfo in HDFS to plug the hole.",
2417 getTableInfo(), next);
2418 HTableDescriptor htd = getTableInfo().getHTD();
2419
2420 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2421 HConstants.EMPTY_START_ROW, next.getStartKey());
2422
2423
2424 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2425 LOG.info("Table region start key was not empty. Created new empty region: "
2426 + newRegion + " " +region);
2427 fixes++;
2428 }
2429
2430 @Override
2431 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2432 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2433 "Last region should end with an empty key. Creating a new "
2434 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2435 HTableDescriptor htd = getTableInfo().getHTD();
2436
2437 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2438 HConstants.EMPTY_START_ROW);
2439
2440 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2441 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
2442 + " " + region);
2443 fixes++;
2444 }
2445
2446
2447 /**
2448 * Plugs a hole in the region chain by creating a new empty region that covers it.
2449 */
2450 @Override
2451 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2452 errors.reportError(
2453 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2454 "There is a hole in the region chain between "
2455 + Bytes.toStringBinary(holeStartKey) + " and "
2456 + Bytes.toStringBinary(holeStopKey)
2457 + ". Creating a new regioninfo and region "
2458 + "dir in hdfs to plug the hole.");
2459 HTableDescriptor htd = getTableInfo().getHTD();
2460 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2461 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2462 LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2463 fixes++;
2464 }
2465
2466
2467 /**
2468 * Repairs a group of overlapping regions. If the group has more than maxMerge
2469 * regions, the largest overlaps may be sidelined (when sidelineBigOverlaps is
2470 * enabled) instead of merged; otherwise every region in the group is merged
2471 * into a single new containing region.
2472 */
2473
2474
2475
2476
2477 @Override
2478 public void handleOverlapGroup(Collection<HbckInfo> overlap)
2479 throws IOException {
2480 Preconditions.checkNotNull(overlap);
2481 Preconditions.checkArgument(overlap.size() > 0);
2482
2483 if (!this.fixOverlaps) {
2484 LOG.warn("Not attempting to repair overlaps.");
2485 return;
2486 }
2487
2488 if (overlap.size() > maxMerge) {
2489 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2490 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2491 if (sidelineBigOverlaps) {
2492
2493 sidelineBigOverlaps(overlap);
2494 }
2495 return;
2496 }
2497
2498 mergeOverlaps(overlap);
2499 }
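// Operational note: the maxMerge and sidelineBigOverlaps thresholds used above
// are exposed on the command line as "-maxMerge <n>" and "-sidelineBigOverlaps"
// (see printUsageAndExit()). An illustrative invocation, assuming the standard
// "hbase hbck" wrapper and a made-up table name, might look like:
//   hbase hbck -fixHdfsOverlaps -maxMerge 5 -sidelineBigOverlaps MyTable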
2500
2501 void mergeOverlaps(Collection<HbckInfo> overlap)
2502 throws IOException {
2503 String thread = Thread.currentThread().getName();
2504 LOG.info("== [" + thread + "] Merging regions into one region: "
2505 + Joiner.on(",").join(overlap));
2506
2507 Pair<byte[], byte[]> range = null;
2508 for (HbckInfo hi : overlap) {
2509 if (range == null) {
2510 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2511 } else {
2512 if (RegionSplitCalculator.BYTES_COMPARATOR
2513 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2514 range.setFirst(hi.getStartKey());
2515 }
2516 if (RegionSplitCalculator.BYTES_COMPARATOR
2517 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2518 range.setSecond(hi.getEndKey());
2519 }
2520 }
2521
2522 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
2523 LOG.debug("[" + thread + "] Contained region dir before close");
2524 debugLsr(hi.getHdfsRegionDir());
2525 try {
2526 LOG.info("[" + thread + "] Closing region: " + hi);
2527 closeRegion(hi);
2528 } catch (IOException ioe) {
2529 LOG.warn("[" + thread + "] Was unable to close region " + hi
2530 + ". Just continuing... ", ioe);
2531 } catch (InterruptedException e) {
2532 LOG.warn("[" + thread + "] Was unable to close region " + hi
2533 + ". Just continuing... ", e);
2534 }
2535
2536 try {
2537 LOG.info("[" + thread + "] Offlining region: " + hi);
2538 offline(hi.getRegionName());
2539 } catch (IOException ioe) {
2540 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2541 + ". Just continuing... ", ioe);
2542 }
2543 }
2544
2545 // create a new empty container region covering the merged key range
2546 HTableDescriptor htd = getTableInfo().getHTD();
2547
2548 HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2549 range.getSecond());
2550 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2551 LOG.info("[" + thread + "] Created new empty container region: " +
2552 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2553 debugLsr(region.getRegionFileSystem().getRegionDir());
2554
2555 // move the data of each overlapping region into the new container region
2556 boolean didFix = false;
2557 Path target = region.getRegionFileSystem().getRegionDir();
2558 for (HbckInfo contained : overlap) {
2559 LOG.info("[" + thread + "] Merging " + contained + " into " + target );
2560 int merges = mergeRegionDirs(target, contained);
2561 if (merges > 0) {
2562 didFix = true;
2563 }
2564 }
2565 if (didFix) {
2566 fixes++;
2567 }
2568 }
2569
2570
2571 /**
2572 * Sidelines some regions of an oversized overlap group so that the remaining
2573 * group is small enough to merge. At most maxOverlapsToSideline regions are
2574 * closed, offlined and moved to the sideline directory.
2575 */
2576
2577 void sidelineBigOverlaps(
2578 Collection<HbckInfo> bigOverlap) throws IOException {
2579 int overlapsToSideline = bigOverlap.size() - maxMerge;
2580 if (overlapsToSideline > maxOverlapsToSideline) {
2581 overlapsToSideline = maxOverlapsToSideline;
2582 }
2583 List<HbckInfo> regionsToSideline =
2584 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2585 FileSystem fs = FileSystem.get(conf);
2586 for (HbckInfo regionToSideline: regionsToSideline) {
2587 try {
2588 LOG.info("Closing region: " + regionToSideline);
2589 closeRegion(regionToSideline);
2590 } catch (IOException ioe) {
2591 LOG.warn("Was unable to close region " + regionToSideline
2592 + ". Just continuing... ", ioe);
2593 } catch (InterruptedException e) {
2594 LOG.warn("Was unable to close region " + regionToSideline
2595 + ". Just continuing... ", e);
2596 }
2597
2598 try {
2599 LOG.info("Offlining region: " + regionToSideline);
2600 offline(regionToSideline.getRegionName());
2601 } catch (IOException ioe) {
2602 LOG.warn("Unable to offline region from master: " + regionToSideline
2603 + ". Just continuing... ", ioe);
2604 }
2605
2606 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
2607 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2608 if (sidelineRegionDir != null) {
2609 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2610 LOG.info("After sidelined big overlapped region: "
2611 + regionToSideline.getRegionNameAsString()
2612 + " to " + sidelineRegionDir.toString());
2613 fixes++;
2614 }
2615 }
2616 }
2617 }
2618
2619
2620 /**
2621 * Checks the region chain of this table for holes, overlaps, degenerate and
2622 * duplicate regions, reporting (and optionally repairing) what it finds.
2623 * @return false if new errors were reported
2624 */
2625 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2626
2627 // Regions of a disabled table are expected to be unassigned, so the region
2628 // chain is not checked for such tables.
2629 if (disabledTables.contains(this.tableName)) {
2630 return true;
2631 }
2632 int originalErrorsCount = errors.getErrorList().size();
2633 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2634 SortedSet<byte[]> splits = sc.getSplits();
2635
2636 byte[] prevKey = null;
2637 byte[] problemKey = null;
2638
2639 if (splits.size() == 0) {
2640 // no regions at all were found for this table: the entire key space is a hole
2641 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2642 }
2643
2644 for (byte[] key : splits) {
2645 Collection<HbckInfo> ranges = regions.get(key);
2646 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2647 for (HbckInfo rng : ranges) {
2648 handler.handleRegionStartKeyNotEmpty(rng);
2649 }
2650 }
2651
2652
2653 for (HbckInfo rng : ranges) {
2654
2655 byte[] endKey = rng.getEndKey();
2656 endKey = (endKey.length == 0) ? null : endKey;
2657 if (Bytes.equals(rng.getStartKey(),endKey)) {
2658 handler.handleDegenerateRegion(rng);
2659 }
2660 }
2661
2662 if (ranges.size() == 1) {
2663
2664 if (problemKey != null) {
2665 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2666 }
2667 problemKey = null;
2668 } else if (ranges.size() > 1) {
2669
2670
2671 if (problemKey == null) {
2672
2673 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2674 problemKey = key;
2675 }
2676 overlapGroups.putAll(problemKey, ranges);
2677
2678
2679 ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2680
2681 for (HbckInfo r1 : ranges) {
2682 subRange.remove(r1);
2683 for (HbckInfo r2 : subRange) {
2684 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
2685 handler.handleDuplicateStartKeys(r1,r2);
2686 } else {
2687
2688 handler.handleOverlapInRegionChain(r1, r2);
2689 }
2690 }
2691 }
2692
2693 } else if (ranges.size() == 0) {
2694 if (problemKey != null) {
2695 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2696 }
2697 problemKey = null;
2698
2699 byte[] holeStopKey = sc.getSplits().higher(key);
2700
2701 if (holeStopKey != null) {
2702
2703 handler.handleHoleInRegionChain(key, holeStopKey);
2704 }
2705 }
2706 prevKey = key;
2707 }
2708
2709
2710
2711 if (prevKey != null) {
2712 handler.handleRegionEndKeyNotEmpty(prevKey);
2713 }
2714
2715
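// Overlap groups can be repaired in parallel (the default) or serially. A minimal
// sketch of forcing serial merges, assuming the toggle is set on the Configuration
// that HBaseFsck was created with:
//   conf.setBoolean("hbasefsck.overlap.merge.parallel", false);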
2716 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
2717 LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
2718 " false to run serially.");
2719 boolean ok = handleOverlapsParallel(handler, prevKey);
2720 if (!ok) {
2721 return false;
2722 }
2723 } else {
2724 LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" +
2725 " true to run in parallel.");
2726 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2727 handler.handleOverlapGroup(overlap);
2728 }
2729 }
2730
2731 if (details) {
2732
2733 errors.print("---- Table '" + this.tableName
2734 + "': region split map");
2735 dump(splits, regions);
2736 errors.print("---- Table '" + this.tableName
2737 + "': overlap groups");
2738 dumpOverlapProblems(overlapGroups);
2739 errors.print("There are " + overlapGroups.keySet().size()
2740 + " overlap groups with " + overlapGroups.size()
2741 + " overlapping regions");
2742 }
2743 if (!sidelinedRegions.isEmpty()) {
2744 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
2745 errors.print("---- Table '" + this.tableName
2746 + "': sidelined big overlapped regions");
2747 dumpSidelinedRegions(sidelinedRegions);
2748 }
2749 return errors.getErrorList().size() == originalErrorsCount;
2750 }
2751
2752 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
2753 throws IOException {
2754
2755
2756 List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
2757 List<Future<Void>> rets;
2758 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
2759
2760 merges.add(new WorkItemOverlapMerge(overlap, handler));
2761 }
2762 try {
2763 rets = executor.invokeAll(merges);
2764 } catch (InterruptedException e) {
2765 LOG.error("Overlap merges were interrupted", e);
2766 return false;
2767 }
2768 for(int i=0; i<merges.size(); i++) {
2769 WorkItemOverlapMerge work = merges.get(i);
2770 Future<Void> f = rets.get(i);
2771 try {
2772 f.get();
2773 } catch(ExecutionException e) {
2774 LOG.warn("Failed to merge overlap group " + work, e.getCause());
2775 } catch (InterruptedException e) {
2776 LOG.error("Waiting for overlap merges was interrupted", e);
2777 return false;
2778 }
2779 }
2780 return true;
2781 }
2782
2783
2784 /**
2785 * Dumps the split points and the regions covering each split, for the
2786 * -details report.
2787 */
2788
2789 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
2790
2791 StringBuilder sb = new StringBuilder();
2792 for (byte[] k : splits) {
2793 sb.setLength(0);
2794 sb.append(Bytes.toStringBinary(k) + ":\t");
2795 for (HbckInfo r : regions.get(k)) {
2796 sb.append("[ "+ r.toString() + ", "
2797 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
2798 }
2799 errors.print(sb.toString());
2800 }
2801 }
2802 }
2803
2804 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
2805
2806
2807 for (byte[] k : regions.keySet()) {
2808 errors.print(Bytes.toStringBinary(k) + ":");
2809 for (HbckInfo r : regions.get(k)) {
2810 errors.print("[ " + r.toString() + ", "
2811 + Bytes.toStringBinary(r.getEndKey()) + "]");
2812 }
2813 errors.print("----");
2814 }
2815 }
2816
2817 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
2818 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
2819 TableName tableName = entry.getValue().getTableName();
2820 Path path = entry.getKey();
2821 errors.print("This sidelined region dir should be bulk loaded: "
2822 + path.toString());
2823 errors.print("Bulk load command looks like: "
2824 + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
2825 + path.toUri().getPath() + " "+ tableName);
2826 }
2827 }
2828
2829 public Multimap<byte[], HbckInfo> getOverlapGroups(
2830 TableName table) {
2831 TableInfo ti = tablesInfo.get(table);
2832 return ti.overlapGroups;
2833 }
2834
2835
2836 /**
2837 * Returns descriptors of user-space tables whose first region's hbase:meta
2838 * entry has not been modified within the configured timelag; more recently
2839 * modified tables are counted in numSkipped instead.
2840 */
2841
2842
2843
2844 HTableDescriptor[] getTables(AtomicInteger numSkipped) {
2845 List<TableName> tableNames = new ArrayList<TableName>();
2846 long now = System.currentTimeMillis();
2847
2848 for (HbckInfo hbi : regionInfoMap.values()) {
2849 MetaEntry info = hbi.metaEntry;
2850
2851
2852
2853 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
2854 if (info.modTime + timelag < now) {
2855 tableNames.add(info.getTable());
2856 } else {
2857 numSkipped.incrementAndGet();
2858 }
2859 }
2860 }
2861 return getHTableDescriptors(tableNames);
2862 }
2863
2864 HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
2865 HTableDescriptor[] htd = new HTableDescriptor[0];
2866 try {
2867 LOG.info("getHTableDescriptors == tableNames => " + tableNames);
2868 htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames);
2869 } catch (IOException e) {
2870 LOG.debug("Exception getting table descriptors", e);
2871 }
2872 return htd;
2873 }
2874
2875
2876 /**
2877 * Gets the hbck entry for the given encoded region name, creating and
2878 * registering a new empty entry if none exists yet.
2879 */
2880 private synchronized HbckInfo getOrCreateInfo(String name) {
2881 HbckInfo hbi = regionInfoMap.get(name);
2882 if (hbi == null) {
2883 hbi = new HbckInfo(null);
2884 regionInfoMap.put(name, hbi);
2885 }
2886 return hbi;
2887 }
2888
2889 private void checkAndFixTableLocks() throws IOException {
2890 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2891
2892 try {
2893 TableLockChecker checker = new TableLockChecker(zkw, errors);
2894 checker.checkTableLocks();
2895
2896 if (this.fixTableLocks) {
2897 checker.fixExpiredTableLocks();
2898 }
2899 } finally {
2900 zkw.close();
2901 }
2902 }
2903
2904
2905 /**
2906 * Looks for table ZNodes left in the ENABLING state whose tables are missing
2907 * from hbase:meta; with -fixOrphanedTableZnodes such ZNodes are set to the
2908 * disabled state.
2909 */
2910 private void checkAndFixOrphanedTableZNodes()
2911 throws IOException, KeeperException, InterruptedException {
2912 ZooKeeperWatcher zkw = createZooKeeperWatcher();
2913 try {
2914 ZKTable zkTable = new ZKTable(zkw);
2915 Set<TableName> enablingTables = zkTable.getEnablingTables(zkw);
2916 String msg;
2917 TableInfo tableInfo;
2918
2919 for (TableName tableName : enablingTables) {
2920
2921 tableInfo = tablesInfo.get(tableName);
2922 if (tableInfo != null) {
2923
2924 continue;
2925 }
2926
2927 msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
2928 LOG.warn(msg);
2929 orphanedTableZNodes.add(tableName);
2930 errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
2931 }
2932
2933 if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
2934 for (TableName tableName : orphanedTableZNodes) {
2935 // Set the orphaned table's state in ZooKeeper to disabled, since the table
2936 // itself is not present in hbase:meta.
2937
2938
2939 zkTable.setDisabledTable(tableName);
2940 }
2941 }
2942 } finally {
2943 zkw.close();
2944 }
2945 }
2946
2947
2948 /**
2949 * Checks how many servers are reported as hosting hbase:meta. If it is not
2950 * exactly one, an error is reported and, with -fixAssignments, hbck tries to
2951 * assign or reassign the hbase:meta region.
2952 * @return true if hbase:meta is deployed on exactly one region server
2953 */
2954
2955
2956 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2957 List<HbckInfo> metaRegions = Lists.newArrayList();
2958 for (HbckInfo value : regionInfoMap.values()) {
2959 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
2960 metaRegions.add(value);
2961 }
2962 }
2963
2964
2965
2966 List<ServerName> servers = new ArrayList<ServerName>();
2967 HbckInfo metaHbckInfo = null;
2968 if (!metaRegions.isEmpty()) {
2969 metaHbckInfo = metaRegions.get(0);
2970 servers = metaHbckInfo.deployedOn;
2971 }
2972 if (servers.size() != 1) {
2973 if (servers.size() == 0) {
2974 errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not deployed on any region server.");
2975 if (shouldFixAssignments()) {
2976 errors.print("Trying to fix a problem with hbase:meta...");
2977 setShouldRerun();
2978
2979 HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
2980 HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
2981 }
2982 } else if (servers.size() > 1) {
2983 errors
2984 .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta is deployed on more than one region server.");
2985 if (shouldFixAssignments()) {
2986 if (metaHbckInfo == null) {
2987 errors.print(
2988 "Unable to fix problem with hbase:meta due to hbase:meta region info missing");
2989 return false;
2990 }
2991 errors.print("Trying to fix a problem with hbase:meta...");
2992 setShouldRerun();
2993
2994 HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
2995 }
2996 }
2997
2998 return false;
2999 }
3000
3001 return true;
3002 }
3003
3004
3005 /**
3006 * Scans hbase:meta and populates regionInfoMap with one entry per region found.
3007 */
3008 boolean loadMetaEntries() throws IOException {
3009 MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3010 int countRecord = 1;
3011
3012 // comparator used to find the most recently written cell of a meta row
3013 final Comparator<Cell> comp = new Comparator<Cell>() {
3014 @Override
3015 public int compare(Cell k1, Cell k2) {
3016 return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3017 }
3018 };
3019
3020 @Override
3021 public boolean processRow(Result result) throws IOException {
3022 try {
3023
3024
3025 long ts = Collections.max(result.listCells(), comp).getTimestamp();
3026 Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(result);
3027 if (pair == null || pair.getFirst() == null) {
3028 emptyRegionInfoQualifiers.add(result);
3029 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3030 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3031 return true;
3032 }
3033 ServerName sn = null;
3034 if (pair.getSecond() != null) {
3035 sn = pair.getSecond();
3036 }
3037 HRegionInfo hri = pair.getFirst();
3038 if (!(isTableIncluded(hri.getTable())
3039 || hri.isMetaRegion())) {
3040 return true;
3041 }
3042 PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3043 MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3044 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3045 if (previous == null) {
3046 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3047 } else if (previous.metaEntry == null) {
3048 previous.metaEntry = m;
3049 } else {
3050 throw new IOException("Two entries in hbase:meta are the same: " + previous);
3051 }
3052
3053 PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3054 for (HRegionInfo mergeRegion : new HRegionInfo[] {
3055 mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3056 if (mergeRegion != null) {
3057
3058 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3059 hbInfo.setMerged(true);
3060 }
3061 }
3062
3063
3064 if (countRecord % 100 == 0) {
3065 errors.progress();
3066 }
3067 countRecord++;
3068 return true;
3069 } catch (RuntimeException e) {
3070 LOG.error("Result=" + result);
3071 throw e;
3072 }
3073 }
3074 };
3075 if (!checkMetaOnly) {
3076
3077 MetaScanner.metaScan(getConf(), visitor);
3078 }
3079
3080 errors.print("");
3081 return true;
3082 }
3083
3084 /**
3085 * A region entry as read from hbase:meta: the HRegionInfo plus the hosting server, modification time and daughter regions.
3086 */
3087 static class MetaEntry extends HRegionInfo {
3088 ServerName regionServer;
3089 long modTime;
3090 HRegionInfo splitA, splitB;
3091
3092 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3093 this(rinfo, regionServer, modTime, null, null);
3094 }
3095
3096 public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3097 HRegionInfo splitA, HRegionInfo splitB) {
3098 super(rinfo);
3099 this.regionServer = regionServer;
3100 this.modTime = modTime;
3101 this.splitA = splitA;
3102 this.splitB = splitB;
3103 }
3104
3105 @Override
3106 public boolean equals(Object o) {
3107 boolean superEq = super.equals(o);
3108 if (!superEq) {
3109 return superEq;
3110 }
3111
3112 MetaEntry me = (MetaEntry) o;
3113 if (!regionServer.equals(me.regionServer)) {
3114 return false;
3115 }
3116 return (modTime == me.modTime);
3117 }
3118
3119 @Override
3120 public int hashCode() {
3121 int hash = Arrays.hashCode(getRegionName());
3122 hash ^= getRegionId();
3123 hash ^= Arrays.hashCode(getStartKey());
3124 hash ^= Arrays.hashCode(getEndKey());
3125 hash ^= Boolean.valueOf(isOffline()).hashCode();
3126 hash ^= getTable().hashCode();
3127 if (regionServer != null) {
3128 hash ^= regionServer.hashCode();
3129 }
3130 hash ^= modTime;
3131 return hash;
3132 }
3133 }
3134
3135 /**
3136 * A region as found in HDFS: its region dir, the dir's modification time, and whether a .regioninfo file or only edits were present.
3137 */
3138 static class HdfsEntry {
3139 HRegionInfo hri;
3140 Path hdfsRegionDir = null;
3141 long hdfsRegionDirModTime = 0;
3142 boolean hdfsRegioninfoFilePresent = false;
3143 boolean hdfsOnlyEdits = false;
3144 }
3145
3146 /**
3147 * A region as reported by an online region server.
3148 */
3149 static class OnlineEntry {
3150 HRegionInfo hri;
3151 ServerName hsa;
3152
3153 @Override
3154 public String toString() {
3155 return hsa.toString() + ";" + hri.getRegionNameAsString();
3156 }
3157 }
3158
3159
3160 /**
3161 * Information about a single region, gathered from hbase:meta, HDFS and the online region servers.
3162 */
3163 public static class HbckInfo implements KeyRange {
3164 private MetaEntry metaEntry = null;
3165 private HdfsEntry hdfsEntry = null;
3166 private List<OnlineEntry> deployedEntries = Lists.newArrayList();
3167 private List<ServerName> deployedOn = Lists.newArrayList();
3168 private boolean skipChecks = false;
3169 private boolean isMerged = false;
3170
3171 HbckInfo(MetaEntry metaEntry) {
3172 this.metaEntry = metaEntry;
3173 }
3174
3175 public synchronized void addServer(HRegionInfo hri, ServerName server) {
3176 OnlineEntry rse = new OnlineEntry() ;
3177 rse.hri = hri;
3178 rse.hsa = server;
3179 this.deployedEntries.add(rse);
3180 this.deployedOn.add(server);
3181 }
3182
3183 @Override
3184 public synchronized String toString() {
3185 StringBuilder sb = new StringBuilder();
3186 sb.append("{ meta => ");
3187 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3188 sb.append( ", hdfs => " + getHdfsRegionDir());
3189 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3190 sb.append(" }");
3191 return sb.toString();
3192 }
3193
3194 @Override
3195 public byte[] getStartKey() {
3196 if (this.metaEntry != null) {
3197 return this.metaEntry.getStartKey();
3198 } else if (this.hdfsEntry != null) {
3199 return this.hdfsEntry.hri.getStartKey();
3200 } else {
3201 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3202 return null;
3203 }
3204 }
3205
3206 @Override
3207 public byte[] getEndKey() {
3208 if (this.metaEntry != null) {
3209 return this.metaEntry.getEndKey();
3210 } else if (this.hdfsEntry != null) {
3211 return this.hdfsEntry.hri.getEndKey();
3212 } else {
3213 LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3214 return null;
3215 }
3216 }
3217
3218 public TableName getTableName() {
3219 if (this.metaEntry != null) {
3220 return this.metaEntry.getTable();
3221 } else if (this.hdfsEntry != null) {
3222
3223
3224 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3225 return FSUtils.getTableName(tableDir);
3226 } else {
3227
3228
3229 return null;
3230 }
3231 }
3232
3233 public String getRegionNameAsString() {
3234 if (metaEntry != null) {
3235 return metaEntry.getRegionNameAsString();
3236 } else if (hdfsEntry != null) {
3237 if (hdfsEntry.hri != null) {
3238 return hdfsEntry.hri.getRegionNameAsString();
3239 }
3240 }
3241 return null;
3242 }
3243
3244 public byte[] getRegionName() {
3245 if (metaEntry != null) {
3246 return metaEntry.getRegionName();
3247 } else if (hdfsEntry != null) {
3248 return hdfsEntry.hri.getRegionName();
3249 } else {
3250 return null;
3251 }
3252 }
3253
3254 Path getHdfsRegionDir() {
3255 if (hdfsEntry == null) {
3256 return null;
3257 }
3258 return hdfsEntry.hdfsRegionDir;
3259 }
3260
3261 boolean containsOnlyHdfsEdits() {
3262 if (hdfsEntry == null) {
3263 return false;
3264 }
3265 return hdfsEntry.hdfsOnlyEdits;
3266 }
3267
3268 boolean isHdfsRegioninfoPresent() {
3269 if (hdfsEntry == null) {
3270 return false;
3271 }
3272 return hdfsEntry.hdfsRegioninfoFilePresent;
3273 }
3274
3275 long getModTime() {
3276 if (hdfsEntry == null) {
3277 return 0;
3278 }
3279 return hdfsEntry.hdfsRegionDirModTime;
3280 }
3281
3282 HRegionInfo getHdfsHRI() {
3283 if (hdfsEntry == null) {
3284 return null;
3285 }
3286 return hdfsEntry.hri;
3287 }
3288
3289 public void setSkipChecks(boolean skipChecks) {
3290 this.skipChecks = skipChecks;
3291 }
3292
3293 public boolean isSkipChecks() {
3294 return skipChecks;
3295 }
3296
3297 public void setMerged(boolean isMerged) {
3298 this.isMerged = isMerged;
3299 }
3300
3301 public boolean isMerged() {
3302 return this.isMerged;
3303 }
3304 }
3305
3306 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3307 @Override
3308 public int compare(HbckInfo l, HbckInfo r) {
3309 if (l == r) {
3310
3311 return 0;
3312 }
3313
3314 int tableCompare = l.getTableName().compareTo(r.getTableName());
3315 if (tableCompare != 0) {
3316 return tableCompare;
3317 }
3318
3319 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3320 l.getStartKey(), r.getStartKey());
3321 if (startComparison != 0) {
3322 return startComparison;
3323 }
3324
3325
3326 byte[] endKey = r.getEndKey();
3327 endKey = (endKey.length == 0) ? null : endKey;
3328 byte[] endKey2 = l.getEndKey();
3329 endKey2 = (endKey2.length == 0) ? null : endKey2;
3330 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3331 endKey2, endKey);
3332
3333 if (endComparison != 0) {
3334 return endComparison;
3335 }
3336
3337
3338
3339 if (l.hdfsEntry == null && r.hdfsEntry == null) {
3340 return 0;
3341 }
3342 if (l.hdfsEntry == null && r.hdfsEntry != null) {
3343 return 1;
3344 }
3345
3346 if (r.hdfsEntry == null) {
3347 return -1;
3348 }
3349
3350 return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3351 }
3352 };
3353
3354 /**
3355 * Prints the per-table summary: consistency status, region count and deployment.
3356 */
3357 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3358 StringBuilder sb = new StringBuilder();
3359 errors.print("Summary:");
3360 for (TableInfo tInfo : tablesInfo.values()) {
3361 if (errors.tableHasErrors(tInfo)) {
3362 errors.print("Table " + tInfo.getName() + " is inconsistent.");
3363 } else {
3364 errors.print(" " + tInfo.getName() + " is okay.");
3365 }
3366 errors.print(" Number of regions: " + tInfo.getNumRegions());
3367 sb.setLength(0);
3368 sb.append(" Deployed on: ");
3369 for (ServerName server : tInfo.deployedOn) {
3370 sb.append(" " + server.toString());
3371 }
3372 errors.print(sb.toString());
3373 }
3374 }
3375
3376 static ErrorReporter getErrorReporter(
3377 final Configuration conf) throws ClassNotFoundException {
3378 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3379 return ReflectionUtils.newInstance(reporter, conf);
3380 }
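// A hedged sketch of plugging in a different reporter implementation via the
// "hbasefsck.errorreporter" key read above; MyQuietReporter is a hypothetical
// class implementing ErrorReporter, not something shipped with HBase:
//   Configuration conf = HBaseConfiguration.create();
//   conf.setClass("hbasefsck.errorreporter", MyQuietReporter.class, ErrorReporter.class);
//   ErrorReporter reporter = HBaseFsck.getErrorReporter(conf);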
3381
3382 public interface ErrorReporter {
3383 enum ERROR_CODE {
3384 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3385 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3386 MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3387 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3388 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3389 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3390 WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3391 }
3392 void clear();
3393 void report(String message);
3394 void reportError(String message);
3395 void reportError(ERROR_CODE errorCode, String message);
3396 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3397 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3398 void reportError(
3399 ERROR_CODE errorCode,
3400 String message,
3401 TableInfo table,
3402 HbckInfo info1,
3403 HbckInfo info2
3404 );
3405 int summarize();
3406 void detail(String details);
3407 ArrayList<ERROR_CODE> getErrorList();
3408 void progress();
3409 void print(String message);
3410 void resetErrors();
3411 boolean tableHasErrors(TableInfo table);
3412 }
3413
3414 static class PrintingErrorReporter implements ErrorReporter {
3415 public int errorCount = 0;
3416 private int showProgress;
3417
3418 Set<TableInfo> errorTables = new HashSet<TableInfo>();
3419
3420
3421 private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3422
3423 @Override
3424 public void clear() {
3425 errorTables.clear();
3426 errorList.clear();
3427 errorCount = 0;
3428 }
3429
3430 @Override
3431 public synchronized void reportError(ERROR_CODE errorCode, String message) {
3432 if (errorCode == ERROR_CODE.WRONG_USAGE) {
3433 System.err.println(message);
3434 return;
3435 }
3436
3437 errorList.add(errorCode);
3438 if (!summary) {
3439 System.out.println("ERROR: " + message);
3440 }
3441 errorCount++;
3442 showProgress = 0;
3443 }
3444
3445 @Override
3446 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3447 errorTables.add(table);
3448 reportError(errorCode, message);
3449 }
3450
3451 @Override
3452 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3453 HbckInfo info) {
3454 errorTables.add(table);
3455 String reference = "(region " + info.getRegionNameAsString() + ")";
3456 reportError(errorCode, reference + " " + message);
3457 }
3458
3459 @Override
3460 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3461 HbckInfo info1, HbckInfo info2) {
3462 errorTables.add(table);
3463 String reference = "(regions " + info1.getRegionNameAsString()
3464 + " and " + info2.getRegionNameAsString() + ")";
3465 reportError(errorCode, reference + " " + message);
3466 }
3467
3468 @Override
3469 public synchronized void reportError(String message) {
3470 reportError(ERROR_CODE.UNKNOWN, message);
3471 }
3472
3473
3474
3475
3476
3477
3478 @Override
3479 public synchronized void report(String message) {
3480 if (! summary) {
3481 System.out.println("ERROR: " + message);
3482 }
3483 showProgress = 0;
3484 }
3485
3486 @Override
3487 public synchronized int summarize() {
3488 System.out.println(Integer.toString(errorCount) +
3489 " inconsistencies detected.");
3490 if (errorCount == 0) {
3491 System.out.println("Status: OK");
3492 return 0;
3493 } else {
3494 System.out.println("Status: INCONSISTENT");
3495 return -1;
3496 }
3497 }
3498
3499 @Override
3500 public ArrayList<ERROR_CODE> getErrorList() {
3501 return errorList;
3502 }
3503
3504 @Override
3505 public synchronized void print(String message) {
3506 if (!summary) {
3507 System.out.println(message);
3508 }
3509 }
3510
3511 @Override
3512 public boolean tableHasErrors(TableInfo table) {
3513 return errorTables.contains(table);
3514 }
3515
3516 @Override
3517 public void resetErrors() {
3518 errorCount = 0;
3519 }
3520
3521 @Override
3522 public synchronized void detail(String message) {
3523 if (details) {
3524 System.out.println(message);
3525 }
3526 showProgress = 0;
3527 }
3528
3529 @Override
3530 public synchronized void progress() {
3531 if (showProgress++ == 10) {
3532 if (!summary) {
3533 System.out.print(".");
3534 }
3535 showProgress = 0;
3536 }
3537 }
3538 }
3539
3540 /**
3541 * Work item that contacts one region server and records the regions it hosts.
3542 */
3543 static class WorkItemRegion implements Callable<Void> {
3544 private HBaseFsck hbck;
3545 private ServerName rsinfo;
3546 private ErrorReporter errors;
3547 private HConnection connection;
3548
3549 WorkItemRegion(HBaseFsck hbck, ServerName info,
3550 ErrorReporter errors, HConnection connection) {
3551 this.hbck = hbck;
3552 this.rsinfo = info;
3553 this.errors = errors;
3554 this.connection = connection;
3555 }
3556
3557 @Override
3558 public synchronized Void call() throws IOException {
3559 errors.progress();
3560 try {
3561 BlockingInterface server = connection.getAdmin(rsinfo);
3562
3563
3564 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3565 regions = filterRegions(regions);
3566
3567 if (details) {
3568 errors.detail("RegionServer: " + rsinfo.getServerName() +
3569 " number of regions: " + regions.size());
3570 for (HRegionInfo rinfo: regions) {
3571 errors.detail(" " + rinfo.getRegionNameAsString() +
3572 " id: " + rinfo.getRegionId() +
3573 " encoded_name: " + rinfo.getEncodedName() +
3574 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
3575 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
3576 }
3577 }
3578
3579
3580 for (HRegionInfo r:regions) {
3581 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3582 hbi.addServer(r, rsinfo);
3583 }
3584 } catch (IOException e) {
3585 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
3586 " Unable to fetch region information. " + e);
3587 throw e;
3588 }
3589 return null;
3590 }
3591
3592 private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
3593 List<HRegionInfo> ret = Lists.newArrayList();
3594 for (HRegionInfo hri : regions) {
3595 if (hri.isMetaTable() || (!hbck.checkMetaOnly
3596 && hbck.isTableIncluded(hri.getTable()))) {
3597 ret.add(hri);
3598 }
3599 }
3600 return ret;
3601 }
3602 }
3603
3604
3605 /**
3606 * Work item that scans one table directory in HDFS and records its region dirs.
3607 */
3608 static class WorkItemHdfsDir implements Callable<Void> {
3609 private HBaseFsck hbck;
3610 private FileStatus tableDir;
3611 private ErrorReporter errors;
3612 private FileSystem fs;
3613
3614 WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
3615 FileStatus status) {
3616 this.hbck = hbck;
3617 this.fs = fs;
3618 this.tableDir = status;
3619 this.errors = errors;
3620 }
3621
3622 @Override
3623 public synchronized Void call() throws IOException {
3624 try {
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676 /**
3677 * Work item that loads the .regioninfo file from HDFS for one region, if it has not been loaded yet.
3678 */
3679 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3680 private HbckInfo hbi;
3681 private HBaseFsck hbck;
3682 private ErrorReporter errors;
3683
3684 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
3685 this.hbi = hbi;
3686 this.hbck = hbck;
3687 this.errors = errors;
3688 }
3689
3690 @Override
3691 public synchronized Void call() throws IOException {
3692
3693 if (hbi.getHdfsHRI() == null) {
3694 try {
3695 hbck.loadHdfsRegioninfo(hbi);
3696 } catch (IOException ioe) {
3697 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3698 + hbi.getTableName() + " in hdfs dir "
3699 + hbi.getHdfsRegionDir()
3700 + "! It may be an invalid format or version file. Treating as "
3701 + "an orphaned regiondir.";
3702 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3703 try {
3704 hbck.debugLsr(hbi.getHdfsRegionDir());
3705 } catch (IOException ioe2) {
3706 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3707 throw ioe2;
3708 }
3709 hbck.orphanHdfsDirs.add(hbi);
3710 throw ioe;
3711 }
3712 }
3713 return null;
3714 }
3715 };
3716
3717
3718
3719
3720
3721 public static void setDisplayFullReport() {
3722 details = true;
3723 }
3724
3725
3726
3727
3728
3729 void setSummary() {
3730 summary = true;
3731 }
3732
3733
3734
3735
3736
3737 void setCheckMetaOnly() {
3738 checkMetaOnly = true;
3739 }
3740
3741
3742
3743
3744 void setRegionBoundariesCheck() {
3745 checkRegionBoundaries = true;
3746 }
3747
3748
3749
3750
3751
3752 public void setFixTableLocks(boolean shouldFix) {
3753 fixTableLocks = shouldFix;
3754 fixAny |= shouldFix;
3755 }
3756
3757
3758
3759
3760
3761 public void setFixTableZNodes(boolean shouldFix) {
3762 fixTableZNodes = shouldFix;
3763 fixAny |= shouldFix;
3764 }
3765
3766
3767
3768
3769
3770
3771
3772 void setShouldRerun() {
3773 rerun = true;
3774 }
3775
3776 boolean shouldRerun() {
3777 return rerun;
3778 }
3779
3780
3781
3782
3783
3784 public void setFixAssignments(boolean shouldFix) {
3785 fixAssignments = shouldFix;
3786 fixAny |= shouldFix;
3787 }
3788
3789 boolean shouldFixAssignments() {
3790 return fixAssignments;
3791 }
3792
3793 public void setFixMeta(boolean shouldFix) {
3794 fixMeta = shouldFix;
3795 fixAny |= shouldFix;
3796 }
3797
3798 boolean shouldFixMeta() {
3799 return fixMeta;
3800 }
3801
3802 public void setFixEmptyMetaCells(boolean shouldFix) {
3803 fixEmptyMetaCells = shouldFix;
3804 fixAny |= shouldFix;
3805 }
3806
3807 boolean shouldFixEmptyMetaCells() {
3808 return fixEmptyMetaCells;
3809 }
3810
3811 public void setCheckHdfs(boolean checking) {
3812 checkHdfs = checking;
3813 }
3814
3815 boolean shouldCheckHdfs() {
3816 return checkHdfs;
3817 }
3818
3819 public void setFixHdfsHoles(boolean shouldFix) {
3820 fixHdfsHoles = shouldFix;
3821 fixAny |= shouldFix;
3822 }
3823
3824 boolean shouldFixHdfsHoles() {
3825 return fixHdfsHoles;
3826 }
3827
3828 public void setFixTableOrphans(boolean shouldFix) {
3829 fixTableOrphans = shouldFix;
3830 fixAny |= shouldFix;
3831 }
3832
3833 boolean shouldFixTableOrphans() {
3834 return fixTableOrphans;
3835 }
3836
3837 public void setFixHdfsOverlaps(boolean shouldFix) {
3838 fixHdfsOverlaps = shouldFix;
3839 fixAny |= shouldFix;
3840 }
3841
3842 boolean shouldFixHdfsOverlaps() {
3843 return fixHdfsOverlaps;
3844 }
3845
3846 public void setFixHdfsOrphans(boolean shouldFix) {
3847 fixHdfsOrphans = shouldFix;
3848 fixAny |= shouldFix;
3849 }
3850
3851 boolean shouldFixHdfsOrphans() {
3852 return fixHdfsOrphans;
3853 }
3854
3855 public void setFixVersionFile(boolean shouldFix) {
3856 fixVersionFile = shouldFix;
3857 fixAny |= shouldFix;
3858 }
3859
3860 public boolean shouldFixVersionFile() {
3861 return fixVersionFile;
3862 }
3863
3864 public void setSidelineBigOverlaps(boolean sbo) {
3865 this.sidelineBigOverlaps = sbo;
3866 }
3867
3868 public boolean shouldSidelineBigOverlaps() {
3869 return sidelineBigOverlaps;
3870 }
3871
3872 public void setFixSplitParents(boolean shouldFix) {
3873 fixSplitParents = shouldFix;
3874 fixAny |= shouldFix;
3875 }
3876
3877 boolean shouldFixSplitParents() {
3878 return fixSplitParents;
3879 }
3880
3881 public void setFixReferenceFiles(boolean shouldFix) {
3882 fixReferenceFiles = shouldFix;
3883 fixAny |= shouldFix;
3884 }
3885
3886 boolean shouldFixReferenceFiles() {
3887 return fixReferenceFiles;
3888 }
3889
3890 public boolean shouldIgnorePreCheckPermission() {
3891 return !fixAny || ignorePreCheckPermission;
3892 }
3893
3894 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3895 this.ignorePreCheckPermission = ignorePreCheckPermission;
3896 }
3897
3898
3899
3900
3901 public void setMaxMerge(int mm) {
3902 this.maxMerge = mm;
3903 }
3904
3905 public int getMaxMerge() {
3906 return maxMerge;
3907 }
3908
3909 public void setMaxOverlapsToSideline(int mo) {
3910 this.maxOverlapsToSideline = mo;
3911 }
3912
3913 public int getMaxOverlapsToSideline() {
3914 return maxOverlapsToSideline;
3915 }
3916
3917
3918
3919
3920
3921 boolean isTableIncluded(TableName table) {
3922 return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
3923 }
3924
3925 public void includeTable(TableName table) {
3926 tablesIncluded.add(table);
3927 }
3928
3929 Set<TableName> getIncludedTables() {
3930 return new HashSet<TableName>(tablesIncluded);
3931 }
3932
3933
3934
3935
3936
3937
3938 public void setTimeLag(long seconds) {
3939 timelag = seconds * 1000;
3940 }
3941
3942
3943
3944
3945
3946 public void setSidelineDir(String sidelineDir) {
3947 this.sidelineDir = new Path(sidelineDir);
3948 }
3949
3950 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3951 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3952 }
3953
3954 public HFileCorruptionChecker getHFilecorruptionChecker() {
3955 return hfcc;
3956 }
3957
3958 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3959 this.hfcc = hfcc;
3960 }
3961
3962 public void setRetCode(int code) {
3963 this.retcode = code;
3964 }
3965
3966 public int getRetCode() {
3967 return retcode;
3968 }
3969
3970 protected HBaseFsck printUsageAndExit() {
3971 StringWriter sw = new StringWriter(2048);
3972 PrintWriter out = new PrintWriter(sw);
3973 out.println("Usage: fsck [opts] {only tables}");
3974 out.println(" where [opts] are:");
3975 out.println(" -help Display help options (this)");
3976 out.println(" -details Display full report of all regions.");
3977 out.println(" -timelag <timeInSeconds> Process only regions that " +
3978 " have not experienced any metadata updates in the last " +
3979 " <timeInSeconds> seconds.");
3980 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3981 " before checking if the fix worked if run with -fix");
3982 out.println(" -summary Print only summary of the tables and status.");
3983 out.println(" -metaonly Only check the state of the hbase:meta table.");
3984 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3985 out.println(" -boundaries Verify that region boundaries are the same between META and store files.");
3986
3987 out.println("");
3988 out.println(" Metadata Repair options: (expert features, use with caution!)");
3989 out.println(" -fix Try to fix region assignments. This is for backwards compatibility");
3990 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
3991 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
3992 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
3993 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3994 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
3995 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
3996 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3997 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
3998 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
3999 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4000 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
4001 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4002 out.println(" -fixSplitParents Try to force offline split parents to be online.");
4003 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
4004 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
4005 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4006 + " (empty REGIONINFO_QUALIFIER rows)");
4007
4008 out.println("");
4009 out.println(" Datafile Repair options: (expert features, use with caution!)");
4010 out.println(" -checkCorruptHFiles Check all HFiles by opening them to make sure they are valid");
4011 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. Implies -checkCorruptHFiles");
4012
4013 out.println("");
4014 out.println(" Metadata Repair shortcuts");
4015 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4016 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4017 "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4018 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4019
4020 out.println("");
4021 out.println(" Table lock options");
4022 out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4023
4024 out.println("");
4025 out.println(" Table Znode options");
4026 out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if the table does not exist");
4027
4028 out.flush();
4029 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4030
4031 setRetCode(-2);
4032 return this;
4033 }
4034
4035
4036 /**
4037  * Main program. Typically invoked through the hbck wrapper, e.g. {@code ./bin/hbase hbck -details}.
4038  * @param args the hbck command-line arguments
4039  * @throws Exception on any unrecoverable error
4040  */
4041 public static void main(String[] args) throws Exception {
4042
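// Make the HBase root directory's filesystem the default FS so relative paths resolve against it.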
4043 Configuration conf = HBaseConfiguration.create();
4044 Path hbasedir = FSUtils.getRootDir(conf);
4045 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4046 FSUtils.setFsDefault(conf, new Path(defaultFs));
4047 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4048 System.exit(ret);
4049 }
4050
4051
4052
4053 /** Tool wrapper so hbck can be driven by ToolRunner and pick up -Dkey=value configuration overrides. */
4054 static class HBaseFsckTool extends Configured implements Tool {
4055 HBaseFsckTool(Configuration conf) { super(conf); }
4056 @Override
4057 public int run(String[] args) throws Exception {
4058 HBaseFsck hbck = new HBaseFsck(getConf());
4059 hbck.exec(hbck.executor, args);
4060 return hbck.getRetCode();
4061 }
4062 }
4063
4064
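/**
 * Parse the hbck command-line arguments, run the requested checks and repairs, and, when fixes
 * were applied, run a second report-only pass to verify the cluster is consistent.
 */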
4065 public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4066 ServiceException, InterruptedException {
4067 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4068
4069 boolean checkCorruptHFiles = false;
4070 boolean sidelineCorruptHFiles = false;
4071
4072
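// Parse command-line options; any argument that does not start with '-' is treated as a table to include.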
4073 for (int i = 0; i < args.length; i++) {
4074 String cmd = args[i];
4075 if (cmd.equals("-help") || cmd.equals("-h")) {
4076 return printUsageAndExit();
4077 } else if (cmd.equals("-details")) {
4078 setDisplayFullReport();
4079 } else if (cmd.equals("-timelag")) {
4080 if (i == args.length - 1) {
4081 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4082 return printUsageAndExit();
4083 }
4084 try {
4085 long timelag = Long.parseLong(args[i+1]);
4086 setTimeLag(timelag);
4087 } catch (NumberFormatException e) {
4088 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4089 return printUsageAndExit();
4090 }
4091 i++;
4092 } else if (cmd.equals("-sleepBeforeRerun")) {
4093 if (i == args.length - 1) {
4094 errors.reportError(ERROR_CODE.WRONG_USAGE,
4095 "HBaseFsck: -sleepBeforeRerun needs a value.");
4096 return printUsageAndExit();
4097 }
4098 try {
4099 sleepBeforeRerun = Long.parseLong(args[i+1]);
4100 } catch (NumberFormatException e) {
4101 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4102 return printUsageAndExit();
4103 }
4104 i++;
4105 } else if (cmd.equals("-sidelineDir")) {
4106 if (i == args.length - 1) {
4107 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4108 return printUsageAndExit();
4109 }
4110 i++;
4111 setSidelineDir(args[i]);
4112 } else if (cmd.equals("-fix")) {
4113 errors.reportError(ERROR_CODE.WRONG_USAGE,
4114 "This option is deprecated, please use -fixAssignments instead.");
4115 setFixAssignments(true);
4116 } else if (cmd.equals("-fixAssignments")) {
4117 setFixAssignments(true);
4118 } else if (cmd.equals("-fixMeta")) {
4119 setFixMeta(true);
4120 } else if (cmd.equals("-noHdfsChecking")) {
4121 setCheckHdfs(false);
4122 } else if (cmd.equals("-fixHdfsHoles")) {
4123 setFixHdfsHoles(true);
4124 } else if (cmd.equals("-fixHdfsOrphans")) {
4125 setFixHdfsOrphans(true);
4126 } else if (cmd.equals("-fixTableOrphans")) {
4127 setFixTableOrphans(true);
4128 } else if (cmd.equals("-fixHdfsOverlaps")) {
4129 setFixHdfsOverlaps(true);
4130 } else if (cmd.equals("-fixVersionFile")) {
4131 setFixVersionFile(true);
4132 } else if (cmd.equals("-sidelineBigOverlaps")) {
4133 setSidelineBigOverlaps(true);
4134 } else if (cmd.equals("-fixSplitParents")) {
4135 setFixSplitParents(true);
4136 } else if (cmd.equals("-ignorePreCheckPermission")) {
4137 setIgnorePreCheckPermission(true);
4138 } else if (cmd.equals("-checkCorruptHFiles")) {
4139 checkCorruptHFiles = true;
4140 } else if (cmd.equals("-sidelineCorruptHFiles")) {
4141 sidelineCorruptHFiles = true;
4142 } else if (cmd.equals("-fixReferenceFiles")) {
4143 setFixReferenceFiles(true);
4144 } else if (cmd.equals("-fixEmptyMetaCells")) {
4145 setFixEmptyMetaCells(true);
4146 } else if (cmd.equals("-repair")) {
4147
4148 // -repair shortcut: enable the full set of standard repair options (split parents are left alone)
4149 setFixHdfsHoles(true);
4150 setFixHdfsOrphans(true);
4151 setFixMeta(true);
4152 setFixAssignments(true);
4153 setFixHdfsOverlaps(true);
4154 setFixVersionFile(true);
4155 setSidelineBigOverlaps(true);
4156 setFixSplitParents(false);
4157 setCheckHdfs(true);
4158 setFixReferenceFiles(true);
4159 setFixTableLocks(true);
4160 setFixTableZNodes(true);
4161 } else if (cmd.equals("-repairHoles")) {
4162 // -repairHoles shortcut: fix assignments, meta and HDFS holes only; no orphan or overlap repair
4163 setFixHdfsHoles(true);
4164 setFixHdfsOrphans(false);
4165 setFixMeta(true);
4166 setFixAssignments(true);
4167 setFixHdfsOverlaps(false);
4168 setSidelineBigOverlaps(false);
4169 setFixSplitParents(false);
4170 setCheckHdfs(true);
4171 } else if (cmd.equals("-maxOverlapsToSideline")) {
4172 if (i == args.length - 1) {
4173 errors.reportError(ERROR_CODE.WRONG_USAGE,
4174 "-maxOverlapsToSideline needs a numeric value argument.");
4175 return printUsageAndExit();
4176 }
4177 try {
4178 int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4179 setMaxOverlapsToSideline(maxOverlapsToSideline);
4180 } catch (NumberFormatException e) {
4181 errors.reportError(ERROR_CODE.WRONG_USAGE,
4182 "-maxOverlapsToSideline needs a numeric value argument.");
4183 return printUsageAndExit();
4184 }
4185 i++;
4186 } else if (cmd.equals("-maxMerge")) {
4187 if (i == args.length - 1) {
4188 errors.reportError(ERROR_CODE.WRONG_USAGE,
4189 "-maxMerge needs a numeric value argument.");
4190 return printUsageAndExit();
4191 }
4192 try {
4193 int maxMerge = Integer.parseInt(args[i+1]);
4194 setMaxMerge(maxMerge);
4195 } catch (NumberFormatException e) {
4196 errors.reportError(ERROR_CODE.WRONG_USAGE,
4197 "-maxMerge needs a numeric value argument.");
4198 return printUsageAndExit();
4199 }
4200 i++;
4201 } else if (cmd.equals("-summary")) {
4202 setSummary();
4203 } else if (cmd.equals("-metaonly")) {
4204 setCheckMetaOnly();
4205 } else if (cmd.equals("-boundaries")) {
4206 setRegionBoundariesCheck();
4207 } else if (cmd.equals("-fixTableLocks")) {
4208 setFixTableLocks(true);
4209 } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4210 setFixTableZNodes(true);
4211 } else if (cmd.startsWith("-")) {
4212 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4213 return printUsageAndExit();
4214 } else {
4215 includeTable(TableName.valueOf(cmd));
4216 errors.print("Allow checking/fixes for table: " + cmd);
4217 }
4218 }
4219
4220 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4221
4222
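// Pre-check filesystem permissions for the current user; exit early if they are insufficient.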
4223 try {
4224 preCheckPermission();
4225 } catch (AccessDeniedException ace) {
4226 Runtime.getRuntime().exit(-1);
4227 } catch (IOException ioe) {
4228 Runtime.getRuntime().exit(-1);
4229 }
4230
4231
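// Connect to the cluster before doing any online work.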
4232 connect();
4233
4234 try {
4235
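// If HFile corruption checking was requested, run it before the online checks.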
4236 if (checkCorruptHFiles || sidelineCorruptHFiles) {
4237 LOG.info("Checking all hfiles for corruption");
4238 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4239 setHFileCorruptionChecker(hfcc);
4240 Collection<TableName> tables = getIncludedTables();
4241 Collection<Path> tableDirs = new ArrayList<Path>();
4242 Path rootdir = FSUtils.getRootDir(getConf());
4243 if (tables.size() > 0) {
4244 for (TableName t : tables) {
4245 tableDirs.add(FSUtils.getTableDir(rootdir, t));
4246 }
4247 } else {
4248 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4249 }
4250 hfcc.checkTables(tableDirs);
4251 hfcc.report(errors);
4252 }
4253
4254
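// Run the main online consistency check (table integrity and region consistency).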
4255 int code = onlineHbck();
4256 setRetCode(code);
4257
4258
4259
4260
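// If any fixes were applied, sleep briefly and run a second pass to confirm the repairs took effect.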
4261 if (shouldRerun()) {
4262 try {
4263 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4264 Thread.sleep(sleepBeforeRerun);
4265 } catch (InterruptedException ie) {
4266 return this;
4267 }
4268
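// The second pass is report-only: turn off the fix flags so nothing further is modified.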
4269 setFixAssignments(false);
4270 setFixMeta(false);
4271 setFixHdfsHoles(false);
4272 setFixHdfsOverlaps(false);
4273 setFixVersionFile(false);
4274 setFixTableOrphans(false);
4275 errors.resetErrors();
4276 code = onlineHbck();
4277 setRetCode(code);
4278 }
4279 } finally {
4280 IOUtils.cleanup(null, connection, meta, admin);
4281 }
4282 return this;
4283 }
4284
4285
4286
4287 /** Recursive "ls -r" of the given path for debugging, reported through this hbck's error reporter. */
4288 void debugLsr(Path p) throws IOException {
4289 debugLsr(getConf(), p, errors);
4290 }
4291
4292
4293
4294 /** Recursive "ls -r" of the given path for debugging, using a fresh PrintingErrorReporter. */
4295 public static void debugLsr(Configuration conf,
4296 Path p) throws IOException {
4297 debugLsr(conf, p, new PrintingErrorReporter());
4298 }
4299
4300
4301
4302 /** Recursive "ls -r" of the given path for debugging; no-op unless DEBUG logging is enabled. */
4303 public static void debugLsr(Configuration conf,
4304 Path p, ErrorReporter errors) throws IOException {
4305 if (!LOG.isDebugEnabled() || p == null) {
4306 return;
4307 }
4308 FileSystem fs = p.getFileSystem(conf);
4309
4310 if (!fs.exists(p)) {
4311 // path does not exist; nothing to list
4312 return;
4313 }
4314 errors.print(p.toString());
4315
4316 if (fs.isFile(p)) {
4317 return;
4318 }
4319
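// Directory: recurse into each child entry.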
4320 if (fs.getFileStatus(p).isDir()) {
4321 FileStatus[] fss = fs.listStatus(p);
4322 for (FileStatus status : fss) {
4323 debugLsr(conf, status.getPath(), errors);
4324 }
4325 }
4326 }
4327 }