/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController;
import org.apache.hadoop.hbase.mapreduce.JobUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;

/*
 * The CompactionTool can run a compaction over:
 * <ul>
 *  <li>a table folder (all regions and families will be compacted)
 *  <li>a region folder (all families in the region will be compacted)
 *  <li>a family folder (the store files of that family will be compacted)
 * </ul>
 */
@InterfaceAudience.Public
public class CompactionTool extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CompactionTool.class);

  private final static String CONF_TMP_DIR = "hbase.tmp.dir";
  private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
  private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
  private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
  private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";

  /**
   * Class responsible for executing the compaction on the specified path.
   * The path can be a table, region or family directory.
   */
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;
    private final boolean deleteCompacted;
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;

    public CompactionWorker(final FileSystem fs, final Configuration conf) {
      this.conf = conf;
      this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
      this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
      this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
      this.fs = fs;
    }
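
    // With hbase.hstore.compaction.complete=false the compacted input files are
    // kept in place; if hbase.compactiontool.delete=true is also set, they are
    // deleted explicitly after each compaction round (see compactStoreFiles below).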

    /**
     * Execute the compaction on the specified path.
     *
     * @param path Directory path on which to run compaction.
     * @param compactOnce Execute just a single step of compaction.
     * @param major Request a major compaction.
     */
    public void compact(final Path path, final boolean compactOnce, final boolean major)
        throws IOException {
      if (isFamilyDir(fs, path)) {
        Path regionDir = path.getParent();
        Path tableDir = regionDir.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
        HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
        compactStoreFiles(tableDir, htd, hri, path.getName(), compactOnce, major);
      } else if (isRegionDir(fs, path)) {
        Path tableDir = path.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
        compactRegion(tableDir, htd, path, compactOnce, major);
      } else if (isTableDir(fs, path)) {
        compactTable(path, compactOnce, major);
      } else {
        throw new IOException(
          "Specified path is not a table, region or family directory. path=" + path);
      }
    }

    private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
        throws IOException {
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
      for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
        compactRegion(tableDir, htd, regionDir, compactOnce, major);
      }
    }

    private void compactRegion(final Path tableDir, final HTableDescriptor htd,
        final Path regionDir, final boolean compactOnce, final boolean major)
        throws IOException {
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
      for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
        compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
      }
    }

    /**
     * Execute the actual compaction job.
     * If the compact-once flag is not specified, compaction is repeated until
     * the store no longer needs it, using the Configuration settings provided.
     */
    private void compactStoreFiles(final Path tableDir, final HTableDescriptor htd,
        final HRegionInfo hri, final String familyName, final boolean compactOnce,
        final boolean major) throws IOException {
      HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
      LOG.info("Compact table=" + htd.getTableName() +
        " region=" + hri.getRegionNameAsString() +
        " family=" + familyName);
      if (major) {
        store.triggerMajorCompaction();
      }
      do {
        CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
        if (compaction == null) break;
        List<StoreFile> storeFiles =
            store.compact(compaction, NoLimitCompactionThroughputController.INSTANCE);
        if (storeFiles != null && !storeFiles.isEmpty()) {
          if (keepCompactedFiles && deleteCompacted) {
            for (StoreFile storeFile: storeFiles) {
              fs.delete(storeFile.getPath(), false);
            }
          }
        }
      } while (store.needsCompaction() && !compactOnce);
    }

    /**
     * Create a "mock" HStore that uses the tmpDir specified by the user and
     * the store dir to compact as the source.
     */
    private static HStore getStore(final Configuration conf, final FileSystem fs,
        final Path tableDir, final HTableDescriptor htd, final HRegionInfo hri,
        final String familyName, final Path tempDir) throws IOException {
      HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
        @Override
        public Path getTempDir() {
          return tempDir;
        }
      };
      HRegion region = new HRegion(regionFs, null, conf, htd, null);
      return new HStore(region, htd.getFamily(Bytes.toBytes(familyName)), conf);
    }
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    return FSTableDescriptors.getTableInfoPath(fs, path) != null;
  }

  private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
    return isRegionDir(fs, path.getParent());
  }

  private static class CompactionMapper
      extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    private CompactionWorker compactor = null;
    private boolean compactOnce = false;
    private boolean major = false;

    @Override
    public void setup(Context context) {
      Configuration conf = context.getConfiguration();
      compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
      major = conf.getBoolean(CONF_COMPACT_MAJOR, false);

      try {
        FileSystem fs = FileSystem.get(conf);
        this.compactor = new CompactionWorker(fs, conf);
      } catch (IOException e) {
        throw new RuntimeException("Could not get the input FileSystem", e);
      }
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws InterruptedException, IOException {
      Path path = new Path(value.toString());
      this.compactor.compact(path, compactOnce, major);
    }
  }

  /**
   * Input format that uses store file block locations as input split locality.
   */
  private static class CompactionInputFormat extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
      return true;
    }

    /**
     * Returns a split for each store-files directory, using the block
     * locations of each file as the locality reference.
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      List<InputSplit> splits = new ArrayList<InputSplit>();
      List<FileStatus> files = listStatus(job);

      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            // Compute locality from the store directory named on this line,
            // not from the input file being read.
            String[] hosts = getStoreDirHosts(fs, new Path(key.toString()));
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }
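
    // Note: each FileSplit spans exactly one line of the input file, so the
    // LineRecordReader created by TextInputFormat hands a single store
    // directory to each map task.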

    /**
     * Return the top hosts of the store files, used for split locality.
     */
    private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
        throws IOException {
      FileStatus[] files = FSUtils.listStatus(fs, path);
      if (files == null) {
        return new String[] {};
      }

      HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
      for (FileStatus hfileStatus: files) {
        HDFSBlocksDistribution storeFileBlocksDistribution =
            FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
        hdfsBlocksDistribution.add(storeFileBlocksDistribution);
      }

      List<String> hosts = hdfsBlocksDistribution.getTopHosts();
      return hosts.toArray(new String[hosts.size()]);
    }

    /**
     * Create the input file for the given directories to compact.
     * The file is a text file with one store directory per line, each of
     * which will be compacted.
     */
    public static void createInputFile(final FileSystem fs, final Path path,
        final Set<Path> toCompactDirs) throws IOException {
      // Extract the list of store dirs
      List<Path> storeDirs = new LinkedList<Path>();
      for (Path compactDir: toCompactDirs) {
        if (isFamilyDir(fs, compactDir)) {
          storeDirs.add(compactDir);
        } else if (isRegionDir(fs, compactDir)) {
          for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
            storeDirs.add(familyDir);
          }
        } else if (isTableDir(fs, compactDir)) {
          // Lookup regions
          for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
            for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
              storeDirs.add(familyDir);
            }
          }
        } else {
          throw new IOException(
            "Specified path is not a table, region or family directory. path=" + compactDir);
        }
      }

      // Write input file
      FSDataOutputStream stream = fs.create(path);
      LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
      try {
        final byte[] newLine = Bytes.toBytes("\n");
        for (Path storeDir: storeDirs) {
          stream.write(Bytes.toBytes(storeDir.toString()));
          stream.write(newLine);
        }
      } finally {
        stream.close();
      }
    }
  }
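
  // For illustration only (the hashed region directory name below is hypothetical):
  // the input file written by createInputFile() holds one store directory per line,
  // and getSplits() turns each line into one FileSplit, so each map task compacts
  // exactly one store, e.g.
  //   hdfs:///hbase/data/default/TestTable/f1c2d3e4.../x
  //   hdfs:///hbase/data/default/TestTable/f1c2d3e4.../y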

  /**
   * Execute compaction, using a MapReduce job.
   */
  private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce, final boolean major) throws Exception {
    Configuration conf = getConf();
    conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
    conf.setBoolean(CONF_COMPACT_MAJOR, major);

    Job job = new Job(conf);
    job.setJobName("CompactionTool");
    job.setJarByClass(CompactionTool.class);
    job.setMapperClass(CompactionMapper.class);
    job.setInputFormatClass(CompactionInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);

    // add dependencies (including HBase ones)
    TableMapReduceUtil.addDependencyJars(job);
    // instantiating HRegions also needs the high-scale-lib Counter class
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.cliffc.high_scale_lib.Counter.class);

    Path stagingDir = JobUtil.getStagingDir(conf);
    try {
      // Create input file with the store dirs
      Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTimeMillis());
      CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
      CompactionInputFormat.addInputPath(job, inputPath);

      // Initialize credentials for secure clusters
      TableMapReduceUtil.initCredentials(job);

      // Start the MR job and wait
      return job.waitForCompletion(true) ? 0 : 1;
    } finally {
      fs.delete(stagingDir, true);
    }
  }

  /**
   * Execute compaction from this client, one path at a time.
   */
  private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce, final boolean major) throws IOException {
    CompactionWorker worker = new CompactionWorker(fs, getConf());
    for (Path path: toCompactDirs) {
      worker.compact(path, compactOnce, major);
    }
    return 0;
  }

  @Override
  public int run(String[] args) throws Exception {
    Set<Path> toCompactDirs = new HashSet<Path>();
    boolean compactOnce = false;
    boolean major = false;
    boolean mapred = false;

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    try {
      for (int i = 0; i < args.length; ++i) {
        String opt = args[i];
        if (opt.equals("-compactOnce")) {
          compactOnce = true;
        } else if (opt.equals("-major")) {
          major = true;
        } else if (opt.equals("-mapred")) {
          mapred = true;
        } else if (!opt.startsWith("-")) {
          Path path = new Path(opt);
          FileStatus status = fs.getFileStatus(path);
          if (!status.isDir()) {
            printUsage("Specified path is not a directory. path=" + path);
            return 1;
          }
          toCompactDirs.add(path);
        } else {
          printUsage();
        }
      }
    } catch (Exception e) {
      printUsage(e.getMessage());
      return 1;
    }

    if (toCompactDirs.size() == 0) {
      printUsage("No directories to compact specified.");
      return 1;
    }

    // Execute compaction!
    if (mapred) {
      return doMapReduce(fs, toCompactDirs, compactOnce, major);
    } else {
      return doClient(fs, toCompactDirs, compactOnce, major);
    }
  }

  private void printUsage() {
    printUsage(null);
  }

  private void printUsage(final String message) {
    if (message != null && message.length() > 0) {
      System.err.println(message);
    }
    System.err.println("Usage: java " + this.getClass().getName() + " \\");
    System.err.println("  [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" mapred         Use MapReduce to run compaction.");
    System.err.println(" compactOnce    Execute just one compaction step. (default: while needed)");
    System.err.println(" major          Trigger major compaction.");
    System.err.println();
    System.err.println("Note: -D properties will be applied to the conf used.");
    System.err.println("For example:");
    System.err.println(" To preserve input files, pass -D" + CONF_COMPLETE_COMPACTION + "=false");
    System.err.println(" To stop the deletion of compacted files, pass -D" + CONF_DELETE_COMPACTED + "=false");
    System.err.println(" To set the tmp dir, pass -D" + CONF_TMP_DIR + "=ALTERNATE_DIR");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To compact the full 'TestTable' using MapReduce:");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/data/default/TestTable");
    System.err.println();
    System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " hdfs:///hbase/data/default/TestTable/abc/x");
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
  }
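
  // A minimal sketch of driving the tool from code rather than from the shell;
  // the table path is illustrative, mirroring the usage examples above:
  //   int exit = ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(),
  //       new String[] { "-mapred", "-major", "hdfs:///hbase/data/default/TestTable" });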
}