/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool used to copy a table to another one which can be on a different setup.
 * It is also configurable with a start and stop time as well as a specification
 * of the region server implementation if different from the local cluster.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CopyTable extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CopyTable.class);

  final static String NAME = "copytable";
  long startTime = 0;
  long endTime = 0;
  int versions = -1;
  String tableName = null;
  String startRow = null;
  String stopRow = null;
  String dstTableName = null;
  String peerAddress = null;
  String families = null;
  boolean allCells = false;

  boolean bulkload = false;
  Path bulkloadDir = null;

  private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";

  // Deprecated static mirrors of the instance fields above; retained only to
  // back the deprecated createSubmittableJob(Configuration, String[]) path.
  @Deprecated
  static long startTime_ = 0;
  @Deprecated
  static long endTime_ = 0;
  @Deprecated
  static int versions_ = -1;
  @Deprecated
  static String tableName_ = null;
  @Deprecated
  static String startRow_ = null;
  @Deprecated
  static String stopRow_ = null;
  @Deprecated
  static String newTableName_ = null;
  @Deprecated
  static String peerAddress_ = null;
  @Deprecated
  static String families_ = null;
  @Deprecated
  static boolean allCells_ = false;

  public CopyTable(Configuration conf) {
    super(conf);
  }
  /**
   * Sets up the actual job.
   *
   * @param conf The current configuration.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   * @deprecated Use {@link #createSubmittableJob(String[])} instead.
   */
  @Deprecated
  public static Job createSubmittableJob(Configuration conf, String[] args)
  throws IOException {
    if (!deprecatedDoCommandLine(args)) {
      return null;
    }
    Job job = new Job(conf, NAME + "_" + tableName_);
    job.setJarByClass(CopyTable.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    if (startTime_ != 0) {
      scan.setTimeRange(startTime_,
          endTime_ == 0 ? HConstants.LATEST_TIMESTAMP : endTime_);
    }
    if (allCells_) {
      scan.setRaw(true);
    }
    if (versions_ >= 0) {
      scan.setMaxVersions(versions_);
    }
    if (startRow_ != null) {
      scan.setStartRow(Bytes.toBytes(startRow_));
    }
    if (stopRow_ != null) {
      scan.setStopRow(Bytes.toBytes(stopRow_));
    }
    if (families_ != null) {
      String[] fams = families_.split(",");
      Map<String,String> cfRenameMap = new HashMap<String,String>();
      for (String fam : fams) {
        String sourceCf;
        if (fam.contains(":")) {
          // fam looks like "sourceCfName:destCfName"
          String[] srcAndDest = fam.split(":", 2);
          sourceCf = srcAndDest[0];
          String destCf = srcAndDest[1];
          cfRenameMap.put(sourceCf, destCf);
        } else {
          // fam is just "sourceCfName"
          sourceCf = fam;
        }
        scan.addFamily(Bytes.toBytes(sourceCf));
      }
      Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
    }
    TableMapReduceUtil.initTableMapperJob(tableName_, scan,
        Import.Importer.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(
        newTableName_ == null ? tableName_ : newTableName_, null, job,
        null, peerAddress_, null, null);
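    // initTableReducerJob above only wires up TableOutputFormat for the
    // destination table/peer; with zero reduce tasks the Importer mapper
    // writes its puts directly through that output format.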
    job.setNumReduceTasks(0);
    return job;
  }

  private static boolean deprecatedDoCommandLine(final String[] args) {
    // Process command-line args. TODO: Better cmd-line processing
    // (but hopefully something not as painful as cli options).
    if (args.length < 1) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }
        final String startRowArgKey = "--startrow=";
        if (cmd.startsWith(startRowArgKey)) {
          startRow_ = cmd.substring(startRowArgKey.length());
          continue;
        }
        final String stopRowArgKey = "--stoprow=";
        if (cmd.startsWith(stopRowArgKey)) {
          stopRow_ = cmd.substring(stopRowArgKey.length());
          continue;
        }
        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime_ = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }
        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime_ = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }
        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions_ = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }
        final String newNameArgKey = "--new.name=";
        if (cmd.startsWith(newNameArgKey)) {
          newTableName_ = cmd.substring(newNameArgKey.length());
          continue;
        }
        final String peerAdrArgKey = "--peer.adr=";
        if (cmd.startsWith(peerAdrArgKey)) {
          peerAddress_ = cmd.substring(peerAdrArgKey.length());
          continue;
        }
        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families_ = cmd.substring(familiesArgKey.length());
          continue;
        }
        if (cmd.startsWith("--all.cells")) {
          allCells_ = true;
          continue;
        }
        if (i == args.length - 1) {
          tableName_ = cmd;
        } else {
          printUsage("Invalid argument '" + cmd + "'");
          return false;
        }
      }
      if (newTableName_ == null && peerAddress_ == null) {
        printUsage("At least a new table name or a " +
            "peer address must be specified");
        return false;
      }
      if ((endTime_ != 0) && (startTime_ > endTime_)) {
        printUsage("Invalid time range filter: starttime=" + startTime_ + " > endtime="
            + endTime_);
        return false;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Sets up the actual job.
   *
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public Job createSubmittableJob(String[] args)
  throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }

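    // The job name defaults to "copytable_<tablename>" but can be overridden
    // with -Dmapreduce.job.name=<name> via the JOB_NAME_CONF_KEY above.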
    Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CopyTable.class);
    Scan scan = new Scan();
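    // Disable block caching for the scan: a full-table MapReduce read would
    // otherwise evict hotter entries from the region servers' block cache.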
    scan.setCacheBlocks(false);
    if (startTime != 0) {
      scan.setTimeRange(startTime,
          endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    }
    if (allCells) {
      scan.setRaw(true);
    }
    if (versions >= 0) {
      scan.setMaxVersions(versions);
    }

    if (startRow != null) {
      scan.setStartRow(Bytes.toBytes(startRow));
    }

    if (stopRow != null) {
      scan.setStopRow(Bytes.toBytes(stopRow));
    }

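    // Each entry of --families is either "sourceCf" (copy under the same name)
    // or "sourceCf:destCf" (rename on the fly via Import's rename map).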
    if (families != null) {
      String[] fams = families.split(",");
      Map<String,String> cfRenameMap = new HashMap<String,String>();
      for (String fam : fams) {
        String sourceCf;
        if (fam.contains(":")) {
          // fam looks like "sourceCfName:destCfName"
          String[] srcAndDest = fam.split(":", 2);
          sourceCf = srcAndDest[0];
          String destCf = srcAndDest[1];
          cfRenameMap.put(sourceCf, destCf);
        } else {
          // fam is just "sourceCfName"
          sourceCf = fam;
        }
        scan.addFamily(Bytes.toBytes(sourceCf));
      }
      Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
    }
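    // Both the bulkload and the direct-write paths below run map-only: all
    // output is produced in the map phase.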
    job.setNumReduceTasks(0);

    if (bulkload) {
      TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
          null, job);

      // We need to split the inputs by the destination table's regions so that
      // the HFiles written by the map phase line up with those regions.
      TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));

      FileSystem fs = FileSystem.get(getConf());
      Random rand = new Random();
      Path root = new Path(fs.getWorkingDirectory(), "copytable");
      fs.mkdirs(root);
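      // Pick a random, not-yet-existing subdirectory of copytable/ as the
      // staging directory for the generated HFiles.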
      while (true) {
        bulkloadDir = new Path(root, "" + rand.nextLong());
        if (!fs.exists(bulkloadDir)) {
          break;
        }
      }

      System.out.println("HFiles will be stored at " + this.bulkloadDir);
      HFileOutputFormat2.setOutputPath(job, bulkloadDir);
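      // configureIncrementalLoadMap reads the destination table's descriptor
      // (families, compression, bloom settings) so the HFiles match the table.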
      HTable htable = new HTable(getConf(), TableName.valueOf(dstTableName));
      try {
        HFileOutputFormat2.configureIncrementalLoadMap(job, htable);
      } finally {
        htable.close();
      }
    } else {
      TableMapReduceUtil.initTableMapperJob(tableName, scan,
          Import.Importer.class, null, null, job);

      TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
          null);
    }

    return job;
  }

  /*
   * @param errorMsg Error message. Can be null.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
        "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
    System.err.println("              specify if different from current cluster");
    System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
    System.err.println(" startrow     the start row");
    System.err.println(" stoprow      the stop row");
    System.err.println(" starttime    beginning of the time range (unixtime in millis)");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range.  Ignored if no starttime specified.");
    System.err.println(" versions     number of cell versions to copy");
    System.err.println(" new.name     new table's name");
    System.err.println(" peer.adr     Address of the peer cluster given in the format");
    System.err.println("              hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
    System.err.println(" families     comma-separated list of families to copy");
    System.err.println("              To copy from cf1 to cf2, give sourceCfName:destCfName.");
    System.err.println("              To keep the same name, just give \"cfName\"");
    System.err.println(" all.cells    also copy delete markers and deleted cells");
    System.err.println(" bulkload     Write input into HFiles and bulk load to the destination "
        + "table");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" tablename    Name of the table to copy");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
        "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable");
    System.err.println("For performance consider the following general options:\n"
        + "-Dhbase.client.scanner.caching=100\n"
        + "-Dmapred.map.tasks.speculative.execution=false");
  }

  private boolean doCommandLine(final String[] args) {
    // Process command-line args. TODO: Better cmd-line processing
    // (but hopefully something not as painful as cli options).
    if (args.length < 1) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startRowArgKey = "--startrow=";
        if (cmd.startsWith(startRowArgKey)) {
          startRow = cmd.substring(startRowArgKey.length());
          continue;
        }

        final String stopRowArgKey = "--stoprow=";
        if (cmd.startsWith(stopRowArgKey)) {
          stopRow = cmd.substring(stopRowArgKey.length());
          continue;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String newNameArgKey = "--new.name=";
        if (cmd.startsWith(newNameArgKey)) {
          dstTableName = cmd.substring(newNameArgKey.length());
          continue;
        }

        final String peerAdrArgKey = "--peer.adr=";
        if (cmd.startsWith(peerAdrArgKey)) {
          peerAddress = cmd.substring(peerAdrArgKey.length());
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

        if (cmd.startsWith("--all.cells")) {
          allCells = true;
          continue;
        }

        if (cmd.startsWith("--bulkload")) {
          bulkload = true;
          continue;
        }

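        // Anything that is not a recognized flag must be the single
        // positional argument (the source table name), and it must come last.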
        if (i == args.length - 1) {
          tableName = cmd;
        } else {
          printUsage("Invalid argument '" + cmd + "'");
          return false;
        }
      }
      if (dstTableName == null && peerAddress == null) {
        printUsage("At least a new table name or a " +
            "peer address must be specified");
        return false;
      }
      if ((endTime != 0) && (startTime > endTime)) {
        printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
        return false;
      }

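      // HFiles are staged on this cluster's filesystem, so bulk load cannot
      // target a remote peer cluster.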
      if (bulkload && peerAddress != null) {
        printUsage("Remote bulkload is not supported!");
        return false;
      }

      // set dstTableName if necessary
      if (dstTableName == null) {
        dstTableName = tableName;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Main entry point.
   *
   * @param args The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new CopyTable(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

  @Override
  public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    Job job = createSubmittableJob(otherArgs);
    if (job == null) return 1;
    if (!job.waitForCompletion(true)) {
      LOG.error("Map-reduce job failed!");
      if (bulkload) {
        LOG.error("Files are not bulkloaded!");
      }
      return 1;
    }
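    // On success, hand the staged HFiles to LoadIncrementalHFiles, which moves
    // them into the destination table's regions.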
    int code = 0;
    if (bulkload) {
      code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
          this.dstTableName});
      if (code == 0) {
        // bulkloadDir is deleted only if LoadIncrementalHFiles succeeded, so
        // that a failed bulk load can be rerun against the staged files.
        FileSystem fs = FileSystem.get(this.getConf());
        if (!fs.delete(this.bulkloadDir, true)) {
          LOG.error("Deleting folder " + bulkloadDir + " failed!");
          code = 1;
        }
      }
    }
    return code;
  }
}