/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool used to copy a table to another one, which can be on a different cluster.
 * It is also configurable with a start and end time, as well as a specification
 * of the region server implementation if it differs from the local cluster.
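 * <p>
 * Example invocation (taken from the usage text this tool prints):
 * <pre>
 * $ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable \
 *     --starttime=1265875194289 --endtime=1265878794289 \
 *     --peer.adr=server1,server2,server3:2181:/hbase \
 *     --families=myOldCf:myNewCf,cf2,cf3 TestTable
 * </pre>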
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CopyTable extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CopyTable.class);

  final static String NAME = "copytable";
  long startTime = 0;
  long endTime = 0;
  int versions = -1;
  String tableName = null;
  String startRow = null;
  String stopRow = null;
  String dstTableName = null;
  String peerAddress = null;
  String families = null;
  boolean allCells = false;

  boolean bulkload = false;
  Path bulkloadDir = null;

  private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";

  // The following variables are introduced to preserve binary compatibility with 0.98.
  // Please see HBASE-12836 for further details.
  @Deprecated
  static long startTime_ = 0;
  @Deprecated
  static long endTime_ = 0;
  @Deprecated
  static int versions_ = -1;
  @Deprecated
  static String tableName_ = null;
  @Deprecated
  static String startRow_ = null;
  @Deprecated
  static String stopRow_ = null;
  @Deprecated
  static String newTableName_ = null;
  @Deprecated
  static String peerAddress_ = null;
  @Deprecated
  static String families_ = null;
  @Deprecated
  static boolean allCells_ = false;

  public CopyTable(Configuration conf) {
    super(conf);
  }

  /**
   * Sets up the actual job.
   *
   * @param conf The current configuration.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   * @deprecated Use {@link #createSubmittableJob(String[])} instead
   */
  @Deprecated
  public static Job createSubmittableJob(Configuration conf, String[] args)
      throws IOException {
    if (!deprecatedDoCommandLine(args)) {
      return null;
    }
    Job job = new Job(conf, NAME + "_" + tableName_);
    job.setJarByClass(CopyTable.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    if (startTime_ != 0) {
      scan.setTimeRange(startTime_,
          endTime_ == 0 ? HConstants.LATEST_TIMESTAMP : endTime_);
    }
    if (allCells_) {
      scan.setRaw(true);
    }
    if (versions_ >= 0) {
      scan.setMaxVersions(versions_);
    }
    if (startRow_ != null) {
      scan.setStartRow(Bytes.toBytes(startRow_));
    }
    if (stopRow_ != null) {
      scan.setStopRow(Bytes.toBytes(stopRow_));
    }
    if (families_ != null) {
      String[] fams = families_.split(",");
      Map<String, String> cfRenameMap = new HashMap<String, String>();
      for (String fam : fams) {
        String sourceCf;
        if (fam.contains(":")) {
          // fam looks like "sourceCfName:destCfName"
          String[] srcAndDest = fam.split(":", 2);
          sourceCf = srcAndDest[0];
          String destCf = srcAndDest[1];
          cfRenameMap.put(sourceCf, destCf);
        } else {
          // fam is just "sourceCf"
          sourceCf = fam;
        }
        scan.addFamily(Bytes.toBytes(sourceCf));
      }
      Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
    }
    TableMapReduceUtil.initTableMapperJob(tableName_, scan,
        Import.Importer.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(
        newTableName_ == null ? tableName_ : newTableName_, null, job,
        null, peerAddress_, null, null);
    job.setNumReduceTasks(0);
    return job;
  }

  private static boolean deprecatedDoCommandLine(final String[] args) {
    // Process command-line args. TODO: Better cmd-line processing
    // (but hopefully something not as painful as cli options).
    if (args.length < 1) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }
        final String startRowArgKey = "--startrow=";
        if (cmd.startsWith(startRowArgKey)) {
          startRow_ = cmd.substring(startRowArgKey.length());
          continue;
        }
        final String stopRowArgKey = "--stoprow=";
        if (cmd.startsWith(stopRowArgKey)) {
          stopRow_ = cmd.substring(stopRowArgKey.length());
          continue;
        }
        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime_ = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }
        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime_ = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }
        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions_ = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }
        final String newNameArgKey = "--new.name=";
        if (cmd.startsWith(newNameArgKey)) {
          newTableName_ = cmd.substring(newNameArgKey.length());
          continue;
        }
        final String peerAdrArgKey = "--peer.adr=";
        if (cmd.startsWith(peerAdrArgKey)) {
          peerAddress_ = cmd.substring(peerAdrArgKey.length());
          continue;
        }
        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families_ = cmd.substring(familiesArgKey.length());
          continue;
        }
        if (cmd.startsWith("--all.cells")) {
          allCells_ = true;
          continue;
        }
        if (i == args.length - 1) {
          tableName_ = cmd;
        } else {
          printUsage("Invalid argument '" + cmd + "'");
          return false;
        }
      }
      if (newTableName_ == null && peerAddress_ == null) {
        printUsage("At least a new table name or a " +
            "peer address must be specified");
        return false;
      }
      if ((endTime_ != 0) && (startTime_ > endTime_)) {
        printUsage("Invalid time range filter: starttime=" + startTime_ + " > endtime="
            + endTime_);
        return false;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Sets up the actual job.
   *
   * @param args  The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public Job createSubmittableJob(String[] args)
      throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }

    Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CopyTable.class);
    Scan scan = new Scan();
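    // Block caching is disabled for the scan below: a full-table MapReduce scan
    // reads each block only once, so caching those blocks would just evict
    // hotter data from the source region servers' block cache.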
    scan.setCacheBlocks(false);
    if (startTime != 0) {
      scan.setTimeRange(startTime,
          endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    }
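    // --all.cells: a raw scan also returns delete markers and deleted cells
    // that have not yet been garbage collected.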
    if (allCells) {
      scan.setRaw(true);
    }
    if (versions >= 0) {
      scan.setMaxVersions(versions);
    }

    if (startRow != null) {
      scan.setStartRow(Bytes.toBytes(startRow));
    }

    if (stopRow != null) {
      scan.setStopRow(Bytes.toBytes(stopRow));
    }

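    // Each --families entry is either "cf" (copy as-is) or "sourceCf:destCf"
    // (rename while copying); the rename map is handed to the Import mapper
    // through the job configuration.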
    if (families != null) {
      String[] fams = families.split(",");
      Map<String, String> cfRenameMap = new HashMap<String, String>();
      for (String fam : fams) {
        String sourceCf;
        if (fam.contains(":")) {
          // fam looks like "sourceCfName:destCfName"
          String[] srcAndDest = fam.split(":", 2);
          sourceCf = srcAndDest[0];
          String destCf = srcAndDest[1];
          cfRenameMap.put(sourceCf, destCf);
        } else {
          // fam is just "sourceCf"
          sourceCf = fam;
        }
        scan.addFamily(Bytes.toBytes(sourceCf));
      }
      Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
    }
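    // Map-only job: the mapper either writes directly to the destination table
    // or, in bulkload mode, writes HFiles that are bulk loaded afterwards.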
    job.setNumReduceTasks(0);

    if (bulkload) {
      TableMapReduceUtil.initTableMapperJob(tableName, scan, Import.KeyValueImporter.class, null,
        null, job);

      // We need to split the inputs by destination tables so that the map output can be bulk loaded.
      TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));

      FileSystem fs = FileSystem.get(getConf());
      Random rand = new Random();
      Path root = new Path(fs.getWorkingDirectory(), "copytable");
      fs.mkdirs(root);
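      // Probe for a random subdirectory under "copytable" that does not exist
      // yet and use it to stage the generated HFiles.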
      while (true) {
        bulkloadDir = new Path(root, "" + rand.nextLong());
        if (!fs.exists(bulkloadDir)) {
          break;
        }
      }

      System.out.println("HFiles will be stored at " + this.bulkloadDir);
      HFileOutputFormat2.setOutputPath(job, bulkloadDir);
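      // Open the destination table so the output format can be configured to
      // match its column family layout before the HFiles are written.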
      HTable htable = new HTable(getConf(), TableName.valueOf(dstTableName));
      try {
        HFileOutputFormat2.configureIncrementalLoadMap(job, htable);
      } finally {
        htable.close();
      }
    } else {
      TableMapReduceUtil.initTableMapperJob(tableName, scan,
        Import.Importer.class, null, null, job);

      TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
        null);
    }

    return job;
  }

  /*
   * @param errorMsg Error message.  Can be null.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
        "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
    System.err.println("              specify if different from current cluster");
    System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
    System.err.println(" startrow     the start row");
    System.err.println(" stoprow      the stop row");
    System.err.println(" starttime    beginning of the time range (unixtime in millis)");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range.  Ignored if no starttime specified.");
    System.err.println(" versions     number of cell versions to copy");
    System.err.println(" new.name     new table's name");
    System.err.println(" peer.adr     Address of the peer cluster given in the format");
    System.err.println("              hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
    System.err.println(" families     comma-separated list of families to copy");
    System.err.println("              To copy from cf1 to cf2, give sourceCfName:destCfName. ");
    System.err.println("              To keep the same name, just give \"cfName\"");
    System.err.println(" all.cells    also copy delete markers and deleted cells");
    System.err.println(" bulkload     Write input into HFiles and bulk load to the destination "
        + "table");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" tablename    Name of the table to copy");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1-hour window:");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
        "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
    System.err.println("For performance consider the following general options:\n"
        + "-Dhbase.client.scanner.caching=100\n"
        + "-Dmapred.map.tasks.speculative.execution=false");
  }

  private boolean doCommandLine(final String[] args) {
    // Process command-line args. TODO: Better cmd-line processing
    // (but hopefully something not as painful as cli options).
    if (args.length < 1) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startRowArgKey = "--startrow=";
        if (cmd.startsWith(startRowArgKey)) {
          startRow = cmd.substring(startRowArgKey.length());
          continue;
        }

        final String stopRowArgKey = "--stoprow=";
        if (cmd.startsWith(stopRowArgKey)) {
          stopRow = cmd.substring(stopRowArgKey.length());
          continue;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String newNameArgKey = "--new.name=";
        if (cmd.startsWith(newNameArgKey)) {
          dstTableName = cmd.substring(newNameArgKey.length());
          continue;
        }

        final String peerAdrArgKey = "--peer.adr=";
        if (cmd.startsWith(peerAdrArgKey)) {
          peerAddress = cmd.substring(peerAdrArgKey.length());
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

        if (cmd.startsWith("--all.cells")) {
          allCells = true;
          continue;
        }

        if (cmd.startsWith("--bulkload")) {
          bulkload = true;
          continue;
        }

        if (i == args.length - 1) {
          tableName = cmd;
        } else {
          printUsage("Invalid argument '" + cmd + "'");
          return false;
        }
      }
      if (dstTableName == null && peerAddress == null) {
        printUsage("At least a new table name or a " +
            "peer address must be specified");
        return false;
      }
      if ((endTime != 0) && (startTime > endTime)) {
        printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
        return false;
      }

      if (bulkload && peerAddress != null) {
        printUsage("Remote bulkload is not supported!");
        return false;
      }

      // set dstTableName if necessary
      if (dstTableName == null) {
        dstTableName = tableName;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Main entry point.
   *
   * @param args  The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new CopyTable(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

  @Override
  public int run(String[] args) throws Exception {
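    // Let GenericOptionsParser peel off generic Hadoop options (-D, -conf,
    // -fs, -jt, ...) before the tool-specific arguments are parsed.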
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    Job job = createSubmittableJob(otherArgs);
    if (job == null) return 1;
    if (!job.waitForCompletion(true)) {
      LOG.info("Map-reduce job failed!");
      if (bulkload) {
        LOG.info("Files are not bulkloaded!");
      }
      return 1;
    }
    int code = 0;
    if (bulkload) {
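      // Hand the staged HFiles to LoadIncrementalHFiles to complete the copy
      // into the destination table.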
      code = new LoadIncrementalHFiles(this.getConf()).run(new String[]{this.bulkloadDir.toString(),
          this.dstTableName});
      if (code == 0) {
        // bulkloadDir is deleted only if LoadIncrementalHFiles was successful, so
        // that one can rerun LoadIncrementalHFiles on failure.
        FileSystem fs = FileSystem.get(this.getConf());
        if (!fs.delete(this.bulkloadDir, true)) {
          LOG.error("Deleting folder " + bulkloadDir + " failed!");
          code = 1;
        }
      }
    }
    return code;
  }
}