/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce.replication;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationFactory;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * This map-only job compares the data from a local table with that of a
 * remote peer. Every cell is compared and must have exactly the same keys
 * (including timestamp) as well as the same value. The job can be restricted
 * to a time range and to a set of families. The peer id provided must match
 * the one given when the replication stream was set up.
 * <p>
 * Counters such as Verifier.Counters.GOODROWS and BADROWS report the outcome;
 * the reason why a row differs is written to the map task's log.
 */
public class VerifyReplication extends Configured implements Tool {

  private static final Log LOG =
      LogFactory.getLog(VerifyReplication.class);

  public final static String NAME = "verifyrep";
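  // Parsed once from the command line in doCommandLine() and copied into the
  // job configuration in createSubmittableJob(); the mappers run in separate
  // JVMs and only see values that were published to the configuration.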
  static long startTime = 0;
  static long endTime = Long.MAX_VALUE;
  static int versions = -1;
  static String tableName = null;
  static String families = null;
  static String peerId = null;

  /**
   * Map-only comparator for 2 tables.
   */
  public static class Verifier
      extends TableMapper<ImmutableBytesWritable, Put> {

    public static enum Counters {
      GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS
    }

    private HTable replicatedTable;
    private ResultScanner replicatedScanner;
    private Result currentCompareRowInPeerTable;

    /**
     * Map method that compares every scanned row with the equivalent from
     * a distant cluster.
     * @param row  The current table row key.
     * @param value  The columns.
     * @param context  The current context.
     * @throws IOException When something is broken with the data.
     */
    @Override
    public void map(ImmutableBytesWritable row, final Result value,
        Context context)
        throws IOException {
      if (replicatedScanner == null) {
        final Configuration conf = context.getConfiguration();
        final Scan scan = new Scan();
        scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
        long startTime = conf.getLong(NAME + ".startTime", 0);
        long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
        int versions = conf.getInt(NAME + ".versions", -1);
        String families = conf.get(NAME + ".families", null);
        if (families != null) {
          String[] fams = families.split(",");
          for (String fam : fams) {
            scan.addFamily(Bytes.toBytes(fam));
          }
        }
        scan.setTimeRange(startTime, endTime);
        if (versions >= 0) {
          scan.setMaxVersions(versions);
        }

        final TableSplit tableSplit = (TableSplit)(context.getInputSplit());
        HConnectionManager.execute(new HConnectable<Void>(conf) {
          @Override
          public Void connect(HConnection conn) throws IOException {
            String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
            Configuration peerConf = HBaseConfiguration.create(conf);
            ZKUtil.applyClusterKeyToConf(peerConf, zkClusterKey);

            // Open the peer table once and keep it so cleanup() can close it;
            // restrict the peer scan to this mapper's split so both scanners
            // cover the same key range.
            replicatedTable = new HTable(peerConf, conf.get(NAME + ".tableName"));
            scan.setStartRow(tableSplit.getStartRow());
            scan.setStopRow(tableSplit.getEndRow());
            replicatedScanner = replicatedTable.getScanner(scan);
            return null;
          }
        });
        currentCompareRowInPeerTable = replicatedScanner.next();
      }
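      // Walk the local scan and the peer scan in lock step, the way a sorted
      // merge does: matching keys are compared cell by cell, a smaller source
      // key means the peer is missing that row, and a larger one means the
      // peer has a row the source does not.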
      while (true) {
        if (currentCompareRowInPeerTable == null) {
          // reached the end of the peer table's region, row only in source table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        }
        int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
        if (rowCmpRet == 0) {
          // rowkey is the same, need to compare the content of the row
          try {
            Result.compareResults(value, currentCompareRowInPeerTable);
            context.getCounter(Counters.GOODROWS).increment(1);
          } catch (Exception e) {
            logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
          }
          currentCompareRowInPeerTable = replicatedScanner.next();
          break;
        } else if (rowCmpRet < 0) {
          // row only exists in source table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        } else {
          // row only exists in peer table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
              currentCompareRowInPeerTable);
          currentCompareRowInPeerTable = replicatedScanner.next();
        }
      }
    }

    private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
      context.getCounter(counter).increment(1);
      context.getCounter(Counters.BADROWS).increment(1);
      LOG.error(counter.toString() + ", rowkey=" + Bytes.toString(row.getRow()));
    }

    @Override
    protected void cleanup(Context context) {
      if (replicatedScanner != null) {
        try {
          // Anything left in the peer scanner exists only in the peer table.
          while (currentCompareRowInPeerTable != null) {
            logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
                currentCompareRowInPeerTable);
            currentCompareRowInPeerTable = replicatedScanner.next();
          }
        } catch (Exception e) {
          LOG.error("Failed to scan peer table in cleanup", e);
        } finally {
          replicatedScanner.close();
          replicatedScanner = null;
        }
      }
      if (replicatedTable != null) {
        try {
          replicatedTable.close();
        } catch (IOException e) {
          LOG.error("Failed to close the peer table in cleanup", e);
        }
        replicatedTable = null;
      }
    }
  }

  private static String getPeerQuorumAddress(final Configuration conf) throws IOException {
    ZooKeeperWatcher localZKW = null;
    try {
      localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
          new Abortable() {
            @Override public void abort(String why, Throwable e) {}
            @Override public boolean isAborted() {return false;}
          });

      ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
      rp.init();

      Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
      if (pair == null) {
        throw new IOException("Couldn't get peer conf!");
      }
      Configuration peerConf = pair.getSecond();
      return ZKUtil.getZooKeeperClusterKey(peerConf);
    } catch (ReplicationException e) {
      throw new IOException(
          "An error occurred while trying to connect to the remote peer cluster", e);
    } finally {
      if (localZKW != null) {
        localZKW.close();
      }
    }
  }

  /**
   * Sets up the actual job.
   *
   * @param conf  The current configuration.
   * @param args  The command line parameters.
   * @return The newly created job.
   * @throws java.io.IOException When setting up the job fails.
   */
  public static Job createSubmittableJob(Configuration conf, String[] args)
      throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }
    if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
        HConstants.REPLICATION_ENABLE_DEFAULT)) {
      throw new IOException("Replication needs to be enabled to verify it.");
    }
    conf.set(NAME + ".peerId", peerId);
    conf.set(NAME + ".tableName", tableName);
    conf.setLong(NAME + ".startTime", startTime);
    conf.setLong(NAME + ".endTime", endTime);
    conf.setInt(NAME + ".versions", versions);
    if (families != null) {
      conf.set(NAME + ".families", families);
    }

    // Resolve the peer's ZooKeeper cluster key once, on the client, and ship
    // it to the mappers through the configuration.
    String peerQuorumAddress = getPeerQuorumAddress(conf);
    conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
    LOG.info("Peer Quorum Address: " + peerQuorumAddress);

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(VerifyReplication.class);

    Scan scan = new Scan();
    scan.setTimeRange(startTime, endTime);
    if (versions >= 0) {
      scan.setMaxVersions(versions);
    }
    if (families != null) {
      String[] fams = families.split(",");
      for (String fam : fams) {
        scan.addFamily(Bytes.toBytes(fam));
      }
    }
    TableMapReduceUtil.initTableMapperJob(tableName, scan,
        Verifier.class, null, null, job);

    // Obtain an authentication token for the peer cluster, in case security
    // is enabled.
    TableMapReduceUtil.initCredentialsForCluster(job, peerQuorumAddress);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
  }

  private static boolean doCommandLine(final String[] args) {
    if (args.length < 2) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

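        // The last two positional arguments are the peer id and the table
        // name; any other argument that did not match an option above is
        // silently ignored.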
        if (i == args.length - 2) {
          peerId = cmd;
        }

        if (i == args.length - 1) {
          tableName = cmd;
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /*
   * @param errorMsg Error message. Can be null.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: verifyrep [--starttime=X]" +
        " [--endtime=Y] [--versions=N] [--families=A] <peerid> <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" starttime    beginning of the time range");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range");
    System.err.println(" versions     number of cell versions to verify");
    System.err.println(" families     comma-separated list of families to verify");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" peerid       Id of the peer used for verification, must match the one given for replication");
    System.err.println(" tablename    Name of the table to verify");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
        " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
  }

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = createSubmittableJob(conf, args);
    if (job != null) {
      return job.waitForCompletion(true) ? 0 : 1;
    }
    return 1;
  }

  /**
   * Main entry point.
   *
   * @param args  The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
    System.exit(res);
  }
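
  // A minimal sketch of driving the tool from code rather than the shell,
  // mirroring what main() does; the peer id "1" and table "usertable" below
  // are illustrative values, not defaults:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   int exitCode = ToolRunner.run(conf, new VerifyReplication(),
  //       new String[] { "--starttime=1265875194289",
  //           "--endtime=1265878794289", "1", "usertable" });
  //   System.exit(exitCode);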
}