/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;

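/**
 * Tests for the MapReduce flavour of {@link TableSnapshotInputFormat}: best-location
 * calculation, snapshot job configuration, split and record-reader behaviour against a
 * mocked MapReduce context, and full MapReduce jobs that read a restored snapshot.
 */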
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {

  public static byte[] bbb = Bytes.toBytes("bbb");
  public static byte[] yyy = Bytes.toBytes("yyy");

  @Override
  protected byte[] getStartRow() {
    return bbb;
  }

  @Override
  protected byte[] getEndRow() {
    return yyy;
  }

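  // getBestLocations() is expected to return only the hosts whose block weight is close to
  // that of the top host, ordered by descending weight.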
  @Test
  public void testGetBestLocations() throws IOException {
    TableSnapshotInputFormatImpl tsif = new TableSnapshotInputFormatImpl();
    Configuration conf = UTIL.getConfiguration();

    HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
    Assert.assertEquals(Lists.newArrayList(), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution = new HDFSBlocksDistribution();
    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
    Assert.assertEquals(Lists.newArrayList("h1", "h2"),
      tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
    Assert.assertEquals(Lists.newArrayList("h2", "h1"),
      tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);

    Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
      tsif.getBestLocations(conf, blockDistribution));
  }

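  // Counter that jobs built from the mapper/reducer classes below can use to flag
  // validation failures.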
  public static enum TestTableSnapshotCounters {
    VALIDATION_ERROR
  }

  public static class TestTableSnapshotMapper
    extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
        Context context) throws IOException, InterruptedException {
      // Validate a single row coming from the snapshot, and emit the row key
      verifyRowFromMap(key, value);
      context.write(key, NullWritable.get());
    }
  }

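  // Tracks the row keys emitted by the mapper and verifies in cleanup() that every expected
  // row in the bbb-yyy range was seen.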
  public static class TestTableSnapshotReducer
    extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
        Context context) throws IOException, InterruptedException {
      rowTracker.addRow(key.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      rowTracker.validate();
    }
  }

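  // Verifies that initTableSnapshotMapperJob leaves the block cache at the default
  // LruBlockCache size and disables the SlabCache and BucketCache settings in the job
  // configuration.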
  @Test
  public void testInitTableSnapshotMapperJobConfig() throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
    String snapshotName = "foo";

    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
      Job job = new Job(UTIL.getConfiguration());
      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

      // TODO: would be better to examine directly the cache instance that results from this
      // config. Currently this is not possible because BlockCache initialization is static.
      Assert.assertEquals(
        "Snapshot job should be configured for default LruBlockCache.",
        HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
        job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use SlabCache.",
        0, job.getConfiguration().getFloat("hbase.offheapcache.percentage", -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use BucketCache.",
        0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

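  // Creates a table and snapshot, configures a snapshot-reading job, and then drives the
  // input format directly against a mocked TaskAttemptContext instead of running a real
  // MapReduce job.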
  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits) throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(
        util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

      Job job = new Job(util.getConfiguration());
      Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
      Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
          NullWritable.class, job, false, tmpTableDir);

      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

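  // Asserts that the input format produces the expected number of splits, reads every split
  // back through its record reader, and checks that all rows between startRow and stopRow
  // are seen.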
  private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow)
      throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    List<InputSplit> splits = tsif.getSplits(job);

    Assert.assertEquals(expectedNumSplits, splits.size());

    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.size(); i++) {
      // validate input split
      InputSplit split = splits.get(i);
      Assert.assertTrue(split instanceof TableSnapshotRegionSplit);

      // validate record reader
      TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
      when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
      RecordReader<ImmutableBytesWritable, Result> rr =
          tsif.createRecordReader(split, taskAttemptContext);
      rr.initialize(split, taskAttemptContext);

      // validate we can read all the data back
      while (rr.nextKeyValue()) {
        byte[] row = rr.getCurrentKey().get();
        verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
        rowTracker.addRow(row);
      }

      rr.close();
    }

    // validate all rows are seen
    rowTracker.validate();
  }

  @Override
  protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {
    doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
      numRegions, expectedNumSplits, shutdownCluster);
  }

  // This is also called by IntegrationTestTableSnapshotInputFormat.
  public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
      String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
      int expectedNumSplits, boolean shutdownCluster) throws Exception {

    // create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

    if (shutdownCluster) {
      util.shutdownMiniHBaseCluster();
    }

    try {
      // create the job
      Job job = new Job(util.getConfiguration());
      Scan scan = new Scan(startRow, endRow); // limit the scan

      job.setJarByClass(util.getClass());
      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        TestTableSnapshotInputFormat.class);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, true, tableDir);

      job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      job.setNumReduceTasks(1);
      job.setOutputFormatClass(NullOutputFormat.class);

      Assert.assertTrue(job.waitForCompletion(true));
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}