/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests various scan start and stop row scenarios. The boundaries are set on
 * each scan and then verified in a MapReduce job to check that they are
 * handed over to, and honored by, MultiTableInputFormat.
 */
@Category(LargeTests.class)
public class TestMultiTableInputFormat {

  static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
  static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  static final String TABLE_NAME = "scantest";
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
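  // Configuration keys used to hand the expected first and last row over to
  // the reducer, which asserts them in cleanup().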
  static final String KEY_STARTROW = "startRow";
  static final String KEY_LASTROW = "stpRow";

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // switch the input formats to log at DEBUG level
    TEST_UTIL.enableDebug(MultiTableInputFormat.class);
    TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
    // start mini hbase cluster
    TEST_UTIL.startMiniCluster(3);
    // create and fill three tables that share the same key space
    for (int i = 0; i < 3; i++) {
      HTable table =
          TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME + String.valueOf(i)), INPUT_FAMILY);
      TEST_UTIL.createMultiRegions(TEST_UTIL.getConfiguration(), table, INPUT_FAMILY, 4);
      TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
    }
    // start MR cluster
    TEST_UTIL.startMiniMapReduceCluster();
  }
86  
87    @AfterClass
88    public static void tearDownAfterClass() throws Exception {
89      TEST_UTIL.shutdownMiniMapReduceCluster();
90      TEST_UTIL.shutdownMiniCluster();
91    }
92    
93    @After
94    public void tearDown() throws Exception {
95      Configuration c = TEST_UTIL.getConfiguration();
96      FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
97    }

  /**
   * Passes each key, with the key repeated as the value, on to the reducer.
   */
  public static class ScanMapper extends
      TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
    /**
     * Passes the key and value on to the reducer.
     *
     * @param key The key, here "aaa", "aab" etc.
     * @param value The value; in this test it holds the same bytes as the key.
     * @param context The task context.
     * @throws IOException When reading the rows fails.
     * @throws InterruptedException When the task is interrupted.
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
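      // loadTable() wrote a single column per row, so anything else means
      // the scan picked up unexpected data.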
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
          value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
            Bytes.toString(INPUT_FAMILY) + "'.");
      }
      String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
      LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
          ", value -> " + val);
      context.write(key, key);
    }
  }

  /**
   * Checks the first and last keys seen against the scan boundaries.
   */
  public static class ScanReducer extends
      Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
          NullWritable, NullWritable> {
    private String first = null;
    private String last = null;

    @Override
    protected void reduce(ImmutableBytesWritable key,
        Iterable<ImmutableBytesWritable> values, Context context)
        throws IOException, InterruptedException {
      int count = 0;
      for (ImmutableBytesWritable value : values) {
        String val = Bytes.toStringBinary(value.get());
        LOG.debug("reduce: key[" + count + "] -> " +
            Bytes.toStringBinary(key.get()) + ", value -> " + val);
        if (first == null) first = val;
        last = val;
        count++;
      }
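      // Each of the three tables contains every row exactly once, so every
      // key should arrive with exactly three values.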
      assertEquals(3, count);
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      Configuration c = context.getConfiguration();
      String startRow = c.get(KEY_STARTROW);
      String lastRow = c.get(KEY_LASTROW);
      LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
          startRow + "\"");
      LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
          "\"");
      if (startRow != null && startRow.length() > 0) {
        assertEquals(startRow, first);
      }
      if (lastRow != null && lastRow.length() > 0) {
        assertEquals(lastRow, last);
      }
    }
  }

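  // Scan stop rows are exclusive, so the expected last row handed to
  // testScan() is the key just before the stop row in the "aaa".."zzz"
  // key space written by loadTable().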
  @Test
  public void testScanEmptyToEmpty() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan(null, null, null);
  }

  @Test
  public void testScanEmptyToAPP() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan(null, "app", "apo");
  }

  @Test
  public void testScanOBBToOPP() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan("obb", "opp", "opo");
  }

  @Test
  public void testScanYZYToEmpty() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan("yzy", null, "zzz");
  }

  /**
   * Tests a MR scan using specific start and stop rows.
   *
   * @param start The start row, or null to scan from the beginning.
   * @param stop The exclusive stop row, or null to scan to the end.
   * @param last The last row the reducer is expected to see, or null.
   * @throws IOException When setting up the job fails.
   * @throws ClassNotFoundException When the job's classes cannot be found.
   * @throws InterruptedException When waiting for the job is interrupted.
   */
  private void testScan(String start, String stop, String last)
      throws IOException, InterruptedException, ClassNotFoundException {
    String jobName =
        "Scan" + (start != null ? start.toUpperCase() : "Empty") + "To" +
            (stop != null ? stop.toUpperCase() : "Empty");
    LOG.info("Before map/reduce startup - job " + jobName);
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());

    c.set(KEY_STARTROW, start != null ? start : "");
    c.set(KEY_LASTROW, last != null ? last : "");

    List<Scan> scans = new ArrayList<Scan>();

    for (int i = 0; i < 3; i++) {
      Scan scan = new Scan();

      scan.addFamily(INPUT_FAMILY);
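      // The table-name attribute tells MultiTableInputFormat which table
      // each scan runs against.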
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(TABLE_NAME + i));

      if (start != null) {
        scan.setStartRow(Bytes.toBytes(start));
      }
      if (stop != null) {
        scan.setStopRow(Bytes.toBytes(stop));
      }

      scans.add(scan);

      LOG.info("scan before: " + scan);
    }

    Job job = new Job(c, jobName);

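    // Passing a List<Scan> makes initTableMapperJob configure
    // MultiTableInputFormat (rather than TableInputFormat) as the job's
    // input format.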
    TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
        ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    job.setReducerClass(ScanReducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
  }
}