/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import junit.framework.Assert;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

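/**
 * Tests that scanners correctly see data added through bulk load: HFiles written without
 * sequence ids (tagged with BULKLOAD_TIME_KEY), "native" HFiles that carry their own
 * MAX_SEQ_ID_KEY and cell mvcc versions, and a bulk load that completes while a scanner
 * is already open on the table.
 */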
@Category(MediumTests.class)
public class TestScannerWithBulkload {
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(1);
  }

  private static void createTable(HBaseAdmin admin, String tableName) throws IOException {
    HTableDescriptor desc = new HTableDescriptor(tableName);
    HColumnDescriptor hcd = new HColumnDescriptor("col");
    hcd.setMaxVersions(3);
    desc.addFamily(hcd);
    admin.createTable(desc);
  }

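  /**
   * Bulk loads a file carrying 'version2' for row1/col:q and verifies the scanner sees it,
   * then puts and flushes 'version3' and verifies the scanner sees the newest value.
   */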
  @Test
  public void testBulkLoad() throws Exception {
    String tableName = "testBulkLoad";
    long l = System.currentTimeMillis();
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    createTable(admin, tableName);
    Scan scan = createScan();
    final HTable table = init(admin, l, scan, tableName);
    // use bulkload
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoad/", "/temp/testBulkLoad/col/file",
      false);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    bulkload.doBulkLoad(hfilePath, table);
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    result = scanAfterBulkLoad(scanner, result, "version2");
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version3")));
    table.put(put0);
    table.flushCommits();
    admin.flush(tableName);
    scanner = table.getScanner(scan);
    result = scanner.next();
    while (result != null) {
      List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
      for (KeyValue _kv : kvs) {
        if (Bytes.toString(_kv.getRow()).equals("row1")) {
          System.out.println(Bytes.toString(_kv.getRow()));
          System.out.println(Bytes.toString(_kv.getQualifier()));
          System.out.println(Bytes.toString(_kv.getValue()));
          Assert.assertEquals("version3", Bytes.toString(_kv.getValue()));
        }
      }
      result = scanner.next();
    }
    scanner.close();
    table.close();
  }

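  /**
   * Drains the scanner and asserts that every cell returned for row1/col:q carries the
   * expected value.
   */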
  private Result scanAfterBulkLoad(ResultScanner scanner, Result result, String expectedVal)
      throws IOException {
    while (result != null) {
      List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
      for (KeyValue _kv : kvs) {
        if (Bytes.toString(_kv.getRow()).equals("row1")) {
          System.out.println(Bytes.toString(_kv.getRow()));
          System.out.println(Bytes.toString(_kv.getQualifier()));
          System.out.println(Bytes.toString(_kv.getValue()));
          Assert.assertEquals(expectedVal, Bytes.toString(_kv.getValue()));
        }
      }
      result = scanner.next();
    }
    return result;
  }

  // If nativeHFile is true, we will set cell seq id and MAX_SEQ_ID_KEY in the file.
  // Else, we will set BULKLOAD_TIME_KEY.
  private Path writeToHFile(long l, String hFilePath, String pathStr, boolean nativeHFile)
      throws IOException {
    FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
    final Path hfilePath = new Path(hFilePath);
    fs.mkdirs(hfilePath);
    Path path = new Path(pathStr);
    HFile.WriterFactory wf = HFile.getWriterFactoryNoCache(TEST_UTIL.getConfiguration());
    Assert.assertNotNull(wf);
    HFileContext context = new HFileContext();
    HFile.Writer writer = wf.withPath(fs, path).withFileContext(context).create();
    KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
        Bytes.toBytes("version2"));

    // Set cell mvcc to test bulk load native hfiles.
    if (nativeHFile) {
      // Set a big seq id. Scan should not look at this seq id in a bulk loaded file.
      // Scan should only look at the seq id appended at the bulk load time, and not skip
      // this kv.
      kv.setMvccVersion(9999999);
    }

    writer.append(kv);

    if (nativeHFile) {
      // Set a big MAX_SEQ_ID_KEY. Scan should not look at this seq id in a bulk loaded file.
      // Scan should only look at the seq id appended at the bulk load time, and not skip its
      // kv.
      writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(9999999L));
    } else {
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
    }
    writer.close();
    return hfilePath;
  }

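  /**
   * Seeds the table with 'version0' for row1 and row2 and then 'version1' for row1 (all at
   * the same timestamp), flushing after each put and compacting at the end. Verifies that a
   * scan then returns a single cell holding 'version1' for row1/col:q.
   */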
  private HTable init(HBaseAdmin admin, long l, Scan scan, String tableName) throws Exception {
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version0")));
    table.put(put0);
    table.flushCommits();
    admin.flush(tableName);
    Put put1 = new Put(Bytes.toBytes("row2"));
    put1.add(new KeyValue(Bytes.toBytes("row2"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version0")));
    table.put(put1);
    table.flushCommits();
    admin.flush(tableName);
    admin.close();
    put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version1")));
    table.put(put0);
    table.flushCommits();
    admin.flush(tableName);
    admin.compact(tableName);

    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
    Assert.assertEquals(1, kvs.size());
    Assert.assertEquals("version1", Bytes.toString(kvs.get(0).getValue()));
    scanner.close();
    return table;
  }

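  /**
   * Opens a scanner first, then bulk loads 'version2' for row1/col:q from a separate thread,
   * and verifies that the already-open scanner sees the bulk loaded value.
   */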
  @Test
  public void testBulkLoadWithParallelScan() throws Exception {
    String tableName = "testBulkLoadWithParallelScan";
    final long l = System.currentTimeMillis();
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    createTable(admin, tableName);
    Scan scan = createScan();
    final HTable table = init(admin, l, scan, tableName);
    // use bulkload
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadWithParallelScan/",
        "/temp/testBulkLoadWithParallelScan/col/file", false);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    ResultScanner scanner = table.getScanner(scan);
    // Create a scanner and then do bulk load
    final CountDownLatch latch = new CountDownLatch(1);
    new Thread() {
      @Override
      public void run() {
        try {
          Put put1 = new Put(Bytes.toBytes("row5"));
          put1.add(new KeyValue(Bytes.toBytes("row5"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
              Bytes.toBytes("version0")));
          table.put(put1);
          table.flushCommits();
          bulkload.doBulkLoad(hfilePath, table);
          latch.countDown();
        } catch (TableNotFoundException e) {
          // Swallowed; countDown() is not reached, so latch.await() below will hang.
        } catch (IOException e) {
          // Swallowed; countDown() is not reached, so latch.await() below will hang.
        }
      }
    }.start();
    latch.await();
    // We had 'version0', 'version1' for row1,col:q. Bulk load adds 'version2'.
    // By the time we do next() the bulk loaded file is also added to the kv
    // scanner and is immediately visible with no mvcc check.
    Result result = scanner.next();
    scanAfterBulkLoad(scanner, result, "version2");
    scanner.close();
    table.close();
  }

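  /**
   * Bulk loads a "native" HFile (one that carries its own MAX_SEQ_ID_KEY and a cell mvcc
   * version) and verifies the scanner still returns the bulk loaded 'version2'.
   */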
  @Test
  public void testBulkLoadNativeHFile() throws Exception {
    String tableName = "testBulkLoadNativeHFile";
    long l = System.currentTimeMillis();
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    createTable(admin, tableName);
    Scan scan = createScan();
    final HTable table = init(admin, l, scan, tableName);
    // use bulkload
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadNativeHFile/",
      "/temp/testBulkLoadNativeHFile/col/file", true);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    bulkload.doBulkLoad(hfilePath, table);
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    // We had 'version0', 'version1' for 'row1,col:q' in the table.
    // Bulk load added 'version2'; the scanner should be able to see 'version2'.
    result = scanAfterBulkLoad(scanner, result, "version2");
    scanner.close();
    table.close();
  }

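  /**
   * Creates a scan that returns up to three versions per cell, matching the column family's
   * max versions.
   */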
  private Scan createScan() {
    Scan scan = new Scan();
    scan.setMaxVersions(3);
    return scan;
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
}