View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.List;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.HBaseConfiguration;
33  import org.apache.hadoop.hbase.HBaseTestCase;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HColumnDescriptor;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.HTableDescriptor;
38  import org.apache.hadoop.hbase.testclassification.MediumTests;
39  import org.apache.hadoop.hbase.TableName;
40  import org.apache.hadoop.hbase.client.Delete;
41  import org.apache.hadoop.hbase.client.Durability;
42  import org.apache.hadoop.hbase.client.Get;
43  import org.apache.hadoop.hbase.client.Put;
44  import org.apache.hadoop.hbase.client.Scan;
45  import org.apache.hadoop.hbase.io.hfile.BlockCache;
46  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
47  import org.apache.hadoop.hbase.io.hfile.HFile;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
50  import org.junit.Test;
51  import org.junit.experimental.categories.Category;
52  
53  @Category(MediumTests.class)
54  public class TestBlocksRead extends HBaseTestCase {
55    static final Log LOG = LogFactory.getLog(TestBlocksRead.class);
56    static final BloomType[] BLOOM_TYPE = new BloomType[] { BloomType.ROWCOL,
57        BloomType.ROW, BloomType.NONE };
58  
59    private static BlockCache blockCache;
60  
61    private HBaseConfiguration getConf() {
62      HBaseConfiguration conf = new HBaseConfiguration();
63  
64      // disable compactions in this test.
65      conf.setInt("hbase.hstore.compactionThreshold", 10000);
66      return conf;
67    }
68  
69    HRegion region = null;
70    private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
71    private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
72  
73    /**
74     * @see org.apache.hadoop.hbase.HBaseTestCase#setUp()
75     */
76    @SuppressWarnings("deprecation")
77    @Override
78    protected void setUp() throws Exception {
79      super.setUp();
80    }
81  
82    @SuppressWarnings("deprecation")
83    @Override
84    protected void tearDown() throws Exception {
85      super.tearDown();
86      EnvironmentEdgeManagerTestHelper.reset();
87    }
88  
89    /**
90     * Callers must afterward call {@link HRegion#closeHRegion(HRegion)}
91     * @param tableName
92     * @param callingMethod
93     * @param conf
94     * @param family
95     * @throws IOException
96     * @return created and initialized region.
97     */
98    private HRegion initHRegion(byte[] tableName, String callingMethod,
99        HBaseConfiguration conf, String family) throws IOException {
100     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
101     HColumnDescriptor familyDesc;
102     for (int i = 0; i < BLOOM_TYPE.length; i++) {
103       BloomType bloomType = BLOOM_TYPE[i];
104       familyDesc = new HColumnDescriptor(family + "_" + bloomType)
105           .setBlocksize(1)
106           .setBloomFilterType(BLOOM_TYPE[i]);
107       htd.addFamily(familyDesc);
108     }
109 
110     HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
111     Path path = new Path(DIR + callingMethod);
112     HRegion r = HRegion.createHRegion(info, path, conf, htd);
113     blockCache = new CacheConfig(conf).getBlockCache();
114     return r;
115   }
116 
117   private void putData(String family, String row, String col, long version)
118       throws IOException {
119     for (int i = 0; i < BLOOM_TYPE.length; i++) {
120       putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version,
121           version);
122     }
123   }
124 
125   // generates a value to put for a row/col/version.
126   private static byte[] genValue(String row, String col, long version) {
127     return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
128   }
129 
130   private void putData(byte[] cf, String row, String col, long versionStart,
131       long versionEnd) throws IOException {
132     byte columnBytes[] = Bytes.toBytes(col);
133     Put put = new Put(Bytes.toBytes(row));
134     put.setDurability(Durability.SKIP_WAL);
135 
136     for (long version = versionStart; version <= versionEnd; version++) {
137       put.add(cf, columnBytes, version, genValue(row, col, version));
138     }
139     region.put(put);
140   }
141 
142   private Cell[] getData(String family, String row, List<String> columns,
143       int expBlocks) throws IOException {
144     return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
145   }
146 
147   private Cell[] getData(String family, String row, List<String> columns,
148       int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
149       throws IOException {
150     int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
151     Cell[] kvs = null;
152 
153     for (int i = 0; i < BLOOM_TYPE.length; i++) {
154       BloomType bloomType = BLOOM_TYPE[i];
155       byte[] cf = Bytes.toBytes(family + "_" + bloomType);
156       long blocksStart = getBlkAccessCount(cf);
157       Get get = new Get(Bytes.toBytes(row));
158 
159       for (String column : columns) {
160         get.addColumn(cf, Bytes.toBytes(column));
161       }
162 
163       kvs = region.get(get).rawCells();
164       long blocksEnd = getBlkAccessCount(cf);
165       if (expBlocks[i] != -1) {
166         assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
167             blocksEnd - blocksStart);
168       }
169       System.out.println("Blocks Read for Bloom: " + bloomType + " = "
170           + (blocksEnd - blocksStart) + "Expected = " + expBlocks[i]);
171     }
172     return kvs;
173   }
174 
175   private Cell[] getData(String family, String row, String column,
176       int expBlocks) throws IOException {
177     return getData(family, row, Arrays.asList(column), expBlocks, expBlocks,
178         expBlocks);
179   }
180 
181   private Cell[] getData(String family, String row, String column,
182       int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
183       throws IOException {
184     return getData(family, row, Arrays.asList(column), expBlocksRowCol,
185         expBlocksRow, expBlocksNone);
186   }
187 
188   private void deleteFamily(String family, String row, long version)
189       throws IOException {
190     Delete del = new Delete(Bytes.toBytes(row));
191     del.deleteFamily(Bytes.toBytes(family + "_ROWCOL"), version);
192     del.deleteFamily(Bytes.toBytes(family + "_ROW"), version);
193     del.deleteFamily(Bytes.toBytes(family + "_NONE"), version);
194     region.delete(del);
195   }
196 
197   private static void verifyData(Cell kv, String expectedRow,
198       String expectedCol, long expectedVersion) {
199     assertTrue("RowCheck", CellUtil.matchingRow(kv,  Bytes.toBytes(expectedRow)));
200     assertTrue("ColumnCheck", CellUtil.matchingQualifier(kv, Bytes.toBytes(expectedCol)));
201     assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
202     assertTrue("ValueCheck", CellUtil.matchingValue(kv, genValue(expectedRow, expectedCol, expectedVersion)));
203   }
204 
205   private static long getBlkAccessCount(byte[] cf) {
206       return HFile.dataBlockReadCnt.get();
207   }
208 
209   private static long getBlkCount() {
210     return blockCache.getBlockCount();
211   }
212 
213   /**
214    * Test # of blocks read for some simple seek cases.
215    *
216    * @throws Exception
217    */
218   @Test
219   public void testBlocksRead() throws Exception {
220     byte[] TABLE = Bytes.toBytes("testBlocksRead");
221     String FAMILY = "cf1";
222     Cell kvs[];
223     HBaseConfiguration conf = getConf();
224     this.region = initHRegion(TABLE, getName(), conf, FAMILY);
225 
226     try {
227       putData(FAMILY, "row", "col1", 1);
228       putData(FAMILY, "row", "col2", 2);
229       putData(FAMILY, "row", "col3", 3);
230       putData(FAMILY, "row", "col4", 4);
231       putData(FAMILY, "row", "col5", 5);
232       putData(FAMILY, "row", "col6", 6);
233       putData(FAMILY, "row", "col7", 7);
234       region.flushcache();
235 
236       // Expected block reads: 1
237       // The top block has the KV we are
238       // interested. So only 1 seek is needed.
239       kvs = getData(FAMILY, "row", "col1", 1);
240       assertEquals(1, kvs.length);
241       verifyData(kvs[0], "row", "col1", 1);
242 
243       // Expected block reads: 2
244       // The top block and next block has the KVs we are
245       // interested. So only 2 seek is needed.
246       kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
247       assertEquals(2, kvs.length);
248       verifyData(kvs[0], "row", "col1", 1);
249       verifyData(kvs[1], "row", "col2", 2);
250 
251       // Expected block reads: 3
252       // The first 2 seeks is to find out col2. [HBASE-4443]
253       // One additional seek for col3
254       // So 3 seeks are needed.
255       kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 2);
256       assertEquals(2, kvs.length);
257       verifyData(kvs[0], "row", "col2", 2);
258       verifyData(kvs[1], "row", "col3", 3);
259 
260       // Expected block reads: 1. [HBASE-4443]&[HBASE-7845]
261       kvs = getData(FAMILY, "row", Arrays.asList("col5"), 1);
262       assertEquals(1, kvs.length);
263       verifyData(kvs[0], "row", "col5", 5);
264     } finally {
265       HRegion.closeHRegion(this.region);
266       this.region = null;
267     }
268   }
269 
270   /**
271    * Test # of blocks read (targetted at some of the cases Lazy Seek optimizes).
272    *
273    * @throws Exception
274    */
275   @Test
276   public void testLazySeekBlocksRead() throws Exception {
277     byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
278     String FAMILY = "cf1";
279     Cell kvs[];
280     HBaseConfiguration conf = getConf();
281     this.region = initHRegion(TABLE, getName(), conf, FAMILY);
282 
283     try {
284       // File 1
285       putData(FAMILY, "row", "col1", 1);
286       putData(FAMILY, "row", "col2", 2);
287       region.flushcache();
288 
289       // File 2
290       putData(FAMILY, "row", "col1", 3);
291       putData(FAMILY, "row", "col2", 4);
292       region.flushcache();
293 
294       // Expected blocks read: 1.
295       // File 2's top block is also the KV we are
296       // interested. So only 1 seek is needed.
297       kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
298       assertEquals(1, kvs.length);
299       verifyData(kvs[0], "row", "col1", 3);
300 
301       // Expected blocks read: 2
302       // File 2's top block has the "col1" KV we are
303       // interested. We also need "col2" which is in a block
304       // of its own. So, we need that block as well.
305       kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
306       assertEquals(2, kvs.length);
307       verifyData(kvs[0], "row", "col1", 3);
308       verifyData(kvs[1], "row", "col2", 4);
309 
310       // File 3: Add another column
311       putData(FAMILY, "row", "col3", 5);
312       region.flushcache();
313 
314       // Expected blocks read: 1
315       // File 3's top block has the "col3" KV we are
316       // interested. So only 1 seek is needed.
317       kvs = getData(FAMILY, "row", "col3", 1);
318       assertEquals(1, kvs.length);
319       verifyData(kvs[0], "row", "col3", 5);
320 
321       // Get a column from older file.
322       // For ROWCOL Bloom filter: Expected blocks read: 1.
323       // For ROW Bloom filter: Expected blocks read: 2.
324       // For NONE Bloom filter: Expected blocks read: 2.
325       kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
326       assertEquals(1, kvs.length);
327       verifyData(kvs[0], "row", "col1", 3);
328 
329       // File 4: Delete the entire row.
330       deleteFamily(FAMILY, "row", 6);
331       region.flushcache();
332 
333       // For ROWCOL Bloom filter: Expected blocks read: 2.
334       // For ROW Bloom filter: Expected blocks read: 3.
335       // For NONE Bloom filter: Expected blocks read: 3.
336       kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
337       assertEquals(0, kvs.length);
338       kvs = getData(FAMILY, "row", "col2", 2, 3, 3);
339       assertEquals(0, kvs.length);
340       kvs = getData(FAMILY, "row", "col3", 2);
341       assertEquals(0, kvs.length);
342       kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
343       assertEquals(0, kvs.length);
344 
345       // File 5: Delete
346       deleteFamily(FAMILY, "row", 10);
347       region.flushcache();
348 
349       // File 6: some more puts, but with timestamps older than the
350       // previous delete.
351       putData(FAMILY, "row", "col1", 7);
352       putData(FAMILY, "row", "col2", 8);
353       putData(FAMILY, "row", "col3", 9);
354       region.flushcache();
355 
356       // Baseline expected blocks read: 8. [HBASE-4532]
357       kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
358       assertEquals(0, kvs.length);
359  
360       // File 7: Put back new data
361       putData(FAMILY, "row", "col1", 11);
362       putData(FAMILY, "row", "col2", 12);
363       putData(FAMILY, "row", "col3", 13);
364       region.flushcache();
365 
366 
367       // Expected blocks read: 5. [HBASE-4585]
368       kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
369       assertEquals(3, kvs.length);
370       verifyData(kvs[0], "row", "col1", 11);
371       verifyData(kvs[1], "row", "col2", 12);
372       verifyData(kvs[2], "row", "col3", 13);
373     } finally {
374       HRegion.closeHRegion(this.region);
375       this.region = null;
376     }
377   }
378 
379   /**
380    * Test # of blocks read to ensure disabling cache-fill on Scan works.
381    * @throws Exception
382    */
383   @Test
384   public void testBlocksStoredWhenCachingDisabled() throws Exception {
385     byte [] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
386     String FAMILY = "cf1";
387 
388     HBaseConfiguration conf = getConf();
389     this.region = initHRegion(TABLE, getName(), conf, FAMILY);
390 
391     try {
392       putData(FAMILY, "row", "col1", 1);
393       putData(FAMILY, "row", "col2", 2);
394       region.flushcache();
395 
396       // Execute a scan with caching turned off
397       // Expected blocks stored: 0
398       long blocksStart = getBlkCount();
399       Scan scan = new Scan();
400       scan.setCacheBlocks(false);
401       RegionScanner rs = region.getScanner(scan);
402       List<Cell> result = new ArrayList<Cell>(2);
403       rs.next(result);
404       assertEquals(2 * BLOOM_TYPE.length, result.size());
405       rs.close();
406       long blocksEnd = getBlkCount();
407 
408       assertEquals(blocksStart, blocksEnd);
409 
410       // Execute with caching turned on
411       // Expected blocks stored: 2
412       blocksStart = blocksEnd;
413       scan.setCacheBlocks(true);
414       rs = region.getScanner(scan);
415       result = new ArrayList<Cell>(2);
416       rs.next(result);
417       assertEquals(2 * BLOOM_TYPE.length, result.size());
418       rs.close();
419       blocksEnd = getBlkCount();
420     
421       assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
422     } finally {
423       HRegion.closeHRegion(this.region);
424       this.region = null;
425     }
426   }
427 
428   @Test
429   public void testLazySeekBlocksReadWithDelete() throws Exception {
430     byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
431     String FAMILY = "cf1";
432     Cell kvs[];
433     HBaseConfiguration conf = getConf();
434     this.region = initHRegion(TABLE, getName(), conf, FAMILY);
435     try {
436       deleteFamily(FAMILY, "row", 200);
437       for (int i = 0; i < 100; i++) {
438         putData(FAMILY, "row", "col" + i, i);
439       }
440       putData(FAMILY, "row", "col99", 201);
441       region.flushcache();
442 
443       kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
444       assertEquals(0, kvs.length);
445 
446       kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
447       assertEquals(1, kvs.length);
448       verifyData(kvs[0], "row", "col99", 201);
449     } finally {
450       HRegion.closeHRegion(this.region);
451       this.region = null;
452     }
453   }
454 
455 }