View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual;
22  import static org.junit.Assert.assertTrue;
23  
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.Collections;
29  import java.util.HashMap;
30  import java.util.HashSet;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Random;
34  import java.util.Set;
35  
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.hadoop.hbase.Cell;
39  import org.apache.hadoop.hbase.CellUtil;
40  import org.apache.hadoop.hbase.HBaseTestingUtility;
41  import org.apache.hadoop.hbase.HColumnDescriptor;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.testclassification.MediumTests;
45  import org.apache.hadoop.hbase.client.Delete;
46  import org.apache.hadoop.hbase.client.Put;
47  import org.apache.hadoop.hbase.client.Scan;
48  import org.apache.hadoop.hbase.io.compress.Compression;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.junit.After;
51  import org.junit.Before;
52  import org.junit.Test;
53  import org.junit.experimental.categories.Category;
54  import org.junit.runner.RunWith;
55  import org.junit.runners.Parameterized;
56  import org.junit.runners.Parameterized.Parameters;
57  
58  /**
59   * Test various seek optimizations for correctness and check if they are
60   * actually saving I/O operations.
61   */
62  @RunWith(Parameterized.class)
63  @Category(MediumTests.class)
64  public class TestSeekOptimizations {
65  
66    private static final Log LOG =
67        LogFactory.getLog(TestSeekOptimizations.class);
68  
69    // Constants
70    private static final String FAMILY = "myCF";
71    private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
72  
73    private static final int PUTS_PER_ROW_COL = 50;
74    private static final int DELETES_PER_ROW_COL = 10;
75  
76    private static final int NUM_ROWS = 3;
77    private static final int NUM_COLS = 3;
78  
79    private static final boolean VERBOSE = false;
80  
81    /**
82     * Disable this when this test fails hopelessly and you need to debug a
83     * simpler case.
84     */
85    private static final boolean USE_MANY_STORE_FILES = true;
86  
87    private static final int[][] COLUMN_SETS = new int[][] {
88      {},  // All columns
89      {0},
90      {1},
91      {0, 2},
92      {1, 2},
93      {0, 1, 2},
94    };
95  
96    // Both start row and end row are inclusive here for the purposes of this
97    // test.
98    private static final int[][] ROW_RANGES = new int[][] {
99      {-1, -1},
100     {0, 1},
101     {1, 1},
102     {1, 2},
103     {0, 2}
104   };
105 
106   private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
107 
108   // Instance variables
109   private HRegion region;
110   private Put put;
111   private Delete del;
112   private Random rand;
113   private Set<Long> putTimestamps = new HashSet<Long>();
114   private Set<Long> delTimestamps = new HashSet<Long>();
115   private List<Cell> expectedKVs = new ArrayList<Cell>();
116 
117   private Compression.Algorithm comprAlgo;
118   private BloomType bloomType;
119 
120   private long totalSeekDiligent, totalSeekLazy;
121   
122   private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
123 
124   @Parameters
125   public static final Collection<Object[]> parameters() {
126     return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
127   }
128 
129   public TestSeekOptimizations(Compression.Algorithm comprAlgo,
130       BloomType bloomType) {
131     this.comprAlgo = comprAlgo;
132     this.bloomType = bloomType;
133   }
134 
135   @Before
136   public void setUp() {
137     rand = new Random(91238123L);
138     expectedKVs.clear();
139   }
140 
141   @Test
142   public void testMultipleTimestampRanges() throws IOException {
143     // enable seek counting
144     StoreFileScanner.instrument();
145 
146     region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges",
147         new HColumnDescriptor(FAMILY)
148             .setCompressionType(comprAlgo)
149             .setBloomFilterType(bloomType)
150             .setMaxVersions(3)
151     );
152 
153     // Delete the given timestamp and everything before.
154     final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
155 
156     createTimestampRange(1, 50, -1);
157     createTimestampRange(51, 100, -1);
158     if (USE_MANY_STORE_FILES) {
159       createTimestampRange(100, 500, 127);
160       createTimestampRange(900, 1300, -1);
161       createTimestampRange(1301, 2500, latestDelTS);
162       createTimestampRange(2502, 2598, -1);
163       createTimestampRange(2599, 2999, -1);
164     }
165 
166     prepareExpectedKVs(latestDelTS);
167 
168     for (int[] columnArr : COLUMN_SETS) {
169       for (int[] rowRange : ROW_RANGES) {
170         for (int maxVersions : MAX_VERSIONS_VALUES) {
171           for (boolean lazySeekEnabled : new boolean[] { false, true }) {
172             testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
173                 maxVersions);
174           }
175         }
176       }
177     }
178 
179     final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
180     System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
181         " total seeks without optimization: " + totalSeekDiligent
182         + ", with optimization: " + totalSeekLazy + " (" +
183         String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
184         "), savings: " + String.format("%.2f%%",
185             100.0 * seekSavings) + "\n");
186 
187     // Test that lazy seeks are buying us something. Without the actual
188     // implementation of the lazy seek optimization this will be 0.
189     final double expectedSeekSavings = 0.0;
190     assertTrue("Lazy seek is only saving " +
191         String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
192         "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
193         seekSavings >= expectedSeekSavings);
194   }
195 
196   private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
197       final int startRow, final int endRow, int maxVersions)
198       throws IOException {
199     StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
200     final Scan scan = new Scan();
201     final Set<String> qualSet = new HashSet<String>();
202     for (int iColumn : columnArr) {
203       String qualStr = getQualStr(iColumn);
204       scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
205       qualSet.add(qualStr);
206     }
207     scan.setMaxVersions(maxVersions);
208     scan.setStartRow(rowBytes(startRow));
209 
210     // Adjust for the fact that for multi-row queries the end row is exclusive.
211     {
212       final byte[] scannerStopRow =
213           rowBytes(endRow + (startRow != endRow ? 1 : 0));
214       scan.setStopRow(scannerStopRow);
215     }
216 
217     final long initialSeekCount = StoreFileScanner.getSeekCount();
218     final InternalScanner scanner = region.getScanner(scan);
219     final List<Cell> results = new ArrayList<Cell>();
220     final List<Cell> actualKVs = new ArrayList<Cell>();
221 
222     // Such a clumsy do-while loop appears to be the official way to use an
223     // internalScanner. scanner.next() return value refers to the _next_
224     // result, not to the one already returned in results.
225     boolean hasNext;
226     do {
227       hasNext = scanner.next(results);
228       actualKVs.addAll(results);
229       results.clear();
230     } while (hasNext);
231 
232     List<Cell> filteredKVs = filterExpectedResults(qualSet,
233         rowBytes(startRow), rowBytes(endRow), maxVersions);
234     final String rowRestrictionStr =
235         (startRow == -1 && endRow == -1) ? "all rows" : (
236             startRow == endRow ? ("row=" + startRow) : ("startRow="
237             + startRow + ", " + "endRow=" + endRow));
238     final String columnRestrictionStr =
239         columnArr.length == 0 ? "all columns"
240             : ("columns=" + Arrays.toString(columnArr));
241     final String testDesc =
242         "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
243             + (scan.isGetScan() ? "Get" : "Scan") + ": "
244             + columnRestrictionStr + ", " + rowRestrictionStr
245             + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
246     long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
247     if (VERBOSE) {
248       System.err.println("Seek count: " + seekCount + ", KVs returned: "
249         + actualKVs.size() + ". " + testDesc +
250         (lazySeekEnabled ? "\n" : ""));
251     }
252     if (lazySeekEnabled) {
253       totalSeekLazy += seekCount;
254     } else {
255       totalSeekDiligent += seekCount;
256     }
257     assertKVListsEqual(testDesc, filteredKVs, actualKVs);
258   }
259 
260   private List<Cell> filterExpectedResults(Set<String> qualSet,
261       byte[] startRow, byte[] endRow, int maxVersions) {
262     final List<Cell> filteredKVs = new ArrayList<Cell>();
263     final Map<String, Integer> verCount = new HashMap<String, Integer>();
264     for (Cell kv : expectedKVs) {
265       if (startRow.length > 0 &&
266           Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
267               startRow, 0, startRow.length) < 0) {
268         continue;
269       }
270 
271       // In this unit test the end row is always inclusive.
272       if (endRow.length > 0 &&
273           Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
274               endRow, 0, endRow.length) > 0) {
275         continue;
276       }
277 
278       if (!qualSet.isEmpty() && (!CellUtil.matchingFamily(kv, FAMILY_BYTES)
279           || !qualSet.contains(Bytes.toString(CellUtil.cloneQualifier(kv))))) {
280         continue;
281       }
282 
283       final String rowColStr =
284         Bytes.toStringBinary(CellUtil.cloneRow(kv)) + "/"
285             + Bytes.toStringBinary(CellUtil.cloneFamily(kv)) + ":"
286             + Bytes.toStringBinary(CellUtil.cloneQualifier(kv));
287       final Integer curNumVer = verCount.get(rowColStr);
288       final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
289       if (newNumVer <= maxVersions) {
290         filteredKVs.add(kv);
291         verCount.put(rowColStr, newNumVer);
292       }
293     }
294 
295     return filteredKVs;
296   }
297 
298   private void prepareExpectedKVs(long latestDelTS) {
299     final List<Cell> filteredKVs = new ArrayList<Cell>();
300     for (Cell kv : expectedKVs) {
301       if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
302         filteredKVs.add(kv);
303       }
304     }
305     expectedKVs = filteredKVs;
306     Collections.sort(expectedKVs, KeyValue.COMPARATOR);
307   }
308 
309   public void put(String qual, long ts) {
310     if (!putTimestamps.contains(ts)) {
311       put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
312       putTimestamps.add(ts);
313     }
314     if (VERBOSE) {
315       LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
316           + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
317     }
318   }
319 
320   private byte[] createValue(long ts) {
321     return Bytes.toBytes("value" + ts);
322   }
323 
324   public void delAtTimestamp(String qual, long ts) {
325     del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
326     logDelete(qual, ts, "at");
327   }
328 
329   private void logDelete(String qual, long ts, String delType) {
330     if (VERBOSE) {
331       LOG.info("del " + delType + ": row "
332           + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
333           + ", qualifier " + qual + ", ts " + ts);
334     }
335   }
336 
337   private void delUpToTimestamp(String qual, long upToTS) {
338     del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
339     logDelete(qual, upToTS, "up to and including");
340   }
341 
342   private long randLong(long n) {
343     long l = rand.nextLong();
344     if (l == Long.MIN_VALUE)
345       l = Long.MAX_VALUE;
346     return Math.abs(l) % n;
347   }
348 
349   private long randBetween(long a, long b) {
350     long x = a + randLong(b - a + 1);
351     assertTrue(a <= x && x <= b);
352     return x;
353   }
354 
355   private final String rowStr(int i) {
356     return ("row" + i).intern();
357   }
358 
359   private final byte[] rowBytes(int i) {
360     if (i == -1) {
361       return HConstants.EMPTY_BYTE_ARRAY;
362     }
363     return Bytes.toBytes(rowStr(i));
364   }
365 
366   private final String getQualStr(int i) {
367     return ("qual" + i).intern();
368   }
369 
370   public void createTimestampRange(long minTS, long maxTS,
371       long deleteUpToTS) throws IOException {
372     assertTrue(minTS < maxTS);
373     assertTrue(deleteUpToTS == -1
374         || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
375 
376     for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
377       final String row = rowStr(iRow);
378       final byte[] rowBytes = Bytes.toBytes(row);
379       for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
380         final String qual = getQualStr(iCol);
381         final byte[] qualBytes = Bytes.toBytes(qual);
382         put = new Put(rowBytes);
383 
384         putTimestamps.clear();
385         put(qual, minTS);
386         put(qual, maxTS);
387         for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
388           put(qual, randBetween(minTS, maxTS));
389         }
390 
391         long[] putTimestampList = new long[putTimestamps.size()];
392         {
393           int i = 0;
394           for (long ts : putTimestamps) {
395             putTimestampList[i++] = ts;
396           }
397         }
398 
399         // Delete a predetermined number of particular timestamps
400         delTimestamps.clear();
401         assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
402         int numToDel = DELETES_PER_ROW_COL;
403         int tsRemaining = putTimestampList.length;
404         del = new Delete(rowBytes);
405         for (long ts : putTimestampList) {
406           if (rand.nextInt(tsRemaining) < numToDel) {
407             delAtTimestamp(qual, ts);
408             putTimestamps.remove(ts);
409             --numToDel;
410           }
411 
412           if (--tsRemaining == 0) {
413             break;
414           }
415         }
416 
417         // Another type of delete: everything up to the given timestamp.
418         if (deleteUpToTS != -1) {
419           delUpToTimestamp(qual, deleteUpToTS);
420         }
421 
422         region.put(put);
423         if (!del.isEmpty()) {
424           region.delete(del);
425         }
426 
427         // Add remaining timestamps (those we have not deleted) to expected
428         // results
429         for (long ts : putTimestamps) {
430           expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
431               KeyValue.Type.Put));
432         }
433       }
434     }
435 
436     region.flushcache();
437   }
438 
439   @After
440   public void tearDown() throws IOException {
441     if (region != null) {
442       HRegion.closeHRegion(region);
443     }
444 
445     // We have to re-set the lazy seek flag back to the default so that other
446     // unit tests are not affected.
447     StoreScanner.enableLazySeekGlobally(
448         StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
449   }
450 
451 
452 }
453