View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.List;
26  import java.util.SortedSet;
27  import java.util.concurrent.atomic.AtomicLong;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.client.Scan;
35  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
36  import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
37  
38  /**
39   * KeyValueScanner adaptor over the Reader.  It also provides hooks into
40   * bloom filter things.
41   */
42  @InterfaceAudience.LimitedPrivate("Coprocessor")
43  public class StoreFileScanner implements KeyValueScanner {
44    static final Log LOG = LogFactory.getLog(HStore.class);
45  
46    // the reader it comes from:
47    private final StoreFile.Reader reader;
48    private final HFileScanner hfs;
49    private KeyValue cur = null;
50  
51    private boolean realSeekDone;
52    private boolean delayedReseek;
53    private KeyValue delayedSeekKV;
54  
55    private boolean enforceMVCC = false;
56    private boolean hasMVCCInfo = false;
57    // Flag indicating whether MVCC skipping of KeyValues may stop as soon as
58    // the next row has been encountered. Only used for reversed scans.
59    private boolean stopSkippingKVsIfNextRow = false;
60  
61    private static AtomicLong seekCount;
62  
63    private ScanQueryMatcher matcher;
64    
65    private long readPt;
66  
/**
 * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}.
 *
 * @param reader store file reader this scanner was created from; consulted for
 *          Bloom filters, timestamps, the comparator and the sequence id
 * @param hfs HFile scanner that does the actual positioning and iteration
 * @param useMVCC when true, KeyValues whose MVCC version is greater than
 *          {@code readPt} are skipped by {@link #skipKVsNewerThanReadpoint()}
 * @param hasMVCC when true, {@code next}, {@code seek} and {@code reseek} run
 *          the MVCC skip step after positioning
 * @param readPt MVCC read point that KeyValue versions are compared against
 */
public StoreFileScanner(StoreFile.Reader reader, HFileScanner hfs, boolean useMVCC,
    boolean hasMVCC, long readPt) {
  // Where the data comes from.
  this.reader = reader;
  this.hfs = hfs;
  // MVCC configuration.
  this.readPt = readPt;
  this.hasMVCCInfo = hasMVCC;
  this.enforceMVCC = useMVCC;
}
79  
80    /**
81     * Return an array of scanners corresponding to the given
82     * set of store files.
83     */
84    public static List<StoreFileScanner> getScannersForStoreFiles(
85        Collection<StoreFile> files,
86        boolean cacheBlocks,
87        boolean usePread, long readPt) throws IOException {
88      return getScannersForStoreFiles(files, cacheBlocks,
89                                     usePread, false, readPt);
90    }
91  
92    /**
93     * Return an array of scanners corresponding to the given set of store files.
94     */
95    public static List<StoreFileScanner> getScannersForStoreFiles(
96        Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
97        boolean isCompaction, long readPt) throws IOException {
98      return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction,
99          null, readPt);
100   }
101 
102   /**
103    * Return an array of scanners corresponding to the given set of store files,
104    * And set the ScanQueryMatcher for each store file scanner for further
105    * optimization
106    */
107   public static List<StoreFileScanner> getScannersForStoreFiles(
108       Collection<StoreFile> files, boolean cacheBlocks, boolean usePread,
109       boolean isCompaction, ScanQueryMatcher matcher, long readPt) throws IOException {
110     List<StoreFileScanner> scanners = new ArrayList<StoreFileScanner>(
111         files.size());
112     for (StoreFile file : files) {
113       StoreFile.Reader r = file.createReader();
114       StoreFileScanner scanner = r.getStoreFileScanner(cacheBlocks, usePread,
115           isCompaction, readPt);
116       scanner.setScanQueryMatcher(matcher);
117       scanners.add(scanner);
118     }
119     return scanners;
120   }
121 
122   public String toString() {
123     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
124   }
125 
126   public KeyValue peek() {
127     return cur;
128   }
129 
130   public KeyValue next() throws IOException {
131     KeyValue retKey = cur;
132 
133     try {
134       // only seek if we aren't at the end. cur == null implies 'end'.
135       if (cur != null) {
136         hfs.next();
137         cur = hfs.getKeyValue();
138         if (hasMVCCInfo)
139           skipKVsNewerThanReadpoint();
140       }
141     } catch(IOException e) {
142       throw new IOException("Could not iterate " + this, e);
143     }
144     return retKey;
145   }
146 
147   public boolean seek(KeyValue key) throws IOException {
148     if (seekCount != null) seekCount.incrementAndGet();
149 
150     try {
151       try {
152         if(!seekAtOrAfter(hfs, key)) {
153           close();
154           return false;
155         }
156 
157         cur = hfs.getKeyValue();
158 
159         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
160       } finally {
161         realSeekDone = true;
162       }
163     } catch (IOException ioe) {
164       throw new IOException("Could not seek " + this + " to key " + key, ioe);
165     }
166   }
167 
168   public boolean reseek(KeyValue key) throws IOException {
169     if (seekCount != null) seekCount.incrementAndGet();
170 
171     try {
172       try {
173         if (!reseekAtOrAfter(hfs, key)) {
174           close();
175           return false;
176         }
177         cur = hfs.getKeyValue();
178 
179         return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
180       } finally {
181         realSeekDone = true;
182       }
183     } catch (IOException ioe) {
184       throw new IOException("Could not reseek " + this + " to key " + key,
185           ioe);
186     }
187   }
188 
189   protected boolean skipKVsNewerThanReadpoint() throws IOException {
190     // We want to ignore all key-values that are newer than our current
191     // readPoint
192     KeyValue startKV = cur;
193     while(enforceMVCC
194         && cur != null
195         && (cur.getMvccVersion() > readPt)) {
196       hfs.next();
197       cur = hfs.getKeyValue();
198       if (this.stopSkippingKVsIfNextRow
199           && getComparator().compareRows(cur.getBuffer(), cur.getRowOffset(),
200               cur.getRowLength(), startKV.getBuffer(), startKV.getRowOffset(),
201               startKV.getRowLength()) > 0) {
202         return false;
203       }
204     }
205 
206     if (cur == null) {
207       close();
208       return false;
209     }
210 
211     // For the optimisation in HBASE-4346, we set the KV's memstoreTS to
212     // 0, if it is older than all the scanners' read points. It is possible
213     // that a newer KV's memstoreTS was reset to 0. But, there is an
214     // older KV which was not reset to 0 (because it was
215     // not old enough during flush). Make sure that we set it correctly now,
216     // so that the comparision order does not change.
217     if (cur.getMvccVersion() <= readPt) {
218       cur.setMvccVersion(0);
219     }
220     return true;
221   }
222 
223   public void close() {
224     // Nothing to close on HFileScanner?
225     cur = null;
226   }
227 
228   /**
229    *
230    * @param s
231    * @param k
232    * @return false if not found or if k is after the end.
233    * @throws IOException
234    */
235   public static boolean seekAtOrAfter(HFileScanner s, KeyValue k)
236   throws IOException {
237     int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
238     if(result < 0) {
239       if (result == HConstants.INDEX_KEY_MAGIC) {
240         // using faked key
241         return true;
242       }
243       // Passed KV is smaller than first KV in file, work from start of file
244       return s.seekTo();
245     } else if(result > 0) {
246       // Passed KV is larger than current KV in file, if there is a next
247       // it is the "after", if not then this scanner is done.
248       return s.next();
249     }
250     // Seeked to the exact key
251     return true;
252   }
253 
254   static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
255   throws IOException {
256     //This function is similar to seekAtOrAfter function
257     int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
258     if (result <= 0) {
259       if (result == HConstants.INDEX_KEY_MAGIC) {
260         // using faked key
261         return true;
262       }
263       // If up to now scanner is not seeked yet, this means passed KV is smaller
264       // than first KV in file, and it is the first time we seek on this file.
265       // So we also need to work from the start of file.
266       if (!s.isSeeked()) {
267         return  s.seekTo();
268       }
269       return true;
270     }
271     // passed KV is larger than current KV in file, if there is a next
272     // it is after, if not then this scanner is done.
273     return s.next();
274   }
275 
276   @Override
277   public long getSequenceID() {
278     return reader.getSequenceID();
279   }
280 
281   /**
282    * Pretend we have done a seek but don't do it yet, if possible. The hope is
283    * that we find requested columns in more recent files and won't have to seek
284    * in older files. Creates a fake key/value with the given row/column and the
285    * highest (most recent) possible timestamp we might get from this file. When
286    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
287    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
288    * <p>
289    * Note that this function does guarantee that the current KV of this scanner
290    * will be advanced to at least the given KV. Because of this, it does have
291    * to do a real seek in cases when the seek timestamp is older than the
292    * highest timestamp of the file, e.g. when we are trying to seek to the next
293    * row/column and use OLDEST_TIMESTAMP in the seek key.
294    */
295   @Override
296   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
297       throws IOException {
298     if (kv.getFamilyLength() == 0) {
299       useBloom = false;
300     }
301 
302     boolean haveToSeek = true;
303     if (useBloom) {
304       // check ROWCOL Bloom filter first.
305       if (reader.getBloomFilterType() == BloomType.ROWCOL) {
306         haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
307             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
308             kv.getQualifierOffset(), kv.getQualifierLength());
309       } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
310           (kv.isDeleteFamily() || kv.isDeleteFamilyVersion())) {
311         // if there is no such delete family kv in the store file,
312         // then no need to seek.
313         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
314             kv.getRowOffset(), kv.getRowLength());
315       }
316     }
317 
318     delayedReseek = forward;
319     delayedSeekKV = kv;
320 
321     if (haveToSeek) {
322       // This row/column might be in this store file (or we did not use the
323       // Bloom filter), so we still need to seek.
324       realSeekDone = false;
325       long maxTimestampInFile = reader.getMaxTimestamp();
326       long seekTimestamp = kv.getTimestamp();
327       if (seekTimestamp > maxTimestampInFile) {
328         // Create a fake key that is not greater than the real next key.
329         // (Lower timestamps correspond to higher KVs.)
330         // To understand this better, consider that we are asked to seek to
331         // a higher timestamp than the max timestamp in this file. We know that
332         // the next point when we have to consider this file again is when we
333         // pass the max timestamp of this file (with the same row/column).
334         cur = kv.createFirstOnRowColTS(maxTimestampInFile);
335       } else {
336         // This will be the case e.g. when we need to seek to the next
337         // row/column, and we don't know exactly what they are, so we set the
338         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
339         // row/column.
340         enforceSeek();
341       }
342       return cur != null;
343     }
344 
345     // Multi-column Bloom filter optimization.
346     // Create a fake key/value, so that this scanner only bubbles up to the top
347     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
348     // all other store files. The query matcher will then just skip this fake
349     // key/value and the store scanner will progress to the next column. This
350     // is obviously not a "real real" seek, but unlike the fake KV earlier in
351     // this method, we want this to be propagated to ScanQueryMatcher.
352     cur = kv.createLastOnRowCol();
353 
354     realSeekDone = true;
355     return true;
356   }
357 
358   Reader getReaderForTesting() {
359     return reader;
360   }
361 
362   KeyValue.KVComparator getComparator() {
363     return reader.getComparator();
364   }
365 
366   @Override
367   public boolean realSeekDone() {
368     return realSeekDone;
369   }
370 
371   @Override
372   public void enforceSeek() throws IOException {
373     if (realSeekDone)
374       return;
375 
376     if (delayedReseek) {
377       reseek(delayedSeekKV);
378     } else {
379       seek(delayedSeekKV);
380     }
381   }
382 
383   public void setScanQueryMatcher(ScanQueryMatcher matcher) {
384     this.matcher = matcher;
385   }
386 
387   @Override
388   public boolean isFileScanner() {
389     return true;
390   }
391 
392   // Test methods
393 
394   static final long getSeekCount() {
395     return seekCount.get();
396   }
397   static final void instrument() {
398     seekCount = new AtomicLong();
399   }
400 
401   @Override
402   public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
403     return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
404         && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
405   }
406 
407   @Override
408   public boolean seekToPreviousRow(KeyValue key) throws IOException {
409     try {
410       try {
411         KeyValue seekKey = KeyValue.createFirstOnRow(key.getRow());
412         if (seekCount != null) seekCount.incrementAndGet();
413         if (!hfs.seekBefore(seekKey.getBuffer(), seekKey.getKeyOffset(),
414             seekKey.getKeyLength())) {
415           close();
416           return false;
417         }
418         KeyValue firstKeyOfPreviousRow = KeyValue.createFirstOnRow(hfs
419             .getKeyValue().getRow());
420 
421         if (seekCount != null) seekCount.incrementAndGet();
422         if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
423           close();
424           return false;
425         }
426 
427         cur = hfs.getKeyValue();
428         this.stopSkippingKVsIfNextRow = true;
429         boolean resultOfSkipKVs;
430         try {
431           resultOfSkipKVs = skipKVsNewerThanReadpoint();
432         } finally {
433           this.stopSkippingKVsIfNextRow = false;
434         }
435         if (!resultOfSkipKVs
436             || getComparator().compareRows(cur.getBuffer(), cur.getRowOffset(),
437                 cur.getRowLength(), firstKeyOfPreviousRow.getBuffer(),
438                 firstKeyOfPreviousRow.getRowOffset(),
439                 firstKeyOfPreviousRow.getRowLength()) > 0) {
440           return seekToPreviousRow(firstKeyOfPreviousRow);
441         }
442 
443         return true;
444       } finally {
445         realSeekDone = true;
446       }
447     } catch (IOException ioe) {
448       throw new IOException("Could not seekToPreviousRow " + this + " to key "
449           + key, ioe);
450     }
451   }
452 
453   @Override
454   public boolean seekToLastRow() throws IOException {
455     byte[] lastRow = reader.getLastRowKey();
456     if (lastRow == null) {
457       return false;
458     }
459     KeyValue seekKey = KeyValue.createFirstOnRow(lastRow);
460     if (seek(seekKey)) {
461       return true;
462     } else {
463       return seekToPreviousRow(seekKey);
464     }
465   }
466 
467   @Override
468   public boolean backwardSeek(KeyValue key) throws IOException {
469     seek(key);
470     if (cur == null
471         || getComparator().compareRows(cur.getRowArray(), cur.getRowOffset(),
472             cur.getRowLength(), key.getRowArray(), key.getRowOffset(),
473             key.getRowLength()) > 0) {
474       return seekToPreviousRow(key);
475     }
476     return true;
477   }
478 }