/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Used to perform Scan operations.
 * <p>
 * All operations are identical to {@link Get} with the exception of
 * instantiation.  Rather than specifying a single row, an optional startRow
 * and stopRow may be defined.  If rows are not specified, the Scanner will
 * iterate over all rows.
 * <p>
 * To scan everything for each row, instantiate a Scan object.
 * <p>
 * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
 * If caching is NOT set, we will use the caching value of the hosting {@link HTable}.  See
 * {@link HTable#setScannerCaching(int)}. In addition to row caching, it is possible to specify a
 * maximum result size, using {@link #setMaxResultSize(long)}. When both are used,
 * single server requests are limited by either number of rows or maximum result size, whichever
 * limit comes first.
 * <p>
 * To further define the scope of what to get when scanning, perform additional
 * methods as outlined below.
 * <p>
 * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
 * for each family to retrieve.
 * <p>
 * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
 * for each column to retrieve.
 * <p>
 * To only retrieve columns within a specific range of version timestamps,
 * execute {@link #setTimeRange(long, long) setTimeRange}.
 * <p>
 * To only retrieve columns with a specific timestamp, execute
 * {@link #setTimeStamp(long) setTimestamp}.
 * <p>
 * To limit the number of versions of each column to be returned, execute
 * {@link #setMaxVersions(int) setMaxVersions}.
 * <p>
 * To limit the maximum number of values returned for each call to next(),
 * execute {@link #setBatch(int) setBatch}.
 * <p>
 * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
 * <p>
 * Expert: To explicitly disable server-side block caching for this scan,
 * execute {@link #setCacheBlocks(boolean)}.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Scan extends Query {
  private static final Log LOG = LogFactory.getLog(Scan.class);

  private static final String RAW_ATTR = "_raw_";

  /**
   * EXPERT ONLY.
   * An integer (not long) indicating to the scanner logic how many times we attempt to retrieve the
   * next KV before we schedule a reseek.
   * The right value depends on the size of the average KV. A reseek is more efficient when
   * it can skip 5-10 KVs or 512B-1KB, or when the next KV is likely found in another HFile block.
   * Setting this only has any effect when columns were added with
   * {@link #addColumn(byte[], byte[])}
   * <pre>{@code
   * Scan s = new Scan(...);
   * s.addColumn(...);
   * s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
   * }</pre>
   * Default is 0 (always reseek).
   */
  public static final String HINT_LOOKAHEAD = "_look_ahead_";

  private byte [] startRow = HConstants.EMPTY_START_ROW;
  private byte [] stopRow  = HConstants.EMPTY_END_ROW;
  private int maxVersions = 1;
  private int batch = -1;

  private int storeLimit = -1;
  private int storeOffset = 0;
  private boolean getScan;

  // If application wants to collect scan metrics, it needs to
  // call scan.setAttribute(SCAN_ATTRIBUTES_ENABLE, Bytes.toBytes(Boolean.TRUE))
  static public final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";
  static public final String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";

  // If an application wants to use multiple scans over different tables each scan must
  // define this attribute with the appropriate table name by calling
  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";

  /*
   * -1 means no caching
   */
  private int caching = -1;
  private long maxResultSize = -1;
  private boolean cacheBlocks = true;
  private boolean reversed = false;
  private TimeRange tr = new TimeRange();
  private Map<byte [], NavigableSet<byte []>> familyMap =
    new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
  private Boolean loadColumnFamiliesOnDemand = null;

  /**
   * Set it true for small scan to get better performance
   *
   * Small scan should use pread and big scan can use seek + read
   *
   * seek + read is fast but can cause two problems (1) resource contention (2)
   * cause too much network io
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if setting it true, we would do
   * openScanner,next,closeScanner in one RPC call. It means the better
   * performance for small scan. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block(64KB), it could be
   * considered as a small scan.
   */
  private boolean small = false;

  /**
   * Create a Scan operation across all rows.
   */
  public Scan() {}

  public Scan(byte [] startRow, Filter filter) {
    this(startRow);
    this.filter = filter;
  }

  /**
   * Create a Scan operation starting at the specified row.
   * <p>
   * If the specified row does not exist, the Scanner will start from the
   * next closest row after the specified row.
   * @param startRow row to start scanner at or after
   */
  public Scan(byte [] startRow) {
    this.startRow = startRow;
  }

  /**
   * Create a Scan operation for the range of rows specified.
   * @param startRow row to start scanner at or after (inclusive)
   * @param stopRow row to stop scanner before (exclusive)
   */
  public Scan(byte [] startRow, byte [] stopRow) {
    this.startRow = startRow;
    this.stopRow = stopRow;
    //if the startRow and stopRow both are empty, it is not a Get
    this.getScan = isStartRowAndEqualsStopRow();
  }

  /**
   * Creates a new instance of this class while copying all values.
   *
   * @param scan  The scan instance to copy from.
   * @throws IOException When copying the values fails.
   */
  public Scan(Scan scan) throws IOException {
    startRow = scan.getStartRow();
    stopRow  = scan.getStopRow();
    maxVersions = scan.getMaxVersions();
    batch = scan.getBatch();
    storeLimit = scan.getMaxResultsPerColumnFamily();
    storeOffset = scan.getRowOffsetPerColumnFamily();
    caching = scan.getCaching();
    maxResultSize = scan.getMaxResultSize();
    cacheBlocks = scan.getCacheBlocks();
    getScan = scan.isGetScan();
    // NOTE(review): shallow copy — the Filter instance is shared with the
    // source scan, not cloned (original code carried the same "// clone?" note).
    filter = scan.getFilter(); // clone?
    loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
    TimeRange ctr = scan.getTimeRange();
    tr = new TimeRange(ctr.getMin(), ctr.getMax());
    reversed = scan.isReversed();
    small = scan.isSmall();
    // Rebuild the family map through addColumn/addFamily so this instance
    // owns its own NavigableSets rather than aliasing the source scan's.
    Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
    for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
      byte [] fam = entry.getKey();
      NavigableSet<byte[]> cols = entry.getValue();
      if (cols != null && cols.size() > 0) {
        for (byte[] col : cols) {
          addColumn(fam, col);
        }
      } else {
        addFamily(fam);
      }
    }
    for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
  }

  /**
   * Builds a scan object with the same specs as get.
   * @param get get to model scan after
   */
  public Scan(Get get) {
    this.startRow = get.getRow();
    this.stopRow = get.getRow();
    this.filter = get.getFilter();
    this.cacheBlocks = get.getCacheBlocks();
    this.maxVersions = get.getMaxVersions();
    this.storeLimit = get.getMaxResultsPerColumnFamily();
    this.storeOffset = get.getRowOffsetPerColumnFamily();
    // NOTE(review): the TimeRange and familyMap are shared with the Get,
    // not copied — mutating this Scan's family map mutates the Get's too.
    this.tr = get.getTimeRange();
    this.familyMap = get.getFamilyMap();
    this.getScan = true;
    for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
  }

  public boolean isGetScan() {
    return this.getScan || isStartRowAndEqualsStopRow();
  }

  private boolean isStartRowAndEqualsStopRow() {
    return this.startRow != null && this.startRow.length > 0 &&
        Bytes.equals(this.startRow, this.stopRow);
  }
  /**
   * Get all columns from the specified family.
   * <p>
   * Overrides previous calls to addColumn for this family.
   * @param family family name
   * @return this
   */
  public Scan addFamily(byte [] family) {
    // remove() before put(): TreeMap.put does not replace an existing key
    // object, so the explicit remove ensures the map stores this exact
    // byte[] reference as the key.
    familyMap.remove(family);
    familyMap.put(family, null);
    return this;
  }

  /**
   * Get the column from the specified family with the specified qualifier.
   * <p>
   * Overrides previous calls to addFamily for this family.
   * @param family family name
   * @param qualifier column qualifier
   * @return this
   */
  public Scan addColumn(byte [] family, byte [] qualifier) {
    NavigableSet<byte []> set = familyMap.get(family);
    if(set == null) {
      set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
    }
    if (qualifier == null) {
      qualifier = HConstants.EMPTY_BYTE_ARRAY;
    }
    set.add(qualifier);
    familyMap.put(family, set);
    return this;
  }

  /**
   * Get versions of columns only within the specified timestamp range,
   * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
   * your time range spans more than one version and you want all versions
   * returned, up the number of versions beyond the default.
   * @param minStamp minimum timestamp value, inclusive
   * @param maxStamp maximum timestamp value, exclusive
   * @throws IOException if invalid time range
   * @see #setMaxVersions()
   * @see #setMaxVersions(int)
   * @return this
   */
  public Scan setTimeRange(long minStamp, long maxStamp)
  throws IOException {
    tr = new TimeRange(minStamp, maxStamp);
    return this;
  }

  /**
   * Get versions of columns with the specified timestamp. Note, default maximum
   * versions to return is 1.  If your time range spans more than one version
   * and you want all versions returned, up the number of versions beyond the
   * default.
   * @param timestamp version timestamp
   * @see #setMaxVersions()
   * @see #setMaxVersions(int)
   * @return this
   */
  public Scan setTimeStamp(long timestamp)
  throws IOException {
    try {
      tr = new TimeRange(timestamp, timestamp+1);
    } catch(IOException e) {
      // This should never happen, unless integer overflow or something extremely wrong...
      LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
      throw e;
    }
    return this;
  }

  /**
   * Set the start row of the scan.
   * @param startRow row to start scan on (inclusive)
   * Note: In order to make startRow exclusive add a trailing 0 byte
   * @return this
   */
  public Scan setStartRow(byte [] startRow) {
    this.startRow = startRow;
    return this;
  }

  /**
   * Set the stop row.
   * @param stopRow row to end at (exclusive)
   * Note: In order to make stopRow inclusive add a trailing 0 byte
   * @return this
   */
  public Scan setStopRow(byte [] stopRow) {
    this.stopRow = stopRow;
    return this;
  }

  /**
   * Get all available versions.
   * @return this
   */
  public Scan setMaxVersions() {
    this.maxVersions = Integer.MAX_VALUE;
    return this;
  }

  /**
   * Get up to the specified number of versions of each column.
   * @param maxVersions maximum versions for each column
   * @return this
   */
  public Scan setMaxVersions(int maxVersions) {
    this.maxVersions = maxVersions;
    return this;
  }

  /**
   * Set the maximum number of values to return for each call to next()
   * @param batch the maximum number of values
   * @throws IncompatibleFilterException if the current filter returns true
   *   for {@code Filter.hasFilterRow()} — such filters need whole rows
   */
  public void setBatch(int batch) {
    if (this.hasFilter() && this.filter.hasFilterRow()) {
      throw new IncompatibleFilterException(
        "Cannot set batch on a scan using a filter" +
        " that returns true for filter.hasFilterRow");
    }
    this.batch = batch;
  }

  /**
   * Set the maximum number of values to return per row per Column Family
   * @param limit the maximum number of values returned / row / CF
   */
  public void setMaxResultsPerColumnFamily(int limit) {
    this.storeLimit = limit;
  }

  /**
   * Set offset for the row per Column Family.
   * @param offset is the number of kvs that will be skipped.
   */
  public void setRowOffsetPerColumnFamily(int offset) {
    this.storeOffset = offset;
  }

  /**
   * Set the number of rows for caching that will be passed to scanners.
   * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
   * Higher caching values will enable faster scanners but will use more memory.
   * @param caching the number of rows for caching
   */
  public void setCaching(int caching) {
    this.caching = caching;
  }

  /**
   * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
   */
  public long getMaxResultSize() {
    return maxResultSize;
  }

  /**
   * Set the maximum result size. The default is -1; this means that no specific
   * maximum result size will be set for this scan, and the global configured
   * value will be used instead. (Defaults to unlimited).
   *
   * @param maxResultSize The maximum result size in bytes.
   */
  public void setMaxResultSize(long maxResultSize) {
    this.maxResultSize = maxResultSize;
  }

  @Override
  public Scan setFilter(Filter filter) {
    super.setFilter(filter);
    return this;
  }

  /**
   * Setting the familyMap
   * @param familyMap map of family to qualifier
   * @return this
   */
  public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
    this.familyMap = familyMap;
    return this;
  }

  /**
   * Getting the familyMap
   * @return familyMap
   */
  public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
    return this.familyMap;
  }

  /**
   * @return the number of families in familyMap
   */
  public int numFamilies() {
    if(hasFamilies()) {
      return this.familyMap.size();
    }
    return 0;
  }

  /**
   * @return true if familyMap is non empty, false otherwise
   */
  public boolean hasFamilies() {
    return !this.familyMap.isEmpty();
  }

  /**
   * @return the keys of the familyMap, or null when no families are set
   */
  public byte[][] getFamilies() {
    if(hasFamilies()) {
      return this.familyMap.keySet().toArray(new byte[0][0]);
    }
    return null;
  }

  /**
   * @return the startrow
   */
  public byte [] getStartRow() {
    return this.startRow;
  }

  /**
   * @return the stoprow
   */
  public byte [] getStopRow() {
    return this.stopRow;
  }

  /**
   * @return the max number of versions to fetch
   */
  public int getMaxVersions() {
    return this.maxVersions;
  }

  /**
   * @return maximum number of values to return for a single call to next()
   */
  public int getBatch() {
    return this.batch;
  }

  /**
   * @return maximum number of values to return per row per CF
   */
  public int getMaxResultsPerColumnFamily() {
    return this.storeLimit;
  }

  /**
   * Method for retrieving the scan's offset per row per column
   * family (#kvs to be skipped)
   * @return row offset
   */
  public int getRowOffsetPerColumnFamily() {
    return this.storeOffset;
  }

  /**
   * @return caching the number of rows fetched when calling next on a scanner
   */
  public int getCaching() {
    return this.caching;
  }

  /**
   * @return TimeRange
   */
  public TimeRange getTimeRange() {
    return this.tr;
  }

  /**
   * @return RowFilter
   */
  public Filter getFilter() {
    return filter;
  }

  /**
   * @return true if a filter has been specified, false if not
   */
  public boolean hasFilter() {
    return filter != null;
  }

  /**
   * Set whether blocks should be cached for this Scan.
   * <p>
   * This is true by default.  When true, default settings of the table and
   * family are used (this will never override caching blocks if the block
   * cache is disabled for that family or entirely).
   *
   * @param cacheBlocks if false, default settings are overridden and blocks
   * will not be cached
   */
  public void setCacheBlocks(boolean cacheBlocks) {
    this.cacheBlocks = cacheBlocks;
  }

  /**
   * Get whether blocks should be cached for this Scan.
   * @return true if default caching should be used, false if blocks should not
   * be cached
   */
  public boolean getCacheBlocks() {
    return cacheBlocks;
  }

  /**
   * Set whether this scan is a reversed one
   * <p>
   * This is false by default which means forward(normal) scan.
   *
   * @param reversed if true, scan will be backward order
   * @return this
   */
  public Scan setReversed(boolean reversed) {
    this.reversed = reversed;
    return this;
  }

  /**
   * Get whether this scan is a reversed one.
   * @return true if backward scan, false if forward(default) scan
   */
  public boolean isReversed() {
    return reversed;
  }

  /**
   * Set the value indicating whether loading CFs on demand should be allowed (cluster
   * default is false). On-demand CF loading doesn't load column families until necessary, e.g.
   * if you filter on one column, the other column family data will be loaded only for the rows
   * that are included in result, not all rows like in normal case.
   * With column-specific filters, like SingleColumnValueFilter w/filterIfMissing == true,
   * this can deliver huge perf gains when there's a cf with lots of data; however, it can
   * also lead to some inconsistent results, as follows:
   * - if someone does a concurrent update to both column families in question you may get a row
   *   that never existed, e.g. for { rowKey = 5, { cat_videos =&gt; 1 }, { video =&gt; "my cat" } }
   *   someone puts rowKey 5 with { cat_videos =&gt; 0 }, { video =&gt; "my dog" }, concurrent scan
   *   filtering on "cat_videos == 1" can get { rowKey = 5, { cat_videos =&gt; 1 },
   *   { video =&gt; "my dog" } }.
   * - if there's a concurrent split and you have more than 2 column families, some rows may be
   *   missing some column families.
   */
  public void setLoadColumnFamiliesOnDemand(boolean value) {
    this.loadColumnFamiliesOnDemand = value;
  }

  /**
   * Get the raw loadColumnFamiliesOnDemand setting; if it's not set, can be null.
   */
  public Boolean getLoadColumnFamiliesOnDemandValue() {
    return this.loadColumnFamiliesOnDemand;
  }

  /**
   * Get the logical value indicating whether on-demand CF loading should be allowed.
   */
  public boolean doLoadColumnFamiliesOnDemand() {
    return (this.loadColumnFamiliesOnDemand != null)
      && this.loadColumnFamiliesOnDemand.booleanValue();
  }

  /**
   * Compile the table and column family (i.e. schema) information
   * into a String. Useful for parsing and aggregation by debugging,
   * logging, and administration tools.
   * @return Map
   */
  @Override
  public Map<String, Object> getFingerprint() {
    Map<String, Object> map = new HashMap<String, Object>();
    List<String> families = new ArrayList<String>();
    if(this.familyMap.size() == 0) {
      map.put("families", "ALL");
      return map;
    } else {
      map.put("families", families);
    }
    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
        this.familyMap.entrySet()) {
      families.add(Bytes.toStringBinary(entry.getKey()));
    }
    return map;
  }

  /**
   * Compile the details beyond the scope of getFingerprint (row, columns,
   * timestamps, etc.) into a Map along with the fingerprinted information.
   * Useful for debugging, logging, and administration tools.
   * @param maxCols a limit on the number of columns output prior to truncation
   * @return Map
   */
  @Override
  public Map<String, Object> toMap(int maxCols) {
    // start with the fingerprint map and build on top of it
    Map<String, Object> map = getFingerprint();
    // map from families to column list replaces fingerprint's list of families
    Map<String, List<String>> familyColumns =
      new HashMap<String, List<String>>();
    map.put("families", familyColumns);
    // add scalar information first
    map.put("startRow", Bytes.toStringBinary(this.startRow));
    map.put("stopRow", Bytes.toStringBinary(this.stopRow));
    map.put("maxVersions", this.maxVersions);
    map.put("batch", this.batch);
    map.put("caching", this.caching);
    map.put("maxResultSize", this.maxResultSize);
    map.put("cacheBlocks", this.cacheBlocks);
    map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
    List<Long> timeRange = new ArrayList<Long>();
    timeRange.add(this.tr.getMin());
    timeRange.add(this.tr.getMax());
    map.put("timeRange", timeRange);
    int colCount = 0;
    // iterate through affected families and list out up to maxCols columns
    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
      this.familyMap.entrySet()) {
      List<String> columns = new ArrayList<String>();
      familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
      if(entry.getValue() == null) {
        colCount++;
        --maxCols;
        columns.add("ALL");
      } else {
        // totalColumns always reflects the full count, even past truncation
        colCount += entry.getValue().size();
        if (maxCols <= 0) {
          continue;
        }
        for (byte [] column : entry.getValue()) {
          // check the remaining budget BEFORE adding, so exactly maxCols
          // columns are emitted (the old pre-decrement dropped the last one)
          if (maxCols <= 0) {
            break;
          }
          maxCols--;
          columns.add(Bytes.toStringBinary(column));
        }
      }
    }
    map.put("totalColumns", colCount);
    if (this.filter != null) {
      map.put("filter", this.filter.toString());
    }
    // add the id if set
    if (getId() != null) {
      map.put("id", getId());
    }
    return map;
  }

  /**
   * Enable/disable "raw" mode for this scan.
   * If "raw" is enabled the scan will return all
   * delete marker and deleted rows that have not
   * been collected, yet.
   * This is mostly useful for Scan on column families
   * that have KEEP_DELETED_CELLS enabled.
   * It is an error to specify any column when "raw" is set.
   * @param raw True/False to enable/disable "raw" mode.
   */
  public void setRaw(boolean raw) {
    setAttribute(RAW_ATTR, Bytes.toBytes(raw));
  }

  /**
   * @return True if this Scan is in "raw" mode.
   */
  public boolean isRaw() {
    byte[] attr = getAttribute(RAW_ATTR);
    return attr == null ? false : Bytes.toBoolean(attr);
  }

  /**
   * Set whether this scan is a small scan
   * <p>
   * Small scan should use pread and big scan can use seek + read
   *
   * seek + read is fast but can cause two problems (1) resource contention (2)
   * cause too much network io
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if setting it true, we would do
   * openScanner,next,closeScanner in one RPC call. It means the better
   * performance for small scan. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block(64KB), it could be
   * considered as a small scan.
   *
   * @param small whether the scan should be treated as a small scan
   */
  public void setSmall(boolean small) {
    this.small = small;
  }

  /**
   * Get whether this scan is a small scan
   * @return true if small scan
   */
  public boolean isSmall() {
    return small;
  }
}