1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.lang.ref.WeakReference;
22  import java.nio.ByteBuffer;
23  import java.util.EnumMap;
24  import java.util.Iterator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.PriorityQueue;
28  import java.util.SortedSet;
29  import java.util.TreeSet;
30  import java.util.concurrent.ConcurrentHashMap;
31  import java.util.concurrent.Executors;
32  import java.util.concurrent.ScheduledExecutorService;
33  import java.util.concurrent.TimeUnit;
34  import java.util.concurrent.atomic.AtomicLong;
35  import java.util.concurrent.locks.ReentrantLock;
36  
37  import com.google.common.base.Objects;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.hbase.io.HeapSize;
43  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
44  import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.ClassSize;
47  import org.apache.hadoop.hbase.util.HasThread;
48  import org.apache.hadoop.hbase.util.Threads;
49  import org.apache.hadoop.util.StringUtils;
50  import org.codehaus.jackson.annotate.JsonIgnoreProperties;
51  
52  import com.google.common.annotations.VisibleForTesting;
53  import com.google.common.util.concurrent.ThreadFactoryBuilder;
54  
55  /**
56   * A block cache implementation that is memory-aware using {@link HeapSize},
57   * memory-bound using an LRU eviction algorithm, and concurrent: backed by a
58   * {@link ConcurrentHashMap} and with a non-blocking eviction thread giving
59   * constant-time {@link #cacheBlock} and {@link #getBlock} operations.<p>
60   *
61   * Contains three levels of block priority to allow for
62   * scan-resistance and in-memory column families.  A block is added with an in-memory
63   * flag if necessary; otherwise it enters at single-access priority.  Once
64   * a block is accessed again, it is promoted to multi-access priority.  This is used
65   * to prevent scans from thrashing the cache, adding a least-frequently-used
66   * element to the eviction algorithm.<p>
67   *
68   * Each priority is given its own chunk of the total cache to ensure
69   * fairness during eviction.  Each priority will retain close to its maximum
70   * size, however, if any priority is not using its entire chunk the others
71   * are able to grow beyond their chunk size.<p>
72   *
73   * Instantiated at a minimum with the total size and average block size.
74   * All sizes are in bytes.  The block size is not especially important as this
75   * cache is fully dynamic in its sizing of blocks.  It is only used for
76   * pre-allocating data structures and in initial heap estimation of the map.<p>
77   *
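   * A minimal usage sketch (the sizes are illustrative, and {@code key}, {@code key2},
   * {@code block} and {@code block2} stand for existing {@link BlockCacheKey} and
   * {@link Cacheable} instances):
   * <pre>
   *   LruBlockCache cache = new LruBlockCache(64L * 1024 * 1024, 64 * 1024);
   *   cache.cacheBlock(key, block);             // cached at single-access priority
   *   Cacheable hit = cache.getBlock(key, true, false, true); // access promotes to multi-access
   *   cache.cacheBlock(key2, block2, true);     // cached at in-memory priority
   * </pre><p>
   *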
78   * The detailed constructor defines the fractions of the total cache assigned to
79   * the three priorities (they must sum to 1.0).  It also sets the levels that
80   * trigger and control the eviction thread.<p>
81   *
82   * The acceptable size is the cache size level which triggers the eviction
83   * process to start.  It evicts enough blocks to get the size below the
84   * minimum size specified.<p>
85   *
86   * Eviction happens in a separate thread and involves a single full-scan
87   * of the map.  It determines how many bytes must be freed to reach the minimum
88   * size, and then while scanning determines the fewest least-recently-used
89   * blocks necessary from each of the three priorities (so up to three times the
90   * bytes to free may be identified as candidates).  It then uses the priority
91   * chunk sizes to evict fairly according to the relative sizes and usage.
92   */
93  @InterfaceAudience.Private
94  @JsonIgnoreProperties({"encodingCountsForTest"})
95  public class LruBlockCache implements BlockCache, HeapSize {
96  
97    static final Log LOG = LogFactory.getLog(LruBlockCache.class);
98  
99    static final String LRU_MIN_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.min.factor";
100   static final String LRU_ACCEPTABLE_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.acceptable.factor";
101   static final String LRU_SINGLE_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.single.percentage";
102   static final String LRU_MULTI_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.multi.percentage";
103   static final String LRU_MEMORY_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.memory.percentage";
104 
105   /**
106    * Configuration key to force the data blocks of in-memory hfiles to stay cached
107    * (unless the in-memory blocks by themselves exceed the cache).  Unlike inMemory,
108    * which is a column-family configuration, inMemoryForceMode is a cluster-wide setting.
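    * <p>
    * A sketch of enabling it programmatically ({@code maxSize} and {@code blockSize}
    * are assumed; normally the key would instead be set in hbase-site.xml):
    * <pre>
    *   Configuration conf = HBaseConfiguration.create();
    *   conf.setBoolean("hbase.lru.rs.inmemoryforcemode", true);
    *   LruBlockCache cache = new LruBlockCache(maxSize, blockSize, true, conf);
    * </pre>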
109    */
110   static final String LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME = "hbase.lru.rs.inmemoryforcemode";
111 
112   /** Default Configuration Parameters */
113 
114   /** Backing Concurrent Map Configuration */
115   static final float DEFAULT_LOAD_FACTOR = 0.75f;
116   static final int DEFAULT_CONCURRENCY_LEVEL = 16;
117 
118   /** Eviction thresholds */
119   static final float DEFAULT_MIN_FACTOR = 0.95f;
120   static final float DEFAULT_ACCEPTABLE_FACTOR = 0.99f;
121 
122   /** Priority buckets */
123   static final float DEFAULT_SINGLE_FACTOR = 0.25f;
124   static final float DEFAULT_MULTI_FACTOR = 0.50f;
125   static final float DEFAULT_MEMORY_FACTOR = 0.25f;
126 
127   static final boolean DEFAULT_IN_MEMORY_FORCE_MODE = false;
128 
129   /** Statistics thread period, in seconds */
130   static final int statThreadPeriod = 60 * 5;
131 
132   /** Concurrent map (the cache) */
133   private final Map<BlockCacheKey,LruCachedBlock> map;
134 
135   /** Eviction lock (locked when eviction in process) */
136   private final ReentrantLock evictionLock = new ReentrantLock(true);
137 
138   /** Volatile boolean to track if we are in an eviction process or not */
139   private volatile boolean evictionInProgress = false;
140 
141   /** Eviction thread */
142   private final EvictionThread evictionThread;
143 
144   /** Statistics thread schedule pool (for heavy debugging, could remove) */
145   private final ScheduledExecutorService scheduleThreadPool =
146     Executors.newScheduledThreadPool(1,
147       new ThreadFactoryBuilder()
148         .setNameFormat("LruStats #%d")
149         .setDaemon(true)
150         .build());
151 
152   /** Current size of cache */
153   private final AtomicLong size;
154 
155   /** Current number of cached elements */
156   private final AtomicLong elements;
157 
158   /** Cache access count (sequential ID) */
159   private final AtomicLong count;
160 
161   /** Cache statistics */
162   private final CacheStats stats;
163 
164   /** Maximum allowable size of cache (block put if size > max, evict) */
165   private long maxSize;
166 
167   /** Approximate block size */
168   private long blockSize;
169 
170   /** Acceptable size of cache (no evictions if size < acceptable) */
171   private float acceptableFactor;
172 
173   /** Minimum threshold of cache (when evicting, evict until size < min) */
174   private float minFactor;
175 
176   /** Single access bucket size */
177   private float singleFactor;
178 
179   /** Multiple access bucket size */
180   private float multiFactor;
181 
182   /** In-memory bucket size */
183   private float memoryFactor;
184 
185   /** Overhead of the structure itself */
186   private long overhead;
187 
188   /** Whether in-memory hfile's data block has higher priority when evicting */
189   private boolean forceInMemory;
190 
191   /** Where to send victims (blocks evicted from the cache) */
192   private BucketCache victimHandler = null;
193 
194   /**
195    * Default constructor.  Specify maximum size and expected average block
196    * size (approximation is fine).
197    *
198    * <p>All other factors will be calculated based on defaults specified in
199    * this class.
200    * @param maxSize maximum size of cache, in bytes
201    * @param blockSize approximate size of each block, in bytes
202    */
203   public LruBlockCache(long maxSize, long blockSize) {
204     this(maxSize, blockSize, true);
205   }
206 
207   /**
208    * Constructor used for testing.  Allows disabling of the eviction thread.
209    */
210   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread) {
211     this(maxSize, blockSize, evictionThread,
212         (int)Math.ceil(1.2*maxSize/blockSize),
213         DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL,
214         DEFAULT_MIN_FACTOR, DEFAULT_ACCEPTABLE_FACTOR,
215         DEFAULT_SINGLE_FACTOR,
216         DEFAULT_MULTI_FACTOR,
217         DEFAULT_MEMORY_FACTOR,
218         false
219         );
220   }
221 
222   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, Configuration conf) {
223     this(maxSize, blockSize, evictionThread,
224         (int)Math.ceil(1.2*maxSize/blockSize),
225         DEFAULT_LOAD_FACTOR,
226         DEFAULT_CONCURRENCY_LEVEL,
227         conf.getFloat(LRU_MIN_FACTOR_CONFIG_NAME, DEFAULT_MIN_FACTOR),
228         conf.getFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, DEFAULT_ACCEPTABLE_FACTOR),
229         conf.getFloat(LRU_SINGLE_PERCENTAGE_CONFIG_NAME, DEFAULT_SINGLE_FACTOR),
230         conf.getFloat(LRU_MULTI_PERCENTAGE_CONFIG_NAME, DEFAULT_MULTI_FACTOR),
231         conf.getFloat(LRU_MEMORY_PERCENTAGE_CONFIG_NAME, DEFAULT_MEMORY_FACTOR),
232         conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE)
233         );
234   }
235 
236   public LruBlockCache(long maxSize, long blockSize, Configuration conf) {
237     this(maxSize, blockSize, true, conf);
238   }
239 
240   /**
241    * Configurable constructor.  Use this constructor if not using defaults.
242    * @param maxSize maximum size of this cache, in bytes
243    * @param blockSize expected average size of blocks, in bytes
244    * @param evictionThread whether to run evictions in a bg thread or not
245    * @param mapInitialSize initial size of backing ConcurrentHashMap
246    * @param mapLoadFactor initial load factor of backing ConcurrentHashMap
247    * @param mapConcurrencyLevel initial concurrency factor for backing CHM
248    * @param minFactor percentage of total size that eviction will evict until
249    * @param acceptableFactor percentage of total size that triggers eviction
250    * @param singleFactor percentage of total size for single-access blocks
251    * @param multiFactor percentage of total size for multiple-access blocks
252    * @param memoryFactor percentage of total size for in-memory blocks
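    * @param forceInMemory whether the data blocks of in-memory hfiles get higher priority
    *          when evicting (they are only evicted once the single and multi buckets are empty)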
253    */
254   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread,
255       int mapInitialSize, float mapLoadFactor, int mapConcurrencyLevel,
256       float minFactor, float acceptableFactor, float singleFactor,
257       float multiFactor, float memoryFactor, boolean forceInMemory) {
258     if(singleFactor + multiFactor + memoryFactor != 1 ||
259         singleFactor < 0 || multiFactor < 0 || memoryFactor < 0) {
260       throw new IllegalArgumentException("Single, multi, and memory factors " +
261           " should be non-negative and total 1.0");
262     }
263     if(minFactor >= acceptableFactor) {
264       throw new IllegalArgumentException("minFactor must be smaller than acceptableFactor");
265     }
266     if(minFactor >= 1.0f || acceptableFactor >= 1.0f) {
267       throw new IllegalArgumentException("all factors must be < 1");
268     }
269     this.maxSize = maxSize;
270     this.blockSize = blockSize;
271     this.forceInMemory = forceInMemory;
272     map = new ConcurrentHashMap<BlockCacheKey,LruCachedBlock>(mapInitialSize,
273         mapLoadFactor, mapConcurrencyLevel);
274     this.minFactor = minFactor;
275     this.acceptableFactor = acceptableFactor;
276     this.singleFactor = singleFactor;
277     this.multiFactor = multiFactor;
278     this.memoryFactor = memoryFactor;
279     this.stats = new CacheStats();
280     this.count = new AtomicLong(0);
281     this.elements = new AtomicLong(0);
282     this.overhead = calculateOverhead(maxSize, blockSize, mapConcurrencyLevel);
283     this.size = new AtomicLong(this.overhead);
284     if(evictionThread) {
285       this.evictionThread = new EvictionThread(this);
286       this.evictionThread.start(); // FindBugs SC_START_IN_CTOR
287     } else {
288       this.evictionThread = null;
289     }
290     this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
291         statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);
292   }
293 
294   public void setMaxSize(long maxSize) {
295     this.maxSize = maxSize;
296     if(this.size.get() > acceptableSize() && !evictionInProgress) {
297       runEviction();
298     }
299   }
300 
301   // BlockCache implementation
302 
303   /**
304    * Cache the block with the specified name and buffer.
305    * <p>
306    * It is assumed this will NOT be called on an already cached block. In rare cases (HBASE-8547)
307    * this can happen, for which we compare the buffer contents.
308    * @param cacheKey block's cache key
309    * @param buf block buffer
310    * @param inMemory if block is in-memory
311    */
312   @Override
313   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
314     LruCachedBlock cb = map.get(cacheKey);
315     if(cb != null) {
316       // compare the contents, if they are not equal, we are in big trouble
317       if (compare(buf, cb.getBuffer()) != 0) {
318         throw new RuntimeException("Cached block contents differ, which should not have happened. "
319           + "cacheKey:" + cacheKey);
320       }
321       String msg = "Cached an already cached block: " + cacheKey + " cb:" + cb.getCacheKey();
322       msg += ". This is harmless and can happen in rare cases (see HBASE-8547)";
323       LOG.warn(msg);
324       return;
325     }
326     cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
327     long newSize = updateSizeMetrics(cb, false);
328     map.put(cacheKey, cb);
329     long val = elements.incrementAndGet();
330     if (LOG.isTraceEnabled()) {
331       long size = map.size();
332       assertCounterSanity(size, val);
333     }
334     if (newSize > acceptableSize() && !evictionInProgress) {
335       runEviction();
336     }
337   }
338 
339   /**
340    * Sanity-checking for parity between actual block cache content and metrics.
341    * Intended only for use with TRACE level logging and -ea JVM.
342    */
343   private static void assertCounterSanity(long mapSize, long counterVal) {
344     if (counterVal < 0) {
345       LOG.trace("counterVal overflow. Assertions unreliable. counterVal=" + counterVal +
346         ", mapSize=" + mapSize);
347       return;
348     }
349     if (mapSize < Integer.MAX_VALUE) {
350       double pct_diff = Math.abs((((double) counterVal) / ((double) mapSize)) - 1.);
351       if (pct_diff > 0.05) {
352         LOG.trace("delta between reported and actual size > 5%. counterVal=" + counterVal +
353           ", mapSize=" + mapSize);
354       }
355     }
356   }
357 
358   private int compare(Cacheable left, Cacheable right) {
359     ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
360     left.serialize(l);
361     ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
362     right.serialize(r);
363     return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(),
364       r.array(), r.arrayOffset(), r.limit());
365   }
366 
367   /**
368    * Cache the block with the specified name and buffer.
369    * <p>
370    * It is assumed this will NOT be called on an already cached block.  In rare
371    * cases (HBASE-8547) this can happen; the buffer contents are then compared
372    * and the already-cached copy is kept, so neither the buffer nor the size of
373    * the cache is modified.
374    * @param cacheKey block's cache key
375    * @param buf block buffer
376    */
377   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
378     cacheBlock(cacheKey, buf, false);
379   }
380 
381   /**
382    * Helper function that updates the local size counter and also updates any
383    * per-cf or per-blocktype metrics it can discern from given
384    * {@link LruCachedBlock}
385    *
386    * @param cb the block being cached or evicted
387    * @param evict true if the block is being evicted, so its heap size is subtracted
388    */
389   protected long updateSizeMetrics(LruCachedBlock cb, boolean evict) {
390     long heapsize = cb.heapSize();
391     if (evict) {
392       heapsize *= -1;
393     }
394     return size.addAndGet(heapsize);
395   }
396 
397   /**
398    * Get the buffer of the block with the specified name.
399    * @param cacheKey block's cache key
400    * @param caching true if the caller caches blocks on cache misses
401    * @param repeat Whether this is a repeat lookup for the same block
402    *        (used to avoid double counting cache misses when doing double-check locking)
403    * @param updateCacheMetrics Whether to update cache metrics or not
404    * @return buffer of specified cache key, or null if not in cache
405    */
406   @Override
407   public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
408       boolean updateCacheMetrics) {
409     LruCachedBlock cb = map.get(cacheKey);
410     if(cb == null) {
411       if (!repeat && updateCacheMetrics) stats.miss(caching);
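      // on a miss, fall back to the victim cache if one is configured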
412       if (victimHandler != null)
413         return victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
414       return null;
415     }
416     if (updateCacheMetrics) stats.hit(caching);
417     cb.access(count.incrementAndGet());
418     return cb.getBuffer();
419   }
420 
421   /**
422    * Whether the cache contains the block with the specified cacheKey
423    * @param cacheKey the block's cache key
424    * @return true if the cache contains the block
425    */
426   public boolean containsBlock(BlockCacheKey cacheKey) {
427     return map.containsKey(cacheKey);
428   }
429 
430   @Override
431   public boolean evictBlock(BlockCacheKey cacheKey) {
432     LruCachedBlock cb = map.get(cacheKey);
433     if (cb == null) return false;
434     evictBlock(cb, false);
435     return true;
436   }
437 
438   /**
439    * Evicts all blocks for a specific HFile. This is an
440    * expensive operation implemented as a linear-time search through all blocks
441    * in the cache. Ideally this should be a search in a log-access-time map.
442    *
443    * <p>
444    * This is used for evict-on-close to remove all blocks of a specific HFile.
445    *
446    * @return the number of blocks evicted
447    */
448   @Override
449   public int evictBlocksByHfileName(String hfileName) {
450     int numEvicted = 0;
451     for (BlockCacheKey key : map.keySet()) {
452       if (key.getHfileName().equals(hfileName)) {
453         if (evictBlock(key))
454           ++numEvicted;
455       }
456     }
457     if (victimHandler != null) {
458       numEvicted += victimHandler.evictBlocksByHfileName(hfileName);
459     }
460     return numEvicted;
461   }
462 
463   /**
464    * Evict the block; if a victim handler exists and the block may be read again
465    * later, the block is handed off to the victim cache.
466    * @param block the block to evict
467    * @param evictedByEvictionProcess true if the given block is evicted by
468    *          EvictionThread
469    * @return the heap size of evicted block
470    */
471   protected long evictBlock(LruCachedBlock block, boolean evictedByEvictionProcess) {
472     map.remove(block.getCacheKey());
473     updateSizeMetrics(block, true);
474     long val = elements.decrementAndGet();
475     if (LOG.isTraceEnabled()) {
476       long size = map.size();
477       assertCounterSanity(size, val);
478     }
479     stats.evicted();
480     if (evictedByEvictionProcess && victimHandler != null) {
481       boolean wait = getCurrentSize() < acceptableSize();
482       boolean inMemory = block.getPriority() == BlockPriority.MEMORY;
483       victimHandler.cacheBlockWithWait(block.getCacheKey(), block.getBuffer(),
484           inMemory, wait);
485     }
486     return block.heapSize();
487   }
488 
489   /**
490    * Multi-threaded call to run the eviction process.
491    */
492   private void runEviction() {
493     if(evictionThread == null) {
494       evict();
495     } else {
496       evictionThread.evict();
497     }
498   }
499 
500   /**
501    * Eviction method.
502    */
503   void evict() {
504 
505     // Ensure only one eviction at a time
506     if(!evictionLock.tryLock()) return;
507 
508     try {
509       evictionInProgress = true;
510       long currentSize = this.size.get();
511       long bytesToFree = currentSize - minSize();
512 
513       if (LOG.isTraceEnabled()) {
514         LOG.trace("Block cache LRU eviction started; Attempting to free " +
515           StringUtils.byteDesc(bytesToFree) + " of total=" +
516           StringUtils.byteDesc(currentSize));
517       }
518 
519       if(bytesToFree <= 0) return;
520 
521       // Instantiate priority buckets
522       BlockBucket bucketSingle = new BlockBucket("single", bytesToFree, blockSize,
523           singleSize());
524       BlockBucket bucketMulti = new BlockBucket("multi", bytesToFree, blockSize,
525           multiSize());
526       BlockBucket bucketMemory = new BlockBucket("memory", bytesToFree, blockSize,
527           memorySize());
528 
529       // Scan entire map putting into appropriate buckets
530       for(LruCachedBlock cachedBlock : map.values()) {
531         switch(cachedBlock.getPriority()) {
532           case SINGLE: {
533             bucketSingle.add(cachedBlock);
534             break;
535           }
536           case MULTI: {
537             bucketMulti.add(cachedBlock);
538             break;
539           }
540           case MEMORY: {
541             bucketMemory.add(cachedBlock);
542             break;
543           }
544         }
545       }
546 
547       long bytesFreed = 0;
548       if (forceInMemory || memoryFactor > 0.999f) {
549         long s = bucketSingle.totalSize();
550         long m = bucketMulti.totalSize();
551         if (bytesToFree > (s + m)) {
552           // this means we need to evict blocks in memory bucket to make room,
553           // so the single and multi buckets will be emptied
554           bytesFreed = bucketSingle.free(s);
555           bytesFreed += bucketMulti.free(m);
556           if (LOG.isTraceEnabled()) {
557             LOG.trace("freed " + StringUtils.byteDesc(bytesFreed) +
558               " from single and multi buckets");
559           }
560           bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
561           if (LOG.isTraceEnabled()) {
562             LOG.trace("freed " + StringUtils.byteDesc(bytesFreed) +
563               " total from all three buckets ");
564           }
565         } else {
566           // this means there is no need to evict blocks from the memory bucket,
567           // and we try our best to keep the ratio between the single bucket and
568           // the multi bucket at 1:2
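          // A hypothetical worked example: s=300MB, m=500MB, bytesToFree=200MB gives
          // bytesRemain=600MB; neither 3*s <= bytesRemain nor 3*m <= 2*bytesRemain holds,
          // so the single bucket frees s - bytesRemain/3 = 100MB and the multi bucket
          // frees the remaining 100MB, leaving 200MB single and 400MB multi, the desired 1:2 ratio.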
569           long bytesRemain = s + m - bytesToFree;
570           if (3 * s <= bytesRemain) {
571             // single-bucket is small enough that no eviction happens for it
572             // hence all eviction goes from multi-bucket
573             bytesFreed = bucketMulti.free(bytesToFree);
574           } else if (3 * m <= 2 * bytesRemain) {
575             // multi-bucket is small enough that no eviction happens for it
576             // hence all eviction goes from single-bucket
577             bytesFreed = bucketSingle.free(bytesToFree);
578           } else {
579             // both buckets need to evict some blocks
580             bytesFreed = bucketSingle.free(s - bytesRemain / 3);
581             if (bytesFreed < bytesToFree) {
582               bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
583             }
584           }
585         }
586       } else {
587         PriorityQueue<BlockBucket> bucketQueue =
588           new PriorityQueue<BlockBucket>(3);
589 
590         bucketQueue.add(bucketSingle);
591         bucketQueue.add(bucketMulti);
592         bucketQueue.add(bucketMemory);
593 
594         int remainingBuckets = 3;
595 
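        // Poll buckets from least to most overflowing.  Each bucket frees at most its
        // own overflow and at most an equal share of the bytes still to be freed, so
        // buckets that are further over their chunk size give up proportionally more.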
596         BlockBucket bucket;
597         while((bucket = bucketQueue.poll()) != null) {
598           long overflow = bucket.overflow();
599           if(overflow > 0) {
600             long bucketBytesToFree = Math.min(overflow,
601                 (bytesToFree - bytesFreed) / remainingBuckets);
602             bytesFreed += bucket.free(bucketBytesToFree);
603           }
604           remainingBuckets--;
605         }
606       }
607 
608       if (LOG.isTraceEnabled()) {
609         long single = bucketSingle.totalSize();
610         long multi = bucketMulti.totalSize();
611         long memory = bucketMemory.totalSize();
612         LOG.trace("Block cache LRU eviction completed; " +
613           "freed=" + StringUtils.byteDesc(bytesFreed) + ", " +
614           "total=" + StringUtils.byteDesc(this.size.get()) + ", " +
615           "single=" + StringUtils.byteDesc(single) + ", " +
616           "multi=" + StringUtils.byteDesc(multi) + ", " +
617           "memory=" + StringUtils.byteDesc(memory));
618       }
619     } finally {
620       stats.evict();
621       evictionInProgress = false;
622       evictionLock.unlock();
623     }
624   }
625 
626   @Override
627   public String toString() {
628     return Objects.toStringHelper(this)
629       .add("blockCount", getBlockCount())
630       .add("currentSize", getCurrentSize())
631       .add("freeSize", getFreeSize())
632       .add("maxSize", getMaxSize())
633       .add("heapSize", heapSize())
634       .add("minSize", minSize())
635       .add("minFactor", minFactor)
636       .add("multiSize", multiSize())
637       .add("multiFactor", multiFactor)
638       .add("singleSize", singleSize())
639       .add("singleFactor", singleFactor)
640       .toString();
641   }
642 
643   /**
644    * Used to group blocks into priority buckets.  There will be a BlockBucket
645    * for each priority (single, multi, memory).  Once bucketed, the eviction
646    * algorithm takes the appropriate number of elements out of each according
647    * to configuration parameters and their relative sizes.
648    */
649   private class BlockBucket implements Comparable<BlockBucket> {
650 
651     private final String name;
652     private LruCachedBlockQueue queue;
653     private long totalSize = 0;
654     private long bucketSize;
655 
656     public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
657       this.name = name;
658       this.bucketSize = bucketSize;
659       queue = new LruCachedBlockQueue(bytesToFree, blockSize);
660       totalSize = 0;
661     }
662 
663     public void add(LruCachedBlock block) {
664       totalSize += block.heapSize();
665       queue.add(block);
666     }
667 
668     public long free(long toFree) {
669       if (LOG.isTraceEnabled()) {
670         LOG.trace("freeing " + StringUtils.byteDesc(toFree) + " from " + this);
671       }
672       LruCachedBlock cb;
673       long freedBytes = 0;
674       while ((cb = queue.pollLast()) != null) {
675         freedBytes += evictBlock(cb, true);
676         if (freedBytes >= toFree) {
677           return freedBytes;
678         }
679       }
680       if (LOG.isTraceEnabled()) {
681         LOG.trace("freed " + StringUtils.byteDesc(freedBytes) + " from " + this);
682       }
683       return freedBytes;
684     }
685 
686     public long overflow() {
687       return totalSize - bucketSize;
688     }
689 
690     public long totalSize() {
691       return totalSize;
692     }
693 
694     public int compareTo(BlockBucket that) {
695       if(this.overflow() == that.overflow()) return 0;
696       return this.overflow() > that.overflow() ? 1 : -1;
697     }
698 
699     @Override
700     public boolean equals(Object that) {
701       if (that == null || !(that instanceof BlockBucket)){
702         return false;
703       }
704 
705       return compareTo((BlockBucket)that) == 0;
706     }
707 
708     @Override
709     public int hashCode() {
710       return Objects.hashCode(name, bucketSize, queue, totalSize);
711     }
712 
713     @Override
714     public String toString() {
715       return Objects.toStringHelper(this)
716         .add("name", name)
717         .add("totalSize", StringUtils.byteDesc(totalSize))
718         .add("bucketSize", StringUtils.byteDesc(bucketSize))
719         .toString();
720     }
721   }
722 
723   /**
724    * Get the maximum size of this cache.
725    * @return max size in bytes
726    */
727   public long getMaxSize() {
728     return this.maxSize;
729   }
730 
731   @Override
732   public long getCurrentSize() {
733     return this.size.get();
734   }
735 
736   @Override
737   public long getFreeSize() {
738     return getMaxSize() - getCurrentSize();
739   }
740 
741   @Override
742   public long size() {
743     return getMaxSize();
744   }
745 
746   @Override
747   public long getBlockCount() {
748     return this.elements.get();
749   }
750 
751   EvictionThread getEvictionThread() {
752     return this.evictionThread;
753   }
754 
755   /*
756    * Eviction thread.  Sits in waiting state until an eviction is triggered
757    * when the cache size grows above the acceptable level.<p>
758    *
759    * Thread is triggered into action by {@link LruBlockCache#runEviction()}
760    */
761   static class EvictionThread extends HasThread {
762     private WeakReference<LruBlockCache> cache;
763     private boolean go = true;
764     // flag set after entering the run method; used in tests
765     private boolean enteringRun = false;
766 
767     public EvictionThread(LruBlockCache cache) {
768       super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread");
769       setDaemon(true);
770       this.cache = new WeakReference<LruBlockCache>(cache);
771     }
772 
773     @Override
774     public void run() {
775       enteringRun = true;
776       while (this.go) {
777         synchronized(this) {
778           try {
779             this.wait(1000 * 10/*Don't wait forever*/);
780           } catch(InterruptedException e) {}
781         }
782         LruBlockCache cache = this.cache.get();
783         if (cache == null) break;
784         cache.evict();
785       }
786     }
787 
788     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NN_NAKED_NOTIFY",
789         justification="This is what we want")
790     public void evict() {
791       synchronized(this) {
792         this.notifyAll();
793       }
794     }
795 
796     synchronized void shutdown() {
797       this.go = false;
798       this.notifyAll();
799     }
800 
801     /**
802      * Used in tests.
803      */
804     boolean isEnteringRun() {
805       return this.enteringRun;
806     }
807   }
808 
809   /*
810    * Statistics thread.  Periodically prints the cache statistics to the log.
811    */
812   static class StatisticsThread extends Thread {
813     LruBlockCache lru;
814 
815     public StatisticsThread(LruBlockCache lru) {
816       super("LruBlockCache.StatisticsThread");
817       setDaemon(true);
818       this.lru = lru;
819     }
820     @Override
821     public void run() {
822       lru.logStats();
823     }
824   }
825 
826   public void logStats() {
827     if (!LOG.isDebugEnabled()) return;
828     // Log size
829     long totalSize = heapSize();
830     long freeSize = maxSize - totalSize;
831     LruBlockCache.LOG.debug("Total=" + StringUtils.byteDesc(totalSize) + ", " +
832         "free=" + StringUtils.byteDesc(freeSize) + ", " +
833         "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
834         "blockCount=" + getBlockCount() + ", " +
835         "accesses=" + stats.getRequestCount() + ", " +
836         "hits=" + stats.getHitCount() + ", " +
837         "hitRatio=" +
838           (stats.getHitCount() == 0 ? "0" : (StringUtils.formatPercent(stats.getHitRatio(), 2)+ ", ")) + ", " +
839         "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
840         "cachingHits=" + stats.getHitCachingCount() + ", " +
841         "cachingHitsRatio=" +
842           (stats.getHitCachingCount() == 0 ? "0,": (StringUtils.formatPercent(stats.getHitCachingRatio(), 2) + ", ")) +
843         "evictions=" + stats.getEvictionCount() + ", " +
844         "evicted=" + stats.getEvictedCount() + ", " +
845         "evictedPerRun=" + stats.evictedPerEviction());
846   }
847 
848   /**
849    * Get counter statistics for this cache.
850    *
851    * <p>Includes: total accesses, hits, misses, evicted blocks, and runs
852    * of the eviction processes.
853    */
854   public CacheStats getStats() {
855     return this.stats;
856   }
857 
858   public final static long CACHE_FIXED_OVERHEAD = ClassSize.align(
859       (3 * Bytes.SIZEOF_LONG) + (9 * ClassSize.REFERENCE) +
860       (5 * Bytes.SIZEOF_FLOAT) + Bytes.SIZEOF_BOOLEAN
861       + ClassSize.OBJECT);
862 
863   // HeapSize implementation
864   public long heapSize() {
865     return getCurrentSize();
866   }
867 
868   public static long calculateOverhead(long maxSize, long blockSize, int concurrency){
869     // FindBugs ICAST_INTEGER_MULTIPLY_CAST_TO_LONG
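    // e.g. (hypothetical) maxSize=1GB and blockSize=64KB pre-sizes for
    // ceil(1GB * 1.2 / 64KB) = 19661 map-entry overheads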
870     return CACHE_FIXED_OVERHEAD + ClassSize.CONCURRENT_HASHMAP +
871         ((long)Math.ceil(maxSize*1.2/blockSize)
872             * ClassSize.CONCURRENT_HASHMAP_ENTRY) +
873         ((long)concurrency * ClassSize.CONCURRENT_HASHMAP_SEGMENT);
874   }
875 
876   @Override
877   public Iterator<CachedBlock> iterator() {
878     final Iterator<LruCachedBlock> iterator = map.values().iterator();
879 
880     return new Iterator<CachedBlock>() {
881       private final long now = System.nanoTime();
882 
883       @Override
884       public boolean hasNext() {
885         return iterator.hasNext();
886       }
887 
888       @Override
889       public CachedBlock next() {
890         final LruCachedBlock b = iterator.next();
891         return new CachedBlock() {
892           @Override
893           public String toString() {
894             return BlockCacheUtil.toString(this, now);
895           }
896 
897           @Override
898           public BlockPriority getBlockPriority() {
899             return b.getPriority();
900           }
901 
902           @Override
903           public BlockType getBlockType() {
904             return b.getBuffer().getBlockType();
905           }
906 
907           @Override
908           public long getOffset() {
909             return b.getCacheKey().getOffset();
910           }
911 
912           @Override
913           public long getSize() {
914             return b.getBuffer().heapSize();
915           }
916 
917           @Override
918           public long getCachedTime() {
919             return b.getCachedTime();
920           }
921 
922           @Override
923           public String getFilename() {
924             return b.getCacheKey().getHfileName();
925           }
926 
927           @Override
928           public int compareTo(CachedBlock other) {
929             int diff = this.getFilename().compareTo(other.getFilename());
930             if (diff != 0) return diff;
931             diff = (int)(this.getOffset() - other.getOffset());
932             if (diff != 0) return diff;
933             if (other.getCachedTime() < 0 || this.getCachedTime() < 0) {
934               throw new IllegalStateException("" + this.getCachedTime() + ", " +
935                 other.getCachedTime());
936             }
937             return (int)(other.getCachedTime() - this.getCachedTime());
938           }
939 
940           @Override
941           public int hashCode() {
942             return b.hashCode();
943           }
944 
945           @Override
946           public boolean equals(Object obj) {
947             if (obj instanceof CachedBlock) {
948               CachedBlock cb = (CachedBlock)obj;
949               return compareTo(cb) == 0;
950             } else {
951               return false;
952             }
953           }
954         };
955       }
956 
957       @Override
958       public void remove() {
959         throw new UnsupportedOperationException();
960       }
961     };
962   }
963 
964   // Simple calculators of sizes given factors and maxSize
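  // A worked example with the default factors and a hypothetical maxSize of 1 GB:
  //   acceptableSize() = 0.99 GB              (eviction is triggered above this)
  //   minSize()        = 0.95 GB              (eviction frees blocks down to this)
  //   singleSize()     = 0.25 * 0.95 GB ~ 243 MB
  //   multiSize()      = 0.50 * 0.95 GB ~ 486 MB
  //   memorySize()     = 0.25 * 0.95 GB ~ 243 MB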
965 
966   private long acceptableSize() {
967     return (long)Math.floor(this.maxSize * this.acceptableFactor);
968   }
969   private long minSize() {
970     return (long)Math.floor(this.maxSize * this.minFactor);
971   }
972   private long singleSize() {
973     return (long)Math.floor(this.maxSize * this.singleFactor * this.minFactor);
974   }
975   private long multiSize() {
976     return (long)Math.floor(this.maxSize * this.multiFactor * this.minFactor);
977   }
978   private long memorySize() {
979     return (long)Math.floor(this.maxSize * this.memoryFactor * this.minFactor);
980   }
981 
982   public void shutdown() {
983     if (victimHandler != null)
984       victimHandler.shutdown();
985     this.scheduleThreadPool.shutdown();
986     for (int i = 0; i < 10; i++) {
987       if (!this.scheduleThreadPool.isShutdown()) Threads.sleep(10);
988     }
989     if (!this.scheduleThreadPool.isShutdown()) {
990       List<Runnable> runnables = this.scheduleThreadPool.shutdownNow();
991       LOG.debug("Still running " + runnables);
992     }
993     this.evictionThread.shutdown();
994   }
995 
996   /** Clears the cache. Used in tests. */
997   @VisibleForTesting
998   public void clearCache() {
999     map.clear();
1000     elements.set(0);
1001   }
1002 
1003   /**
1004    * Used in testing. May be very inefficient.
1005    * @return the set of cached file names
1006    */
1007   @VisibleForTesting
1008   SortedSet<String> getCachedFileNamesForTest() {
1009     SortedSet<String> fileNames = new TreeSet<String>();
1010     for (BlockCacheKey cacheKey : map.keySet()) {
1011       fileNames.add(cacheKey.getHfileName());
1012     }
1013     return fileNames;
1014   }
1015 
1016   @VisibleForTesting
1017   Map<BlockType, Integer> getBlockTypeCountsForTest() {
1018     Map<BlockType, Integer> counts =
1019         new EnumMap<BlockType, Integer>(BlockType.class);
1020     for (LruCachedBlock cb : map.values()) {
1021       BlockType blockType = ((HFileBlock) cb.getBuffer()).getBlockType();
1022       Integer count = counts.get(blockType);
1023       counts.put(blockType, (count == null ? 0 : count) + 1);
1024     }
1025     return counts;
1026   }
1027 
1028   @VisibleForTesting
1029   public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
1030     Map<DataBlockEncoding, Integer> counts =
1031         new EnumMap<DataBlockEncoding, Integer>(DataBlockEncoding.class);
1032     for (BlockCacheKey cacheKey : map.keySet()) {
1033       DataBlockEncoding encoding = cacheKey.getDataBlockEncoding();
1034       Integer count = counts.get(encoding);
1035       counts.put(encoding, (count == null ? 0 : count) + 1);
1036     }
1037     return counts;
1038   }
1039 
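  /**
   * Set the victim cache, such as a {@link BucketCache}, that receives blocks evicted
   * by the eviction process.  May only be set once.
   */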
1040   public void setVictimCache(BucketCache handler) {
1041     assert victimHandler == null;
1042     victimHandler = handler;
1043   }
1044 
1045   @VisibleForTesting
1046   Map<BlockCacheKey, LruCachedBlock> getMapForTests() {
1047     return map;
1048   }
1049 
1050   @Override
1051   public BlockCache[] getBlockCaches() {
1052     return null;
1053   }
1054 }