View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.wal;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.EOFException;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.HashMap;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.NavigableMap;
31  import java.util.TreeMap;
32  import java.util.UUID;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.TableName;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
42  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FamilyScope;
43  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.ScopeType;
44  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALKey;
45  import org.apache.hadoop.hbase.util.ByteStringer;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.io.WritableComparable;
48  import org.apache.hadoop.io.WritableUtils;
49  
50  import com.google.common.annotations.VisibleForTesting;
51  import com.google.protobuf.ByteString;
52  
53  /**
54   * A Key for an entry in the change log.
55   *
56   * The log intermingles edits to many tables and rows, so each log entry
57   * identifies the appropriate table and row.  Within a table and row, they're
58   * also sorted.
59   *
60   * <p>Some Transactional edits (START, COMMIT, ABORT) will not have an
61   * associated row.
62   */
63  // TODO: Key and WALEdit are never used separately, or in one-to-many relation, for practical
64  //       purposes. They need to be merged into HLogEntry.
65  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.REPLICATION)
66  public class HLogKey implements WritableComparable<HLogKey> {
67    public static final Log LOG = LogFactory.getLog(HLogKey.class);
68  
69    // should be < 0 (@see #readFields(DataInput))
70    // version 2 supports HLog compression
71    enum Version {
72      UNVERSIONED(0),
73      // Initial number we put on HLogKey when we introduced versioning.
74      INITIAL(-1),
75      // Version -2 introduced a dictionary compression facility.  Only this
76      // dictionary-based compression is available in version -2.
77      COMPRESSED(-2);
78  
79      final int code;
80      static final Version[] byCode;
81      static {
82        byCode = Version.values();
83        for (int i = 0; i < byCode.length; i++) {
84          if (byCode[i].code != -1 * i) {
85            throw new AssertionError("Values in this enum should be descending by one");
86          }
87        }
88      }
89  
90      Version(int code) {
91        this.code = code;
92      }
93  
94      boolean atLeast(Version other) {
95        return code <= other.code;
96      }
97  
98      static Version fromCode(int code) {
99        return byCode[code * -1];
100     }
101   }
102 
103   /*
104    * This is used for reading the log entries created by the previous releases
105    * (0.94.11) which write the clusters information to the scopes of WALEdit.
106    */
107   private static final String PREFIX_CLUSTER_KEY = ".";
108 
109 
110   private static final Version VERSION = Version.COMPRESSED;
111 
112   //  The encoded region name.
113   private byte [] encodedRegionName;
114   private TableName tablename;
115   private long logSeqNum;
116   // Time at which this edit was written.
117   private long writeTime;
118 
119   // The first element in the list is the cluster id on which the change has originated
120   private List<UUID> clusterIds;
121 
122   private NavigableMap<byte[], Integer> scopes;
123 
124   private long nonceGroup = HConstants.NO_NONCE;
125   private long nonce = HConstants.NO_NONCE;
126 
127   private CompressionContext compressionContext;
128 
129   public HLogKey() {
130     init(null, null, 0L, HConstants.LATEST_TIMESTAMP,
131         new ArrayList<UUID>(), HConstants.NO_NONCE, HConstants.NO_NONCE);
132   }
133 
134   @VisibleForTesting
135   public HLogKey(final byte[] encodedRegionName, final TableName tablename, long logSeqNum,
136       final long now, UUID clusterId) {
137     List<UUID> clusterIds = new ArrayList<UUID>();
138     clusterIds.add(clusterId);
139     init(encodedRegionName, tablename, logSeqNum, now, clusterIds,
140         HConstants.NO_NONCE, HConstants.NO_NONCE);
141   }
142 
143   /**
144    * Create the log key for writing to somewhere.
145    * We maintain the tablename mainly for debugging purposes.
146    * A regionName is always a sub-table object.
147    *
148    * @param encodedRegionName Encoded name of the region as returned by
149    * <code>HRegionInfo#getEncodedNameAsBytes()</code>.
150    * @param tablename   - name of table
151    * @param logSeqNum   - log sequence number
152    * @param now Time at which this edit was written.
153    * @param clusterIds the clusters that have consumed the change(used in Replication)
154    */
155   public HLogKey(final byte [] encodedRegionName, final TableName tablename,
156       long logSeqNum, final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
157     init(encodedRegionName, tablename, logSeqNum, now, clusterIds, nonceGroup, nonce);
158   }
159 
160   protected void init(final byte [] encodedRegionName, final TableName tablename,
161       long logSeqNum, final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
162     this.logSeqNum = logSeqNum;
163     this.writeTime = now;
164     this.clusterIds = clusterIds;
165     this.encodedRegionName = encodedRegionName;
166     this.tablename = tablename;
167     this.nonceGroup = nonceGroup;
168     this.nonce = nonce;
169   }
170 
171   /**
172    * @param compressionContext Compression context to use
173    */
174   public void setCompressionContext(CompressionContext compressionContext) {
175     this.compressionContext = compressionContext;
176   }
177 
178   /** @return encoded region name */
179   public byte [] getEncodedRegionName() {
180     return encodedRegionName;
181   }
182 
183   /** @return table name */
184   public TableName getTablename() {
185     return tablename;
186   }
187 
188   /** @return log sequence number */
189   public long getLogSeqNum() {
190     return this.logSeqNum;
191   }
192 
193   /**
194    * @return the write time
195    */
196   public long getWriteTime() {
197     return this.writeTime;
198   }
199 
200   public NavigableMap<byte[], Integer> getScopes() {
201     return scopes;
202   }
203 
204   /** @return The nonce group */
205   public long getNonceGroup() {
206     return nonceGroup;
207   }
208 
209   /** @return The nonce */
210   public long getNonce() {
211     return nonce;
212   }
213 
214   public void setScopes(NavigableMap<byte[], Integer> scopes) {
215     this.scopes = scopes;
216   }
217 
218   public void readOlderScopes(NavigableMap<byte[], Integer> scopes) {
219     if (scopes != null) {
220       Iterator<Map.Entry<byte[], Integer>> iterator = scopes.entrySet()
221           .iterator();
222       while (iterator.hasNext()) {
223         Map.Entry<byte[], Integer> scope = iterator.next();
224         String key = Bytes.toString(scope.getKey());
225         if (key.startsWith(PREFIX_CLUSTER_KEY)) {
226           addClusterId(UUID.fromString(key.substring(PREFIX_CLUSTER_KEY
227               .length())));
228           iterator.remove();
229         }
230       }
231       if (scopes.size() > 0) {
232         this.scopes = scopes;
233       }
234     }
235   }
236 
237   /**
238    * Marks that the cluster with the given clusterId has consumed the change
239    */
240   public void addClusterId(UUID clusterId) {
241     if (!clusterIds.contains(clusterId)) {
242       clusterIds.add(clusterId);
243     }
244   }
245 
246   /**
247    * @return the set of cluster Ids that have consumed the change
248    */
249   public List<UUID> getClusterIds() {
250     return clusterIds;
251   }
252 
253   /**
254    * @return the cluster id on which the change has originated. It there is no such cluster, it
255    *         returns DEFAULT_CLUSTER_ID (cases where replication is not enabled)
256    */
257   public UUID getOriginatingClusterId(){
258     return clusterIds.isEmpty() ? HConstants.DEFAULT_CLUSTER_ID : clusterIds.get(0);
259   }
260 
261   @Override
262   public String toString() {
263     return tablename + "/" + Bytes.toString(encodedRegionName) + "/" +
264       logSeqNum;
265   }
266 
267   /**
268    * Produces a string map for this key. Useful for programmatic use and
269    * manipulation of the data stored in an HLogKey, for example, printing
270    * as JSON.
271    *
272    * @return a Map containing data from this key
273    */
274   public Map<String, Object> toStringMap() {
275     Map<String, Object> stringMap = new HashMap<String, Object>();
276     stringMap.put("table", tablename);
277     stringMap.put("region", Bytes.toStringBinary(encodedRegionName));
278     stringMap.put("sequence", logSeqNum);
279     return stringMap;
280   }
281 
282   @Override
283   public boolean equals(Object obj) {
284     if (this == obj) {
285       return true;
286     }
287     if (obj == null || getClass() != obj.getClass()) {
288       return false;
289     }
290     return compareTo((HLogKey)obj) == 0;
291   }
292 
293   @Override
294   public int hashCode() {
295     int result = Bytes.hashCode(this.encodedRegionName);
296     result ^= this.logSeqNum;
297     result ^= this.writeTime;
298     return result;
299   }
300 
301   @Override
302   public int compareTo(HLogKey o) {
303     int result = Bytes.compareTo(this.encodedRegionName, o.encodedRegionName);
304     if (result == 0) {
305       if (this.logSeqNum < o.logSeqNum) {
306         result = -1;
307       } else if (this.logSeqNum  > o.logSeqNum ) {
308         result = 1;
309       }
310       if (result == 0) {
311         if (this.writeTime < o.writeTime) {
312           result = -1;
313         } else if (this.writeTime > o.writeTime) {
314           return 1;
315         }
316       }
317     }
318     // why isn't cluster id accounted for?
319     return result;
320   }
321 
322   /**
323    * Drop this instance's tablename byte array and instead
324    * hold a reference to the provided tablename. This is not
325    * meant to be a general purpose setter - it's only used
326    * to collapse references to conserve memory.
327    */
328   void internTableName(TableName tablename) {
329     // We should not use this as a setter - only to swap
330     // in a new reference to the same table name.
331     assert tablename.equals(this.tablename);
332     this.tablename = tablename;
333   }
334 
335   /**
336    * Drop this instance's region name byte array and instead
337    * hold a reference to the provided region name. This is not
338    * meant to be a general purpose setter - it's only used
339    * to collapse references to conserve memory.
340    */
341   void internEncodedRegionName(byte []encodedRegionName) {
342     // We should not use this as a setter - only to swap
343     // in a new reference to the same table name.
344     assert Bytes.equals(this.encodedRegionName, encodedRegionName);
345     this.encodedRegionName = encodedRegionName;
346   }
347 
348   @Override
349   @Deprecated
350   public void write(DataOutput out) throws IOException {
351     LOG.warn("HLogKey is being serialized to writable - only expected in test code");
352     WritableUtils.writeVInt(out, VERSION.code);
353     if (compressionContext == null) {
354       Bytes.writeByteArray(out, this.encodedRegionName);
355       Bytes.writeByteArray(out, this.tablename.getName());
356     } else {
357       Compressor.writeCompressed(this.encodedRegionName, 0,
358           this.encodedRegionName.length, out,
359           compressionContext.regionDict);
360       Compressor.writeCompressed(this.tablename.getName(), 0, this.tablename.getName().length, out,
361           compressionContext.tableDict);
362     }
363     out.writeLong(this.logSeqNum);
364     out.writeLong(this.writeTime);
365     // Don't need to write the clusters information as we are using protobufs from 0.95
366     // Writing only the first clusterId for testing the legacy read
367     Iterator<UUID> iterator = clusterIds.iterator();
368     if(iterator.hasNext()){
369       out.writeBoolean(true);
370       UUID clusterId = iterator.next();
371       out.writeLong(clusterId.getMostSignificantBits());
372       out.writeLong(clusterId.getLeastSignificantBits());
373     } else {
374       out.writeBoolean(false);
375     }
376   }
377 
378   @Override
379   public void readFields(DataInput in) throws IOException {
380     Version version = Version.UNVERSIONED;
381     // HLogKey was not versioned in the beginning.
382     // In order to introduce it now, we make use of the fact
383     // that encodedRegionName was written with Bytes.writeByteArray,
384     // which encodes the array length as a vint which is >= 0.
385     // Hence if the vint is >= 0 we have an old version and the vint
386     // encodes the length of encodedRegionName.
387     // If < 0 we just read the version and the next vint is the length.
388     // @see Bytes#readByteArray(DataInput)
389     this.scopes = null; // writable HLogKey does not contain scopes
390     int len = WritableUtils.readVInt(in);
391     byte[] tablenameBytes = null;
392     if (len < 0) {
393       // what we just read was the version
394       version = Version.fromCode(len);
395       // We only compress V2 of HLogkey.
396       // If compression is on, the length is handled by the dictionary
397       if (compressionContext == null || !version.atLeast(Version.COMPRESSED)) {
398         len = WritableUtils.readVInt(in);
399       }
400     }
401     if (compressionContext == null || !version.atLeast(Version.COMPRESSED)) {
402       this.encodedRegionName = new byte[len];
403       in.readFully(this.encodedRegionName);
404       tablenameBytes = Bytes.readByteArray(in);
405     } else {
406       this.encodedRegionName = Compressor.readCompressed(in, compressionContext.regionDict);
407       tablenameBytes = Compressor.readCompressed(in, compressionContext.tableDict);
408     }
409 
410     this.logSeqNum = in.readLong();
411     this.writeTime = in.readLong();
412 
413     this.clusterIds.clear();
414     if (version.atLeast(Version.INITIAL)) {
415       if (in.readBoolean()) {
416         // read the older log
417         // Definitely is the originating cluster
418         clusterIds.add(new UUID(in.readLong(), in.readLong()));
419       }
420     } else {
421       try {
422         // dummy read (former byte cluster id)
423         in.readByte();
424       } catch(EOFException e) {
425         // Means it's a very old key, just continue
426       }
427     }
428     try {
429       this.tablename = TableName.valueOf(tablenameBytes);
430     } catch (IllegalArgumentException iae) {
431       if (Bytes.toString(tablenameBytes).equals(TableName.OLD_META_STR)) {
432         // It is a pre-namespace meta table edit, continue with new format.
433         LOG.info("Got an old .META. edit, continuing with new format ");
434         this.tablename = TableName.META_TABLE_NAME;
435         this.encodedRegionName = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
436       } else if (Bytes.toString(tablenameBytes).equals(TableName.OLD_ROOT_STR)) {
437         this.tablename = TableName.OLD_ROOT_TABLE_NAME;
438          throw iae;
439       } else throw iae;
440     }
441     // Do not need to read the clusters information as we are using protobufs from 0.95
442   }
443 
444   public WALKey.Builder getBuilder(
445       WALCellCodec.ByteStringCompressor compressor) throws IOException {
446     WALKey.Builder builder = WALKey.newBuilder();
447     if (compressionContext == null) {
448       builder.setEncodedRegionName(ByteStringer.wrap(this.encodedRegionName));
449       builder.setTableName(ByteStringer.wrap(this.tablename.getName()));
450     } else {
451       builder.setEncodedRegionName(
452           compressor.compress(this.encodedRegionName, compressionContext.regionDict));
453       builder.setTableName(compressor.compress(this.tablename.getName(),
454           compressionContext.tableDict));
455     }
456     builder.setLogSequenceNumber(this.logSeqNum);
457     builder.setWriteTime(writeTime);
458     if (this.nonce != HConstants.NO_NONCE) {
459       builder.setNonce(nonce);
460     }
461     if (this.nonceGroup != HConstants.NO_NONCE) {
462       builder.setNonceGroup(nonceGroup);
463     }
464     HBaseProtos.UUID.Builder uuidBuilder = HBaseProtos.UUID.newBuilder();
465     for (UUID clusterId : clusterIds) {
466       uuidBuilder.setLeastSigBits(clusterId.getLeastSignificantBits());
467       uuidBuilder.setMostSigBits(clusterId.getMostSignificantBits());
468       builder.addClusterIds(uuidBuilder.build());
469     }
470     if (scopes != null) {
471       for (Map.Entry<byte[], Integer> e : scopes.entrySet()) {
472         ByteString family = (compressionContext == null) ? ByteStringer.wrap(e.getKey())
473             : compressor.compress(e.getKey(), compressionContext.familyDict);
474         builder.addScopes(FamilyScope.newBuilder()
475             .setFamily(family).setScopeType(ScopeType.valueOf(e.getValue())));
476       }
477     }
478     return builder;
479   }
480 
481   public void readFieldsFromPb(
482       WALKey walKey, WALCellCodec.ByteStringUncompressor uncompressor) throws IOException {
483     if (this.compressionContext != null) {
484       this.encodedRegionName = uncompressor.uncompress(
485           walKey.getEncodedRegionName(), compressionContext.regionDict);
486       byte[] tablenameBytes = uncompressor.uncompress(
487           walKey.getTableName(), compressionContext.tableDict);
488       this.tablename = TableName.valueOf(tablenameBytes);
489     } else {
490       this.encodedRegionName = walKey.getEncodedRegionName().toByteArray();
491       this.tablename = TableName.valueOf(walKey.getTableName().toByteArray());
492     }
493     clusterIds.clear();
494     if (walKey.hasClusterId()) {
495       //When we are reading the older log (0.95.1 release)
496       //This is definitely the originating cluster
497       clusterIds.add(new UUID(walKey.getClusterId().getMostSigBits(), walKey.getClusterId()
498           .getLeastSigBits()));
499     }
500     for (HBaseProtos.UUID clusterId : walKey.getClusterIdsList()) {
501       clusterIds.add(new UUID(clusterId.getMostSigBits(), clusterId.getLeastSigBits()));
502     }
503     if (walKey.hasNonceGroup()) {
504       this.nonceGroup = walKey.getNonceGroup();
505     }
506     if (walKey.hasNonce()) {
507       this.nonce = walKey.getNonce();
508     }
509     this.scopes = null;
510     if (walKey.getScopesCount() > 0) {
511       this.scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
512       for (FamilyScope scope : walKey.getScopesList()) {
513         byte[] family = (compressionContext == null) ? scope.getFamily().toByteArray() :
514           uncompressor.uncompress(scope.getFamily(), compressionContext.familyDict);
515         this.scopes.put(family, scope.getScopeType().getNumber());
516       }
517     }
518     this.logSeqNum = walKey.getLogSequenceNumber();
519     this.writeTime = walKey.getWriteTime();
520   }
521 }