1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import com.google.common.annotations.VisibleForTesting;
21 import org.apache.hadoop.hbase.util.ByteStringer;
22 import com.google.protobuf.InvalidProtocolBufferException;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.classification.InterfaceStability;
26 import org.apache.hadoop.hbase.Cell;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.KeyValueUtil;
29 import org.apache.hadoop.hbase.exceptions.DeserializationException;
30 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
31 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
32 import org.apache.hadoop.hbase.util.Bytes;
33 import org.apache.hadoop.hbase.util.Pair;
34
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.List;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 @InterfaceAudience.Public
68 @InterfaceStability.Evolving
69 public class FuzzyRowFilter extends FilterBase {
70 private List<Pair<byte[], byte[]>> fuzzyKeysData;
71 private boolean done = false;
72
73 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
74 Pair<byte[], byte[]> p;
75 for (int i = 0; i < fuzzyKeysData.size(); i++) {
76 p = fuzzyKeysData.get(i);
77 if (p.getFirst().length != p.getSecond().length) {
78 Pair<String, String> readable = new Pair<String, String>(
79 Bytes.toStringBinary(p.getFirst()),
80 Bytes.toStringBinary(p.getSecond()));
81 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
82 }
83 }
84 this.fuzzyKeysData = fuzzyKeysData;
85 }
86
87
88 @Override
89 public ReturnCode filterKeyValue(Cell kv) {
90
91 KeyValue v = KeyValueUtil.ensureKeyValue(kv);
92
93 byte[] rowKey = v.getRow();
94
95 SatisfiesCode bestOption = SatisfiesCode.NO_NEXT;
96 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
97 SatisfiesCode satisfiesCode =
98 satisfies(isReversed(), rowKey, fuzzyData.getFirst(), fuzzyData.getSecond());
99 if (satisfiesCode == SatisfiesCode.YES) {
100 return ReturnCode.INCLUDE;
101 }
102
103 if (satisfiesCode == SatisfiesCode.NEXT_EXISTS) {
104 bestOption = SatisfiesCode.NEXT_EXISTS;
105 }
106 }
107
108 if (bestOption == SatisfiesCode.NEXT_EXISTS) {
109 return ReturnCode.SEEK_NEXT_USING_HINT;
110 }
111
112
113 done = true;
114 return ReturnCode.NEXT_ROW;
115 }
116
117 @Override
118 public Cell getNextCellHint(Cell currentKV) {
119
120 KeyValue v = KeyValueUtil.ensureKeyValue(currentKV);
121
122 byte[] rowKey = v.getRow();
123 byte[] nextRowKey = null;
124
125 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
126 byte[] nextRowKeyCandidate = getNextForFuzzyRule(isReversed(), rowKey,
127 fuzzyData.getFirst(), fuzzyData.getSecond());
128 if (nextRowKeyCandidate == null) {
129 continue;
130 }
131 if (nextRowKey == null ||
132 (reversed && Bytes.compareTo(nextRowKeyCandidate, nextRowKey) > 0) ||
133 (!reversed && Bytes.compareTo(nextRowKeyCandidate, nextRowKey) < 0)) {
134 nextRowKey = nextRowKeyCandidate;
135 }
136 }
137
138 if (!reversed && nextRowKey == null) {
139
140
141
142
143 throw new IllegalStateException("No next row key that satisfies fuzzy exists when" +
144 " getNextKeyHint() is invoked." +
145 " Filter: " + this.toString() +
146 " currentKV: " + currentKV.toString());
147 }
148
149 return nextRowKey == null ? null : KeyValue.createFirstOnRow(nextRowKey);
150 }
151
152 @Override
153 public boolean filterAllRemaining() {
154 return done;
155 }
156
157
158
159
160 public byte [] toByteArray() {
161 FilterProtos.FuzzyRowFilter.Builder builder =
162 FilterProtos.FuzzyRowFilter.newBuilder();
163 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
164 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
165 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
166 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
167 builder.addFuzzyKeysData(bbpBuilder);
168 }
169 return builder.build().toByteArray();
170 }
171
172
173
174
175
176
177
178 public static FuzzyRowFilter parseFrom(final byte [] pbBytes)
179 throws DeserializationException {
180 FilterProtos.FuzzyRowFilter proto;
181 try {
182 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
183 } catch (InvalidProtocolBufferException e) {
184 throw new DeserializationException(e);
185 }
186 int count = proto.getFuzzyKeysDataCount();
187 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData= new ArrayList<Pair<byte[], byte[]>>(count);
188 for (int i = 0; i < count; ++i) {
189 BytesBytesPair current = proto.getFuzzyKeysData(i);
190 byte[] keyBytes = current.getFirst().toByteArray();
191 byte[] keyMeta = current.getSecond().toByteArray();
192 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
193 }
194 return new FuzzyRowFilter(fuzzyKeysData);
195 }
196
197 @Override
198 public String toString() {
199 final StringBuilder sb = new StringBuilder();
200 sb.append("FuzzyRowFilter");
201 sb.append("{fuzzyKeysData=");
202 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
203 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
204 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
205 }
206 sb.append("}, ");
207 return sb.toString();
208 }
209
210
211
/**
 * Outcome of checking a row key against a single fuzzy rule.
 */
enum SatisfiesCode {
  /** The row key satisfies the rule. */
  YES,
  /** The row key does not satisfy the rule, but a later row key (in scan direction) may. */
  NEXT_EXISTS,
  /** The row key does not satisfy the rule and no later row key can. */
  NO_NEXT
}
220
221 @VisibleForTesting
222 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
223 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
224 }
225
226 @VisibleForTesting
227 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
228 byte[] fuzzyKeyMeta) {
229 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
230 }
231
232 private static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
233 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
234 if (row == null) {
235
236 return SatisfiesCode.YES;
237 }
238
239 Order order = Order.orderFor(reverse);
240 boolean nextRowKeyCandidateExists = false;
241
242 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
243
244 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
245 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
246 if (fixedByteIncorrect) {
247
248 if (nextRowKeyCandidateExists) {
249 return SatisfiesCode.NEXT_EXISTS;
250 }
251
252
253
254
255 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
256 if (rowByteLessThanFixed && !reverse) {
257 return SatisfiesCode.NEXT_EXISTS;
258 } else if (!rowByteLessThanFixed && reverse) {
259 return SatisfiesCode.NEXT_EXISTS;
260 } else {
261 return SatisfiesCode.NO_NEXT;
262 }
263 }
264
265
266
267
268
269
270
271 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
272 nextRowKeyCandidateExists = true;
273 }
274 }
275
276 return SatisfiesCode.YES;
277 }
278
279 @VisibleForTesting
280 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
281 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
282 }
283
284 @VisibleForTesting
285 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
286 byte[] fuzzyKeyMeta) {
287 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
288 }
289
290
291 private enum Order {
292 ASC {
293 public boolean lt(int lhs, int rhs) {
294 return lhs < rhs;
295 }
296 public boolean gt(int lhs, int rhs) {
297 return lhs > rhs;
298 }
299 public byte inc(byte val) {
300
301 return (byte) (val + 1);
302 }
303 public boolean isMax(byte val) {
304 return val == (byte) 0xff;
305 }
306 public byte min() {
307 return 0;
308 }
309 },
310 DESC {
311 public boolean lt(int lhs, int rhs) {
312 return lhs > rhs;
313 }
314 public boolean gt(int lhs, int rhs) {
315 return lhs < rhs;
316 }
317 public byte inc(byte val) {
318
319 return (byte) (val - 1);
320 }
321 public boolean isMax(byte val) {
322 return val == 0;
323 }
324 public byte min() {
325 return (byte) 0xFF;
326 }
327 };
328
329 public static Order orderFor(boolean reverse) {
330 return reverse ? DESC : ASC;
331 }
332
333
334 public abstract boolean lt(int lhs, int rhs);
335
336 public abstract boolean gt(int lhs, int rhs);
337
338 public abstract byte inc(byte val);
339
340 public abstract boolean isMax(byte val);
341
342 public abstract byte min();
343 }
344
345
346
347
348
349 private static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
350 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
351
352
353
354
355
356
357
358
359 byte[] result = Arrays.copyOf(fuzzyKeyBytes,
360 length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
361 if (reverse && length > fuzzyKeyBytes.length) {
362
363 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
364 result[i] = (byte) 0xFF;
365 }
366 }
367 int toInc = -1;
368 final Order order = Order.orderFor(reverse);
369
370 boolean increased = false;
371 for (int i = 0; i < result.length; i++) {
372 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
373 result[i] = row[offset + i];
374 if (!order.isMax(row[i])) {
375
376 toInc = i;
377 }
378 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == 0) {
379 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
380
381
382 increased = true;
383 break;
384 }
385
386 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
387
388
389
390 break;
391 }
392 }
393 }
394
395 if (!increased) {
396 if (toInc < 0) {
397 return null;
398 }
399 result[toInc] = order.inc(result[toInc]);
400
401
402
403 for (int i = toInc + 1; i < result.length; i++) {
404 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
405 result[i] = order.min();
406 }
407 }
408 }
409
410 return result;
411 }
412
413
414
415
416
417 boolean areSerializedFieldsEqual(Filter o) {
418 if (o == this) return true;
419 if (!(o instanceof FuzzyRowFilter)) return false;
420
421 FuzzyRowFilter other = (FuzzyRowFilter)o;
422 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
423 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
424 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
425 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
426 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst())
427 && Bytes.equals(thisData.getSecond(), otherData.getSecond()))) {
428 return false;
429 }
430 }
431 return true;
432 }
433 }