1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import static org.junit.Assert.*;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29
30 import org.apache.hadoop.hbase.*;
31 import org.apache.hadoop.hbase.client.Put;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.client.Durability;
34 import org.apache.hadoop.hbase.regionserver.HRegion;
35 import org.apache.hadoop.hbase.regionserver.InternalScanner;
36 import org.apache.hadoop.hbase.testclassification.SmallTests;
37 import org.apache.hadoop.hbase.util.Bytes;
38 import org.junit.Test;
39 import org.junit.experimental.categories.Category;
40
41 @Category(SmallTests.class)
42 public class TestMultipleColumnPrefixFilter {
43
44 private final static HBaseTestingUtility TEST_UTIL = new
45 HBaseTestingUtility();
46
47 @Test
48 public void testMultipleColumnPrefixFilter() throws IOException {
49 String family = "Family";
50 HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("TestMultipleColumnPrefixFilter"));
51 HColumnDescriptor hcd = new HColumnDescriptor(family);
52 hcd.setMaxVersions(3);
53 htd.addFamily(hcd);
54
55 HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
56 HRegion region = HRegion.createHRegion(info, TEST_UTIL.
57 getDataTestDir(), TEST_UTIL.getConfiguration(), htd);
58
59 List<String> rows = generateRandomWords(100, "row");
60 List<String> columns = generateRandomWords(10000, "column");
61 long maxTimestamp = 2;
62
63 List<Cell> kvList = new ArrayList<Cell>();
64
65 Map<String, List<Cell>> prefixMap = new HashMap<String,
66 List<Cell>>();
67
68 prefixMap.put("p", new ArrayList<Cell>());
69 prefixMap.put("q", new ArrayList<Cell>());
70 prefixMap.put("s", new ArrayList<Cell>());
71
72 String valueString = "ValueString";
73
74 for (String row: rows) {
75 Put p = new Put(Bytes.toBytes(row));
76 p.setDurability(Durability.SKIP_WAL);
77 for (String column: columns) {
78 for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
79 KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
80 valueString);
81 p.add(kv);
82 kvList.add(kv);
83 for (String s: prefixMap.keySet()) {
84 if (column.startsWith(s)) {
85 prefixMap.get(s).add(kv);
86 }
87 }
88 }
89 }
90 region.put(p);
91 }
92
93 MultipleColumnPrefixFilter filter;
94 Scan scan = new Scan();
95 scan.setMaxVersions();
96 byte [][] filter_prefix = new byte [2][];
97 filter_prefix[0] = new byte [] {'p'};
98 filter_prefix[1] = new byte [] {'q'};
99
100 filter = new MultipleColumnPrefixFilter(filter_prefix);
101 scan.setFilter(filter);
102 List<Cell> results = new ArrayList<Cell>();
103 InternalScanner scanner = region.getScanner(scan);
104 while(scanner.next(results));
105 assertEquals(prefixMap.get("p").size() + prefixMap.get("q").size(), results.size());
106
107 HRegion.closeHRegion(region);
108 }
109
110 @Test
111 public void testMultipleColumnPrefixFilterWithManyFamilies() throws IOException {
112 String family1 = "Family1";
113 String family2 = "Family2";
114 HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("TestMultipleColumnPrefixFilter"));
115 HColumnDescriptor hcd1 = new HColumnDescriptor(family1);
116 hcd1.setMaxVersions(3);
117 htd.addFamily(hcd1);
118 HColumnDescriptor hcd2 = new HColumnDescriptor(family2);
119 hcd2.setMaxVersions(3);
120 htd.addFamily(hcd2);
121 HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
122 HRegion region = HRegion.createHRegion(info, TEST_UTIL.
123 getDataTestDir(), TEST_UTIL.getConfiguration(), htd);
124
125 List<String> rows = generateRandomWords(100, "row");
126 List<String> columns = generateRandomWords(10000, "column");
127 long maxTimestamp = 3;
128
129 List<Cell> kvList = new ArrayList<Cell>();
130
131 Map<String, List<Cell>> prefixMap = new HashMap<String,
132 List<Cell>>();
133
134 prefixMap.put("p", new ArrayList<Cell>());
135 prefixMap.put("q", new ArrayList<Cell>());
136 prefixMap.put("s", new ArrayList<Cell>());
137
138 String valueString = "ValueString";
139
140 for (String row: rows) {
141 Put p = new Put(Bytes.toBytes(row));
142 p.setDurability(Durability.SKIP_WAL);
143 for (String column: columns) {
144 for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
145 double rand = Math.random();
146 Cell kv;
147 if (rand < 0.5)
148 kv = KeyValueTestUtil.create(row, family1, column, timestamp,
149 valueString);
150 else
151 kv = KeyValueTestUtil.create(row, family2, column, timestamp,
152 valueString);
153 p.add(kv);
154 kvList.add(kv);
155 for (String s: prefixMap.keySet()) {
156 if (column.startsWith(s)) {
157 prefixMap.get(s).add(kv);
158 }
159 }
160 }
161 }
162 region.put(p);
163 }
164
165 MultipleColumnPrefixFilter filter;
166 Scan scan = new Scan();
167 scan.setMaxVersions();
168 byte [][] filter_prefix = new byte [2][];
169 filter_prefix[0] = new byte [] {'p'};
170 filter_prefix[1] = new byte [] {'q'};
171
172 filter = new MultipleColumnPrefixFilter(filter_prefix);
173 scan.setFilter(filter);
174 List<Cell> results = new ArrayList<Cell>();
175 InternalScanner scanner = region.getScanner(scan);
176 while(scanner.next(results));
177 assertEquals(prefixMap.get("p").size() + prefixMap.get("q").size(), results.size());
178
179 HRegion.closeHRegion(region);
180 }
181
182 @Test
183 public void testMultipleColumnPrefixFilterWithColumnPrefixFilter() throws IOException {
184 String family = "Family";
185 HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("TestMultipleColumnPrefixFilter"));
186 htd.addFamily(new HColumnDescriptor(family));
187 HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
188 HRegion region = HRegion.createHRegion(info, TEST_UTIL.
189 getDataTestDir(), TEST_UTIL.getConfiguration(),htd);
190
191 List<String> rows = generateRandomWords(100, "row");
192 List<String> columns = generateRandomWords(10000, "column");
193 long maxTimestamp = 2;
194
195 String valueString = "ValueString";
196
197 for (String row: rows) {
198 Put p = new Put(Bytes.toBytes(row));
199 p.setDurability(Durability.SKIP_WAL);
200 for (String column: columns) {
201 for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
202 KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
203 valueString);
204 p.add(kv);
205 }
206 }
207 region.put(p);
208 }
209
210 MultipleColumnPrefixFilter multiplePrefixFilter;
211 Scan scan1 = new Scan();
212 scan1.setMaxVersions();
213 byte [][] filter_prefix = new byte [1][];
214 filter_prefix[0] = new byte [] {'p'};
215
216 multiplePrefixFilter = new MultipleColumnPrefixFilter(filter_prefix);
217 scan1.setFilter(multiplePrefixFilter);
218 List<Cell> results1 = new ArrayList<Cell>();
219 InternalScanner scanner1 = region.getScanner(scan1);
220 while(scanner1.next(results1));
221
222 ColumnPrefixFilter singlePrefixFilter;
223 Scan scan2 = new Scan();
224 scan2.setMaxVersions();
225 singlePrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("p"));
226
227 scan2.setFilter(singlePrefixFilter);
228 List<Cell> results2 = new ArrayList<Cell>();
229 InternalScanner scanner2 = region.getScanner(scan1);
230 while(scanner2.next(results2));
231
232 assertEquals(results1.size(), results2.size());
233
234 HRegion.closeHRegion(region);
235 }
236
237 List<String> generateRandomWords(int numberOfWords, String suffix) {
238 Set<String> wordSet = new HashSet<String>();
239 for (int i = 0; i < numberOfWords; i++) {
240 int lengthOfWords = (int) (Math.random()*2) + 1;
241 char[] wordChar = new char[lengthOfWords];
242 for (int j = 0; j < wordChar.length; j++) {
243 wordChar[j] = (char) (Math.random() * 26 + 97);
244 }
245 String word;
246 if (suffix == null) {
247 word = new String(wordChar);
248 } else {
249 word = new String(wordChar) + suffix;
250 }
251 wordSet.add(word);
252 }
253 List<String> wordList = new ArrayList<String>(wordSet);
254 return wordList;
255 }
256
257 }
258
259