1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.Random;
25
26 import org.apache.commons.cli.CommandLine;
27 import org.apache.commons.cli.CommandLineParser;
28 import org.apache.commons.cli.GnuParser;
29 import org.apache.commons.cli.HelpFormatter;
30 import org.apache.commons.cli.Option;
31 import org.apache.commons.cli.Options;
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.fs.FileSystem;
36 import org.apache.hadoop.fs.Path;
37 import org.apache.hadoop.hbase.HBaseTestingUtility;
38 import org.apache.hadoop.hbase.HColumnDescriptor;
39 import org.apache.hadoop.hbase.HRegionInfo;
40 import org.apache.hadoop.hbase.HTableDescriptor;
41 import org.apache.hadoop.hbase.testclassification.LargeTests;
42 import org.apache.hadoop.hbase.MiniHBaseCluster;
43 import org.apache.hadoop.hbase.TableName;
44 import org.apache.hadoop.hbase.client.HTable;
45 import org.apache.hadoop.hbase.client.Put;
46 import org.apache.hadoop.hbase.client.Result;
47 import org.apache.hadoop.hbase.client.ResultScanner;
48 import org.apache.hadoop.hbase.client.Scan;
49 import org.apache.hadoop.hbase.filter.CompareFilter;
50 import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
51 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
52 import org.apache.hadoop.hbase.util.Bytes;
53 import org.junit.Test;
54 import org.junit.experimental.categories.Category;
55
56
57
58
59
60
61 @Category(LargeTests.class)
62 public class TestJoinedScanners {
63 static final Log LOG = LogFactory.getLog(TestJoinedScanners.class);
64
65 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
66 private static final String DIR = TEST_UTIL.getDataTestDir("TestJoinedScanners").toString();
67
68 private static final byte[] cf_essential = Bytes.toBytes("essential");
69 private static final byte[] cf_joined = Bytes.toBytes("joined");
70 private static final byte[] col_name = Bytes.toBytes("a");
71 private static final byte[] flag_yes = Bytes.toBytes("Y");
72 private static final byte[] flag_no = Bytes.toBytes("N");
73
74 private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
75 private static int selectionRatio = 30;
76 private static int valueWidth = 128 * 1024;
77
78 @Test
79 public void testJoinedScanners() throws Exception {
80 String dataNodeHosts[] = new String[] { "host1", "host2", "host3" };
81 int regionServersCount = 3;
82
83 HBaseTestingUtility htu = new HBaseTestingUtility();
84
85 final int DEFAULT_BLOCK_SIZE = 1024*1024;
86 htu.getConfiguration().setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
87 htu.getConfiguration().setInt("dfs.replication", 1);
88 htu.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
89 MiniHBaseCluster cluster = null;
90
91 try {
92 cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts);
93 byte [][] families = {cf_essential, cf_joined};
94
95 byte[] tableName = Bytes.toBytes(this.getClass().getSimpleName());
96 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
97 for(byte[] family : families) {
98 HColumnDescriptor hcd = new HColumnDescriptor(family);
99 hcd.setDataBlockEncoding(blockEncoding);
100 desc.addFamily(hcd);
101 }
102 htu.getHBaseAdmin().createTable(desc);
103 HTable ht = new HTable(htu.getConfiguration(), tableName);
104
105 long rows_to_insert = 1000;
106 int insert_batch = 20;
107 long time = System.nanoTime();
108 Random rand = new Random(time);
109
110 LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
111 + Float.toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB");
112
113 byte [] val_large = new byte[valueWidth];
114
115 List<Put> puts = new ArrayList<Put>();
116
117 for (long i = 0; i < rows_to_insert; i++) {
118 Put put = new Put(Bytes.toBytes(Long.toString (i)));
119 if (rand.nextInt(100) <= selectionRatio) {
120 put.add(cf_essential, col_name, flag_yes);
121 } else {
122 put.add(cf_essential, col_name, flag_no);
123 }
124 put.add(cf_joined, col_name, val_large);
125 puts.add(put);
126 if (puts.size() >= insert_batch) {
127 ht.put(puts);
128 puts.clear();
129 }
130 }
131 if (puts.size() >= 0) {
132 ht.put(puts);
133 puts.clear();
134 }
135
136 LOG.info("Data generated in "
137 + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");
138
139 boolean slow = true;
140 for (int i = 0; i < 10; ++i) {
141 runScanner(ht, slow);
142 slow = !slow;
143 }
144
145 ht.close();
146 } finally {
147 if (cluster != null) {
148 htu.shutdownMiniCluster();
149 }
150 }
151 }
152
153 private void runScanner(HTable table, boolean slow) throws Exception {
154 long time = System.nanoTime();
155 Scan scan = new Scan();
156 scan.addColumn(cf_essential, col_name);
157 scan.addColumn(cf_joined, col_name);
158
159 SingleColumnValueFilter filter = new SingleColumnValueFilter(
160 cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
161 filter.setFilterIfMissing(true);
162 scan.setFilter(filter);
163 scan.setLoadColumnFamiliesOnDemand(!slow);
164
165 ResultScanner result_scanner = table.getScanner(scan);
166 Result res;
167 long rows_count = 0;
168 while ((res = result_scanner.next()) != null) {
169 rows_count++;
170 }
171
172 double timeSec = (System.nanoTime() - time) / 1000000000.0;
173 result_scanner.close();
174 LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
175 + " seconds, got " + Long.toString(rows_count/2) + " rows");
176 }
177
178 private static HRegion initHRegion(byte[] tableName, byte[] startKey, byte[] stopKey,
179 String callingMethod, Configuration conf, byte[]... families)
180 throws IOException {
181 HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
182 for(byte [] family : families) {
183 HColumnDescriptor hcd = new HColumnDescriptor(family);
184 hcd.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
185 htd.addFamily(hcd);
186 }
187 HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
188 Path path = new Path(DIR + callingMethod);
189 FileSystem fs = FileSystem.get(conf);
190 if (fs.exists(path)) {
191 if (!fs.delete(path, true)) {
192 throw new IOException("Failed delete of " + path);
193 }
194 }
195 return HRegion.createHRegion(info, path, conf, htd);
196 }
197
198 private static Options options = new Options();
199
200
201
202
203
204
205 public static void main(final String[] args) throws Exception {
206 Option encodingOption = new Option("e", "blockEncoding", true,
207 "Data block encoding; Default: FAST_DIFF");
208 encodingOption.setRequired(false);
209 options.addOption(encodingOption);
210
211 Option ratioOption = new Option("r", "selectionRatio", true,
212 "Ratio of selected rows using essential column family");
213 ratioOption.setRequired(false);
214 options.addOption(ratioOption);
215
216 Option widthOption = new Option("w", "valueWidth", true,
217 "Width of value for non-essential column family");
218 widthOption.setRequired(false);
219 options.addOption(widthOption);
220
221 CommandLineParser parser = new GnuParser();
222 CommandLine cmd = parser.parse(options, args);
223 if (args.length < 1) {
224 HelpFormatter formatter = new HelpFormatter();
225 formatter.printHelp("TestJoinedScanners", options, true);
226 }
227
228 if (cmd.hasOption("e")) {
229 blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
230 }
231 if (cmd.hasOption("r")) {
232 selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
233 }
234 if (cmd.hasOption("w")) {
235 valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
236 }
237
238 TestJoinedScanners test = new TestJoinedScanners();
239 test.testJoinedScanners();
240 }
241 }