1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FSDataInputStream;
39 import org.apache.hadoop.fs.FSDataOutputStream;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.fs.Path;
42 import org.apache.hadoop.fs.RawLocalFileSystem;
43 import org.apache.hadoop.hbase.HBaseTestingUtility;
44 import org.apache.hadoop.hbase.KeyValue;
45 import org.apache.hadoop.hbase.testclassification.MediumTests;
46 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
47 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
48 import org.apache.hadoop.io.BytesWritable;
49 import org.junit.experimental.categories.Category;
50
51
52
53
54
55
56
57
58
59 @Category(MediumTests.class)
60 public class TestHFileSeek extends TestCase {
61 private static final boolean USE_PREAD = true;
62 private MyOptions options;
63 private Configuration conf;
64 private Path path;
65 private FileSystem fs;
66 private NanoTimer timer;
67 private Random rng;
68 private RandomDistribution.DiscreteRNG keyLenGen;
69 private KVGenerator kvGen;
70
71 private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
72
73 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
74
75 @Override
76 public void setUp() throws IOException {
77 if (options == null) {
78 options = new MyOptions(new String[0]);
79 }
80
81 conf = new Configuration();
82
83 if (options.useRawFs) {
84 conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
85 }
86
87 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
88 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
89 path = new Path(new Path(options.rootDir), options.file);
90 fs = path.getFileSystem(conf);
91 timer = new NanoTimer(false);
92 rng = new Random(options.seed);
93 keyLenGen =
94 new RandomDistribution.Zipf(new Random(rng.nextLong()),
95 options.minKeyLen, options.maxKeyLen, 1.2);
96 RandomDistribution.DiscreteRNG valLenGen =
97 new RandomDistribution.Flat(new Random(rng.nextLong()),
98 options.minValLength, options.maxValLength);
99 RandomDistribution.DiscreteRNG wordLenGen =
100 new RandomDistribution.Flat(new Random(rng.nextLong()),
101 options.minWordLen, options.maxWordLen);
102 kvGen =
103 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
104 options.dictSize);
105 }
106
107 @Override
108 public void tearDown() {
109 try {
110 fs.close();
111 }
112 catch (Exception e) {
113
114 }
115 }
116
117 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
118 throws IOException {
119 if (fs.exists(name)) {
120 fs.delete(name, true);
121 }
122 FSDataOutputStream fout = fs.create(name);
123 return fout;
124 }
125
126 private void createTFile() throws IOException {
127 long totalBytes = 0;
128 FSDataOutputStream fout = createFSOutput(path, fs);
129 try {
130 HFileContext context = new HFileContextBuilder()
131 .withBlockSize(options.minBlockSize)
132 .withCompression(AbstractHFileWriter.compressionByName(options.compress))
133 .build();
134 Writer writer = HFile.getWriterFactoryNoCache(conf)
135 .withOutputStream(fout)
136 .withFileContext(context)
137 .withComparator(new KeyValue.RawBytesComparator())
138 .create();
139 try {
140 BytesWritable key = new BytesWritable();
141 BytesWritable val = new BytesWritable();
142 timer.start();
143 for (long i = 0; true; ++i) {
144 if (i % 1000 == 0) {
145 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
146 break;
147 }
148 }
149 kvGen.next(key, val, false);
150 byte [] k = new byte [key.getLength()];
151 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
152 byte [] v = new byte [val.getLength()];
153 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
154 writer.append(k, v);
155 totalBytes += key.getLength();
156 totalBytes += val.getLength();
157 }
158 timer.stop();
159 }
160 finally {
161 writer.close();
162 }
163 }
164 finally {
165 fout.close();
166 }
167 double duration = (double)timer.read()/1000;
168 long fsize = fs.getFileStatus(path).getLen();
169
170 System.out.printf(
171 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
172 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
173 / duration);
174 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
175 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
176 }
177
178 public void seekTFile() throws IOException {
179 int miss = 0;
180 long totalBytes = 0;
181 FSDataInputStream fsdis = fs.open(path);
182 Reader reader = HFile.createReaderFromStream(path, fsdis,
183 fs.getFileStatus(path).getLen(), new CacheConfig(conf), conf);
184 reader.loadFileInfo();
185 KeySampler kSampler =
186 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
187 keyLenGen);
188 HFileScanner scanner = reader.getScanner(false, USE_PREAD);
189 BytesWritable key = new BytesWritable();
190 timer.reset();
191 timer.start();
192 for (int i = 0; i < options.seekCount; ++i) {
193 kSampler.next(key);
194 byte [] k = new byte [key.getLength()];
195 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
196 if (scanner.seekTo(k) >= 0) {
197 ByteBuffer bbkey = scanner.getKey();
198 ByteBuffer bbval = scanner.getValue();
199 totalBytes += bbkey.limit();
200 totalBytes += bbval.limit();
201 }
202 else {
203 ++miss;
204 }
205 }
206 timer.stop();
207 System.out.printf(
208 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
209 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
210 / options.seekCount), options.seekCount - miss, miss,
211 (double) totalBytes / 1024 / (options.seekCount - miss));
212
213 }
214
215 public void testSeeks() throws IOException {
216 if (options.doCreate()) {
217 createTFile();
218 }
219
220 if (options.doRead()) {
221 seekTFile();
222 }
223
224 if (options.doCreate()) {
225 fs.delete(path, true);
226 }
227 }
228
229 private static class IntegerRange {
230 private final int from, to;
231
232 public IntegerRange(int from, int to) {
233 this.from = from;
234 this.to = to;
235 }
236
237 public static IntegerRange parse(String s) throws ParseException {
238 StringTokenizer st = new StringTokenizer(s, " \t,");
239 if (st.countTokens() != 2) {
240 throw new ParseException("Bad integer specification: " + s);
241 }
242 int from = Integer.parseInt(st.nextToken());
243 int to = Integer.parseInt(st.nextToken());
244 return new IntegerRange(from, to);
245 }
246
247 public int from() {
248 return from;
249 }
250
251 public int to() {
252 return to;
253 }
254 }
255
256 private static class MyOptions {
257
258 int dictSize = 1000;
259 int minWordLen = 5;
260 int maxWordLen = 20;
261
262 private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
263 String rootDir =
264 TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
265 String file = "TestTFileSeek";
266
267 String compress = "none";
268 int minKeyLen = 10;
269 int maxKeyLen = 50;
270 int minValLength = 1024;
271 int maxValLength = 2 * 1024;
272 int minBlockSize = 1 * 1024 * 1024;
273 int fsOutputBufferSize = 1;
274 int fsInputBufferSize = 0;
275
276 long fileSize = 10 * 1024 * 1024;
277 long seekCount = 1000;
278 long trialCount = 1;
279 long seed;
280 boolean useRawFs = false;
281
282 static final int OP_CREATE = 1;
283 static final int OP_READ = 2;
284 int op = OP_CREATE | OP_READ;
285
286 boolean proceed = false;
287
288 public MyOptions(String[] args) {
289 seed = System.nanoTime();
290
291 try {
292 Options opts = buildOptions();
293 CommandLineParser parser = new GnuParser();
294 CommandLine line = parser.parse(opts, args, true);
295 processOptions(line, opts);
296 validateOptions();
297 }
298 catch (ParseException e) {
299 System.out.println(e.getMessage());
300 System.out.println("Try \"--help\" option for details.");
301 setStopProceed();
302 }
303 }
304
305 public boolean proceed() {
306 return proceed;
307 }
308
309 private Options buildOptions() {
310 Option compress =
311 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
312 .hasArg().withDescription("compression scheme").create('c');
313
314 Option fileSize =
315 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
316 .hasArg().withDescription("target size of the file (in MB).")
317 .create('s');
318
319 Option fsInputBufferSz =
320 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
321 .hasArg().withDescription(
322 "size of the file system input buffer (in bytes).").create(
323 'i');
324
325 Option fsOutputBufferSize =
326 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
327 .hasArg().withDescription(
328 "size of the file system output buffer (in bytes).").create(
329 'o');
330
331 Option keyLen =
332 OptionBuilder
333 .withLongOpt("key-length")
334 .withArgName("min,max")
335 .hasArg()
336 .withDescription(
337 "the length range of the key (in bytes)")
338 .create('k');
339
340 Option valueLen =
341 OptionBuilder
342 .withLongOpt("value-length")
343 .withArgName("min,max")
344 .hasArg()
345 .withDescription(
346 "the length range of the value (in bytes)")
347 .create('v');
348
349 Option blockSz =
350 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
351 .withDescription("minimum block size (in KB)").create('b');
352
353 Option operation =
354 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
355 .withDescription(
356 "action: seek-only, create-only, seek-after-create").create(
357 'x');
358
359 Option rootDir =
360 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
361 .withDescription(
362 "specify root directory where files will be created.")
363 .create('r');
364
365 Option file =
366 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
367 .withDescription("specify the file name to be created or read.")
368 .create('f');
369
370 Option seekCount =
371 OptionBuilder
372 .withLongOpt("seek")
373 .withArgName("count")
374 .hasArg()
375 .withDescription(
376 "specify how many seek operations we perform (requires -x r or -x rw.")
377 .create('n');
378
379 Option trialCount =
380 OptionBuilder
381 .withLongOpt("trials")
382 .withArgName("n")
383 .hasArg()
384 .withDescription(
385 "specify how many times to run the whole benchmark")
386 .create('t');
387
388 Option useRawFs =
389 OptionBuilder
390 .withLongOpt("rawfs")
391 .withDescription("use raw instead of checksummed file system")
392 .create();
393
394 Option help =
395 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
396 "show this screen").create("h");
397
398 return new Options().addOption(compress).addOption(fileSize).addOption(
399 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
400 .addOption(blockSz).addOption(rootDir).addOption(valueLen)
401 .addOption(operation).addOption(seekCount).addOption(file)
402 .addOption(trialCount).addOption(useRawFs).addOption(help);
403
404 }
405
406 private void processOptions(CommandLine line, Options opts)
407 throws ParseException {
408
409 if (line.hasOption('h')) {
410 HelpFormatter formatter = new HelpFormatter();
411 System.out.println("TFile and SeqFile benchmark.");
412 System.out.println();
413 formatter.printHelp(100,
414 "java ... TestTFileSeqFileComparison [options]",
415 "\nSupported options:", opts, "");
416 return;
417 }
418
419 if (line.hasOption('c')) {
420 compress = line.getOptionValue('c');
421 }
422
423 if (line.hasOption('d')) {
424 dictSize = Integer.parseInt(line.getOptionValue('d'));
425 }
426
427 if (line.hasOption('s')) {
428 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
429 }
430
431 if (line.hasOption('i')) {
432 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
433 }
434
435 if (line.hasOption('o')) {
436 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
437 }
438
439 if (line.hasOption('n')) {
440 seekCount = Integer.parseInt(line.getOptionValue('n'));
441 }
442
443 if (line.hasOption('t')) {
444 trialCount = Integer.parseInt(line.getOptionValue('t'));
445 }
446
447 if (line.hasOption('k')) {
448 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
449 minKeyLen = ir.from();
450 maxKeyLen = ir.to();
451 }
452
453 if (line.hasOption('v')) {
454 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
455 minValLength = ir.from();
456 maxValLength = ir.to();
457 }
458
459 if (line.hasOption('b')) {
460 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
461 }
462
463 if (line.hasOption('r')) {
464 rootDir = line.getOptionValue('r');
465 }
466
467 if (line.hasOption('f')) {
468 file = line.getOptionValue('f');
469 }
470
471 if (line.hasOption('S')) {
472 seed = Long.parseLong(line.getOptionValue('S'));
473 }
474
475 if (line.hasOption('x')) {
476 String strOp = line.getOptionValue('x');
477 if (strOp.equals("r")) {
478 op = OP_READ;
479 }
480 else if (strOp.equals("w")) {
481 op = OP_CREATE;
482 }
483 else if (strOp.equals("rw")) {
484 op = OP_CREATE | OP_READ;
485 }
486 else {
487 throw new ParseException("Unknown action specifier: " + strOp);
488 }
489 }
490
491 useRawFs = line.hasOption("rawfs");
492
493 proceed = true;
494 }
495
496 private void validateOptions() throws ParseException {
497 if (!compress.equals("none") && !compress.equals("lzo")
498 && !compress.equals("gz") && !compress.equals("snappy")) {
499 throw new ParseException("Unknown compression scheme: " + compress);
500 }
501
502 if (minKeyLen >= maxKeyLen) {
503 throw new ParseException(
504 "Max key length must be greater than min key length.");
505 }
506
507 if (minValLength >= maxValLength) {
508 throw new ParseException(
509 "Max value length must be greater than min value length.");
510 }
511
512 if (minWordLen >= maxWordLen) {
513 throw new ParseException(
514 "Max word length must be greater than min word length.");
515 }
516 return;
517 }
518
519 private void setStopProceed() {
520 proceed = false;
521 }
522
523 public boolean doCreate() {
524 return (op & OP_CREATE) != 0;
525 }
526
527 public boolean doRead() {
528 return (op & OP_READ) != 0;
529 }
530 }
531
532 public static void main(String[] argv) throws IOException {
533 TestHFileSeek testCase = new TestHFileSeek();
534 MyOptions options = new MyOptions(argv);
535
536 if (options.proceed == false) {
537 return;
538 }
539
540 testCase.options = options;
541 for (int i = 0; i < options.trialCount; i++) {
542 LOG.info("Beginning trial " + (i+1));
543 testCase.setUp();
544 testCase.testSeeks();
545 testCase.tearDown();
546 }
547 }
548
549 }
550