it.unimi.dsi.fastutil.io.FastBufferedInputStream Java Examples

The following examples show how to use it.unimi.dsi.fastutil.io.FastBufferedInputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WorkbenchVirtualizer.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
/** Restores the state of {@code byteArrayDiskQueues} from the file named <code>metadata</code> in {@code directory}.
 *
 * <p>The file is read in this order: four longs (size, append pointer, used, allocated), an int
 * used to resize the buffer and file lists, an int giving the number of entries, and then, for each entry,
 * a vbyte-prefixed scheme+authority followed by a serialized {@code QueueData}.
 *
 * @throws IOException if the metadata file cannot be opened or read.
 * @throws ClassNotFoundException if a serialized object in the file cannot be resolved.
 */
public void readMetadata() throws IOException, ClassNotFoundException {
	// try-with-resources guarantees that the stream is closed even if a read fails halfway.
	try (ObjectInputStream ois = new ObjectInputStream(new FastBufferedInputStream(new FileInputStream(new File(directory, "metadata"))))) {
		byteArrayDiskQueues.size = ois.readLong();
		byteArrayDiskQueues.appendPointer = ois.readLong();
		byteArrayDiskQueues.used = ois.readLong();
		byteArrayDiskQueues.allocated = ois.readLong();
		final int n = ois.readInt();
		byteArrayDiskQueues.buffers.size(n);
		byteArrayDiskQueues.files.size(n);
		final VisitStateSet schemeAuthority2VisitState = frontier.distributor.schemeAuthority2VisitState;
		// Reusable buffer, enlarged on demand; only the first length bytes are meaningful.
		byte[] schemeAuthority = new byte[1024];
		for(int i = ois.readInt(); i-- != 0;) {
			final int length = Util.readVByte(ois);
			if (schemeAuthority.length < length) schemeAuthority = new byte[length];
			ois.readFully(schemeAuthority, 0, length);
			// Always consume the queue data, even when the visit state is unknown: otherwise the stream
			// would be left in the middle of this entry and every following entry would be misread.
			// NOTE(review): assumes each entry is always followed by a serialized QueueData; confirm against the writer.
			final QueueData queueData = (QueueData)ois.readObject();
			final VisitState visitState = schemeAuthority2VisitState.get(schemeAuthority, 0, length);
			// This can happen if the serialization of the visit states has not been completed.
			if (visitState != null) byteArrayDiskQueues.key2QueueData.put(visitState, queueData);
			// Log only the valid prefix of the buffer, not stale bytes from previous entries.
			else LOGGER.error("No visit state found for " + Util.toString(java.util.Arrays.copyOf(schemeAuthority, length)));
		}
	}
}
 
Example #2
Source File: GZIPIndexer.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
/** Command-line entry point: scans the archive named by the <code>archive</code> argument and stores
 * the resulting list of pointers under the filename given by the <code>index</code> argument.
 *
 * @param arg the command-line arguments (archive name, index filename).
 * @throws IOException if the archive cannot be read or the index cannot be written.
 * @throws JSAPException if the command-line parser cannot be set up.
 */
public static void main(String[] arg) throws IOException, JSAPException {
	final SimpleJSAP jsap = new SimpleJSAP(GZIPIndexer.class.getName(), "Computes and stores a quasi-succinct index for a compressed archive.",
			new Parameter[] {
				new UnflaggedOption("archive", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name a GZIP's archive."),
				new UnflaggedOption("index", JSAP.STRING_PARSER, JSAP.REQUIRED, "The output (a serialized LongBigList of pointers to the records in the archive) filename."),
			}
	);

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final EliasFanoMonotoneLongBigList list;
	// The archive stream is closed as soon as scanning ends, whether it succeeds or fails.
	try (FastBufferedInputStream input = new FastBufferedInputStream(new FileInputStream(jsapResult.getString("archive")))) {
		final ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");
		pl.start("Scanning...");
		list = new EliasFanoMonotoneLongBigList(index(input, pl));
		pl.done();
	}
	BinIO.storeObject(list, jsapResult.getString("index"));
}
 
Example #3
Source File: GZIPArchiveReaderTest.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
/** Indexes the test archive, then visits its entries from the last to the first, checking that each one
 * starts with {@code EXPECTED_MAGIC} and that further bytes can be read from it.
 */
@Test
public void reverseGetEntry() throws IOException {
    final LongBigArrayBigList pos;
    // The stream used for indexing is closed explicitly instead of being abandoned.
    try (FileInputStream indexInput = new FileInputStream(ARCHIVE_PATH)) {
        pos = GZIPIndexer.index(indexInput);
    }
    final byte[] actualMagic = new byte[EXPECTED_MAGIC.length];
    // try-with-resources closes the archive stream even when an assertion fails.
    try (FastBufferedInputStream fis = new FastBufferedInputStream(new FileInputStream(ARCHIVE_PATH))) {
        final GZIPArchiveReader gzar = new GZIPArchiveReader(fis);
        for (int i = (int) pos.size64() - 1; i >= 0; i--) {
            gzar.position(pos.getLong(i));
            final GZIPArchive.ReadEntry re = gzar.getEntry();
            if (re == null) break;
            final LazyInflater lin = re.lazyInflater;
            final InputStream in = lin.get();
            in.read(actualMagic);
            assertArrayEquals(EXPECTED_MAGIC, actualMagic);
            // Read (i + 1) * 512 more bytes one at a time before releasing the entry.
            for (int j = 0; j < (i + 1) * 512; j++) in.read();
            lin.consume();
        }
    }
}
 
Example #4
Source File: ByteSerializerDeserializer.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Skips a length-prefixed block: decodes a variable-length length (seven bits per byte, high bit set on
 * all bytes but the last) and then skips that many bytes.
 *
 * @param is the stream to skip from.
 * @throws EOFException if the stream ends while the length is being decoded.
 * @throws IOException if fewer bytes than the decoded length could be skipped.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	int length = 0;
	int next = is.read();

	// Bytes with the high bit set carry seven more bits of the length.
	while (next >= 0x80) {
		length = (length | (next & 0x7F)) << 7;
		next = is.read();
	}
	if (next == -1) throw new EOFException();
	length |= next;

	final long skipped = is.skip(length);
	if (skipped != length) throw new IOException("Asked for " + length + " but got " + skipped);
}
 
Example #5
Source File: CharSequenceByteSerializerDeserializer.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Skips a length-prefixed character sequence: decodes a variable-length character count and then skips
 * that many characters, each occupying one, two or three bytes depending on the high nibble of its first byte.
 *
 * @param is the stream to skip from.
 * @throws EOFException if the stream ends before the whole sequence has been skipped.
 * @throws UTFDataFormatException if a character starts with a byte that is not a valid first byte.
 * @throws IOException if the underlying stream fails.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	// Borrowed from it.unimi.dsi.lang.MutableString.
	int length = 0, b;

	for(;;) {
		if ((b = is.read()) < 0) throw new EOFException();
		if ((b & 0x80) == 0) break;
		length |= b & 0x7F;
		length <<= 7;
	}
	length |= b;

	for(int i = 0; i < length; i++) {
		// Detect end of stream explicitly: masking -1 with 0xFF would turn it into 0xFF
		// and misreport a truncated stream as a malformed one.
		if ((b = is.read()) < 0) throw new EOFException();
		switch (b >> 4) {
		case 0:
		case 1:
		case 2:
		case 3:
		case 4:
		case 5:
		case 6:
		case 7:
			// Single-byte character: nothing more to skip.
			break;
		case 12:
		case 13:
			// Two-byte character: one continuation byte must follow.
			if (is.skip(1) != 1) throw new EOFException();
			break;
		case 14:
			// Three-byte character: two continuation bytes must follow.
			if (is.skip(2) != 2) throw new EOFException();
			break;
		default:
			throw new UTFDataFormatException();
		}
	}
}
 
Example #6
Source File: OfflineIterable.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Returns a new iterator reading from {@code file} through a freshly opened buffered stream.
 *
 * <p>{@code dos} is flushed before the file is opened. Any {@link IOException} is rethrown
 * wrapped in a {@link RuntimeException}.
 *
 * @return an iterator built on the new stream, {@code serializer}, {@code store} and {@code size}.
 */
public OfflineIterator<T, U> iterator() {
	try {
		dos.flush();
		final FileInputStream fileInput = new FileInputStream( file );
		final FastBufferedInputStream buffered = new FastBufferedInputStream( fileInput );
		final DataInputStream dis = new DataInputStream( buffered );
		return new OfflineIterator<T, U>( dis, serializer, store, size );
	}
	catch ( IOException e ) {
		throw new RuntimeException( e );
	}
}
 
Example #7
Source File: IndexOffsetScanner.java    From hadoop-sstable with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a scanner over a file opened through a Hadoop file system, given its name.
 *
 * <p>The resulting data stream is registered with {@code closer}; an {@link IOException}
 * raised while opening is rethrown as an {@link IOError}.
 *
 * @param filename File name.
 * @param fileSystem File system.
 */
public IndexOffsetScanner(final String filename, final FileSystem fileSystem) {
    closer = Closer.create();
    try {
        final Path path = new Path(filename);
        final FSDataInputStream raw = fileSystem.open(path);
        final FastBufferedInputStream buffered = new FastBufferedInputStream(raw);
        this.input = closer.register(new DataInputStream(buffered));
    } catch (IOException e) {
        throw new IOError(e);
    }
}
 
Example #8
Source File: IndexOffsetScanner.java    From hadoop-sstable with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a scanner over a file opened through a Hadoop file system, given its path.
 *
 * <p>The resulting data stream is registered with {@code closer}; an {@link IOException}
 * raised while opening is rethrown as an {@link IOError}.
 *
 * @param path File path.
 * @param fileSystem File system.
 */
public IndexOffsetScanner(final Path path, final FileSystem fileSystem) {
    closer = Closer.create();
    try {
        final FSDataInputStream raw = fileSystem.open(path);
        final FastBufferedInputStream buffered = new FastBufferedInputStream(raw);
        this.input = closer.register(new DataInputStream(buffered));
    } catch (IOException e) {
        throw new IOError(e);
    }
}
 
Example #9
Source File: ByteSerializerDeserializer.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Does nothing: no bytes are read or skipped from the given stream.
 *
 * @param is the stream, left untouched.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	// No-op.
}
 
Example #10
Source File: ByteSerializerDeserializer.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Skips exactly four bytes.
 *
 * @param is the stream to skip from.
 * @throws IOException if fewer than four bytes could be skipped, or if the underlying stream fails.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	// Check the number of bytes actually skipped, so that a truncated stream is reported
	// instead of being silently accepted.
	final long actual = is.skip(4);
	if (actual != 4) throw new IOException("Asked for 4 but got " + actual);
}
 
Example #11
Source File: ByteArrayListByteSerializerDeserializer.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Skips a length-prefixed block: reads the length with {@code Util.readVByte} and skips that many bytes.
 *
 * @param is the stream to skip from.
 * @throws IOException if fewer bytes than the announced length could be skipped.
 */
@Override
public void skip(final FastBufferedInputStream is) throws IOException {
	final int expected = Util.readVByte(is);
	final long skipped = is.skip(expected);
	if (skipped == expected) return;
	throw new IOException("Asked for " + expected + " but got " + skipped);
}
 
Example #12
Source File: MercatorSieve.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Makes the bucket ready to be consumed: flushes {@code aux} and opens a new buffered
 * input stream over {@code auxFile}, using {@code ioBuffer} as its buffer.
 *
 * @throws IOException if flushing fails or the file cannot be opened.
 */
public void prepare() throws IOException {
	aux.flush();
	final FileInputStream auxInput = new FileInputStream(auxFile);
	auxFbis = new FastBufferedInputStream(auxInput, ioBuffer);
}
 
Example #13
Source File: WarcCompressor.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Command-line entry point: reads records from an uncompressed store (a file, or standard input if no
 * store is given) and writes them to a compressed store (a file, or standard output if the output is <code>-</code>).
 *
 * <p>A record whose reading fails is logged and skipped; the scan ends at the first <code>null</code> record.
 *
 * @param arg the command-line arguments.
 * @throws IOException if a stream cannot be opened, written or closed.
 * @throws InterruptedException as declared by the reader/writer operations used here.
 * @throws JSAPException if the command-line parser cannot be set up.
 */
public static void main(String arg[]) throws IOException, InterruptedException, JSAPException {

	SimpleJSAP jsap = new SimpleJSAP(WarcCompressor.class.getName(),
		"Given a store uncompressed, write a compressed store.",
		new Parameter[] { new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "output", "The output filename  (- for stdout)."),
		new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."),
	});

	JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final InputStream in = jsapResult.userSpecified("store") ? new FastBufferedInputStream(new FileInputStream(jsapResult.getString("store"))) : System.in;

	try {
		final WarcReader reader = new UncompressedWarcReader(in);
		final ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");
		final String output = jsapResult.getString("output");

		PrintStream out = "-".equals(output) ? System.out : new PrintStream(new FastBufferedOutputStream(new FileOutputStream(output)), false, "UTF-8");
		final WarcWriter writer = new CompressedWarcWriter(out);

		try {
			pl.itemsName = "records";
			pl.displayFreeMemory = true;
			pl.displayLocalSpeed = true;
			pl.start("Scanning...");

			// NOTE(review): a reader that fails on every call would make this loop spin forever.
			for (long storePosition = 0;; storePosition++) {
				LOGGER.trace("STOREPOSITION " + storePosition);
				WarcRecord record = null;
				try {
					record = reader.read();
				} catch (Exception e) {
					// Log through the logger with the cause attached, rather than printing the stack trace to stderr.
					LOGGER.error("Exception while reading record " + storePosition + " ", e);
					continue;
				}
				if (record == null)
					break;
				writer.write(record);
				pl.lightUpdate();
			}
			pl.done();
		} finally {
			// Close the writer even if the scan fails, so that buffered output is not lost.
			writer.close();
		}
	} finally {
		// Release the store file; standard input is left open.
		if (in != System.in) in.close();
	}
}
 
Example #14
Source File: ParallelFilteredProcessorRunner.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Command-line entry point: builds a runner over a store (a file, or standard input if no store is given),
 * registers one processor/writer/output triple per <code>processor</code> option, and runs it either
 * sequentially or with the requested number of threads.
 *
 * @param arg the command-line arguments.
 * @throws IllegalArgumentException if the numbers of processors, writers and outputs differ.
 * @throws Exception if parsing, setup or the run itself fails.
 */
public static void main(final String[] arg) throws Exception {
	final SimpleJSAP jsap = new SimpleJSAP(ParallelFilteredProcessorRunner.class.getName(), "Processes a store.",
			new Parameter[] {
			new FlaggedOption("filter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'f', "filter", "A WarcRecord filter that recods must pass in order to be processed."),
			new FlaggedOption("processor", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'p', "processor", "A processor to be applied to data.").setAllowMultipleDeclarations(true),
			new FlaggedOption("writer", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'w', "writer", "A writer to be applied to the results.").setAllowMultipleDeclarations(true),
			new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "output", "The output filename  (- for stdout).").setAllowMultipleDeclarations(true),
			new FlaggedOption("threads", JSAP.INTSIZE_PARSER, Integer.toString(Runtime.getRuntime().availableProcessors()), JSAP.NOT_REQUIRED, 'T', "threads", "The number of threads to be used."),
			new Switch("sequential", 'S', "sequential"),
			new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."),
	});

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final String filterSpec = jsapResult.getString("filter");
	final Filter<WarcRecord> filter;
	if (filterSpec != null) {
		final FilterParser<WarcRecord> parser = new FilterParser<>(WarcRecord.class);
		filter = parser.parse(filterSpec);
	} else
		filter = null;
	final InputStream in = jsapResult.userSpecified("store") ? new FastBufferedInputStream(new FileInputStream(jsapResult.getString("store"))) : System.in;
	try {
		final ParallelFilteredProcessorRunner parallelFilteredProcessorRunner = new ParallelFilteredProcessorRunner(in, filter);

		final String[] processor =  jsapResult.getStringArray("processor");
		final String[] writer =  jsapResult.getStringArray("writer");
		final String[] output =  jsapResult.getStringArray("output");
		if (processor.length != writer.length) throw new IllegalArgumentException("You must specify the same number or processors and writers");
		if (output.length != writer.length) throw new IllegalArgumentException("You must specify the same number or output specifications and writers");

		final String[] packages = new String[] { ParallelFilteredProcessorRunner.class.getPackage().getName() };
		final PrintStream[] ops = new PrintStream[processor.length];
		try {
			for (int i = 0; i < processor.length; i++) {
				ops[i] = "-".equals(output[i]) ? System.out : new PrintStream(new FastBufferedOutputStream(new FileOutputStream(output[i])), false, "UTF-8");
				// TODO: these casts to SOMETHING<Object> are necessary for compilation under Eclipse. Check in the future.
				parallelFilteredProcessorRunner.add((Processor<Object>)ObjectParser.fromSpec(processor[i], Processor.class, packages, new String[] { "getInstance" }),
						(Writer<Object>)ObjectParser.fromSpec(writer[i], Writer.class,  packages, new String[] { "getInstance" }),
						ops[i]);
			}

			if (jsapResult.userSpecified("sequential")) parallelFilteredProcessorRunner.runSequentially();
			else parallelFilteredProcessorRunner.run(jsapResult.getInt("threads"));
		} finally {
			// Close (and thereby flush) every output opened so far, even if setup or the run failed;
			// entries are null when the setup loop stopped before reaching them.
			for (int i = 0; i < ops.length; i++) if (ops[i] != null) ops[i].close();
		}
	} finally {
		// Release the store file; standard input is left open.
		if (in != System.in) in.close();
	}

}
 
Example #15
Source File: ParallelBufferedWarcWriterTest.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
/** Writes {@code NUM_RECORDS} random HTTP responses from {@code NUM_THREADS} concurrent threads through a
 * {@code ParallelBufferedWarcWriter}, with and without gzip, then reads everything back and checks that
 * every record is found and that its content matches what was written.
 */
@Test
public void testRecord() throws IOException, InterruptedException, URISyntaxException {

	final boolean[] gzipModes = { true, false };
	for (final boolean gzip : gzipModes) {
		final FastByteArrayOutputStream out = new FastByteArrayOutputStream();
		final ParallelBufferedWarcWriter warcParallelOutputStream = new ParallelBufferedWarcWriter(out, gzip);
		final Thread[] thread = new Thread[NUM_THREADS];

		final URI fakeUri = new URI("http://this.is/a/fake");
		final RandomTestMocks.HttpResponse[] response = new RandomTestMocks.HttpResponse[NUM_RECORDS];
		for (int t = 0; t < NUM_THREADS; t++) {
			// The thread name carries the index of the slice of records the thread must write.
			final Thread worker = new Thread(Integer.toString(t)) {
				@Override
				public void run() {
					final int index = Integer.parseInt(getName());
					final int first = index * (NUM_RECORDS / NUM_THREADS);
					final int end = (index + 1) * (NUM_RECORDS / NUM_THREADS);
					for (int k = first; k < end; k++) {
						try {
							response[k] = new RandomTestMocks.HttpResponse(MAX_NUMBER_OF_HEADERS, MAX_LENGTH_OF_HEADER, MAX_LENGTH_OF_BODY, k);
							final HttpResponseWarcRecord record = new HttpResponseWarcRecord(fakeUri, response[k]);
							warcParallelOutputStream.write(record);
							LOGGER.info("Thread " + index + " wrote record " + k);
						} catch(Exception e) {
							throw new RuntimeException(e);
						}
					}
				}
			};
			thread[t] = worker;
			worker.start();
		}

		for (int t = 0; t < thread.length; t++) thread[t].join();
		warcParallelOutputStream.close();
		out.close();

		final FastBufferedInputStream in = new FastBufferedInputStream(new FastByteArrayInputStream(out.array, 0, out.length));
		final WarcReader reader;
		if (gzip) reader = new CompressedWarcReader(in);
		else reader = new UncompressedWarcReader(in);

		// Records may come back in any order: the "Position" header tells which response each one is.
		final boolean[] found = new boolean[NUM_RECORDS];
		for (int read = 0; read < NUM_RECORDS; read++) {
			final HttpResponseWarcRecord r = (HttpResponseWarcRecord) reader.read();
			final int pos = Integer.parseInt(r.getFirstHeader("Position").getValue());
			found[pos] = true;
			final byte[] expected = ByteStreams.toByteArray(response[pos].getEntity().getContent());
			final byte[] actual = ByteStreams.toByteArray(r.getEntity().getContent());
			assertArrayEquals(expected, actual);
		}
		in.close();

		for (int i = NUM_RECORDS - 1; i >= 0; i--) assertTrue(Integer.toString(i), found[i]);
	}
}
 
Example #16
Source File: ByteSerializerDeserializer.java    From BUbiNG with Apache License 2.0 2 votes vote down vote up
/** Skip an object, usually without deserializing it.
 *
 * <p>Note that this method
 * <em>requires explicitly a {@link FastBufferedInputStream}</em>. As
 * a result, you can safely use {@link FastBufferedInputStream#skip(long) skip()} to
 * skip the number of bytes required (see the documentation of {@link FastBufferedInputStream#skip(long)}
 * for some elaboration).
 *
 * <p>Calling this method must be equivalent to calling {@link #fromStream(InputStream)}
 * and discarding the result.
 *
 * @param is the fast buffered input stream from which the next object will be skipped.
 * @throws IOException if the object cannot be skipped (the exact conditions are implementation-specific).
 */
public void skip(FastBufferedInputStream is) throws IOException;