it.unimi.dsi.fastutil.io.BinIO Java Examples

The following examples show how to use it.unimi.dsi.fastutil.io.BinIO. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ShiftAddXorSignedStringMapTest.java    From database with GNU General Public License v2.0 6 votes vote down vote up
@SuppressWarnings("unchecked")
public void testNumbers() throws IOException, ClassNotFoundException {
	
	for( int width = 16; width <= Long.SIZE; width += 8 ) {
		String[] s = new String[ 1000 ];
		long[] v = new long[ s.length ];
		for( int i = s.length; i-- != 0; ) s[ (int)( v[ i ] = i  )] = Integer.toString( i );

		// Test with mph
		Object2LongOpenHashMap<String> mph = new Object2LongOpenHashMap<String>( s, v );
		ShiftAddXorSignedStringMap map = new ShiftAddXorSignedStringMap( Arrays.asList( s ).iterator(), mph, width );

		for( int i = s.length; i-- != 0; ) assertEquals( i, map.getLong( Integer.toString( i ) ) );
		for( int i = s.length + 100; i-- != s.length; ) assertEquals( -1, map.getLong( Integer.toString( i ) ) );

		File temp = File.createTempFile( getClass().getSimpleName(), "test" );
		temp.deleteOnExit();
		BinIO.storeObject( map, temp );
		map = (ShiftAddXorSignedStringMap)BinIO.loadObject( temp );

		for( int i = s.length; i-- != 0; ) assertEquals( i, map.getLong( Integer.toString( i ) ) );
		for( int i = s.length + 100; i-- != s.length; ) assertEquals( -1, map.getLong( Integer.toString( i ) ) );
	
	}
}
 
Example #2
Source File: GZIPIndexer.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
public static void main(String[] arg) throws IOException, JSAPException {
	final SimpleJSAP jsap = new SimpleJSAP(GZIPIndexer.class.getName(), "Computes and stores a quasi-succinct index for a compressed archive.",
			new Parameter[] {
				new UnflaggedOption("archive", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name a GZIP's archive."),
				new UnflaggedOption("index", JSAP.STRING_PARSER, JSAP.REQUIRED, "The output (a serialized LongBigList of pointers to the records in the archive) filename."),
			}
	);

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final FastBufferedInputStream input = new FastBufferedInputStream(new FileInputStream(jsapResult.getString("archive")));
	ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");
	pl.start("Scanning...");
	final EliasFanoMonotoneLongBigList list = new EliasFanoMonotoneLongBigList(index(input, pl));
	pl.done();
	BinIO.storeObject(list, jsapResult.getString("index"));
}
 
Example #3
Source File: KnowledgeBase.java    From fasten with Apache License 2.0 6 votes vote down vote up
@SuppressWarnings("resource")
public static KnowledgeBase getInstance(final String kbDir, final String kbMetadataPathname, final boolean readOnly) throws RocksDBException, ClassNotFoundException, IOException {
	final boolean metadataExists = new File(kbMetadataPathname).exists();
	final boolean kbDirExists = new File(kbDir).exists();
	if (metadataExists != kbDirExists) throw new IllegalArgumentException("Either both or none of the knowledge-base directory and metadata must exist");

	RocksDB.loadLibrary();
	final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions().setCompressionType(CompressionType.LZ4_COMPRESSION);
	final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true);
	final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOptions), new ColumnFamilyDescriptor(GID2URI, cfOptions), new ColumnFamilyDescriptor(URI2GID, cfOptions));

	final List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>();
	final RocksDB db = readOnly ? RocksDB.openReadOnly(dbOptions, kbDir, cfDescriptors, columnFamilyHandles) : RocksDB.open(dbOptions, kbDir, cfDescriptors, columnFamilyHandles);

	final KnowledgeBase kb;
	if (metadataExists) {
		kb = (KnowledgeBase) BinIO.loadObject(kbMetadataPathname);
		kb.readOnly = readOnly;
		kb.callGraphDB = db;
		kb.defaultHandle = columnFamilyHandles.get(0);
		kb.gid2uriFamilyHandle = columnFamilyHandles.get(1);
		kb.uri2gidFamilyHandle = columnFamilyHandles.get(2);
	} else kb = new KnowledgeBase(db, columnFamilyHandles.get(0), columnFamilyHandles.get(1), columnFamilyHandles.get(2), kbMetadataPathname, readOnly);
	return kb;
}
 
Example #4
Source File: ShiftAddXorSignedStringMap.java    From database with GNU General Public License v2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {

	final SimpleJSAP jsap = new SimpleJSAP( ShiftAddXorSignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
			new Parameter[] {
		new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read strings." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
		new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
		new FlaggedOption( "width", JSAP.INTEGER_PARSER, Integer.toString( Integer.SIZE ), JSAP.NOT_REQUIRED, 'w', "width", "The signature width in bits." ),
		new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
		new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting serialised signed string map." ),
		new UnflaggedOption( "stringFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "Read strings from this file instead of standard input." ),
	});

	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final String functionName = jsapResult.getString( "function" );
	final String mapName = jsapResult.getString( "map" );
	final String stringFile = jsapResult.getString( "stringFile" );
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
	final int width = jsapResult.getInt( "width" );
	final boolean zipped = jsapResult.getBoolean( "zipped" );

	final InputStream inputStream = stringFile != null ? new FileInputStream( stringFile ) : System.in;
	final Iterator<MutableString> iterator = new LineIterator( new FastBufferedReader( new InputStreamReader( zipped ? new GZIPInputStream( inputStream ) : inputStream, encoding ), bufferSize ) );
	final Object2LongFunction<CharSequence> function = (Object2LongFunction<CharSequence>)BinIO.loadObject( functionName );
	LOGGER.info( "Signing..." );
	BinIO.storeObject( new ShiftAddXorSignedStringMap( iterator, function, width ), mapName );
	LOGGER.info( "Completed." );
}
 
Example #5
Source File: LiterallySignedStringMapTest.java    From database with GNU General Public License v2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public void testNumbers() throws IOException, ClassNotFoundException {
	for( int n = 10; n < 10000; n *= 10 ) {
		String[] s = new String[ n ];
		for( int i = s.length; i-- != 0; ) s[ i ] = Integer.toString( i );
		Collections.shuffle( Arrays.asList( s ) );

		FrontCodedStringList fcl = new FrontCodedStringList( Arrays.asList( s ), 8, true );
		// Test with mph
		Object2LongOpenCustomHashMap<CharSequence> mph = new Object2LongOpenCustomHashMap<CharSequence>( new CharSequenceStrategy());
		mph.defaultReturnValue( -1 );
		for( int i = 0; i < s.length; i++ ) mph.put( new MutableString( s[ i ] ),  i );
		
		LiterallySignedStringMap map = new LiterallySignedStringMap( mph, fcl );

		for( int i = s.length; i-- != 0; ) assertEquals( i, map.getLong( s[ i ] ) );
		for( int i = s.length + n; i-- != s.length; ) assertEquals( -1, map.getLong( Integer.toString( i ) ) );

		File temp = File.createTempFile( getClass().getSimpleName(), "test" );
		temp.deleteOnExit();
		BinIO.storeObject( map, temp );
		map = (LiterallySignedStringMap)BinIO.loadObject( temp );

		for( int i = s.length; i-- != 0; ) assertEquals( i, map.getLong( s[ i ] ) );
		for( int i = s.length + n; i-- != s.length; ) assertEquals( -1, map.getLong( Integer.toString( i ) ) );
	}
}
 
Example #6
Source File: BloomFilter.java    From database with GNU General Public License v2.0 5 votes vote down vote up
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
	
	final SimpleJSAP jsap = new SimpleJSAP( BloomFilter.class.getName(), "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
			new Parameter[] {
				new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
				new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
				new UnflaggedOption( "bloomFilter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list." ),
				new UnflaggedOption( "size", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)." ),
				new UnflaggedOption( "precision", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The precision of the filter." )
	});
	
	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;
	
	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final String filterName = jsapResult.getString( "bloomFilter" );
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );

	BloomFilter filter = new BloomFilter( jsapResult.getInt( "size" ), jsapResult.getInt( "precision" ) );
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "terms";
	pl.start( "Reading terms..." );
	MutableString s = new MutableString();
	FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );
	while( reader.readLine( s ) != null ) { 
		filter.add( s );
		pl.lightUpdate();
	}
	pl.done();

	BinIO.storeObject( filter, filterName );
}
 
Example #7
Source File: LiterallySignedStringMap.java    From database with GNU General Public License v2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws IOException, JSAPException, ClassNotFoundException, SecurityException, NoSuchMethodException {

	final SimpleJSAP jsap = new SimpleJSAP( LiterallySignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
			new Parameter[] {
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
		new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
		new Switch( "text", 't', "text", "The string list actually a text file, with one string per line." ),
		new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
		new UnflaggedOption( "list", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the serialised list of strings, or of a text file containing a list of strings, if -t is specified." ),
		new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting map." ),
	});

	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String functionName = jsapResult.getString( "function" );
	final String listName = jsapResult.getString( "list" );
	final String mapName = jsapResult.getString( "map" );

	
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
	final boolean zipped = jsapResult.getBoolean( "zipped" );
	final boolean text = jsapResult.getBoolean( "text" );
	
	ObjectList<MutableString> list = text ? new FileLinesCollection( listName, encoding.toString(), zipped ).allLines() : (ObjectList)BinIO.loadObject( listName );
	
	LOGGER.info( "Signing..." );
	BinIO.storeObject( new LiterallySignedStringMap( (Object2LongFunction)BinIO.loadObject( functionName ), list ), mapName );
	LOGGER.info( "Completed." );
}
 
Example #8
Source File: TernaryIntervalSearchTree.java    From database with GNU General Public License v2.0 5 votes vote down vote up
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {

		final SimpleJSAP jsap = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.",
			new Parameter[] {
				new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
				new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
				new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." )
		});

		JSAPResult jsapResult = jsap.parse( arg );
		if ( jsap.messagePrinted() ) return;

		final TernaryIntervalSearchTree tree = new TernaryIntervalSearchTree();
		
		MutableString term = new MutableString();
		final ProgressLogger pl = new ProgressLogger();
		pl.itemsName = "terms";
		final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( System.in, (Charset)jsapResult.getObject( "encoding" ) ), jsapResult.getInt( "bufferSize" ) );
				
		pl.start( "Reading terms..." );

		while( terms.readLine( term ) != null ) {
			pl.update();
			tree.add( term );
		}

		pl.done();

		BinIO.storeObject( tree, jsapResult.getString( "tree" ) );
	}
 
Example #9
Source File: FrontCodedStringList.java    From database with GNU General Public License v2.0 5 votes vote down vote up
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
	
	final SimpleJSAP jsap = new SimpleJSAP( FrontCodedStringList.class.getName(), "Builds a front-coded string list reading from standard input a newline-separated ordered list of terms.",
			new Parameter[] {
				new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
				new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
				new FlaggedOption( "ratio", IntSizeStringParser.getParser(), "4", JSAP.NOT_REQUIRED, 'r',  "ratio", "The compression ratio." ),
				new Switch( "utf8", 'u', "utf8", "Store the strings as UTF-8 byte arrays." ),
				new Switch( "zipped", 'z', "zipped", "The term list is compressed in gzip format." ),
				new UnflaggedOption( "frontCodedList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list." )
	});
	
	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;
	
	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final int ratio = jsapResult.getInt( "ratio" );
	final boolean utf8 = jsapResult.getBoolean( "utf8" );
	final boolean zipped = jsapResult.getBoolean( "zipped" );
	final String listName = jsapResult.getString( "frontCodedList" );
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
	
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "words";
	pl.start( "Reading words..." );
	final FrontCodedStringList frontCodedStringList = new FrontCodedStringList( new LineIterator( new FastBufferedReader( 
			new InputStreamReader( zipped ? new GZIPInputStream( System.in ) : System.in, encoding ), bufferSize ), pl ), ratio, utf8 );
	pl.done();

	System.err.print( "Writing to file..." );
	BinIO.storeObject( frontCodedStringList, listName );
	System.err.println( " done." );
}
 
Example #10
Source File: PermutedFrontCodedStringList.java    From database with GNU General Public License v2.0 5 votes vote down vote up
public static void main( final String[] arg ) throws IOException, ClassNotFoundException, JSAPException {

		SimpleJSAP jsap = new SimpleJSAP( PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and permutation",  
				new Parameter[] {
					new Switch( "invert", 'i', "invert", "Invert permutation before creating the permuted list." ),
					new UnflaggedOption( "list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string list." ),
					new UnflaggedOption( "permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list." ),
					new UnflaggedOption( "permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted list." ),
			} );

		JSAPResult jsapResult = jsap.parse( arg ); 
		if ( jsap.messagePrinted() ) return;
		
		int[] basePermutation =  BinIO.loadInts( jsapResult.getString( "permutation" ) ), permutation;
		if ( jsapResult.getBoolean( "invert" ) ) {
			int i = basePermutation.length;
			permutation = new int[ i ];
			while( i-- != 0 ) permutation[ basePermutation[ i ] ] = i;
		}
		else permutation = basePermutation;
		
		basePermutation = null;
		
		BinIO.storeObject( 
				new PermutedFrontCodedStringList( (FrontCodedStringList)BinIO.loadObject( jsapResult.getString( "list" ) ), permutation ), 
				jsapResult.getString( "permutedList" ) 
		);
	}
 
Example #11
Source File: ObjectDiskQueue.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Dequeues an object from the queue in FIFO fashion. */
@SuppressWarnings("unchecked")
public synchronized T dequeue() throws IOException {
	final int length = byteDiskQueue.dequeueInt();
	fbaos.array = ByteArrays.grow(fbaos.array, length);
	byteDiskQueue.dequeue(fbaos.array, 0, length);
	size--;
	try {
		return (T)BinIO.loadObject(new FastByteArrayInputStream(fbaos.array, 0, length));
	}
	catch (final ClassNotFoundException e) {
		throw new RuntimeException(e.getMessage(), e);
	}
}
 
Example #12
Source File: BuildRepetitionSet.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
public static void main(String[] arg) throws IOException {
	if (arg.length == 0) {
		System.err.println("Usage: " + BuildRepetitionSet.class.getSimpleName() + " REPETITIONSET");
		System.exit(1);
	}

	final FastBufferedReader fastBufferedReader = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
	final MutableString s = new MutableString();
	final LongOpenHashSet repeatedSet = new LongOpenHashSet();
	final String outputFilename = arg[0];
	final ProgressLogger pl = new ProgressLogger();

	MutableString lastUrl = new MutableString();
	pl.itemsName = "lines";
	pl.start("Reading... ");
	while(fastBufferedReader.readLine(s) != null) {
		final int firstTab = s.indexOf('\t');
		final int secondTab = s.indexOf('\t', firstTab + 1);
		MutableString url = s.substring(secondTab + 1);
		if (url.equals(lastUrl)) {
			final int storeIndex = Integer.parseInt(new String(s.array(), 0, firstTab));
			final long storePosition = Long.parseLong(new String(s.array(), firstTab + 1, secondTab - firstTab - 1));
			repeatedSet.add((long)storeIndex << 48 | storePosition);
			System.out.print(storeIndex);
			System.out.print('\t');
			System.out.print(storePosition);
			System.out.print('\t');
			System.out.println(url);
		}

		lastUrl = url;
		pl.lightUpdate();
	}

	pl.done();

	fastBufferedReader.close();
	BinIO.storeObject(repeatedSet, outputFilename);
}
 
Example #13
Source File: KnowledgeBase.java    From fasten with Apache License 2.0 5 votes vote down vote up
@Override
public void close() throws IOException {
	try {
		if (!readOnly) BinIO.storeObject(this, kbMetadataPathname);
	} finally {
		defaultHandle.close();
		gid2uriFamilyHandle.close();
		uri2gidFamilyHandle.close();
		callGraphDB.close();
	}
}
 
Example #14
Source File: ImmutableExternalPrefixMap.java    From database with GNU General Public License v2.0 4 votes vote down vote up
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws ClassNotFoundException, IOException, JSAPException, SecurityException, NoSuchMethodException {

	final SimpleJSAP jsap = new SimpleJSAP( ImmutableExternalPrefixMap.class.getName(), "Builds an external map reading from standard input a newline-separated list of terms or a serialised term list. If the dump stream name is not specified, the map will be self-contained.", 
			new Parameter[] {
				new FlaggedOption( "blockSize", JSAP.INTSIZE_PARSER, ( STD_BLOCK_SIZE / 1024 ) + "Ki", JSAP.NOT_REQUIRED, 'b', "block-size", "The size of a block in the dump stream." ),
				new Switch( "serialised", 's', "serialised", "The data source (file or standard input) provides a serialised java.util.List of terms." ),
				new Switch( "zipped", 'z', "zipped", "Standard input is compressed in gzip format." ),
				new FlaggedOption( "termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file instead of standard input." ),					
				new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term list encoding." ),
				new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised map." ),
				new UnflaggedOption( "dump", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "An optional dump stream (the resulting map will not be self-contained)." )
		}
	);

	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;
	
	Collection<? extends CharSequence> termList;
	
	final String termFile = jsapResult.getString( "termFile" );
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
	final boolean zipped = jsapResult.getBoolean( "zipped" );
	final boolean serialised = jsapResult.getBoolean( "serialised" );

	if ( zipped && serialised ) throw new IllegalArgumentException( "The zipped and serialised options are incompatible" );

	if ( serialised ) termList = (List<? extends CharSequence>) ( termFile != null ? BinIO.loadObject( termFile ) : BinIO.loadObject( System.in ) );
	else {
		if ( termFile != null ) termList = new FileLinesCollection( termFile, encoding.name(), zipped );
		else {
			final ObjectArrayList<MutableString> list = new ObjectArrayList<MutableString>();
			termList = list;
			final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( 
					zipped ? new GZIPInputStream( System.in ) : System.in, encoding.name() ) );
			final MutableString term = new MutableString();
			while( terms.readLine( term ) != null ) list.add( term.copy() );
			terms.close();
		}
	}

	BinIO.storeObject( new ImmutableExternalPrefixMap( termList, jsapResult.getInt( "blockSize" ), jsapResult.getString( "dump" ) ), jsapResult.getString( "map" ) );
}
 
Example #15
Source File: URLPositionWriter.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
public URLPositionWriter(final String storeIndex, final String repeatedSetFile) throws ClassNotFoundException, IOException, IllegalArgumentException {
	this.storeIndexMask = (long)Integer.parseInt(storeIndex) << 48;
	this.repeatedSet = (LongOpenHashSet)BinIO.loadObject(repeatedSetFile);
}
 
Example #16
Source File: URLDigestFinalPositionWriter.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
public URLDigestFinalPositionWriter(final String storeIndex, final String repeatedSetFile) throws ClassNotFoundException, IOException, IllegalArgumentException {
	this.storeIndexMask = (long)Integer.parseInt(storeIndex) << 48;
	this.repeatedSet = (LongOpenHashSet)BinIO.loadObject(repeatedSetFile);
}
 
Example #17
Source File: BitVectorTestCase.java    From database with GNU General Public License v2.0 4 votes vote down vote up
public static void testSerialisation( BitVector b ) throws IOException, ClassNotFoundException {
	final File file = File.createTempFile( BitVectorTestCase.class.getSimpleName(), "test" );
	file.deleteOnExit();

	b.clear();
	
	BinIO.storeObject( b, file );
	assertEquals( b, BinIO.loadObject( file ) );

	b.length( 1000 );
	
	BinIO.storeObject( b, file );
	assertEquals( b, BinIO.loadObject( file ) );

	b.fill( true );
	
	BinIO.storeObject( b, file );
	assertEquals( b, BinIO.loadObject( file ) );
}
 
Example #18
Source File: SpamTextProcessor.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("unchecked")
public SpamTextProcessor(final String termSetOnthologyURI) throws ClassNotFoundException, MalformedURLException, IOException {
	 termSetOnthology = (Object2LongFunction<MutableString>)BinIO.loadObject(new URL(termSetOnthologyURI).openStream());
}