Java Code Examples for com.martiansoftware.jsap.JSAP#NOT_REQUIRED
The following examples show how to use
com.martiansoftware.jsap.JSAP#NOT_REQUIRED.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Agent.java From BUbiNG with Apache License 2.0 | 5 votes |
public static void main(final String arg[]) throws Exception { final SimpleJSAP jsap = new SimpleJSAP(Agent.class.getName(), "Starts a BUbiNG agent (note that you must enable JMX by means of the standard Java system properties).", new Parameter[] { new FlaggedOption("weight", JSAP.INTEGER_PARSER, "1", JSAP.NOT_REQUIRED, 'w', "weight", "The agent weight."), new FlaggedOption("group", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "group", "The JGroups group identifier (must be the same for all cooperating agents)."), new FlaggedOption("jmxHost", JSAP.STRING_PARSER, InetAddress.getLocalHost().getHostAddress(), JSAP.REQUIRED, 'h', "jmx-host", "The IP address (possibly specified by a host name) that will be used to expose the JMX RMI connector to other agents."), new FlaggedOption("rootDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'r', "root-dir", "The root directory."), new Switch("new", 'n', "new", "Start a new crawl"), new FlaggedOption("properties", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'P', "properties", "The properties used to configure the agent."), new UnflaggedOption("name", JSAP.STRING_PARSER, JSAP.REQUIRED, "The agent name (an identifier that must be unique across the group).") }); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1); // JMX *must* be set up. 
final String portProperty = System.getProperty(JMX_REMOTE_PORT_SYSTEM_PROPERTY); if (portProperty == null) throw new IllegalArgumentException("You must specify a JMX service port using the property " + JMX_REMOTE_PORT_SYSTEM_PROPERTY); final String name = jsapResult.getString("name"); final int weight = jsapResult.getInt("weight"); final String group = jsapResult.getString("group"); final String host = jsapResult.getString("jmxHost"); final int port = Integer.parseInt(portProperty); final BaseConfiguration additional = new BaseConfiguration(); additional.addProperty("name", name); additional.addProperty("group", group); additional.addProperty("weight", Integer.toString(weight)); additional.addProperty("crawlIsNew", Boolean.valueOf(jsapResult.getBoolean("new"))); if (jsapResult.userSpecified("rootDir")) additional.addProperty("rootDir", jsapResult.getString("rootDir")); new Agent(host, port, new RuntimeConfiguration(new StartupConfiguration(jsapResult.getString("properties"), additional))); System.exit(0); // Kills remaining FetchingThread instances, if any. }
Example 2
Source File: RandomReadWritesTest.java From BUbiNG with Apache License 2.0 | 5 votes |
/**
 * Command-line entry point: writes random records to a path and, unless
 * {@code --writeonly} is given, reads them back.
 *
 * <p>Compression is chosen from the path name: a path ending in {@code .gz}
 * is treated as compressed.
 *
 * @param args the command-line arguments.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws IOException if writing or reading the records fails.
 * @throws InterruptedException if the write or read phase is interrupted.
 */
public static void main(String[] args) throws JSAPException, IOException, InterruptedException {
    final Parameter[] options = {
        new FlaggedOption("random", JSAP.INTEGER_PARSER, "100", JSAP.NOT_REQUIRED, 'r', "random", "The number of random record to sample from."),
        new FlaggedOption("body", JSAP.INTSIZE_PARSER, "4K", JSAP.NOT_REQUIRED, 'b', "body", "The maximum size of the random generated body (in bytes)."),
        new Switch("fully", 'f', "fully", "Whether to read fully the record (and do a minimal sequential cosnsistency check)."),
        new Switch("writeonly", 'w', "writeonly", "Whether to skip the read part (if present, 'fully' will be ignored."),
        new UnflaggedOption("path", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The path to write to."),
        new UnflaggedOption("records", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The numer of records to write."),
    };
    final SimpleJSAP parser = new SimpleJSAP(RandomReadWritesTest.class.getName(), "Writes some random records on disk.", options);

    final JSAPResult parsed = parser.parse(args);
    if (parser.messagePrinted()) {
        System.exit(1);
    }

    final String target = parsed.getString("path");
    final boolean gzipped = target.endsWith(".gz");
    final boolean readFully = parsed.getBoolean("fully");

    // The value handed to writeRecords() is 1 for compressed targets, 0 otherwise.
    final int parallel;
    if (gzipped) {
        parallel = 1;
    } else {
        parallel = 0;
    }

    final int maxBody = parsed.getInt("body");
    final WarcRecord[] pool = prepareRndRecords(parsed.getInt("random"), RESPONSE_PROBABILITY, MAX_NUMBER_OF_HEADERS, MAX_LENGTH_OF_HEADER, maxBody);
    final int[] written = writeRecords(target, parsed.getInt("records"), pool, parallel);

    if (parsed.getBoolean("writeonly")) {
        return;
    }
    readRecords(target, written, maxBody, readFully, gzipped);
}
Example 3
Source File: ShiftAddXorSignedStringMap.java From database with GNU General Public License v2.0 | 5 votes |
/**
 * Command-line entry point: builds a shift-add-xor signed string map from a
 * list of strings and a previously serialised function, and serialises the
 * resulting map.
 *
 * <p>Strings are read from the optional {@code stringFile} argument or, when
 * it is absent, from standard input. A file opened here is closed before
 * returning; standard input is never closed.
 *
 * @param arg the command-line arguments.
 * @throws NoSuchMethodException declared for the charset parser lookup.
 * @throws IOException if reading the strings or the function, or writing the map, fails.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws ClassNotFoundException if the serialised function cannot be loaded.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {

    final SimpleJSAP jsap = new SimpleJSAP( ShiftAddXorSignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
        new Parameter[] {
            new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read strings." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
            new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
            new FlaggedOption( "width", JSAP.INTEGER_PARSER, Integer.toString( Integer.SIZE ), JSAP.NOT_REQUIRED, 'w', "width", "The signature width in bits." ),
            new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
            new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting serialised signed string map." ),
            new UnflaggedOption( "stringFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "Read strings from this file instead of standard input." ),
        } );

    JSAPResult jsapResult = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final int bufferSize = jsapResult.getInt( "bufferSize" );
    final String functionName = jsapResult.getString( "function" );
    final String mapName = jsapResult.getString( "map" );
    final String stringFile = jsapResult.getString( "stringFile" );
    final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
    final int width = jsapResult.getInt( "width" );
    final boolean zipped = jsapResult.getBoolean( "zipped" );

    final InputStream inputStream = stringFile != null ? new FileInputStream( stringFile ) : System.in;
    try {
        final Iterator<MutableString> iterator = new LineIterator( new FastBufferedReader( new InputStreamReader( zipped ? new GZIPInputStream( inputStream ) : inputStream, encoding ), bufferSize ) );
        final Object2LongFunction<CharSequence> function = (Object2LongFunction<CharSequence>)BinIO.loadObject( functionName );
        LOGGER.info( "Signing..." );
        BinIO.storeObject( new ShiftAddXorSignedStringMap( iterator, function, width ), mapName );
        LOGGER.info( "Completed." );
    }
    finally {
        // Release the file we opened ourselves; System.in is not ours to close.
        if ( stringFile != null ) inputStream.close();
    }
}
Example 4
Source File: FrontCodedStringList.java From database with GNU General Public License v2.0 | 5 votes |
/**
 * Command-line entry point: reads a newline-separated list of terms from
 * standard input (optionally gzip-compressed), builds a front-coded string
 * list from it and serialises the list to the given file.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading standard input or writing the list fails.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws NoSuchMethodException declared for the charset parser lookup.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
    final Parameter[] options = {
        new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
        new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
        new FlaggedOption( "ratio", IntSizeStringParser.getParser(), "4", JSAP.NOT_REQUIRED, 'r', "ratio", "The compression ratio." ),
        new Switch( "utf8", 'u', "utf8", "Store the strings as UTF-8 byte arrays." ),
        new Switch( "zipped", 'z', "zipped", "The term list is compressed in gzip format." ),
        new UnflaggedOption( "frontCodedList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list." )
    };
    final SimpleJSAP parser = new SimpleJSAP( FrontCodedStringList.class.getName(), "Builds a front-coded string list reading from standard input a newline-separated ordered list of terms.", options );

    final JSAPResult parsed = parser.parse( arg );
    if ( parser.messagePrinted() ) {
        return;
    }

    final int bufferSize = parsed.getInt( "bufferSize" );
    final int ratio = parsed.getInt( "ratio" );
    final boolean utf8 = parsed.getBoolean( "utf8" );
    final boolean zipped = parsed.getBoolean( "zipped" );
    final String outputName = parsed.getString( "frontCodedList" );
    final Charset charset = (Charset)parsed.getObject( "encoding" );

    final ProgressLogger progress = new ProgressLogger();
    progress.itemsName = "words";
    progress.start( "Reading words..." );

    // The input chain is assembled only after the logger has been started.
    final InputStreamReader decoder = new InputStreamReader( zipped ? new GZIPInputStream( System.in ) : System.in, charset );
    final FastBufferedReader buffered = new FastBufferedReader( decoder, bufferSize );
    final LineIterator words = new LineIterator( buffered, progress );
    final FrontCodedStringList result = new FrontCodedStringList( words, ratio, utf8 );
    progress.done();

    System.err.print( "Writing to file..." );
    BinIO.storeObject( result, outputName );
    System.err.println( " done." );
}
Example 5
Source File: TernaryIntervalSearchTree.java From database with GNU General Public License v2.0 | 5 votes |
/**
 * Command-line entry point: reads a newline-separated list of terms from
 * standard input, adds each one to a new ternary interval search tree and
 * serialises the tree to the given file.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading standard input or writing the tree fails.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws NoSuchMethodException declared for the charset parser lookup.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
    final Parameter[] options = {
        new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
        new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
        new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." )
    };
    final SimpleJSAP parser = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.", options );

    final JSAPResult parsed = parser.parse( arg );
    if ( parser.messagePrinted() ) {
        return;
    }

    final TernaryIntervalSearchTree result = new TernaryIntervalSearchTree();
    final MutableString line = new MutableString();
    final ProgressLogger progress = new ProgressLogger();
    progress.itemsName = "terms";

    final Charset charset = (Charset)parsed.getObject( "encoding" );
    final int bufferSize = parsed.getInt( "bufferSize" );
    final FastBufferedReader input = new FastBufferedReader( new InputStreamReader( System.in, charset ), bufferSize );

    progress.start( "Reading terms..." );
    for ( ;; ) {
        // readLine() fills the reusable buffer and returns null at end of input.
        if ( input.readLine( line ) == null ) {
            break;
        }
        progress.update();
        result.add( line );
    }
    progress.done();

    BinIO.storeObject( result, parsed.getString( "tree" ) );
}
Example 6
Source File: LiterallySignedStringMap.java From database with GNU General Public License v2.0 | 5 votes |
/**
 * Command-line entry point: builds a literally signed string map from a
 * serialised function and a list of strings, and serialises the map.
 *
 * <p>The list is loaded as a serialised object unless {@code --text} is
 * given, in which case it is read line by line from a text file using the
 * chosen encoding (and gzip decompression when {@code --zipped} is given).
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the inputs or writing the map fails.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws ClassNotFoundException if a serialised input cannot be loaded.
 * @throws SecurityException declared for the charset parser lookup.
 * @throws NoSuchMethodException declared for the charset parser lookup.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws IOException, JSAPException, ClassNotFoundException, SecurityException, NoSuchMethodException {

    // The description names this tool (it previously repeated the text of the shift-add-xor tool).
    final SimpleJSAP jsap = new SimpleJSAP( LiterallySignedStringMap.class.getName(), "Builds a literally signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
        new Parameter[] {
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
            new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
            new Switch( "text", 't', "text", "The string list is actually a text file, with one string per line." ),
            new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
            new UnflaggedOption( "list", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the serialised list of strings, or of a text file containing a list of strings, if -t is specified." ),
            new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting map." ),
        } );

    JSAPResult jsapResult = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final String functionName = jsapResult.getString( "function" );
    final String listName = jsapResult.getString( "list" );
    final String mapName = jsapResult.getString( "map" );

    final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
    final boolean zipped = jsapResult.getBoolean( "zipped" );
    final boolean text = jsapResult.getBoolean( "text" );

    // Charset.name() is the documented canonical name; toString() is only specified as "a string describing this charset".
    final ObjectList<MutableString> list = text
        ? new FileLinesCollection( listName, encoding.name(), zipped ).allLines()
        : (ObjectList<MutableString>)BinIO.loadObject( listName );

    LOGGER.info( "Signing..." );
    BinIO.storeObject( new LiterallySignedStringMap( (Object2LongFunction)BinIO.loadObject( functionName ), list ), mapName );
    LOGGER.info( "Completed." );
}
Example 7
Source File: BloomFilter.java From database with GNU General Public License v2.0 | 5 votes |
/**
 * Command-line entry point: reads a newline-separated list of terms from
 * standard input, adds each one to a new Bloom filter and serialises the
 * filter to the given file.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading standard input or writing the filter fails.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws NoSuchMethodException declared for the charset parser lookup.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {

    final SimpleJSAP jsap = new SimpleJSAP( BloomFilter.class.getName(), "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
        new Parameter[] {
            new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
            new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
            // This argument names the output file of the filter (the help text used to describe a front-coded list).
            new UnflaggedOption( "bloomFilter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised Bloom filter." ),
            new UnflaggedOption( "size", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)." ),
            new UnflaggedOption( "precision", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The precision of the filter." )
        } );

    JSAPResult jsapResult = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    final int bufferSize = jsapResult.getInt( "bufferSize" );
    final String filterName = jsapResult.getString( "bloomFilter" );
    final Charset encoding = (Charset)jsapResult.getObject( "encoding" );

    BloomFilter filter = new BloomFilter( jsapResult.getInt( "size" ), jsapResult.getInt( "precision" ) );
    final ProgressLogger pl = new ProgressLogger();
    pl.itemsName = "terms";
    pl.start( "Reading terms..." );

    // A single buffer is refilled by each readLine() call; null marks end of input.
    MutableString s = new MutableString();
    FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );
    while( reader.readLine( s ) != null ) {
        filter.add( s );
        pl.lightUpdate();
    }
    pl.done();

    BinIO.storeObject( filter, filterName );
}
Example 8
Source File: ExtractProperties.java From fasten with Apache License 2.0 | 4 votes |
/**
 * Command-line entry point: opens a knowledge base and prints one
 * tab-separated line per call graph (index, product, version, graph
 * properties, transpose properties) to standard output.
 *
 * <p>At most {@code n} graphs are visited, and graphs with fewer than
 * {@code min} internal nodes are skipped. The knowledge base is closed even
 * if enumeration fails.
 *
 * @param args the command-line arguments.
 * @throws IllegalArgumentException if the knowledge-base directory or metadata file does not exist.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws ClassNotFoundException declared by the knowledge-base loader.
 * @throws RocksDBException declared by the knowledge-base loader.
 * @throws IOException if accessing the knowledge base fails.
 */
public static void main(final String[] args) throws JSAPException, ClassNotFoundException, RocksDBException, IOException {
    final SimpleJSAP jsap = new SimpleJSAP(ExtractProperties.class.getName(),
        "Extract properties files from a knowledge base.",
        new Parameter[] {
            new FlaggedOption("min", JSAP.INTEGER_PARSER, "0", JSAP.NOT_REQUIRED, 'm', "min", "Consider only graphs with at least this number of internal nodes."),
            new FlaggedOption("n", JSAP.LONG_PARSER, Long.toString(Long.MAX_VALUE), JSAP.NOT_REQUIRED, 'n', "n", "Analyze just this number of graphs."),
            new UnflaggedOption("kb", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The directory of the RocksDB instance containing the knowledge base." ),
            new UnflaggedOption("kbmeta", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The file containing the knowledge base metadata." ),
        });

    final JSAPResult jsapResult = jsap.parse(args);
    if ( jsap.messagePrinted() ) return;

    final int minNodes = jsapResult.getInt("min");
    final long n = jsapResult.getLong("n");
    final String kbDir = jsapResult.getString("kb");
    if (!new File(kbDir).exists()) throw new IllegalArgumentException("No such directory: " + kbDir);
    final String kbMetadataFilename = jsapResult.getString("kbmeta");
    if (!new File(kbMetadataFilename).exists()) throw new IllegalArgumentException("No such file: " + kbMetadataFilename);

    LOGGER.info("Loading KnowledgeBase metadata");
    final KnowledgeBase kb = KnowledgeBase.getInstance(kbDir, kbMetadataFilename, true);
    try {
        final ProgressLogger pl = new ProgressLogger();
        // The expected total goes in expectedUpdates; count is the logger's own running counter of updates.
        pl.expectedUpdates = kb.callGraphs.size();
        pl.itemsName = "graphs";
        pl.start("Enumerating graphs");

        long i = 0;
        for (final CallGraph callGraph : kb.callGraphs.values()) {
            if (i++ >= n) break;
            pl.update();
            if (callGraph.nInternal < minNodes) continue;
            final CallGraphData callGraphData = callGraph.callGraphData();
            System.out.print(callGraph.index);
            System.out.print('\t');
            System.out.print(callGraph.product);
            System.out.print('\t');
            System.out.print(callGraph.version);
            System.out.print('\t');
            System.out.print(callGraphData.graphProperties);
            System.out.print('\t');
            System.out.print(callGraphData.transposeProperties);
            System.out.println();
        }
        // Every start() is paired with a done().
        pl.done();
    } finally {
        LOGGER.info("Closing KnowledgeBase");
        kb.close();
    }
}
Example 9
Source File: Indexer.java From fasten with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws JSONException, JSAPException, IOException, RocksDBException, InterruptedException, ExecutionException, ClassNotFoundException { final SimpleJSAP jsap = new SimpleJSAP( Indexer.class.getName(), "Creates or updates a knowledge base (associated to a given database), indexing either a list of JSON files or a Kafka topic where JSON object are published", new Parameter[] { new FlaggedOption("topic", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 't', "topic", "A kafka topic containing the input." ), new FlaggedOption("host", JSAP.STRING_PARSER, "localhost", JSAP.NOT_REQUIRED, 'h', "host", "The host of the Kafka server." ), new FlaggedOption("port", JSAP.INTEGER_PARSER, "30001", JSAP.NOT_REQUIRED, 'p', "port", "The port of the Kafka server." ), new FlaggedOption("max", JSAP.LONG_PARSER, String.valueOf(Long.MAX_VALUE), JSAP.NOT_REQUIRED, 'm', "max", "The maximum number of call graphs that will be indexed." ), new UnflaggedOption("kb", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The directory of the RocksDB instance containing the knowledge base." ), new UnflaggedOption("kbmeta", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The file containing the knowledge base metadata." ), new UnflaggedOption("filename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.GREEDY, "The name of the file containing the JSON object." 
), }); final JSAPResult jsapResult = jsap.parse(args); if ( jsap.messagePrinted() ) return; final String kbDir = jsapResult.getString("kb"); final String kbMetadataFilename = jsapResult.getString("kbmeta"); if (new File(kbDir).exists()) throw new IllegalArgumentException("Knowledge base directory exists"); if (new File(kbMetadataFilename).exists()) throw new IllegalArgumentException("Knowledge-base metadata file exists"); final KnowledgeBase kb = KnowledgeBase.getInstance(kbDir, kbMetadataFilename, false); final Indexer indexer = new Indexer(kb); final long max = jsapResult.getLong("max"); final Consumer<String, String> consumer; if (jsapResult.userSpecified("topic")) { // Kafka indexing final String topic = jsapResult.getString("topic"); final Properties props = new Properties(); props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, jsapResult.getString("host") + ":" + Integer.toString(jsapResult.getInt("port"))); props.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString()); // We want to have a random consumer group. props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); props.put("auto.offset.reset", "earliest"); props.put("max.poll.records", Integer.toString(Integer.MAX_VALUE)); consumer = new KafkaConsumer<>(props); final Future<Void> future = indexer.index(max, consumer, topic); future.get(); // Wait for indexing to complete } else // File indexing indexer.index(max, jsapResult.getStringArray("filename")); kb.close(); }
Example 10
Source File: HTMLParser.java From BUbiNG with Apache License 2.0 | 4 votes |
/**
 * Command-line entry point: parses one page and prints its digest and the
 * links found in it.
 *
 * <p>The page is downloaded from the given URL unless {@code --file} is
 * given, in which case the content is read from that file and the URL is
 * only passed to the parser alongside it. The file reader is closed as soon
 * as the content has been read.
 *
 * @param arg the command-line arguments.
 * @throws IllegalArgumentException declared by the parsing helpers.
 * @throws IOException if downloading the page or reading the file fails.
 * @throws URISyntaxException if the URL is not a valid URI.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws NoSuchAlgorithmException declared for the digester lookup.
 */
public static void main(final String arg[]) throws IllegalArgumentException, IOException, URISyntaxException, JSAPException, NoSuchAlgorithmException {

    final SimpleJSAP jsap = new SimpleJSAP(HTMLParser.class.getName(), "Produce the digest of a page: the page is downloaded or passed as argument by specifying a file",
        new Parameter[] {
            new UnflaggedOption("url", JSAP.STRING_PARSER, JSAP.REQUIRED, "The url of the page."),
            new Switch("crossAuthorityDuplicates", 'c', "cross-authority-duplicates"),
            new FlaggedOption("charBufferSize", JSAP.INTSIZE_PARSER, Integer.toString(CHAR_BUFFER_SIZE), JSAP.NOT_REQUIRED, 'b', "buffer", "The size of the parser character buffer (0 for dynamic sizing)."),
            new FlaggedOption("file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'f', "file", "The page to be processed."),
            new FlaggedOption("digester", JSAP.STRING_PARSER, "MD5", JSAP.NOT_REQUIRED, 'd', "digester", "The digester to be used.")
        });

    final JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted()) System.exit(1);

    final String url = jsapResult.getString("url");
    final String digester = jsapResult.getString("digester");
    final boolean crossAuthorityDuplicates = jsapResult.userSpecified("crossAuthorityDuplicates");
    final int charBufferSize = jsapResult.getInt("charBufferSize");

    final HTMLParser<Void> htmlParser = new HTMLParser<>(BinaryParser.forName(digester), (TextProcessor<Void>)null, crossAuthorityDuplicates, charBufferSize);
    final SetLinkReceiver linkReceiver = new SetLinkReceiver();
    final byte[] digest;

    if (!jsapResult.userSpecified("file")) {
        final URI uri = new URI(url);
        final HttpGet request = new HttpGet(uri);
        request.setConfig(RequestConfig.custom().setRedirectsEnabled(false).build());
        digest = htmlParser.parse(uri, HttpClients.createDefault().execute(request), linkReceiver);
    }
    else {
        final String file = jsapResult.getString("file");
        final String content;
        // The reader (and the file stream under it) is closed here; presumably IOUtils.toString() leaves that to the caller.
        // NOTE(review): the file is decoded with the platform default charset, as before.
        try (InputStreamReader reader = new InputStreamReader(new FileInputStream(file))) {
            content = IOUtils.toString(reader);
        }
        digest = htmlParser.parse(BURL.parse(url), new StringHttpMessages.HttpResponse(content), linkReceiver);
    }

    System.out.println("DigestHexString: " + Hex.encodeHexString(digest));
    System.out.println("Links: " + linkReceiver.urls);

    // Report when distinct URIs collapse to the same string form.
    final Set<String> urlStrings = new ObjectOpenHashSet<>();
    for (final URI link: linkReceiver.urls) urlStrings.add(link.toString());
    if (urlStrings.size() != linkReceiver.urls.size()) System.out.println("There are " + linkReceiver.urls.size() + " URIs but " + urlStrings.size() + " strings");
}
Example 11
Source File: WarcCompressor.java From BUbiNG with Apache License 2.0 | 4 votes |
/**
 * Command-line entry point: reads records from an uncompressed store (a
 * file, or standard input when no store is named) and writes them through a
 * compressed writer to the output file, or to standard output when the
 * output name is {@code -}.
 *
 * <p>A record whose read throws is logged and skipped; reading stops at the
 * first {@code null} record.
 *
 * @param arg the command-line arguments.
 * @throws IOException if opening the streams or writing a record fails.
 * @throws InterruptedException declared by the record writer.
 * @throws JSAPException if the option parser cannot be configured.
 */
public static void main(String arg[]) throws IOException, InterruptedException, JSAPException {
    final Parameter[] options = {
        new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "output", "The output filename (- for stdout)."),
        new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."),
    };
    final SimpleJSAP parser = new SimpleJSAP(WarcCompressor.class.getName(), "Given a store uncompressed, write a compressed store.", options);

    final JSAPResult parsed = parser.parse(arg);
    if (parser.messagePrinted()) {
        return;
    }

    final InputStream source;
    if (parsed.userSpecified("store")) {
        source = new FastBufferedInputStream(new FileInputStream(parsed.getString("store")));
    } else {
        source = System.in;
    }
    final WarcReader warcReader = new UncompressedWarcReader(source);

    final ProgressLogger progress = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");

    final String outputName = parsed.getString("output");
    final PrintStream sink;
    if ("-".equals(outputName)) {
        sink = System.out;
    } else {
        sink = new PrintStream(new FastBufferedOutputStream(new FileOutputStream(outputName)), false, "UTF-8");
    }
    final WarcWriter warcWriter = new CompressedWarcWriter(sink);

    progress.itemsName = "records";
    progress.displayFreeMemory = true;
    progress.displayLocalSpeed = true;
    progress.start("Scanning...");

    for (long position = 0; ; position++) {
        LOGGER.trace("STOREPOSITION " + position);

        final WarcRecord current;
        try {
            current = warcReader.read();
        } catch (Exception e) {
            // A failed read is reported and the loop moves on to the next position.
            LOGGER.error("Exception while reading record " + position + " ");
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            continue;
        }

        if (current == null) {
            break;
        }
        warcWriter.write(current);
        progress.lightUpdate();
    }

    progress.done();
    warcWriter.close();
}
Example 12
Source File: ParallelFilteredProcessorRunner.java From BUbiNG with Apache License 2.0 | 4 votes |
public static void main(final String[] arg) throws Exception { final SimpleJSAP jsap = new SimpleJSAP(ParallelFilteredProcessorRunner.class.getName(), "Processes a store.", new Parameter[] { new FlaggedOption("filter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'f', "filter", "A WarcRecord filter that recods must pass in order to be processed."), new FlaggedOption("processor", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'p', "processor", "A processor to be applied to data.").setAllowMultipleDeclarations(true), new FlaggedOption("writer", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'w', "writer", "A writer to be applied to the results.").setAllowMultipleDeclarations(true), new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "output", "The output filename (- for stdout).").setAllowMultipleDeclarations(true), new FlaggedOption("threads", JSAP.INTSIZE_PARSER, Integer.toString(Runtime.getRuntime().availableProcessors()), JSAP.NOT_REQUIRED, 'T', "threads", "The number of threads to be used."), new Switch("sequential", 'S', "sequential"), new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."), }); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return; final String filterSpec = jsapResult.getString("filter"); final Filter<WarcRecord> filter; if (filterSpec != null) { final FilterParser<WarcRecord> parser = new FilterParser<>(WarcRecord.class); filter = parser.parse(filterSpec); } else filter = null; final InputStream in = jsapResult.userSpecified("store") ? 
new FastBufferedInputStream(new FileInputStream(jsapResult.getString("store"))) : System.in; final ParallelFilteredProcessorRunner parallelFilteredProcessorRunner = new ParallelFilteredProcessorRunner(in, filter); final String[] processor = jsapResult.getStringArray("processor"); final String[] writer = jsapResult.getStringArray("writer"); final String[] output = jsapResult.getStringArray("output"); if (processor.length != writer.length) throw new IllegalArgumentException("You must specify the same number or processors and writers"); if (output.length != writer.length) throw new IllegalArgumentException("You must specify the same number or output specifications and writers"); final String[] packages = new String[] { ParallelFilteredProcessorRunner.class.getPackage().getName() }; final PrintStream[] ops = new PrintStream[processor.length]; for (int i = 0; i < processor.length; i++) { ops[i] = "-".equals(output[i]) ? System.out : new PrintStream(new FastBufferedOutputStream(new FileOutputStream(output[i])), false, "UTF-8"); // TODO: these casts to SOMETHING<Object> are necessary for compilation under Eclipse. Check in the future. parallelFilteredProcessorRunner.add((Processor<Object>)ObjectParser.fromSpec(processor[i], Processor.class, packages, new String[] { "getInstance" }), (Writer<Object>)ObjectParser.fromSpec(writer[i], Writer.class, packages, new String[] { "getInstance" }), ops[i]); } if (jsapResult.userSpecified("sequential")) parallelFilteredProcessorRunner.runSequentially(); else parallelFilteredProcessorRunner.run(jsapResult.getInt("threads")); for (int i = 0; i < processor.length; i++) ops[i].close(); }
Example 13
Source File: ImmutableExternalPrefixMap.java From database with GNU General Public License v2.0 | 4 votes |
/**
 * Command-line entry point: builds an external prefix map from a list of
 * terms and serialises it to the given file.
 *
 * <p>The terms come from one of four sources, depending on the options: a
 * serialised list in a file, a serialised list on standard input, a text
 * file read line by line, or text lines on standard input (optionally
 * gzip-compressed). The {@code zipped} and {@code serialised} switches
 * cannot be combined.
 *
 * @param arg the command-line arguments.
 * @throws IllegalArgumentException if both {@code zipped} and {@code serialised} are given.
 * @throws ClassNotFoundException if a serialised term list cannot be loaded.
 * @throws IOException if reading the terms or writing the map fails.
 * @throws JSAPException if the option parser cannot be configured.
 * @throws SecurityException declared for the charset parser lookup.
 * @throws NoSuchMethodException declared for the charset parser lookup.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws ClassNotFoundException, IOException, JSAPException, SecurityException, NoSuchMethodException {
    final Parameter[] options = {
        new FlaggedOption( "blockSize", JSAP.INTSIZE_PARSER, ( STD_BLOCK_SIZE / 1024 ) + "Ki", JSAP.NOT_REQUIRED, 'b', "block-size", "The size of a block in the dump stream." ),
        new Switch( "serialised", 's', "serialised", "The data source (file or standard input) provides a serialised java.util.List of terms." ),
        new Switch( "zipped", 'z', "zipped", "Standard input is compressed in gzip format." ),
        new FlaggedOption( "termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file instead of standard input." ),
        new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term list encoding." ),
        new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised map." ),
        new UnflaggedOption( "dump", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "An optional dump stream (the resulting map will not be self-contained)." )
    };
    final SimpleJSAP parser = new SimpleJSAP( ImmutableExternalPrefixMap.class.getName(),
        "Builds an external map reading from standard input a newline-separated list of terms or a serialised term list. If the dump stream name is not specified, the map will be self-contained.",
        options );

    final JSAPResult parsed = parser.parse( arg );
    if ( parser.messagePrinted() ) {
        return;
    }

    final String termFile = parsed.getString( "termFile" );
    final Charset charset = (Charset)parsed.getObject( "encoding" );
    final boolean zipped = parsed.getBoolean( "zipped" );
    final boolean serialised = parsed.getBoolean( "serialised" );

    if ( zipped && serialised ) {
        throw new IllegalArgumentException( "The zipped and serialised options are incompatible" );
    }

    final Collection<? extends CharSequence> terms;
    if ( serialised ) {
        // A serialised java.util.List, from the file if given, else from standard input.
        final Object loaded;
        if ( termFile != null ) {
            loaded = BinIO.loadObject( termFile );
        } else {
            loaded = BinIO.loadObject( System.in );
        }
        terms = (List<? extends CharSequence>)loaded;
    } else if ( termFile != null ) {
        terms = new FileLinesCollection( termFile, charset.name(), zipped );
    } else {
        // Text on standard input: each line is copied because the read buffer is reused.
        final ObjectArrayList<MutableString> collected = new ObjectArrayList<MutableString>();
        final FastBufferedReader input = new FastBufferedReader( new InputStreamReader( zipped ? new GZIPInputStream( System.in ) : System.in, charset.name() ) );
        final MutableString line = new MutableString();
        for ( ;; ) {
            if ( input.readLine( line ) == null ) {
                break;
            }
            collected.add( line.copy() );
        }
        input.close();
        terms = collected;
    }

    final ImmutableExternalPrefixMap result = new ImmutableExternalPrefixMap( terms, parsed.getInt( "blockSize" ), parsed.getString( "dump" ) );
    BinIO.storeObject( result, parsed.getString( "map" ) );
}