Java Code Examples for it.unimi.dsi.logging.ProgressLogger#lightUpdate()

The following examples show how to use it.unimi.dsi.logging.ProgressLogger#lightUpdate() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParallelFilteredProcessorRunner.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
@Override
public Void call() throws Exception {
	final ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");

	pl.itemsName = "records";
	pl.displayFreeMemory = true;
	pl.displayLocalSpeed = true;
	pl.start("Scanning...");

	try {
		for(;;) {
			final Result<?>[] result = queue.take();
			if (result == Result.END_OF_RESULTS) {
				pl.done();
				return null;
			}
			for (final Result<?> r : result) if (r != null) r.write();
			pl.lightUpdate();
		}
	}
	catch(final Exception e) {
		LOGGER.error("Exception in flushing thread", e);
		throw e;
	}
}
 
Example 2
Source File: GZIPIndexer.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a list of pointers to a GZIP archive entries positions (including the end of file).
 *
 * @param in the stream from which to read the GZIP archive.
 * @param pl a progress logger.
 * @return a list of longs where the <em>i</em>-th long is the offset in the stream of the first byte of the <em>i</em>-th archive entry.
 */
public static LongBigArrayBigList index(final InputStream in, final ProgressLogger pl) throws IOException {
	final LongBigArrayBigList pointers = new LongBigArrayBigList();
	long current = 0;
	final GZIPArchiveReader gzar = new GZIPArchiveReader(in);
	GZIPArchive.ReadEntry re;
	for (;;) {
		re = gzar.skipEntry();
		if (re == null) break;
		pointers.add(current);
		current += re.compressedSkipLength;
		if (pl != null) pl.lightUpdate();
	}
	in.close();
	return pointers;
}
 
Example 3
Source File: BuildRepetitionSet.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
public static void main(String[] arg) throws IOException {
	if (arg.length == 0) {
		System.err.println("Usage: " + BuildRepetitionSet.class.getSimpleName() + " REPETITIONSET");
		System.exit(1);
	}

	final FastBufferedReader fastBufferedReader = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
	final MutableString s = new MutableString();
	final LongOpenHashSet repeatedSet = new LongOpenHashSet();
	final String outputFilename = arg[0];
	final ProgressLogger pl = new ProgressLogger();

	MutableString lastUrl = new MutableString();
	pl.itemsName = "lines";
	pl.start("Reading... ");
	while(fastBufferedReader.readLine(s) != null) {
		final int firstTab = s.indexOf('\t');
		final int secondTab = s.indexOf('\t', firstTab + 1);
		MutableString url = s.substring(secondTab + 1);
		if (url.equals(lastUrl)) {
			final int storeIndex = Integer.parseInt(new String(s.array(), 0, firstTab));
			final long storePosition = Long.parseLong(new String(s.array(), firstTab + 1, secondTab - firstTab - 1));
			repeatedSet.add((long)storeIndex << 48 | storePosition);
			System.out.print(storeIndex);
			System.out.print('\t');
			System.out.print(storePosition);
			System.out.print('\t');
			System.out.println(url);
		}

		lastUrl = url;
		pl.lightUpdate();
	}

	pl.done();

	fastBufferedReader.close();
	BinIO.storeObject(repeatedSet, outputFilename);
}
 
Example 4
Source File: RandomReadWritesTest.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("resource")
public static int[] writeRecords(final String path, final int numRecords, final WarcRecord[] randomRecords, final int parallel) throws IOException, InterruptedException {
	final ProgressLogger pl = new ProgressLogger(LOGGER, "records");
	if (parallel <= 1) pl.expectedUpdates = numRecords;
	final ProgressLogger plb = new ProgressLogger(LOGGER, "KB");
	final CountingOutputStream cos = new CountingOutputStream(new FastBufferedOutputStream(new FileOutputStream (path)));
	final WarcWriter ww;
	if (parallel == 0) {
		ww = new UncompressedWarcWriter(cos);
		pl.start("Writing records…");
	} else if (parallel == 1) {
		ww = new CompressedWarcWriter(cos);
		pl.start("Writing records (compressed)…");
	} else {
		ww = null;
		pl.start("SHOULD NOT HAPPEN");
		throw new IllegalStateException();
	}
	plb.start();
	long written = 0;
	final int[] position = new int[numRecords];
	for (int i = 0; i < numRecords; i++) {
		final int pos = RandomTestMocks.RNG.nextInt(randomRecords.length);
		position[i] = pos;
		ww.write(randomRecords[pos]);
		if (parallel <= 0) {
			pl.lightUpdate();
			plb.update((cos.getCount() - written) / 1024);
		}
		written = cos.getCount();
	}
	ww.close();
	pl.done(numRecords);
	plb.done(cos.getCount());
	return position;
}
 
Example 5
Source File: BloomFilter.java    From database with GNU General Public License v2.0 5 votes vote down vote up
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
	
	final SimpleJSAP jsap = new SimpleJSAP( BloomFilter.class.getName(), "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
			new Parameter[] {
				new FlaggedOption( "bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read terms." ),
				new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
				new UnflaggedOption( "bloomFilter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list." ),
				new UnflaggedOption( "size", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)." ),
				new UnflaggedOption( "precision", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The precision of the filter." )
	});
	
	JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;
	
	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final String filterName = jsapResult.getString( "bloomFilter" );
	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );

	BloomFilter filter = new BloomFilter( jsapResult.getInt( "size" ), jsapResult.getInt( "precision" ) );
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "terms";
	pl.start( "Reading terms..." );
	MutableString s = new MutableString();
	FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );
	while( reader.readLine( s ) != null ) { 
		filter.add( s );
		pl.lightUpdate();
	}
	pl.done();

	BinIO.storeObject( filter, filterName );
}
 
Example 6
Source File: GlobalVisitStats.java    From fasten with Apache License 2.0 4 votes vote down vote up
public static Result reaches(final KnowledgeBase kb, final long startSig, final int maxRevs, final ProgressLogger pl) {
	final LongOpenHashSet result = new LongOpenHashSet();
	final Object2ObjectOpenHashMap<String, IntOpenHashSet> product2Revs = new Object2ObjectOpenHashMap<>();
	final MutableLong totRevs = new MutableLong();

	// Visit queue
	final LongArrayFIFOQueue queue = new LongArrayFIFOQueue();
	queue.enqueue(startSig);
	result.add(startSig);

	String p = kb.callGraphs.get(index(startSig)).product;
	IntOpenHashSet revs = new IntOpenHashSet();
	revs.add(index(startSig));
	product2Revs.put(p, revs);
	totRevs.increment();


	pl.itemsName = "nodes";
	pl.info = new Object() {
		@Override
		public String toString() {
			return "[nodes: " + result.size() + " products: " + product2Revs.size() + " revisions: " + totRevs.getValue() + "]";
		}
	};

	pl.start("Visiting reachable nodes...");

	while (!queue.isEmpty()) {
		final long node = queue.dequeueLong();

		for (final long s : kb.successors(node)) if (!result.contains(s)) {
			p = kb.callGraphs.get(index(s)).product;
			final long gid = gid(s);
			if (badGIDs.contains(gid)) continue;
			final String targetNameSpace = kb.new Node(gid, index(s)).toFastenURI().getRawNamespace();
			if (targetNameSpace.startsWith("java.") || targetNameSpace.startsWith("javax.") || targetNameSpace.startsWith("jdk.")) {
				badGIDs.add(gid);
				continue;
			}
			revs = product2Revs.get(p);
			if (revs == null) product2Revs.put(p, revs = new IntOpenHashSet());
			if (revs.contains(index(s)) || revs.size() < maxRevs) {
				queue.enqueue(s);
				result.add(s);
				//System.out.println(kb.new Node(gid(node), index(node)).toFastenURI() + " -> " + kb.new Node(gid(s), index(s)).toFastenURI());
				if (revs.add(index(s))) totRevs.increment();
			}
		}
		pl.lightUpdate();
	}

	pl.done();
	return new Result(result, product2Revs.size(), totRevs.getValue().longValue());
}
 
Example 7
Source File: GlobalVisitStats.java    From fasten with Apache License 2.0 4 votes vote down vote up
public static Result coreaches(final KnowledgeBase kb, final long startSig, final int maxRevs, final ProgressLogger pl) {
	final LongOpenHashSet result = new LongOpenHashSet();
	final Object2ObjectOpenHashMap<String, IntOpenHashSet> product2Revs = new Object2ObjectOpenHashMap<>();
	final MutableLong totRevs = new MutableLong();

	// Visit queue
	final LongArrayFIFOQueue queue = new LongArrayFIFOQueue();
	queue.enqueue(startSig);
	result.add(startSig);

	String p = kb.callGraphs.get(index(startSig)).product;
	IntOpenHashSet revs = new IntOpenHashSet();
	revs.add(index(startSig));
	product2Revs.put(p, revs);
	totRevs.increment();


	pl.itemsName = "nodes";
	pl.info = new Object() {
		@Override
		public String toString() {
			return "[nodes: " + result.size() + " products: " + product2Revs.size() + " revisions: " + totRevs.getValue() + "]";
		}
	};
	pl.start("Visiting coreachable nodes...");
	while (!queue.isEmpty()) {
		final long node = queue.dequeueLong();

		for (final long s : kb.predecessors(node)) if (!result.contains(s)) {
			p = kb.callGraphs.get(index(s)).product;
			final String targetNameSpace = kb.new Node(gid(s), index(s)).toFastenURI().getRawNamespace();
			if (targetNameSpace.startsWith("java.") || targetNameSpace.startsWith("javax.") || targetNameSpace.startsWith("jdk.")) continue;
			revs = product2Revs.get(p);
			if (revs == null) product2Revs.put(p, revs = new IntOpenHashSet());
			if (revs.contains(index(s)) || revs.size() < maxRevs) {
				queue.enqueue(s);
				result.add(s);
				//System.out.println(kb.new Node(gid(node), index(node)).toFastenURI() + " -> " + kb.new Node(gid(s), index(s)).toFastenURI());
				if (revs.add(index(s))) totRevs.increment();
			}
		}
		pl.lightUpdate();
	}

	pl.done();
	return new Result(result, product2Revs.size(), totRevs.getValue().longValue());
}
 
Example 8
Source File: WarcCompressor.java    From BUbiNG with Apache License 2.0 4 votes vote down vote up
public static void main(String arg[]) throws IOException, InterruptedException, JSAPException {

	SimpleJSAP jsap = new SimpleJSAP(WarcCompressor.class.getName(),
		"Given a store uncompressed, write a compressed store.",
		new Parameter[] { new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "output", "The output filename  (- for stdout)."),
		new UnflaggedOption("store", JSAP.STRING_PARSER, JSAP.NOT_REQUIRED, "The name of the store (if omitted, stdin)."),
	});

	JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final InputStream in = jsapResult.userSpecified("store") ? new FastBufferedInputStream(new FileInputStream(jsapResult.getString("store"))) : System.in;

	final WarcReader reader = new UncompressedWarcReader(in);
	final ProgressLogger pl = new ProgressLogger(LOGGER, 1, TimeUnit.MINUTES, "records");
	final String output = jsapResult.getString("output");

	PrintStream out = "-".equals(output) ? System.out : new PrintStream(new FastBufferedOutputStream(new FileOutputStream(output)), false, "UTF-8");
	final WarcWriter writer = new CompressedWarcWriter(out);

	pl.itemsName = "records";
	pl.displayFreeMemory = true;
	pl.displayLocalSpeed = true;
	pl.start("Scanning...");

	for (long storePosition = 0;; storePosition++) {
	    LOGGER.trace("STOREPOSITION " + storePosition);
	    WarcRecord record = null;
	    try {
		record = reader.read();
	    } catch (Exception e) {
		LOGGER.error("Exception while reading record " + storePosition + " ");
		LOGGER.error(e.getMessage());
		e.printStackTrace();
		continue;
	    }
	    if (record == null)
		break;
	    writer.write(record);
	    pl.lightUpdate();
	}
	pl.done();
	writer.close();
    }
 
Example 9
Source File: EntityEmbeddings.java    From entity-fishing with Apache License 2.0 4 votes vote down vote up
@Override
public void run() {
    ProgressLogger pl = new ProgressLogger();
    final int nwords = vectors.size();
    final int d = embeddingsSize;
    try {
        pl.count = count / nbThreads;
        pl.itemsName = "entities";
        final PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(output+"."+rank, false)));
        /*if (rank == 0)
            pw.println(count + " " + d);*/

        float alpha = 10;
        EntityEmbeddings eb = new EntityEmbeddings();
        final BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File(input)) , "UTF-8"));
        pl.start();
        String line;
        int nb = 0;
        int nbWritten = 0;
        while((line = br.readLine()) != null) {
            if ((nb < (count/nbThreads)*rank)) {
                nb++; 
                continue;
            }

            if ((rank != nbThreads-1) && (nb > (count/nbThreads)*(rank+1)))
                break;

            String[] parts = line.split("\t");
            if(parts.length > 1) {
                TrainingExamples ex = eb.getVectors(parts[1], vectors, rho, nwords, maxWords);
                if (ex.y.length == 0) {
                    nb++;
                    continue;
                }
                float[] w = eb.trainLR2(ex.x, d, ex.y, alpha);
                pw.print(parts[0] + " ");
                for(int i = 0; i < d; i++) {
                    pw.print(w[i] + " ");
                }
                pw.println();
                pl.lightUpdate();
                nbWritten++;
                if (nbWritten == 1000) {
                    pw.flush();
                    nbWritten = 0;
                }

                for(int i = 0; i < ex.y.length; i++) {
                    if(ex.y[i] > 0) {
                        eb.scoreLR(ex.x[i], w);
                        //double v = eb.scoreLR(ex.x[i], w);
                    }
                }
            }
            nb++;
        }
        br.close();
        pw.close();
        pl.stop();
    } catch(Exception e) {
        e.printStackTrace();
    }
}