Java Code Examples for org.apache.commons.io.LineIterator#closeQuietly()

The following examples show how to use org.apache.commons.io.LineIterator#closeQuietly(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the link above each example. Related API usage is listed in the sidebar.
Example 1
Source File: ReadLargeFileUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Streams through a large CSV file one line at a time with Commons IO,
 * logging memory usage before and after the pass.
 */
@Test
public final void givenUsingApacheIo_whenStreamingThroughAFile_thenCorrect() throws IOException {
    // Swap the file name for "trainDataPositive.csv" to stream the other sample.
    final String csvLocation = "G:\\full\\train\\input\\" + "trainDataNegative.csv";

    logMemory();

    final LineIterator lines = FileUtils.lineIterator(new File(csvLocation), "UTF-8");
    try {
        // Each line is pulled and immediately dropped; only one line is held at a time.
        for (; lines.hasNext(); ) {
            lines.nextLine();
        }
    } finally {
        LineIterator.closeQuietly(lines);
    }

    logMemory();
}
 
Example 2
Source File: ClassCountParser.java    From sonar-ruby-plugin with MIT License 6 votes vote down vote up
/**
 * Counts the lines of {@code file} whose trimmed text contains {@code "class "}.
 * An I/O failure while opening the file is logged and the count gathered so far
 * (zero) is returned.
 *
 * @param file the source file to scan
 * @return the number of matching lines
 */
public static int countClasses(File file) {
    LineIterator lines = null;
    int matches = 0;
    try {
        for (lines = FileUtils.lineIterator(file); lines.hasNext(); ) {
            final String trimmed = lines.nextLine().trim();
            if (StringUtils.contains(trimmed, "class ")) {
                matches++;
            }
        }
    } catch (IOException e) {
        LOG.error("Error determining class count for file " + file, e);
    } finally {
        // closeQuietly tolerates a null iterator (open failed).
        LineIterator.closeQuietly(lines);
    }

    return matches;
}
 
Example 3
Source File: Document.java    From tassal with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Reads the document at {@code docLoc} and returns its text with every line
 * trimmed and terminated by a newline character.
 *
 * @return the original document text; an empty string if the file cannot be
 *         opened (the failure is printed to standard error)
 */
public String getOriginal() {

	final StringBuilder doc = new StringBuilder();
	LineIterator iterator = null;
	try {
		iterator = FileUtils.lineIterator(docLoc);
		// Iterate inside the try block: if opening failed there is no iterator
		// to walk, so the loop must not run on a null reference.
		while (iterator.hasNext()) {
			doc.append(iterator.nextLine().trim()).append('\n');
		}
	} catch (final IOException e) {
		// Best effort, as before: report the problem and return what we have.
		e.printStackTrace();
	} finally {
		// Always release the underlying reader; closeQuietly accepts null.
		LineIterator.closeQuietly(iterator);
	}

	return doc.toString();
}
 
Example 4
Source File: SequenceScaling.java    From sequence-mining with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Print useful statistics for the transaction database.
 * <p>
 * Each line is stripped of the {@code -2} marker and split on {@code " -1 "};
 * every resulting token is parsed as an integer item.
 *
 * @param dbFile
 *            the UTF-8 encoded transaction database
 * @throws IOException
 *             if the file cannot be opened
 * @throws NumberFormatException
 *             if a token is not an integer
 */
public static void printTransactionDBStats(final File dbFile) throws IOException {

	int noTransactions = 0;
	double sparsity = 0;
	final Set<Integer> singletons = new HashSet<>();
	final LineIterator it = FileUtils.lineIterator(dbFile, "UTF-8");
	try {
		while (it.hasNext()) {
			final String[] items = it.nextLine().replace("-2", "").split(" -1 ");
			for (final String item : items) {
				singletons.add(Integer.parseInt(item));
			}
			sparsity += items.length;
			noTransactions++;
		}
	} finally {
		// Release the file even when a malformed line aborts the scan.
		LineIterator.closeQuietly(it);
	}

	System.out.println("\nDatabase: " + dbFile);
	System.out.println("Items: " + singletons.size());
	System.out.println("Transactions: " + noTransactions);
	System.out.println("Avg. items per transaction: " + sparsity / noTransactions + "\n");

}
 
Example 5
Source File: WikipediaDomainMap.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Import the GRISP general domains.
 * <p>
 * Reads {@code grispDomains} line by line; each line (tabs replaced by spaces,
 * then trimmed) becomes a domain label whose id is its zero-based line index.
 * Both lookup maps ({@code domain2id}, {@code id2domain}) are rebuilt from scratch.
 *
 * @throws IOException if the domain file cannot be opened
 */
private void importDomains() throws IOException {
    domain2id = new HashMap<String, Integer>();
    id2domain = new HashMap<Integer, String>();

    LineIterator domainIterator = FileUtils.lineIterator(new File(grispDomains));
    try {
        int n = 0;
        while (domainIterator.hasNext()) {
            String line = domainIterator.next();
            final String domain = line.replace('\t', ' ').trim();
            // Integer.valueOf replaces the deprecated Integer constructor.
            final Integer id = Integer.valueOf(n);
            domain2id.put(domain, id);
            id2domain.put(id, domain);
            n++;
        }
    } finally {
        // Release the file even if reading fails part-way through.
        LineIterator.closeQuietly(domainIterator);
    }
}
 
Example 6
Source File: Util.java    From MABED with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads stop words, one per line, from a UTF-8 text file.
 *
 * @param pathToStopwordsFile path of the stop-word file; {@code null} yields an empty list
 * @return the lines of the file in order; empty if the path is {@code null} or
 *         the file cannot be opened (the failure is logged at SEVERE)
 */
static public LinkedList<String> readStopWords(String pathToStopwordsFile){
    final LinkedList<String> words = new LinkedList<>();
    if (pathToStopwordsFile == null) {
        return words;
    }

    LineIterator lines = null;
    try {
        for (lines = FileUtils.lineIterator(new File(pathToStopwordsFile), "UTF-8"); lines.hasNext(); ) {
            words.add(lines.nextLine());
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // Safe for null: the iterator is only set once the file has been opened.
        LineIterator.closeQuietly(lines);
    }
    return words;
}
 
Example 7
Source File: CountLinesTextFile.java    From levelup-java-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the lines of the file at {@code fileLocation} with a Commons IO
 * {@code LineIterator} and expects exactly ten.
 */
@Test
public void count_lines_text_apache() throws IOException {

	final File source = Paths.get(fileLocation).toFile();
	final String charsetName = Charset.defaultCharset().toString();
	final LineIterator lines = FileUtils.lineIterator(source, charsetName);

	long counted = 0;
	try {
		// Count first, then consume the line in the loop's update step.
		for (; lines.hasNext(); lines.nextLine()) {
			counted++;
		}
	} finally {
		LineIterator.closeQuietly(lines);
	}

	assertEquals(10, counted);
}
 
Example 8
Source File: EnhanceMojo.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Read the missing meta data report from a previous run.
 * <p>
 * Lines starting with {@code MARK_CLASS} name the class that the following
 * non-blank lines belong to; each such line is stored in {@code aReportData}
 * under that class. Reading stops as soon as a line starting with
 * {@code MARK_NO_MISSING_META_DATA} is seen.
 *
 * @param aReportFile the report to read; silently skipped if it does not exist
 * @param aReportData receives the (class, entry) pairs found in the report
 * @throws MojoExecutionException if the report cannot be read or an entry
 *         appears before any class line
 */
private void readMissingMetaDataReport(File aReportFile, Multimap<String, String> aReportData)
        throws MojoExecutionException {
  // No report from a previous run: nothing to load.
  if (!aReportFile.exists()) {
    return;
  }

  LineIterator lines = null;
  try {
    String currentClass = null;
    lines = IOUtils.lineIterator(new FileInputStream(aReportFile), encoding);
    while (lines.hasNext()) {
      final String line = lines.next();

      // The report states that nothing is missing.
      if (line.startsWith(MARK_NO_MISSING_META_DATA)) {
        return;
      }

      // A new class section begins.
      if (line.startsWith(MARK_CLASS)) {
        currentClass = line.substring(MARK_CLASS.length()).trim();
        continue;
      }

      // Blank separator line.
      if (StringUtils.isBlank(line)) {
        continue;
      }

      // Anything else is an entry and needs a class to attach to.
      if (currentClass == null) {
        throw new MojoExecutionException("Missing meta data report has invalid format.");
      }
      aReportData.put(currentClass, line.trim());
    }
  } catch (IOException e) {
    final String rootCause = ExceptionUtils.getRootCauseMessage(e);
    throw new MojoExecutionException("Unable to read missing meta data report: "
            + rootCause, e);
  } finally {
    LineIterator.closeQuietly(lines);
  }
}
 
Example 9
Source File: PreScrubberStep.java    From kfs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds the executor for the pre-scrubber step: it pre-processes the backup
 * origin-entry file into the pre-scrubber file, generates the report when the
 * service returned report data, and logs the elapsed time at debug level.
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {
        public boolean execute() {
            final StopWatch timer = new StopWatch();
            timer.start();

            final String directoryPrefix = batchFileDirectoryName + File.separator;
            final String extension = GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            final String inputFile = directoryPrefix + GeneralLedgerConstants.BatchFileSystem.BACKUP_FILE + extension;
            final String outputFile = directoryPrefix + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE + extension;

            PreScrubberReportData reportData = null;
            LineIterator originEntries = null;
            try {
                originEntries = FileUtils.lineIterator(new File(inputFile));
                reportData = preScrubberService.preprocessOriginEntries(originEntries, outputFile);
            }
            catch (IOException e) {
                final String message = "IO exception occurred during pre scrubbing.";
                LOG.error(message, e);
                throw new RuntimeException(message, e);
            }
            finally {
                // Null-safe: the iterator stays null when the input file could not be opened.
                LineIterator.closeQuietly(originEntries);
            }

            if (reportData != null) {
                new PreScrubberReport().generateReport(reportData, preScrubberReportWriterService);
            }

            timer.stop();
            if (LOG.isDebugEnabled()) {
                final double minutes = timer.getTotalTimeSeconds() / 60.0;
                LOG.debug("scrubber step of took " + minutes + " minutes to complete");
            }
            return true;
        }
    };
}
 
Example 10
Source File: PAMCore.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * This method scans the input database to calculate the support of single
 * items.
 * <p>
 * Empty lines and lines starting with {@code #}, {@code %} or {@code @} are
 * skipped. Every other line is split on single spaces and each token is
 * parsed as an integer; negative values are ignored.
 *
 * @param inputFile
 *            the input file (read as UTF-8)
 * @return a multiset for storing the support of each singleton
 * @throws IOException
 *             if the input file cannot be opened
 * @throws NumberFormatException
 *             if a token on a data line is not an integer
 */
public static Multiset<Sequence> scanDatabaseToDetermineFrequencyOfSingleItems(final File inputFile)
		throws IOException {

	final Multiset<Sequence> singletons = HashMultiset.create();

	// for each line (transaction) until the end of file
	final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
	try {
		while (it.hasNext()) {

			final String line = it.nextLine();
			// if the line is a comment, is empty or is a
			// kind of metadata
			if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
				continue;
			}

			// split the line into items
			final String[] lineSplit = line.split(" ");
			// for each item
			final HashSet<Sequence> seenItems = new HashSet<>();
			for (final String itemString : lineSplit) {
				final int item = Integer.parseInt(itemString);
				if (item >= 0) { // ignore end of itemset/sequence tags
					final Sequence seq = new Sequence(item);
					PAMCore.recursiveSetOccurrence(seq, seenItems); // set occurrence
					seenItems.add(seq); // add item to seen
				}
			}
			singletons.addAll(seenItems); // increase the support of the items
		}
	} finally {
		// close the input file, also when a malformed line aborts the scan
		LineIterator.closeQuietly(it);
	}

	return singletons;
}
 
Example 11
Source File: HiCQTLAnnotatorBlockbased.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Matches the desired intra-chromosomal contacts against a sorted contact file
 * and writes one tab-separated result line per desired contact to {@code outWriter}.
 * <p>
 * The normalization factors are read from
 * {@code baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod}.
 * If {@code fileToRead + ".sorted"} does not exist it is created first. Each
 * contact line is split on tabs into two positions and a value; a matched
 * contact's value is divided by the product of the two factors and reported
 * when it is at least {@code minValue}, otherwise a {@code -} placeholder row
 * is written.
 * <p>
 * NOTE(review): the single forward cursor ({@code numberToBeMatched}) presumably
 * relies on {@code contactsToCheck} being sorted the same way as the file — confirm
 * against the caller.
 *
 * @throws IOException if one of the input files cannot be read
 */
private static void processNormalizedIntraContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException {

        //ReadIn normalization chr1
        // NOTE(review): the "\\chr" separator is Windows-specific, and the TextFile is
        // not closed if readAsArrayList() throws.
        TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
        ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
        inputNormChr1.close();

//        System.out.println("Done reading norm factor 1");
        // Create the sorted copy of the contact file on first use.
        if (!Gpio.exists(fileToRead + ".sorted")) {
            umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted");
        }

        // Index of the next desired contact that has not been resolved yet.
        int numberToBeMatched = 0;

        LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

        try {
            while (it.hasNext()) {
                // Columns: position 1, position 2, contact value.
                String[] parts = StringUtils.split(it.nextLine(), '\t');

                int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
                int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

                // Advance through the desired contacts until one lies beyond the current file line.
                while (numberToBeMatched < contactsToCheck.size()) {
                    if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        // File line is before the desired contact: read the next line.
                        break;
                    } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            break;
                        }
                        if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {

                            // Both factors come from the same list because both positions are on the same chromosome.
                            String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
                            String factor2Base = normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

                            double factor1;
                            double factor2;

                            // NOTE(review): if StringUtils is commons-lang's, isNumeric() accepts only digit
                            // characters, so decimal factors would fall into the "Error in files." branch —
                            // confirm the factor file format.
                            if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) {
                                factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base);
                                factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base);

                                double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2);
                                if (contact >= minValue) {
                                    // Normalized contact passes the threshold: report normalized and raw value.
                                    outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]));
                                    numberToBeMatched++;
                                } else {
                                    outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                                    numberToBeMatched++;
                                }
                            } else {
                                // No output row is written for this desired contact in this case.
                                System.out.println("Error in files.");
                                numberToBeMatched++;
                            }
                        } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            // File has moved past the desired contact: write a placeholder row.
                            outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                            numberToBeMatched++;
                        }
                    } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        // File has moved past the desired contact: write a placeholder row.
                        outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        numberToBeMatched++;
                    }
                }
            }
        } finally {
            LineIterator.closeQuietly(it);
        }

    }
 
Example 12
Source File: DBpediaEntityCheckIndexTool.java    From gerbil with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Indexes the subject URI of every triple in the given file.
 * <p>
 * Each line is split on whitespace; lines with fewer than three tokens are
 * skipped. Surrounding angle brackets are stripped from the subject. When the
 * subject differs from the one on the previous triple, its alternative
 * writings are retrieved and indexed (or the URI itself if none are
 * returned). Progress is logged every 100000 triples.
 *
 * @param indexer
 *            receives the URIs to index
 * @param file
 *            path of the UTF-8 encoded triple file; an I/O failure while
 *            opening it is logged and otherwise ignored
 */
public static void index(Indexer indexer, String file) {
	UriEncodingHandlingSameAsRetriever retriever = new UriEncodingHandlingSameAsRetriever();
	LineIterator iterator = null;
	long size = 0, rounds = 0;
	try {
		iterator = FileUtils.lineIterator(new File(file), "UTF-8");
		String old = null;
		final long start = System.currentTimeMillis();
		// iterate over the lines
		while (iterator.hasNext()) {
			String[] split = iterator.next().split("\\s+");
			if (split.length <= 2) {
				continue;
			}

			// get the subject of the triple
			String uri = split[0];
			if (uri.startsWith("<")) {
				uri = uri.substring(1);
			}
			if (uri.endsWith(">")) {
				uri = uri.substring(0, uri.length() - 1);
			}

			// if this subject is new
			if (!uri.equals(old)) {
				// retrieve other writings of this URI
				Set<String> uris = retriever.retrieveSameURIs(uri);
				if (uris != null) {
					for (String u : uris) {
						indexer.index(u);
					}
				} else {
					indexer.index(uri);
				}
				// Remember the subject so that the following triples of the
				// same subject are not resolved and indexed again.
				old = uri;
			}

			size++;
			if (size % 100000 == 0) {
				rounds++;
				String avgTime = DurationFormatUtils
						.formatDurationHMS((System.currentTimeMillis() - start) / rounds);
				LOGGER.info("Got 100000 entities...(Sum: {}, AvgTime: {})", size, avgTime);
			}
		}
	} catch (IOException e) {
		LOGGER.error("Exception while reading file. It will be ignored.", e);
	} finally {
		LineIterator.closeQuietly(iterator);
	}
	LOGGER.info("Successfully indexed {} triples", size);
}
 
Example 13
Source File: HiCQTLAnnotatorBlockbased.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Marks the desired contacts that occur in the raw contact file with a value
 * of at least {@code minValue}.
 * <p>
 * A sorted copy ({@code fileToRead + ".sorted"}) is created first when it does
 * not exist, using the intra- or inter-chromosomal sorter depending on
 * {@code intra}. The sorted file and {@code contactsToCheck} are then walked
 * in step with a single forward cursor.
 *
 * @throws IOException if the sorted file cannot be opened
 */
private static void processRawContactInformation(String fileToRead, double minValue, ArrayList<DesiredChrContact> contactsToCheck, boolean intra) throws IOException {

        final String sortedPath = fileToRead + ".sorted";

        // Make the sorted version available if it is not there yet.
        if (!Gpio.exists(sortedPath)) {
            if (intra) {
                umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(fileToRead, sortedPath);
            } else {
                umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, sortedPath);
            }
        }

        // Index of the next desired contact that still has to be resolved.
        int cursor = 0;

        LineIterator lines = FileUtils.lineIterator(new File(sortedPath), "UTF-8");

        try {
            while (lines.hasNext()) {
                String[] columns = StringUtils.split(lines.nextLine(), '\t');

                int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(columns[0]);
                int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(columns[1]);

                while (cursor < contactsToCheck.size()) {
                    DesiredChrContact wanted = contactsToCheck.get(cursor);

                    // The file line lies before the wanted contact: fetch the next line.
                    if (posChr1 < wanted.getChrLocationSmaller()) {
                        break;
                    }
                    if (posChr1 == wanted.getChrLocationSmaller()) {
                        if (posChr2 < wanted.getChrLocationLarger()) {
                            break;
                        }
                        if (posChr2 == wanted.getChrLocationLarger()) {
                            double contact = org.apache.commons.lang.math.NumberUtils.createDouble(columns[2]);
                            if (contact >= minValue) {
                                wanted.setContact();
                            }
                        }
                    }

                    // Either matched or already passed in the file: move on to the next wanted contact.
                    cursor++;
                }
            }
        } finally {
            LineIterator.closeQuietly(lines);
        }

    }
 
Example 14
Source File: TransactionGenerator.java    From sequence-mining with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Generate transactions from set of interesting sequences and write them to
 * {@code outFile} as UTF-8, one transaction per line in the form
 * {@code item -1 item -1 ... -2}. Empty transactions are not written and do
 * not count towards {@code noTransactions}.
 *
 * @param sequences
 *            the interesting sequences to sample from
 * @param probabilities
 *            per sequence, the integer outcomes and their probabilities used
 *            to build the sampling distribution
 * @param noTransactions
 *            number of non-empty transactions to write
 * @param outFile
 *            the file to (over)write
 * @return set of sequences added to transaction
 * @throws IOException
 *             if the output file cannot be opened, written or read back
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences,
		final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile)
				throws IOException {

	// Set random number seeds
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Add to distribution class for easy sampling
	final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>();
	for (final Sequence seq : sequences.keySet()) {
		final List<Integer> singletons = new ArrayList<>();
		final List<Double> probs = new ArrayList<>();
		for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) {
			singletons.add(entry.getKey());
			probs.add(entry.getValue());
		}
		final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC,
				Ints.toArray(singletons), Doubles.toArray(probs));
		dists.put(seq, dist);
	}

	// Generate transaction database; try-with-resources closes the writer
	// even when sampling fails part-way through
	try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {
		int count = 0;
		while (count < noTransactions) {

			// Generate transaction from distribution
			final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI);
			for (final int item : transaction) {
				out.print(item + " -1 ");
			}
			if (!transaction.isEmpty()) {
				out.print("-2");
				out.println();
				count++;
			}

		}
		// PrintWriter never throws on write errors; surface them explicitly
		if (out.checkError()) {
			throw new IOException("Failed to write transaction database to " + outFile);
		}
	}

	// Print file to screen, decoding with the charset it was written in
	if (VERBOSE) {
		final LineIterator it = new LineIterator(new java.io.InputStreamReader(
				new java.io.FileInputStream(outFile), java.nio.charset.StandardCharsets.UTF_8));
		try {
			while (it.hasNext()) {
				System.out.println(it.nextLine());
			}
		} finally {
			LineIterator.closeQuietly(it);
		}
	}

	return addedSequences;
}
 
Example 15
Source File: DiscoverHardcodedIPAddressRuleProvider.java    From windup with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Decides whether the match described by {@code model} should be ignored,
 * based on the text of the line it was found on.
 * <p>
 * The line is ignored when it is a {@code #} comment in a properties file,
 * when it mentions "version" or "revision" (case-insensitive), or when
 * {@code isMavenVersionTag} says so. If the line cannot be found or the file
 * cannot be read, a warning is logged and the match is not ignored.
 *
 * @param context the graph context, passed through to {@code isMavenVersionTag}
 * @param model the location (file and line number) of the match
 * @return {@code true} if the match should be ignored
 */
private boolean ignoreLine(GraphContext context, FileLocationModel model)
{
    boolean isPropertiesFile = model.getFile() instanceof PropertiesModel;

    int lineNumber = model.getLineNumber();
    LineIterator li = null;
    try
    {
        li = FileUtils.lineIterator(model.getFile().asFile());

        int current = 0;
        while (current < lineNumber && li.hasNext())
        {
            String raw = li.next();
            current++;

            // still seeking towards the line of interest
            if (current < lineNumber)
                continue;

            String line = StringUtils.trim(raw);
            // check that it isn't commented.
            if (isPropertiesFile && StringUtils.startsWith(line, "#"))
                return true;
            // WINDUP-808 - Remove matches with "version" or "revision" on the same line
            if (StringUtils.containsIgnoreCase(line, "version") || StringUtils.containsIgnoreCase(line, "revision"))
                return true;
            return isMavenVersionTag(context, model);
        }

        // Reached when the line number is below 1 or the file has fewer lines
        // than expected; both cases are now reported.
        LOG.warning("Did not find line: " + lineNumber + " in file: " + model.getFile().getFileName());
    }
    catch (IOException | RuntimeException e)
    {
        LOG.log(Level.WARNING, "Exception reading properties from file: " + model.getFile().getFilePath(), e);
    }
    finally
    {
        LineIterator.closeQuietly(li);
    }

    return false;
}
 
Example 16
Source File: LaborPreScrubberStep.java    From kfs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds the executor for the labor pre-scrubber step: it pre-processes the
 * labor backup file into the labor pre-scrubber file, generates the report
 * when the service returned report data, and logs the elapsed time at debug level.
 *
 * @see org.kuali.kfs.sys.batch.AbstractWrappedBatchStep#getCustomBatchExecutor()
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {

        /**
         * @see org.kuali.kfs.sys.batch.service.WrappedBatchExecutorService.CustomBatchExecutor#execute()
         */
        public boolean execute() {
            final StopWatch timer = new StopWatch();
            timer.start();

            final String directoryPrefix = batchFileDirectoryName + File.separator;
            final String extension = GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            final String inputFile = directoryPrefix + LaborConstants.BatchFileSystem.BACKUP_FILE + extension;
            final String outputFile = directoryPrefix + LaborConstants.BatchFileSystem.PRE_SCRUBBER_FILE + extension;

            PreScrubberReportData reportData = null;
            LineIterator originEntries = null;
            try {
                originEntries = FileUtils.lineIterator(new File(inputFile));
                reportData = laborPreScrubberService.preprocessOriginEntries(originEntries, outputFile);
            }
            catch (IOException e) {
                final String message = "IO exception occurred during pre scrubbing.";
                LOG.error(message, e);
                throw new RuntimeException(message, e);
            }
            finally {
                // Null-safe: the iterator stays null when the input file could not be opened.
                LineIterator.closeQuietly(originEntries);
            }

            if (reportData != null) {
                // The report writer is initialized and destroyed around the report generation.
                final WrappingBatchService wrappedWriter = (WrappingBatchService) laborPreScrubberReportWriterService;
                wrappedWriter.initialize();
                new PreScrubberReport().generateReport(reportData, laborPreScrubberReportWriterService);
                wrappedWriter.destroy();
            }

            timer.stop();
            if (LOG.isDebugEnabled()) {
                final double minutes = timer.getTotalTimeSeconds() / 60.0;
                LOG.debug("labor pre-scrubber scrubber step took " + minutes + " minutes to complete");
            }
            return true;
        }

    };
}
 
Example 17
Source File: TransactionGenerator.java    From api-mining with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Generate transactions from set of interesting sequences and write them to
 * {@code outFile} as UTF-8, one transaction per line in the form
 * {@code item -1 item -1 ... -2}. Empty transactions are not written and do
 * not count towards {@code noTransactions}.
 *
 * @param sequences
 *            the interesting sequences to sample from
 * @param noTransactions
 *            number of non-empty transactions to write
 * @param outFile
 *            the file to (over)write
 * @return set of sequences added to transaction
 * @throws IOException
 *             if the output file cannot be opened, written or read back
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(
		final Map<Sequence, Double> sequences, final int noTransactions,
		final File outFile) throws IOException {

	// Set random number seeds
	final Random random = new Random(1);
	final Random randomI = new Random(10);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Generate transaction database; try-with-resources closes the writer
	// even when sampling fails part-way through
	try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {
		int count = 0;
		while (count < noTransactions) {

			// Generate transaction from distribution
			final Transaction transaction = sampleFromDistribution(random,
					sequences, addedSequences, randomI);
			for (final int item : transaction) {
				out.print(item + " -1 ");
			}
			if (!transaction.isEmpty()) {
				out.print("-2");
				out.println();
				count++;
			}

		}
		// PrintWriter never throws on write errors; surface them explicitly
		if (out.checkError()) {
			throw new IOException("Failed to write transaction database to " + outFile);
		}
	}

	// Print file to screen, decoding with the charset it was written in
	if (VERBOSE) {
		final LineIterator it = new LineIterator(new java.io.InputStreamReader(
				new java.io.FileInputStream(outFile), java.nio.charset.StandardCharsets.UTF_8));
		try {
			while (it.hasNext()) {
				System.out.println(it.nextLine());
			}
		} finally {
			LineIterator.closeQuietly(it);
		}
	}

	return addedSequences;
}
 
Example 18
Source File: WikipediaDomainMap.java    From entity-fishing with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a tab-separated category-to-domain mapping file.
 * <p>
 * The first token of each non-blank line is a Wikipedia category title; the
 * remaining tokens are GRISP domain labels. Categories unknown to
 * {@code wikipedia} and labels missing from {@code domain2id} are reported
 * with a warning and skipped. A category that appears more than once is
 * reported and its earlier entry is replaced.
 *
 * @param mappingFilePath path of the mapping file
 * @return category id mapped to the list of domain ids found on its line
 * @throws IOException if the mapping file cannot be opened
 */
private Map<Integer, List<Integer>> readMapping(String mappingFilePath) throws IOException {
    Map<Integer, List<Integer>> domains = new HashMap<Integer, List<Integer>>();

    LineIterator iterator = FileUtils.lineIterator(new File(mappingFilePath));
    try {
        while (iterator.hasNext()) {
            String line = iterator.nextLine();
            if (isBlank(line)) {
                continue;
            }
            StringTokenizer st = new StringTokenizer(line, "\t");
            String category = null;
            int categoryId = -1;
            if (st.hasMoreTokens()) {
                category = st.nextToken();
                com.scienceminer.nerd.kb.model.Category theCategory = wikipedia.getCategoryByTitle(category);
                if (theCategory == null)
                    LOGGER.warn(category + " is not a category found in Wikipedia.");
                else {
                    categoryId = theCategory.getId();
                    if (domains.get(Integer.valueOf(categoryId)) != null) {
                        LOGGER.warn(category + " is already defined in " + mappingFilePath);
                    }
                }
            }
            if (categoryId != -1) {
                List<Integer> dom = new ArrayList<Integer>();
                while (st.hasMoreTokens()) {
                    String domain = st.nextToken();
                    // Single lookup instead of get-then-get.
                    Integer domainId = domain2id.get(domain);
                    if (domainId == null)
                        LOGGER.warn(domain + " is an invalid GRISP domain label in " + mappingFilePath);
                    else {
                        dom.add(domainId);
                    }
                }
                domains.put(Integer.valueOf(categoryId), dom);
            }
        }
    } finally {
        // Release the file even if a lookup or read fails part-way through.
        LineIterator.closeQuietly(iterator);
    }

    return domains;
}
 
Example 19
Source File: HiCQTLAnnotatorSnpBased.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Looks up the normalized inter-chromosomal contact value for each requested contact and
 * writes one tab-separated result line per contact to {@code outWriter}.
 *
 * <p>Two normalization-factor files are read, one per chromosome, from
 * {@code baseName + "\\chr" + <chr> + "_" + resolution + "." + normMethod}. The raw contact file
 * is read from {@code fileToRead + ".sorted"}; that file is created first when it does not exist.
 * Each line of the sorted file is split on tabs into the position on the smaller chromosome, the
 * position on the larger chromosome and the raw contact value.
 *
 * <p>The file and {@code contactsToCheck} are walked together in a single forward pass.
 * NOTE(review): this presumably requires {@code contactsToCheck} to be ordered the same way as the
 * sorted file (smaller-chromosome position, then larger-chromosome position) - confirm with callers.
 * Requested contacts that are still unmatched when the file ends produce no output line.
 *
 * <p>For a requested contact found in the file, the raw value is divided by the product of the
 * two normalization factors; a factor that is not numeric is treated as {@code 1.0}. The line
 * reports the normalized and raw values when the normalized value is at least {@code minValue},
 * and {@code "-"} placeholders otherwise. Requested contacts that the file has already passed
 * also get a placeholder line.
 *
 * <p>If a matched position maps to a normalization bin that is outside the loaded factor list,
 * diagnostics are printed to standard output and the JVM is terminated with exit code -1.
 *
 * @param fileToRead      path of the (unsorted) inter-chromosomal contact file
 * @param baseName        directory prefix of the normalization-factor files
 * @param normMethod      file extension identifying the normalization method
 * @param chrSmaller      name of the smaller chromosome, used in the factor file name
 * @param chrLarger       name of the larger chromosome, used in the factor file name
 * @param contactsToCheck contacts to look up
 * @param resolution      resolution label, used in the factor file names and converted to a bin size
 * @param minValue        minimum normalized contact value that is reported as a contact
 * @param outWriter       destination of the result lines
 * @throws IOException if one of the input files cannot be read or the output cannot be written
 */
private static void processNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException {

    // Read the normalization factors of the smaller chromosome.
    TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
    inputNormChr1.close();

    // Read the normalization factors of the larger chromosome.
    TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorLargerChr = inputNormChr2.readAsArrayList();
    inputNormChr2.close();

    // The single-pass matching below needs the contact file in sorted order.
    if (!Gpio.exists(fileToRead + ".sorted")) {
        umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted");
    }

    // Index of the next requested contact that still has to be resolved.
    int numberToBeMatched = 0;

    LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

    try {
        // Stop reading as soon as every requested contact has been handled.
        while (numberToBeMatched < contactsToCheck.size() && it.hasNext()) {
            String[] parts = StringUtils.split(it.nextLine(), '\t');

            int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
            int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

            while (numberToBeMatched < contactsToCheck.size()) {
                DesiredChrContact target = contactsToCheck.get(numberToBeMatched);

                if (posChr1 < target.getChrLocationSmaller()) {
                    // The file has not reached the requested contact yet: read the next line.
                    break;
                } else if (posChr1 == target.getChrLocationSmaller()) {
                    if (posChr2 < target.getChrLocationLarger()) {
                        break;
                    }
                    if (posChr2 == target.getChrLocationLarger()) {
                        int numericResolution = getNumericResolution(resolution);
                        int binSmaller = (posChr1 / numericResolution) + 1;
                        int binLarger = (posChr2 / numericResolution) + 1;

                        // The bin is used as a list index below, so an index equal to size() is
                        // already out of range (the original '>' let that case through).
                        if (binSmaller >= normFactorSmallerChr.size()) {
                            System.out.println(baseName);
                            System.out.println("Smaller");
                            System.out.println(binSmaller);
                            System.out.println(normFactorSmallerChr.size());
                            System.exit(-1);
                        }
                        if (binLarger >= normFactorLargerChr.size()) {
                            System.out.println(baseName);
                            System.out.println("Larger");
                            System.out.println(binLarger);
                            System.out.println(normFactorLargerChr.size());
                            System.exit(-1);
                        }
                        String factor1Base = normFactorSmallerChr.get(binSmaller);
                        String factor2Base = normFactorLargerChr.get(binLarger);

                        // A factor that cannot be parsed keeps the neutral value 1.0.
                        double factor1 = 1.0;
                        double factor2 = 1.0;

                        if (NumberUtils.isNumber(factor1Base) && NumberUtils.isNumber(factor2Base)) {
                            factor1 = Double.parseDouble(factor1Base);
                            factor2 = Double.parseDouble(factor2Base);
                        } else if (NumberUtils.isNumber(factor1Base)) {
                            factor1 = Double.parseDouble(factor1Base);
                            System.out.println("Error in files.");
                            System.out.println("Base 2 is reset to 1");
                        } else if (NumberUtils.isNumber(factor2Base)) {
                            factor2 = Double.parseDouble(factor2Base);
                            System.out.println("Error in files.");
                            System.out.println("Base 1 is reset to 1");
                        }

                        double rawContact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]);
                        double contact = rawContact / (factor1 * factor2);
                        if (contact >= minValue) {
                            outWriter.writeln(target.getSnpName() + "\t" + target.getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + rawContact);
                        } else {
                            outWriter.writeln(target.getSnpName() + "\t" + target.getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        }
                        numberToBeMatched++;

                    } else if (posChr2 > target.getChrLocationLarger()) {
                        // The file moved past the requested contact without containing it.
                        outWriter.writeln(target.getSnpName() + "\t" + target.getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        numberToBeMatched++;
                    }
                } else if (posChr1 > target.getChrLocationSmaller()) {
                    // The file moved past the requested contact without containing it.
                    outWriter.writeln(target.getSnpName() + "\t" + target.getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                    numberToBeMatched++;
                }
            }
        }
    } finally {
        LineIterator.closeQuietly(it);
    }

}
 
Example 20
Source File: ScrubberProcessImpl.java    From kfs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Scrub this single group read only. This will only output the scrubber report. It won't output any other groups.
 *
 * <p>The raw input file is first run through the pre-scrubber; when that produces report data, a
 * pre-scrubber report is generated. The pre-scrubber output is then sorted into
 * {@code fileName + ".sort"}, the entries are scrubbed, and finally the sorted, valid, error and
 * expired output files are deleted.
 *
 * @param fileName the path of the unsorted origin entry file that should be scrubbed
 * @param documentNumber the document number of any specific entries to scrub
 * @throws RuntimeException if the input file cannot be read for pre-scrubbing, or if deleting the
 *         output files throws an exception
 */
@Override
public void scrubGroupReportOnly(String fileName, String documentNumber) {
    LOG.debug("scrubGroupReportOnly() started");
    this.inputFile = fileName + ".sort";
    this.validFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_VALID_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    this.errorFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_ERROR_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    this.expiredFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_EXPIRED_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    String prescrubOutput = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    this.ledgerSummaryReport = new LedgerSummaryReport();
    runDate = calculateRunDate(dateTimeService.getCurrentDate());

    PreScrubberReportData preScrubberReportData = null;

    // run pre-scrubber on the raw (unsorted) input into the sort process
    LineIterator inputEntries = null;
    try {
        inputEntries = FileUtils.lineIterator(new File(fileName));
        preScrubberReportData = preScrubberService.preprocessOriginEntries(inputEntries, prescrubOutput);
    }
    catch (IOException e1) {
        LOG.error("Error encountered trying to prescrub GLCP/LLCP document", e1);
        throw new RuntimeException("Error encountered trying to prescrub GLCP/LLCP document", e1);
    }
    finally {
        // closeQuietly accepts null, so this is safe even when opening the file failed
        LineIterator.closeQuietly(inputEntries);
    }
    if (preScrubberReportData != null) {
        preScrubberReportWriterService.setDocumentNumber(documentNumber);
        ((WrappingBatchService)preScrubberReportWriterService).initialize();
        try {
            new PreScrubberReport().generateReport(preScrubberReportData, preScrubberReportWriterService);
        }
        finally {
            // always release the report writer, even when report generation fails
            ((WrappingBatchService)preScrubberReportWriterService).destroy();
        }
    }
    BatchSortUtil.sortTextFileWithFields(prescrubOutput, inputFile, new ScrubberSortComparator());

    scrubEntries(true, documentNumber);

    // delete files
    File[] outputFiles = { new File(inputFile), new File(validFile), new File(errorFile), new File(expiredFile) };
    try {
        for (File outputFile : outputFiles) {
            // File.delete() reports failure through its return value rather than an exception;
            // surface the case where the file is still present instead of ignoring it.
            if (!outputFile.delete() && outputFile.exists()) {
                LOG.warn("scrubGroupReportOnly could not delete output file: " + outputFile.getAbsolutePath());
            }
        }
    } catch (Exception e){
        LOG.error("scrubGroupReportOnly delete output files process Stopped: " + e.getMessage());
        throw new RuntimeException("scrubGroupReportOnly delete output files process Stopped: " + e.getMessage(), e);
    }
}