org.apache.commons.io.LineIterator#closeQuietly

Source File: ReadLargeFileUnitTest.java From tutorials with MIT License

6 votes

/**
 * Streams a file line by line through a Commons IO {@code LineIterator},
 * calling {@code logMemory()} once before the file is opened and once after
 * the iterator has been closed.
 *
 * @throws IOException if the file cannot be opened
 */
@Test
public final void givenUsingApacheIo_whenStreamingThroughAFile_thenCorrect() throws IOException {
    // Alternative input: "G:\\full\\train\\input\\" + "trainDataPositive.csv"
    final String inputPath = "G:\\full\\train\\input\\" + "trainDataNegative.csv";

    logMemory();

    final LineIterator lines = FileUtils.lineIterator(new File(inputPath), "UTF-8");
    try {
        for (; lines.hasNext();) {
            // Each line is fetched and then discarded; real processing would go here.
            final String current = lines.nextLine();
        }
    } finally {
        // Always release the underlying reader, even if iteration fails.
        LineIterator.closeQuietly(lines);
    }

    logMemory();
}

Source File: ClassCountParser.java From sonar-ruby-plugin with MIT License

6 votes

/**
 * Counts the lines of {@code file} whose trimmed text contains {@code "class "}.
 * An {@link IOException} is logged rather than propagated; the count gathered
 * up to that point is returned.
 *
 * @param file the file to scan
 * @return the number of matching lines
 */
public static int countClasses(File file) {
    LineIterator lines = null;
    int classCount = 0;
    try {
        for (lines = FileUtils.lineIterator(file); lines.hasNext();) {
            final String trimmed = lines.nextLine().trim();
            if (StringUtils.contains(trimmed, "class ")) {
                classCount++;
            }
        }
    } catch (IOException e) {
        LOG.error("Error determining class count for file " + file, e);
    } finally {
        // Safe to call with null when the iterator was never opened.
        LineIterator.closeQuietly(lines);
    }
    return classCount;
}

Source File: Document.java From tassal with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Reads the document at {@code docLoc} line by line, trims every line and
 * joins the lines with {@code "\n"} (each line is followed by a newline).
 * <p>
 * If the file cannot be opened the stack trace is printed and an empty
 * string is returned, instead of failing later with a
 * {@link NullPointerException} on the unopened iterator.
 *
 * @return the original document text
 */
public String getOriginal() {

	// Local, single-threaded buffer: StringBuilder avoids StringBuffer's locking
	final StringBuilder doc = new StringBuilder();
	LineIterator iterator = null;
	try {
		iterator = FileUtils.lineIterator(docLoc);
		while (iterator.hasNext())
			doc.append(iterator.nextLine().trim()).append('\n');
	} catch (final IOException e) {
		e.printStackTrace();
	} finally {
		// closeQuietly accepts null, so this is safe when opening failed
		LineIterator.closeQuietly(iterator);
	}

	return doc.toString();
}

Source File: SequenceScaling.java From sequence-mining with GNU General Public License v3.0

6 votes

/**
 * Print useful statistics for the transaction database: the number of
 * distinct items, the number of transactions (lines) and the average number
 * of items per transaction.
 *
 * @param dbFile
 *            the transaction database, read as UTF-8 with one transaction
 *            per line
 * @throws IOException
 *             if the database file cannot be opened
 */
public static void printTransactionDBStats(final File dbFile) throws IOException {

	int noTransactions = 0;
	double sparsity = 0;
	final Set<Integer> singletons = new HashSet<>();
	final LineIterator it = FileUtils.lineIterator(dbFile, "UTF-8");
	try {
		while (it.hasNext()) {
			// Remove the "-2" text, then split the remainder on " -1 "
			final String[] items = it.nextLine().replace("-2", "").split(" -1 ");
			for (final String item : items)
				singletons.add(Integer.parseInt(item));
			sparsity += items.length;
			noTransactions++;
		}
	} finally {
		// Release the file even when a malformed item makes parseInt throw
		LineIterator.closeQuietly(it);
	}

	System.out.println("\nDatabase: " + dbFile);
	System.out.println("Items: " + singletons.size());
	System.out.println("Transactions: " + noTransactions);
	System.out.println("Avg. items per transaction: " + sparsity / noTransactions + "\n");

}

Source File: WikipediaDomainMap.java From entity-fishing with Apache License 2.0

6 votes

/**
 * Import the GRISP general domains.
 * <p>
 * Reads the file at {@code grispDomains} line by line. Each line, with tabs
 * replaced by spaces and then trimmed, becomes a domain label; labels are
 * numbered from 0 in file order and stored in both {@code domain2id} and
 * {@code id2domain}.
 *
 * @throws IOException if the domain file cannot be opened
 */
private void importDomains() throws IOException {
    domain2id = new HashMap<String, Integer>();
    id2domain = new HashMap<Integer, String>();

    LineIterator domainIterator = FileUtils.lineIterator(new File(grispDomains));
    try {
        int n = 0;
        while (domainIterator.hasNext()) {
            String line = domainIterator.next();
            final String domain = line.replace('\t', ' ').trim();
            // Integer.valueOf replaces the deprecated Integer constructor
            final Integer id = Integer.valueOf(n);
            domain2id.put(domain, id);
            id2domain.put(id, domain);
            n++;
        }
    } finally {
        // Close the file even if iteration fails part-way through
        LineIterator.closeQuietly(domainIterator);
    }
}

Source File: Util.java From MABED with GNU General Public License v3.0

6 votes

/**
 * Loads stop words from a UTF-8 text file, one entry per line, in file order.
 * A {@code null} path yields an empty list. An {@link IOException} is logged
 * at SEVERE level and the words read so far are returned.
 *
 * @param pathToStopwordsFile path of the stop-word file, or {@code null}
 * @return the list of lines read; never {@code null}
 */
static public LinkedList<String> readStopWords(String pathToStopwordsFile){
    final LinkedList<String> words = new LinkedList<>();
    if(pathToStopwordsFile == null){
        return words;
    }

    LineIterator lines = null;
    try {
        for (lines = FileUtils.lineIterator(new File(pathToStopwordsFile), "UTF-8"); lines.hasNext();) {
            words.add(lines.nextLine());
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // Null-safe: nothing to close when the file could not be opened.
        LineIterator.closeQuietly(lines);
    }
    return words;
}

Source File: CountLinesTextFile.java From levelup-java-examples with Apache License 2.0

6 votes

/**
 * Counts the lines of the file at {@code fileLocation} with a Commons IO
 * {@code LineIterator}, read in the platform default charset, and expects
 * exactly 10 lines.
 *
 * @throws IOException if the file cannot be opened
 */
@Test
public void count_lines_text_apache() throws IOException {

	LineIterator lines = FileUtils.lineIterator(
			Paths.get(fileLocation).toFile(),
			Charset.defaultCharset().toString());

	long total = 0;
	try {
		// One increment per line consumed from the iterator
		for (; lines.hasNext(); lines.nextLine()) {
			total++;
		}
	} finally {
		LineIterator.closeQuietly(lines);
	}

	assertEquals(10, total);
}

Source File: EnhanceMojo.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Read the missing meta data report from a previous run.
 * <p>
 * A non-existent report file is ignored. Reading stops as soon as a line
 * starting with {@code MARK_NO_MISSING_META_DATA} is seen. A line starting
 * with {@code MARK_CLASS} sets the current class name; every following
 * non-blank line is recorded, trimmed, under that class in
 * {@code aReportData}.
 *
 * @param aReportFile the report file to read
 * @param aReportData receives one entry per (class, missing meta data) pair
 * @throws MojoExecutionException if an entry appears before any class line,
 *         or if the file cannot be read
 */
private void readMissingMetaDataReport(File aReportFile, Multimap<String, String> aReportData)
        throws MojoExecutionException {
  // Ignore if the file is missing
  if (!aReportFile.exists()) {
    return;
  }

  LineIterator lines = null;
  try {
    String currentClass = null;
    lines = IOUtils.lineIterator(new FileInputStream(aReportFile), encoding);
    while (lines.hasNext()) {
      final String line = lines.next();

      // Report says there is no missing meta data
      if (line.startsWith(MARK_NO_MISSING_META_DATA)) {
        return;
      }

      // Line containing class name
      if (line.startsWith(MARK_CLASS)) {
        currentClass = line.substring(MARK_CLASS.length()).trim();
        continue;
      }

      // Empty line, ignore
      if (StringUtils.isBlank(line)) {
        continue;
      }

      // Line containing a missing meta data instance
      if (currentClass == null) {
        throw new MojoExecutionException("Missing meta data report has invalid format.");
      }
      aReportData.put(currentClass, line.trim());
    }
  } catch (IOException e) {
    throw new MojoExecutionException("Unable to read missing meta data report: "
            + ExceptionUtils.getRootCauseMessage(e), e);
  } finally {
    LineIterator.closeQuietly(lines);
  }
}

Source File: PreScrubberStep.java From kfs with GNU Affero General Public License v3.0

5 votes

/**
 * Creates the executor for this step. When executed it passes a line
 * iterator over the backup file, together with the pre-scrubber output file
 * name, to {@code preScrubberService.preprocessOriginEntries}, then generates
 * the pre-scrubber report when report data was returned.
 *
 * @return an executor whose {@code execute()} returns {@code true}, or throws
 *         a {@link RuntimeException} when an I/O error occurs
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {
        public boolean execute() {
            final StopWatch timer = new StopWatch();
            timer.start();

            final String sourcePath = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.BACKUP_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            final String targetPath = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;

            PreScrubberReportData reportData = null;
            LineIterator originEntries = null;
            try {
                originEntries = FileUtils.lineIterator(new File(sourcePath));
                reportData = preScrubberService.preprocessOriginEntries(originEntries, targetPath);
            }
            catch (IOException e) {
                LOG.error("IO exception occurred during pre scrubbing.", e);
                throw new RuntimeException("IO exception occurred during pre scrubbing.", e);
            }
            finally {
                // Null-safe close; runs whether or not preprocessing succeeded
                LineIterator.closeQuietly(originEntries);
            }

            // A report is produced only when preprocessing returned data
            if (reportData != null) {
                new PreScrubberReport().generateReport(reportData, preScrubberReportWriterService);
            }

            timer.stop();
            if (LOG.isDebugEnabled()) {
                LOG.debug("scrubber step of took " + (timer.getTotalTimeSeconds() / 60.0) + " minutes to complete");
            }
            return true;
        }
    };
}

Source File: PAMCore.java From api-mining with GNU General Public License v3.0

5 votes

/**
 * This method scans the input database to calculate the support of single
 * items.
 * <p>
 * Empty lines and lines starting with {@code #}, {@code %} or {@code @} are
 * skipped. Every other line is split on single spaces and each token is
 * parsed as an integer; negative values are ignored.
 *
 * @param inputFile
 *            the input file, read as UTF-8
 * @return a multiset for storing the support of each singleton
 * @throws IOException
 *             if the input file cannot be opened
 */
public static Multiset<Sequence> scanDatabaseToDetermineFrequencyOfSingleItems(final File inputFile)
		throws IOException {

	final Multiset<Sequence> singletons = HashMultiset.create();

	// for each line (transaction) until the end of file
	final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
	try {
		while (it.hasNext()) {

			final String line = it.nextLine();
			// if the line is a comment, is empty or is a
			// kind of metadata
			if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
				continue;
			}

			// split the line into items
			final String[] lineSplit = line.split(" ");
			// for each item
			final HashSet<Sequence> seenItems = new HashSet<>();
			for (final String itemString : lineSplit) {
				final int item = Integer.parseInt(itemString);
				if (item >= 0) { // ignore end of itemset/sequence tags
					final Sequence seq = new Sequence(item);
					PAMCore.recursiveSetOccurrence(seq, seenItems); // set
																	// occurrence
					seenItems.add(seq); // add item to seen
				}
			}
			singletons.addAll(seenItems); // increase the support of the items
		}
	} finally {
		// close the input file, also when a malformed token makes parsing throw
		LineIterator.closeQuietly(it);
	}

	return singletons;
}

Source File: HiCQTLAnnotatorBlockbased.java From systemsgenetics with GNU General Public License v3.0

4 votes

/**
 * Matches the wanted contacts in {@code contactsToCheck} against the sorted
 * contact file and writes one tab-separated line per wanted contact to
 * {@code outWriter}.
 * <p>
 * The normalization factors are read from
 * {@code baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod}.
 * If {@code fileToRead + ".sorted"} does not exist it is created first. The
 * sorted file is then streamed line by line while a cursor
 * ({@code numberToBeMatched}) walks through {@code contactsToCheck}.
 * Presumably {@code contactsToCheck} is sorted in the same order as the
 * sorted file - TODO confirm against the caller.
 *
 * @param fileToRead path of the raw contact file; the ".sorted" sibling is what is read
 * @param baseName prefix of the normalization factor file
 * @param normMethod extension of the normalization factor file
 * @param chrSmaller chromosome name used in the normalization file name
 * @param contactsToCheck the wanted contacts
 * @param resolution resolution string, used in the file name and passed to {@code getNumericResolution}
 * @param minValue minimum normalized contact value that is reported as "Contact"
 * @param outWriter destination of the result lines
 * @throws IOException declared by the signature; raised by the underlying file operations
 */
private static void processNormalizedIntraContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException {

        //ReadIn normalization chr1
        // NOTE(review): the "\\chr" separator is a literal backslash - looks Windows-specific, confirm
        TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
        ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
        inputNormChr1.close();

//        System.out.println("Done reading norm factor 1");
        // Create the sorted copy on first use
        if (!Gpio.exists(fileToRead + ".sorted")) {
            umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted");
        }

        // Cursor into contactsToCheck: index of the next wanted contact still to be resolved
        int numberToBeMatched = 0;

        LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

        try {
            while (it.hasNext()) {
                // Columns: position 1, position 2, contact value
                String[] parts = StringUtils.split(it.nextLine(), '\t');

                int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
                int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

                // Resolve every wanted contact that lies at or before the current file position
                while (numberToBeMatched < contactsToCheck.size()) {
                    if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        // File has not reached the wanted contact yet: read the next line
                        break;
                    } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            break;
                        }
                        if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {

                            // Exact match: both factors come from the same list (intra-chromosomal).
                            // NOTE(review): the index is not range-checked here - confirm the factor
                            // file always has enough entries, and why the index is offset by 1.
                            String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
                            String factor2Base = normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

                            double factor1;
                            double factor2;

                            // NOTE(review): if this is Commons Lang StringUtils.isNumeric it accepts
                            // digit characters only, so decimal factors such as "0.5" would take the
                            // "Error in files." branch - confirm which StringUtils is imported.
                            if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) {
                                factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base);
                                factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base);

                                // Normalized contact = raw value divided by the product of both factors
                                double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2);
                                if (contact >= minValue) {
                                    outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]));
                                    numberToBeMatched++;
                                } else {
                                    // Below threshold: written with "-" placeholders
                                    outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                                    numberToBeMatched++;
                                }
                            } else {
                                // No output line is written for this wanted contact in this case
                                System.out.println("Error in files.");
                                numberToBeMatched++;
                            }
                        } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            // File has moved past the wanted contact: written with "-" placeholders
                            outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                            numberToBeMatched++;
                        }
                    } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        // File has moved past the wanted contact: written with "-" placeholders
                        outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        numberToBeMatched++;
                    }
                }
            }
        } finally {
            // Always release the sorted file
            LineIterator.closeQuietly(it);
        }

    }

Source File: DBpediaEntityCheckIndexTool.java From gerbil with GNU Affero General Public License v3.0

4 votes

/**
 * Indexes the subject URIs of a triple file.
 * <p>
 * The file is read as UTF-8 and each line is split on whitespace. Lines with
 * more than two tokens are counted as triples and their first token, with a
 * leading {@code <} and a trailing {@code >} removed, is taken as the subject
 * URI. When the subject differs from the subject of the previous triple, the
 * URIs returned by the same-as retriever are indexed, or the URI itself if
 * the retriever returns {@code null}.
 * <p>
 * An {@link IOException} is logged and otherwise ignored.
 *
 * @param indexer
 *            receives every URI to be indexed
 * @param file
 *            path of the triple file
 */
public static void index(Indexer indexer, String file) {
	UriEncodingHandlingSameAsRetriever retriever = new UriEncodingHandlingSameAsRetriever();
	LineIterator iterator = null;
	long size = 0, rounds = 0;
	try {
		iterator = FileUtils.lineIterator(new File(file), "UTF-8");
		String uri = null;
		Set<String> uris;
		// subject of the previously processed triple
		String old = null;
		Date start = Calendar.getInstance().getTime();
		// iterate over the lines
		while (iterator.hasNext()) {
			String[] split = iterator.next().split("\\s+");
			if (split.length > 2) {
				// get the subject of the triple
				uri = split[0];
				if (uri.startsWith("<")) {
					uri = uri.substring(1);
				}
				if (uri.endsWith(">")) {
					uri = uri.substring(0, uri.length() - 1);
				}

				// if this subject is new
				if (!uri.equals(old)) {
					// retrieve other writings of this URI
					uris = retriever.retrieveSameURIs(uri);
					if (uris != null) {
						for (String u : uris) {
							indexer.index(u);
						}
					} else {
						indexer.index(uri);
					}
					// remember the subject so that consecutive triples of the
					// same subject are not retrieved and indexed again
					old = uri;
				}
				size++;
				if (size % 100000 == 0) {
					Date end = Calendar.getInstance().getTime();
					rounds++;
					String avgTime = DurationFormatUtils
							.formatDurationHMS((end.getTime() - start.getTime()) / rounds);
					LOGGER.info("Got 100000 entities...(Sum: {}, AvgTime: {})", size, avgTime);
				}
			}
		}
	} catch (IOException e) {
		LOGGER.error("Exception while reading file. It will be ignored.", e);
	} finally {
		LineIterator.closeQuietly(iterator);
	}
	LOGGER.info("Successfully indexed {} triples", size);
}

Source File: HiCQTLAnnotatorBlockbased.java From systemsgenetics with GNU General Public License v3.0

4 votes

/**
 * Streams the sorted contact file and flags every wanted contact whose exact
 * position pair occurs in the file with a raw value of at least
 * {@code minValue}, by calling {@code setContact()} on it.
 * <p>
 * The sorted file ({@code fileToRead + ".sorted"}) is created first when it
 * does not exist, using the intra- or inter-chromosomal sorter depending on
 * {@code intra}.
 *
 * @param fileToRead path of the raw contact file
 * @param minValue minimum raw contact value for a match to be flagged
 * @param contactsToCheck the wanted contacts; matched ones are flagged in place
 * @param intra selects the intra-chromosomal sorter when {@code true}, the inter-chromosomal one otherwise
 * @throws IOException declared by the signature; raised by the underlying file operations
 */
private static void processRawContactInformation(String fileToRead, double minValue, ArrayList<DesiredChrContact> contactsToCheck, boolean intra) throws IOException {

        final String sortedPath = fileToRead + ".sorted";

        // Make the sorted version available when it is missing
        if (!Gpio.exists(sortedPath)) {
            if (intra) {
                umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(fileToRead, sortedPath);
            } else {
                umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, sortedPath);
            }
        }

        // Index of the next wanted contact that has not been resolved yet
        int numberToBeMatched = 0;

        LineIterator it = FileUtils.lineIterator(new File(sortedPath), "UTF-8");

        try {
            while (it.hasNext()) {
                String[] parts = StringUtils.split(it.nextLine(), '\t');

                int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
                int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

                while (numberToBeMatched < contactsToCheck.size()) {
                    final DesiredChrContact wanted = contactsToCheck.get(numberToBeMatched);

                    // The file has not reached the wanted contact yet: read on
                    if (posChr1 < wanted.getChrLocationSmaller()) {
                        break;
                    }
                    if (posChr1 == wanted.getChrLocationSmaller()) {
                        if (posChr2 < wanted.getChrLocationLarger()) {
                            break;
                        }
                        if (posChr2 == wanted.getChrLocationLarger()) {
                            double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]);
                            if (contact >= minValue) {
                                wanted.setContact();
                            }
                        }
                    }

                    // Matched or passed: every remaining path moves on to the next wanted contact
                    numberToBeMatched++;
                }
            }
        } finally {
            LineIterator.closeQuietly(it);
        }

    }

Source File: TransactionGenerator.java From sequence-mining with GNU General Public License v3.0

4 votes

/**
 * Generate transactions from set of interesting sequences
 * <p>
 * Writes {@code noTransactions} non-empty transactions to {@code outFile}
 * (UTF-8), one per line, each item followed by {@code " -1 "} and each line
 * terminated by {@code "-2"}. Empty sampled transactions are not counted.
 * When {@code VERBOSE} is set the written file is echoed to standard output.
 *
 * @param sequences
 *            the interesting sequences passed to the sampler
 * @param probabilities
 *            per-sequence table whose rows provide the values and
 *            probabilities of each sequence's distribution
 * @param noTransactions
 *            number of non-empty transactions to write
 * @param outFile
 *            the output file
 * @return set of sequences added to transaction
 * @throws IOException
 *             if the output file cannot be opened
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences,
		final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile)
				throws IOException {

	// Set random number seeds
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Set output file; try-with-resources closes it even if building the
	// distributions or sampling throws
	try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {

		// Add to distribution class for easy sampling
		final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>();
		for (final Sequence seq : sequences.keySet()) {
			final List<Integer> singletons = new ArrayList<>();
			final List<Double> probs = new ArrayList<>();
			for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) {
				singletons.add(entry.getKey());
				probs.add(entry.getValue());
			}
			final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC,
					Ints.toArray(singletons), Doubles.toArray(probs));
			dists.put(seq, dist);
		}

		// Generate transaction database
		int count = 0;
		while (count < noTransactions) {

			// Generate transaction from distribution
			final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI);
			for (final int item : transaction) {
				out.print(item + " -1 ");
			}
			// Only non-empty transactions are terminated and counted
			if (!transaction.isEmpty()) {
				out.print("-2");
				out.println();
				count++;
			}

		}
	}

	// Print file to screen
	if (VERBOSE) {
		final LineIterator it = new LineIterator(new FileReader(outFile));
		try {
			while (it.hasNext()) {
				System.out.println(it.nextLine());
			}
		} finally {
			// Release the reader even if printing fails
			LineIterator.closeQuietly(it);
		}
	}

	return addedSequences;
}

Source File: DiscoverHardcodedIPAddressRuleProvider.java From windup with Eclipse Public License 1.0

4 votes

/**
 * Decides whether the match described by {@code model} should be ignored,
 * based on the text of the line it was found on.
 * <p>
 * The file is read up to the model's line number. The trimmed line leads to
 * {@code true} when it starts with {@code #} in a properties file, when it
 * contains "version" or "revision" (case-insensitively), or when
 * {@code isMavenVersionTag} reports {@code true}. In every other case,
 * including read failures (which are logged), the result is {@code false}.
 */
private boolean ignoreLine(GraphContext context, FileLocationModel model)
{
    final boolean propertiesFile = model.getFile() instanceof PropertiesModel;
    final int targetLine = model.getLineNumber();

    LineIterator lines = null;
    try
    {
        lines = FileUtils.lineIterator(model.getFile().asFile());

        // "current" is the 1-based number of the line about to be consumed
        for (int current = 1; lines.hasNext(); current++)
        {
            if (current < targetLine)
            {
                // seek
                lines.next();
                continue;
            }

            if (current > targetLine)
            {
                LOG.warning("Did not find line: " + targetLine + " in file: " + model.getFile().getFileName());
                break;
            }

            // read the line to memory only if it is the line of interest
            final String text = StringUtils.trim(lines.next());

            // check that it isn't commented.
            if (propertiesFile && StringUtils.startsWith(text, "#"))
            {
                return true;
            }

            // WINDUP-808 - Remove matches with "version" or "revision" on the same line
            if (StringUtils.containsIgnoreCase(text, "version") || StringUtils.containsIgnoreCase(text, "revision"))
            {
                return true;
            }

            return isMavenVersionTag(context, model);
        }
    }
    catch (IOException | RuntimeException e)
    {
        LOG.log(Level.WARNING, "Exception reading properties from file: " + model.getFile().getFilePath(), e);
    }
    finally
    {
        LineIterator.closeQuietly(lines);
    }

    return false;
}

Source File: LaborPreScrubberStep.java From kfs with GNU Affero General Public License v3.0

4 votes

/**
 * Creates the executor for the labor pre-scrubber step. When executed it
 * passes a line iterator over the labor backup file, together with the
 * pre-scrubber output file name, to
 * {@code laborPreScrubberService.preprocessOriginEntries}, then generates the
 * pre-scrubber report when report data was returned.
 *
 * @see org.kuali.kfs.sys.batch.AbstractWrappedBatchStep#getCustomBatchExecutor()
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {

        /**
         * Runs the pre-scrubber and writes its report.
         *
         * @return always {@code true}; an I/O failure is rethrown as a {@link RuntimeException}
         * @see org.kuali.kfs.sys.batch.service.WrappedBatchExecutorService.CustomBatchExecutor#execute()
         */
        public boolean execute() {
            StopWatch stopWatch = new StopWatch();
            stopWatch.start();

            String inputFile = batchFileDirectoryName + File.separator + LaborConstants.BatchFileSystem.BACKUP_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            String outputFile = batchFileDirectoryName + File.separator + LaborConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;

            PreScrubberReportData preScrubberReportData = null;
            LineIterator oeIterator = null;
            try {
                oeIterator = FileUtils.lineIterator(new File(inputFile));
                preScrubberReportData = laborPreScrubberService.preprocessOriginEntries(oeIterator, outputFile);
            }
            catch (IOException e) {
                LOG.error("IO exception occurred during pre scrubbing.", e);
                throw new RuntimeException("IO exception occurred during pre scrubbing.", e);
            }
            finally {
                LineIterator.closeQuietly(oeIterator);
            }
            if (preScrubberReportData != null) {
                ((WrappingBatchService) laborPreScrubberReportWriterService).initialize();
                try {
                    new PreScrubberReport().generateReport(preScrubberReportData, laborPreScrubberReportWriterService);
                }
                finally {
                    // Pair every initialize() with destroy(), even when report generation throws
                    ((WrappingBatchService) laborPreScrubberReportWriterService).destroy();
                }
            }

            stopWatch.stop();
            if (LOG.isDebugEnabled()) {
                LOG.debug("labor pre-scrubber scrubber step took " + (stopWatch.getTotalTimeSeconds() / 60.0) + " minutes to complete");
            }
            return true;
        }

    };
}

Source File: TransactionGenerator.java From api-mining with GNU General Public License v3.0

4 votes

/**
 * Generate transactions from set of interesting sequences
 * <p>
 * Writes {@code noTransactions} non-empty transactions to {@code outFile}
 * (UTF-8), one per line, each item followed by {@code " -1 "} and each line
 * terminated by {@code "-2"}. Empty sampled transactions are not counted.
 * When {@code VERBOSE} is set the written file is echoed to standard output.
 *
 * @param sequences
 *            the interesting sequences passed to the sampler
 * @param noTransactions
 *            number of non-empty transactions to write
 * @param outFile
 *            the output file
 * @return set of sequences added to transaction
 * @throws IOException
 *             if the output file cannot be opened
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(
		final Map<Sequence, Double> sequences, final int noTransactions,
		final File outFile) throws IOException {

	// Set random number seeds
	final Random random = new Random(1);
	final Random randomI = new Random(10);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Set output file; try-with-resources closes it even if sampling throws
	try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {

		// Generate transaction database
		int count = 0;
		while (count < noTransactions) {

			// Generate transaction from distribution
			final Transaction transaction = sampleFromDistribution(random,
					sequences, addedSequences, randomI);
			for (final int item : transaction) {
				out.print(item + " -1 ");
			}
			// Only non-empty transactions are terminated and counted
			if (!transaction.isEmpty()) {
				out.print("-2");
				out.println();
				count++;
			}

		}
	}

	// Print file to screen
	if (VERBOSE) {
		final LineIterator it = new LineIterator(new FileReader(outFile));
		try {
			while (it.hasNext()) {
				System.out.println(it.nextLine());
			}
		} finally {
			// Release the reader even if printing fails
			LineIterator.closeQuietly(it);
		}
	}

	return addedSequences;
}

Source File: WikipediaDomainMap.java From entity-fishing with Apache License 2.0

4 votes

/**
 * Reads a tab-separated mapping file: the first token of each non-blank line
 * is a Wikipedia category title and the remaining tokens are domain labels.
 * <p>
 * Lines whose category is not found in Wikipedia are skipped with a warning.
 * Domain labels missing from {@code domain2id} are skipped with a warning.
 * A category that appears more than once is reported with a warning and its
 * earlier entry is replaced.
 *
 * @param mappingFilePath path of the mapping file
 * @return map from category id to the ids of its domains
 * @throws IOException if the mapping file cannot be opened
 */
private Map<Integer, List<Integer>> readMapping(String mappingFilePath) throws IOException {
    Map<Integer, List<Integer>> domains = new HashMap<Integer, List<Integer>>();
    LineIterator iterator = FileUtils.lineIterator(new File(mappingFilePath));

    try {
        while (iterator.hasNext()) {
            String line = iterator.nextLine();
            if (isBlank(line)) {
                continue;
            }
            StringTokenizer st = new StringTokenizer(line, "\t");
            String category = null;
            int categoryId = -1;
            if (st.hasMoreTokens()) {
                category = st.nextToken();
                com.scienceminer.nerd.kb.model.Category theCategory = wikipedia.getCategoryByTitle(category);
                if (theCategory == null)
                    LOGGER.warn(category + " is not a category found in Wikipedia.");
                else {
                    categoryId = theCategory.getId();
                    if (domains.get(Integer.valueOf(categoryId)) != null) {
                        LOGGER.warn(category + " is already defined in " + mappingFilePath);
                    }
                }
            }
            // -1 means no usable category was found on this line
            if (categoryId != -1) {
                List<Integer> dom = new ArrayList<Integer>();
                while (st.hasMoreTokens()) {
                    String domain = st.nextToken();
                    // Single lookup instead of get-then-get
                    Integer domainId = domain2id.get(domain);
                    if (domainId == null)
                        LOGGER.warn(domain + " is an invalid GRISP domain label in " + mappingFilePath);
                    else {
                        dom.add(domainId);
                    }
                }
                domains.put(Integer.valueOf(categoryId), dom);
            }
        }
    } finally {
        // Close the file even if a lookup or parse step throws
        LineIterator.closeQuietly(iterator);
    }

    return domains;
}

Source File: HiCQTLAnnotatorSnpBased.java From systemsgenetics with GNU General Public License v3.0

4 votes

/**
 * Matches the wanted contacts in {@code contactsToCheck} against the sorted
 * inter-chromosomal contact file and writes one tab-separated line per wanted
 * contact to {@code outWriter}.
 * <p>
 * Two normalization factor lists are read, one per chromosome, from
 * {@code baseName + "\\chr" + <chromosome> + "_" + resolution + "." + normMethod}.
 * If {@code fileToRead + ".sorted"} does not exist it is created first. The
 * sorted file is then streamed line by line while a cursor
 * ({@code numberToBeMatched}) walks through {@code contactsToCheck}.
 * Presumably {@code contactsToCheck} is sorted in the same order as the
 * sorted file - TODO confirm against the caller.
 * <p>
 * Note that this method terminates the JVM with {@code System.exit(-1)} when a
 * computed factor index is larger than the corresponding factor list.
 *
 * @param fileToRead path of the raw contact file; the ".sorted" sibling is what is read
 * @param baseName prefix of the normalization factor files
 * @param normMethod extension of the normalization factor files
 * @param chrSmaller chromosome name used for the first factor file
 * @param chrLarger chromosome name used for the second factor file
 * @param contactsToCheck the wanted contacts
 * @param resolution resolution string, used in the file names and passed to {@code getNumericResolution}
 * @param minValue minimum normalized contact value that is reported as "Contact"
 * @param outWriter destination of the result lines
 * @throws IOException declared by the signature; raised by the underlying file operations
 */
private static void processNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException {

        //ReadIn normalization chr1
        // NOTE(review): the "\\chr" separator is a literal backslash - looks Windows-specific, confirm
        TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
        ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
        inputNormChr1.close();

//        System.out.println("Done reading norm factor 1");
        //ReadIn normalization chr2
        TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R);
        ArrayList<String> normFactorLargerChr = inputNormChr2.readAsArrayList();
        inputNormChr2.close();

//        System.out.println("Done reading norm factor 2");
        // Create the sorted copy on first use
        if (!Gpio.exists(fileToRead + ".sorted")) {
            umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted");
        }

        // Cursor into contactsToCheck: index of the next wanted contact still to be resolved
        int numberToBeMatched = 0;

        LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

        try {
            while (it.hasNext()) {
                // Columns: position on chromosome 1, position on chromosome 2, contact value
                String[] parts = StringUtils.split(it.nextLine(), '\t');

                int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
                int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

                // Resolve every wanted contact that lies at or before the current file position
                while (numberToBeMatched < contactsToCheck.size()) {
                    if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        // File has not reached the wanted contact yet: read the next line
                        break;
                    } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            break;
                        }
                        if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            // Guard the factor lookups: print diagnostics and abort the JVM.
                            // NOTE(review): the guard uses '>' while the lookup below needs an index
                            // strictly smaller than size(); an index equal to size() passes the guard
                            // and would make get() throw - confirm whether '>=' was intended.
                            if (((posChr1 / getNumericResolution(resolution)) + 1) > normFactorSmallerChr.size()) {
                                System.out.println(baseName);
                                System.out.println("Smaller");
                                System.out.println((posChr1 / getNumericResolution(resolution) + 1));
                                System.out.println(normFactorSmallerChr.size());
                                System.exit(-1);
                            }
                            if (((posChr2 / getNumericResolution(resolution)) + 1) > normFactorLargerChr.size()) {
                                System.out.println(baseName);
                                System.out.println("Larger");
                                System.out.println((posChr2 / getNumericResolution(resolution)) + 1);
                                System.out.println(normFactorLargerChr.size());
                                System.exit(-1);
                            }
                            String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
                            String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

                            // A factor that is not a number keeps the neutral value 1.0
                            double factor1 = 1.0;
                            double factor2 = 1.0;

                            // When neither factor is a number, both stay 1.0 and nothing is printed
                            if (NumberUtils.isNumber(factor1Base) && NumberUtils.isNumber(factor2Base)) {
                                factor1 = Double.parseDouble(factor1Base);
                                factor2 = Double.parseDouble(factor2Base);
                            } else if (NumberUtils.isNumber(factor1Base)) {
                                factor1 = Double.parseDouble(factor1Base);
                                System.out.println("Error in files.");
                                System.out.println("Base 2 is reset to 1");
                            } else if (NumberUtils.isNumber(factor2Base)) {
                                factor2 = Double.parseDouble(factor2Base);
                                System.out.println("Error in files.");
                                System.out.println("Base 1 is reset to 1");
                            }

                            // Normalized contact = raw value divided by the product of both factors
                            double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2);
                            if (contact >= minValue) {
                                outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]));
                                numberToBeMatched++;
                            } else {
                                // Below threshold: written with "-" placeholders
                                outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                                numberToBeMatched++;
                            }

                        } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                            // File has moved past the wanted contact: written with "-" placeholders
                            outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                            numberToBeMatched++;
                        }
                    } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                        // File has moved past the wanted contact: written with "-" placeholders
                        outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        numberToBeMatched++;
                    }
                }
            }
        } finally {
            // Always release the sorted file
            LineIterator.closeQuietly(it);
        }

    }

Source File: ScrubberProcessImpl.java From kfs with GNU Affero General Public License v3.0

4 votes

/**
 * Scrub this single group read only. This will only output the scrubber report. It won't output any other groups.
 * <p>
 * Steps, in order: run the pre-scrubber over the raw input file, generate the pre-scrubber report (when the
 * pre-scrubber produced report data), sort the pre-scrubbed output into {@code fileName + ".sort"}, scrub the
 * entries, and finally remove the sorted, valid, error and expired working files.
 *
 * @param fileName the path of the unsorted origin entry file that should be scrubbed
 * @param documentNumber the document number of any specific entries to scrub
 * @throws RuntimeException if the input file cannot be read for pre-scrubbing, or if deleting the working files
 *         raises an exception
 */
@Override
public void scrubGroupReportOnly(String fileName, String documentNumber) {
    LOG.debug("scrubGroupReportOnly() started");
    this.inputFile = fileName + ".sort";
    this.validFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_VALID_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    this.errorFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_ERROR_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    this.expiredFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_EXPIRED_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    String prescrubOutput = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
    this.ledgerSummaryReport = new LedgerSummaryReport();
    runDate = calculateRunDate(dateTimeService.getCurrentDate());

    PreScrubberReportData preScrubberReportData = null;

    // run pre-scrubber on the raw input into the sort process
    LineIterator inputEntries = null;
    try {
        inputEntries = FileUtils.lineIterator(new File(fileName));
        preScrubberReportData = preScrubberService.preprocessOriginEntries(inputEntries, prescrubOutput);
    }
    catch (IOException e1) {
        LOG.error("Error encountered trying to prescrub GLCP/LLCP document", e1);
        throw new RuntimeException("Error encountered trying to prescrub GLCP/LLCP document", e1);
    }
    finally {
        // closeQuietly tolerates a null iterator, so this is safe even when lineIterator() itself failed
        LineIterator.closeQuietly(inputEntries);
    }

    if (preScrubberReportData != null) {
        preScrubberReportWriterService.setDocumentNumber(documentNumber);
        ((WrappingBatchService)preScrubberReportWriterService).initialize();
        try {
            new PreScrubberReport().generateReport(preScrubberReportData, preScrubberReportWriterService);
        }
        finally {
            // always release the report writer, even if report generation fails
            ((WrappingBatchService)preScrubberReportWriterService).destroy();
        }
    }

    // NOTE(review): presumably sorts prescrubOutput into inputFile -- confirm against BatchSortUtil
    BatchSortUtil.sortTextFileWithFields(prescrubOutput, inputFile, new ScrubberSortComparator());

    // NOTE(review): the 'true' flag presumably selects report-only mode -- confirm against scrubEntries
    scrubEntries(true, documentNumber);

    // delete files
    String[] workingFiles = { inputFile, validFile, errorFile, expiredFile };
    try {
        for (String workingFile : workingFiles) {
            File fileToDelete = new File(workingFile);
            // File.delete() reports failure through its return value rather than an exception; only warn
            // when the file is still present, since a file that was never created is not a problem
            if (!fileToDelete.delete() && fileToDelete.exists()) {
                LOG.warn("scrubGroupReportOnly could not delete output file: " + workingFile);
            }
        }
    } catch (Exception e){
        LOG.error("scrubGroupReportOnly delete output files process Stopped: " + e.getMessage());
        throw new RuntimeException("scrubGroupReportOnly delete output files process Stopped: " + e.getMessage(), e);
    }
}

Java Code Examples for org.apache.commons.io.LineIterator#closeQuietly()