org.apache.uima.collection.CollectionException Java Examples
The following examples show how to use
org.apache.uima.collection.CollectionException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FixedDelayTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Checks the timing of a {@code FixedDelay} scheduler created with {@code period = 1}.
 *
 * <p>{@code hasNext()} is called three times with two 1000 ms sleeps in between, and the total
 * elapsed wall-clock time must fall within 3900-4100 ms. Presumably the time beyond the two
 * sleeps is spent blocking inside {@code hasNext()} - confirm against {@code FixedDelay}.
 *
 * @throws CollectionException if the scheduler fails
 * @throws IOException if the scheduler fails
 * @throws ResourceInitializationException if the scheduler cannot be created
 * @throws InterruptedException if a sleep is interrupted
 */
@Test
@SuppressWarnings("squid:S2925" /* sleep required for test */)
public void testDelay()
    throws CollectionException, IOException, ResourceInitializationException,
        InterruptedException {
  FixedDelay scheduler = create("period", "1");

  long start = System.currentTimeMillis();
  assertTrue(scheduler.hasNext());
  Thread.sleep(1000);
  assertTrue(scheduler.hasNext());
  Thread.sleep(1000);
  assertTrue(scheduler.hasNext());

  // The elapsed time is reported through the assertion message on failure, so no
  // separate console output is needed.
  long diff = System.currentTimeMillis() - start;
  assertTrue(String.format("Diff was %d", diff), diff >= 3900 && diff <= 4100);
}
Example #2
Source File: ReNounSeedDocument.java From baleen with Apache License 2.0 | 6 votes |
@Override protected void doGetNext(JCas jCas) throws IOException, CollectionException { supplied = true; // @formatter:off jCas.setDocumentText( new StringBuilder() .append(SENTENCE_1) .append(SEP) .append(SENTENCE_2) .append(SEP) .append(SENTENCE_3) .append(SEP) .append(SENTENCE_4) .append(SEP) .append(SENTENCE_5) .append(SEP) .append(SENTENCE_6) .append(SEP) .append(SENTENCE_7) .append(SEP) .append(SENTENCE_8) .toString()); // @formatter:on }
Example #3
Source File: AbstractTermSuiteCollectionReader.java From termsuite-core with Apache License 2.0 | 6 votes |
/**
 * Loads the text of {@code file} into {@code cas} and indexes a
 * {@code SourceDocumentInformation} annotation describing the file.
 *
 * @param cas the CAS to populate with language, text and source information
 * @param file the file whose content is read
 * @throws IOException if it propagates from the calls made here (for example from reading
 *     the file)
 * @throws CollectionException if the JCas view cannot be obtained from {@code cas}
 */
protected void fillCas(CAS cas, File file) throws IOException, CollectionException {
  final String sourceUri = file.toURI().toString();
  try {
    final SourceDocumentInformation info = new SourceDocumentInformation(cas.getJCas());
    info.setUri(sourceUri);

    final String rawText = getDocumentText(file.getAbsolutePath(), this.mEncoding);
    cas.setDocumentLanguage(mLanguage.getCode());
    cas.setDocumentText(preparator.prepare(rawText));

    // Size bookkeeping for this file and the corpus as a whole.
    info.setDocumentSize((int) file.length());
    info.setCumulatedDocumentSize(this.currentFileByteSize);
    info.setCorpusSize(this.totalFileByteSize);

    // Offsets are taken from the raw text, before the preparator is applied.
    info.setBegin(0);
    info.setEnd(rawText.length());
    info.setOffsetInSource(0);

    // Position of this document within the list of files.
    info.setDocumentIndex(mCurrentIndex);
    info.setNbDocuments(this.mFiles.size());
    info.setLastSegment(mCurrentIndex == mFiles.size() - 1);

    info.addToIndexes();
  } catch (CASException casFailure) {
    throw new CollectionException(casFailure);
  }
}
Example #4
Source File: WebannoTsv2Reader.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Takes the next file resource, initialises the CAS from it and converts the resource's
 * content into the CAS using the configured encoding.
 *
 * @param aJCas the JCas to fill
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
  final Resource resource = nextFile();
  initCas(aJCas, resource);

  InputStream stream = null;
  try {
    stream = resource.getInputStream();
    convertToCas(aJCas, stream, encoding);
  } finally {
    // Runs on both the success and failure paths; a null stream (open failed) is
    // passed through to closeQuietly as in the original.
    closeQuietly(stream);
  }
}
Example #5
Source File: ActiveMQReader.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Receives the next message from the consumer and, if it is a text message, extracts its
 * content into the given JCas.
 *
 * <p>The source name is built from the ActiveMQ resource name and the endpoint, joined by a
 * dot.
 *
 * @param jCas the JCas to populate
 * @throws IOException if no message is received (the JMS API documents a {@code null} return
 *     when the consumer is concurrently closed) or the message is not a {@code TextMessage}
 * @throws CollectionException if a {@code JMSException} occurs
 */
@Override
protected void doGetNext(final JCas jCas) throws IOException, CollectionException {
  final String source = String.join(".", activeMQ.getResourceName(), endpoint);
  try {
    final Message msg = consumer.receive();
    if (msg == null) {
      // Without this guard a null message falls into the type-mismatch branch below
      // and fails with a NullPointerException on msg.getJMSMessageID().
      throw new IOException(
          String.format("No message received from source %s - consumer may be closed", source));
    }
    if (!(msg instanceof TextMessage)) {
      throw new IOException(
          String.format(
              "Unexpected message type for message with id %s from source %s",
              msg.getJMSMessageID(), source));
    }
    final String text = ((TextMessage) msg).getText();
    final InputStream is = IOUtils.toInputStream(text, Charset.defaultCharset());
    extractContent(is, source, jCas);
  } catch (final JMSException e) {
    throw new CollectionException(e);
  }
}
Example #6
Source File: WebannoTsv3Reader.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Takes the next file resource, initialises the CAS from it and converts the resource's
 * content into the CAS using the configured encoding.
 *
 * @param aJCas the JCas to fill
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
  final Resource resource = nextFile();
  initCas(aJCas, resource);

  InputStream stream = null;
  try {
    stream = resource.getInputStream();
    convertToCas(aJCas, stream, encoding);
  } finally {
    // Runs on both the success and failure paths; a null stream (open failed) is
    // passed through to closeQuietly as in the original.
    closeQuietly(stream);
  }
}
Example #7
Source File: CompressedXmiReader.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
@Override public void getNext(CAS aCAS) throws IOException, CollectionException { // nextTarEntry cannot be null here! ByteArrayOutputStream buffer = new ByteArrayOutputStream(); int size = IOUtils.copy(tarArchiveInputStream, buffer); String entryName = nextTarEntry.getName(); getLogger().debug("Loaded " + size + " bytes from " + entryName); // and move forward fastForwardToNextValidEntry(); // and now create JCas InputStream inputStream = new ByteArrayInputStream(buffer.toByteArray()); try { XmiCasDeserializer.deserialize(inputStream, aCAS, lenient); } catch (SAXException e) { throw new IOException(e); } }
Example #8
Source File: WebannoTsv1Reader.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Takes the next file resource, initialises the CAS from it and converts the resource's
 * content into the CAS using the configured encoding.
 *
 * @param aJCas the JCas to fill
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
  final Resource resource = nextFile();
  initCas(aJCas, resource);

  InputStream stream = null;
  try {
    stream = resource.getInputStream();
    convertToCas(aJCas, stream, encoding);
  } finally {
    // Runs on both the success and failure paths; a null stream (open failed) is
    // passed through to closeQuietly as in the original.
    closeQuietly(stream);
  }
}
Example #9
Source File: JCasPoolIterable.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Reports whether the wrapped collection reader has another element.
 *
 * <p>Returns {@code false} once this iterable has been destroyed. Checked exceptions from the
 * reader are rethrown as {@link IllegalStateException}. If the reader call does not complete
 * normally and {@code selfDestroy} is set, {@code destroy()} is invoked before the exception
 * propagates.
 *
 * @return {@code true} if the reader reports more elements, {@code false} otherwise
 */
public boolean hasNext() {
  if (this.destroyed) {
    return false;
  }

  // Stays true unless the reader call returns normally.
  boolean failed = true;
  try {
    boolean more = this.collectionReader.hasNext();
    failed = false;
    return more;
  } catch (CollectionException | IOException cause) {
    throw new IllegalStateException(cause);
  } finally {
    if (failed && this.selfDestroy) {
      this.destroy();
    }
  }
}
Example #10
Source File: FixedRateTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Checks the timing of a {@code FixedRate} scheduler created with {@code period = 1}.
 *
 * <p>{@code hasNext()} is called three times with two 1000 ms sleeps in between, and the total
 * elapsed wall-clock time must fall within 1900-2100 ms.
 */
@Test
@SuppressWarnings("squid:S2925" /* sleep required for test */)
public void testDelay()
    throws CollectionException, IOException, ResourceInitializationException,
        InterruptedException {
  FixedRate scheduler = create("period", "1");

  final long startedAt = System.currentTimeMillis();

  assertTrue(scheduler.hasNext());
  Thread.sleep(1000);
  assertTrue(scheduler.hasNext());
  Thread.sleep(1000);
  assertTrue(scheduler.hasNext());

  final long finishedAt = System.currentTimeMillis();
  final long diff = finishedAt - startedAt;

  final boolean withinTolerance = diff >= 1900 && diff <= 2100;
  assertTrue(String.format("Diff was %d", diff), withinTolerance);
}
Example #11
Source File: SqlDbCellReader.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Ensures {@code currRow} holds the cells of the next row, loading one if it is empty.
 *
 * <p>When the current result set is exhausted, {@code getNextTable()} is called and
 * {@code rowId} is reset. This repeats until a table yields a row or no table is left, so
 * tables without rows are skipped rather than causing a failed read. This assumes
 * {@code getNextTable()} repositions {@code rsCurrTable} on the new table - confirm against
 * its implementation.
 *
 * @return {@code true} if {@code currRow} contains at least one cell after the call
 * @throws IOException wrapping any {@code SQLException} raised while reading
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  if (currRow.isEmpty()) {
    try {
      // Loop instead of a single check: the first next() on a freshly opened table
      // returns false when that table has no rows.
      while (!rsCurrTable.next()) {
        if (!getNextTable()) {
          return false;
        }
        rowId = 0;
      }

      rowId++;

      for (String col : columns) {
        currRow.put(col, rsCurrTable.getObject(col));
      }
    } catch (SQLException se) {
      throw new IOException(se);
    }
  }

  return !currRow.isEmpty();
}
Example #12
Source File: BaleenScheduler.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Populates the JCas for a scheduled job run: sets a fixed document text and language, then
 * copies every entry of {@code config} into a new {@code JobSettings} for the JCas.
 *
 * <p>The work is bracketed by monitor {@code startFunction}/{@code finishFunction} calls, and
 * the pipeline metrics are told that document processing has started.
 *
 * @param jCas the JCas to populate
 */
@Override
public final void getNext(final JCas jCas) throws IOException, CollectionException {
  getMonitor().startFunction("getNext");
  MetricsFactory.getInstance()
      .getPipelineMetrics(monitor.getPipelineName())
      .startDocumentProcess();

  // The document text is the simple class name of JobSettings.
  jCas.setDocumentText(JobSettings.class.getSimpleName());
  jCas.setDocumentLanguage("en");

  final JobSettings jobSettings = new JobSettings(jCas);
  for (final Map.Entry<String, String> setting : config.entrySet()) {
    final String name = setting.getKey();
    final String value = setting.getValue();
    jobSettings.set(name, value);
  }

  getMonitor().finishFunction("getNext");
}
Example #13
Source File: SqlCellReader.java From baleen with Apache License 2.0 | 5 votes |
@Override @SuppressWarnings( "squid:S2077" /* The value of col is read from the database column names and so should be safe to use in this context */) protected void doGetNext(JCas jCas) throws IOException, CollectionException { if (colsToProcess.isEmpty()) { // Get next row currId = idsToProcess.remove(0); colsToProcess.addAll(allCols); } String col = colsToProcess.remove(0); String content; try (ResultSet rs = conn.prepareStatement( "SELECT `" + col + "` FROM `" + table + "` WHERE `" + idColumn + "` = " + currId) .executeQuery()) { if (rs.next()) { content = rs.getObject(col).toString(); } else { throw new IOException("Unable to get cell content - query returned no results"); } } catch (SQLException e) { throw new IOException("Unable to get cell content", e); } String sourceUrl = sqlConn.substring(5) + "." + table + "#" + currId + "." + col; extractContent( new ByteArrayInputStream(content.getBytes(Charset.defaultCharset())), sourceUrl, jCas); }
Example #14
Source File: Conll2003AidaReader.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/** * Read a single sentence. */ private List<String[]> readSentence() throws IOException, CollectionException { if (!reader.hasNextLine()) { return null; } List<String[]> words = new ArrayList<>(); String line; while (reader.hasNextLine()) { line = reader.nextLine(); if (line.contains("DOCSTART")) { if (isOneFile) { nextDocId = parseDocId(line); return null; } else { throw new RuntimeException("There are more than DOCSTART in one document!"); } } if (StringUtils.isBlank(line)) { break; // End of sentence } String[] fields = line.split("\t"); words.add(fields); if (sentenceEnd == SentenceEndType.DOT && ".".equals(fields[0]) && !"dummy".equals(fields[1])) { break; } } return words; }
Example #15
Source File: SqlCellReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Reports whether another cell is available.
 *
 * <p>If neither columns nor ids are pending, more ids are fetched via {@code getIds(currId)}
 * before answering.
 *
 * @return {@code true} if columns or ids remain to be processed
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  final boolean workPending = !colsToProcess.isEmpty() || !idsToProcess.isEmpty();
  if (workPending) {
    return true;
  }

  // Nothing queued: try to fetch another batch of ids.
  idsToProcess.addAll(getIds(currId));
  return !idsToProcess.isEmpty();
}
Example #16
Source File: BaleenCollectionReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Override of the UIMA hasNext() method that delegates to {@code doHasNext()}, wrapping the
 * call in monitor function-trace start/finish calls.
 *
 * <p>The method is final, so the actual check lives in {@code doHasNext()}.
 * NOTE(review): the earlier comment described continuously checking for new documents until
 * one is found; no such loop is visible here, so that behaviour would have to come from
 * {@code doHasNext()} - confirm.
 *
 * @return whatever {@code doHasNext()} returns
 */
@Override
public final boolean hasNext() throws IOException, CollectionException {
  monitor.startFunctionTrace("hasNext");
  final boolean documentAvailable = doHasNext();
  monitor.finishFunctionTrace("hasNext");

  return documentAvailable;
}
Example #17
Source File: NewsleakElasticsearchReader.java From newsleak with GNU Affero General Public License v3.0 | 5 votes |
public void getNext(CAS cas) throws IOException, CollectionException { JCas jcas; try { jcas = cas.getJCas(); } catch (CASException e) { throw new CollectionException(e); } String docId = totalIdList.get(currentRecord); GetResponse response = client.prepareGet(esIndex, ElasticsearchDocumentWriter.ES_TYPE_DOCUMENT, docId) .setFields("Content", "Created").get(); jcas.setDocumentText((String) response.getField("Content").getValue()); jcas.setDocumentLanguage(language); // Set metadata Metadata metaCas = new Metadata(jcas); metaCas.setDocId(docId); String docDate = (String) response.getField("Created").getValue(); metaCas.setTimestamp(docDate); metaCas.addToIndexes(); // heideltime Dct dct = new Dct(jcas); dct.setValue(docDate); dct.addToIndexes(); currentRecord++; logger.log(Level.FINEST, "Document ID: " + docId); logger.log(Level.FINEST, "Document Length: " + jcas.getDocumentText().length()); }
Example #18
Source File: HooverElasticsearchReader.java From newsleak with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Reports whether more records remain and, if so, advances {@code currentRecord}.
 *
 * <p>Note that this method mutates state: every call that returns {@code true} also
 * increments the record counter.
 *
 * @return {@code true} if {@code currentRecord} was below {@code totalRecords} on entry
 */
public boolean hasNext() throws IOException, CollectionException {
  final boolean more = currentRecord < totalRecords;
  if (more) {
    currentRecord++;
  }
  return more;
}
Example #19
Source File: Conll2003ReaderTcBmeow.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Derives the classification outcome for {@code unit} from the named entity covering it.
 *
 * <p>With no covering entity the outcome is {@code "OTH"}. With exactly one, the outcome is
 * the entity value prefixed by a position marker: {@code "W-"} when the entity covers a single
 * token, otherwise {@code "B-"}, {@code "M-"} or {@code "E-"} for each covered token that
 * matches the unit's text and begin offset at the first, a middle, or the last position.
 *
 * @param jcas the JCas holding the annotations
 * @param unit the classification target
 * @return the outcome label
 * @throws CollectionException if more than one named entity covers the unit
 */
@Override
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget unit)
    throws CollectionException {
  List<NamedEntity> neList = JCasUtil.selectCovering(jcas, NamedEntity.class, unit);

  final int entityCount = neList.size();
  if (entityCount == 0) {
    return "OTH";
  }
  if (entityCount != 1) {
    throw new CollectionException(
        new Throwable("Could not get unique NER annotation to be used as TC outome. List size: "
            + neList.size() + " " + unit.getCoveredText()));
  }

  final NamedEntity entity = neList.get(0);
  final List<Token> covered = JCasUtil.selectCovered(jcas, Token.class, entity);
  final StringBuilder label = new StringBuilder();

  if (covered.size() == 1) {
    label.append("W-");
  } else {
    final int lastIndex = covered.size() - 1;
    for (int position = 0; position < covered.size(); position++) {
      final Token candidate = covered.get(position);
      final boolean isTarget =
          candidate.getCoveredText().equals(unit.getCoveredText())
              && candidate.getBegin() == unit.getBegin();
      if (!isTarget) {
        continue;
      }
      if (position == 0) {
        label.append("B-");
      } else if (position < lastIndex) {
        label.append("M-");
      } else {
        label.append("E-");
      }
    }
  }

  label.append(entity.getValue());
  return label.toString();
}
Example #20
Source File: OnceTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Verifies that a {@code Once} scheduler reports a next element exactly one time: the first
 * {@code hasNext()} call returns true and the following two return false.
 */
@Test
public void test() throws CollectionException, IOException, ResourceInitializationException {
  Once scheduler = create();

  // First call fires...
  assertTrue(scheduler.hasNext());

  // ...and subsequent calls do not.
  assertFalse(scheduler.hasNext());
  assertFalse(scheduler.hasNext());
}
Example #21
Source File: BaleenCollectionReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Fetches the next document by delegating to {@code doGetNext(JCas)}.
 *
 * <p>Before delegating, the monitor is told the function has started and the pipeline metrics
 * are told that document processing has begun. Afterwards the monitor is told the function
 * has finished and its counts are persisted.
 *
 * @param jCas the JCas to populate
 */
@Override
public final void getNext(JCas jCas) throws IOException, CollectionException {
  monitor.startFunction("getNext");

  final String pipelineName = monitor.getPipelineName();
  MetricsFactory.getInstance().getPipelineMetrics(pipelineName).startDocumentProcess();

  doGetNext(jCas);

  monitor.finishFunction("getNext");
  monitor.persistCounts();
}
Example #22
Source File: CsvFolderReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Drains all pending keys from the watch service, handing each of their events to
 * {@code processEvent} and marking the {@code "events"} meter once per event, then reports
 * whether anything is left to read.
 *
 * <p>Each key is reset after its events have been processed.
 *
 * @return {@code true} if {@code currLines} or {@code queue} is non-empty
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  for (WatchKey pending = watcher.poll(); pending != null; pending = watcher.poll()) {
    for (WatchEvent<?> change : pending.pollEvents()) {
      processEvent(pending, change);
      getMonitor().meter("events").mark();
    }
    pending.reset();
  }

  final boolean nothingLeft = currLines.isEmpty() && queue.isEmpty();
  return !nothingLeft;
}
Example #23
Source File: MongoReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Takes the next id from the queue, loads the matching Mongo document and extracts its
 * content field into the given JCas.
 *
 * <p>Every other field except the id field is passed to {@code processMongoMetadataField}.
 * If {@code deleteSource} is set, the document is deleted from the collection afterwards.
 *
 * @param jCas the JCas to populate
 * @throws CollectionException if no document is found for the id
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  final ObjectId id = queue.remove(0);
  final Document idQuery = new Document(idField, id);

  final Document found = coll.find(idQuery).first();
  if (found == null) {
    getMonitor().error("No document returned from Mongo");
    throw new CollectionException();
  }

  final String content = (String) found.get(contentField);
  final InputStream contentStream = IOUtils.toInputStream(content, Charset.defaultCharset());
  extractContent(contentStream, mongo.getMongoURI() + "." + collection + "#" + id, jCas);

  for (Entry<String, Object> field : found.entrySet()) {
    final String name = field.getKey();
    // The content and id fields are handled above / are not metadata.
    final boolean reserved = contentField.equals(name) || idField.equals(name);
    if (!reserved) {
      processMongoMetadataField(jCas, name, field.getValue());
    }
  }

  if (deleteSource) {
    coll.deleteOne(idQuery);
  }
}
Example #24
Source File: SqlDbCellReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Removes one cell from {@code currRow} and extracts its string form into the given JCas.
 *
 * <p>The cell is the first key returned by the key-set iterator. The source URL is built from
 * {@code sqlConn} (minus its first five characters), the current table, the row id and the
 * column key.
 *
 * @param jCas the JCas to populate
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  final String column = currRow.keySet().iterator().next();
  final Object cell = currRow.remove(column);

  final String sourceUrl = sqlConn.substring(5) + "." + currTable + "#" + rowId + "." + column;

  final byte[] cellBytes = cell.toString().getBytes(Charset.defaultCharset());
  extractContent(new ByteArrayInputStream(cellBytes), sourceUrl, jCas);
}
Example #25
Source File: FolderReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Takes the next path from the queue, opens the file and extracts its content into the given
 * JCas.
 *
 * @param jCas the JCas to populate
 * @throws CollectionException if the queue is empty
 * @throws IOException if the file cannot be opened, or if it propagates from extraction
 */
@Override
public void doGetNext(JCas jCas) throws IOException, CollectionException {
  if (queue.isEmpty()) {
    getMonitor().error("No documents on the queue - this method should not have been called");
    throw new CollectionException();
  }

  final Path next = queue.remove(0);
  getMonitor().info("Processing file {}", next.toString());

  try (InputStream fileStream = new FileInputStream(next.toFile())) {
    extractContent(fileStream, next.toString(), jCas);
  }
}
Example #26
Source File: MboxReader.java From baleen with Apache License 2.0 | 5 votes |
@Override protected void doGetNext(JCas jCas) throws IOException, CollectionException { if (!attachments.isEmpty()) { // If we have attachments, first process those Map.Entry<String, Body> entry = attachments.firstEntry(); getMonitor().info("Processing attachment {}", entry.getKey()); processBody(jCas, entry.getValue(), entry.getKey()); attachments.remove(entry.getKey()); } else { // No attachments so process the next message String raw = mboxIterator.next().toString(); count++; String uri = "mbox://" + mbox + "#" + count; getMonitor().info("Processing message {}", uri); // Parse message and get body Message msg = messageBuilder.parseMessage(new ByteArrayInputStream(raw.getBytes(charset))); Body body = msg.getBody(); boolean doneBody = false; // Decide how to process body of message if (body instanceof SingleBody) { doneBody = processBody(jCas, body, uri); } else if (body instanceof Multipart) { Multipart mp = (Multipart) body; doneBody = processMultipart(jCas, mp, uri); } // No body found (just attachments? Or invalid message?) if (!doneBody) { throw new IOException("No processable body found"); } } }
Example #27
Source File: StreamingCollectionReader.java From termsuite-core with Apache License 2.0 | 5 votes |
/**
 * Blocks until a document is available on the queue and stores it in {@code currentDoc}.
 *
 * <p>Logs a message first when the queue is empty at the time of the call.
 *
 * @return {@code false} if the document taken is {@code CollectionDocument.LAST_DOCUMENT} or
 *     the wait was interrupted, {@code true} otherwise
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
  try {
    if (documentQueue.isEmpty()) {
      logger.info("Waiting for a new document.");
    }
    currentDoc = documentQueue.take();
    // Identity comparison: LAST_DOCUMENT is used as a sentinel instance.
    return currentDoc != CollectionDocument.LAST_DOCUMENT;
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers further up can still see the interruption.
    Thread.currentThread().interrupt();
    logger.info("Stream {} interrupted", this.streamName);
    return false;
  }
}
Example #28
Source File: LineReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Extracts the current line into the given JCas and records its line number as metadata.
 *
 * <p>The source is identified as the file path followed by {@code #} and the line number.
 *
 * @param jCas the JCas to populate
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  final InputStream lineStream = IOUtils.toInputStream(line, Charset.defaultCharset());
  final String source = file.getPath() + "#" + lineNumber;
  extractContent(lineStream, source, jCas);

  final Metadata lineNumberMetadata = new Metadata(jCas);
  lineNumberMetadata.setKey("lineNumber");
  lineNumberMetadata.setValue(lineNumber.toString());
  getSupport().add(lineNumberMetadata);
}
Example #29
Source File: SqlRowReader.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Reports whether another row id is available, fetching more ids via {@code getIds(currId)}
 * when none are queued.
 *
 * @return {@code true} if at least one id is queued after the call
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  if (idsToProcess.isEmpty()) {
    // Nothing queued: try to fetch another batch of ids.
    idsToProcess.addAll(getIds(currId));
  }
  return !idsToProcess.isEmpty();
}
Example #30
Source File: AbstractStreamCollectionReaderTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Verifies that a reader limited to two documents reports a next document exactly twice:
 * {@code doHasNext()} is true before each of the two {@code doGetNext} calls and false
 * afterwards.
 */
@Test
public void testMax() throws ResourceInitializationException, CollectionException, IOException {
  FakeStreamCollectionReader reader = new FakeStreamCollectionReader();
  reader.setMaxDocuments(2);
  reader.doInitialize(null);

  // First document.
  assertTrue(reader.doHasNext());
  reader.doGetNext(null);

  // Second document.
  assertTrue(reader.doHasNext());
  reader.doGetNext(null);

  // Limit reached.
  assertFalse(reader.doHasNext());
}