Java Code Examples for org.apache.uima.jcas.tcas.DocumentAnnotation#getSourceUri()

The following examples show how to use org.apache.uima.jcas.tcas.DocumentAnnotation#getSourceUri(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: AddSourceToMetadata.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the document's source URI in a new {@code Metadata} annotation under {@code key}.
 *
 * <p>When {@code nameOnly} is set, only the file name of the source is stored, with everything
 * from its last '.' onwards removed (a name without a '.' is stored whole). Otherwise the source
 * URI is stored unchanged.
 *
 * @param jCas the JCas whose document annotation supplies the source URI and which receives the
 *     new metadata annotation
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  final Metadata metadata = new Metadata(jCas);
  metadata.setKey(key);

  String value = source;
  if (nameOnly) {
    final String fileName = new File(source).getName();
    // A negative index means the name has no '.', so there is nothing to strip.
    final int lastDot = fileName.lastIndexOf('.');
    value = lastDot < 0 ? fileName : fileName.substring(0, lastDot);
  }

  metadata.setValue(value);
  addToJCasIndex(metadata);
}
 
Example 2
Source File: DocumentTypeByFilename.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the document type from the file name of the document's source URI.
 *
 * <p>The file name is matched in full against {@code typePattern}. When it matches and the
 * capture group {@code group} took part in the match, that group's text becomes the type;
 * otherwise {@code defaultType} is used. The type is lower-cased when {@code lowerCase} is set,
 * then trimmed and prefixed with {@code prefix} before being stored on the document annotation.
 *
 * @param aJCas the JCas whose document annotation is read and updated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
  DocumentAnnotation da = getDocumentAnnotation(aJCas);

  File f = new File(da.getSourceUri());

  String type = defaultType;

  Matcher m = typePattern.matcher(f.getName());
  if (m.matches()) {
    // Matcher.group returns null for a group that did not participate in the match (e.g. an
    // optional group); keep the default type in that case instead of failing on null below.
    String matched = m.group(group);
    if (matched != null) {
      type = matched;
    }
  }

  if (lowerCase) {
    // Locale.ROOT keeps the result independent of the JVM's default locale (for example the
    // Turkish dotless-i mapping), so the same file name always yields the same type.
    type = type.toLowerCase(java.util.Locale.ROOT);
  }

  da.setDocType(prefix + type.trim());
}
 
Example 3
Source File: AbstractBaleenFileConsumer.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the given JCas to a file whose location is derived from the document's source URI.
 *
 * <p>If the source URI is null or empty, the value returned by {@code
 * ConsumerUtils.getExternalId} is used in its place. A non-empty result of {@code
 * getExtension()} is appended after a '.'. The target file is resolved through {@code
 * SourceUtils.urlToFile} against {@code basePath}. Any exception raised while writing is logged
 * as a warning and the target file is deleted; a failed delete is logged as well.
 *
 * @param jCas the JCas to write out
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  final DocumentAnnotation da = getDocumentAnnotation(jCas);

  String url = da.getSourceUri();
  if (Strings.isNullOrEmpty(url)) {
    url = ConsumerUtils.getExternalId(da, false);
  }

  final String extension = getExtension();
  final boolean hasExtension = !Strings.isNullOrEmpty(extension);
  if (hasExtension) {
    url += "." + extension;
  }

  final File target = SourceUtils.urlToFile(basePath, url);

  try {
    getMonitor().debug("Writing {} to {}", url, target.getAbsolutePath());
    writeToFile(jCas, target);
  } catch (Exception e) {
    getMonitor().warn("Failed to write file {}, deleting", target.getAbsolutePath(), e);
    // Remove the target so a failed write does not leave a file behind.
    final boolean deleted = target.delete();
    if (!deleted) {
      getMonitor().warn("Failed to delete file {}", target.getAbsolutePath(), e);
    }
  }
}
 
Example 4
Source File: YYYYMMDDAssigner.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the document timestamp from a date found in the document's source URI.
 *
 * <p>The source URI must match {@code pattern} in full; the named groups {@code year}, {@code
 * month} and {@code day} are then parsed as integers. If the month is within 1-12 and the day
 * within 1-31, the timestamp is set to the start of that day in UTC, in epoch milliseconds. Any
 * exception raised while parsing or building the date is logged as a warning and the timestamp
 * is left untouched.
 *
 * @param jCas the JCas whose document annotation is read and updated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final DocumentAnnotation da = getDocumentAnnotation(jCas);
  final String source = da.getSourceUri();

  final Matcher matcher = pattern.matcher(source);
  if (!matcher.matches()) {
    return;
  }

  try {
    final int year = Integer.parseInt(matcher.group("year"));
    final int month = Integer.parseInt(matcher.group("month"));
    final int day = Integer.parseInt(matcher.group("day"));

    final boolean monthInRange = month >= 1 && month <= 12;
    final boolean dayInRange = day >= 1 && day <= 31;
    if (!monthInRange || !dayInRange) {
      return;
    }

    // The range check above is only a coarse filter: LocalDate.of still rejects impossible
    // dates such as 31 February by throwing, which is handled by the catch below.
    final long ts =
        LocalDate.of(year, month, day)
            .atStartOfDay()
            .atOffset(ZoneOffset.UTC)
            .toInstant()
            .toEpochMilli();

    da.setTimestamp(ts);
  } catch (final Exception e) {
    // Not a usable date in the source path; report it and leave the timestamp as it was.
    getMonitor().warn("Cant parse date from source uri {} ", source, e);
  }
}
 
Example 5
Source File: DocumentTypeByLocation.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the document type to the directory that contains the document's source file.
 *
 * <p>The absolute path of the source's parent directory is taken, a leading {@code
 * baseDirectory} is removed when present, and leading and trailing slashes and backslashes are
 * stripped. The result is stored as the document type.
 *
 * @param aJCas the JCas whose document annotation is read and updated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
  DocumentAnnotation da = getDocumentAnnotation(aJCas);

  // Resolve to an absolute file before asking for the parent: File.getParentFile() returns null
  // for a bare relative name such as "doc.txt", which previously caused a NullPointerException.
  // For sources that do have a parent component the resulting absolute path is the same.
  File parent = new File(da.getSourceUri()).getAbsoluteFile().getParentFile();

  // An absolute path without a parent is a filesystem root; use an empty location, which is
  // also what a file directly under the root yields once the slashes are stripped below.
  String type = parent == null ? "" : parent.getAbsolutePath();
  if (baseDirectory != null && type.startsWith(baseDirectory)) {
    type = type.substring(baseDirectory.length());
  }

  // remove leading and trailing slashes and backslashes using a regular expression
  type = type.replaceAll("^\\\\+|^\\/+|\\\\+$|\\/+$", "");
  da.setDocType(type);
}
 
Example 6
Source File: FolderReaderTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the source URI from the document annotation of the given JCas.
 *
 * @param jCas the JCas to inspect
 * @return the source URI held by the document annotation
 */
private String getSource(JCas jCas) {
  return ((DocumentAnnotation) jCas.getDocumentAnnotationFs()).getSourceUri();
}
 
Example 7
Source File: MucReaderTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the source URI recorded on the JCas document annotation.
 *
 * @param jCas the JCas whose document annotation is read
 * @return the document annotation's source URI
 */
private String getSource(JCas jCas) {
  return ((DocumentAnnotation) jCas.getDocumentAnnotationFs()).getSourceUri();
}
 
Example 8
Source File: CsvFolderReaderTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the document annotation of the given JCas and returns its source URI.
 *
 * @param jCas the JCas to read from
 * @return the source URI of the document annotation
 */
private String getSource(JCas jCas) {
  return ((DocumentAnnotation) jCas.getDocumentAnnotationFs()).getSourceUri();
}
 
Example 9
Source File: MboxReaderTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches the source URI from the given JCas via its document annotation.
 *
 * @param jCas the JCas holding the document annotation
 * @return the source URI stored on that annotation
 */
private String getSource(JCas jCas) {
  return ((DocumentAnnotation) jCas.getDocumentAnnotationFs()).getSourceUri();
}
 
Example 10
Source File: LineReaderTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Gets the source URI of the document held in the given JCas.
 *
 * @param jCas the JCas whose document annotation is consulted
 * @return the source URI read from the document annotation
 */
private String getSource(JCas jCas) {
  return ((DocumentAnnotation) jCas.getDocumentAnnotationFs()).getSourceUri();
}
 
Example 11
Source File: SourceUtils.java    From baleen with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the base filename from DocumentAnnotation source URI in the given JCas.
 *
 * <p>The basename is the main part of the filename, without extension or enclosing paths, e.g.
 * for path '/some/directory/SomeFile.txt' this method will return 'SomeFile'.
 *
 * @param jCas the {@link JCas} from which to get the document annotation.
 * @return the base name of the source file, without directory path or extension
 * @throws IllegalArgumentException if there is an error parsing the document source URI.
 */
public static String getDocumentSourceBaseName(final JCas jCas) {
  final DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  final String sourceUri = documentAnnotation.getSourceUri();
  // getBaseName drops both the enclosing path and the extension, as documented above;
  // getName would have kept the extension ('SomeFile.txt').
  return FilenameUtils.getBaseName(sourceUri);
}