org.apache.uima.jcas.cas.FSArray#size

Source File: PubmedDatabaseCRTest.java From bluima with Apache License 2.0

6 votes

@Test
public void testAuthors() throws Exception {

    // Expected names, index-aligned, for the authors of the record served at
    // http://www.ncbi.nlm.nih.gov/pubmed/?term=1&report=xml&format=text
    // AB___A B___Makar__-__KE___K
    // E___McMartin__-__M___M___Palese__-__TR___T R___Tephly
    final String[] expectedForeNames = { "A B", "K E", "M", "T R" };
    final String[] expectedLastNames = { "Makar", "McMartin", "Palese", "Tephly" };

    CollectionReader reader = createReader(PubmedDatabaseCR.class,
            BlueUima.PARAM_BETWEEN, new int[] { 0, 1 },
            BlueUima.PARAM_SKIP_EMPTY_DOCS, false);

    for (JCas cas : asList(reader)) {
        Header header = JCasUtil.selectSingle(cas, Header.class);
        FSArray authorArray = header.getAuthors();

        // Compare every author found in the header against the expected names.
        int index = 0;
        while (index < authorArray.size()) {
            AuthorInfo author = (AuthorInfo) authorArray.get(index);
            assertEquals(expectedForeNames[index], author.getForeName());
            assertEquals(expectedLastNames[index], author.getLastName());
            index++;
        }

        assertEquals("1976-01-16", header.getCopyright());
    }
}

Source File: ChunkAnnotator.java From bluima with Apache License 2.0

6 votes

/**
 * Returns the first POS tag of the token whose UIMA type name equals
 * {@code posTagSetPreference}.
 *
 * @param token the token whose POS tag array is inspected
 * @return the first matching POS tag, or {@code null} when the token has no
 *         POS tag array or none of its tags is of the preferred type
 */
private POSTag getPrefPOSTag(Token token) {
    FSArray postags = token.getPosTag();
    if (postags == null) {
        // no POS tag array was set on this token
        return null;
    }
    for (int i = 0; i < postags.size(); i++) {
        POSTag fs = (POSTag) postags.get(i);
        // skip empty slots; compare to the desired type of POS Tag Set
        if (fs != null && fs.getType().getName().equals(posTagSetPreference)) {
            return fs;
        }
    }
    return null;
}

Source File: FeatureStructureImplC.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * For printing FSArrays; called after printing the type:nnn
 * Only called if ! IS_V2_PRETTY_PRINT, since v2 didn't print the array contents
 *   appends the array length
 *   if the length = 0 that's all
 *   otherwise appends each element via ppval, one indent step deeper, between "[" and "]"
 *
 * @param fsarray the array whose length and elements are printed
 * @param indent the current indent
 * @param incr the indent incr
 * @param buf the stringbuilder where the result is added
 * @param useShortNames passed through unchanged to ppval
 * @param printRefs passed through unchanged to ppval
 * @param isShortForm passed through unchanged to ppval
 */
private void printFSArrayElements(FSArray fsarray, int indent, int incr, StringBuilder buf, boolean useShortNames, PrintReferences printRefs, boolean isShortForm) {
  Misc.indent(buf, indent);
  final int length = fsarray.size();
  buf.append("Array length: ").append(length);
  if (length == 0) {
    return;
  }

  Misc.indent(buf, indent);
  buf.append("Array elements: [");

  // elements are printed one increment deeper than the surrounding brackets
  final int elementIndent = indent + incr;
  int idx = 0;
  while (idx < length) {
    Misc.indent(buf, elementIndent);
    ppval((TOP) fsarray.get(idx), elementIndent, incr, buf, useShortNames, printRefs, isShortForm);
    idx++;
  }

  // the closing bracket goes back to the caller's indent
  Misc.indent(buf, indent);
  buf.append(']');
}

Source File: JCasSerialiser.java From baleen with Apache License 2.0

5 votes

/**
 * Collects the internal ids of the Baleen annotations held in an FSArray.
 * Elements that are null or not a {@code BaleenAnnotation} are skipped.
 *
 * @param annotationArray the array to read; must not be null
 * @return the internal ids in array order (possibly empty, never null)
 */
private List<Long> getInternalIds(final FSArray annotationArray) {
  final int size = annotationArray.size();
  final List<Long> entities = new ArrayList<>(size);

  for (int x = 0; x < size; x++) {
    final FeatureStructure featureStructure = annotationArray.get(x);
    // instanceof is already false for null, so no separate null check is needed
    if (featureStructure instanceof BaleenAnnotation) {
      entities.add(((BaleenAnnotation) featureStructure).getInternalId());
    }
  }

  return entities;
}

Source File: TokenFrequencySummarisation.java From baleen with Apache License 2.0

5 votes

/**
 * Returns the lemma form of the token's first lemma, or the lower-cased
 * covered text when the token has no lemmas.
 *
 * @param token the word token
 * @return the first lemma form, or the lower-cased covered text
 */
private String getRoot(WordToken token) {
  FSArray lemmas = token.getLemmas();
  boolean hasLemma = lemmas != null && lemmas.size() != 0;

  if (hasLemma) {
    return token.getLemmas(0).getLemmaForm();
  }

  // TODO: Could we stem here instead of using the root word?
  String coveredText = token.getCoveredText();
  return coveredText.toLowerCase();
}

Source File: MaltParser.java From baleen with Apache License 2.0

5 votes

/**
 * Gets the lemma form of the token's first lemma.
 *
 * @param token the token
 * @return the first lemma form, or "_" when the token has no lemmas
 */
private String getLemma(final WordToken token) {
  final FSArray lemmas = token.getLemmas();
  final boolean noLemmas = lemmas == null || lemmas.size() == 0;
  return noLemmas ? "_" : ((WordLemma) lemmas.get(0)).getLemmaForm();
}

Source File: SentenceFactory.java From baleen with Apache License 2.0

5 votes

/**
 * Gets the lemma form of the token's first lemma, falling back to the
 * lower-cased covered text when the token has no lemmas.
 *
 * @param token the token
 * @return the first lemma form, or the lower-cased covered text
 */
private String getLemma(final WordToken token) {
  final FSArray lemmas = token.getLemmas();
  if (lemmas != null && lemmas.size() != 0) {
    final WordLemma first = (WordLemma) lemmas.get(0);
    return first.getLemmaForm();
  }
  return token.getCoveredText().toLowerCase();
}

Source File: EntityRelationConverter.java From baleen with Apache License 2.0

5 votes

/**
 * Collects the external ids of the entities held in an FSArray.
 * Elements that are not an {@code Entity} (including null) are skipped.
 *
 * @param entityArray the array to read
 * @return the external ids in array order
 */
private List<String> getEntityIds(FSArray entityArray) {
  List<String> ids = new ArrayList<>();

  int index = 0;
  while (index < entityArray.size()) {
    FeatureStructure element = entityArray.get(index);
    index++;
    if (!(element instanceof Entity)) {
      continue;
    }
    ids.add(((Entity) element).getExternalId());
  }

  return ids;
}

Source File: JsonCasSerializer.java From termsuite-core with Apache License 2.0

5 votes

/**
 * Writes the word annotations of an FSArray as a JSON array field whose
 * entries are two-element arrays {@code [begin, end]}.
 * Nothing is written when {@code words} is null.
 *
 * @param jg the generator to write to
 * @param fieldName the name of the JSON array field
 * @param words the word annotations, may be null
 * @throws IOException if the generator fails to write
 */
private static void writeIntFSArrayField(JsonGenerator jg, String fieldName, FSArray words) throws IOException {
    if (words != null) {
        jg.writeArrayFieldStart(fieldName);

        int idx = 0;
        while (idx < words.size()) {
            WordAnnotation word = (WordAnnotation) words.get(idx);
            // one [begin, end] pair per word
            jg.writeStartArray();
            jg.writeNumber(word.getBegin());
            jg.writeNumber(word.getEnd());
            jg.writeEndArray();
            idx++;
        }

        jg.writeEndArray();
    }
}

Source File: BlueCasUtil.java From bluima with Apache License 2.0

5 votes

/**
 * Returns the value of the first POS tag of a token.
 *
 * @param t the token
 * @return the value of the first POS tag, or {@code null} when the token
 *         has no POS tag array or the array is empty
 */
public static String getSinglePosTag(Token t) {
    FSArray tags = t.getPosTag();
    if (tags == null || tags.size() <= 0) {
        return null;
    }
    POSTag first = (POSTag) tags.get(0);
    return first.getValue();
}

Source File: CasCompare.java From uima-uimaj with Apache License 2.0

5 votes

/**
   * This is an optional pre-compare operation.
   * 
   * Sorts a clone of the FSArray held in the given feature (ordering by compareRefs)
   * and then removes duplicates: consecutive entries of the sorted clone that are the
   * same reference (==) are kept only once.
   * 
   * Nothing is changed by this call itself, apart from calling clearPrevFss() and
   * setting inSortContext; the update of the feature structure is deferred to the
   * returned Runnable.

   * @param fs the feature structure having the fsarray feature
   * @param feat the feature having the fsarray
   * @return a runnable, which (when invoked) updates the original array with the sorted result,
   *         or, if duplicates were removed, sets the feature to a new, shorter array;
   *         null if the feature value is null or has fewer than 2 elements
   */
  public Runnable sort_dedup_FSArray(TOP fs, Feature feat) {
    FSArray<?> fsArray = (FSArray<?>)(fs.getFeatureValue(feat));
    if (fsArray == null || fsArray.size() < 2) {
      // nothing to sort or dedup
      return null;
    }
    // work on a copy so the original element order stays intact until the runnable is run
    TOP[] a = fsArray._getTheArray().clone();
    clearPrevFss();
    inSortContext = true;
    Arrays.sort(a, (TOP afs1, TOP afs2) -> {
      return compareRefs(afs1, afs2, null, null);
    });
    // keep an entry only if it is a different reference than the one just before it
    ArrayList<TOP> dedup = new ArrayList<>(a.length);
    // NOTE(review): prev starts as null, so null entries at the very start of the
    // sorted array are skipped as if they were duplicates - confirm this is intended
    TOP prev = null;
    for (TOP top : a) {
      if (top == prev) {
        continue;
      }
      prev = top;
      dedup.add(top);
    }
    TOP[] r = dedup.toArray(new TOP[dedup.size()]);
    if (r.length == a.length) {
      // nothing was removed: the runnable copies the sorted clone over the original array
      return () -> System.arraycopy(a, 0, fsArray._getTheArray(), 0, fsArray.size());
    } else {
      // entries were removed: build a shorter array of the same type to replace the feature value
      CASImpl cas = fs.getCASImpl();
      FSArray<?> fsa = (FSArray<?>) cas.createArray(fsArray._getTypeImpl(), r.length);
//      FSArray<?> fsa = new FSArray<>(fs.getJCas(), r.length);
      if (IS_SHOW_PROGRESS) {
        System.out.format("Dedup found dup in cas %d for type/feature %s, removed %d%n", working_on, feat.getName(), a.length - r.length);
      }
      fsa.copyFromArray(r, 0, 0, r.length);
      // the feature is only switched to the new array when the runnable is run
      return () -> fs.setFeatureValue(feat, fsa);
    }
  }

Source File: DKPro2CoreNlp.java From ambiverse-nlu with Apache License 2.0

4 votes

/**
 * Recursively converts an annotation into a Stanford {@code Tree}.
 *
 * A {@code Constituent} that is not a leaf becomes an inner node labelled with its
 * constituent type, with one subtree per child. Any other annotation is cast to
 * {@code Token} and becomes a leaf wrapped in a preterminal node labelled with the
 * token's POS value. The leaf label also receives the whitespace before and after
 * the token, derived from the offsets of the neighbouring tokens.
 *
 * @param root the annotation to convert
 * @param tFact the factory used to create nodes and leaves
 * @param aIdxTokens optional map from token to indexed word; when non-null the leaf
 *        is created from the mapped value, otherwise from the token's covered text
 * @return the tree rooted at the converted annotation
 * @throws IllegalStateException if the JCas cannot be obtained from the annotation's CAS
 */
public static Tree createStanfordTree(org.apache.uima.jcas.tcas.Annotation root, TreeFactory tFact, Map<Token, IndexedWord> aIdxTokens) {
  JCas aJCas;
  try {
    aJCas = root.getCAS().getJCas();
  } catch (CASException e) {
    // keep the original exception as the cause so the failure can be diagnosed
    throw new IllegalStateException("Unable to get JCas from JCas wrapper", e);
  }

  // define the new (root) node
  Tree rootNode;

  // before we can create a node, we must check if we have any children (we have to know
  // whether to create a node or a leaf - not very dynamic)
  if (root instanceof Constituent && !isLeaf((Constituent) root)) {
    Constituent node = (Constituent) root;
    List<Tree> childNodes = new ArrayList<Tree>();

    // get childNodes from child annotations
    FSArray children = node.getChildren();
    for (int i = 0; i < children.size(); i++) {
      childNodes.add(createStanfordTree(node.getChildren(i), tFact, aIdxTokens));
    }

    // now create the node with its children
    rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);

  } else {
    // Handle leaf annotations
    // Leafs are always Token-annotations
    // We also have to insert a Preterminal node with the value of the
    // POS-Annotation on the token
    // because the POS is not directly stored within the tree
    Token wordAnnotation = (Token) root;

    // create leaf-node for the tree
    Tree wordNode;
    if (aIdxTokens != null) {
      wordNode = tFact.newLeaf(aIdxTokens.get(wordAnnotation));
    } else {
      wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());
    }

    // create information about preceding and trailing whitespaces in the leaf node
    StringBuilder preWhitespaces = new StringBuilder();
    StringBuilder trailWhitespaces = new StringBuilder();

    List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
    List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

    if (!precedingTokenList.isEmpty()) {
      Token precedingToken = precedingTokenList.get(0);
      // gap between the end of the previous token and the start of this one
      int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
      for (int i = 0; i < precedingWhitespaces; i++) {
        preWhitespaces.append(" ");
      }
    }
    if (!followingTokenList.isEmpty()) {
      Token followingToken = followingTokenList.get(0);
      // gap between the end of this token and the start of the next one
      int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
      for (int i = 0; i < trailingWhitespaces; i++) {
        trailWhitespaces.append(" ");
      }
    }

    // write whitespace information as CoreAnnotation.BeforeAnnotation and
    // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to
    // node label
    CoreLabel wordLabel = (CoreLabel) wordNode.label();
    wordLabel.set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
    wordLabel.set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

    // get POS-annotation
    POS pos = wordAnnotation.getPos();

    // create POS-Node in the tree and attach word-node to it
    rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList(wordNode));
  }

  return rootNode;
}

Source File: XCASSerializer.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Writes an FSArray as an XML element carrying the array size attribute,
 * with one child element per array slot. A slot holding a feature structure
 * gets that structure's id as text. A null slot gets the id mapped from the
 * out-of-type-system data when an entry for that index exists, and no text otherwise.
 *
 * @param fs the array to encode
 * @param attrs the attributes to which the array size attribute is added
 * @throws SAXException passed through from the element/text writing helpers
 */
private void encodeFSArray(FSArray fs, AttributesImpl attrs) throws SAXException {
      String elementName = fs._getTypeImpl().getName();
      final int length = fs.size();
      addAttribute(attrs, ARRAY_SIZE_ATTR, Integer.toString(length));
      if (elementName.endsWith(TypeSystemImpl.ARRAY_TYPE_SUFFIX)) {
        elementName = CASImpl.TYPE_NAME_FS_ARRAY;
      }
      startElement(elementName, attrs, length);

      for (int idx = 0; idx < length; idx++) {
        final TOP element = (TOP) fs.get(idx);
        String text = null;

        if (element != null) {
          text = Integer.toString(element._id);
        } else if (mOutOfTypeSystemData != null) {
          // This array element may have been a reference to an OOTS FS.
          final List<ArrayElement> ootsElems = mOutOfTypeSystemData.arrayElements.get(fs);
          if (ootsElems != null) {
            // TODO: iteration could be slow for large arrays
            for (ArrayElement ootsElem : ootsElems) {
              if (ootsElem.index == idx) {
                text = mOutOfTypeSystemData.idMap.get(ootsElem.value);
                break;
              }
            }
          }
        }

        // the third argument is 1 when the element has text content, 0 when it is empty
        startElement(ARRAY_ELEMENT_TAG, emptyAttrs, (text == null) ? 0 : 1);
        if (text != null) {
          addText(text);
        }
        endElement(ARRAY_ELEMENT_TAG);
      }

      endElement(elementName);
    }

Source File: CasCompare.java From uima-uimaj with Apache License 2.0

3 votes

/**
 * This is an optional pre-compare operation.
 * 
 * Sometimes, when comparing FSArrays, the order of the elements is not significant,
 * and the compare should be done ignoring order differences.
 * 
 * Sorting the elements with this method before the compare achieves that.
 * The particular sort order does not matter; it only has to be the same
 * for otherwise equal FSArrays.
 * 
 * Call this routine once for each FSArray you want sorted.
 * 
 * During the sort, links are followed.
 * 
 * The sort works on a clone of the array and leaves the original array untouched.
 * The returned Runnable copies the sorted clone over the original array when it is
 * invoked later, so that sorting sees the original item values
 * (in case the links refer back to the originals).
 *
 * @param fsArray the array to be sorted
 * @return a runnable, which (when invoked) updates the original array with the sorted result,
 *         or null if the array is null or has fewer than 2 elements
 */
public Runnable sortFSArray(FSArray<?> fsArray) {
  final boolean nothingToSort = fsArray == null || fsArray.size() < 2;
  if (nothingToSort) {
    return null;
  }

  final TOP[] sorted = fsArray._getTheArray().clone();
  clearPrevFss();
  inSortContext = true;
  Arrays.sort(sorted, (TOP left, TOP right) -> compareRefs(left, right, null, null));

  return () -> {
    System.arraycopy(sorted, 0, fsArray._getTheArray(), 0, fsArray.size());
  };
}

Java Code Examples for org.apache.uima.jcas.cas.FSArray#size()