org.elasticsearch.ingest.IngestDocument#getFieldValue

Source File: CsvProcessor.java From elasticsearch-ingest-csv with Apache License 2.0

6 votes

/**
 * Parses the configured source field as one CSV line and writes every parsed value
 * into the document under the column name found at the same position.
 *
 * @param ingestDocument document the source field is read from and the column fields are written to
 * @return the same {@code ingestDocument} instance that was passed in
 * @throws IllegalArgumentException if {@code Strings.hasLength} rejects the field content, or if the
 *         number of parsed values differs from the number of configured columns
 * @throws Exception any other failure raised while reading the field or parsing the line
 */
@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
    final String line = ingestDocument.getFieldValue(field, String.class);

    if (Strings.hasLength(line) == false) {
        // TODO should we have ignoreMissing flag?
        throw new IllegalArgumentException("field[" + this.field + "] is empty string.");
    }

    // Parsing happens while holding the parser's monitor.
    // NOTE(review): presumably the parser instance is shared and not thread-safe - confirm.
    final String[] cells;
    synchronized (parser) {
        cells = parser.parseLine(line);
    }

    if (cells.length != this.columns.size()) {
        // TODO should be error?
        throw new IllegalArgumentException("field[" + this.field + "] size ["
            + cells.length + "] doesn't match header size [" + columns.size() + "].");
    }

    // Column i receives value i; the size check above guarantees both sides line up.
    int index = 0;
    while (index < columns.size()) {
        ingestDocument.setFieldValue(columns.get(index), cells[index]);
        index++;
    }
    return ingestDocument;
}

Source File: LangDetectProcessor.java From elasticsearch-ingest-langdetect with Apache License 2.0

6 votes

/**
 * Detects the language of the configured source field and stores the result in the target field.
 *
 * <p>The document is returned untouched when the field content is empty, or when reading the
 * field fails with an {@link IllegalArgumentException} and {@code ignoreMissing} is set.
 *
 * @param ingestDocument document to read the text from and to write the detected language to
 * @return the same {@code ingestDocument} instance that was passed in
 * @throws IllegalArgumentException if the field cannot be read and {@code ignoreMissing} is not set
 * @throws Exception if creating the detector or detecting the language fails
 */
@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
    String content;
    try {
        content = ingestDocument.getFieldValue(field, String.class);
    } catch (IllegalArgumentException e) {
        if (ignoreMissing) {
            return ingestDocument;
        }
        throw e;
    }
    if (Strings.isEmpty(content)) {
        return ingestDocument;
    }

    // Create and configure the detector only once there is text to analyse, so the
    // early-return paths above do not pay for a detector that is never used.
    Detector detector = DetectorFactory.create();
    detector.setMaxTextLength(maxLength.bytesAsInt());
    detector.append(content);

    ingestDocument.setFieldValue(targetField, detector.detect());
    return ingestDocument;
}

Source File: OpenNlpProcessorTests.java From elasticsearch-ingest-opennlp with Apache License 2.0

6 votes

/**
 * Runs a processor configured with an {@code annotated_text_field} against the shared test
 * document and checks the exact annotated text written to that field.
 */
public void testAnnotatedText() throws Exception {
    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put("field", "source_field");
    processorConfig.put("annotated_text_field", "my_annotated_text_field");

    Map<String, Processor.Factory> emptyRegistry = Collections.emptyMap();
    OpenNlpProcessor processor = new OpenNlpProcessor.Factory(service)
            .create(emptyRegistry, randomAlphaOfLength(10), processorConfig);

    IngestDocument processed = processor.execute(getIngestDocument());
    String annotated = processed.getFieldValue("my_annotated_text_field", String.class);

    String expected = "[Kobe Bryant](Person_Kobe Bryant) was one of the best basketball players of all times. Not even" +
            " [Michael Jordan](Person_Michael Jordan) has ever scored 81 points in one game. [Munich](Location_Munich) is really" +
            " an awesome city, but [New York](Location_New York) is as well. [Yesterday](Date_Yesterday) has been the hottest" +
            " day of the year.";
    assertThat(annotated, is(expected));
}

Source File: GeoExtensionProcessor.java From elasticsearch-plugin-geoshape with MIT License

6 votes

/**
 * Collects the names of the document fields that match the configured {@code field} pattern.
 *
 * <p>When {@code path} is set, the keys of the map stored at that path are matched and each
 * result is prefixed with {@code path + "."}; otherwise the keys of the document's source and
 * metadata map are matched and returned as they are.
 *
 * @param ingestDocument document whose field names are inspected
 * @return the matching field names; empty when nothing matches or when no map is available
 */
@SuppressWarnings("unchecked")
private List<String> getGeoShapeFieldsFromDoc(IngestDocument ingestDocument) {
    List<String> fields = new ArrayList<>();

    Map<String, Object> baseMap;
    if (path != null) {
        baseMap = ingestDocument.getFieldValue(this.path, Map.class);
    } else {
        baseMap = ingestDocument.getSourceAndMetadata();
    }

    // A null value stored at the path would otherwise fail with a NullPointerException
    // on keySet(); treat it as "no candidate fields".
    if (baseMap == null) {
        return fields;
    }

    for (String fieldName : baseMap.keySet()) {
        if (Regex.simpleMatch(field, fieldName)) {
            fields.add(path != null ? path + "." + fieldName : fieldName);
        }
    }

    return fields;
}

Source File: YauaaProcessor.java From yauaa with Apache License 2.0

5 votes

/**
 * Parses the configured source field with the user agent analyzer and stores the resulting
 * field map, minus the {@code USERAGENT_FIELDNAME} entry, in the target field.
 *
 * @param ingestDocument document to read the user agent string from and to write the result to
 * @return the same {@code ingestDocument} instance that was passed in
 */
@Override
public IngestDocument execute(IngestDocument ingestDocument) {
    final String userAgentString = ingestDocument.getFieldValue(field, String.class);
    final Map<String, String> parsedFields = uaa.parse(userAgentString).toMap();

    // Drop this entry from the parse result before it is stored.
    parsedFields.remove(USERAGENT_FIELDNAME);

    ingestDocument.setFieldValue(targetField, parsedFields);
    return ingestDocument;
}

Source File: OpenNlpProcessor.java From elasticsearch-ingest-opennlp with Apache License 2.0

5 votes

/**
 * Runs the OpenNLP service over the source field once per configured entry in {@code fields},
 * merges the found entity values with whatever the target field already holds, and stores the
 * merged result. When an annotated text field is configured, the annotated text is stored too.
 *
 * <p>Nothing is written when {@code Strings.hasLength} rejects the source field content.
 *
 * @param ingestDocument document to read the text from and to write the entities to
 * @return the same {@code ingestDocument} instance that was passed in
 */
@Override
public IngestDocument execute(IngestDocument ingestDocument) {
    final String text = ingestDocument.getFieldValue(sourceField, String.class);
    if (Strings.hasLength(text) == false) {
        return ingestDocument;
    }

    final Map<String, Set<String>> merged = new HashMap<>();
    mergeExisting(merged, ingestDocument, targetField);

    final List<ExtractedEntities> extractions = new ArrayList<>();
    for (String fieldName : fields) {
        ExtractedEntities found = openNlpService.find(text, fieldName);
        extractions.add(found);
        merge(merged, fieldName, found.getEntityValues());
    }

    // convert set to list, otherwise toXContent serialization in simulate pipeline fails
    final Map<String, List<String>> storable = new HashMap<>();
    for (Map.Entry<String, Set<String>> entry : merged.entrySet()) {
        storable.put(entry.getKey(), new ArrayList<>(entry.getValue()));
    }
    ingestDocument.setFieldValue(targetField, storable);

    if (Strings.hasLength(annotatedTextField) && !extractions.isEmpty()) {
        ingestDocument.setFieldValue(annotatedTextField,
            OpenNlpService.createAnnotatedText(text, extractions));
    }

    return ingestDocument;
}

Source File: OpenNlpProcessor.java From elasticsearch-ingest-opennlp with Apache License 2.0

5 votes

/**
 * Seeds {@code entities} with the content of the target field when the document already has
 * one; otherwise stores the given {@code entities} map itself under the target field.
 *
 * @param entities       map that receives the already stored entries
 * @param ingestDocument document to inspect and, if the field is absent, to update
 * @param targetField    name of the field holding the entities
 */
private static void mergeExisting(Map<String, Set<String>> entities, IngestDocument ingestDocument, String targetField) {
    if (ingestDocument.hasField(targetField) == false) {
        ingestDocument.setFieldValue(targetField, entities);
        return;
    }

    // NOTE(review): the cast is unchecked; the stored values may not actually be Sets
    // (the processor writes Lists into this field) - confirm callers only use them as Collections.
    @SuppressWarnings("unchecked")
    Map<String, Set<String>> alreadyStored = ingestDocument.getFieldValue(targetField, Map.class);
    entities.putAll(alreadyStored);
}

Source File: GeoExtensionProcessor.java From elasticsearch-plugin-geoshape with MIT License

4 votes

/**
 * Replaces every matching geo shape field of the document with an object holding derived
 * sub-fields (original shape, fixed WKT, hash, WKB, geometry type, area, centroid, bounding box).
 *
 * <p>Each sub-field is written only when its field name is configured (non-null), or for the
 * original shape when {@code keepShape} is set. Fields whose value is null are skipped.
 *
 * @param ingestDocument document whose geo shape fields are expanded in place
 * @return the same {@code ingestDocument} instance that was passed in
 * @throws IllegalArgumentException if a shape cannot be built and no {@code fixedField} is configured
 * @throws IOException declared by the original signature
 * @throws ParseException if the WKT produced by the shape builder cannot be read back
 */
@SuppressWarnings("unchecked")
@Override
public IngestDocument execute(IngestDocument ingestDocument) throws IOException, ParseException {
    List<String> geoShapeFields = getGeoShapeFieldsFromDoc(ingestDocument);
    for (String geoShapeField : geoShapeFields) {

        Object geoShapeObject = ingestDocument.getFieldValue(geoShapeField, Object.class);

        if (geoShapeObject == null) {
            continue;
        }

        ShapeBuilder<?,?, ?> shapeBuilder = getShapeBuilderFromObject(geoShapeObject);

        Shape shape = null;
        try {
            shape = shapeBuilder.buildS4J();
        } catch (InvalidShapeException ignored) {
            // Deliberately ignored: an invalid shape leaves `shape` null, which is either
            // rejected or repaired just below depending on whether fixedField is configured.
        }

        if (shape == null && fixedField == null) {
            throw new IllegalArgumentException("unable to parse shape [" + shapeBuilder.toWKT() + "]");
        }

        Geometry geom = new WKTReader().read(shapeBuilder.toWKT());

        // fix shapes if needed
        if (shape == null && fixedField != null) {
            geom = GeoUtils.removeDuplicateCoordinates(geom);
        }

        // The original value is removed first so the field can be rebuilt from sub-fields.
        ingestDocument.removeField(geoShapeField);

        if (keepShape) {
            ingestDocument.setFieldValue(geoShapeField + "." + shapeField, geoShapeObject);
        }

        if (fixedField != null) {
            ingestDocument.setFieldValue(geoShapeField + "." + fixedField, new WKTWriter().write(geom));
        }

        // compute and add extra geo sub-fields
        byte[] wkb = new WKBWriter().write(geom);  // elastic will auto-encode this as b64

        if (hashField != null) {
            // Use the configured name like every other sub-field; it used to be hard-coded to "hash".
            ingestDocument.setFieldValue(
                    geoShapeField + "." + hashField, String.valueOf(GeoUtils.getHashFromWKB(new BytesRef(wkb))));
        }
        if (wkbField != null) {
            ingestDocument.setFieldValue(geoShapeField + "." + wkbField, wkb);
        }
        if (typeField != null) {
            ingestDocument.setFieldValue(geoShapeField + "." + typeField, geom.getGeometryType());
        }
        if (areaField != null) {
            ingestDocument.setFieldValue(geoShapeField + "." + areaField, geom.getArea());
        }
        if (centroidField != null) {
            ingestDocument.setFieldValue(
                    geoShapeField + "." + centroidField, GeoUtils.getCentroidFromGeom(geom));
        }
        if (bboxField != null) {
            Coordinate[] coords = geom.getEnvelope().getCoordinates();
            // The bounding box is only written when the envelope has at least four coordinates.
            if (coords.length >= 4) {
                ingestDocument.setFieldValue(
                        geoShapeField + "." + bboxField,
                        GeoUtils.getBboxFromCoords(coords));
            }
        }
    }
    return ingestDocument;
}

Java Code Examples for org.elasticsearch.ingest.IngestDocument#getFieldValue()