Java Code Examples for org.elasticsearch.ingest.IngestDocument#getFieldValue()
The following examples show how to use
org.elasticsearch.ingest.IngestDocument#getFieldValue().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CsvProcessor.java From elasticsearch-ingest-csv with Apache License 2.0 | 6 votes |
@Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { String content = ingestDocument.getFieldValue(field, String.class); if (Strings.hasLength(content)) { String[] values; synchronized (parser) { values = parser.parseLine(content); } if (values.length != this.columns.size()) { // TODO should be error? throw new IllegalArgumentException("field[" + this.field + "] size [" + values.length + "] doesn't match header size [" + columns.size() + "]."); } for (int i = 0; i < columns.size(); i++) { ingestDocument.setFieldValue(columns.get(i), values[i]); } } else { // TODO should we have ignoreMissing flag? throw new IllegalArgumentException("field[" + this.field + "] is empty string."); } return ingestDocument; }
Example 2
Source File: LangDetectProcessor.java From elasticsearch-ingest-langdetect with Apache License 2.0 | 6 votes |
/**
 * Detects the language of the text stored in {@code field} and writes the result
 * to {@code targetField}.
 *
 * <p>The document is returned untouched when the field cannot be read and
 * {@code ignoreMissing} is set, or when the field content is empty.
 *
 * @param ingestDocument the document to inspect and update
 * @return the same document instance
 * @throws IllegalArgumentException rethrown from the field lookup when
 *         {@code ignoreMissing} is not set
 */
@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
    final Detector langDetector = DetectorFactory.create();
    langDetector.setMaxTextLength(maxLength.bytesAsInt());

    final String text;
    try {
        text = ingestDocument.getFieldValue(field, String.class);
    } catch (IllegalArgumentException lookupFailure) {
        if (!ignoreMissing) {
            throw lookupFailure;
        }
        return ingestDocument;
    }

    // Nothing to analyse: leave the document as it is.
    if (Strings.isEmpty(text)) {
        return ingestDocument;
    }

    langDetector.append(text);
    ingestDocument.setFieldValue(targetField, langDetector.detect());
    return ingestDocument;
}
Example 3
Source File: OpenNlpProcessorTests.java From elasticsearch-ingest-opennlp with Apache License 2.0 | 6 votes |
/**
 * Checks that a processor configured with an {@code annotated_text_field} writes the
 * expected annotated markup into that field.
 */
public void testAnnotatedText() throws Exception {
    final String expectedAnnotation =
        "[Kobe Bryant](Person_Kobe Bryant) was one of the best basketball players of all times. Not even"
            + " [Michael Jordan](Person_Michael Jordan) has ever scored 81 points in one game. [Munich](Location_Munich) is really"
            + " an awesome city, but [New York](Location_New York) is as well. [Yesterday](Date_Yesterday) has been the hottest"
            + " day of the year.";

    // Processor configuration: read from source_field, write annotations to my_annotated_text_field.
    final Map<String, Object> settings = new HashMap<>();
    settings.put("field", "source_field");
    settings.put("annotated_text_field", "my_annotated_text_field");

    final OpenNlpProcessor.Factory processorFactory = new OpenNlpProcessor.Factory(service);
    final Map<String, Processor.Factory> noFactories = Collections.emptyMap();
    final OpenNlpProcessor underTest = processorFactory.create(noFactories, randomAlphaOfLength(10), settings);

    final IngestDocument processed = underTest.execute(getIngestDocument());

    final String actualAnnotation = processed.getFieldValue("my_annotated_text_field", String.class);
    assertThat(actualAnnotation, is(expectedAnnotation));
}
Example 4
Source File: GeoExtensionProcessor.java From elasticsearch-plugin-geoshape with MIT License | 6 votes |
/**
 * Collects the names of the document fields whose key matches the {@code field}
 * pattern (via {@code Regex.simpleMatch}).
 *
 * <p>When {@code path} is set, keys are looked up in the map stored under that path and
 * each returned name is prefixed with {@code path + "."}; otherwise keys come from the
 * document's source-and-metadata map and are returned as-is.
 *
 * @param ingestDocument the document whose keys are scanned
 * @return the matching field names, in key-iteration order
 */
@SuppressWarnings("unchecked")
private List<String> getGeoShapeFieldsFromDoc(IngestDocument ingestDocument) {
    final Map<String, Object> candidates;
    final String prefix;
    if (path == null) {
        candidates = ingestDocument.getSourceAndMetadata();
        prefix = null;
    } else {
        candidates = ingestDocument.getFieldValue(this.path, Map.class);
        prefix = path + ".";
    }

    final List<String> matches = new ArrayList<>();
    for (String key : candidates.keySet()) {
        if (!Regex.simpleMatch(field, key)) {
            continue;
        }
        matches.add(prefix == null ? key : prefix + key);
    }
    return matches;
}
Example 5
Source File: YauaaProcessor.java From yauaa with Apache License 2.0 | 5 votes |
/**
 * Parses the string stored in {@code field} with {@code uaa} and stores the parse
 * result, as a map without its {@code USERAGENT_FIELDNAME} entry, under
 * {@code targetField}.
 *
 * @param ingestDocument the document to read from and write to
 * @return the same document instance
 */
@Override
public IngestDocument execute(IngestDocument ingestDocument) {
    final String rawValue = ingestDocument.getFieldValue(field, String.class);

    final Map<String, String> parsedFields = uaa.parse(rawValue).toMap();
    // Drop the USERAGENT_FIELDNAME entry before storing the result.
    parsedFields.remove(USERAGENT_FIELDNAME);

    ingestDocument.setFieldValue(targetField, parsedFields);
    return ingestDocument;
}
Example 6
Source File: OpenNlpProcessor.java From elasticsearch-ingest-opennlp with Apache License 2.0 | 5 votes |
@Override public IngestDocument execute(IngestDocument ingestDocument) { String content = ingestDocument.getFieldValue(sourceField, String.class); if (Strings.hasLength(content)) { Map<String, Set<String>> entities = new HashMap<>(); mergeExisting(entities, ingestDocument, targetField); List<ExtractedEntities> extractedEntities = new ArrayList<>(); for (String field : fields) { ExtractedEntities data = openNlpService.find(content, field); extractedEntities.add(data); merge(entities, field, data.getEntityValues()); } // convert set to list, otherwise toXContent serialization in simulate pipeline fails Map<String, List<String>> entitiesToStore = new HashMap<>(); Iterator<Map.Entry<String, Set<String>>> iterator = entities.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry<String, Set<String>> entry = iterator.next(); entitiesToStore.put(entry.getKey(), new ArrayList<>(entry.getValue())); } ingestDocument.setFieldValue(targetField, entitiesToStore); if (Strings.hasLength(annotatedTextField) && extractedEntities.isEmpty() == false) { String annotatedText = OpenNlpService.createAnnotatedText(content, extractedEntities); ingestDocument.setFieldValue(annotatedTextField, annotatedText); } } return ingestDocument; }
Example 7
Source File: OpenNlpProcessor.java From elasticsearch-ingest-opennlp with Apache License 2.0 | 5 votes |
/**
 * Seeds {@code entities} with the map already stored under {@code targetField}, or, when
 * the document has no such field yet, stores {@code entities} itself under that field.
 *
 * @param entities       the map to fill with the existing entries
 * @param ingestDocument the document to read from (or initialise)
 * @param targetField    the field holding previously stored entities
 */
private static void mergeExisting(Map<String, Set<String>> entities, IngestDocument ingestDocument, String targetField) {
    if (!ingestDocument.hasField(targetField)) {
        ingestDocument.setFieldValue(targetField, entities);
        return;
    }

    // NOTE(review): the cast is unchecked; the stored values may not actually be Sets
    // (confirm what callers write to this field).
    @SuppressWarnings("unchecked")
    Map<String, Set<String>> stored = ingestDocument.getFieldValue(targetField, Map.class);
    entities.putAll(stored);
}
Example 8
Source File: GeoExtensionProcessor.java From elasticsearch-plugin-geoshape with MIT License | 4 votes |
@SuppressWarnings("unchecked") @Override public IngestDocument execute(IngestDocument ingestDocument) throws IOException, ParseException { List<String> geo_objects_list = getGeoShapeFieldsFromDoc(ingestDocument); for (String geoShapeField : geo_objects_list) { Object geoShapeObject = ingestDocument.getFieldValue(geoShapeField, Object.class); if (geoShapeObject == null) { continue; } ShapeBuilder<?,?, ?> shapeBuilder = getShapeBuilderFromObject(geoShapeObject); Shape shape = null; try { shape = shapeBuilder.buildS4J(); } catch (InvalidShapeException ignored) {} if (shape == null && fixedField == null) { throw new IllegalArgumentException("unable to parse shape [" + shapeBuilder.toWKT() + "]"); } Geometry geom = new WKTReader().read(shapeBuilder.toWKT()); // fix shapes if needed if (shape == null && fixedField != null) { geom = GeoUtils.removeDuplicateCoordinates(geom); } ingestDocument.removeField(geoShapeField); if (keepShape) { ingestDocument.setFieldValue(geoShapeField + "." + shapeField, geoShapeObject); } if (fixedField != null) { ingestDocument.setFieldValue(geoShapeField + "." + fixedField, new WKTWriter().write(geom)); } // compute and add extra geo sub-fields byte[] wkb = new WKBWriter().write(geom); // elastic will auto-encode this as b64 if (hashField != null) ingestDocument.setFieldValue( geoShapeField + ".hash", String.valueOf(GeoUtils.getHashFromWKB(new BytesRef(wkb)))); if (wkbField != null) ingestDocument.setFieldValue( geoShapeField + "." + wkbField, wkb); if (typeField != null) ingestDocument.setFieldValue( geoShapeField + "." + typeField, geom.getGeometryType()); if (areaField != null) ingestDocument.setFieldValue( geoShapeField + "." + areaField, geom.getArea()); if (centroidField != null) ingestDocument.setFieldValue( geoShapeField + "." + centroidField, GeoUtils.getCentroidFromGeom(geom)); if (bboxField != null) { Coordinate[] coords = geom.getEnvelope().getCoordinates(); if (coords.length >= 4) ingestDocument.setFieldValue( geoShapeField + "." 
+ bboxField, GeoUtils.getBboxFromCoords(coords)); } } return ingestDocument; }