org.elasticsearch.ingest.Processor Java Examples

The following examples show how to use org.elasticsearch.ingest.Processor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CsvProcessor.java    From elasticsearch-ingest-csv with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link CsvProcessor} from its pipeline configuration.
 *
 * <p>Properties read from {@code config}, in this order: {@code field}, {@code columns},
 * {@code quote_char} (default {@code "}), {@code separator} (default {@code ,}) and
 * {@code max_chars_per_column} (default 4096).
 *
 * @param factories registry of the other processor factories; not used by this factory
 * @param tag       processor tag, handed to the property readers and to the created processor
 * @param config    processor configuration the properties are read from
 * @return the configured processor
 * @throws IllegalArgumentException if {@code columns} is empty or contains the same name more
 *                                  than once, if {@code quote_char} or {@code separator} is not
 *                                  exactly one character, or if {@code max_chars_per_column} is
 *                                  outside 1..64000
 * @throws Exception                declared by the overridden factory method
 */
@Override
public CsvProcessor create(Map<String, Processor.Factory> factories, String tag, Map<String, Object> config)
    throws Exception {
    String field = readStringProperty(TYPE, tag, config, "field");
    List<String> columns = readList(TYPE, tag, config, "columns");
    if (columns.isEmpty()) {
        throw new IllegalArgumentException("columns is missing");
    }
    // A repeated column name would make two CSV cells target the same document field,
    // so reject the configuration up front instead of silently losing a value.
    if (columns.stream().distinct().count() != columns.size()) {
        throw new IllegalArgumentException("columns contains duplicate names");
    }
    String quoteChar = readStringProperty(TYPE, tag, config, "quote_char", "\"");
    if (Strings.isEmpty(quoteChar) || quoteChar.length() != 1) {
        throw new IllegalArgumentException("quote_char must be a character, like \" or \'");
    }
    String separator = readStringProperty(TYPE, tag, config, "separator", ",");
    if (Strings.isEmpty(separator) || separator.length() != 1) {
        throw new IllegalArgumentException("separator must be a character, like , or TAB");
    }
    int maxCharsPerColumn = readIntProperty(TYPE, tag, config, "max_chars_per_column", 4096);
    if (maxCharsPerColumn < 1 || maxCharsPerColumn > 64000) {
        throw new IllegalArgumentException("maxCharsPerColumn must be between 1 and 64000 (default 4096)");
    }
    return new CsvProcessor(tag, field, columns, quoteChar.charAt(0), separator.charAt(0), maxCharsPerColumn);
}
 
Example #2
Source File: OpenNlpProcessorTests.java    From elasticsearch-ingest-opennlp with Apache License 2.0 6 votes vote down vote up
/**
 * Configures the processor with an {@code annotated_text_field} and checks the annotated
 * text written to that field after executing the processor on the test document.
 */
public void testAnnotatedText() throws Exception {
    final String annotatedField = "my_annotated_text_field";

    final Map<String, Object> settings = new HashMap<>();
    settings.put("field", "source_field");
    settings.put("annotated_text_field", annotatedField);

    final OpenNlpProcessor.Factory processorFactory = new OpenNlpProcessor.Factory(service);
    final Map<String, Processor.Factory> emptyRegistry = Collections.emptyMap();
    final OpenNlpProcessor underTest = processorFactory.create(emptyRegistry, randomAlphaOfLength(10), settings);

    final IngestDocument processed = underTest.execute(getIngestDocument());
    final String annotated = processed.getFieldValue(annotatedField, String.class);

    final String expected =
            "[Kobe Bryant](Person_Kobe Bryant) was one of the best basketball players of all times. Not even"
            + " [Michael Jordan](Person_Michael Jordan) has ever scored 81 points in one game. [Munich](Location_Munich) is really"
            + " an awesome city, but [New York](Location_New York) is as well. [Yesterday](Date_Yesterday) has been the hottest"
            + " day of the year.";
    assertThat(annotated, is(expected));
}
 
Example #3
Source File: YauaaProcessor.java    From yauaa with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link YauaaProcessor} from the processor configuration.
 *
 * <p>Reads {@code field}, {@code target_field} (default {@code user_agent}), the optional
 * {@code fieldNames} list, {@code cacheSize} and {@code preheat} (both default -1) and the
 * optional {@code extraRules} string, then passes them to the processor constructor.
 *
 * @param factories registry of the other processor factories; not used by this factory
 * @param tag       processor tag
 * @param config    processor configuration the properties are read from
 * @return the configured processor
 */
@Override
public YauaaProcessor create(Map<String, Processor.Factory> factories, String tag, Map<String, Object> config) {
    final String sourceField = readStringProperty(TYPE, tag, config, "field");
    final String destinationField = readStringProperty(TYPE, tag, config, "target_field", "user_agent");
    final List<String> requestedFieldNames = readOptionalList(TYPE, tag, config, "fieldNames");
    final Integer parserCacheSize = readIntProperty(TYPE, tag, config, "cacheSize", -1);
    final Integer preheatCount = readIntProperty(TYPE, tag, config, "preheat", -1);
    final String additionalRules = readOptionalStringProperty(TYPE, tag, config, "extraRules");

    return new YauaaProcessor(
        tag,
        sourceField,
        destinationField,
        requestedFieldNames,
        parserCacheSize,
        preheatCount,
        additionalRules);
}
 
Example #4
Source File: LangDetectProcessor.java    From elasticsearch-ingest-langdetect with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link LangDetectProcessor} from the processor configuration.
 *
 * <p>Reads {@code field}, {@code target_field}, the optional {@code max_length} (parsed as a
 * byte size with {@code DEFAULT_MAX_LENGTH} as fallback) and {@code ignore_missing}
 * (default {@code false}).
 *
 * @param factories registry of the other processor factories; not used by this factory
 * @param tag       processor tag
 * @param config    processor configuration the properties are read from
 * @return the configured processor
 * @throws Exception declared by the overridden factory method
 */
@Override
public LangDetectProcessor create(Map<String, Processor.Factory> factories, String tag, Map<String, Object> config)
        throws Exception {
    final String sourceField = readStringProperty(TYPE, tag, config, "field");
    final String destinationField = readStringProperty(TYPE, tag, config, "target_field");

    // The raw setting is read and parsed before "ignore_missing" is looked at,
    // exactly as the properties are listed above.
    final ByteSizeValue lengthLimit = ByteSizeValue.parseBytesSizeValue(
            readOptionalStringProperty(TYPE, tag, config, "max_length"),
            DEFAULT_MAX_LENGTH,
            "max_length");

    final boolean skipWhenMissing = readBooleanProperty(TYPE, tag, config, "ignore_missing", false);

    return new LangDetectProcessor(tag, sourceField, destinationField, lengthLimit, skipWhenMissing);
}
 
Example #5
Source File: IngestLangDetectPlugin.java    From elasticsearch-ingest-langdetect with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the language profiles from the classpath and registers the lang-detect processor
 * factory under {@code LangDetectProcessor.TYPE}.
 *
 * @param parameters processor parameters; only {@code parameters.env} is used
 * @return a map holding the single lang-detect factory
 * @throws ElasticsearchException wrapping any failure while loading the profiles
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    try {
        SecureDetectorFactory.loadProfileFromClassPath(parameters.env);
    } catch (LangDetectException | URISyntaxException | IOException loadFailure) {
        throw new ElasticsearchException(loadFailure);
    }

    final Map<String, Processor.Factory> registered = new HashMap<>(1);
    final LangDetectProcessor.Factory langDetectFactory = new LangDetectProcessor.Factory();
    registered.put(LangDetectProcessor.TYPE, langDetectFactory);
    return registered;
}
 
Example #6
Source File: OpenNlpProcessor.java    From elasticsearch-ingest-opennlp with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@link OpenNlpProcessor} from the processor configuration.
 *
 * <p>Reads {@code field}, {@code target_field} (default {@code entities}), the optional
 * {@code annotated_text_field} and the optional {@code fields} list. When no fields are
 * configured, the models reported by the OpenNLP service are used instead.
 *
 * @param registry     registry of the other processor factories; not used by this factory
 * @param processorTag processor tag
 * @param config       processor configuration the properties are read from
 * @return the configured processor
 */
@Override
public OpenNlpProcessor create(Map<String, Processor.Factory> registry, String processorTag, Map<String, Object> config) {
    final String sourceField = readStringProperty(TYPE, processorTag, config, "field");
    final String destinationField = readStringProperty(TYPE, processorTag, config, "target_field", "entities");
    final String annotatedField = readOptionalStringProperty(TYPE, processorTag, config, "annotated_text_field");
    final List<String> configuredFields = readOptionalList(TYPE, processorTag, config, "fields");

    final Set<String> foundFields;
    if (configuredFields != null && !configuredFields.isEmpty()) {
        foundFields = new HashSet<>(configuredFields);
    } else {
        // Nothing configured: fall back to every model the service knows about.
        foundFields = openNlpService.getModels();
    }

    return new OpenNlpProcessor(openNlpService, processorTag, sourceField, destinationField, annotatedField, foundFields);
}
 
Example #7
Source File: IngestOpenNlpPlugin.java    From elasticsearch-ingest-opennlp with Apache License 2.0 5 votes vote down vote up
/**
 * Starts an {@link OpenNlpService} rooted at the {@code ingest-opennlp} directory below the
 * node's config path and registers the OpenNLP processor factory backed by it.
 *
 * @param parameters processor parameters; only {@code parameters.env} is used
 * @return a single-entry map from {@code OpenNlpProcessor.TYPE} to the factory
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    final Path pluginConfigDir = parameters.env.configFile().resolve("ingest-opennlp");

    final OpenNlpService service = new OpenNlpService(pluginConfigDir, parameters.env.settings());
    service.start();

    final Processor.Factory openNlpFactory = new OpenNlpProcessor.Factory(service);
    return Collections.singletonMap(OpenNlpProcessor.TYPE, openNlpFactory);
}
 
Example #8
Source File: OpenNlpProcessorTests.java    From elasticsearch-ingest-opennlp with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the processor without a {@code fields} setting and checks that names, dates and
 * locations are all present in the extracted entity data.
 */
public void testConstructorNoFieldsSpecified() throws Exception {
    final Map<String, Object> settings = new HashMap<>();
    settings.put("field", "source_field");
    settings.put("target_field", "target_field");

    final OpenNlpProcessor.Factory processorFactory = new OpenNlpProcessor.Factory(service);
    final Map<String, Processor.Factory> emptyRegistry = Collections.emptyMap();
    final OpenNlpProcessor underTest = processorFactory.create(emptyRegistry, randomAlphaOfLength(10), settings);

    final Map<String, Object> entities = getIngestDocumentData(underTest);

    assertThatHasElements(entities, "names", "Kobe Bryant", "Michael Jordan");
    assertThatHasElements(entities, "dates", "Yesterday");
    assertThatHasElements(entities, "locations", "Munich", "New York");
}
 
Example #9
Source File: IngestCsvPlugin.java    From elasticsearch-ingest-csv with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the CSV processor factory under {@code CsvProcessor.TYPE}.
 *
 * @param parameters processor parameters; not used
 * @return an immutable single-entry map of processor factories
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    final MapBuilder<String, Processor.Factory> registry = MapBuilder.newMapBuilder();
    registry.put(CsvProcessor.TYPE, new CsvProcessor.Factory());
    return registry.immutableMap();
}
 
Example #10
Source File: IngestYauaaPlugin.java    From yauaa with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the Yauaa processor factory under {@code YauaaProcessor.TYPE}.
 *
 * @param parameters processor parameters; not used
 * @return an immutable single-entry map of processor factories
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    final MapBuilder<String, Processor.Factory> registry = MapBuilder.newMapBuilder();
    registry.put(YauaaProcessor.TYPE, new YauaaProcessor.Factory());
    return registry.immutableMap();
}
 
Example #11
Source File: YauaaProcessorTest.java    From yauaa with Apache License 2.0 4 votes vote down vote up
/**
 * End-to-end check of the plugin: obtains the "yauaa" factory from the plugin, creates a
 * processor restricted to a handful of field names (plus one extra rule), runs it on a
 * document holding a Chrome-on-Android user agent and verifies which result keys are
 * present and which are absent.
 */
@Test
public void testIngestPlugin() throws Exception {
    final Map<String, Processor.Factory> factories = new IngestYauaaPlugin().getProcessors(null);
    final Processor.Factory factory = factories.get("yauaa");

    final Map<String, Object> settings = new HashMap<>();
    settings.put("field", SOURCE_FIELD);
    settings.put("target_field", TARGET_FIELD);
    settings.put("fieldNames", Arrays.asList("DeviceClass", "DeviceBrand", "DeviceName", "AgentNameVersionMajor", "FirstProductName"));
    settings.put("cacheSize", 10);
    settings.put("preheat", 10);
    settings.put("extraRules", "config:\n- matcher:\n    extract:\n      - 'FirstProductName     : 1 :agent.(1)product.(1)name'\n");

    final Processor yauaa = factory.create(factories, "tag", settings);
    assertEquals("yauaa", yauaa.getType());

    final String userAgent =
        "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) "
            + "AppleWebKit/537.36 (KHTML, like Gecko) "
            + "Chrome/53.0.2785.124 Mobile Safari/537.36";
    final Map<String, Object> source = new HashMap<>();
    source.put(SOURCE_FIELD, userAgent);
    final IngestDocument input = new IngestDocument("index", "type", "id", null, 42L, VersionType.EXTERNAL, source);

    final Map<String, Object> output = yauaa.execute(input).getSourceAndMetadata();
    MatcherAssert.assertThat(output, hasKey(TARGET_FIELD));

    final Map<String, String> results = (Map<String, String>) output.get(TARGET_FIELD);

    // Fields that were explicitly requested via "fieldNames".
    assertHasKValue(results, "FirstProductName", "Mozilla");
    assertHasKValue(results, "DeviceClass", "Phone");
    assertHasKValue(results, "DeviceBrand", "Google");
    assertHasKValue(results, "DeviceName", "Google Nexus 6");
    assertHasKValue(results, "AgentNameVersionMajor", "Chrome 53");

    // Fields that come along implicitly because the requested ones are built from them.
    assertHasKValue(results, "AgentName", "Chrome");
    assertHasKValue(results, "AgentVersion", "53.0.2785.124");
    assertHasKValue(results, "AgentVersionMajor", "53");

    // Fields that were not requested and therefore must not show up.
    final String[] notRequested = {
        "OperatingSystemClass",
        "OperatingSystemName",
        "OperatingSystemNameVersion",
        "OperatingSystemNameVersionMajor",
        "OperatingSystemVersion",
        "OperatingSystemVersionBuild",
        "OperatingSystemVersionMajor",
        "LayoutEngineClass",
        "LayoutEngineName",
        "LayoutEngineNameVersion",
        "LayoutEngineNameVersionMajor",
        "LayoutEngineVersion",
        "LayoutEngineVersionMajor",
        "AgentClass",
        "AgentNameVersion",
    };
    for (String absentKey : notRequested) {
        assertHasNotKey(results, absentKey);
    }

    LoggerFactory.getLogger("TestYauaaProcessor").info("Complete set of returned results:{}", results);
}
 
Example #12
Source File: GeoExtensionPlugin.java    From elasticsearch-plugin-geoshape with MIT License 4 votes vote down vote up
/**
 * Registers the geo-extension processor factory under {@code GeoExtensionProcessor.TYPE}.
 *
 * @param parameters processor parameters; not used
 * @return a single-entry map of processor factories
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    final Processor.Factory geoFactory = new GeoExtensionProcessor.Factory();
    return Collections.singletonMap(GeoExtensionProcessor.TYPE, geoFactory);
}
 
Example #13
Source File: GeoExtensionProcessor.java    From elasticsearch-plugin-geoshape with MIT License 4 votes vote down vote up
/**
 * Builds a {@link GeoExtensionProcessor} from the processor configuration.
 *
 * <p>Besides {@code field}, the optional {@code path}, {@code keep_original_shape} and
 * {@code shape_field}, every optional output ({@code fix_shape}, {@code wkb}, {@code hash},
 * {@code type}, {@code area}, {@code bbox}, {@code centroid}) is controlled by a boolean
 * switch that defaults to {@code true}. The matching {@code *_field} name is only read when
 * its switch is on; otherwise {@code null} is passed to the processor for that output.
 *
 * @param registry     registry of the other processor factories; not used by this factory
 * @param processorTag processor tag
 * @param config       processor configuration the properties are read from
 * @return the configured processor
 */
@Override
public GeoExtensionProcessor create(Map<String, Processor.Factory> registry, String processorTag,
                                    Map<String, Object> config) {
    final String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
    final String path = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "path");

    final boolean keepShape = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "keep_original_shape", true);
    final String shapeField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "shape_field", "shape");

    final boolean fixEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "fix_shape", true);
    final String fixedField = fixEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "fixed_field", "fixed_shape")
            : null;

    final boolean wkbEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "wkb", true);
    final String wkbField = wkbEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "wkb_field", "wkb")
            : null;

    final boolean hashEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "hash", true);
    final String hashField = hashEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "hash_field", "hash")
            : null;

    final boolean typeEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "type", true);
    final String typeField = typeEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "type_field", "type")
            : null;

    final boolean areaEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "area", true);
    final String areaField = areaEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "area_field", "area")
            : null;

    final boolean bboxEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "bbox", true);
    final String bboxField = bboxEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "bbox_field", "bbox")
            : null;

    final boolean centroidEnabled = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "centroid", true);
    final String centroidField = centroidEnabled
            ? ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "centroid_field", "centroid")
            : null;

    return new GeoExtensionProcessor(
            processorTag, field, path,
            keepShape, shapeField,
            fixedField, wkbField, hashField, typeField,
            areaField, bboxField, centroidField
    );
}