org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest Java Examples

Source File: AnalyzeDemo.java From elasticsearch-full with Apache License 2.0

6 votes

/**
 * Analyzes a sample sentence containing non-ASCII letters with the {@code standard}
 * tokenizer followed by the {@code asciifolding} token filter, and prints every
 * resulting term to standard output.
 *
 * @throws Exception if the analyze call fails
 */
@Test
public void testTokenizer() throws Exception {
    AnalyzeRequest analyzeRequest = new AnalyzeRequest();
    analyzeRequest.text("My œsophagus caused a débâcle");
    // Tokenizer choices:
    //  - whitespace: splits on whitespace characters (spaces, tabs, newlines, ...)
    //  - letter:     uses a different strategy and splits on any non-letter character
    //  - standard:   uses the Unicode text segmentation algorithm
    //
    // "standard" is a tokenizer and "asciifolding" is a token filter, so each one
    // must be registered in its own slot; registering them as a token filter and
    // a char filter respectively does not build the intended analysis chain.
    analyzeRequest.tokenizer("standard");
    analyzeRequest.addTokenFilter("asciifolding");
    ActionFuture<AnalyzeResponse> analyzeResponseActionFuture = client.admin().indices().analyze(analyzeRequest);
    List<AnalyzeResponse.AnalyzeToken> analyzeTokens = analyzeResponseActionFuture.actionGet().getTokens();
    for (AnalyzeResponse.AnalyzeToken analyzeToken : analyzeTokens) {
        System.out.println(analyzeToken.getTerm());
    }
}

Source File: ElasticsearchUtil.java From SpringBootLearn with Apache License 2.0

5 votes

/**
 * Analyzer smoke test (the original note describes it as an IK tokenizer test).
 *
 * <p>Analyzes the fixed text "书名" against the "entity" index using the
 * "standard" analyzer and concatenates the returned terms.
 *
 * <p>NOTE(review): the separator appended after each term is the four literal
 * characters backslash, 'r', backslash, 'n' -- not a CR/LF pair. Confirm that
 * this is intended.
 *
 * @return all terms, each one followed by the separator
 */
public static String ik() {
    AnalyzeRequest request = new AnalyzeRequest("entity");
    request.text("书名");
    request.analyzer("standard");  // alternatives: ik_smart, ik_max_word

    AnalyzeResponse response = client.admin().indices().analyze(request).actionGet();

    StringBuilder joined = new StringBuilder();
    for (AnalyzeResponse.AnalyzeToken current : response.getTokens()) {
        joined.append(current.getTerm()).append("\\r\\n");
    }
    return joined.toString();
}

Source File: Test.java From dht-spider with MIT License

5 votes

/**
 * Analyzes a fixed sample of file names with the "ik_max_word" analyzer, prints
 * every returned term, and finally prints the attributes of the first token.
 *
 * <p>Nothing is printed when the response carries no tokens.
 *
 * @throws Exception if the analyze call fails
 */
public static void anylyze() throws Exception{
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("ReadMe.txt Screenshots,ReadMe.txt Screenshots,1.jpg COVER.jpg Screenshots,4.jpg Screenshots,2.jpg Screenshots,3.jpg FIFA.Street.2012 - RELOADED.rar");
    request.analyzer("ik_max_word");

    AnalyzeResponse response = client.indices().analyze(request, RequestOptions.DEFAULT);
    List<AnalyzeResponse.AnalyzeToken> tokens = response.getTokens();
    // Guard against an empty result: get(0) below would otherwise throw.
    if (tokens == null || tokens.isEmpty()) {
        return;
    }
    for (AnalyzeResponse.AnalyzeToken to : tokens) {
        System.out.println(to.getTerm());
    }
    System.out.println(tokens.get(0).getAttributes());
}

Source File: RestAnalyzeAction.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds an {@link AnalyzeRequest} from the REST request and dispatches it.
 *
 * <p>URL parameters are applied first. If the request has a body, it is either
 * parsed as structured content via {@code buildFromContent} (when a content type
 * can be guessed) or, when no content type is detected and no "text" parameter
 * was supplied, used verbatim as the text to analyze.
 */
@Override
public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) {
    final String[] texts = request.paramAsStringArrayOrEmptyIfAll("text");

    final AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
    analyzeRequest.text(texts);
    analyzeRequest.analyzer(request.param("analyzer"));
    analyzeRequest.field(request.param("field"));
    analyzeRequest.tokenizer(request.param("tokenizer"));

    // Token filters may be given under several parameter names. Resolve them from
    // the lowest-precedence alias to the highest so that "filter" wins.
    String[] tokenFilters = analyzeRequest.tokenFilters();
    for (String alias : new String[]{"filters", "token_filters", "token_filter", "filter"}) {
        tokenFilters = request.paramAsStringArray(alias, tokenFilters);
    }
    analyzeRequest.tokenFilters(tokenFilters);

    // Same scheme for char filters: "char_filter" takes precedence over "char_filters".
    String[] charFilters = analyzeRequest.charFilters();
    for (String alias : new String[]{"char_filters", "char_filter"}) {
        charFilters = request.paramAsStringArray(alias, charFilters);
    }
    analyzeRequest.charFilters(charFilters);

    analyzeRequest.explain(request.paramAsBoolean("explain", false));
    analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));

    if (RestActions.hasBodyContent(request)) {
        final XContentType bodyType = RestActions.guessBodyContentType(request);
        if (bodyType != null) {
            // NOTE: if a rest request with an xcontent body also has request parameters,
            // the parameters do not override the xcontent values
            buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
        } else if (texts == null || texts.length == 0) {
            // No recognizable content type and no "text" parameter: the raw body is the text.
            analyzeRequest.text(new String[]{ RestActions.getRestContent(request).toUtf8() });
        }
    }

    client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
}

Source File: XPackBaseDemo.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Sends an analyze request for a short Chinese phrase, without naming an
 * analyzer, and prints each returned term to standard output.
 *
 * @throws Exception if the analyze call fails
 */
@Test
public void testClientConnection() throws Exception {
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("美女");

    AnalyzeResponse response = client.admin().indices().analyze(request).actionGet();
    for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
        System.out.println(token.getTerm());
    }
}

Source File: BaseDemo.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Analyzes the phrase "中华人民共和国" through the admin client, with no analyzer
 * specified on the request, and prints every term of the response.
 *
 * @throws Exception if the analyze call fails
 */
@Test
public void testClientConnection() throws Exception {
    AnalyzeRequest phraseRequest = new AnalyzeRequest();
    phraseRequest.text("中华人民共和国");

    ActionFuture<AnalyzeResponse> pending = client.admin().indices().analyze(phraseRequest);
    AnalyzeResponse result = pending.actionGet();

    List<AnalyzeResponse.AnalyzeToken> terms = result.getTokens();
    for (AnalyzeResponse.AnalyzeToken term : terms) {
        System.out.println(term.getTerm());
    }
}

Source File: CrudDemo.java From javabase with Apache License 2.0

5 votes

/**
 * Analyzes {@code text} with the named analyzer against {@code INDEX_NAME} and
 * logs every resulting term at info level, prefixed with the analyzer name.
 *
 * @param indicesAdminClient client used to execute the analyze request
 * @param analyzerName       name of the analyzer to apply; also used as the log prefix
 * @param text               text to analyze
 */
private static void showAnaylzerText(IndicesAdminClient indicesAdminClient,String analyzerName, String text) {
    AnalyzeRequest request = new AnalyzeRequest(INDEX_NAME).analyzer(analyzerName).text(text);
    AnalyzeResponse analyzeResponse = indicesAdminClient.analyze(request).actionGet();
    List<AnalyzeResponse.AnalyzeToken> tokens = analyzeResponse.getTokens();
    for (AnalyzeResponse.AnalyzeToken analyzeToken : tokens) {
        // Pass the analyzer name as an argument instead of concatenating it into the
        // format string, so a name containing "{}" cannot corrupt the message.
        log.info("{}: {}", analyzerName, analyzeToken.getTerm());
    }
}

Source File: AnalyzeHelper.java From es-service-parent with Apache License 2.0

5 votes

/**
 * Tokenizes a string with the given analyzer; returns an empty list when the
 * text cannot be tokenized.
 *
 * <p>When the index is reported missing, the {@code reLoad} flag is raised and
 * the request is retried. The number of attempts is bounded, so a permanently
 * missing index yields an empty list instead of recursing without end.
 *
 * @param analyzer name of the analyzer to apply
 * @param str      text to tokenize
 * @return the terms produced by the analyzer, or an empty list if there are none
 */
public static List<String> analyze(String analyzer, String str) {
    // One initial attempt plus a small number of retries after flagging a reload.
    final int maxAttempts = 3;

    AnalyzeResponse ar = null;
    for (int attempt = 0; attempt < maxAttempts; attempt++) {
        try {
            AnalyzeRequest request = new AnalyzeRequest(str).analyzer(analyzer).index(
                    getCurrentValidIndex());
            ar = ESClient.getClient().admin().indices().analyze(request).actionGet();
            break;
        } catch (IndexMissingException e) {
            // Raise the reload flag (once) before the next attempt.
            if (!reLoad) {
                synchronized (AnalyzeHelper.class) {
                    if (!reLoad) {
                        reLoad = true;
                    }
                }
            }
        }
    }

    List<String> analyzeTokens = Lists.newArrayList();
    if (ar == null || ar.getTokens() == null || ar.getTokens().isEmpty()) {
        return analyzeTokens;
    }
    for (AnalyzeToken at : ar.getTokens()) {
        analyzeTokens.add(at.getTerm());
    }
    return analyzeTokens;
}

Source File: AbstractClient.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Runs {@code AnalyzeAction.INSTANCE} for the given request through {@code execute}.
 *
 * @param request the analyze request to execute
 * @return the future returned by {@code execute}
 */
@Override
public ActionFuture<AnalyzeResponse> analyze(final AnalyzeRequest request) {
    return execute(AnalyzeAction.INSTANCE, request);
}

Source File: AbstractClient.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Runs {@code AnalyzeAction.INSTANCE} for the given request through {@code execute},
 * handing the supplied listener to it.
 *
 * @param request  the analyze request to execute
 * @param listener the listener passed on to {@code execute}
 */
@Override
public void analyze(final AnalyzeRequest request, final ActionListener<AnalyzeResponse> listener) {
    execute(AnalyzeAction.INSTANCE, request, listener);
}

Source File: RestAnalyzeAction.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Populates an {@link AnalyzeRequest} from a structured request body.
 *
 * <p>The body must be a single object. Recognized fields are the text (string or
 * array), analyzer, field, tokenizer, token filters (array), char filters (array),
 * explain (boolean) and attributes (array). Any other field, or a recognized field
 * with an unexpected token type, is rejected.
 *
 * @param content           the raw request body
 * @param analyzeRequest    the request to fill in
 * @param parseFieldMatcher matcher used to compare field names against the known fields
 * @throws IllegalArgumentException if the body is malformed, contains an unknown
 *                                  parameter or a value of the wrong type, or cannot be read
 */
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
    try (XContentParser parser = XContentHelper.createParser(content)) {
        if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
            throw new IllegalArgumentException("Malformed content, must start with an object");
        }
        XContentParser.Token token;
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.text(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.text(readStringArray(parser, currentFieldName, "text"));
            } else if (parseFieldMatcher.match(currentFieldName, Fields.ANALYZER) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.analyzer(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.field(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.tokenizer(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.tokenFilters(readStringArray(parser, currentFieldName, "token filter's name"));
            } else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.charFilters(readStringArray(parser, currentFieldName, "char filter's name"));
            } else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN)) {
                if (parser.isBooleanValue()) {
                    analyzeRequest.explain(parser.booleanValue());
                } else {
                    throw new IllegalArgumentException(currentFieldName + " must be either 'true' or 'false'");
                }
            } else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.attributes(readStringArray(parser, currentFieldName, "attribute name"));
            } else {
                throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Failed to parse request body", e);
    }
}

/**
 * Reads the remaining elements of an array the parser is currently positioned in
 * (just after START_ARRAY) and returns them as strings.
 *
 * @param parser          parser positioned inside the array
 * @param fieldName       name of the field being read, used in the error message
 * @param expectedContent description of the allowed element content, used in the error message
 * @return the array elements, in document order
 * @throws IllegalArgumentException if an element is not a value token
 * @throws IOException              if the parser fails to read the content
 */
private static String[] readStringArray(XContentParser parser, String fieldName, String expectedContent) throws IOException {
    List<String> values = new ArrayList<>();
    XContentParser.Token token;
    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
        if (token.isValue() == false) {
            throw new IllegalArgumentException(fieldName + " array element should only contain " + expectedContent);
        }
        values.add(parser.text());
    }
    return values.toArray(new String[values.size()]);
}

Source File: AnalyzeRequestBuilder.java From elasticshell with Apache License 2.0

4 votes

/**
 * Creates the builder around a new {@code AnalyzeRequest} constructed with a
 * {@code null} argument; the client and both converters are passed to the
 * superclass constructor unchanged.
 *
 * @param client       client handed to the superclass
 * @param jsonToString converter handed to the superclass
 * @param stringToJson converter handed to the superclass
 */
public AnalyzeRequestBuilder(Client client, JsonToString<JsonInput> jsonToString, StringToJson<JsonOutput> stringToJson) {
    super(client, new AnalyzeRequest(null), jsonToString, stringToJson);
}

Source File: AnalyzeRequestBuilder.java From elasticshell with Apache License 2.0

4 votes

/**
 * Submits the request through the indices admin client's {@code analyze} call.
 *
 * @param request the analyze request to submit
 * @return the future returned by the indices admin client
 */
@Override
protected ActionFuture<AnalyzeResponse> doExecute(AnalyzeRequest request) {
    return client.admin().indices().analyze(request);
}

Source File: AnalyzeRequestBuilder.java From elasticshell with Apache License 2.0

4 votes

/**
 * Delegates rendering to the superclass and then calls {@code endObject()} on the
 * builder it returns.
 *
 * @param request  the executed request, forwarded to the superclass
 * @param response the response to render, forwarded to the superclass
 * @param builder  the builder to write into, forwarded to the superclass
 * @return the builder after {@code endObject()} has been called
 * @throws IOException declared by the signature
 */
@Override
protected XContentBuilder toXContent(AnalyzeRequest request, AnalyzeResponse response, XContentBuilder builder) throws IOException {
    // NOTE(review): presumably the superclass leaves an object open that is closed here -- confirm.
    return super.toXContent(request, response, builder).endObject();
}

Source File: IndicesAdminClient.java From Elasticsearch with Apache License 2.0

2 votes

/**
 * Analyze text under the provided index.
 *
 * @param request the analyze request
 * @return a future for the analyze response
 */
ActionFuture<AnalyzeResponse> analyze(AnalyzeRequest request);

Source File: IndicesAdminClient.java From Elasticsearch with Apache License 2.0

2 votes

/**
 * Analyze text under the provided index.
 *
 * @param request  the analyze request
 * @param listener a listener for the analyze response
 */
void analyze(AnalyzeRequest request, ActionListener<AnalyzeResponse> listener);

org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest Java Examples