org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest#text

Source File: AnalyzeDemo.java From elasticsearch-full with Apache License 2.0

6 votes

/**
 * Analyzes a sample sentence containing ligatures and accented characters with a
 * custom chain and prints every resulting term to standard output.
 *
 * <p>The chain is the {@code standard} tokenizer followed by the
 * {@code asciifolding} token filter. {@code asciifolding} is a token filter, not a
 * char filter, so it must be registered with {@code addTokenFilter}.
 *
 * @throws Exception if the analyze call fails
 */
@Test
public void testTokenizer() throws Exception {
    AnalyzeRequest analyzeRequest = new AnalyzeRequest();
    analyzeRequest.text("My œsophagus caused a débâcle");
    /*
     * Tokenizer choices:
     * - whitespace: splits on whitespace characters (spaces, tabs, newlines, ...).
     * - letter: uses a different strategy and splits on any non-letter character.
     * - standard: uses the Unicode text segmentation algorithm.
     */
    analyzeRequest.tokenizer("standard");
    // Fold non-ASCII characters (e.g. "œ", "é", "â") to their ASCII equivalents.
    analyzeRequest.addTokenFilter("asciifolding");
    ActionFuture<AnalyzeResponse> analyzeResponseActionFuture = client.admin().indices().analyze(analyzeRequest);
    List<AnalyzeResponse.AnalyzeToken> analyzeTokens = analyzeResponseActionFuture.actionGet().getTokens();
    for (AnalyzeResponse.AnalyzeToken analyzeToken : analyzeTokens) {
        System.out.println(analyzeToken.getTerm());
    }
}

Source File: Test.java From dht-spider with MIT License

5 votes

/**
 * Runs the {@code ik_max_word} analyzer over a fixed sample string, prints each
 * produced term, and then prints the attributes of the first token.
 *
 * <p>If the analyzer returns no tokens, nothing is printed instead of failing
 * with an {@link IndexOutOfBoundsException} on the first-token lookup.
 *
 * @throws Exception if the analyze call fails
 */
public static void anylyze() throws Exception{
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("ReadMe.txt Screenshots,ReadMe.txt Screenshots,1.jpg COVER.jpg Screenshots,4.jpg Screenshots,2.jpg Screenshots,3.jpg FIFA.Street.2012 - RELOADED.rar");
    request.analyzer("ik_max_word");

    AnalyzeResponse response = client.indices().analyze(request, RequestOptions.DEFAULT);
    List<AnalyzeResponse.AnalyzeToken> tokens = response.getTokens();
    // Nothing to report when the analyzer produced no tokens.
    if (tokens == null || tokens.isEmpty()) {
        return;
    }
    for (AnalyzeResponse.AnalyzeToken to : tokens) {
        System.out.println(to.getTerm());
    }
    // Reuse the list fetched above rather than asking the response again.
    System.out.println(tokens.get(0).getAttributes());
}

Source File: RestAnalyzeAction.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds an {@link AnalyzeRequest} from the REST request and dispatches it.
 *
 * <p>Request parameters are applied first. If the request carries a body, it is
 * then either parsed as structured content via {@code buildFromContent} or, when
 * no content type can be guessed and no {@code text} parameter was given, used
 * verbatim as the text to analyze.
 */
@Override
public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) {
    final String[] textParams = request.paramAsStringArrayOrEmptyIfAll("text");

    final AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
    analyzeRequest.text(textParams);
    analyzeRequest.analyzer(request.param("analyzer"));
    analyzeRequest.field(request.param("field"));
    analyzeRequest.tokenizer(request.param("tokenizer"));

    // Token filter aliases are looked up from the innermost default outwards, so
    // "filter" is consulted last (presumably each lookup falls back to the supplied default).
    String[] tokenFilterNames = request.paramAsStringArray("filters", analyzeRequest.tokenFilters());
    tokenFilterNames = request.paramAsStringArray("token_filters", tokenFilterNames);
    tokenFilterNames = request.paramAsStringArray("token_filter", tokenFilterNames);
    tokenFilterNames = request.paramAsStringArray("filter", tokenFilterNames);
    analyzeRequest.tokenFilters(tokenFilterNames);

    // Same scheme for char filters: "char_filters" first, then "char_filter".
    String[] charFilterNames = request.paramAsStringArray("char_filters", analyzeRequest.charFilters());
    charFilterNames = request.paramAsStringArray("char_filter", charFilterNames);
    analyzeRequest.charFilters(charFilterNames);

    analyzeRequest.explain(request.paramAsBoolean("explain", false));
    analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));

    if (RestActions.hasBodyContent(request)) {
        final XContentType bodyType = RestActions.guessBodyContentType(request);
        if (bodyType != null) {
            // NOTE: when a structured body is present, request parameters do not override the values it supplies
            buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
        } else if (textParams == null || textParams.length == 0) {
            // Unrecognized body and no "text" parameter: treat the raw body as the text.
            analyzeRequest.text(new String[]{ RestActions.getRestContent(request).toUtf8() });
        }
    }

    client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
}

Source File: XPackBaseDemo.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Sends an analyze request for a short Chinese string without selecting an
 * analyzer and prints each returned term.
 *
 * @throws Exception if the analyze call fails
 */
@Test
public void testClientConnection() throws Exception {
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("美女");
    // Block on the future right away; only the token list is needed.
    AnalyzeResponse response = client.admin().indices().analyze(request).actionGet();
    for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
        System.out.println(token.getTerm());
    }
}

Source File: BaseDemo.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Sends an analyze request for the string "中华人民共和国" without selecting an
 * analyzer and prints each returned term on its own line.
 *
 * @throws Exception if the analyze call fails
 */
@Test
public void testClientConnection() throws Exception {
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("中华人民共和国");
    ActionFuture<AnalyzeResponse> pending = client.admin().indices().analyze(request);
    // Wait for the response, then walk the tokens with an explicit iterator.
    Iterator<AnalyzeResponse.AnalyzeToken> tokens = pending.actionGet().getTokens().iterator();
    while (tokens.hasNext()) {
        System.out.println(tokens.next().getTerm());
    }
}

Source File: RestAnalyzeAction.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Populates {@code analyzeRequest} from a structured request body.
 *
 * <p>The body must be a single object. Recognized fields are matched through
 * {@code parseFieldMatcher} against the {@code Fields} constants: text (string or
 * array), analyzer, field, tokenizer, token filters (array), char filters (array),
 * explain (boolean) and attributes (array). Any other field, or a recognized field
 * with an unexpected token type, is rejected.
 *
 * @param content           raw request body to parse
 * @param analyzeRequest    request object that receives the parsed values
 * @param parseFieldMatcher matcher used to compare field names with known fields
 * @throws IllegalArgumentException if the body does not start with an object,
 *         contains an unknown or mistyped field, or cannot be read
 */
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
    try (XContentParser parser = XContentHelper.createParser(content)) {
        if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
            throw new IllegalArgumentException("Malformed content, must start with an object");
        }
        XContentParser.Token token;
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                // Remember the field name; its value arrives as the next token.
                currentFieldName = parser.currentName();
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.text(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.text(parseNameArray(parser, currentFieldName, "text"));
            } else if (parseFieldMatcher.match(currentFieldName, Fields.ANALYZER) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.analyzer(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.field(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) {
                analyzeRequest.tokenizer(parser.text());
            } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.tokenFilters(parseNameArray(parser, currentFieldName, "token filter's name"));
            } else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.charFilters(parseNameArray(parser, currentFieldName, "char filter's name"));
            } else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN)) {
                if (parser.isBooleanValue()) {
                    analyzeRequest.explain(parser.booleanValue());
                } else {
                    throw new IllegalArgumentException(currentFieldName + " must be either 'true' or 'false'");
                }
            } else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY) {
                analyzeRequest.attributes(parseNameArray(parser, currentFieldName, "attribute name"));
            } else {
                throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
            }
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Failed to parse request body", e);
    }
}

/**
 * Reads value tokens from {@code parser} until the end of the current array and
 * returns their text forms in order.
 *
 * @param parser     parser positioned just after the array's start token
 * @param fieldName  name of the field being parsed, used in the error message
 * @param elementDescription description of the expected element, used in the error message
 * @return the text of each array element, in order
 * @throws IOException if the parser fails to read the content
 * @throws IllegalArgumentException if an element is not a value token
 */
private static String[] parseNameArray(XContentParser parser, String fieldName, String elementDescription) throws IOException {
    List<String> values = new ArrayList<>();
    XContentParser.Token token;
    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
        if (token.isValue() == false) {
            throw new IllegalArgumentException(fieldName + " array element should only contain " + elementDescription);
        }
        values.add(parser.text());
    }
    return values.toArray(new String[values.size()]);
}

Java Code Examples for org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest#text()