org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse Java Examples

Source File: ESIndex.java From pyramid with Apache License 2.0

6 votes

/**
 * Runs the given text through the named analyzer of the configured index and
 * packs the resulting terms, separated by single spaces, into an {@link Ngram}.
 *
 * @param text     the raw text to analyze
 * @param analyzer the name of the analyzer to apply
 * @return an ngram whose string value is the space-joined analyzed terms
 */
public Ngram analyze(String text, String analyzer){
    List<AnalyzeResponse.AnalyzeToken> analyzed = client.admin().indices()
            .prepareAnalyze(indexName, text)
            .setAnalyzer(analyzer)
            .get()
            .getTokens();

    // The separator starts empty and becomes a space after the first term,
    // so no trailing space is ever emitted.
    StringBuilder joined = new StringBuilder();
    String separator = "";
    for (AnalyzeResponse.AnalyzeToken term : analyzed) {
        joined.append(separator).append(term.getTerm());
        separator = " ";
    }

    Ngram result = new Ngram();
    result.setNgram(joined.toString());
    return result;
}

Source File: AnalyzeDemo.java From elasticsearch-full with Apache License 2.0

6 votes

/**
 * Tokenizes a sample sentence with the {@code standard} tokenizer, folds the
 * non-ASCII characters with the {@code asciifolding} token filter, and prints
 * every resulting term.
 */
@Test
public void testTokenizer() throws Exception {
    AnalyzeRequest analyzeRequest = new AnalyzeRequest();
    analyzeRequest.text("My œsophagus caused a débâcle");
    /*
     * whitespace tokenizer: splits on whitespace characters (spaces, tabs, newlines, ...)
     * letter tokenizer: uses a different strategy and splits on any non-letter character
     * standard tokenizer: uses the Unicode text segmentation algorithm
     */
    // This test is about tokenizers, so "standard" is selected as the tokenizer
    // rather than being registered as a token filter.
    analyzeRequest.tokenizer("standard");
    // "asciifolding" is a token filter; registering it as a char filter fails
    // because no char filter with that name exists.
    analyzeRequest.addTokenFilter("asciifolding");
    ActionFuture<AnalyzeResponse> analyzeResponseActionFuture =  client.admin().indices().analyze(analyzeRequest);
    List<AnalyzeResponse.AnalyzeToken> analyzeTokens =  analyzeResponseActionFuture.actionGet().getTokens();
    for (AnalyzeResponse.AnalyzeToken analyzeToken : analyzeTokens){
        System.out.println(analyzeToken.getTerm());
    }
}

Source File: Test.java From dht-spider with MIT License

5 votes

/**
 * Analyzes a fixed sample of file names with the {@code ik_max_word} analyzer,
 * prints every resulting term, then prints the attributes of the first token.
 *
 * @throws Exception if the analyze call fails
 */
public static void anylyze() throws Exception{
    String sampleText = "ReadMe.txt Screenshots,ReadMe.txt Screenshots,1.jpg COVER.jpg Screenshots,4.jpg Screenshots,2.jpg Screenshots,3.jpg FIFA.Street.2012 - RELOADED.rar";

    AnalyzeRequest request = new AnalyzeRequest();
    request.text(sampleText);
    request.analyzer("ik_max_word");

    AnalyzeResponse response = client.indices().analyze(request, RequestOptions.DEFAULT);

    List<AnalyzeResponse.AnalyzeToken> analyzed = response.getTokens();
    for (int i = 0; i < analyzed.size(); i++) {
        System.out.println(analyzed.get(i).getTerm());
    }

    // Dump the attributes of the first token.
    System.out.println(response.getTokens().get(0).getAttributes());
}

Source File: SetupIndexServiceImpl.java From searchanalytics-bigdata with MIT License

5 votes

/**
 * Analyzes {@code text} and returns the resulting terms in token order.
 *
 * @param indexAliasName index or alias to run the analysis against; not set
 *            on the request when {@code null}
 * @param analyzer name of the analyzer to use; not set on the request when
 *            {@code null}
 * @param tokenFilters token filters to apply; not set on the request when
 *            {@code null}
 * @param text the text to analyze
 * @return the analyzed terms; empty if the response is {@code null}
 */
@Override
public List<String> analyzeText(final String indexAliasName,
		final String analyzer, final String[] tokenFilters,
		final String text) {
	final List<String> tokens = new ArrayList<String>();
	final AnalyzeRequestBuilder analyzeRequestBuilder = searchClientService
			.getClient().admin().indices().prepareAnalyze(text);
	// The index is optional on its own; it must not depend on whether an
	// analyzer was supplied.
	if (indexAliasName != null) {
		analyzeRequestBuilder.setIndex(indexAliasName);
	}
	if (analyzer != null) {
		analyzeRequestBuilder.setAnalyzer(analyzer);
	}
	if (tokenFilters != null) {
		analyzeRequestBuilder.setTokenFilters(tokenFilters);
	}
	logger.debug(
			"Analyze request is text: {}, analyzer: {}, tokenfilters: {}",
			new Object[] { analyzeRequestBuilder.request().text(),
					analyzeRequestBuilder.request().analyzer(),
					analyzeRequestBuilder.request().tokenFilters() });
	final AnalyzeResponse analyzeResponse = analyzeRequestBuilder.get();
	if (analyzeResponse == null) {
		// Nothing to log or collect; avoid dereferencing a null response.
		return tokens;
	}
	try {
		logger.debug(
				"Analyze response is : {}",
				analyzeResponse
						.toXContent(jsonBuilder().startObject(),
								ToXContent.EMPTY_PARAMS).prettyPrint()
						.string());
	} catch (final IOException e) {
		// Failing to render the debug output must not fail the analysis.
		logger.error("Error printing response.", e);
	}
	for (final AnalyzeToken analyzeToken : analyzeResponse.getTokens()) {
		tokens.add(analyzeToken.getTerm());
	}
	return tokens;
}

Source File: ESIndex.java From pyramid with Apache License 2.0

5 votes

/**
 * Runs the given text through the named analyzer of the configured index and
 * returns the analyzed terms in token order.
 *
 * @param text     the raw text to analyze
 * @param analyzer the name of the analyzer to apply
 * @return the term of every token produced by the analyzer
 */
public List<String> analyzeString(String text, String analyzer){
    List<AnalyzeResponse.AnalyzeToken> analyzed = client.admin().indices()
            .prepareAnalyze(indexName, text)
            .setAnalyzer(analyzer)
            .get()
            .getTokens();

    List<String> terms = new ArrayList<>();
    for (AnalyzeResponse.AnalyzeToken each : analyzed) {
        terms.add(each.getTerm());
    }
    return terms;
}

Source File: URLTokenizerIntegrationTest.java From elasticsearch-analysis-url with Apache License 2.0

5 votes

/**
 * Analyzes {@code url} with {@code analyzer} and asserts that, for every
 * expected value, some resulting token has a {@code term} property equal to it.
 *
 * @param url      the URL text to analyze
 * @param analyzer the analyzer name passed to {@code analyzeURL}
 * @param expected the terms that must each be present among the tokens
 * @return the analyzed tokens, so callers can make further assertions
 */
private List<AnalyzeResponse.AnalyzeToken> assertTokensContain(String url, String analyzer, String... expected) {
    final List<AnalyzeResponse.AnalyzeToken> analyzed = analyzeURL(url, analyzer);
    for (int i = 0; i < expected.length; i++) {
        assertThat(analyzed, hasItem(Matchers.<AnalyzeResponse.AnalyzeToken>hasProperty("term", equalTo(expected[i]))));
    }
    return analyzed;
}

Source File: URLTokenizerIntegrationTest.java From elasticsearch-analysis-url with Apache License 2.0

5 votes

/**
 * Checks the terms produced by each URL tokenizer-based analyzer for the
 * shared HTTP/HTTPS test URLs and for one malformed URL.
 */
@Test
public void testAnalyze() {
    final String httpUrl = URLTokenizerTest.TEST_HTTP_URL;
    final String httpsUrl = URLTokenizerTest.TEST_HTTPS_URL;

    // Protocol only.
    assertTokensContain(httpUrl, "tokenizer_url_protocol", "http");
    assertTokensContain(httpsUrl, "tokenizer_url_protocol", "https");

    // Host, first with several terms expected, then as exactly one token.
    assertTokensContain(httpUrl, "tokenizer_url_host", "www.foo.bar.com", "foo.bar.com", "bar.com", "com");
    List<AnalyzeResponse.AnalyzeToken> singleHost = assertTokensContain(httpUrl, "tokenizer_url_host_single", "www.foo.bar.com");
    assertThat(singleHost, hasSize(1));

    // All URL parts.
    assertTokensContain(httpUrl, "tokenizer_url_all", "www.foo.bar.com:9200", "http://www.foo.bar.com");

    // Protocol and host combined.
    assertTokensContain(httpUrl, "tokenizer_url_protocol_and_host", "http", "www.foo.bar.com", "foo.bar.com", "bar.com", "com");

    // URL without a protocol.
    assertTokensContain("foo.bar.com/baz.html/query?a=1", "tokenizer_url_all_malformed", "foo.bar.com", "/baz.html/query");
}

Source File: URLTokenFilterIntegrationTest.java From elasticsearch-analysis-url with Apache License 2.0

5 votes

/**
 * Analyzes a string that mixes a URL with plain words using the
 * {@code url_host_passthrough} analyzer and checks the exact terms produced.
 */
@Test
public void testPassthrough() {
    final String[] expectedTerms = {"foo.com", "com", "baz", "bat.blah"};

    List<AnalyzeResponse.AnalyzeToken> analyzed = analyzeURL("http://foo.com:9200/foo.bar baz bat.blah", "url_host_passthrough");

    assertThat(analyzed, hasSize(4));
    for (int position = 0; position < expectedTerms.length; position++) {
        assertThat(analyzed.get(position).getTerm(), equalTo(expectedTerms[position]));
    }
}

Source File: AnalyzeHelper.java From es-service-parent with Apache License 2.0

5 votes

/**
 * Tokenizes {@code str} with the given analyzer against the index returned by
 * {@code getCurrentValidIndex()}. Returns an empty list when nothing could be
 * tokenized.
 *
 * <p>If the index is missing, the {@code reLoad} flag is raised and the call
 * is retried a bounded number of times. The previous implementation recursed
 * without limit, which could end in a {@link StackOverflowError} when the
 * index stayed missing.
 *
 * @param analyzer name of the analyzer to use
 * @param str      text to tokenize
 * @return the terms of the analyzed tokens; empty if there are none or the
 *         index is still missing after all attempts
 */
public static List<String> analyze(String analyzer, String str) {

    // Total attempts (initial call plus retries) before giving up.
    final int maxAttempts = 3;

    AnalyzeResponse ar = null;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            AnalyzeRequest request = new AnalyzeRequest(str).analyzer(analyzer).index(
                    getCurrentValidIndex());
            ar = ESClient.getClient().admin().indices().analyze(request).actionGet();
            break;
        } catch (IndexMissingException e) {
            // Raise the reload flag once; presumably getCurrentValidIndex()
            // consults it on the next attempt -- TODO confirm.
            if (!reLoad) {
                synchronized (AnalyzeHelper.class) {
                    if (!reLoad) {
                        reLoad = true;
                    }
                }
            }
        }
    }

    if (ar == null || ar.getTokens() == null || ar.getTokens().isEmpty()) {
        return Lists.newArrayList();
    }
    List<String> analyzeTokens = Lists.newArrayList();
    for (AnalyzeToken at : ar.getTokens()) {
        analyzeTokens.add(at.getTerm());
    }
    return analyzeTokens;
}

Source File: CrudDemo.java From javabase with Apache License 2.0

5 votes

/**
 * Analyzes {@code text} with the named analyzer against {@code INDEX_NAME}
 * and logs every resulting term, prefixed with the analyzer name.
 *
 * @param indicesAdminClient client used to run the analyze request
 * @param analyzerName       name of the analyzer to apply
 * @param text               text to analyze
 */
private static void showAnaylzerText(IndicesAdminClient indicesAdminClient,String analyzerName, String text) {
    AnalyzeRequest request = new AnalyzeRequest(INDEX_NAME).analyzer(analyzerName).text(text);
    AnalyzeResponse analyzeResponse = indicesAdminClient.analyze(request).actionGet();
    for (AnalyzeResponse.AnalyzeToken analyzeToken : analyzeResponse.getTokens()) {
        // Pass the analyzer name as a log argument instead of concatenating it
        // into the format string, so a name containing "{}" cannot corrupt the
        // message pattern.
        log.info("{}: {}", analyzerName, analyzeToken.getTerm());
    }
}

Source File: BaseDemo.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Sends an analyze request for a fixed Chinese phrase, with no analyzer set
 * on the request, and prints every returned term.
 */
@Test
public void testClientConnection() throws Exception {
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("中华人民共和国");

    AnalyzeResponse response = client.admin().indices().analyze(request).actionGet();
    for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
        System.out.println(token.getTerm());
    }
}

Source File: XPackBaseDemo.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Sends an analyze request for a fixed Chinese word, with no analyzer set on
 * the request, and prints every returned term.
 */
@Test
public void testClientConnection() throws Exception {
    AnalyzeRequest request = new AnalyzeRequest();
    request.text("美女");

    List<AnalyzeResponse.AnalyzeToken> terms = client.admin()
            .indices()
            .analyze(request)
            .actionGet()
            .getTokens();
    for (int i = 0; i < terms.size(); i++) {
        System.out.println(terms.get(i).getTerm());
    }
}

Source File: RestAnalyzeAction.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds an {@link AnalyzeRequest} from the REST request's parameters and,
 * when present, its body, then dispatches it through the client with a
 * listener bound to {@code channel}.
 *
 * @param request the incoming REST request
 * @param channel the channel handed to the response listener
 * @param client  the client used to execute the analyze action
 */
@Override
public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) {

    String[] texts = request.paramAsStringArrayOrEmptyIfAll("text");

    // Populate the request from URL parameters first.
    AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
    analyzeRequest.text(texts);
    analyzeRequest.analyzer(request.param("analyzer"));
    analyzeRequest.field(request.param("field"));
    analyzeRequest.tokenizer(request.param("tokenizer"));
    // Token filters accept several parameter spellings; the nested calls try
    // "filter", then "token_filter", "token_filters", "filters", and finally
    // pass the request's current value as the innermost argument.
    analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter",
        request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())))));
    // Same pattern for char filters: "char_filter", then "char_filters".
    analyzeRequest.charFilters(request.paramAsStringArray("char_filter", request.paramAsStringArray("char_filters", analyzeRequest.charFilters())));
    analyzeRequest.explain(request.paramAsBoolean("explain", false));
    analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));

    if (RestActions.hasBodyContent(request)) {
        XContentType type = RestActions.guessBodyContentType(request);
        if (type == null) {
            // No content type could be guessed: use the raw body as the text
            // to analyze, but only if no "text" parameter was supplied.
            if (texts == null || texts.length == 0) {
                texts = new String[]{ RestActions.getRestContent(request).toUtf8() };
                analyzeRequest.text(texts);
            }
        } else {
            // NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values
            buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
        }
    }

    client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
}

Source File: ElasticsearchUtil.java From SpringBootLearn with Apache License 2.0

5 votes

/**
 * IK tokenization test: analyzes a fixed sample string against the
 * {@code entity} index and returns the resulting terms, one per line.
 *
 * <p>Author: lihaodong, 20:09 2018/12/21.
 *
 * @return the analyzed terms, each followed by a CRLF line break
 */
public static String ik() {
    StringBuilder stringBuilder = new StringBuilder();
    AnalyzeRequest analyzeRequest = new AnalyzeRequest("entity")
            .text("书名")
            .analyzer("standard");  // alternatives: ik_smart  ik_max_word  standard
    List<AnalyzeResponse.AnalyzeToken> tokens = client.admin().indices()
            .analyze(analyzeRequest)
            .actionGet()
            .getTokens();
    for (AnalyzeResponse.AnalyzeToken token : tokens) {
        // Append a real CRLF; the doubly escaped form emitted the four literal
        // characters backslash, r, backslash, n instead of a line break.
        stringBuilder.append(token.getTerm()).append("\r\n");
    }
    return stringBuilder.toString();
}

Source File: EsIndex.java From AsuraFramework with Apache License 2.0

4 votes

/**
 * Analyzes {@code keyWords} using the analyzer code of {@code iKAnalyze}
 * against the index resolved by {@code esClientFactory.getIndexs(indexName)}.
 *
 * @param indexName name passed to {@code esClientFactory.getIndexs}
 * @param keyWords  the text to analyze
 * @param iKAnalyze supplies the analyzer name via {@code getCode()}
 * @return the raw analyze response
 */
public AnalyzeResponse analyze(String indexName,String keyWords,AnalyzeConstant.IKAnalyze iKAnalyze) {
    return esClientFactory.getClient()
            .admin()
            .indices()
            .prepareAnalyze(esClientFactory.getIndexs(indexName), keyWords)
            .setAnalyzer(iKAnalyze.getCode())
            .get();
}

Source File: URLTokenFilterIntegrationTest.java From elasticsearch-analysis-url with Apache License 2.0

4 votes

/**
 * An empty input analyzed with {@code url_protocol} must yield no tokens.
 */
@Test
public void testEmptyString() {
    assertThat("no tokens", analyzeURL("", "url_protocol"), hasSize(0));
}

Source File: URLTokenFilterIntegrationTest.java From elasticsearch-analysis-url with Apache License 2.0

4 votes

/**
 * Asserts that analyzing {@code url} with {@code analyzer} produces exactly
 * one token and that its term equals {@code expected}.
 *
 * @param url      the URL text to analyze
 * @param analyzer the analyzer name passed to {@code analyzeURL}
 * @param expected the term the single token must carry
 */
private void assertURLAnalyzesTo(String url, String analyzer, String expected) {
    final List<AnalyzeResponse.AnalyzeToken> analyzed = analyzeURL(url, analyzer);
    assertThat("a URL part was parsed", analyzed, hasSize(1));

    final String actualTerm = analyzed.get(0).getTerm();
    assertEquals("term value", expected, actualTerm);
}

Source File: URLTokenizerIntegrationTest.java From elasticsearch-analysis-url with Apache License 2.0

4 votes

/**
 * Analyzing a simple HTTP URL with {@code tokenizer_url_all_malformed} must
 * return a non-null list of exactly seven tokens.
 */
@Test
public void testAnalyzeWhole() throws Exception {
    final List<AnalyzeResponse.AnalyzeToken> analyzed =
            analyzeURL("http://foo.bar.com", "tokenizer_url_all_malformed");

    assertThat(analyzed, notNullValue());
    assertThat(analyzed, hasSize(7));
}

Source File: URLAnalysisTestCase.java From elasticsearch-analysis-url with Apache License 2.0

4 votes

/**
 * Analyzes {@code url} against {@code INDEX} with the named analyzer.
 *
 * @param url      the text to analyze
 * @param analyzer the name of the analyzer to apply
 * @return the tokens contained in the analyze response
 */
protected List<AnalyzeResponse.AnalyzeToken> analyzeURL(String url, String analyzer) {
    AnalyzeResponse response = client().admin().indices()
            .prepareAnalyze(INDEX, url)
            .setAnalyzer(analyzer)
            .get();
    return response.getTokens();
}

Source File: SetupIndexServiceImpl.java From elasticsearch-tutorial with MIT License

4 votes

/**
 * Analyzes {@code text} and returns the resulting terms in token order.
 *
 * @param indexAliasName index or alias to run the analysis against; not set
 *                       on the request when {@code null}
 * @param analyzer       name of the analyzer to use; not set on the request
 *                       when {@code null}
 * @param tokenFilters   token filters to apply; not set on the request when
 *                       {@code null}
 * @param text           the text to analyze
 * @return the analyzed terms; empty if the response is {@code null}
 */
@Override
public List<String> analyzeText(String indexAliasName, String analyzer, String[] tokenFilters, String text)
{
    List<String> tokens = new ArrayList<String>();
    
    AnalyzeRequestBuilder analyzeRequestBuilder = searchClientService.getClient().admin().indices().prepareAnalyze(text);
    
    // The index is optional on its own; it must not depend on whether an
    // analyzer was supplied.
    if(indexAliasName !=null)
    {
        analyzeRequestBuilder.setIndex(indexAliasName);
    }
    if(analyzer !=null)
    {
        analyzeRequestBuilder.setAnalyzer(analyzer);
    }
    
    if(tokenFilters !=null)
    {
        analyzeRequestBuilder.setTokenFilters(tokenFilters);
    }
    
    logger.debug("Analyze request is text: {}, analyzer: {}, tokenfilters: {}", new Object[]{analyzeRequestBuilder.request().text(), 
                                                                                analyzeRequestBuilder.request().analyzer(),
                                                                                analyzeRequestBuilder.request().tokenFilters()});
                                                                                        
    AnalyzeResponse analyzeResponse = analyzeRequestBuilder.get();
    
    if(analyzeResponse == null)
    {
        // Nothing to log or collect; avoid dereferencing a null response.
        return tokens;
    }
    
    try
    {
        logger.debug("Analyze response is : {}", analyzeResponse.toXContent(jsonBuilder().startObject(), ToXContent.EMPTY_PARAMS).prettyPrint().string());
    } catch (IOException e)
    {
        // Failing to render the debug output must not fail the analysis.
        logger.error("Error printing response.", e);
    }
    
    for (AnalyzeToken analyzeToken : analyzeResponse.getTokens())
    {
        tokens.add(analyzeToken.getTerm());
    }
    return tokens;
}

Source File: AbstractClient.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Executes {@code AnalyzeAction.INSTANCE} with the given request, handing the
 * supplied listener to {@code execute}.
 *
 * @param request  the analyze request to execute
 * @param listener the listener passed along to {@code execute}
 */
@Override
public void analyze(final AnalyzeRequest request, final ActionListener<AnalyzeResponse> listener) {
    execute(AnalyzeAction.INSTANCE, request, listener);
}

Source File: AbstractClient.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Executes {@code AnalyzeAction.INSTANCE} with the given request and returns
 * the future produced by {@code execute}.
 *
 * @param request the analyze request to execute
 * @return the future returned by {@code execute}
 */
@Override
public ActionFuture<AnalyzeResponse> analyze(final AnalyzeRequest request) {
    return execute(AnalyzeAction.INSTANCE, request);
}

Source File: AnalyzeRequestBuilder.java From elasticshell with Apache License 2.0

4 votes

/**
 * Runs the analyze request through the indices admin client.
 *
 * @param request the analyze request to execute
 * @return the future returned by the indices admin client
 */
@Override
protected ActionFuture<AnalyzeResponse> doExecute(AnalyzeRequest request) {
    return client.admin().indices().analyze(request);
}

Source File: AnalyzeRequestBuilder.java From elasticshell with Apache License 2.0

4 votes

/**
 * Delegates to the superclass rendering and then calls {@code endObject()}
 * on the builder it returns.
 *
 * @param request  the analyze request that was executed
 * @param response the analyze response to render
 * @param builder  the builder to write into
 * @return the builder after {@code endObject()} has been called
 * @throws IOException if writing to the builder fails
 */
@Override
protected XContentBuilder toXContent(AnalyzeRequest request, AnalyzeResponse response, XContentBuilder builder) throws IOException {
    // NOTE(review): presumably the superclass leaves an object open that is closed here -- confirm.
    return super.toXContent(request, response, builder).endObject();
}

Source File: IEsIndexDao.java From AsuraFramework with Apache License 2.0

2 votes

/**
 * Tokenizes (word-segments) the given keywords.
 *
 * @param indexName the index name (presumably the index whose analysis settings are used -- confirm against the implementation)
 * @param keyWords  the text to tokenize
 * @param iKAnalyze the IK analyzer selection
 * @return the analyze response
 */
AnalyzeResponse analyze(String indexName,String keyWords,AnalyzeConstant.IKAnalyze iKAnalyze);

Source File: IndicesAdminClient.java From Elasticsearch with Apache License 2.0

2 votes

/**
 * Analyze text under the provided index.
 *
 * @param request  the analyze request
 * @param listener a listener for the {@link AnalyzeResponse}
 */
void analyze(AnalyzeRequest request, ActionListener<AnalyzeResponse> listener);

Source File: IndicesAdminClient.java From Elasticsearch with Apache License 2.0

2 votes

/**
 * Analyze text under the provided index.
 *
 * @param request the analyze request
 * @return a future for the {@link AnalyzeResponse}
 */
ActionFuture<AnalyzeResponse> analyze(AnalyzeRequest request);

org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse Java Examples