Java Code Examples for org.apache.storm.tuple.Tuple#getBinaryByField()
The following examples show how to use
org.apache.storm.tuple.Tuple#getBinaryByField().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ProfileSplitterBolt.java From metron with Apache License 2.0 | 6 votes |
private void doExecute(Tuple input) throws ParseException, UnsupportedEncodingException { // retrieve the input message byte[] data = input.getBinaryByField(VALUE.getFieldName()); if(data == null) { LOG.debug("Received null message. Nothing to do."); return; } // ensure there is a valid profiler configuration ProfilerConfig config = getProfilerConfig(); if(config == null || getProfilerConfig().getProfiles().size() == 0) { LOG.debug("No Profiler configuration found. Nothing to do."); return; } JSONObject message = (JSONObject) parser.parse(new String(data, StandardCharsets.UTF_8)); routeMessage(input, message, config); }
Example 2
Source File: RedirectionBolt.java From storm-crawler with Apache License 2.0 | 6 votes |
@Override public void execute(Tuple tuple) { String url = tuple.getStringByField("url"); byte[] content = tuple.getBinaryByField("content"); Metadata metadata = (Metadata) tuple.getValueByField("metadata"); String text = tuple.getStringByField("text"); Values v = new Values(url, content, metadata, text); // if there is a text - no need to parse it again if (StringUtils.isNotBlank(text)) { collector.emit(tuple, v); } else { collector.emit("tika", tuple, v); } collector.ack(tuple); }
Example 3
Source File: FeedDetectorBolt.java From news-crawl with Apache License 2.0 | 4 votes |
@Override public void execute(Tuple tuple) { Metadata metadata = (Metadata) tuple.getValueByField("metadata"); byte[] content = tuple.getBinaryByField("content"); String url = tuple.getStringByField("url"); boolean isFeed = Boolean.valueOf(metadata.getFirstValue(isFeedKey)); if (!isFeed) { String ct = metadata.getFirstValue(HttpHeaders.CONTENT_TYPE); if (ct != null) { for (String clue : mimeTypeClues) { if (ct.contains(clue)) { isFeed = true; metadata.setValue(isFeedKey, "true"); LOG.info("Feed detected from content type <{}> for {}", ct, url); break; } } } } if (!isFeed) { if (contentDetector.matches(content)) { isFeed = true; metadata.setValue(isFeedKey, "true"); LOG.info("Feed detected from content: {}", url); } } if (isFeed) { // do not parse but run parse filters ParseResult parse = new ParseResult(); ParseData parseData = parse.get(url); parseData.setMetadata(metadata); parseFilters.filter(url, content, null, parse); // emit status collector.emit(Constants.StatusStreamName, tuple, new Values(url, metadata, Status.FETCHED)); } else { // pass on collector.emit(tuple, tuple.getValues()); } collector.ack(tuple); }
Example 4
Source File: NewsSiteMapDetectorBolt.java From news-crawl with Apache License 2.0 | 4 votes |
@Override public void execute(Tuple tuple) { Metadata metadata = (Metadata) tuple.getValueByField("metadata"); byte[] content = tuple.getBinaryByField("content"); String url = tuple.getStringByField("url"); boolean isSitemap = Boolean.valueOf( metadata.getFirstValue(SiteMapParserBolt.isSitemapKey)); boolean isNewsSitemap = Boolean.valueOf( metadata.getFirstValue(NewsSiteMapParserBolt.isSitemapNewsKey)); if (!isNewsSitemap || !isSitemap) { int match = contentDetector.getFirstMatch(content); if (match >= 0) { // a sitemap, not necessarily a news sitemap isSitemap = true; metadata.setValue(SiteMapParserBolt.isSitemapKey, "true"); if (match <= NewsSiteMapParserBolt.contentCluesSitemapNewsMatchUpTo) { isNewsSitemap = true; LOG.info("{} detected as news sitemap based on content", url); metadata.setValue(NewsSiteMapParserBolt.isSitemapNewsKey, "true"); } } } if (isSitemap) { // do not parse but run parse filters ParseResult parse = new ParseResult(); ParseData parseData = parse.get(url); parseData.setMetadata(metadata); parseFilters.filter(url, content, null, parse); // emit status collector.emit(Constants.StatusStreamName, tuple, new Values(url, metadata, Status.FETCHED)); } else { // pass on collector.emit(tuple, tuple.getValues()); } collector.ack(tuple); }