Java Code Examples for org.apache.storm.task.TopologyContext#getThisTaskIndex()
The following examples show how to use org.apache.storm.task.TopologyContext#getThisTaskIndex().
Each example is drawn from an open-source project; the source file and license are noted above the code.
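At its core, getThisTaskIndex() returns this task's 0-based position among the tasks of its component (unlike getThisTaskId(), which is a topology-wide id). Before the project examples, here is a minimal sketch of the common pattern: combining the index with the component's task count so that parallel instances split work without overlap. PartitionedSpout is a hypothetical class written for illustration, assuming Storm 2.x signatures; it is not taken from any of the projects below.

import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

// Hypothetical spout for illustration: each task uses its index to claim
// a disjoint slice of the work.
public class PartitionedSpout extends BaseRichSpout {
    private SpoutOutputCollector collector;
    private int taskIndex;   // this task's 0-based position within the component
    private int totalTasks;  // number of parallel tasks for this component
    private long counter;

    @Override
    public void open(Map<String, Object> conf, TopologyContext context,
                     SpoutOutputCollector collector) {
        this.collector = collector;
        this.taskIndex = context.getThisTaskIndex();
        this.totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    }

    @Override
    public void nextTuple() {
        // Emit only values in this task's residue class: value % totalTasks == taskIndex.
        // A real spout would poll an external source here instead of generating numbers.
        long value = counter * totalTasks + taskIndex;
        counter++;
        collector.emit(new Values(value));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("value"));
    }
}

Each of the examples below is a variation on this pattern: the index either selects a disjoint share of the work (partitions, buckets, shards) or tags per-task artifacts (metric prefixes, thread names, file names).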
Example 1
Source File: StormRecorder.java From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
@Override
public void open(final Map<String, Object> spoutConfig, final TopologyContext topologyContext) {
    // Load configuration items.

    // Determine our time bucket window, in seconds, defaulted to 60.
    int timeBucketSeconds = 60;
    if (spoutConfig.containsKey(SpoutConfig.METRICS_RECORDER_TIME_BUCKET)) {
        final Object timeBucketCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_TIME_BUCKET);
        if (timeBucketCfgValue instanceof Number) {
            timeBucketSeconds = ((Number) timeBucketCfgValue).intValue();
        }
    }

    // Conditionally enable prefixing with taskId.
    if (spoutConfig.containsKey(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX)) {
        final Object taskIdCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX);
        if (taskIdCfgValue instanceof Boolean && (Boolean) taskIdCfgValue) {
            this.metricPrefix = "task-" + topologyContext.getThisTaskIndex();
        }
    }
    this.keyBuilder = new KeyBuilder(this.metricPrefix);

    // Log how we got configured. Prefixing is in use when metricPrefix is non-empty.
    logger.info("Configured with time window of {} seconds and using taskId prefixes?: {}",
        timeBucketSeconds, Boolean.toString(!metricPrefix.isEmpty()));

    // Register the top level metrics.
    assignedValues = topologyContext.registerMetric("GAUGES", new MultiAssignableMetric(), timeBucketSeconds);
    timers = topologyContext.registerMetric("TIMERS", new MultiReducedMetric(new MeanReducer()), timeBucketSeconds);
    counters = topologyContext.registerMetric("COUNTERS", new MultiCountMetric(), timeBucketSeconds);
}
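In this example the index builds a per-task metric prefix ("task-0", "task-1", ...), keeping metrics reported by parallel instances of the same component apart.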
Example 2
Source File: EsSpout.java From elasticsearch-hadoop with Apache License 2.0
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    this.collector = collector;

    LinkedHashMap copy = new LinkedHashMap(conf);
    copy.putAll(spoutConfig);

    StormSettings settings = new StormSettings(copy);

    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setUserProviderIfNotSet(settings, JdkUserProvider.class, log);

    EsClusterInfoSelector.populate(settings);

    ackReads = settings.getStormSpoutReliable();

    if (ackReads) {
        inTransitQueue = new LinkedHashMap<Object, Object>();
        replayQueue = new LinkedList<Object[]>();
        retries = new HashMap<Object, Integer>();

        queueSize = settings.getStormSpoutReliableQueueSize();
        tupleRetries = settings.getStormSpoutReliableRetriesPerTuple();
        tupleFailure = settings.getStormSpoutReliableTupleFailureHandling();
    }

    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    int currentTask = context.getThisTaskIndex();

    // Match the partitions based on the current topology.
    List<PartitionDefinition> partitions = RestService.findPartitions(settings, log);
    List<PartitionDefinition> assigned = RestService.assignPartitions(partitions, currentTask, totalTasks);
    iterator = RestService.multiReader(settings, assigned, log);
}
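The pair of getThisTaskIndex() and the component's task count identifies this instance among its peers, so RestService.assignPartitions() can hand each spout task a disjoint subset of the Elasticsearch partitions.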
Example 3
Source File: WARCFileNameFormat.java From storm-crawler with Apache License 2.0
@Override
public void prepare(Map conf, TopologyContext topologyContext) {
    this.taskIndex = topologyContext.getThisTaskIndex();
    int totalTasks = topologyContext.getComponentTasks(
            topologyContext.getThisComponentId()).size();
    // Single task? Let's not bother with the task index in the file name.
    if (totalTasks == 1) {
        this.taskIndex = -1;
    }
}
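Here the index only ends up in the generated WARC file names when the component actually runs with parallelism greater than one; with a single task it is reset to -1 and omitted.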
Example 4
Source File: SQLSpout.java From storm-crawler with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { super.open(conf, context, collector); maxDocsPerBucket = ConfUtils.getInt(conf, Constants.SQL_MAX_DOCS_BUCKET_PARAM_NAME, 5); tableName = ConfUtils.getString(conf, Constants.SQL_STATUS_TABLE_PARAM_NAME, "urls"); maxNumResults = ConfUtils.getInt(conf, Constants.SQL_MAXRESULTS_PARAM_NAME, 100); try { connection = SQLUtil.getConnection(conf); } catch (SQLException ex) { LOG.error(ex.getMessage(), ex); throw new RuntimeException(ex); } // determine bucket this spout instance will be in charge of int totalTasks = context .getComponentTasks(context.getThisComponentId()).size(); if (totalTasks > 1) { logIdprefix = "[" + context.getThisComponentId() + " #" + context.getThisTaskIndex() + "] "; bucketNum = context.getThisTaskIndex(); } }
Example 5
Source File: SidelineSpoutHandler.java From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
/**
 * Handler called when the dynamic spout opens; this method is responsible for creating and setting triggers for
 * handling the spinning up and down of sidelines.
 * @param spout Dynamic spout instance.
 * @param topologyConfig Topology configuration.
 * @param topologyContext Topology context.
 */
@Override
public void onSpoutOpen(
    final DynamicSpout spout,
    final Map topologyConfig,
    final TopologyContext topologyContext
) {
    this.spout = spout;

    createSidelineTriggers();

    Preconditions.checkArgument(
        spoutConfig.containsKey(SidelineConfig.REFRESH_INTERVAL_SECONDS)
        && spoutConfig.get(SidelineConfig.REFRESH_INTERVAL_SECONDS) != null,
        "Configuration value for " + SidelineConfig.REFRESH_INTERVAL_SECONDS + " is required."
    );

    final long refreshIntervalSeconds = ((Number) spoutConfig.get(SidelineConfig.REFRESH_INTERVAL_SECONDS)).longValue();
    final long refreshIntervalMillis = TimeUnit.SECONDS.toMillis(refreshIntervalSeconds);

    // Why not just start the timer at 0? Because we want to block onSpoutOpen() until the first run of loadSidelines().
    loadSidelines();

    // Repeat our sidelines check periodically.
    final String threadName = "[" + DynamicSpout.class.getSimpleName() + ":" + getClass().getSimpleName()
        + "] Timer on " + topologyContext.getThisComponentId() + ":" + topologyContext.getThisTaskIndex();
    timer = new Timer(threadName);
    timer.scheduleAtFixedRate(new TimerTask() {
        @Override
        public void run() {
            // Catch this so that it doesn't kill the recurring task.
            try {
                loadSidelines();
            } catch (Exception ex) {
                logger.error("Attempting to loadSidelines() failed", ex);
            }
        }
    }, refreshIntervalMillis, refreshIntervalMillis);

    for (final SidelineTrigger sidelineTrigger : sidelineTriggers) {
        sidelineTrigger.open(getSpoutConfig());
    }
}
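In this handler the index is used purely for diagnostics: it makes the background timer's thread name unique per task.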
Example 6
Source File: DynamicSpout.java From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
/**
 * Open is called once the spout instance has been deployed to the Storm cluster
 * and is ready to get to work.
 *
 * @param topologyConfig The Storm Topology configuration.
 * @param topologyContext The Storm Topology context.
 * @param spoutOutputCollector The output collector to emit tuples via.
 * @throws IllegalStateException if you attempt to open the spout multiple times.
 */
@Override
public void open(Map topologyConfig, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
    if (isOpen) {
        throw new IllegalStateException("This spout has already been opened.");
    }

    // Save references.
    this.topologyContext = topologyContext;
    this.outputCollector = spoutOutputCollector;

    // Ensure a consumer id prefix has been correctly set.
    if (Strings.isNullOrEmpty((String) getSpoutConfigItem(SpoutConfig.VIRTUAL_SPOUT_ID_PREFIX))) {
        throw new IllegalStateException("Missing required configuration: " + SpoutConfig.VIRTUAL_SPOUT_ID_PREFIX);
    }

    // We do not use the getters for things like the metricsRecorder and coordinator here
    // because each of these getters performs a check to see if the spout is open, and it's not yet until we've
    // finished setting all of these things up.

    // Initialize metric recorder.
    this.metricsRecorder = getFactoryManager().createNewMetricsRecorder();
    this.metricsRecorder.open(getSpoutConfig(), getTopologyContext());

    // Create MessageBuffer.
    final MessageBuffer messageBuffer = getFactoryManager().createNewMessageBufferInstance();
    messageBuffer.open(getSpoutConfig());

    // Create MessageBus instance and store into SpoutMessageBus reference, reducing accessible scope.
    final MessageBus messageBus = new MessageBus(messageBuffer);
    this.messageBus = messageBus;

    // Define thread context; this allows us to use contextually relevant thread names.
    final ThreadContext threadContext = new ThreadContext(
        topologyContext.getThisComponentId(),
        topologyContext.getThisTaskIndex()
    );

    // Create Coordinator instance and call open.
    spoutCoordinator = new SpoutCoordinator(
        getSpoutConfig(),
        threadContext,
        messageBus,
        metricsRecorder
    );
    spoutCoordinator.open();

    // Define consumer cohort definition.
    final ConsumerPeerContext consumerPeerContext = new ConsumerPeerContext(
        topologyContext.getComponentTasks(topologyContext.getThisComponentId()).size(),
        topologyContext.getThisTaskIndex()
    );

    // TODO: This should be configurable and created dynamically; the problem is that right now we are still
    // tightly coupled to the VirtualSpout implementation.
    this.virtualSpoutFactory = new VirtualSpoutFactory(
        spoutConfig,
        consumerPeerContext,
        factoryManager,
        metricsRecorder
    );

    // Our spout is open; it's not dependent upon the handler to finish opening for us to be 'opened'.
    // This is important, because if we waited, most of our getters that check the opened state of the
    // spout would throw an exception and make them unusable.
    isOpen = true;

    this.spoutHandler = getFactoryManager().createSpoutHandler();
    this.spoutHandler.open(spoutConfig, virtualSpoutFactory);
    this.spoutHandler.onSpoutOpen(this, topologyConfig, topologyContext);
}
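getThisTaskIndex() appears twice: once in the ThreadContext, to produce contextually relevant thread names, and once in the ConsumerPeerContext, alongside the component's task count, to define this consumer's place in the cohort that splits partitions.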
Example 7
Source File: InOrderDeliveryTest.java From storm-net-adapter with Apache License 2.0
@Override
public void open(Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    _base = context.getThisTaskIndex();
}
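A minimal use: each task stores its index as _base, so tuples emitted by different parallel instances of the spout can be told apart downstream.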
Example 8
Source File: AbstractSpout.java From storm-crawler with Apache License 2.0
@Override
public void open(Map stormConf, TopologyContext context, SpoutOutputCollector collector) {
    super.open(stormConf, context, collector);

    indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName, "status");

    // One ES client per JVM.
    synchronized (AbstractSpout.class) {
        try {
            if (client == null) {
                client = ElasticSearchConnection.getClient(stormConf, ESBoltType);
            }
        } catch (Exception e1) {
            LOG.error("Can't connect to ElasticSearch", e1);
            throw new RuntimeException(e1);
        }
    }

    // If more than one instance is used, we expect their number to be the
    // same as the number of shards.
    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    if (totalTasks > 1) {
        logIdprefix = "[" + context.getThisComponentId() + " #"
                + context.getThisTaskIndex() + "] ";

        // Determine the number of shards so that we can restrict the search.
        // TODO use the admin API when it becomes available
        // TODO or the low-level one with
        // https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shards-stores.html
        // TODO identify local shards and use those if possible
        // ClusterSearchShardsRequest request = new ClusterSearchShardsRequest(indexName);
        // ClusterSearchShardsResponse shardresponse = client.admin()
        //     .cluster().searchShards(request).actionGet();
        // ClusterSearchShardsGroup[] shardgroups = shardresponse.getGroups();
        // if (totalTasks != shardgroups.length) {
        //     throw new RuntimeException(
        //         "Number of ES spout instances should be the same as number of shards ("
        //             + shardgroups.length + ") but is " + totalTasks);
        // }
        // shardID = shardgroups[context.getThisTaskIndex()].getShardId().getId();

        // TEMPORARY: simply use the task index as shard index.
        shardID = context.getThisTaskIndex();
        LOG.info("{} assigned shard ID {}", logIdprefix, shardID);
    }

    partitionField = ConfUtils.getString(stormConf, ESStatusBucketFieldParamName, "key");
    bucketSortField = ConfUtils.loadListFromConf(ESStatusBucketSortFieldParamName, stormConf);
    totalSortField = ConfUtils.getString(stormConf, ESStatusGlobalSortFieldParamName);
    maxURLsPerBucket = ConfUtils.getInt(stormConf, ESStatusMaxURLsParamName, 1);
    maxBucketNum = ConfUtils.getInt(stormConf, ESStatusMaxBucketParamName, 10);
    queryTimeout = ConfUtils.getInt(stormConf, ESStatusQueryTimeoutParamName, -1);
    filterQueries = ConfUtils.loadListFromConf(ESStatusFilterParamName, stormConf);
}
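The TEMPORARY block makes the mapping explicit: each task reads from the Elasticsearch shard whose id equals its task index, which is only safe while the component's parallelism matches the index's shard count, as the surrounding comments acknowledge.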