Java Code Examples for org.apache.storm.task.TopologyContext#getThisTaskIndex()
The following examples show how to use org.apache.storm.task.TopologyContext#getThisTaskIndex().
Each example is drawn from an open-source project; the source file and license are noted above the code.
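At its core, getThisTaskIndex() returns this task's 0-based position among the tasks of its component (unlike getThisTaskId(), which is a topology-wide id). Before the project examples, here is a minimal sketch of the common pattern: combining the index with the component's task count so that parallel instances split work without overlap. PartitionedSpout is a hypothetical class written for illustration, assuming Storm 2.x signatures; it is not taken from any of the projects below.

import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

// Hypothetical spout for illustration: each task uses its index to claim
// a disjoint slice of the work.
public class PartitionedSpout extends BaseRichSpout {
    private SpoutOutputCollector collector;
    private int taskIndex;   // this task's 0-based position within the component
    private int totalTasks;  // number of parallel tasks for this component
    private long counter;

    @Override
    public void open(Map<String, Object> conf, TopologyContext context,
                     SpoutOutputCollector collector) {
        this.collector = collector;
        this.taskIndex = context.getThisTaskIndex();
        this.totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    }

    @Override
    public void nextTuple() {
        // Emit only values in this task's residue class: value % totalTasks == taskIndex.
        // A real spout would poll an external source here instead of generating numbers.
        long value = counter * totalTasks + taskIndex;
        counter++;
        collector.emit(new Values(value));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("value"));
    }
}

Each of the examples below is a variation on this pattern: the index either selects a disjoint share of the work (partitions, buckets, shards) or tags per-task artifacts (metric prefixes, thread names, file names).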
Example 1
Source File: StormRecorder.java From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
@Override
public void open(final Map<String, Object> spoutConfig, final TopologyContext topologyContext) {
    // Load configuration items.

    // Determine our time bucket window, in seconds, defaulted to 60.
    int timeBucketSeconds = 60;
    if (spoutConfig.containsKey(SpoutConfig.METRICS_RECORDER_TIME_BUCKET)) {
        final Object timeBucketCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_TIME_BUCKET);
        if (timeBucketCfgValue instanceof Number) {
            timeBucketSeconds = ((Number) timeBucketCfgValue).intValue();
        }
    }

    // Conditionally enable prefixing with taskId.
    if (spoutConfig.containsKey(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX)) {
        final Object taskIdCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX);
        if (taskIdCfgValue instanceof Boolean && (Boolean) taskIdCfgValue) {
            this.metricPrefix = "task-" + topologyContext.getThisTaskIndex();
        }
    }
    this.keyBuilder = new KeyBuilder(this.metricPrefix);

    // Log how we got configured. Prefixing is in use when metricPrefix is non-empty.
    logger.info("Configured with time window of {} seconds and using taskId prefixes?: {}",
        timeBucketSeconds, Boolean.toString(!metricPrefix.isEmpty()));

    // Register the top level metrics.
    assignedValues = topologyContext.registerMetric("GAUGES", new MultiAssignableMetric(), timeBucketSeconds);
    timers = topologyContext.registerMetric("TIMERS", new MultiReducedMetric(new MeanReducer()), timeBucketSeconds);
    counters = topologyContext.registerMetric("COUNTERS", new MultiCountMetric(), timeBucketSeconds);
}
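In this example the index builds a per-task metric prefix ("task-0", "task-1", ...), keeping metrics reported by parallel instances of the same component apart.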
Example 2
Source File: EsSpout.java From elasticsearch-hadoop with Apache License 2.0
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    this.collector = collector;

    LinkedHashMap copy = new LinkedHashMap(conf);
    copy.putAll(spoutConfig);

    StormSettings settings = new StormSettings(copy);

    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setUserProviderIfNotSet(settings, JdkUserProvider.class, log);

    EsClusterInfoSelector.populate(settings);

    ackReads = settings.getStormSpoutReliable();

    if (ackReads) {
        inTransitQueue = new LinkedHashMap<Object, Object>();
        replayQueue = new LinkedList<Object[]>();
        retries = new HashMap<Object, Integer>();

        queueSize = settings.getStormSpoutReliableQueueSize();
        tupleRetries = settings.getStormSpoutReliableRetriesPerTuple();
        tupleFailure = settings.getStormSpoutReliableTupleFailureHandling();
    }

    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    int currentTask = context.getThisTaskIndex();

    // Match the partitions based on the current topology.
    List<PartitionDefinition> partitions = RestService.findPartitions(settings, log);
    List<PartitionDefinition> assigned = RestService.assignPartitions(partitions, currentTask, totalTasks);
    iterator = RestService.multiReader(settings, assigned, log);
}
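The pair of getThisTaskIndex() and the component's task count identifies this instance among its peers, so RestService.assignPartitions() can hand each spout task a disjoint subset of the Elasticsearch partitions.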
Example 3
Source File: WARCFileNameFormat.java From storm-crawler with Apache License 2.0
@Override
public void prepare(Map conf, TopologyContext topologyContext) {
    this.taskIndex = topologyContext.getThisTaskIndex();
    int totalTasks = topologyContext.getComponentTasks(
            topologyContext.getThisComponentId()).size();
    // Single task? Let's not bother with the task index in the file name.
    if (totalTasks == 1) {
        this.taskIndex = -1;
    }
}
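Here the index only ends up in the generated WARC file names when the component actually runs with parallelism greater than one; with a single task it is reset to -1 and omitted.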
Example 4
Source File: SQLSpout.java From storm-crawler with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { super.open(conf, context, collector); maxDocsPerBucket = ConfUtils.getInt(conf, Constants.SQL_MAX_DOCS_BUCKET_PARAM_NAME, 5); tableName = ConfUtils.getString(conf, Constants.SQL_STATUS_TABLE_PARAM_NAME, "urls"); maxNumResults = ConfUtils.getInt(conf, Constants.SQL_MAXRESULTS_PARAM_NAME, 100); try { connection = SQLUtil.getConnection(conf); } catch (SQLException ex) { LOG.error(ex.getMessage(), ex); throw new RuntimeException(ex); } // determine bucket this spout instance will be in charge of int totalTasks = context .getComponentTasks(context.getThisComponentId()).size(); if (totalTasks > 1) { logIdprefix = "[" + context.getThisComponentId() + " #" + context.getThisTaskIndex() + "] "; bucketNum = context.getThisTaskIndex(); } }
Example 5
Source File: SidelineSpoutHandler.java From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
/**
 * Handler called when the dynamic spout opens; this method is responsible for creating and setting triggers for
 * handling the spinning up and down of sidelines.
 * @param spout Dynamic spout instance.
 * @param topologyConfig Topology configuration.
 * @param topologyContext Topology context.
 */
@Override
public void onSpoutOpen(
    final DynamicSpout spout,
    final Map topologyConfig,
    final TopologyContext topologyContext
) {
    this.spout = spout;

    createSidelineTriggers();

    Preconditions.checkArgument(
        spoutConfig.containsKey(SidelineConfig.REFRESH_INTERVAL_SECONDS)
        && spoutConfig.get(SidelineConfig.REFRESH_INTERVAL_SECONDS) != null,
        "Configuration value for " + SidelineConfig.REFRESH_INTERVAL_SECONDS + " is required."
    );

    final long refreshIntervalSeconds = ((Number) spoutConfig.get(SidelineConfig.REFRESH_INTERVAL_SECONDS)).longValue();
    final long refreshIntervalMillis = TimeUnit.SECONDS.toMillis(refreshIntervalSeconds);

    // Why not just start the timer at 0? Because we want to block onSpoutOpen() until the first run of loadSidelines().
    loadSidelines();

    // Repeat our sidelines check periodically.
    final String threadName = "[" + DynamicSpout.class.getSimpleName() + ":" + getClass().getSimpleName()
        + "] Timer on " + topologyContext.getThisComponentId() + ":" + topologyContext.getThisTaskIndex();
    timer = new Timer(threadName);
    timer.scheduleAtFixedRate(new TimerTask() {
        @Override
        public void run() {
            // Catch this so that it doesn't kill the recurring task.
            try {
                loadSidelines();
            } catch (Exception ex) {
                logger.error("Attempting to loadSidelines() failed", ex);
            }
        }
    }, refreshIntervalMillis, refreshIntervalMillis);

    for (final SidelineTrigger sidelineTrigger : sidelineTriggers) {
        sidelineTrigger.open(getSpoutConfig());
    }
}
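In this handler the index is used purely for diagnostics: it makes the background timer's thread name unique per task.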
Example 6
Source File: DynamicSpout.java From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
/**
 * Open is called once the spout instance has been deployed to the Storm cluster
 * and is ready to get to work.
 *
 * @param topologyConfig The Storm Topology configuration.
 * @param topologyContext The Storm Topology context.
 * @param spoutOutputCollector The output collector to emit tuples via.
 * @throws IllegalStateException if you attempt to open the spout multiple times.
 */
@Override
public void open(Map topologyConfig, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
    if (isOpen) {
        throw new IllegalStateException("This spout has already been opened.");
    }

    // Save references.
    this.topologyContext = topologyContext;
    this.outputCollector = spoutOutputCollector;

    // Ensure a consumer id prefix has been correctly set.
    if (Strings.isNullOrEmpty((String) getSpoutConfigItem(SpoutConfig.VIRTUAL_SPOUT_ID_PREFIX))) {
        throw new IllegalStateException("Missing required configuration: " + SpoutConfig.VIRTUAL_SPOUT_ID_PREFIX);
    }

    // We do not use the getters for things like the metricsRecorder and coordinator here
    // because each of these getters performs a check to see if the spout is open, and it's not yet until we've
    // finished setting all of these things up.

    // Initialize metric recorder.
    this.metricsRecorder = getFactoryManager().createNewMetricsRecorder();
    this.metricsRecorder.open(getSpoutConfig(), getTopologyContext());

    // Create MessageBuffer.
    final MessageBuffer messageBuffer = getFactoryManager().createNewMessageBufferInstance();
    messageBuffer.open(getSpoutConfig());

    // Create MessageBus instance and store into SpoutMessageBus reference, reducing accessible scope.
    final MessageBus messageBus = new MessageBus(messageBuffer);
    this.messageBus = messageBus;

    // Define thread context; this allows us to use contextually relevant thread names.
    final ThreadContext threadContext = new ThreadContext(
        topologyContext.getThisComponentId(),
        topologyContext.getThisTaskIndex()
    );

    // Create Coordinator instance and call open.
    spoutCoordinator = new SpoutCoordinator(
        getSpoutConfig(),
        threadContext,
        messageBus,
        metricsRecorder
    );
    spoutCoordinator.open();

    // Define consumer cohort definition.
    final ConsumerPeerContext consumerPeerContext = new ConsumerPeerContext(
        topologyContext.getComponentTasks(topologyContext.getThisComponentId()).size(),
        topologyContext.getThisTaskIndex()
    );

    // TODO: This should be configurable and created dynamically; the problem is that right now we are still
    // tightly coupled to the VirtualSpout implementation.
    this.virtualSpoutFactory = new VirtualSpoutFactory(
        spoutConfig,
        consumerPeerContext,
        factoryManager,
        metricsRecorder
    );

    // Our spout is open; it's not dependent upon the handler to finish opening for us to be 'opened'.
    // This is important, because if we waited, most of our getters that check the opened state of the
    // spout would throw an exception and make them unusable.
    isOpen = true;

    this.spoutHandler = getFactoryManager().createSpoutHandler();
    this.spoutHandler.open(spoutConfig, virtualSpoutFactory);
    this.spoutHandler.onSpoutOpen(this, topologyConfig, topologyContext);
}
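getThisTaskIndex() appears twice: once in the ThreadContext, to produce contextually relevant thread names, and once in the ConsumerPeerContext, alongside the component's task count, to define this consumer's place in the cohort that splits partitions.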
Example 7
Source File: InOrderDeliveryTest.java From storm-net-adapter with Apache License 2.0
@Override
public void open(Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    _base = context.getThisTaskIndex();
}
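A minimal use: each task stores its index as _base, so tuples emitted by different parallel instances of the spout can be told apart downstream.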
Example 8
Source File: AbstractSpout.java From storm-crawler with Apache License 2.0
@Override
public void open(Map stormConf, TopologyContext context, SpoutOutputCollector collector) {
    super.open(stormConf, context, collector);

    indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName, "status");

    // One ES client per JVM.
    synchronized (AbstractSpout.class) {
        try {
            if (client == null) {
                client = ElasticSearchConnection.getClient(stormConf, ESBoltType);
            }
        } catch (Exception e1) {
            LOG.error("Can't connect to ElasticSearch", e1);
            throw new RuntimeException(e1);
        }
    }

    // If more than one instance is used, we expect their number to be the
    // same as the number of shards.
    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    if (totalTasks > 1) {
        logIdprefix = "[" + context.getThisComponentId() + " #"
                + context.getThisTaskIndex() + "] ";

        // Determine the number of shards so that we can restrict the search.
        // TODO use the admin API when it becomes available
        // TODO or the low-level one with
        // https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shards-stores.html
        // TODO identify local shards and use those if possible
        // ClusterSearchShardsRequest request = new ClusterSearchShardsRequest(indexName);
        // ClusterSearchShardsResponse shardresponse = client.admin()
        //     .cluster().searchShards(request).actionGet();
        // ClusterSearchShardsGroup[] shardgroups = shardresponse.getGroups();
        // if (totalTasks != shardgroups.length) {
        //     throw new RuntimeException(
        //         "Number of ES spout instances should be the same as number of shards ("
        //             + shardgroups.length + ") but is " + totalTasks);
        // }
        // shardID = shardgroups[context.getThisTaskIndex()].getShardId().getId();

        // TEMPORARY: simply use the task index as shard index.
        shardID = context.getThisTaskIndex();
        LOG.info("{} assigned shard ID {}", logIdprefix, shardID);
    }

    partitionField = ConfUtils.getString(stormConf, ESStatusBucketFieldParamName, "key");
    bucketSortField = ConfUtils.loadListFromConf(ESStatusBucketSortFieldParamName, stormConf);
    totalSortField = ConfUtils.getString(stormConf, ESStatusGlobalSortFieldParamName);
    maxURLsPerBucket = ConfUtils.getInt(stormConf, ESStatusMaxURLsParamName, 1);
    maxBucketNum = ConfUtils.getInt(stormConf, ESStatusMaxBucketParamName, 10);
    queryTimeout = ConfUtils.getInt(stormConf, ESStatusQueryTimeoutParamName, -1);
    filterQueries = ConfUtils.loadListFromConf(ESStatusFilterParamName, stormConf);
}
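The TEMPORARY block makes the mapping explicit: each task reads from the Elasticsearch shard whose id equals its task index, which is only safe while the component's parallelism matches the index's shard count, as the surrounding comments acknowledge.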