org.apache.kafka.common.metrics.stats.Rate Java Examples

The following examples show how to use org.apache.kafka.common.metrics.stats.Rate. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConsumerCollector.java    From ksql-fork-with-deep-learning-function with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the list of consumer sensor metrics for the given key.
 *
 * <p>Each metric is registered through {@code addSensor}, which receives the key, the metric
 * name, the stat instance, the list being populated and a boolean flag. The byte-count metric
 * additionally supplies a function that turns a record into the value to be recorded.
 *
 * @param key the key handed to every {@code addSensor} call
 * @return the list that was passed to the {@code addSensor} calls
 */
private List<TopicSensors.SensorMetric<ConsumerRecord>> buildSensors(String key) {
  final List<TopicSensors.SensorMetric<ConsumerRecord>> result = new ArrayList<>();

  // The metrics registry does not reliably cope with concurrent add/check-exists activity,
  // so all registrations happen while holding the registry's monitor.
  synchronized (this.metrics) {
    addSensor(key, "consumer-messages-per-sec", new Rate(), result, false);
    addSensor(key, "consumer-total-messages", new Total(), result, false);
    addSensor(key, "consumer-failed-messages", new Total(), result, true);
    // A null record contributes 0.0; otherwise the serialized value and key sizes are summed.
    addSensor(
        key,
        "consumer-total-message-bytes",
        new Total(),
        result,
        false,
        (rec) -> rec == null
            ? 0.0
            : ((double) rec.serializedValueSize() + rec.serializedKeySize()));
    addSensor(key, "failed-messages-per-sec", new Rate(), result, true);
  }

  return result;
}
 
Example #2
Source File: ProduceService.java    From kafka-monitor with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the per-partition sensors for the given partition.
 *
 * <p>Two sensors are created, each with a single {@link Rate} metric: one for records produced
 * to the partition and one for produce errors on the partition. Both are stored in their
 * respective per-partition maps, keyed by the partition number.
 *
 * <p>Any exception raised while registering the sensors is logged and not rethrown.
 *
 * @param partition the partition number; it is appended to the sensor and metric names
 */
public void addPartitionSensor(int partition) {
    try {
        Sensor recordsProducedSensor = metrics.sensor("records-produced-partition-" + partition);
        recordsProducedSensor.add(new MetricName("records-produced-rate-partition-" + partition, METRIC_GROUP_NAME,
                "The average number of records per second that are produced to this partition", tags), new Rate());
        _recordsProducedPerPartition.put(partition, recordsProducedSensor);

        Sensor errorsSensor = metrics.sensor("produce-error-partition-" + partition);
        errorsSensor.add(new MetricName("produce-error-rate-partition-" + partition, METRIC_GROUP_NAME,
                "The average number of errors per second when producing to this partition", tags), new Rate());
        _produceErrorPerPartition.put(partition, errorsSensor);
    } catch (Exception e) {
        // The placeholder is filled with the partition; the exception is passed as the trailing
        // argument so the logger records it as the throwable (with its stack trace) instead of
        // leaving a literal "{}" in the message.
        logger.error("addPartitionSensor exception for partition {}", partition, e);
    }
}
 
Example #3
Source File: CommitAvailabilityMetrics.java    From kafka-monitor with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the sensors and the derived metric used to calculate the offset commit
 * availability of a consumer.
 *
 * <p>The derived {@code offsets-committed-avg} metric is computed on read as
 * {@code committed / (committed + failed)} from the two registered totals.
 *
 * @param metrics the commit offset metrics
 * @param tags the tags associated, e.g. kmf.services:name=single-cluster-monitor
 */
public CommitAvailabilityMetrics(final Metrics metrics, final Map<String, String> tags) {
  LOG.info("{} called.", this.getClass().getSimpleName());

  // Sensor tracking committed offsets.
  _offsetsCommitted = metrics.sensor("offsets-committed");
  final MetricName committedTotalName = new MetricName("offsets-committed-total", METRIC_GROUP_NAME,
      "The total number of offsets per second that are committed.", tags);
  _offsetsCommitted.add(committedTotalName, new Total());

  // Sensor tracking failed offset commits, exposed both as a rate and as a total.
  _failedCommitOffsets = metrics.sensor("failed-commit-offsets");
  final MetricName failedAvgName = new MetricName("failed-commit-offsets-avg", METRIC_GROUP_NAME,
      "The average number of offsets per second that have failed.", tags);
  _failedCommitOffsets.add(failedAvgName, new Rate());
  final MetricName failedTotalName = new MetricName("failed-commit-offsets-total", METRIC_GROUP_NAME,
      "The total number of offsets per second that have failed.", tags);
  _failedCommitOffsets.add(failedTotalName, new Total());

  // Derived metric: looks up both totals in the registry every time it is measured.
  final MetricName availabilityName =
      new MetricName("offsets-committed-avg", METRIC_GROUP_NAME, "The average offset commits availability.", tags);
  metrics.addMetric(availabilityName,
    (MetricConfig config, long now) -> {
      Object committedValue = metrics.metrics()
          .get(metrics.metricName("offsets-committed-total", METRIC_GROUP_NAME, tags))
          .metricValue();
      Object failedValue = metrics.metrics()
          .get(metrics.metricName("failed-commit-offsets-total", METRIC_GROUP_NAME, tags))
          .metricValue();

      // Without both values there is nothing to divide; report 0.
      if (committedValue == null || failedValue == null) {
        return 0;
      }

      double committed = (double) committedValue;
      double failed = (double) failedValue;
      return committed / (committed + failed);
    });
}
 
Example #4
Source File: WorkersMetrics.java    From kafka-workers with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the {@code min}, {@code max}, {@code avg} and {@code count-per-sec} metrics to each of
 * the three consumer-thread sensors.
 *
 * <p>Every metric uses the owning sensor's name as its group, and the boolean returned by each
 * {@code Sensor.add} call is passed to {@code checkState}.
 */
public void addConsumerThreadMetrics() {
    Stream.of(
            metrics.sensor(INPUT_RECORDS_SIZE_SENSOR),
            metrics.sensor(KAFKA_POLL_RECORDS_COUNT_SENSOR),
            metrics.sensor(KAFKA_POLL_RECORDS_SIZE_SENSOR)
    ).forEach(sensor -> {
        // The sensor's own name doubles as the metric group for all four stats.
        final String group = sensor.name();
        checkState(sensor.add(metrics.metricName("min", group), new Min()));
        checkState(sensor.add(metrics.metricName("max", group), new Max()));
        checkState(sensor.add(metrics.metricName("avg", group), new Avg()));
        checkState(sensor.add(metrics.metricName("count-per-sec", group), new Rate(new Count())));
    });
}
 
Example #5
Source File: ProducerCollector.java    From ksql-fork-with-deep-learning-function with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the list of producer sensor metrics for the given key.
 *
 * <p>Each metric is registered through {@code addSensor}, which receives the key, the metric
 * name, the stat instance, the list being populated and a boolean flag.
 *
 * @param key the key handed to every {@code addSensor} call
 * @return the list that was passed to the {@code addSensor} calls
 */
private List<TopicSensors.SensorMetric<ProducerRecord>> buildSensors(String key) {
  final List<TopicSensors.SensorMetric<ProducerRecord>> result = new ArrayList<>();

  // The metrics registry does not reliably cope with concurrent add/check-exists activity,
  // so all registrations happen while holding the registry's monitor.
  synchronized (metrics) {
    addSensor(key, "messages-per-sec", new Rate(), result, false);
    addSensor(key, "total-messages", new Total(), result, false);
    addSensor(key, "failed-messages", new Total(), result, true);
    addSensor(key, "failed-messages-per-sec", new Rate(), result, true);
  }

  return result;
}
 
Example #6
Source File: ProduceMetrics.java    From kafka-monitor with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the produced-records and produce-error sensors for one partition.
 *
 * <p>Each sensor gets a single {@link Rate} metric whose name ends with the partition number,
 * and is then stored in the matching per-partition map under that partition number.
 *
 * @param partition the partition number; it is appended to the sensor and metric names
 */
public void addPartitionSensors(int partition) {
  // Records produced to this partition.
  Sensor producedSensor = _metrics.sensor("records-produced-partition-" + partition);
  MetricName producedRateName = new MetricName(
      "records-produced-rate-partition-" + partition,
      XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE,
      "The average number of records per second that are produced to this partition",
      _tags);
  producedSensor.add(producedRateName, new Rate());
  _recordsProducedPerPartition.put(partition, producedSensor);

  // Errors encountered while producing to this partition.
  Sensor errorSensor = _metrics.sensor("produce-error-partition-" + partition);
  MetricName errorRateName = new MetricName(
      "produce-error-rate-partition-" + partition,
      XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE,
      "The average number of errors per second when producing to this partition",
      _tags);
  errorSensor.add(errorRateName, new Rate());
  _produceErrorPerPartition.put(partition, errorSensor);
}
 
Example #7
Source File: TopicSensors.java    From ksql-fork-with-deep-learning-function with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the measurable behind this metric is a {@link Rate}.
 *
 * @return {@code true} if {@code metric.measurable()} is an instance of {@link Rate}
 */
public boolean isRate() {
  return Rate.class.isInstance(metric.measurable());
}
 
Example #8
Source File: ConsumeMetrics.java    From kafka-monitor with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the consume-side sensors (bytes, errors, consumed/duplicated/lost/delayed records
 * and record latency) and the derived {@code consume-availability-avg} metric.
 *
 * @param metrics the metrics registry in which the sensors and metrics are registered
 * @param tags the tags attached to every metric name
 * @param latencyPercentileMaxMs the maximum value passed to the latency {@link Percentiles};
 *     also the numerator used to derive the bucket count
 * @param latencyPercentileGranularityMs the divisor used to derive the bucket count
 */
public ConsumeMetrics(final Metrics metrics, Map<String, String> tags, int latencyPercentileMaxMs,
    int latencyPercentileGranularityMs) {

  _bytesConsumed = metrics.sensor("bytes-consumed");
  _bytesConsumed.add(new MetricName("bytes-consumed-rate", METRIC_GROUP_NAME, "The average number of bytes per second that are consumed", tags), new Rate());

  _consumeError = metrics.sensor("consume-error");
  _consumeError.add(new MetricName("consume-error-rate", METRIC_GROUP_NAME, "The average number of errors per second", tags), new Rate());
  _consumeError.add(new MetricName("consume-error-total", METRIC_GROUP_NAME, "The total number of errors", tags), new Total());

  _recordsConsumed = metrics.sensor("records-consumed");
  _recordsConsumed.add(new MetricName("records-consumed-rate", METRIC_GROUP_NAME, "The average number of records per second that are consumed", tags), new Rate());
  _recordsConsumed.add(new MetricName("records-consumed-total", METRIC_GROUP_NAME, "The total number of records that are consumed", tags), new Total());

  _recordsDuplicated = metrics.sensor("records-duplicated");
  _recordsDuplicated.add(new MetricName("records-duplicated-rate", METRIC_GROUP_NAME, "The average number of records per second that are duplicated", tags), new Rate());
  _recordsDuplicated.add(new MetricName("records-duplicated-total", METRIC_GROUP_NAME, "The total number of records that are duplicated", tags), new Total());

  _recordsLost = metrics.sensor("records-lost");
  _recordsLost.add(new MetricName("records-lost-rate", METRIC_GROUP_NAME, "The average number of records per second that are lost", tags), new Rate());
  _recordsLost.add(new MetricName("records-lost-total", METRIC_GROUP_NAME, "The total number of records that are lost", tags), new Total());

  _recordsDelayed = metrics.sensor("records-delayed");
  _recordsDelayed.add(new MetricName("records-delayed-rate", METRIC_GROUP_NAME, "The average number of records per second that are either lost or arrive after maximum allowed latency under SLA", tags), new Rate());
  _recordsDelayed.add(new MetricName("records-delayed-total", METRIC_GROUP_NAME, "The total number of records that are either lost or arrive after maximum allowed latency under SLA", tags), new Total());

  _recordsDelay = metrics.sensor("records-delay");
  _recordsDelay.add(new MetricName("records-delay-ms-avg", METRIC_GROUP_NAME, "The average latency of records from producer to consumer", tags), new Avg());
  _recordsDelay.add(new MetricName("records-delay-ms-max", METRIC_GROUP_NAME, "The maximum latency of records from producer to consumer", tags), new Max());

  // Two extra buckets are used for values smaller than 0.0 or larger than max, respectively.
  int bucketNum = latencyPercentileMaxMs / latencyPercentileGranularityMs + 2;
  int sizeInBytes = 4 * bucketNum;
  _recordsDelay.add(new Percentiles(sizeInBytes, latencyPercentileMaxMs, Percentiles.BucketSizing.CONSTANT,
      new Percentile(new MetricName("records-delay-ms-99th", METRIC_GROUP_NAME, "The 99th percentile latency of records from producer to consumer", tags), 99.0),
      new Percentile(new MetricName("records-delay-ms-999th", METRIC_GROUP_NAME, "The 99.9th percentile latency of records from producer to consumer", tags), 99.9),
      new Percentile(new MetricName("records-delay-ms-9999th", METRIC_GROUP_NAME, "The 99.99th percentile latency of records from producer to consumer", tags), 99.99)));

  // Derived metric: (consumed - delayed) / (consumed + lost), evaluated each time it is read.
  metrics.addMetric(new MetricName("consume-availability-avg", METRIC_GROUP_NAME, "The average consume availability", tags),
    (config, now) -> {
      double recordsConsumedRate = (double) metrics.metrics().get(metrics.metricName("records-consumed-rate", METRIC_GROUP_NAME, tags)).metricValue();
      double recordsLostRate = (double) metrics.metrics().get(metrics.metricName("records-lost-rate", METRIC_GROUP_NAME, tags)).metricValue();
      double recordsDelayedRate = (double) metrics.metrics().get(metrics.metricName("records-delayed-rate", METRIC_GROUP_NAME, tags)).metricValue();

      // Treat a NaN lost/delayed rate as 0. The static Double.isNaN avoids the deprecated
      // boxing constructor and a per-read allocation.
      if (Double.isNaN(recordsLostRate)) {
        recordsLostRate = 0;
      }
      if (Double.isNaN(recordsDelayedRate)) {
        recordsDelayedRate = 0;
      }

      // A non-positive (or NaN) denominator yields 0 rather than a division result.
      return recordsConsumedRate + recordsLostRate > 0
          ? (recordsConsumedRate - recordsDelayedRate) / (recordsConsumedRate + recordsLostRate) : 0;
    });
}
 
Example #9
Source File: ProduceMetrics.java    From kafka-monitor with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the produce-side sensors (produced records, produce errors and produce delay) and
 * the derived {@code produce-availability-avg} metric.
 *
 * @param metrics the metrics registry in which the sensors and metrics are registered
 * @param tags the tags attached to every metric name
 * @param latencyPercentileGranularityMs the divisor used to derive the percentile bucket count
 * @param latencyPercentileMaxMs the maximum value passed to the delay {@link Percentiles};
 *     also the numerator used to derive the bucket count
 * @param partitionNumber read on every availability measurement to obtain the number of
 *     partitions to average over
 * @param treatZeroThroughputAsUnavailable when {@code true}, a partition with no positive
 *     produced-plus-error rate contributes nothing to the availability sum
 */
public ProduceMetrics(final Metrics metrics, final Map<String, String> tags, int latencyPercentileGranularityMs,
    int latencyPercentileMaxMs, AtomicInteger partitionNumber, boolean treatZeroThroughputAsUnavailable) {
  _metrics = metrics;
  _tags = tags;

  _recordsProducedPerPartition = new ConcurrentHashMap<>();
  _produceErrorPerPartition = new ConcurrentHashMap<>();
  _produceErrorInLastSendPerPartition = new ConcurrentHashMap<>();

  final String group = XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE;

  // Produced records: rate and running total.
  _recordsProduced = metrics.sensor("records-produced");
  _recordsProduced.add(
      new MetricName("records-produced-rate", group, "The average number of records per second that are produced", tags),
      new Rate());
  _recordsProduced.add(
      new MetricName("records-produced-total", group, "The total number of records that are produced", tags),
      new Total());

  // Produce errors: rate and running total.
  _produceError = metrics.sensor("produce-error");
  _produceError.add(
      new MetricName("produce-error-rate", group, "The average number of errors per second", tags),
      new Rate());
  _produceError.add(
      new MetricName("produce-error-total", group, "The total number of errors", tags),
      new Total());

  // Produce delay: average and maximum.
  _produceDelay = metrics.sensor("produce-delay");
  _produceDelay.add(
      new MetricName("produce-delay-ms-avg", group, "The average delay in ms for produce request", tags),
      new Avg());
  _produceDelay.add(
      new MetricName("produce-delay-ms-max", group, "The maximum delay in ms for produce request", tags),
      new Max());

  // Two extra buckets are reserved for values below 0.0 and above the maximum, respectively.
  int bucketCount = latencyPercentileMaxMs / latencyPercentileGranularityMs + 2;
  int percentileBufferBytes = 4 * bucketCount;
  _produceDelay.add(new Percentiles(percentileBufferBytes, latencyPercentileMaxMs, Percentiles.BucketSizing.CONSTANT,
      new Percentile(
          new MetricName("produce-delay-ms-99th", group, "The 99th percentile delay in ms for produce request", tags),
          99.0),
      new Percentile(
          new MetricName("produce-delay-ms-999th", group, "The 99.9th percentile delay in ms for produce request", tags),
          99.9),
      new Percentile(
          new MetricName("produce-delay-ms-9999th", group, "The 99.99th percentile delay in ms for produce request", tags),
          99.99)));

  metrics.addMetric(
      new MetricName("produce-availability-avg", group, "The average produce availability", tags),
      (config, now) -> {
        final int partitionCount = partitionNumber.get();
        double availabilityTotal = 0.0;

        for (int p = 0; p < partitionCount; p++) {
          double producedRate = (double) metrics.metrics()
              .get(metrics.metricName("records-produced-rate-partition-" + p, group, tags))
              .metricValue();
          double errorRate = (double) metrics.metrics()
              .get(metrics.metricName("produce-error-rate-partition-" + p, group, tags))
              .metricValue();

          // With no errors the error-rate sensor may expire and report NaN; treat any
          // non-finite error rate as 0.
          if (!Double.isFinite(errorRate)) {
            errorRate = 0;
          }

          // Some produce activity (successful or failed) was observed: the partition's
          // availability is the fraction of the activity that succeeded.
          if (producedRate + errorRate > 0) {
            availabilityTotal += producedRate / (producedRate + errorRate);
            continue;
          }

          // No observed activity and zero throughput counts as unavailable: contribute nothing.
          if (treatZeroThroughputAsUnavailable) {
            continue;
          }

          // Otherwise the partition counts as fully available as long as the producer has not
          // thrown. This lets an admin monitor the availability experienced by Kafka users,
          // who block and retry according to their configuration (e.g. retries,
          // retry.backoff.ms). Slow retries raise latency in the ConsumeService and reduce
          // ConsumeAvailability once consume.latency.sla.ms is exceeded. If the timeout is
          // longer than 60 seconds (the current samples window duration), an error sample can
          // expire before the next error is produced, so the status of the last send is also
          // checked in order to detect offline partitions under a high producer timeout.
          Boolean lastSendFailed = _produceErrorInLastSendPerPartition.get(p);
          if (!Boolean.TRUE.equals(lastSendFailed)) {
            availabilityTotal += 1.0;
          }
        }

        // Every partition carries equal weight in the overall availability.
        return availabilityTotal / partitionCount;
      }
  );
}