Python apache_beam.CombineFn() Examples
The following are 10 code examples of apache_beam.CombineFn().
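Before diving into the examples, here is a minimal sketch of the CombineFn interface itself (this sketch is ours, not from the projects below): a CombineFn implements create_accumulator, add_input, merge_accumulators, and extract_output, and is applied with beam.CombineGlobally or beam.CombinePerKey.

import apache_beam as beam


class MeanFn(beam.CombineFn):
  """Computes the mean of the input elements."""

  def create_accumulator(self):
    return (0.0, 0)  # (running sum, count)

  def add_input(self, accumulator, element):
    total, count = accumulator
    return total + element, count + 1

  def merge_accumulators(self, accumulators):
    totals, counts = zip(*accumulators)
    return sum(totals), sum(counts)

  def extract_output(self, accumulator):
    total, count = accumulator
    return total / count if count else float('nan')


with beam.Pipeline() as p:
  (p
   | beam.Create([1, 2, 3, 4])
   | beam.CombineGlobally(MeanFn())
   | beam.Map(print))  # prints 2.5

Run with the default DirectRunner, this prints 2.5. The examples below show the same four-method interface in production code.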
Example #1
Source File: jackknife.py From model-analysis with Apache License 2.0
def __init__(self,
             num_jackknife_samples: int,
             skip_ci_metric_keys: Optional[Set[metric_types.MetricKey]] = None):
  """Initializes a _MergeJackknifeSamples CombineFn.

  Args:
    num_jackknife_samples: The number of samples computed per slice.
    skip_ci_metric_keys: Set of metric keys for which to skip confidence
      interval computation. For metric keys in this set, just the unsampled
      value will be returned.
  """
  self._num_jackknife_samples = num_jackknife_samples
  self._skip_ci_metric_keys = skip_ci_metric_keys
  self._num_slices_counter = beam.metrics.Metrics.counter(
      constants.METRICS_NAMESPACE, 'num_slices')
  self._missing_samples_counter = beam.metrics.Metrics.counter(
      constants.METRICS_NAMESPACE, 'num_slices_missing_jackknife_samples')
  self._small_samples_counter = beam.metrics.Metrics.counter(
      constants.METRICS_NAMESPACE, 'num_slices_with_small_jackknife_samples')
  self._sample_id_key = metric_types.MetricKey(_JACKKNIFE_SAMPLE_ID_KEY)
Example #2
Source File: analyzer_impls.py From transform with Apache License 2.0
def __init__(self,
             combiner,
             tf_config,
             is_combining_accumulators,
             should_extract_output=None):
  """Init method for _CombinerWrapper.

  Args:
    combiner: An `analyzer_nodes.Combiner` object used to combine.
    tf_config: A `tf.ConfigProto`.
    is_combining_accumulators: A bool which indicates whether this is
      combining single or batched inputs, or already accumulated objects.
    should_extract_output: A bool which indicates whether this should call the
      combiner's extract_output method in extract_output. If not specified, we
      assume it's the same value as `is_combining_accumulators`.
  """
  self._combiner = combiner
  self._tf_config = tf_config
  self._is_combining_accumulators = is_combining_accumulators
  if should_extract_output is None:
    should_extract_output = is_combining_accumulators
  self._should_extract_output = should_extract_output

  # TODO(b/135541366): Move this to CombineFn.setup once it exists. That
  # should help simplify several aspects of Quantiles state management.
  if isinstance(combiner, analyzers.QuantilesCombiner):
    combiner.initialize_local_state(tf_config)
Example #3
Source File: preprocess.py From professional-services with Apache License 2.0
def check_size(p, name, path):
  """Performs checks on the input pipeline and stores stats in specified path.

  Checks performed: counts rows and derives class distribution.

  Args:
    p: PCollection, input pipeline.
    name: string, unique identifier for the beam step.
    path: string, path to store stats.

  Returns:
    PCollection
  """

  class _Combine(beam.CombineFn):
    """Counts and takes the average of positive classes in the pipeline."""

    def create_accumulator(self):
      return (0.0, 0.0)

    def add_input(self, sum_count, inputs):
      (s, count) = sum_count
      return s + inputs, count + 1

    def merge_accumulators(self, accumulators):
      sums, counts = zip(*accumulators)
      return sum(sums), sum(counts)

    # We should not consider the case count == 0 as an error (class
    # initialized with count == 0).
    def extract_output(self, sum_count):
      (s, count) = sum_count
      return count, (1.0 * s / count) if count else float('NaN')

  return (p
          | 'CheckMapTo_1_{}'.format(name) >>
          beam.Map(lambda x: x[constants.LABEL_COLUMN])
          | 'CheckSum_{}'.format(name) >> beam.CombineGlobally(_Combine())
          | 'CheckRecord_{}'.format(name) >> beam.io.WriteToText(
              '{}.txt'.format(path)))
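A detail worth noting in _Combine.extract_output above: by default, beam.CombineGlobally produces an output even for an empty input by calling extract_output on a fresh accumulator, so guarding the division and returning NaN keeps this check from crashing on an empty pipeline.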
Example #4
Source File: model_util.py From model-analysis with Apache License 2.0
def __init__(self, model_loaders: Dict[Text, types.ModelLoader]):
  """Initializes CombineFn using dict of loaders keyed by model location."""
  self._model_loaders = model_loaders
  self._loaded_models = None
  self._model_load_seconds = None
  self._model_load_seconds_distribution = beam.metrics.Metrics.distribution(
      constants.METRICS_NAMESPACE, 'model_load_seconds')
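Note that no model is actually loaded in __init__: only the ModelLoader handles are stored, and self._loaded_models starts as None. This is a common Beam pattern: constructor state stays cheap to pickle and ship to workers, while the expensive loading is deferred until the CombineFn runs.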
Example #5
Source File: metric_types.py From model-analysis with Apache License 2.0
def __new__(cls, keys: List[MetricKey], preprocessor: beam.DoFn,
            combiner: beam.CombineFn):
  return super(MetricComputation, cls).__new__(cls, keys, preprocessor,
                                               combiner)
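This __new__ override is the usual pattern for subclassing a NamedTuple: it forwards the keys, preprocessor, and combiner fields unchanged to the parent, existing mainly so the subclass can carry its own type hints and docstring.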
Example #6
Source File: jackknife.py From model-analysis with Apache License 2.0
def __init__(self, combiner: beam.CombineFn):
  self._combiner = combiner
Example #7
Source File: query_based_metrics_evaluator.py From model-analysis with Apache License 2.0
def QueryBasedMetricsEvaluator(  # pylint: disable=invalid-name
    query_id: Text,
    prediction_key: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
    run_after: Text = slice_key_extractor.SLICE_KEY_EXTRACTOR_STAGE_NAME,
) -> evaluator.Evaluator:
  """Creates an Evaluator for evaluating metrics and plots.

  Args:
    query_id: Key of query ID column in the features dictionary.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    combine_fns: List of query-based metrics combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.
    run_after: Extractor to run after (None means before any extractors).

  Returns:
    Evaluator for computing query-based metrics. The output will be stored
    under 'metrics' and 'plots' keys.
  """
  # pylint: disable=no-value-for-parameter
  return evaluator.Evaluator(
      stage_name='EvaluateQueryBasedMetrics',
      run_after=run_after,
      ptransform=EvaluateQueryBasedMetrics(
          query_id=query_id,
          prediction_key=prediction_key,
          combine_fns=combine_fns,
          metrics_key=metrics_key))
  # pylint: enable=no-value-for-parameter
Example #8
Source File: query_based_metrics_evaluator.py From model-analysis with Apache License 2.0
def EvaluateQueryBasedMetrics(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    prediction_key: Text,
    query_id: Text,
    combine_fns: List[beam.CombineFn],
    metrics_key: Text = constants.METRICS_KEY,
) -> evaluator.Evaluation:
  """Evaluates query-based metrics.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTION_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    prediction_key: Key in predictions dictionary to use as the prediction (for
      sorting examples within the query). Use the empty string if the Estimator
      returns a predictions Tensor (not a dictionary).
    query_id: Key of query ID column in the features dictionary.
    combine_fns: List of query-based metrics combine functions.
    metrics_key: Name to use for metrics key in Evaluation output.

  Returns:
    Evaluation containing metrics dictionaries keyed by 'metrics'.
  """
  # pylint: disable=no-value-for-parameter
  metrics = (
      extracts
      | 'Filter' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY
      ])
      | 'ComputeQueryBasedMetrics' >> ComputeQueryBasedMetrics(
          query_id=query_id,
          combine_fns=combine_fns,
          prediction_key=prediction_key))
  # pylint: enable=no-value-for-parameter
  return {metrics_key: metrics}
Example #9
Source File: jackknife.py From model-analysis with Apache License 2.0
def _make_loo_accumulators(
    accumulators: List[_AccumulatorType],
    combiner: beam.CombineFn) -> Iterator[_AccumulatorType]:
  """Yields accumulators which each leave out one value in accumulators.

  Args:
    accumulators: Tuple of values for which to compute complements.
    combiner: A combiner to use for creating and merging accumulators.

  Yields:
    Leave-one-out accumulators for each element in `accumulators`. The ith
    accumulator will be the result of merging all accumulators but the ith,
    along with the accumulator passed as `complement`.
  """

  def make_loo_accumulators_rec(
      accumulators: List[_AccumulatorType], complement: _AccumulatorType,
      combiner: beam.CombineFn) -> Iterator[_AccumulatorType]:
    """Recursive helper to compute leave-one-out accumulators."""
    if len(accumulators) == 1:
      yield complement
    else:
      split_idx = int(len(accumulators) / 2)
      left, right = accumulators[:split_idx], accumulators[split_idx:]
      left_c = copy.deepcopy(complement)
      left_c = combiner.merge_accumulators([left_c] + right)
      for c in make_loo_accumulators_rec(left, left_c, combiner):
        yield c
      # Reuse the complement accumulator on the right recursion.
      right_c = combiner.merge_accumulators([complement] + left)
      for c in make_loo_accumulators_rec(right, right_c, combiner):
        yield c

  # TODO(b/151445942): Use `yield from` when we stop supporting Python < 3.3.
  for acc in make_loo_accumulators_rec(accumulators,
                                       combiner.create_accumulator(),
                                       combiner):
    yield acc


# TODO(b/152812821): Disable Beam annotations support due to failure in:
# //third_party/py/tensorflow_model_analysis/evaluators:jackknife_test.python3
# Output type hint violation at JackknifeCombinePerKey: expected Tuple[Union[
# Tuple[Tuple[str, Union[float, int, str]], ...], Tuple[]], Tuple[Dict[
# MetricKey, Any], ...]], got Tuple[Union[Tuple[Tuple[str, Union[float, int,
# str]], ...], Tuple[]], Dict[MetricKey, Any]]
#
# Since @beam.typehints.no_annotations is not available yet, part of the output
# type is put in quotes, which currently makes Beam ignore the hint.
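The halving recursion above is what keeps the jackknife affordable: each accumulator takes part in O(log n) merges rather than the O(n) merges a naive "combine everything except i" loop would need. Here is a standalone sketch of the same technique with a trivial sum combiner (SumFn and loo_accumulators are illustrative names, not part of jackknife.py):

import copy

import apache_beam as beam


class SumFn(beam.CombineFn):
  """A minimal combiner whose accumulator is a running integer sum."""

  def create_accumulator(self):
    return 0

  def add_input(self, acc, x):
    return acc + x

  def merge_accumulators(self, accs):
    return sum(accs)

  def extract_output(self, acc):
    return acc


def loo_accumulators(accumulators, combiner):
  """Yields merge(all accumulators except the ith) for each i."""

  def rec(accs, complement):
    if len(accs) == 1:
      yield complement
    else:
      mid = len(accs) // 2
      left, right = accs[:mid], accs[mid:]
      # Everything outside `left` is the complement plus the right half.
      yield from rec(left, combiner.merge_accumulators(
          [copy.deepcopy(complement)] + right))
      # Everything outside `right` is the complement plus the left half.
      yield from rec(right, combiner.merge_accumulators([complement] + left))

  yield from rec(accumulators, combiner.create_accumulator())


print(list(loo_accumulators([1, 2, 3, 4], SumFn())))  # [9, 8, 7, 6]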
Example #10
Source File: jackknife.py From model-analysis with Apache License 2.0
def _make_jackknife_samples(
    slice_partitions: Tuple[slicer.SliceKeyType, Sequence[_PartitionInfo]],
    combiner: beam.CombineFn
) -> Iterator[Tuple[slicer.SliceKeyType, 'metric_types.MetricsDict']]:
  """Computes leave-one-out and unsampled outputs for the combiner.

  This function creates leave-one-out combiner outputs by combining all but
  one accumulator and extracting the output. Second, it creates an unsampled
  output using all of the accumulators and extracts an unsampled output. The
  keys yielded by this function are augmented versions of the input slice key
  in which the sample ID (or a special placeholder ID for the unsampled value)
  has been added.

  Args:
    slice_partitions: The result of GroupByKey in which the key is a
      slice_key, and the grouped stream consists of per-partition
      _PartitionInfo tuples in which the first element is an accumulator for
      that partition, the second element is the size of that partition, and
      the third element is the partition ID.
    combiner: The combiner to be used for converting accumulators to outputs.

  Yields:
    Tuples of the form (slice_key, metrics), for each jackknife sample and for
    the unsampled value.
  """
  slice_key, accumulators_sizes_and_ids = slice_partitions
  accumulators, sizes, partition_ids = zip(*accumulators_sizes_and_ids)
  unsampled_accumulator = None
  for i, loo_accumulator in enumerate(
      _make_loo_accumulators(list(accumulators), combiner)):
    # Yield the sampled output with the sample_id of the left-out partition.
    sample_id_key = (_JACKKNIFE_SAMPLE_ID_KEY, partition_ids[i])
    yield slice_key + (sample_id_key,), combiner.extract_output(
        loo_accumulator)

    if i == 0:
      # Create the unsampled accumulator from sample 0 and its complement.
      unsampled_accumulator = combiner.merge_accumulators(
          [loo_accumulator, accumulators[0]])

  # Yield the unsampled output along with the total count as a special metric.
  count_dict = {_JACKKNIFE_EXAMPLE_COUNT_METRIC_KEY: sum(sizes)}
  sample_id_key = ((_JACKKNIFE_SAMPLE_ID_KEY, _JACKKNIFE_FULL_SAMPLE_ID),)
  unsampled_output = combiner.extract_output(unsampled_accumulator)
  unsampled_key = slice_key + sample_id_key
  unsampled_val = unsampled_output + (count_dict,)
  yield unsampled_key, unsampled_val
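The i == 0 branch is a small but neat optimization: the first leave-one-out accumulator is, by construction, the merge of every accumulator except the first, so merging it with accumulators[0] reconstructs the full (unsampled) accumulator without a separate pass over all partitions.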