Python more_itertools.flatten() Examples
The following are 18 code examples of more_itertools.flatten(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module more_itertools, or try the search function.
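Before diving into the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of what flatten() itself does: it returns a lazy iterator that removes exactly one level of nesting.

from more_itertools import flatten

nested = [[0, 1], [2, 3], [4, [5, 6]]]

# flatten() removes exactly one level of nesting and is lazy,
# so the inner [5, 6] is left intact.
print(list(flatten(nested)))  # [0, 1, 2, 3, 4, [5, 6]]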
Example #1
Source File: displacements.py From langchangetrack with BSD 3-Clause "New" or "Revised" License | 6 votes |
def calculate_words_displacement(self, column_names, n_jobs=1):
    """ Calculate word displacements for each word in the Pandas data frame. """
    words = self.get_word_list()

    # Create chunks of the words to be processed.
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))  # chunked() expects an integer chunk size
    chunks = list(more_itertools.chunked(words, chunk_sz))

    # Calculate the displacements
    chunksL = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, process_word_source, self) for chunk in chunks
    )
    chunksH = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, process_word_dest, self) for chunk in chunks
    )
    L = more_itertools.flatten(chunksL)
    H = more_itertools.flatten(chunksH)
    flattendL = [x for sublist in L for x in sublist]
    flattendH = [x for sublist in H for x in sublist]

    # Store the results in a nice pandas data frame
    dfo, dfn = self.create_data_frames(flattendL, flattendH, column_names)
    return flattendL, flattendH, dfo, dfn
Example #2
Source File: stack_probabilities_for_linear.py From OpenKiwi with GNU Affero General Public License v3.0 | 6 votes |
def concat(probabilities, prob_sep='|', token_sep='\n', sentence_sep='\n\n'):
    flat_probabilities = [list(flatten(probs)) for probs in probabilities]
    if not all_equal([len(p) for p in flat_probabilities]):
        logger.error('Number of tokens do not match.')
        return None
    probs_per_token_sentence_file = [
        list(zip(*parallel_probs)) for parallel_probs in zip(*probabilities)
    ]
    content_str = sentence_sep.join(
        [
            token_sep.join(
                [prob_sep.join(map(str, tokens)) for tokens in sentence]
            )
            for sentence in probs_per_token_sentence_file
        ]
    )
    content_str += sentence_sep  # Add a trailing newline before EOF.
    return content_str
Example #3
Source File: test_recipes.py From pipenv with MIT License | 5 votes |
def test_basic_usage(self):
    """ensure list of lists is flattened one level"""
    f = [[0, 1, 2], [3, 4, 5]]
    self.assertEqual(list(range(6)), list(mi.flatten(f)))
Example #4
Source File: test_more.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def test_collapse_flatten(self):
    l = [[1], [2], [[3], 4], [[[5]]]]
    self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l)))
Example #5
Source File: test_recipes.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def test_single_level(self):
    """ensure list of lists is flattened only one level"""
    f = [[0, [1, 2]], [[3, 4], 5]]
    self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f)))
Example #6
Source File: test_recipes.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def test_basic_usage(self):
    """ensure list of lists is flattened one level"""
    f = [[0, 1, 2], [3, 4, 5]]
    self.assertEqual(list(range(6)), list(mi.flatten(f)))
Example #7
Source File: test_more.py From pipenv with MIT License | 5 votes |
def test_collapse_flatten(self):
    l = [[1], [2], [[3], 4], [[[5]]]]
    self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l)))
Example #8
Source File: test_recipes.py From pipenv with MIT License | 5 votes |
def test_single_level(self):
    """ensure list of lists is flattened only one level"""
    f = [[0, [1, 2]], [[3, 4], 5]]
    self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f)))
Example #9
Source File: test_recipes.py From python-netsurv with MIT License | 5 votes |
def test_basic_usage(self):
    """ensure list of lists is flattened one level"""
    f = [[0, 1, 2], [3, 4, 5]]
    self.assertEqual(list(range(6)), list(mi.flatten(f)))
Example #10
Source File: evaluate.py From OpenKiwi with GNU Affero General Public License v3.0 | 5 votes |
def eval_word_level(golds, pred_files, tag_name):
    scores_table = []
    for pred_file, pred in pred_files[tag_name]:
        _check_lengths(golds[tag_name], pred)
        scores = score_word_level(
            list(flatten(golds[tag_name])), list(flatten(pred))
        )
        scores_table.append((pred_file, *scores))

    # If more than one system is provided, compute ensemble score
    if len(pred_files[tag_name]) > 1:
        ensemble_pred = _average(
            [list(flatten(pred)) for _, pred in pred_files[tag_name]]
        )
        ensemble_score = score_word_level(
            list(flatten(golds[tag_name])), ensemble_pred
        )
        scores_table.append(("*ensemble*", *ensemble_score))

    scores = np.array(
        scores_table,
        dtype=[
            ("File", "object"),
            ("F1_{}".format(const.LABELS[0]), float),
            ("F1_{}".format(const.LABELS[1]), float),
            ("F1_mult", float),
        ],
    )
    # Put the main metric in the first column
    scores = scores[
        [
            "File",
            "F1_mult",
            "F1_{}".format(const.LABELS[0]),
            "F1_{}".format(const.LABELS[1]),
        ]
    ]
    return scores
Example #11
Source File: evaluate.py From OpenKiwi with GNU Affero General Public License v3.0 | 5 votes |
def _average(probs_per_file):
    # flat_probs = [list(flatten(probs)) for probs in probs_per_file]
    probabilities = np.array(probs_per_file, dtype="float32")
    return probabilities.mean(axis=0).tolist()
Example #12
Source File: utils.py From OpenKiwi with GNU Affero General Public License v3.0 | 5 votes |
def unroll(list_of_lists):
    """
    :param list_of_lists: a list that contains lists
    :param rec: unroll recursively
    :return: a flattened list
    """
    if isinstance(first(list_of_lists), (np.ndarray, list)):
        return list(flatten(list_of_lists))
    return list_of_lists
Example #13
Source File: test_recipes.py From python-netsurv with MIT License | 5 votes |
def test_single_level(self):
    """ensure list of lists is flattened only one level"""
    f = [[0, [1, 2]], [[3, 4], 5]]
    self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f)))
Example #14
Source File: test_recipes.py From python-netsurv with MIT License | 5 votes |
def test_basic_usage(self):
    """ensure list of lists is flattened one level"""
    f = [[0, 1, 2], [3, 4, 5]]
    self.assertEqual(list(range(6)), list(mi.flatten(f)))
Example #15
Source File: test_more.py From python-netsurv with MIT License | 5 votes |
def test_collapse_flatten(self):
    l = [[1], [2], [[3], 4], [[[5]]]]
    self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l)))
Example #16
Source File: test_recipes.py From python-netsurv with MIT License | 5 votes |
def test_single_level(self):
    """ensure list of lists is flattened only one level"""
    f = [[0, [1, 2]], [[3, 4], 5]]
    self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f)))
Example #17
Source File: record_merger.py From recordexpungPDX with MIT License | 4 votes |
def merge(
    ambiguous_record: AmbiguousRecord,
    ambiguous_charge_id_to_time_eligibility_list: List[Dict[str, TimeEligibility]],
    charge_ids_with_question: List[str],
) -> Record:
    ambiguous_charge_id_to_time_eligibilities: Dict[str, List[TimeEligibility]] = collections.defaultdict(list)
    for charge_id_to_time_eligibility in ambiguous_charge_id_to_time_eligibility_list:
        for k, v in charge_id_to_time_eligibility.items():
            if v not in ambiguous_charge_id_to_time_eligibilities[k]:
                ambiguous_charge_id_to_time_eligibilities[k].append(v)
    charges = list(flatten([record.charges for record in ambiguous_record]))
    record = ambiguous_record[0]
    new_case_list: List[Case] = []
    for case in record.cases:
        new_charges = []
        for charge in case.charges:
            time_eligibilities = ambiguous_charge_id_to_time_eligibilities.get(
                charge.ambiguous_charge_id
            )  # TODO: Review whether this can return None
            sorted_time_eligibility = (
                sorted(time_eligibilities, key=lambda e: e.date_will_be_eligible)
                if time_eligibilities
                else None
            )
            same_charges = list(filter(lambda c: c.ambiguous_charge_id == charge.ambiguous_charge_id, charges))
            romeo_and_juliet_exception = RecordMerger._is_romeo_and_juliet_exception(same_charges)
            merged_type_eligibility = RecordMerger.merge_type_eligibilities(same_charges)
            merged_time_eligibility = RecordMerger.merge_time_eligibilities(sorted_time_eligibility)
            if charge.ambiguous_charge_id in charge_ids_with_question:
                charge_eligibility = ChargeEligibility(
                    ChargeEligibilityStatus.NEEDS_MORE_ANALYSIS, "Needs More Analysis"
                )
            else:
                charge_eligibility = RecordMerger.compute_charge_eligibility(
                    merged_type_eligibility, sorted_time_eligibility, romeo_and_juliet_exception
                )
            if "open" in charge_eligibility.label.lower():
                charge_eligibility = replace(
                    charge_eligibility,
                    label=f"Eligibility Timeframe Dependent On Open Charge: {charge_eligibility.label}",
                )
            expungement_result = ExpungementResult(
                type_eligibility=merged_type_eligibility,
                time_eligibility=merged_time_eligibility,
                charge_eligibility=charge_eligibility,
            )
            merged_type_name = " ⬥ ".join(
                list(unique_everseen([charge.charge_type.type_name for charge in same_charges]))
            )
            merged_charge_type = replace(charge.charge_type, type_name=merged_type_name)
            merged_disposition = RecordMerger.merge_dispositions(same_charges)
            new_charge: Charge = replace(
                charge,
                charge_type=merged_charge_type,
                expungement_result=expungement_result,
                disposition=merged_disposition,
            )
            new_charges.append(new_charge)
        new_case = replace(case, charges=tuple(new_charges))
        new_case_list.append(new_case)
    return replace(record, cases=tuple(new_case_list))
Example #18
Source File: dump_timeseries.py From langchangetrack with BSD 3-Clause "New" or "Revised" License | 4 votes |
def main(args):
    # Get the arguments
    method = args.method
    win_size = args.win_size
    step = args.step
    metric_name = args.metric_name
    n_jobs = args.workers

    # Load the data.
    L, H, olddf, newdf = pickle.load(open(args.filename, 'rb'))
    words = pd.Series(olddf.word.values.ravel()).unique()
    oldrows = []
    newrows = []
    sourcexrange = np.arange(args.mint, args.maxt, step)
    destxrange = np.arange(args.mint, args.maxt, step)
    if method == 'win':
        sourcexrange = sourcexrange[win_size:]
        destxrange = destxrange[:-win_size]

    if args.interpolate:
        sourcexinter = np.arange(sourcexrange[0], sourcexrange[-1] + 1, 1)
        destxinter = np.arange(destxrange[0], destxrange[-1] + 1, 1)
    else:
        sourcexinter = sourcexrange
        destxinter = destxrange

    # Construct the series
    assert len(sourcexinter) == len(destxinter)
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))  # chunked() expects an integer chunk size
    words_chunks = more_itertools.chunked(words, chunk_sz)
    timeseries_chunks = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, create_word_time_series, olddf, newdf,
                               sourcexinter, destxinter,
                               metric_name=metric_name,
                               interpolate=args.interpolate)
        for chunk in words_chunks
    )
    timeseries = list(more_itertools.flatten(timeseries_chunks))

    # Dump the data frame
    for orow, newrow in timeseries:
        if orow and newrow:
            oldrows.append(orow)
            newrows.append(newrow)

    oldtimeseries = pd.DataFrame()
    newtimeseries = pd.DataFrame()
    header = ['word']
    header.extend(sourcexinter)
    newheader = ['word']
    newheader.extend(destxinter)
    oldtimeseries = oldtimeseries.from_records(oldrows, columns=header)
    oldtimeseries = oldtimeseries.fillna(method='backfill', axis=1)
    newtimeseries = newtimeseries.from_records(newrows, columns=newheader)
    newtimeseries = newtimeseries.fillna(method='backfill', axis=1)
    oldtimeseries.to_csv(args.sourcetimef, encoding='utf-8')
    newtimeseries.to_csv(args.endtimef, encoding='utf-8')