Python more_itertools.chunked() Examples
The following are 23 code examples of more_itertools.chunked(), drawn from open-source projects; the original project and source file for each example are noted above it. You may also want to check out the other available functions and classes of the more_itertools module.
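For reference, here is a minimal sketch of the basic behaviour before diving into the project examples: chunked(iterable, n) yields lists of at most n items, with the final list shorter when the iterable does not divide evenly.

from more_itertools import chunked

# Break an iterable into lists of length 3; the last chunk holds the leftovers.
print(list(chunked(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]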
Example #1
Source File: filter.py From pygreynoise with MIT License | 6 votes |
def filter(self, text, noise_only):
    """Filter lines that contain IP addresses from a given text.

    :param text: Text input
    :type text: file-like | str
    :param noise_only:
        If set, return only lines that contain IP addresses classified as noise,
        otherwise, return lines that contain IP addresses not classified as noise.
    :type noise_only: bool
    :return: Iterator that yields lines in chunks
    :rtype: iterable
    """
    if isinstance(text, str):
        text = text.splitlines(True)
    chunks = more_itertools.chunked(text, self.FILTER_TEXT_CHUNK_SIZE)
    for chunk in chunks:
        yield self._filter_chunk(chunk, noise_only)
Example #2
Source File: utils.py From vidreid_cosegmentation with Apache License 2.0 | 6 votes |
def get_spatial_features(model, imgs, test_num_tracks):
    """to handle higher seq length videos due to OOM error
    specifically used during test

    Arguments:
        model -- model under test
        imgs -- imgs to get features for

    Returns:
        features
    """
    # handle chunked data
    all_features, all_spatial_features = [], []
    for test_imgs in mit.chunked(imgs, test_num_tracks):
        current_test_imgs = torch.stack(test_imgs)
        num_current_test_imgs = current_test_imgs.shape[0]
        features, spatial_feats = model(current_test_imgs)
        features = features.view(num_current_test_imgs, -1)
        all_spatial_features.append(spatial_feats)
        all_features.append(features)

    return torch.cat(all_features), torch.cat(all_spatial_features)
Example #3
Source File: utils.py From vidreid_cosegmentation with Apache License 2.0 | 6 votes |
def get_features(model, imgs, test_num_tracks):
    """to handle higher seq length videos due to OOM error
    specifically used during test

    Arguments:
        model -- model under test
        imgs -- imgs to get features for

    Returns:
        features
    """
    # handle chunked data
    all_features = []
    for test_imgs in mit.chunked(imgs, test_num_tracks):
        current_test_imgs = torch.stack(test_imgs)
        num_current_test_imgs = current_test_imgs.shape[0]
        # print(current_test_imgs.shape)
        features = model(current_test_imgs)
        features = features.view(num_current_test_imgs, -1)
        all_features.append(features)

    return torch.cat(all_features)
Example #4
Source File: displacements.py From langchangetrack with BSD 3-Clause "New" or "Revised" License | 6 votes |
def calculate_words_displacement(self, column_names, n_jobs=1):
    """ Calculate word displacements for each word in the Pandas data frame. """
    words = self.get_word_list()

    # Create chunks of the words to be processed.
    # chunked() requires an integer chunk size, so round up and cast.
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))
    chunks = list(more_itertools.chunked(words, chunk_sz))

    # Calculate the displacements
    chunksL = Parallel(n_jobs=n_jobs, verbose=20)(delayed(process_chunk)(chunk, process_word_source, self) for chunk in chunks)
    chunksH = Parallel(n_jobs=n_jobs, verbose=20)(delayed(process_chunk)(chunk, process_word_dest, self) for chunk in chunks)
    L = more_itertools.flatten(chunksL)
    H = more_itertools.flatten(chunksH)
    flattendL = [x for sublist in L for x in sublist]
    flattendH = [x for sublist in H for x in sublist]

    # Store the results in a nice pandas data frame
    dfo, dfn = self.create_data_frames(flattendL, flattendH, column_names)
    return flattendL, flattendH, dfo, dfn
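The example above splits a word list into roughly n_jobs equal chunks for parallel processing with joblib. Below is a standalone sketch of the same chunk-size calculation; the word list and n_jobs value are made up for illustration.

import numpy as np
from more_itertools import chunked

words = ["word%d" % i for i in range(10)]
n_jobs = 3
# Round up so that chunked() produces at most n_jobs chunks.
chunk_sz = int(np.ceil(len(words) / float(n_jobs)))
chunks = list(chunked(words, chunk_sz))
print(len(chunks), [len(c) for c in chunks])  # 3 [4, 4, 2]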
Example #5
Source File: dataset.py From machine-learning-for-programming-samples with MIT License | 6 votes |
def get_minibatch_iterator(
    token_seqs: np.ndarray,
    batch_size: int,
    is_training: bool,
    drop_remainder: bool = True,
) -> Iterator[np.ndarray]:
    indices = np.arange(token_seqs.shape[0])
    if is_training:
        np.random.shuffle(indices)

    for minibatch_indices in chunked(indices, batch_size):
        if len(minibatch_indices) < batch_size and drop_remainder:
            break  # Drop last, smaller batch
        minibatch_seqs = token_seqs[minibatch_indices]
        yield minibatch_seqs
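The drop_remainder behaviour above, reduced to a minimal standalone sketch with invented data: keep only the chunks that form a full batch.

from more_itertools import chunked

samples = list(range(10))
batch_size = 4
# Keep only full batches, mirroring drop_remainder=True above.
full_batches = [b for b in chunked(samples, batch_size) if len(b) == batch_size]
print(full_batches)  # [[0, 1, 2, 3], [4, 5, 6, 7]]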
Example #6
Source File: test_more.py From python-netsurv with MIT License | 5 votes |
def test_even(self):
    """Test when ``n`` divides evenly into the length of the iterable."""
    self.assertEqual(
        list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']]
    )
Example #7
Source File: graph_sequence.py From article-0 with MIT License | 5 votes |
def __init__(self, args, batch_size=32, test=False):
    self.batch_size = batch_size

    self.query = """
        MATCH p=
            (person:PERSON)
                -[:WROTE]->
            (review:REVIEW {dataset_name:{dataset_name}, test:{test}})
                -[:OF]->
            (product:PRODUCT)
        RETURN person.style_preference + product.style as x, review.score as y
    """

    self.query_params = {
        "dataset_name": "article_0",
        "test": test
    }

    with open('./settings.json') as f:
        self.settings = json.load(f)[args.database]

    driver = GraphDatabase.driver(
        self.settings["neo4j_url"],
        auth=(self.settings["neo4j_user"], self.settings["neo4j_password"]))

    with driver.session() as session:
        data = session.run(self.query, **self.query_params).data()
        data = [(np.array(i["x"]), i["y"]) for i in data]

        # Split the data up into "batches"
        data = more_itertools.chunked(data, self.batch_size)

        # Format our batches in the way Keras expects them:
        # An array of tuples (x_batch, y_batch)
        # An x_batch is a numpy array of shape (batch_size, 12),
        # containing the concatenated style and style_preference vectors.
        # A y_batch is a numpy array of shape (batch_size, 1) containing the review scores.
        self.data = [
            (np.array([j[0] for j in i]), np.array([j[1] for j in i]))
            for i in data
        ]
Example #8
Source File: biodati_client.py From pybel with MIT License | 5 votes |
def post_graph_chunked(
    self,
    graph: BELGraph,
    chunksize: int,
    *,
    use_tqdm: bool = True,
    collections: Optional[Iterable[str]] = None,
    overwrite: bool = False,
    validate: bool = True,
    email: Union[bool, str] = False
) -> requests.Response:
    """Post the graph to BioDati in chunks, when the graph is too big for a normal upload.

    :param graph: A BEL graph
    :param chunksize: The size of the chunks of nanopubs to upload
    :param use_tqdm: Should tqdm be used when iterating?
    :param collections: Tags to add to the nanopubs for lookup on BioDati
    :param overwrite: Set the BioDati upload "overwrite" setting
    :param validate: Set the BioDati upload "validate" setting
    :param email: Who should get emailed with results about the upload?
     If true, emails to user used for login. If string, emails to that user.
     If false, no email.
    :return: Last response from upload
    """
    metadata_extras = dict()
    if collections is not None:
        metadata_extras.update(collections=list(collections))
    iterable = _iter_graphdati(graph, use_tqdm=use_tqdm, metadata_extras=metadata_extras)
    res = None
    for chunk in chunked(iterable, chunksize):
        res = self.post_graph_json(
            chunk,
            overwrite=overwrite,
            validate=validate,
            email=email,
        )
    return res
Example #9
Source File: test_more.py From pipenv with MIT License | 5 votes |
def test_even(self):
    """Test when ``n`` divides evenly into the length of the iterable."""
    self.assertEqual(
        list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']]
    )
Example #10
Source File: test_more.py From pipenv with MIT License | 5 votes |
def test_odd(self):
    """Test when ``n`` does not divide evenly into the length of the
    iterable.
    """
    self.assertEqual(
        list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']]
    )
Example #11
Source File: abuse_ch.py From n6 with GNU Affero General Public License v3.0 | 5 votes |
def publish_iteratively(self):
    for chunk in more_itertools.chunked(self._selected_data, 20000):
        rk, body, prop_kwargs = self.get_output_components(selected_data="\n".join(chunk))
        self.publish_output(rk, body, prop_kwargs)
        yield
    yield self.FLUSH_OUT
    self.save_state(self._state)
Example #12
Source File: varmisuse_data_splitter.py From tf-gnn-samples with MIT License | 5 votes |
def _write_data(out_dir: RichPath, window_idx: int, chunk_size: int, data_window: List[Any]):
    np.random.shuffle(data_window)
    for chunk_idx, data_chunk in enumerate(chunked(data_window, chunk_size)):
        out_file = out_dir.join('chunk_%i-%i.jsonl.gz' % (window_idx, chunk_idx))
        out_file.save_as_compressed_file(data_chunk)
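A simplified sketch of the same write-each-chunk-to-its-own-file idea, using plain JSON-lines files instead of RichPath; the file names and records here are made up.

import json
from more_itertools import chunked

records = [{"id": i} for i in range(10)]
for chunk_idx, data_chunk in enumerate(chunked(records, 4)):
    # Each chunk goes to its own file, e.g. chunk_0.jsonl, chunk_1.jsonl, ...
    with open('chunk_%i.jsonl' % chunk_idx, 'w') as f:
        for record in data_chunk:
            f.write(json.dumps(record) + '\n')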
Example #13
Source File: score_processor.py From revscoring with MIT License | 5 votes |
def score(self, rev_ids, caches=None, cache=None):
    if isinstance(rev_ids, int):
        rev_ids = [rev_ids]

    batches = batch_rev_caches(chunked(rev_ids, self.batch_size), caches, cache)

    for batch_scores in self.scores_ex.map(self._score_batch, batches):
        for score in batch_scores:
            yield score
Example #14
Source File: test_more.py From python-netsurv with MIT License | 5 votes |
def test_odd(self):
    """Test when ``n`` does not divide evenly into the length of the
    iterable.
    """
    self.assertEqual(
        list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']]
    )
Example #15
Source File: test_more.py From python-netsurv with MIT License | 5 votes |
def test_even(self):
    """Test when ``n`` divides evenly into the length of the iterable."""
    self.assertEqual(
        list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']]
    )
Example #16
Source File: test_more.py From python-netsurv with MIT License | 5 votes |
def test_odd(self):
    """Test when ``n`` does not divide evenly into the length of the
    iterable.
    """
    self.assertEqual(
        list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']]
    )
Example #17
Source File: pydataapi.py From py-data-api with MIT License | 5 votes |
def batch_execute(
    self,
    query: Union[Query, Insert, Update, Delete, Select, str],
    parameter_sets: Optional[List[Dict[str, Any]]],
    transaction_id: Optional[str] = None,
    database: Optional[str] = None,
) -> UpdateResults:
    if self.transaction_id:
        start_transaction: bool = False
    else:
        self.begin(database=database)
        start_transaction = True
    try:
        results_sets = list(
            flatten(
                self.client.batch_execute_statement(
                    **Options(
                        resourceArn=self.resource_arn,
                        secretArn=self.secret_arn,
                        database=database or self.database,
                        transactionId=transaction_id or self.transaction_id,
                        parameterSets=chunked_parameter_sets,  # type: ignore
                        sql=query,
                    ).build()
                )["updateResults"]
                for chunked_parameter_sets in chunked(
                    parameter_sets or [], MAX_RECORDS
                )
            )
        )
    except:
        if start_transaction:
            self.rollback()
        raise
    if start_transaction:
        self.commit()

    return UpdateResults(results_sets)
Example #18
Source File: dump.py From promnesia with MIT License | 5 votes |
def dump_histories(all_histories: List[Tuple[str, List[DbVisit]]]) -> None:
    logger = get_logger()
    output_dir = Path(config.get().output_dir)
    db_path = output_dir / 'promnesia.sqlite'

    def iter_visits():
        for e, h in all_histories:
            # TODO sort them somehow for determinism?
            # TODO what do we do with errors?
            # TODO maybe conform them to schema and dump too?
            # TODO or, dump to a separate table?
            yield from h

    tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
    engine = create_engine(f'sqlite:///{tpath}')
    binder = NTBinder.make(DbVisit)
    meta = MetaData(engine)
    table = Table('visits', meta, *binder.columns)
    meta.create_all()

    with engine.begin() as conn:
        for chunk in chunked(iter_visits(), n=_CHUNK_BY):
            bound = [binder.to_row(x) for x in chunk]
            # pylint: disable=no-value-for-parameter
            conn.execute(table.insert().values(bound))

    shutil.move(str(tpath), str(db_path))

    logger.info('saved database to %s', db_path)
    # TODO log error count
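The same chunked-insert pattern can be sketched with the standard-library sqlite3 module instead of SQLAlchemy; the table and data below are invented for illustration.

import sqlite3
from more_itertools import chunked

visits = [("url%d" % i, i) for i in range(1000)]

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE visits (url TEXT, ts INTEGER)")
# Insert in fixed-size chunks rather than one row at a time or all at once.
for chunk in chunked(visits, 256):
    conn.executemany("INSERT INTO visits VALUES (?, ?)", chunk)
conn.commit()
print(conn.execute("SELECT COUNT(*) FROM visits").fetchone())  # (1000,)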
Example #19
Source File: test_more.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def test_even(self):
    """Test when ``n`` divides evenly into the length of the iterable."""
    self.assertEqual(
        list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']]
    )
Example #20
Source File: test_more.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def test_odd(self):
    """Test when ``n`` does not divide evenly into the length of the
    iterable.
    """
    self.assertEqual(
        list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']]
    )
Example #21
Source File: analyzer.py From pygreynoise with MIT License | 4 votes |
def analyze(self, text):
    """Aggregate stats related to IP addresses from a given text.

    :param text: Text input
    :type text: file-like | str
    :return: Aggregated stats for all the IP addresses found.
    :rtype: dict
    """
    if isinstance(text, str):
        text = text.splitlines(True)
    chunks = more_itertools.chunked(text, self.ANALYZE_TEXT_CHUNK_SIZE)

    text_stats = {
        "query": [],
        "count": 0,
        "stats": {},
    }
    text_ip_addresses = set()
    chunks_stats = [
        self._analyze_chunk(chunk, text_ip_addresses) for chunk in chunks
    ]
    functools.reduce(self._aggregate_stats, chunks_stats, text_stats)

    # This maps section dictionaries to list of dictionaries
    # (undoing mapping done previously to keep track of count values)
    for section_key, section_value in text_stats["stats"].items():
        section_element_key = self.SECTION_KEY_TO_ELEMENT_KEY[section_key]
        text_stats["stats"][section_key] = sorted(
            [
                {section_element_key: element_key, "count": element_count}
                for element_key, element_count in section_value.items()
            ],
            key=lambda element: (-element["count"], element[section_element_key]),
        )

    if text_ip_addresses:
        noise_ip_addresses = {
            result["ip"]
            for result in self.api.quick(text_ip_addresses)
            if result["noise"]
        }
    else:
        noise_ip_addresses = set()

    ip_count = len(text_ip_addresses)
    noise_ip_count = len(noise_ip_addresses)
    not_noise_ip_count = ip_count - noise_ip_count
    if ip_count > 0:
        noise_ip_ratio = float(noise_ip_count) / ip_count
    else:
        noise_ip_ratio = 0

    text_stats["summary"] = {
        "ip_count": ip_count,
        "noise_ip_count": noise_ip_count,
        "not_noise_ip_count": not_noise_ip_count,
        "noise_ip_ratio": noise_ip_ratio,
    }
    return text_stats
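The chunk-then-aggregate shape used above, reduced to a standalone sketch: process each chunk independently, then merge the per-chunk results. The input lines and the Counter-based aggregation here are illustrative, not part of pygreynoise.

from collections import Counter
from functools import reduce
from more_itertools import chunked

lines = ["1.2.3.4 ok", "5.6.7.8 ok", "1.2.3.4 again"]
# One Counter per chunk of lines, then merged into a single total.
chunk_counts = (Counter(word for line in chunk for word in line.split())
                for chunk in chunked(lines, 2))
totals = reduce(lambda acc, c: acc + c, chunk_counts, Counter())
print(totals["1.2.3.4"])  # 2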
Example #22
Source File: dump_timeseries.py From langchangetrack with BSD 3-Clause "New" or "Revised" License | 4 votes |
def main(args):
    # get the arguments
    method = args.method
    win_size = args.win_size
    step = args.step
    metric_name = args.metric_name
    n_jobs = args.workers

    # Load the data.
    L, H, olddf, newdf = pickle.load(open(args.filename, 'rb'))
    words = pd.Series(olddf.word.values.ravel()).unique()

    oldrows = []
    newrows = []

    sourcexrange = np.arange(args.mint, args.maxt, step)
    destxrange = np.arange(args.mint, args.maxt, step)
    if method == 'win':
        sourcexrange = sourcexrange[win_size:]
        destxrange = destxrange[:-win_size]

    if args.interpolate:
        sourcexinter = np.arange(sourcexrange[0], sourcexrange[-1] + 1, 1)
        destxinter = np.arange(destxrange[0], destxrange[-1] + 1, 1)
    else:
        sourcexinter = sourcexrange
        destxinter = destxrange

    # Construct the series
    assert(len(sourcexinter) == len(destxinter))
    # chunked() requires an integer chunk size, so round up and cast.
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))
    words_chunks = more_itertools.chunked(words, chunk_sz)
    timeseries_chunks = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, create_word_time_series, olddf, newdf,
                               sourcexinter, destxinter,
                               metric_name=metric_name,
                               interpolate=args.interpolate)
        for chunk in words_chunks)

    timeseries = list(more_itertools.flatten(timeseries_chunks))

    # Dump the data frame
    for orow, newrow in timeseries:
        if orow and newrow:
            oldrows.append(orow)
            newrows.append(newrow)

    oldtimeseries = pd.DataFrame()
    newtimeseries = pd.DataFrame()
    header = ['word']
    header.extend(sourcexinter)
    newheader = ['word']
    newheader.extend(destxinter)
    oldtimeseries = oldtimeseries.from_records(oldrows, columns=header)
    oldtimeseries = oldtimeseries.fillna(method='backfill', axis=1)
    newtimeseries = newtimeseries.from_records(newrows, columns=newheader)
    newtimeseries = newtimeseries.fillna(method='backfill', axis=1)
    oldtimeseries.to_csv(args.sourcetimef, encoding='utf-8')
    newtimeseries.to_csv(args.endtimef, encoding='utf-8')
Example #23
Source File: image_viewer_widget.py From CvStudio with MIT License | 4 votes |
def load_image(self):
    @work_exception
    def do_work():
        return dask.compute(*[
            self.load_image_label(),
            self.load_image_annotations()
        ]), None

    @gui_exception
    def done_work(args):
        result, error = args
        if result:
            label, annotations = result
            if label:
                self._class_label.setVisible(True)
                self._class_label.setText(label)
            else:
                self._class_label.setVisible(False)
                self._class_label.setText("")
            if annotations:
                img_bbox: QRectF = self.image_viewer.pixmap.sceneBoundingRect()
                offset = QPointF(img_bbox.width() / 2, img_bbox.height() / 2)
                for entry in annotations:
                    try:
                        vo: AnnotaVO = entry
                        points = map(float, vo.points.split(","))
                        points = list(more_itertools.chunked(points, 2))
                        if vo.kind == "box" or vo.kind == "ellipse":
                            x = points[0][0] - offset.x()
                            y = points[0][1] - offset.y()
                            w = math.fabs(points[0][0] - points[1][0])
                            h = math.fabs(points[0][1] - points[1][1])
                            roi: QRectF = QRectF(x, y, w, h)
                            if vo.kind == "box":
                                item = EditableBox(roi)
                            else:
                                item = EditableEllipse()
                            item.tag = self.tag.dataset
                            item.setRect(roi)
                            item.label = vo.label
                            self.image_viewer.scene().addItem(item)
                        elif vo.kind == "polygon":
                            item = EditablePolygon()
                            item.label = vo.label
                            item.tag = self.tag.dataset
                            self.image_viewer.scene().addItem(item)
                            for p in points:
                                item.addPoint(QPoint(p[0] - offset.x(), p[1] - offset.y()))
                    except Exception as ex:
                        GUIUtilities.show_error_message(
                            "Error loading the annotations: {}".format(ex), "Error")

    self.image_viewer.remove_annotations()
    worker = Worker(do_work)
    worker.signals.result.connect(done_work)
    self._thread_pool.start(worker)
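The chunked(points, 2) call above is a handy way to turn a flat list of coordinates into (x, y) pairs; a minimal sketch with made-up coordinates:

from more_itertools import chunked

flat = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0]
# Pair up consecutive values into coordinate pairs.
points = list(chunked(flat, 2))
print(points)  # [[10.0, 20.0], [30.0, 40.0], [50.0, 60.0]]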