Python progressbar.progressbar() Examples
The following are 30 code examples of progressbar.progressbar().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module progressbar, or try the search function.
Example #1
Source File: eval_ssd.py From tensorrt_demos with MIT License | 6 votes |
def generate_results(ssd, imgs_dir, jpgs, results_file):
    """Run the detector on every image and write all detections as JSON.

    Args:
        ssd: detector whose ``detect(img, conf_th=...)`` returns three
            parallel sequences: boxes, confidences and class ids.
        imgs_dir: directory containing the image files.
        jpgs: iterable of image file names. The image id is the integer
            found after the last ``_`` and before the first ``.``.
        results_file: path of the JSON file to (over)write.
    """
    detections = []
    for file_name in progressbar(jpgs):
        frame = cv2.imread(os.path.join(imgs_dir, file_name))
        stem = file_name.split('.')[0]
        image_id = int(stem.split('_')[-1])
        boxes, confs, clss = ssd.detect(frame, conf_th=1e-2)
        for box, conf, cls in zip(boxes, confs, clss):
            # box is read as [x_min, y_min, x_max, y_max]; the +1 makes the
            # width/height inclusive of both edge pixels.
            left = float(box[0])
            top = float(box[1])
            width = float(box[2] - box[0] + 1)
            height = float(box[3] - box[1] + 1)
            record = {
                'image_id': image_id,
                'category_id': int(cls),
                'bbox': [left, top, width, height],
                'score': float(conf),
            }
            detections.append(record)

    serialized = json.dumps(detections, indent=4)
    with open(results_file, 'w') as out:
        out.write(serialized)
Example #2
Source File: eval.py From tf-retinanet with Apache License 2.0 | 6 votes |
def _get_annotations(generator):
    """ Collect the ground truth boxes of every image, grouped by class.

    The result is indexed as all_annotations[image_index][label]. Each entry is
    a copy of the rows of annotations['bboxes'] whose label equals `label`, or
    None for labels for which generator.has_label() is false.

    # Arguments
        generator : Object providing size(), num_classes(), has_label() and load_annotations().
    # Returns
        A list (one item per image) of lists (one item per class) of annotations.
    """
    per_image = [[None] * generator.num_classes() for _ in range(generator.size())]

    image_indices = range(generator.size())
    for image_index in progressbar.progressbar(image_indices, prefix='Parsing annotations: '):
        loaded = generator.load_annotations(image_index)

        for label in range(generator.num_classes()):
            if generator.has_label(label):
                boxes = loaded['bboxes']
                # Boolean mask over the label vector picks this class's rows.
                per_image[image_index][label] = boxes[loaded['labels'] == label, :].copy()

    return per_image
Example #3
Source File: eval.py From keras-m2det with Apache License 2.0 | 6 votes |
def _get_annotations(generator):
    """ Collect the ground truth boxes of every image, grouped by class.

    The result is indexed as all_annotations[image_index][label]. Each entry is
    a copy of the rows of annotations['bboxes'] whose label equals `label`, or
    None for labels for which generator.has_label() is false.

    # Arguments
        generator : Object providing size(), num_classes(), has_label() and load_annotations().
    # Returns
        A list (one item per image) of lists (one item per class) of annotations.
    """
    per_image = [[None] * generator.num_classes() for _ in range(generator.size())]

    image_indices = range(generator.size())
    for image_index in progressbar.progressbar(image_indices, prefix='Parsing annotations: '):
        loaded = generator.load_annotations(image_index)

        for label in range(generator.num_classes()):
            if generator.has_label(label):
                boxes = loaded['bboxes']
                # Boolean mask over the label vector picks this class's rows.
                per_image[image_index][label] = boxes[loaded['labels'] == label, :].copy()

    return per_image
Example #4
Source File: eval.py From keras-retinanet with Apache License 2.0 | 6 votes |
def _get_annotations(generator):
    """ Collect the ground truth boxes of every image, grouped by class.

    The result is indexed as all_annotations[image_index][label]. Each entry is
    a copy of the rows of annotations['bboxes'] whose label equals `label`, or
    None for labels for which generator.has_label() is false.

    # Arguments
        generator : Object providing size(), num_classes(), has_label() and load_annotations().
    # Returns
        A list (one item per image) of lists (one item per class) of annotations.
    """
    per_image = [[None] * generator.num_classes() for _ in range(generator.size())]

    image_indices = range(generator.size())
    for image_index in progressbar.progressbar(image_indices, prefix='Parsing annotations: '):
        loaded = generator.load_annotations(image_index)

        for label in range(generator.num_classes()):
            if generator.has_label(label):
                boxes = loaded['bboxes']
                # Boolean mask over the label vector picks this class's rows.
                per_image[image_index][label] = boxes[loaded['labels'] == label, :].copy()

    return per_image
Example #5
Source File: evaluator.py From DeepADoTS with MIT License | 6 votes |
def evaluate(self):
    """Fit every detector on every dataset and record its test scores.

    For each (dataset, detector) pair the detector is fitted on a copy of the
    training data, and its prediction on a copy of the test data is stored in
    ``self.results[(ds.name, det.name)]``. If fitting or predicting raises,
    the error and traceback are logged and an all-zero array shaped like
    ``y_test`` is stored instead. Plotting is best effort: a failure there is
    logged as a warning and never discards the computed score.
    """
    for ds in progressbar.progressbar(self.datasets):
        (X_train, _, X_test, y_test) = ds.data()
        for det in progressbar.progressbar(self.detectors):
            self.logger.info(f'Training {det.name} on {ds.name} with seed {self.seed}')
            try:
                # Copies keep one detector from mutating the data seen by the next.
                det.fit(X_train.copy())
                score = det.predict(X_test.copy())
                self.results[(ds.name, det.name)] = score
                try:
                    self.plot_details(det, ds, score)
                except Exception as plot_error:
                    # Previously swallowed silently; keep going but make it visible.
                    self.logger.warning(
                        f'Could not plot details for {det.name} on {ds.name}: {plot_error}')
            except Exception as e:
                self.logger.error(f'An exception occurred while training {det.name} on {ds}: {e}')
                self.logger.error(traceback.format_exc())
                self.results[(ds.name, det.name)] = np.zeros_like(y_test)
        # NOTE(review): the original indentation was lost; collecting once per
        # dataset is assumed here - confirm against the upstream file.
        gc.collect()
Example #6
Source File: pyrdp-convert.py From pyrdp with GNU General Public License v3.0 | 6 votes |
def processReplay(self, infile: Path):
    """Convert one replay file to an MP4 next to the configured prefix.

    The output path is ``self.prefix + infile.stem + '.mp4'``. A bouncing
    progress bar is refreshed through the callback handed to the MP4 sink.

    Args:
        infile: path of the replay file to read (opened in binary mode).
    """
    widgets = [
        progressbar.FormatLabel('Encoding MP4 '),
        progressbar.BouncingBar(),
        progressbar.FormatLabel(' Elapsed: %(elapsed)s'),
    ]
    with progressbar.ProgressBar(widgets=widgets) as progress:
        print(f"[*] Converting '{infile}' to MP4.")
        outfile = self.prefix + infile.stem + '.mp4'
        sink = Mp4EventHandler(outfile, progress=lambda: progress.update(0))
        # The context manager closes the replay file even when decoding or
        # cleanup raises; the original leaked the handle in that case.
        with open(infile, "rb") as fd:
            # Kept bound so the object stays alive until cleanup, as before.
            # NOTE(review): constructing Replay presumably drives the
            # conversion through the sink - confirm.
            replay = Replay(fd, handler=sink)
            print(f"\n[+] Succesfully wrote '{outfile}'")
            sink.cleanup()
Example #7
Source File: pyrdp-convert.py From pyrdp with GNU General Public License v3.0 | 6 votes |
def processPlaintext(self, stream: PacketList, outfile: str, info):
    """Replay a plaintext EXPORTED PDU RDP export through an RDPReplayer.

    Args:
        stream: packets to replay; each packet's ``load`` is sliced at fixed
            offsets for the source address, destination address and payload.
        outfile: output path handed to the replayer.
        info: 4-tuple whose first two items are the client and server
            addresses; the remaining two are ignored.
    """
    wants_mp4 = self.args.format == 'mp4'
    replayer = RDPReplayer(outfile, mp4=wants_mp4)
    client, server, _, _ = info
    endpoints = [client, server]

    for packet in progressbar.progressbar(stream):
        src = ".".join(str(octet) for octet in packet.load[12:16])
        dst = ".".join(str(octet) for octet in packet.load[20:24])
        data = packet.load[60:]

        # Only traffic between the two known endpoints is replayed.
        if src in endpoints and dst in endpoints:
            # FIXME: The absolute time is completely wrong here because replayer multiplies by 1000.
            replayer.setTimeStamp(float(packet.time))
            replayer.recv(data, src == client)

    try:
        replayer.tcp.recordConnectionClose()
    except struct.error:
        print("Couldn't close the connection cleanly. "
              "Are you sure you got source and destination correct?")
Example #8
Source File: eval_yolov3.py From tensorrt_demos with MIT License | 6 votes |
def generate_results(yolov3, imgs_dir, jpgs, results_file, non_coco):
    """Run detection on each jpg and write results to file.

    Args:
        yolov3: detector whose ``detect(img, conf_th=...)`` returns three
            parallel sequences: boxes, confidences and class ids.
        imgs_dir: directory containing the image files.
        jpgs: iterable of image file names. The image id is the integer
            found after the last ``_`` and before the first ``.``.
        results_file: path of the JSON file to (over)write.
        non_coco: when true the detector's class id is written as is;
            otherwise it is translated through ``yolov3_cls_to_ssd``.
    """
    results = []
    for jpg in progressbar(jpgs):
        img = cv2.imread(os.path.join(imgs_dir, jpg))
        image_id = int(jpg.split('.')[0].split('_')[-1])
        boxes, confs, clss = yolov3.detect(img, conf_th=1e-2)
        for box, conf, cls in zip(boxes, confs, clss):
            x = float(box[0])
            y = float(box[1])
            # box is read as [x_min, y_min, x_max, y_max]; +1 is the inclusive pixel count.
            w = float(box[2] - box[0] + 1)
            h = float(box[3] - box[1] + 1)
            # Convert to a plain int first: numpy integer scalars are not JSON
            # serializable (this broke the non_coco path), and a plain int is
            # also a safe index into the lookup table.
            cls = int(cls)
            category_id = cls if non_coco else yolov3_cls_to_ssd[cls]
            results.append({'image_id': image_id,
                            'category_id': category_id,
                            'bbox': [x, y, w, h],
                            'score': float(conf)})
    with open(results_file, 'w') as f:
        f.write(json.dumps(results, indent=4))
Example #9
Source File: generate_grid.py From prjxray with ISC License | 6 votes |
def generate_tileconn(pool, node_tree, nodes, wire_map, grid):
    """Load every node through the pool and accumulate tile connections.

    Each entry of ``nodes`` is read with ``read_json5`` on the worker pool and
    passed to ``process_node``. Results arrive in completion order, not input
    order.

    Returns:
        (tileconn, raw_node_data): the output of ``flatten_tile_conn`` on the
        accumulated connections, and the loaded nodes in arrival order.
    """
    tileconn = []
    key_history = {}
    raw_node_data = []

    with progressbar.ProgressBar(max_value=len(nodes)) as bar:
        loaded_nodes = pool.imap_unordered(read_json5, nodes, chunksize=20)
        for done, node in enumerate(loaded_nodes, start=1):
            bar.update(done - 1)
            raw_node_data.append(node)
            process_node(tileconn, key_history, node, wire_map, node_tree, grid)
            bar.update(done)

    return flatten_tile_conn(tileconn), raw_node_data
Example #10
Source File: function.py From ScanQLi with GNU General Public License v3.0 | 6 votes |
def CheckPageListAllVulns(pageset):
    """Test the pages against every configured vulnerability group.

    For each entry of ``config.vulncheck`` the candidates in ``entry[0]`` are
    tried in order with ``CheckPageListVuln``; the first truthy payload is
    recorded and the rest of that group is skipped. The module globals ``bar``
    and ``currenttested`` are updated while scanning.

    Returns:
        list of the payloads found, at most one per group.
    """
    global bar
    global currenttested

    found = []
    bar = progressbar.progressbar("bar", "Search vulns")
    bar.totalcount = len(config.vulncheck)
    bar.count = 0

    for entry in config.vulncheck:
        candidates = entry[0]
        bar.total = len(candidates)
        bar.value = 0
        bar.count += 1
        currenttested = entry[1]

        for vuln in candidates:
            bar.progress(1)
            payload = CheckPageListVuln(pageset, vuln)
            if payload:
                found.append(payload)
                break

    bar.delbar()
    return found
Example #11
Source File: generate_grid.py From prjxray with ISC License | 6 votes |
def generate_tileconn(pool, node_tree, nodes, wire_map, grid):
    """Load every node through the pool and accumulate tile connections.

    Each entry of ``nodes`` is read with ``read_json5`` on the worker pool and
    passed to ``process_node``. Results arrive in completion order, not input
    order.

    Returns:
        (tileconn, raw_node_data): the output of ``flatten_tile_conn`` on the
        accumulated connections, and the loaded nodes in arrival order.
    """
    tileconn = []
    key_history = {}
    raw_node_data = []

    with progressbar.ProgressBar(max_value=len(nodes)) as bar:
        loaded_nodes = pool.imap_unordered(read_json5, nodes, chunksize=20)
        for done, node in enumerate(loaded_nodes, start=1):
            bar.update(done - 1)
            raw_node_data.append(node)
            process_node(tileconn, key_history, node, wire_map, node_tree, grid)
            bar.update(done)

    return flatten_tile_conn(tileconn), raw_node_data
Example #12
Source File: create_node_tree.py From prjxray with ISC License | 6 votes |
def build_node_index(fname):
    """Index a node file by node name.

    The file is read line by line in binary mode. The second space-separated
    field of each line is the node name, and the byte offset at which that
    line starts is appended to the node's list.

    Returns:
        dict mapping node name -> list of line start offsets, in file order.
    """
    node_index = {}
    with open(fname, 'rb') as f:
        # Jump to the end to learn the size for the progress bar, then rewind.
        f.seek(0, 2)
        total_size = f.tell()
        f.seek(0, 0)

        with progressbar.ProgressBar(max_value=total_size) as bar:
            line_start = 0
            for raw_line in f:
                fields = raw_line.decode('utf8').split(' ')
                # Unpacking two names keeps the failure on lines with fewer than two fields.
                _, node = fields[0:2]
                node_index.setdefault(node, []).append(line_start)
                line_start = f.tell()
                bar.update(line_start)

    return node_index
Example #13
Source File: create_node_tree.py From prjxray with ISC License | 6 votes |
def build_node_index(fname):
    """Index a node file by node name.

    The file is read line by line in binary mode. The second space-separated
    field of each line is the node name, and the byte offset at which that
    line starts is appended to the node's list.

    Returns:
        dict mapping node name -> list of line start offsets, in file order.
    """
    node_index = {}
    with open(fname, 'rb') as f:
        # Jump to the end to learn the size for the progress bar, then rewind.
        f.seek(0, 2)
        total_size = f.tell()
        f.seek(0, 0)

        with progressbar.ProgressBar(max_value=total_size) as bar:
            line_start = 0
            for raw_line in f:
                fields = raw_line.decode('utf8').split(' ')
                # Unpacking two names keeps the failure on lines with fewer than two fields.
                _, node = fields[0:2]
                node_index.setdefault(node, []).append(line_start)
                line_start = f.tell()
                bar.update(line_start)

    return node_index
Example #14
Source File: eval.py From DeepForest with MIT License | 6 votes |
def _get_annotations(generator):
    """ Collect the ground truth boxes of every image, grouped by class.

    The result is indexed as all_annotations[image_index][label]. Each entry is
    a copy of the rows of annotations['bboxes'] whose label equals `label`, or
    None for labels for which generator.has_label() is false.

    # Arguments
        generator : Object providing size(), num_classes(), has_label() and load_annotations().
    # Returns
        A list (one item per image) of lists (one item per class) of annotations.
    """
    per_image = [[None] * generator.num_classes() for _ in range(generator.size())]

    image_indices = range(generator.size())
    for image_index in progressbar.progressbar(image_indices, prefix='Parsing annotations: '):
        loaded = generator.load_annotations(image_index)

        for label in range(generator.num_classes()):
            if generator.has_label(label):
                boxes = loaded['bboxes']
                # Boolean mask over the label vector picks this class's rows.
                per_image[image_index][label] = boxes[loaded['labels'] == label, :].copy()

    return per_image
Example #15
Source File: eval.py From CameraRadarFusionNet with Apache License 2.0 | 5 votes |
def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.

    The result is a list of lists indexed as all_annotations[image_index][label].
    For an image with boxes, each entry holds the rows of that label taken from
    the bounding boxes with two extra columns appended: the distance and the
    visibility (cast to float64). For an image without boxes the (empty)
    selection of annotations['bboxes'] is stored instead. Labels for which
    generator.has_label() is false stay None.

    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '):
        # load the annotations
        annotations = generator.load_annotations(i)

        # The combined array does not depend on the label, so build it lazily
        # and at most once per image instead of once per label.
        box_and_dist_and_vis = None

        # copy detections to all_annotations
        for label in range(generator.num_classes()):
            if not generator.has_label(label):
                continue

            if len(annotations['bboxes']) == 0:
                all_annotations[i][label] = annotations['bboxes'][annotations['labels'] == label].copy()
                continue

            if box_and_dist_and_vis is None:
                box_and_dist_and_vis = np.concatenate(
                    (annotations['bboxes'],
                     np.expand_dims(annotations['distances'], axis=1),
                     np.expand_dims(annotations['visibilities'], axis=1).astype(np.float64)),
                    axis=1)

            # Boolean indexing yields a fresh array; the explicit copy is kept as before.
            all_annotations[i][label] = box_and_dist_and_vis[annotations['labels'] == label].copy()

    return all_annotations
Example #16
Source File: interfaces.py From HoneyBot with MIT License | 5 votes |
def get_submissions_status():
    """
    Build one status row per pcap stored in the database.

    For every pcap the cached result is read from the database; when none is
    cached the status is requested through PTClient and, if it reports
    'analysisCompleted', stored in the cache.

    :return: A list of statuses of all the submissions in the database. Each
             item is a list: [_id, name, capture_start, capture_end,
             upload_start, upload_end, size, queued, analysis_started,
             analysis_completed, malicious, link]
    """
    results = []
    database = Database()
    print("Fetching analysis statuses...Please wait.")
    # Note: a second Database() instance is created here just for the pcap listing.
    for row in progressbar.progressbar(Database().select_pcaps()):
        _id, name, capture_start, capture_end, upload_start, upload_end, size = row
        try:
            # select_completed() is consumed as an iterator; an exhausted one means "not cached".
            raw_result = next(database.select_completed(_id))
            res = json.loads(raw_result[1])
        except StopIteration:
            res = PTClient().get_pcap_status(_id)
            if res and res.get('analysisCompleted'):
                try:
                    database.insert_completed([_id, json.dumps(res)])
                except Exception as e:
                    # Caching is best effort; the freshly fetched status is still used below.
                    logger.warning('Could not cache status for {} - {}'.format(_id, e))
        # Defaults used when no status could be obtained.
        queued, analysis_started, analysis_completed = False, False, False
        link = None
        malicious = None
        if res:
            submission = res.get('submission', {})
            if submission.get('queuedTimestamp'):
                queued = True
            if submission.get('analysisStarted'):
                analysis_started = True
            if submission.get('analysisCompleted'):
                analysis_completed = True
                # NOTE(review): the original indentation was lost; this check is assumed to be
                # nested under 'analysisCompleted' so that `malicious` stays None for unfinished
                # analyses - confirm against the upstream file.
                # NOTE(review): raises TypeError if 'logsTransmitted' is absent (get() returns None).
                if 'signature_alerts' in submission.get('logsTransmitted'):
                    malicious = True
                else:
                    malicious = False
        if analysis_completed:
            link = "https://packettotal.com/app/analysis?id={}".format(_id)
        results.append([_id, name, capture_start, capture_end, upload_start, upload_end, size, queued, analysis_started,
                        analysis_completed, malicious, link])
    return results
Example #17
Source File: invalidate_problem_stats.py From eoj3 with MIT License | 5 votes |
def run(*args):
    """Call ``invalidate_problem`` on every Problem, showing a progress bar.

    Positional arguments are accepted and ignored.
    """
    every_problem = Problem.objects.all()
    for entry in progressbar.progressbar(every_problem):
        invalidate_problem(entry)
Example #18
Source File: prepare_training_data.py From eoj3 with MIT License | 5 votes |
def run(*args):
    """Export problem metadata and per-user solve histories as pickle files.

    Two files are written to the directory named by the EXPORT_DIR environment
    variable (default: the current directory):

    * ``problems.pickle``: problem id -> dict of tags, counters, reward and
      labeled difficulty.
    * ``submissions.pickle``: author id -> list of
      ``(timestamp, failed attempts before the first accept, problem id)``,
      in submission order, kept only for authors with at least 10 entries.

    Positional arguments are accepted and ignored.
    """
    export_dir = os.environ.get("EXPORT_DIR", ".")

    print("Processing problems...")
    problem_list = {}
    for problem in progressbar.progressbar(Problem.objects.all()):
        problem_list[problem.id] = {
            "tags": [tag.name for tag in problem.tags],
            "ac_user_count": problem.ac_user_count,
            "total_user_count": problem.total_user_count,
            "ac_count": problem.ac_count,
            "total_count": problem.total_count,
            "reward": problem.reward,
            "labeled_difficulty": problem.level
        }

    print("Processing submissions...")
    solved_by_author = {}
    # Failed attempts so far per (author, problem); -1 marks "already solved".
    attempts = defaultdict(int)
    ordered_submissions = Submission.objects.all().only(
        "id", "author_id", "problem_id", "create_time", "status").order_by("create_time")
    for submission in progressbar.progressbar(ordered_submissions):
        pair = (submission.author_id, submission.problem_id)
        if submission.status == SubmissionStatus.ACCEPTED:
            history = solved_by_author.setdefault(submission.author_id, [])
            if attempts[pair] != -1:
                solved_at = datetime.timestamp(submission.create_time)
                history.append((solved_at, attempts[pair], submission.problem_id))
                attempts[pair] = -1
        elif attempts[pair] != -1:
            attempts[pair] += 1

    sub_list = {author: history for author, history in solved_by_author.items() if len(history) >= 10}

    with open(os.path.join(export_dir, "problems.pickle"), "wb") as problems_file:
        pickle.dump(problem_list, problems_file)
    with open(os.path.join(export_dir, "submissions.pickle"), "wb") as submissions_file:
        pickle.dump(sub_list, submissions_file)
Example #19
Source File: common.py From keras-CenterNet with Apache License 2.0 | 5 votes |
def _get_annotations(generator):
    """
    Collect the ground truth boxes of every image, grouped by class.

    The result is indexed as all_annotations[image_index][label]. Each entry is
    a copy of the rows of annotations['bboxes'] whose label equals `label`, or
    None for labels for which generator.has_label() is false.

    Args:
        generator: Object providing size(), num_classes(), has_label() and load_annotations().

    Returns:
        A list (one item per image) of lists (one item per class) of annotations.
    """
    per_image = [[None] * generator.num_classes() for _ in range(generator.size())]

    image_indices = range(generator.size())
    for image_index in progressbar.progressbar(image_indices, prefix='Parsing annotations: '):
        loaded = generator.load_annotations(image_index)

        for label in range(generator.num_classes()):
            if generator.has_label(label):
                boxes = loaded['bboxes']
                # Boolean mask over the label vector picks this class's rows.
                per_image[image_index][label] = boxes[loaded['labels'] == label, :].copy()

    return per_image
Example #20
Source File: common.py From EfficientDet with Apache License 2.0 | 5 votes |
def _get_annotations(generator):
    """
    Collect the ground truth boxes of every image, grouped by class.

    The result is indexed as all_annotations[image_index][label]. Each entry is
    a copy of the rows of annotations['bboxes'] whose label equals `label`, or
    None for labels for which generator.has_label() is false.

    Args:
        generator: Object providing size(), num_classes(), has_label() and load_annotations().

    Returns:
        A list (one item per image) of lists (one item per class) of annotations.
    """
    per_image = [[None] * generator.num_classes() for _ in range(generator.size())]

    image_indices = range(generator.size())
    for image_index in progressbar.progressbar(image_indices, prefix='Parsing annotations: '):
        loaded = generator.load_annotations(image_index)

        for label in range(generator.num_classes()):
            if generator.has_label(label):
                boxes = loaded['bboxes']
                # Boolean mask over the label vector picks this class's rows.
                per_image[image_index][label] = boxes[loaded['labels'] == label, :].copy()

    return per_image
Example #21
Source File: preprocess.py From urgent-care-comparative with GNU General Public License v3.0 | 5 votes |
def pivot_icd(subj):
    '''Gather ICD-9 codes per subject and rank the codes by frequency.

    subj: list of cohort subject_ids
    returns: (dct, dct_freq) where dct maps subject_id -> {hadm_id: [icd9_code, ...]}
             and dct_freq is a list of (icd9_code, first short_title, count) in
             descending count order, limited to codes listed in d_icd_diagnoses.csv
    '''
    diagnoses = pd.read_csv(path_tables + '/diagnoses_icd.csv')
    # icd names
    icd_names = pd.read_csv(path_tables + '/d_icd_diagnoses.csv')

    # make dictionary of icd9 codes
    dct = {}
    for position in progressbar.progressbar(range(len(subj))):
        subject_id = subj[position]
        subject_rows = diagnoses[(diagnoses.subject_id == subject_id)][['hadm_id', 'icd9_code']]
        codes_by_admission = subject_rows.groupby('hadm_id')['icd9_code'].apply(list).to_dict()
        dct[subject_id] = dict(codes_by_admission)

    # two flatten passes: subjects -> admissions -> individual codes
    all_codes = [admissions.values() for admissions in dct.values()]
    all_codes = flatten(all_codes)
    all_codes = flatten(all_codes)
    unique, counts = np.unique(all_codes, return_counts=True)

    # frequency dictionary, most frequent code first
    frequency = dict(zip(unique, counts))
    items = sorted(frequency.items(), key=lambda pair: pair[1], reverse=True)

    ## add names ##
    seen_codes = [code for code, _ in items]
    common = list(set(icd_names.icd9_code).intersection(seen_codes))
    named_rows = icd_names[icd_names.icd9_code.isin(common)]
    titles = named_rows[['icd9_code', 'short_title']].groupby('icd9_code')['short_title'].apply(list).to_dict()

    dct_freq = [(code, titles[code][0], count) for code, count in items if code in titles]
    return dct, dct_freq
#### Features ####
Example #22
Source File: interfaces.py From HoneyBot with MIT License | 5 votes |
def learn(self, timeout=60):
    """
    Builds a whitelist of IP addresses for every connection captured during this time-period

    Source and destination addresses whose ``IP(...).iptype()`` is 'PUBLIC'
    are collected and written, one per line, to the file 'ip.whitelist'.

    :param timeout: The number of seconds to capture traffic
    """
    src_ips, dst_ips = set(), set()

    with open('ip.whitelist', 'w') as whitelist:
        if not sys.warnoptions:
            warnings.simplefilter("ignore")
        print('Generating whitelist of IP addresses based on traffic from the next {} seconds.'.format(timeout))
        bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)

        for conn in self.listener(timeout=timeout):
            try:
                src, dst, proto = conn
                # Source first, then destination; the bar shows the running total.
                for address, bucket in ((src, src_ips), (dst, dst_ips)):
                    if IP(address).iptype() == 'PUBLIC':
                        bucket.add(address)
                        bar.update(len(src_ips) + len(dst_ips))
            except AttributeError:
                pass

        # Merge both directions so an address seen on either side is written once.
        all_ips = set([*src_ips, *dst_ips])
        for ip in all_ips:
            whitelist.write(ip + '\n')
Example #23
Source File: pyrdp-convert.py From pyrdp with GNU General Public License v3.0 | 5 votes |
def processTLS(self, stream: Decrypted, outfile: str):
    """Feed the TLS application data of a stream to an RDPReplayer.

    Args:
        stream: packets to replay. The source address of the first packet is
            taken as the client, and that packet itself is not replayed.
        outfile: output path handed to the replayer.
    """
    wants_mp4 = self.args.format == 'mp4'
    replayer = RDPReplayer(outfile, mp4=wants_mp4)
    client = None  # The RDP client's IP.

    for packet in progressbar.progressbar(stream):
        ip = packet.getlayer(IP)

        if not client:
            client = ip.src
            continue

        # Anything without TLS application data is skipped, as PyRDP's
        # network stack cannot parse TLS handshakes.
        if TLSApplicationData in packet:
            ts = float(packet.time)
            from_client = None
            for payload in packet[TLS].iterpayloads():
                if TLSApplicationData in payload:
                    for m in payload.msg:
                        replayer.setTimeStamp(ts)
                        from_client = ip.src == client
                        replayer.recv(m.data, from_client)

    try:
        replayer.tcp.recordConnectionClose()
    except struct.error:
        print("Couldn't close the connection cleanly. "
              "Are you sure you got source and destination correct?")
Example #24
Source File: function.py From ScanQLi with GNU General Public License v3.0 | 5 votes |
def GetAllPages(urllist):
    """Fetch the seed URLs and every link discovered from them.

    Each seed is fetched with GetHTML; links come from GetLinks and
    GetAllURLsParams. Newly discovered links are fetched round after round
    until no unvisited link remains.

    Returns:
        dict mapping URL -> HTML for every fetched URL for which
        CheckBlackListURLs(url) is false.
    """
    links = {}
    templinks = set()
    linksfollowed = set()
    newlinks = set()

    for url in urllist:
        html = GetHTML(url)
        links[url] = html
        templinks.update(GetLinks(url, html))
        templinks.update(GetAllURLsParams(url))
        # add(), not update(): set.update() on a str inserts its individual
        # characters, so the seed URL itself was never marked as followed.
        linksfollowed.add(url)
        newlinks.add(url)

    # The original code dropped None from the pending set; keep that filter.
    templinks.discard(None)
    # Seeds are already fetched; do not request them a second time.
    templinks.difference_update(linksfollowed)

    bar = progressbar.progressbar("count", "Get URLs")
    while templinks:
        bar.progress(len(templinks))
        for link in templinks:
            html = GetHTML(link)
            links[link] = html
            newlinks.update(GetLinks(link, html))
            newlinks.update(GetAllURLsParams(link))
            linksfollowed.add(link)
        # Next round: everything discovered so far that has not been fetched.
        templinks = newlinks.difference(linksfollowed)
    bar.delbar()

    return {link: html for link, html in links.items() if not CheckBlackListURLs(link)}
Example #25
Source File: generate_grid.py From prjxray with ISC License | 5 votes |
def generate_tilegrid(pool, tiles):
    """Build the tile grid and the wire lookup table.

    Every tile listed in ``tiles`` (grouped by tile type) is processed by
    ``get_tile_grid_info`` on the worker pool. Each result must be a dict with
    exactly one key, the tile name. Its 'wires' list is moved into
    ``wire_map`` and the remainder is merged into the grid.

    Returns:
        (grid, wire_map): ``grid`` maps tile name -> tile info without
        'wires'; ``wire_map`` maps full wire name -> dict with 'tile', 'type'
        and 'shortname' (the wire name without the ``<tile>/`` prefix).
    """
    grid = {}
    wire_map = {}

    num_tiles = sum(len(tiles[tile_type]) for tile_type in tiles)

    processed = 0
    with progressbar.ProgressBar(max_value=num_tiles) as bar:
        for tile_type in tiles:
            tile_infos = pool.imap_unordered(
                get_tile_grid_info,
                tiles[tile_type],
                chunksize=20,
            )
            for tile in tile_infos:
                bar.update(processed)

                assert len(tile) == 1, tile
                tilename = list(tile)[0]
                tile_data = tile[tilename]
                prefix_len = len(tilename) + 1

                for wire in tile_data['wires']:
                    assert wire not in wire_map, (wire, wire_map)
                    assert wire.startswith(tilename + '/'), (wire, tilename)

                    wire_map[wire] = {
                        'tile': tilename,
                        'type': tile_data['type'],
                        'shortname': wire[prefix_len:],
                    }

                # The wires now live in wire_map; the grid keeps the rest.
                del tile_data['wires']
                grid.update(tile)

                processed += 1
                bar.update(processed)

    return grid, wire_map
Example #26
Source File: converter.py From Object_Detection_Tracking with Apache License 2.0 | 5 votes |
def _get_official_format(video_list, annotation_dir):
    """Convert the annotations of all videos in parallel.

    Each ``(video_name, annotation_dir)`` pair is handed to ``_worker`` in a
    process pool. The per-video results are concatenated in input order.

    Args:
        video_list: names of the videos to process.
        annotation_dir: directory passed to every worker.

    Returns:
        (reference, file_index): ``reference`` holds 'filesProcessed' and the
        collected 'activities'; ``file_index`` maps every video name to a
        fixed entry with 'framerate' 30.0 and 'selected' {0: 1, 9000: 0}.
    """
    jobs = [(video_name, annotation_dir) for video_name in video_list]
    activities = []
    # The context manager shuts the worker processes down when done (or on
    # error); the original never released the pool.
    with ProcessPoolExecutor() as pool:
        for result in progressbar(pool.map(_worker, jobs)):
            activities.extend(result)
    reference = {'filesProcessed': video_list, 'activities': activities}
    file_index = {video_name: {'framerate': 30.0, 'selected': {0: 1, 9000: 0}}
                  for video_name in video_list}
    return reference, file_index
Example #27
Source File: lib.py From prjxray with ISC License | 5 votes |
def load_from_root_csv(self, nodes):
    """Load node definitions from JSON5 files into ``self.nodes``.

    Args:
        nodes: iterable of file paths. Each file must parse to a mapping with
            a 'node' name and its 'wires'. A node name that is already
            present trips an assertion.
    """
    import pyjson5 as json5
    import progressbar

    for node_path in progressbar.progressbar(nodes):
        with open(node_path) as handle:
            record = json5.load(handle)
            node_name = record['node']
            assert node_name not in self.nodes
            self.nodes[node_name] = record['wires']
Example #28
Source File: generate_grid.py From prjxray with ISC License | 5 votes |
def generate_tilegrid(pool, tiles):
    """Build the tile grid and the wire lookup table.

    Every tile listed in ``tiles`` (grouped by tile type) is processed by
    ``get_tile_grid_info`` on the worker pool. Each result must be a dict with
    exactly one key, the tile name. Its 'wires' list is moved into
    ``wire_map`` and the remainder is merged into the grid.

    Returns:
        (grid, wire_map): ``grid`` maps tile name -> tile info without
        'wires'; ``wire_map`` maps full wire name -> dict with 'tile', 'type'
        and 'shortname' (the wire name without the ``<tile>/`` prefix).
    """
    grid = {}
    wire_map = {}

    num_tiles = sum(len(tiles[tile_type]) for tile_type in tiles)

    processed = 0
    with progressbar.ProgressBar(max_value=num_tiles) as bar:
        for tile_type in tiles:
            tile_infos = pool.imap_unordered(
                get_tile_grid_info,
                tiles[tile_type],
                chunksize=20,
            )
            for tile in tile_infos:
                bar.update(processed)

                assert len(tile) == 1, tile
                tilename = list(tile)[0]
                tile_data = tile[tilename]
                prefix_len = len(tilename) + 1

                for wire in tile_data['wires']:
                    assert wire not in wire_map, (wire, wire_map)
                    assert wire.startswith(tilename + '/'), (wire, tilename)

                    wire_map[wire] = {
                        'tile': tilename,
                        'type': tile_data['type'],
                        'shortname': wire[prefix_len:],
                    }

                # The wires now live in wire_map; the grid keeps the rest.
                del tile_data['wires']
                grid.update(tile)

                processed += 1
                bar.update(processed)

    return grid, wire_map
Example #29
Source File: generate_grid.py From prjxray with ISC License | 4 votes |
def connect_wires(tiles, tileconn, wire_map):
    """ Group individual wires into nodes (sets of connected wires).

    Args:
        tiles: tile data, passed to create_coord_to_tile and get_connections.
        tileconn: connection records, each with 'tile_types' (a pair) and
            'wire_pairs' (a list of (wire1, wire2)).
        wire_map: full wire name -> dict with 'tile', 'type' and 'shortname'.

    Returns:
        A tuple of tuples, one inner tuple of wire names per distinct node.
    """
    # Every wire starts out as a node containing only itself.
    wire_nodes = {wire: set([wire]) for wire in wire_map}

    # (tile type, short wire name) -> every (connection, pair index) using it,
    # registered for both ends of each wire pair.
    wire_connection_map = {}
    for conn in tileconn:
        for idx, (wire1, wire2) in enumerate(conn['wire_pairs']):
            first_key = (conn['tile_types'][0], wire1)
            wire_connection_map.setdefault(first_key, []).append((conn, idx))

            second_key = (conn['tile_types'][1], wire2)
            wire_connection_map.setdefault(second_key, []).append((conn, idx))

    coord_to_tile = create_coord_to_tile(tiles)

    for wire, wire_info in progressbar.progressbar(wire_map.items()):
        key = (wire_info['type'], wire_info['shortname'])
        if key in wire_connection_map:
            for conn, idx in wire_connection_map[key]:
                targets = get_connections(
                    wire, wire_info, conn, idx, coord_to_tile, tiles)
                for target_tile, target_wire in targets:
                    full_wire_name = coord_to_tile[target_tile] + '/' + target_wire
                    assert wire_map[full_wire_name]['shortname'] == target_wire, (
                        target_tile, target_wire, wire, conn)
                    assert wire_map[full_wire_name]['tile'] == coord_to_tile[
                        target_tile], (
                            wire_map[full_wire_name]['tile'],
                            coord_to_tile[target_tile])

                    make_connection(wire_nodes, wire, full_wire_name)

    # Find unique nodes: entries that refer to the same set object collapse to one.
    unique_nodes = {id(node): node for node in wire_nodes.values()}

    # Flatten to tuple of tuples.
    return tuple(tuple(node) for node in unique_nodes.values())
Example #30
Source File: preprocess.py From urgent-care-comparative with GNU General Public License v3.0 | 4 votes |
def preprocess(features, labels, demographics):
    '''Assemble model inputs and targets for every subject with features.

    pre: features and labels
         features: hadm_id -> {timestep: {column: value}}
         labels: subject_id -> dict with 'hadm_id', 'mort', 'readmit',
                 'los_bin', 'dx' and 'dx_lst'
         demographics: subject_id -> auxiliary feature vector
    post: X = [[x1, ... xT]_1, ...], y= [(mort, readm, los, dx)]

    returns: (X, y, Z, X48, sentences, words)
         X: array indexed [subject, timestep, feature], each feature min-max scaled
         y: list of (mort, readmit, one-hot los, dx)
         Z: array of the demographics entries
         X48: per-subject min/max/mean/std over time of the unscaled features,
              min-max scaled
         sentences: list of labels[s]['dx_lst']
         words: per subject, '<feature>_low' / '<feature>_high' tokens for
                features whose minimum / maximum over time lies more than one
                standard deviation from the overall mean of that feature
    Subjects whose hadm_id has no entry in features are skipped.
    '''
    from sklearn.preprocessing import MinMaxScaler
    subj = list(set(labels.keys()))
    hadm = list(set(features.keys()))
    # The column layout is taken from the first timestep of the first admission.
    first_admission = features[hadm[0]]
    col_dict = dict([(v, k) for k, v in enumerate(first_admission[list(first_admission.keys())[0]].keys())])
    cols = sorted(col_dict.keys())
    # Loop-invariant: target column positions in sorted-name order.
    col_positions = [col_dict[k] for k in cols]
    items = []
    for s in progressbar.progressbar(subj):
        h = labels[s]['hadm_id']
        # Dict membership is O(1); the original scanned the hadm list per subject.
        if h in features:
            visit = features[h]
            # Sort the timesteps once instead of once per row.
            timesteps = sorted(visit.keys())
            x = np.zeros((len(timesteps), len(col_dict)))
            for index, t in enumerate(timesteps):
                x[index, col_positions] = [visit[t][k] for k in cols]
            mort = labels[s]['mort']
            # NOTE(review): 9 is presumably the number of length-of-stay bins - confirm.
            los = list(one_hot([labels[s]['los_bin']], 9)[0])
            readmit = labels[s]['readmit']
            dx = labels[s]['dx']
            y = (mort, readmit, los, dx)
            z = demographics[s]
            #auxiliary features
            x48 = np.concatenate((np.min(x, axis=0), np.max(x, axis=0), np.mean(x, axis=0), np.std(x, axis=0)), axis=-1)
            sentence = labels[s]['dx_lst']
            items.append((x, y, z, x48, sentence))
    X, y, Z, X48, sentences = zip(*items)
    X, y, Z, X48, sentences = np.array(list(X)), list(y), np.array(list(Z)), np.array(list(X48)), list(sentences)
    #normalize each feature to [0,1]
    words = [[] for i in range(len(X))]
    for i in range(len(X[0, 0, :])):
        #add to visit words (computed on the values before this feature is scaled)
        mean, std, minimum, maximum = np.mean(X[:, :, i]), np.std(X[:, :, i]), np.min(X[:, :, i], axis=1), np.max(X[:, :, i], axis=1)
        arr_min, arr_max = minimum < (mean - std), maximum > (mean + std)
        for j in range(len(arr_min)):
            if arr_min[j]:
                words[j].append(str(i) + '_low')
            if arr_max[j]:
                words[j].append(str(i) + '_high')
        #scale X
        scaler = MinMaxScaler()
        x_row = scaler.fit_transform(X[:, :, i])
        X[:, :, i] = x_row
    #transform X48
    scaler = MinMaxScaler()
    X48 = scaler.fit_transform(X48)
    return X, y, Z, X48, sentences, words