Python tensorflow.gfile.GFile() Examples
The following are 30 code examples of tensorflow.gfile.GFile().
You can go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow.gfile.
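Before the project examples, here is a minimal sketch of the basic read/write pattern they all build on. The file name below is a placeholder; GFile behaves like Python's built-in open(), but also accepts remote paths such as gs:// (Google Cloud Storage) URIs. In TF 1.x the class is exposed as tensorflow.gfile.GFile (later versions move it to tf.io.gfile.GFile).

from tensorflow import gfile  # TF 1.x import path

def gfile_sketch():
  # Write a text file; "output.txt" is a placeholder and could also be a
  # "gs://bucket/..." URI.
  with gfile.GFile("output.txt", "w") as f:
    f.write("hello\n")

  # Read it back line by line, exactly like a regular file object.
  with gfile.GFile("output.txt", "r") as f:
    for line in f:
      print(line.strip())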
Example #1
Source File: selector_keras.py From active-qa with Apache License 2.0 | 6 votes |
def _build_embedding_matrix(self):
  """Builds the embedding matrix for the model.

  Returns:
    words: a list of strings representing the words in the vocabulary.
    embeddings: a float32 array of shape [vocab_size, embeddings_dim].
  """
  logging.info('Loading Glove embeddings.')
  words = []
  embeddings = []
  with gfile.GFile(FLAGS.glove_path) as f:
    for line in f:
      values = line.split()
      words.append(values[0])
      embeddings.append(np.asarray(values[1:], dtype='float32'))
  logging.info('Found %s word vectors.', len(embeddings))
  return words, np.array(embeddings)
Example #2
Source File: create_data.py From active-learning with Apache License 2.0 | 6 votes |
def get_csv_data(filename):
  """Parse csv and return Dataset object with data and targets.

  Create pickle data from csv, assumes the first column contains the targets.

  Args:
    filename: complete path of the csv file

  Returns:
    Dataset object
  """
  f = gfile.GFile(filename, 'r')
  mat = []
  for l in f:
    row = l.strip()
    row = row.replace('"', '')
    row = row.split(',')
    row = [float(x) for x in row]
    mat.append(row)
  mat = np.array(mat)
  y = mat[:, 0]
  X = mat[:, 1:]
  data = Dataset(X, y)
  return data
Example #3
Source File: identify_overlap_iwslt17.py From language with Apache License 2.0 | 6 votes |
def _parse_lines(path):
  """Parses lines from IWSLT17 dataset."""
  lines = []
  with gfile.GFile(path) as fp:
    for line in fp:
      line = line.strip()
      # Skip lines that are tags entirely.
      if _WHOLE_TAG_REGEX.match(line):
        continue
      # Try to parse as content between an opening and closing tags.
      match = _FLAT_HTML_REGEX.match(line)
      # Always append text not contained between the tags.
      if match is None:
        lines.append(line)
      elif (match.group(1) == match.group(3) and
            match.group(1).lower() in _ALLOWED_TAGS):
        lines.append(match.group(2).strip())
  return lines
Example #4
Source File: scoring.py From professional-services with Apache License 2.0 | 6 votes |
def get_prediction_input(files):
  """Reads and concatenates text files in input directory.

  Args:
    files: List of `str`, containing absolute path to files to read.

  Returns:
    List of `str` containing independent text reviews.

  Raises:
    ValueError: If input files are empty.
  """
  instances = []
  for path in files:
    with gfile.GFile(path, 'r') as lines:
      instances += lines
  if not instances:
    raise ValueError('No review found in input files.')
  return instances
Example #5
Source File: decode_text.py From conv_seq2seq with Apache License 2.0 | 5 votes |
def _get_unk_mapping(filename):
  """Reads a file that specifies a mapping from source to target tokens.

  The file must contain lines of the form <source>\t<target>.

  Args:
    filename: path to the mapping file

  Returns:
    A dictionary that maps from source -> target tokens.
  """
  with gfile.GFile(filename, "r") as mapping_file:
    lines = mapping_file.readlines()
    mapping = dict([_.split("\t")[0:2] for _ in lines])
    mapping = {k.strip(): v.strip() for k, v in mapping.items()}
  return mapping
Example #6
Source File: utils.py From conv_seq2seq with Apache License 2.0 | 5 votes |
def dump(self, model_dir):
  """Dumps the options to a file in the model directory.

  Args:
    model_dir: Path to the model directory. The options will be dumped
      into a file in this directory.
  """
  gfile.MakeDirs(model_dir)
  options_dict = {
      "model_class": self.model_class,
      "model_params": self.model_params,
  }

  with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
    file.write(json.dumps(options_dict).encode("utf-8"))
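For completeness, here is a sketch of reading the dumped options back. This is an assumption about typical usage rather than the project's own loader; it only relies on the TrainOptions.path helper already used above.

with gfile.GFile(TrainOptions.path(model_dir), "rb") as f:
  # The options were written as UTF-8-encoded JSON, so decode before parsing.
  options_dict = json.loads(f.read().decode("utf-8"))
model_class = options_dict["model_class"]
model_params = options_dict["model_params"]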
Example #7
Source File: hooks.py From conv_seq2seq with Apache License 2.0 | 5 votes |
def begin(self):
  # Dump to file on the chief worker
  if self.is_chief:
    opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
    opts['dump_to_file'] = os.path.abspath(self._filename)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(), tfprof_options=opts)

    # Print the model analysis
    with gfile.GFile(self._filename) as file:
      tf.logging.info(file.read())
Example #8
Source File: hooks.py From conv_seq2seq with Apache License 2.0 | 5 votes |
def after_run(self, _run_context, run_values):
  result_dict, step = run_values.results
  self._iter_count = step

  if not self._should_trigger:
    return None

  # Convert dict of lists to list of dicts
  result_dicts = [
      dict(zip(result_dict, t)) for t in zip(*result_dict.values())
  ]

  # Print results
  result_str = ""
  result_str += "Prediction followed by Target @ Step {}\n".format(step)
  result_str += ("=" * 100) + "\n"
  for result in result_dicts:
    target_len = result["target_len"]
    predicted_slice = result["predicted_tokens"][:target_len - 1]
    target_slice = result["target_words"][1:target_len]
    result_str += self._target_delimiter.encode("utf-8").join(
        predicted_slice).decode("utf-8") + "\n"
    result_str += self._target_delimiter.encode("utf-8").join(
        target_slice).decode("utf-8") + "\n\n"
  result_str += ("=" * 100) + "\n\n"
  tf.logging.info(result_str)

  if self._sample_dir:
    filepath = os.path.join(self._sample_dir,
                            "samples_{:06d}.txt".format(step))
    with gfile.GFile(filepath, "w") as file:
      file.write(result_str)

  self._timer.update_last_triggered_step(self._iter_count - 1)
Example #9
Source File: hooks.py From conv_seq2seq with Apache License 2.0 | 5 votes |
def after_run(self, _run_context, run_values):
  if not self.is_chief or self._done:
    return

  step_done = run_values.results
  if self._active:
    tf.logging.info("Captured full trace at step %s", step_done)
    # Create output directory
    gfile.MakeDirs(self._output_dir)

    # Save run metadata
    trace_path = os.path.join(self._output_dir, "run_meta")
    with gfile.GFile(trace_path, "wb") as trace_file:
      trace_file.write(run_values.run_metadata.SerializeToString())
      tf.logging.info("Saved run_metadata to %s", trace_path)

    # Save timeline
    timeline_path = os.path.join(self._output_dir, "timeline.json")
    with gfile.GFile(timeline_path, "w") as timeline_file:
      tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
      tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
      timeline_file.write(tl_chrome)
      tf.logging.info("Saved timeline to %s", timeline_path)

    # Save tfprof op log
    tf.contrib.tfprof.tfprof_logger.write_op_log(
        graph=tf.get_default_graph(),
        log_dir=self._output_dir,
        run_meta=run_values.run_metadata)
    tf.logging.info("Saved op log to %s", self._output_dir)

    self._active = False
    self._done = True

  self._active = (step_done >= self.params["step"])
Example #10
Source File: vocab.py From seq2seq with Apache License 2.0 | 5 votes |
def get_vocab_info(vocab_path):
  """Creates a `VocabInfo` instance that contains the vocabulary size and
  the special vocabulary for the given file.

  Args:
    vocab_path: Path to a vocabulary file with one word per line.

  Returns:
    A VocabInfo tuple.
  """
  with gfile.GFile(vocab_path) as file:
    vocab_size = sum(1 for _ in file)
  special_vocab = get_special_vocab(vocab_size)
  return VocabInfo(vocab_path, vocab_size, special_vocab)
Example #11
Source File: utils.py From seq2seq with Apache License 2.0 | 5 votes |
def dump(self, model_dir):
  """Dumps the options to a file in the model directory.

  Args:
    model_dir: Path to the model directory. The options will be dumped
      into a file in this directory.
  """
  gfile.MakeDirs(model_dir)
  options_dict = {
      "model_class": self.model_class,
      "model_params": self.model_params,
  }

  with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
    file.write(json.dumps(options_dict).encode("utf-8"))
Example #12
Source File: hooks.py From seq2seq with Apache License 2.0 | 5 votes |
def after_run(self, _run_context, run_values):
  if not self.is_chief or self._done:
    return

  step_done = run_values.results
  if self._active:
    tf.logging.info("Captured full trace at step %s", step_done)
    # Create output directory
    gfile.MakeDirs(self._output_dir)

    # Save run metadata
    trace_path = os.path.join(self._output_dir, "run_meta")
    with gfile.GFile(trace_path, "wb") as trace_file:
      trace_file.write(run_values.run_metadata.SerializeToString())
      tf.logging.info("Saved run_metadata to %s", trace_path)

    # Save timeline
    timeline_path = os.path.join(self._output_dir, "timeline.json")
    with gfile.GFile(timeline_path, "w") as timeline_file:
      tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
      tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
      timeline_file.write(tl_chrome)
      tf.logging.info("Saved timeline to %s", timeline_path)

    # Save tfprof op log
    tf.contrib.tfprof.tfprof_logger.write_op_log(
        graph=tf.get_default_graph(),
        log_dir=self._output_dir,
        run_meta=run_values.run_metadata)
    tf.logging.info("Saved op log to %s", self._output_dir)

    self._active = False
    self._done = True

  self._active = (step_done >= self.params["step"])
Example #13
Source File: hooks.py From seq2seq with Apache License 2.0 | 5 votes |
def after_run(self, _run_context, run_values):
  result_dict, step = run_values.results
  self._iter_count = step

  if not self._should_trigger:
    return None

  # Convert dict of lists to list of dicts
  result_dicts = [
      dict(zip(result_dict, t)) for t in zip(*result_dict.values())
  ]

  # Print results
  result_str = ""
  result_str += "Prediction followed by Target @ Step {}\n".format(step)
  result_str += ("=" * 100) + "\n"
  for result in result_dicts:
    target_len = result["target_len"]
    predicted_slice = result["predicted_tokens"][:target_len - 1]
    target_slice = result["target_words"][1:target_len]
    result_str += self._target_delimiter.encode("utf-8").join(
        predicted_slice).decode("utf-8") + "\n"
    result_str += self._target_delimiter.encode("utf-8").join(
        target_slice).decode("utf-8") + "\n\n"
  result_str += ("=" * 100) + "\n\n"
  tf.logging.info(result_str)

  if self._sample_dir:
    filepath = os.path.join(self._sample_dir,
                            "samples_{:06d}.txt".format(step))
    with gfile.GFile(filepath, "w") as file:
      file.write(result_str)

  self._timer.update_last_triggered_step(self._iter_count - 1)
Example #14
Source File: hooks.py From seq2seq with Apache License 2.0 | 5 votes |
def begin(self):
  # Dump to file on the chief worker
  if self.is_chief:
    opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
    opts['dump_to_file'] = os.path.abspath(self._filename)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(), tfprof_options=opts)

    # Print the model analysis
    with gfile.GFile(self._filename) as file:
      tf.logging.info(file.read())
Example #15
Source File: hooks_test.py From seq2seq with Apache License 2.0 | 5 votes |
def test_begin(self):
  model_dir = tempfile.mkdtemp()
  outfile = tempfile.NamedTemporaryFile()
  tf.get_variable("weigths", [128, 128])
  hook = hooks.PrintModelAnalysisHook(
      params={}, model_dir=model_dir, run_config=tf.contrib.learn.RunConfig())
  hook.begin()

  with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
    file_contents = file.read().strip()

  self.assertEqual(file_contents.decode(),
                   "_TFProfRoot (--/16.38k params)\n"
                   " weigths (128x128, 16.38k/16.38k params)")
  outfile.close()
Example #16
Source File: example_config_test.py From seq2seq with Apache License 2.0 | 5 votes |
def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
  """Loads model from a configuration file."""
  with gfile.GFile(config_path) as config_file:
    config = yaml.load(config_file)

  model_cls = locate(config["model"]) or getattr(models, config["model"])
  model_params = config["model_params"]
  if hparam_overrides:
    model_params.update(hparam_overrides)

  # Change the max decode length to make the test run faster
  model_params["decoder.params"]["max_decode_length"] = 5
  model_params["vocab_source"] = vocab_file
  model_params["vocab_target"] = vocab_file

  return model_cls(params=model_params, mode=mode)
Example #17
Source File: decode_text.py From seq2seq with Apache License 2.0 | 5 votes |
def _get_unk_mapping(filename):
  """Reads a file that specifies a mapping from source to target tokens.

  The file must contain lines of the form <source>\t<target>.

  Args:
    filename: path to the mapping file

  Returns:
    A dictionary that maps from source -> target tokens.
  """
  with gfile.GFile(filename, "r") as mapping_file:
    lines = mapping_file.readlines()
    mapping = dict([_.split("\t")[0:2] for _ in lines])
    mapping = {k.strip(): v.strip() for k, v in mapping.items()}
  return mapping
Example #18
Source File: embedding.py From texar with Apache License 2.0 | 5 votes |
def load_word2vec(filename, vocab, word_vecs):
    """Loads embeddings in the word2vec binary format which has a header line
    containing the number of vectors and their dimensionality (two integers),
    followed with number-of-vectors lines each of which is formatted as
    '<word-string> <embedding-vector>'.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename, "rb") as fin:
        header = fin.readline()
        vocab_size, vector_size = [int(s) for s in header.split()]
        if vector_size != word_vecs.shape[1]:
            raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                             (vector_size, word_vecs.shape[1]))
        binary_len = np.dtype('float32').itemsize * vector_size
        for _ in np.arange(vocab_size):
            chars = []
            while True:
                char = fin.read(1)
                if char == b' ':
                    break
                if char != b'\n':
                    chars.append(char)
            word = b''.join(chars)
            word = tf.compat.as_text(word)
            if word in vocab:
                word_vecs[vocab[word]] = np.fromstring(
                    fin.read(binary_len), dtype='float32')
            else:
                fin.read(binary_len)
    return word_vecs
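To make the calling contract concrete, a hypothetical call site follows: the caller supplies the vocabulary dict and a pre-allocated word_vecs array, and rows for words found in the binary file are overwritten in place. The file path, vocabulary, and dimensionality below are placeholders, not values from the project.

import numpy as np

vocab = {"the": 0, "cat": 1, "sat": 2}  # token -> row index (illustrative)
# The second dimension must match the dimensionality declared in the file header.
word_vecs = np.zeros((len(vocab), 300), dtype="float32")
word_vecs = load_word2vec("word2vec-vectors.bin", vocab, word_vecs)  # placeholder path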
Example #19
Source File: profile.py From conv_seq2seq with Apache License 2.0 | 5 votes |
def load_metadata(model_dir):
  """Loads RunMetadata, Graph and OpLog from files."""
  # Import RunMetadata
  run_meta_path = os.path.join(model_dir, "metadata/run_meta")
  run_meta = tf.RunMetadata()
  if gfile.Exists(run_meta_path):
    with gfile.GFile(run_meta_path, "rb") as file:
      run_meta.MergeFromString(file.read())
    print("Loaded RunMetadata from {}".format(run_meta_path))
  else:
    print("RunMetadata does not exist at {}. Skipping.".format(run_meta_path))

  # Import Graph
  graph_def_path = os.path.join(model_dir, "graph.pbtxt")
  graph = tf.Graph()
  if gfile.Exists(graph_def_path):
    with graph.as_default():
      _register_function_ops(CUSTOM_OP_FUNCTIONS)
      graph_def = tf.GraphDef()
      with gfile.GFile(graph_def_path, "rb") as file:
        text_format.Parse(file.read(), graph_def)
      tf.import_graph_def(graph_def, name="")
    print("Loaded Graph from {}".format(graph_def_path))
  else:
    print("Graph does not exist at {}. Skipping.".format(graph_def_path))

  # Import OpLog
  op_log_path = os.path.join(model_dir, "metadata/tfprof_log")
  op_log = tfprof_log_pb2.OpLog()
  if gfile.Exists(op_log_path):
    with gfile.GFile(op_log_path, "rb") as file:
      op_log.MergeFromString(file.read())
    print("Loaded OpLog from {}".format(op_log_path))
  else:
    print("OpLog does not exist at {}. Skipping.".format(op_log_path))

  return run_meta, graph, op_log
Example #20
Source File: embedding.py From Counterfactual-StoryRW with MIT License | 5 votes |
def load_word2vec(filename, vocab, word_vecs):
    """Loads embeddings in the word2vec binary format which has a header line
    containing the number of vectors and their dimensionality (two integers),
    followed with number-of-vectors lines each of which is formatted as
    '<word-string> <embedding-vector>'.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename, "rb") as fin:
        header = fin.readline()
        vocab_size, vector_size = [int(s) for s in header.split()]
        if vector_size != word_vecs.shape[1]:
            raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                             (vector_size, word_vecs.shape[1]))
        binary_len = np.dtype('float32').itemsize * vector_size
        for _ in np.arange(vocab_size):
            chars = []
            while True:
                char = fin.read(1)
                if char == b' ':
                    break
                if char != b'\n':
                    chars.append(char)
            word = b''.join(chars)
            word = tf.compat.as_text(word)
            if word in vocab:
                word_vecs[vocab[word]] = np.fromstring(
                    fin.read(binary_len), dtype='float32')
            else:
                fin.read(binary_len)
    return word_vecs
Example #21
Source File: utils.py From active-learning with Apache License 2.0 | 5 votes |
def get_mldata(data_dir, name):
  """Loads data from data_dir.

  Looks for the file in data_dir.
  Assumes that data is in pickle format with dictionary fields data and target.

  Args:
    data_dir: directory to look in
    name: dataset name, assumes data is saved in the save_dir with filename
      <name>.pkl

  Returns:
    data and targets

  Raises:
    NameError: dataset not found in data folder.
  """
  dataname = name
  if dataname == "checkerboard":
    X, y = create_checker_unbalanced(split=[1./5, 4./5], n=10000, grid_size=4)
  else:
    filename = os.path.join(data_dir, dataname + ".pkl")
    if not gfile.Exists(filename):
      raise NameError("ERROR: dataset not available")
    # Note: under Python 3, pickle generally requires the file to be opened in
    # binary mode ("rb"); text mode as written here is a Python 2 idiom.
    data = pickle.load(gfile.GFile(filename, "r"))
    X = data["data"]
    y = data["target"]
    if "keras" in dataname:
      X = X / 255
      y = y.flatten()
  return X, y
Example #22
Source File: hooks_test.py From reaction_prediction_seq2seq with Apache License 2.0 | 5 votes |
def test_begin(self):
  model_dir = tempfile.mkdtemp()
  outfile = tempfile.NamedTemporaryFile()
  tf.get_variable("weigths", [128, 128])
  hook = hooks.PrintModelAnalysisHook(params={}, model_dir=model_dir)
  hook.begin()

  with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
    file_contents = file.read().strip()

  self.assertEqual(file_contents.decode(),
                   "_TFProfRoot (--/16.38k params)\n"
                   " weigths (128x128, 16.38k/16.38k params)")
  outfile.close()
Example #23
Source File: utils.py From active-learning with Apache License 2.0 | 5 votes |
def __init__(self, filename):
  self.terminal = sys.stdout
  self.log = gfile.GFile(filename, "w")
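Here GFile is used to tee program output to a log file that may live on local disk or on GCS. The constructor above only opens the two handles; the sketch below shows how such a tee-style logger is usually completed. It is an assumption about the rest of the class, not the project's exact code.

def write(self, message):
  # Write to both the console and the (possibly remote) log file.
  self.terminal.write(message)
  self.log.write(message)

def flush(self):
  # Callers that assign this object to sys.stdout may expect flush() to exist.
  self.terminal.flush()
  self.log.flush()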
Example #24
Source File: example_config_test.py From reaction_prediction_seq2seq with Apache License 2.0 | 5 votes |
def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
  """Loads model from a configuration file."""
  with gfile.GFile(config_path) as config_file:
    config = yaml.load(config_file)

  model_cls = locate(config["model"]) or getattr(models, config["model"])
  model_params = config["model_params"]
  if hparam_overrides:
    model_params.update(hparam_overrides)

  # Change the max decode length to make the test run faster
  model_params["decoder.params"]["max_decode_length"] = 5
  model_params["vocab_source"] = vocab_file
  model_params["vocab_target"] = vocab_file

  return model_cls(params=model_params, mode=mode)
Example #25
Source File: selfplay.py From training with Apache License 2.0 | 5 votes |
def run_game(load_file, selfplay_dir=None, holdout_dir=None,
             sgf_dir=None, holdout_pct=0.05):
    """Takes a played game and record results and game data."""
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(os.path.join(minimal_sgf_dir,
                                      '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(os.path.join(full_sgf_dir,
                                      '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)
    if selfplay_dir is not None:
        # Hold out 5% of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir,
                                 "{}.tfrecord.zz".format(output_name))
        else:
            fname = os.path.join(selfplay_dir,
                                 "{}.tfrecord.zz".format(output_name))
        preprocessing.write_tf_examples(fname, tf_examples)
Example #26
Source File: add_model.py From training with Apache License 2.0 | 5 votes |
def copy_to_gcs(src, dst):
    assert gfile.Exists(src)
    assert not gfile.Exists(dst)

    with gfile.GFile(src, "rb") as src_f, gfile.GFile(dst, "wb") as dst_f:
        shutil.copyfileobj(src_f, dst_f)
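A hypothetical invocation with placeholder paths: because the source is opened in binary mode and streamed with shutil.copyfileobj, the same helper works for local-to-GCS, GCS-to-local, or GCS-to-GCS copies.

copy_to_gcs("/tmp/model.pb", "gs://my-bucket/models/model.pb")  # placeholder paths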
Example #27
Source File: evaluator_ringmaster_wrapper.py From training with Apache License 2.0 | 5 votes |
def copy_to_gcs(src, dst):
    assert gfile.Exists(src), src
    assert not gfile.Exists(dst), dst

    print("Saving to", dst)
    with gfile.GFile(src, "rb") as src_f, gfile.GFile(dst, "wb") as dst_f:
        shutil.copyfileobj(src_f, dst_f)
Example #28
Source File: prepare_bigquery.py From training with Apache License 2.0 | 5 votes |
def extract_data(filename):
    with gfile.GFile(filename) as f:
        contents = f.read()
    root_node = sgf_wrapper.get_sgf_root_node(contents)
    game_data = extract_game_data(filename, root_node)
    move_data = extract_move_data(
        root_node,
        game_data['worker_id'],
        game_data['completed_time'],
        game_data['board_size'])
    return game_data, move_data
Example #29
Source File: prepare_bigquery.py From training with Apache License 2.0 | 5 votes |
def extract_holdout_model(model):
    game_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'games', model)
    move_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'moves', model)
    gfile.MakeDirs(os.path.basename(game_output_path))
    gfile.MakeDirs(os.path.basename(move_output_path))

    with gfile.GFile(game_output_path, 'w') as game_f, \
            gfile.GFile(move_output_path, 'w') as move_f:
        for sgf_name in tqdm(get_sgf_names(model)):
            game_data, move_data = extract_data(sgf_name)
            game_f.write(json.dumps(game_data) + '\n')
            for move_datum in move_data:
                move_f.write(json.dumps(move_datum) + '\n')
Example #30
Source File: embedding.py From texar with Apache License 2.0 | 5 votes |
def load_glove(filename, vocab, word_vecs):
    """Loads embeddings in the glove text format in which each line is
    '<word-string> <embedding-vector>'. Dimensions of the embedding vector
    are separated with whitespace characters.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename) as fin:
        for line in fin:
            vec = line.strip().split()
            if len(vec) == 0:
                continue
            word, vec = vec[0], vec[1:]
            word = tf.compat.as_text(word)
            if word not in vocab:
                continue
            if len(vec) != word_vecs.shape[1]:
                raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                                 (len(vec), word_vecs.shape[1]))
            word_vecs[vocab[word]] = np.array([float(v) for v in vec])
    return word_vecs
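Unlike the word2vec loader in Example #18, which parses a binary header and fixed-width float records, this GloVe loader reads plain text lines. A hypothetical call, with a placeholder file name and dimensionality, might look like the following; rows for words not present in the file keep their initial values.

import numpy as np

vocab = {"the": 0, "cat": 1}  # illustrative vocabulary
word_vecs = np.random.uniform(-0.1, 0.1, (len(vocab), 100)).astype("float32")
word_vecs = load_glove("glove.6B.100d.txt", vocab, word_vecs)  # placeholder file name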