Python tensorflow.gfile.GFile() Examples

The following are 30 code examples of tensorflow.gfile.GFile(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out the other available functions and classes of the tensorflow.gfile module.
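Before the project examples, here is a minimal, self-contained sketch of the basic API (not taken from any project below). It assumes TensorFlow 1.x, where the class lives at tf.gfile.GFile; in TensorFlow 2.x the same class is available as tf.io.gfile.GFile. GFile accepts local paths as well as remote filesystem paths such as gs:// URLs.

import tensorflow as tf

# Write a small text file; the same call works for local and, e.g., GCS paths.
with tf.gfile.GFile('/tmp/example.txt', 'w') as f:
    f.write('hello world\n')

# Read it back line by line; GFile objects are iterable like regular file objects.
with tf.gfile.GFile('/tmp/example.txt', 'r') as f:
    for line in f:
        print(line.strip())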
Example #1
Source File: selector_keras.py    From active-qa with Apache License 2.0
def _build_embedding_matrix(self):
    """Builds the embedding matrix for the model.

    Returns:
      words: a list of strings representing the words in the vocabulary.
      embeddings: a float32 array of shape [vocab_size, embeddings_dim].
    """
    logging.info('Loading Glove embeddings.')
    words = []
    embeddings = []
    with gfile.GFile(FLAGS.glove_path) as f:
      for line in f:
        values = line.split()
        words.append(values[0])
        embeddings.append(np.asarray(values[1:], dtype='float32'))

    logging.info('Found %s word vectors.', len(embeddings))
    return words, np.array(embeddings) 
Example #2
Source File: create_data.py    From active-learning with Apache License 2.0
def get_csv_data(filename):
  """Parse csv and return Dataset object with data and targets.

  Creates pickle data from csv; assumes the first column contains the targets.
  Args:
    filename: complete path of the csv file
  Returns:
    Dataset object
  """
  mat = []
  with gfile.GFile(filename, 'r') as f:
    for l in f:
      row = l.strip()
      row = row.replace('"', '')
      row = row.split(',')
      row = [float(x) for x in row]
      mat.append(row)
  mat = np.array(mat)
  y = mat[:, 0]
  X = mat[:, 1:]
  data = Dataset(X, y)
  return data 
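A hedged usage sketch for get_csv_data above; the file path and CSV contents are hypothetical, and only the documented assumption (targets in the first column) is relied on.

# labeled.csv (hypothetical) -- first column is the target, remaining columns are features:
#   1,0.52,2.30
#   0,1.10,0.07
dataset = get_csv_data('/path/to/labeled.csv')  # hypothetical path; returns the project's Dataset object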
Example #3
Source File: identify_overlap_iwslt17.py    From language with Apache License 2.0
def _parse_lines(path):
  """Parses lines from IWSLT17 dataset."""
  lines = []
  with gfile.GFile(path) as fp:
    for line in fp:
      line = line.strip()
      # Skip lines that are tags entirely.
      if _WHOLE_TAG_REGEX.match(line):
        continue
      # Try to parse as content between opening and closing tags.
      match = _FLAT_HTML_REGEX.match(line)
      # Always append text not contained between the tags.
      if match is None:
        lines.append(line)
      elif (match.group(1) == match.group(3) and
            match.group(1).lower() in _ALLOWED_TAGS):
        lines.append(match.group(2).strip())
  return lines 
Example #4
Source File: scoring.py    From professional-services with Apache License 2.0
def get_prediction_input(files):
  """Reads and concatenates text files in input directory.

  Args:
    files: List of `str`, containing absolute path to files to read.

  Returns:
    List of `str` containing independent text reviews.

  Raises:
    ValueError: If input files are empty.
  """

  instances = []
  for path in files:
    with gfile.GFile(path, 'r') as lines:
      instances += lines
  if not instances:
    raise ValueError('No review found in input files.')
  return instances 
Example #5
Source File: decode_text.py    From conv_seq2seq with Apache License 2.0
def _get_unk_mapping(filename):
  """Reads a file that specifies a mapping from source to target tokens.
  The file must contain lines of the form <source>\t<target>.

  Args:
    filename: path to the mapping file

  Returns:
    A dictionary that maps from source -> target tokens.
  """
  with gfile.GFile(filename, "r") as mapping_file:
    lines = mapping_file.readlines()
    mapping = dict([_.split("\t")[0:2] for _ in lines])
    mapping = {k.strip(): v.strip() for k, v in mapping.items()}
  return mapping 
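A hedged sketch of the expected input and output for _get_unk_mapping; the file name and tokens are made up.

# unk_map.tsv (hypothetical), one tab-separated source/target pair per line:
#   foo<TAB>bar
#   qux<TAB>baz
mapping = _get_unk_mapping('unk_map.tsv')
# mapping == {'foo': 'bar', 'qux': 'baz'}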
Example #6
Source File: utils.py    From conv_seq2seq with Apache License 2.0
def dump(self, model_dir):
    """Dumps the options to a file in the model directory.

    Args:
      model_dir: Path to the model directory. The options will be
      dumped into a file in this directory.
    """
    gfile.MakeDirs(model_dir)
    options_dict = {
        "model_class": self.model_class,
        "model_params": self.model_params,
    }

    with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
      file.write(json.dumps(options_dict).encode("utf-8")) 
Example #7
Source File: hooks.py    From conv_seq2seq with Apache License 2.0
def begin(self):
    # Dump to file on the chief worker
    if self.is_chief:
      opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
      opts['dump_to_file'] = os.path.abspath(self._filename)
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          tf.get_default_graph(), tfprof_options=opts)

    # Print the model analysis
    with gfile.GFile(self._filename) as file:
      tf.logging.info(file.read()) 
Example #8
Source File: hooks.py    From conv_seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    result_dict, step = run_values.results
    self._iter_count = step

    if not self._should_trigger:
      return None

    # Convert dict of lists to list of dicts
    result_dicts = [
        dict(zip(result_dict, t)) for t in zip(*result_dict.values())
    ]

    # Print results
    result_str = ""
    result_str += "Prediction followed by Target @ Step {}\n".format(step)
    result_str += ("=" * 100) + "\n"
    for result in result_dicts:
      target_len = result["target_len"]
      predicted_slice = result["predicted_tokens"][:target_len - 1]
      target_slice = result["target_words"][1:target_len]
      result_str += self._target_delimiter.encode("utf-8").join(
          predicted_slice).decode("utf-8") + "\n"
      result_str += self._target_delimiter.encode("utf-8").join(
          target_slice).decode("utf-8") + "\n\n"
    result_str += ("=" * 100) + "\n\n"
    tf.logging.info(result_str)
    if self._sample_dir:
      filepath = os.path.join(self._sample_dir,
                              "samples_{:06d}.txt".format(step))
      with gfile.GFile(filepath, "w") as file:
        file.write(result_str)
    self._timer.update_last_triggered_step(self._iter_count - 1) 
Example #9
Source File: hooks.py    From conv_seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    if not self.is_chief or self._done:
      return

    step_done = run_values.results
    if self._active:
      tf.logging.info("Captured full trace at step %s", step_done)
      # Create output directory
      gfile.MakeDirs(self._output_dir)

      # Save run metadata
      trace_path = os.path.join(self._output_dir, "run_meta")
      with gfile.GFile(trace_path, "wb") as trace_file:
        trace_file.write(run_values.run_metadata.SerializeToString())
        tf.logging.info("Saved run_metadata to %s", trace_path)

      # Save timeline
      timeline_path = os.path.join(self._output_dir, "timeline.json")
      with gfile.GFile(timeline_path, "w") as timeline_file:
        tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
        tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
        timeline_file.write(tl_chrome)
        tf.logging.info("Saved timeline to %s", timeline_path)

      # Save tfprof op log
      tf.contrib.tfprof.tfprof_logger.write_op_log(
          graph=tf.get_default_graph(),
          log_dir=self._output_dir,
          run_meta=run_values.run_metadata)
      tf.logging.info("Saved op log to %s", self._output_dir)
      self._active = False
      self._done = True

    self._active = (step_done >= self.params["step"]) 
Example #10
Source File: vocab.py    From seq2seq with Apache License 2.0
def get_vocab_info(vocab_path):
  """Creates a `VocabInfo` instance that contains the vocabulary size and
    the special vocabulary for the given file.

  Args:
    vocab_path: Path to a vocabulary file with one word per line.

  Returns:
    A VocabInfo tuple.
  """
  with gfile.GFile(vocab_path) as file:
    vocab_size = sum(1 for _ in file)
  special_vocab = get_special_vocab(vocab_size)
  return VocabInfo(vocab_path, vocab_size, special_vocab) 
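A hedged sketch of how get_vocab_info is typically called; the file name and contents are made up.

# vocab.txt (hypothetical), one word per line:
#   the
#   cat
#   sat
vocab_info = get_vocab_info('vocab.txt')
# vocab_info.vocab_size == 3; vocab_info.special_vocab comes from get_special_vocab(3)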
Example #11
Source File: utils.py    From seq2seq with Apache License 2.0
def dump(self, model_dir):
    """Dumps the options to a file in the model directory.

    Args:
      model_dir: Path to the model directory. The options will be
      dumped into a file in this directory.
    """
    gfile.MakeDirs(model_dir)
    options_dict = {
        "model_class": self.model_class,
        "model_params": self.model_params,
    }

    with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
      file.write(json.dumps(options_dict).encode("utf-8")) 
Example #12
Source File: hooks.py    From seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    if not self.is_chief or self._done:
      return

    step_done = run_values.results
    if self._active:
      tf.logging.info("Captured full trace at step %s", step_done)
      # Create output directory
      gfile.MakeDirs(self._output_dir)

      # Save run metadata
      trace_path = os.path.join(self._output_dir, "run_meta")
      with gfile.GFile(trace_path, "wb") as trace_file:
        trace_file.write(run_values.run_metadata.SerializeToString())
        tf.logging.info("Saved run_metadata to %s", trace_path)

      # Save timeline
      timeline_path = os.path.join(self._output_dir, "timeline.json")
      with gfile.GFile(timeline_path, "w") as timeline_file:
        tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
        tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
        timeline_file.write(tl_chrome)
        tf.logging.info("Saved timeline to %s", timeline_path)

      # Save tfprof op log
      tf.contrib.tfprof.tfprof_logger.write_op_log(
          graph=tf.get_default_graph(),
          log_dir=self._output_dir,
          run_meta=run_values.run_metadata)
      tf.logging.info("Saved op log to %s", self._output_dir)
      self._active = False
      self._done = True

    self._active = (step_done >= self.params["step"]) 
Example #13
Source File: hooks.py    From seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    result_dict, step = run_values.results
    self._iter_count = step

    if not self._should_trigger:
      return None

    # Convert dict of lists to list of dicts
    result_dicts = [
        dict(zip(result_dict, t)) for t in zip(*result_dict.values())
    ]

    # Print results
    result_str = ""
    result_str += "Prediction followed by Target @ Step {}\n".format(step)
    result_str += ("=" * 100) + "\n"
    for result in result_dicts:
      target_len = result["target_len"]
      predicted_slice = result["predicted_tokens"][:target_len - 1]
      target_slice = result["target_words"][1:target_len]
      result_str += self._target_delimiter.encode("utf-8").join(
          predicted_slice).decode("utf-8") + "\n"
      result_str += self._target_delimiter.encode("utf-8").join(
          target_slice).decode("utf-8") + "\n\n"
    result_str += ("=" * 100) + "\n\n"
    tf.logging.info(result_str)
    if self._sample_dir:
      filepath = os.path.join(self._sample_dir,
                              "samples_{:06d}.txt".format(step))
      with gfile.GFile(filepath, "w") as file:
        file.write(result_str)
    self._timer.update_last_triggered_step(self._iter_count - 1) 
Example #14
Source File: hooks.py    From seq2seq with Apache License 2.0
def begin(self):
    # Dump to file on the chief worker
    if self.is_chief:
      opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
      opts['dump_to_file'] = os.path.abspath(self._filename)
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          tf.get_default_graph(), tfprof_options=opts)

    # Print the model analysis
    with gfile.GFile(self._filename) as file:
      tf.logging.info(file.read()) 
Example #15
Source File: hooks_test.py    From seq2seq with Apache License 2.0
def test_begin(self):
    model_dir = tempfile.mkdtemp()
    outfile = tempfile.NamedTemporaryFile()
    tf.get_variable("weigths", [128, 128])
    hook = hooks.PrintModelAnalysisHook(
        params={}, model_dir=model_dir, run_config=tf.contrib.learn.RunConfig())
    hook.begin()

    with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
      file_contents = file.read().strip()

    self.assertEqual(file_contents.decode(), "_TFProfRoot (--/16.38k params)\n"
                     "  weigths (128x128, 16.38k/16.38k params)")
    outfile.close() 
Example #16
Source File: example_config_test.py    From seq2seq with Apache License 2.0
def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
  """Loads model from a configuration file"""
  with gfile.GFile(config_path) as config_file:
    config = yaml.load(config_file)
  model_cls = locate(config["model"]) or getattr(models, config["model"])
  model_params = config["model_params"]
  if hparam_overrides:
    model_params.update(hparam_overrides)
  # Change the max decode length to make the test run faster
  model_params["decoder.params"]["max_decode_length"] = 5
  model_params["vocab_source"] = vocab_file
  model_params["vocab_target"] = vocab_file
  return model_cls(params=model_params, mode=mode) 
Example #17
Source File: decode_text.py    From seq2seq with Apache License 2.0
def _get_unk_mapping(filename):
  """Reads a file that specifies a mapping from source to target tokens.
  The file must contain lines of the form <source>\t<target>.

  Args:
    filename: path to the mapping file

  Returns:
    A dictionary that maps from source -> target tokens.
  """
  with gfile.GFile(filename, "r") as mapping_file:
    lines = mapping_file.readlines()
    mapping = dict([_.split("\t")[0:2] for _ in lines])
    mapping = {k.strip(): v.strip() for k, v in mapping.items()}
  return mapping 
Example #18
Source File: embedding.py    From texar with Apache License 2.0
def load_word2vec(filename, vocab, word_vecs):
    """Loads embeddings in the word2vec binary format which has a header line
    containing the number of vectors and their dimensionality (two integers),
    followed with number-of-vectors lines each of which is formatted as
    '<word-string> <embedding-vector>'.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename, "rb") as fin:
        header = fin.readline()
        vocab_size, vector_size = [int(s) for s in header.split()]
        if vector_size != word_vecs.shape[1]:
            raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                             (vector_size, word_vecs.shape[1]))
        binary_len = np.dtype('float32').itemsize * vector_size
        for _ in np.arange(vocab_size):
            chars = []
            while True:
                char = fin.read(1)
                if char == b' ':
                    break
                if char != b'\n':
                    chars.append(char)
            word = b''.join(chars)
            word = tf.compat.as_text(word)
            if word in vocab:
                word_vecs[vocab[word]] = np.fromstring(
                    fin.read(binary_len), dtype='float32')
            else:
                fin.read(binary_len)
    return word_vecs 
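A hedged usage sketch, assuming a word2vec binary file; the path and the toy vocabulary are made up. The second dimension of word_vecs must match the dimensionality declared in the file header, or the function raises ValueError.

import numpy as np

vocab = {'the': 0, 'cat': 1}                              # hypothetical token -> row index map
word_vecs = np.zeros([len(vocab), 300], dtype='float32')  # 300 must match the file's vector size
word_vecs = load_word2vec('/path/to/vectors.bin', vocab, word_vecs)  # hypothetical path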
Example #19
Source File: profile.py    From conv_seq2seq with Apache License 2.0
def load_metadata(model_dir):
  """Loads RunMetadata, Graph and OpLog from files
  """
  # Import RunMetadata
  run_meta_path = os.path.join(model_dir, "metadata/run_meta")
  run_meta = tf.RunMetadata()
  if gfile.Exists(run_meta_path):
    with gfile.GFile(run_meta_path, "rb") as file:
      run_meta.MergeFromString(file.read())
    print("Loaded RunMetadata from {}".format(run_meta_path))
  else:
    print("RunMetadata does not exist a {}. Skipping.".format(run_meta_path))

  # Import Graph
  graph_def_path = os.path.join(model_dir, "graph.pbtxt")
  graph = tf.Graph()
  if gfile.Exists(graph_def_path):
    with graph.as_default():
      _register_function_ops(CUSTOM_OP_FUNCTIONS)
      graph_def = tf.GraphDef()
      with gfile.GFile(graph_def_path, "rb") as file:
        text_format.Parse(file.read(), graph_def)
      tf.import_graph_def(graph_def, name="")
      print("Loaded Graph from {}".format(graph_def_path))
  else:
    print("Graph does not exist a {}. Skipping.".format(graph_def_path))

  # Import OpLog
  op_log_path = os.path.join(model_dir, "metadata/tfprof_log")
  op_log = tfprof_log_pb2.OpLog()
  if gfile.Exists(op_log_path):
    with gfile.GFile(op_log_path, "rb") as file:
      op_log.MergeFromString(file.read())
      print("Loaded OpLog from {}".format(op_log_path))
  else:
    print("OpLog does not exist a {}. Skipping.".format(op_log_path))

  return run_meta, graph, op_log 
Example #20
Source File: embedding.py    From Counterfactual-StoryRW with MIT License
def load_word2vec(filename, vocab, word_vecs):
    """Loads embeddings in the word2vec binary format which has a header line
    containing the number of vectors and their dimensionality (two integers),
    followed with number-of-vectors lines each of which is formatted as
    '<word-string> <embedding-vector>'.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename, "rb") as fin:
        header = fin.readline()
        vocab_size, vector_size = [int(s) for s in header.split()]
        if vector_size != word_vecs.shape[1]:
            raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                             (vector_size, word_vecs.shape[1]))
        binary_len = np.dtype('float32').itemsize * vector_size
        for _ in np.arange(vocab_size):
            chars = []
            while True:
                char = fin.read(1)
                if char == b' ':
                    break
                if char != b'\n':
                    chars.append(char)
            word = b''.join(chars)
            word = tf.compat.as_text(word)
            if word in vocab:
                word_vecs[vocab[word]] = np.fromstring(
                    fin.read(binary_len), dtype='float32')
            else:
                fin.read(binary_len)
    return word_vecs 
Example #21
Source File: utils.py    From active-learning with Apache License 2.0
def get_mldata(data_dir, name):
  """Loads data from data_dir.

  Looks for the file in data_dir.
  Assumes that data is in pickle format with dictionary fields data and target.


  Args:
    data_dir: directory to look in
    name: dataset name, assumes data is saved in the save_dir with filename
      <name>.pkl
  Returns:
    data and targets
  Raises:
    NameError: dataset not found in data folder.
  """
  dataname = name
  if dataname == "checkerboard":
    X, y = create_checker_unbalanced(split=[1./5, 4./5], n=10000, grid_size=4)
  else:
    filename = os.path.join(data_dir, dataname + ".pkl")
    if not gfile.Exists(filename):
      raise NameError("ERROR: dataset not available")
    data = pickle.load(gfile.GFile(filename, "rb"))  # pickle data must be read in binary mode
    X = data["data"]
    y = data["target"]
    if "keras" in dataname:
      X = X / 255
      y = y.flatten()
  return X, y 
Example #22
Source File: hooks_test.py    From reaction_prediction_seq2seq with Apache License 2.0
def test_begin(self):
    model_dir = tempfile.mkdtemp()
    outfile = tempfile.NamedTemporaryFile()
    tf.get_variable("weigths", [128, 128])
    hook = hooks.PrintModelAnalysisHook(params={}, model_dir=model_dir)
    hook.begin()

    with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
      file_contents = file.read().strip()

    self.assertEqual(file_contents.decode(), "_TFProfRoot (--/16.38k params)\n"
                     "  weigths (128x128, 16.38k/16.38k params)")
    outfile.close() 
Example #23
Source File: utils.py    From active-learning with Apache License 2.0
def __init__(self, filename):
    self.terminal = sys.stdout
    self.log = gfile.GFile(filename, "w") 
Example #24
Source File: example_config_test.py    From reaction_prediction_seq2seq with Apache License 2.0
def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
  """Loads model from a configuration file"""
  with gfile.GFile(config_path) as config_file:
    config = yaml.load(config_file)
  model_cls = locate(config["model"]) or getattr(models, config["model"])
  model_params = config["model_params"]
  if hparam_overrides:
    model_params.update(hparam_overrides)
  # Change the max decode length to make the test run faster
  model_params["decoder.params"]["max_decode_length"] = 5
  model_params["vocab_source"] = vocab_file
  model_params["vocab_target"] = vocab_file
  return model_cls(params=model_params, mode=mode) 
Example #25
Source File: selfplay.py    From training with Apache License 2.0
def run_game(load_file, selfplay_dir=None, holdout_dir=None,
             sgf_dir=None, holdout_pct=0.05):
    """Takes a played game and record results and game data."""
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    if selfplay_dir is not None:
        # Hold out a fraction (holdout_pct) of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir,
                                 "{}.tfrecord.zz".format(output_name))
        else:
            fname = os.path.join(selfplay_dir,
                                 "{}.tfrecord.zz".format(output_name))

        preprocessing.write_tf_examples(fname, tf_examples) 
Example #26
Source File: add_model.py    From training with Apache License 2.0
def copy_to_gcs(src, dst):
    assert gfile.Exists(src)
    assert not gfile.Exists(dst)

    with gfile.GFile(src, "rb") as src_f, gfile.GFile(dst, "wb") as dst_f:
        shutil.copyfileobj(src_f, dst_f) 
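A hedged usage note: because gfile handles both local paths and GCS paths, the helper above can copy a local file into a bucket (or between buckets); the paths below are made up.

copy_to_gcs('/tmp/model-000123.pb', 'gs://my-bucket/models/model-000123.pb')  # hypothetical paths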
Example #27
Source File: evaluator_ringmaster_wrapper.py    From training with Apache License 2.0
def copy_to_gcs(src, dst):
    assert gfile.Exists(src), src
    assert not gfile.Exists(dst), dst

    print("Saving to", dst)
    with gfile.GFile(src, "rb") as src_f, gfile.GFile(dst, "wb") as dst_f:
        shutil.copyfileobj(src_f, dst_f) 
Example #28
Source File: prepare_bigquery.py    From training with Apache License 2.0
def extract_data(filename):
    with gfile.GFile(filename) as f:
        contents = f.read()
    root_node = sgf_wrapper.get_sgf_root_node(contents)
    game_data = extract_game_data(filename, root_node)
    move_data = extract_move_data(
        root_node,
        game_data['worker_id'],
        game_data['completed_time'],
        game_data['board_size'])
    return game_data, move_data 
Example #29
Source File: prepare_bigquery.py    From training with Apache License 2.0
def extract_holdout_model(model):
    game_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'games', model)
    move_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'moves', model)
    # Ensure the parent directories of the output files exist.
    gfile.MakeDirs(os.path.dirname(game_output_path))
    gfile.MakeDirs(os.path.dirname(move_output_path))

    with gfile.GFile(game_output_path, 'w') as game_f, \
            gfile.GFile(move_output_path, 'w') as move_f:
        for sgf_name in tqdm(get_sgf_names(model)):
            game_data, move_data = extract_data(sgf_name)
            game_f.write(json.dumps(game_data) + '\n')
            for move_datum in move_data:
                move_f.write(json.dumps(move_datum) + '\n') 
Example #30
Source File: embedding.py    From texar with Apache License 2.0
def load_glove(filename, vocab, word_vecs):
    """Loads embeddings in the glove text format in which each line is
    '<word-string> <embedding-vector>'. Dimensions of the embedding vector
    are separated with whitespace characters.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename) as fin:
        for line in fin:
            vec = line.strip().split()
            if len(vec) == 0:
                continue
            word, vec = vec[0], vec[1:]
            word = tf.compat.as_text(word)
            if word not in vocab:
                continue
            if len(vec) != word_vecs.shape[1]:
                raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                                 (len(vec), word_vecs.shape[1]))
            word_vecs[vocab[word]] = np.array([float(v) for v in vec])
    return word_vecs
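A hedged usage sketch mirroring the word2vec example above; the GloVe file path and toy vocabulary are made up, and the second dimension of word_vecs must match the dimensionality of the vectors in the file.

import numpy as np

vocab = {'the': 0, 'cat': 1}                              # hypothetical token -> row index map
word_vecs = np.zeros([len(vocab), 100], dtype='float32')  # 100 must match the GloVe vector size
word_vecs = load_glove('/path/to/glove.100d.txt', vocab, word_vecs)  # hypothetical path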