Python tensorflow.gfile.GFile() Examples

The following are 30 code examples of tensorflow.gfile.GFile(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out the other available functions and classes of the tensorflow.gfile module.
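Before the project examples, here is a minimal, self-contained sketch of the basic API (not taken from any project below). It assumes TensorFlow 1.x, where the class lives at tf.gfile.GFile; in TensorFlow 2.x the same class is available as tf.io.gfile.GFile. GFile accepts local paths as well as remote filesystem paths such as gs:// URLs.

import tensorflow as tf

# Write a small text file; the same call works for local and, e.g., GCS paths.
with tf.gfile.GFile('/tmp/example.txt', 'w') as f:
    f.write('hello world\n')

# Read it back line by line; GFile objects are iterable like regular file objects.
with tf.gfile.GFile('/tmp/example.txt', 'r') as f:
    for line in f:
        print(line.strip())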
Example #1
Source File: selector_keras.py    From active-qa with Apache License 2.0
def _build_embedding_matrix(self):
    """Builds the embedding matrix for the model.

    Returns:
      words: a list of strings representing the words in the vocabulary.
      embeddings: a float32 array of shape [vocab_size, embeddings_dim].
    """
    logging.info('Loading Glove embeddings.')
    words = []
    embeddings = []
    with gfile.GFile(FLAGS.glove_path) as f:
      for line in f:
        values = line.split()
        words.append(values[0])
        embeddings.append(np.asarray(values[1:], dtype='float32'))

    logging.info('Found %s word vectors.', len(embeddings))
    return words, np.array(embeddings) 
Example #2
Source File: create_data.py    From active-learning with Apache License 2.0
def get_csv_data(filename):
  """Parse csv and return Dataset object with data and targets.

  Creates pickle data from csv; assumes the first column contains the targets.
  Args:
    filename: complete path of the csv file
  Returns:
    Dataset object
  """
  mat = []
  with gfile.GFile(filename, 'r') as f:
    for l in f:
      row = l.strip()
      row = row.replace('"', '')
      row = row.split(',')
      row = [float(x) for x in row]
      mat.append(row)
  mat = np.array(mat)
  y = mat[:, 0]
  X = mat[:, 1:]
  data = Dataset(X, y)
  return data 
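A hedged usage sketch for get_csv_data above; the file path and CSV contents are hypothetical, and only the documented assumption (targets in the first column) is relied on.

# labeled.csv (hypothetical) -- first column is the target, remaining columns are features:
#   1,0.52,2.30
#   0,1.10,0.07
dataset = get_csv_data('/path/to/labeled.csv')  # hypothetical path; returns the project's Dataset object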
Example #3
Source File: identify_overlap_iwslt17.py    From language with Apache License 2.0
def _parse_lines(path):
  """Parses lines from IWSLT17 dataset."""
  lines = []
  with gfile.GFile(path) as fp:
    for line in fp:
      line = line.strip()
      # Skip lines that are tags entirely.
      if _WHOLE_TAG_REGEX.match(line):
        continue
      # Try to parse as content between opening and closing tags.
      match = _FLAT_HTML_REGEX.match(line)
      # Always append text not contained between the tags.
      if match is None:
        lines.append(line)
      elif (match.group(1) == match.group(3) and
            match.group(1).lower() in _ALLOWED_TAGS):
        lines.append(match.group(2).strip())
  return lines 
Example #4
Source File: scoring.py    From professional-services with Apache License 2.0
def get_prediction_input(files):
  """Reads and concatenates text files in input directory.

  Args:
    files: List of `str`, containing absolute path to files to read.

  Returns:
    List of `str` containing independent text reviews.

  Raises:
    ValueError: If input files are empty.
  """

  instances = []
  for path in files:
    with gfile.GFile(path, 'r') as lines:
      instances += lines
  if not instances:
    raise ValueError('No review found in input files.')
  return instances 
Example #5
Source File: decode_text.py    From conv_seq2seq with Apache License 2.0
def _get_unk_mapping(filename):
  """Reads a file that specifies a mapping from source to target tokens.
  The file must contain lines of the form <source>\t<target>.

  Args:
    filename: path to the mapping file

  Returns:
    A dictionary that maps from source -> target tokens.
  """
  with gfile.GFile(filename, "r") as mapping_file:
    lines = mapping_file.readlines()
    mapping = dict([_.split("\t")[0:2] for _ in lines])
    mapping = {k.strip(): v.strip() for k, v in mapping.items()}
  return mapping 
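A hedged sketch of the expected input and output for _get_unk_mapping; the file name and tokens are made up.

# unk_map.tsv (hypothetical), one tab-separated source/target pair per line:
#   foo<TAB>bar
#   qux<TAB>baz
mapping = _get_unk_mapping('unk_map.tsv')
# mapping == {'foo': 'bar', 'qux': 'baz'}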
Example #6
Source File: utils.py    From conv_seq2seq with Apache License 2.0
def dump(self, model_dir):
    """Dumps the options to a file in the model directory.

    Args:
      model_dir: Path to the model directory. The options will be
      dumped into a file in this directory.
    """
    gfile.MakeDirs(model_dir)
    options_dict = {
        "model_class": self.model_class,
        "model_params": self.model_params,
    }

    with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
      file.write(json.dumps(options_dict).encode("utf-8")) 
Example #7
Source File: hooks.py    From conv_seq2seq with Apache License 2.0
def begin(self):
    # Dump to file on the chief worker
    if self.is_chief:
      opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
      opts['dump_to_file'] = os.path.abspath(self._filename)
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          tf.get_default_graph(), tfprof_options=opts)

    # Print the model analysis
    with gfile.GFile(self._filename) as file:
      tf.logging.info(file.read()) 
Example #8
Source File: hooks.py    From conv_seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    result_dict, step = run_values.results
    self._iter_count = step

    if not self._should_trigger:
      return None

    # Convert dict of lists to list of dicts
    result_dicts = [
        dict(zip(result_dict, t)) for t in zip(*result_dict.values())
    ]

    # Print results
    result_str = ""
    result_str += "Prediction followed by Target @ Step {}\n".format(step)
    result_str += ("=" * 100) + "\n"
    for result in result_dicts:
      target_len = result["target_len"]
      predicted_slice = result["predicted_tokens"][:target_len - 1]
      target_slice = result["target_words"][1:target_len]
      result_str += self._target_delimiter.encode("utf-8").join(
          predicted_slice).decode("utf-8") + "\n"
      result_str += self._target_delimiter.encode("utf-8").join(
          target_slice).decode("utf-8") + "\n\n"
    result_str += ("=" * 100) + "\n\n"
    tf.logging.info(result_str)
    if self._sample_dir:
      filepath = os.path.join(self._sample_dir,
                              "samples_{:06d}.txt".format(step))
      with gfile.GFile(filepath, "w") as file:
        file.write(result_str)
    self._timer.update_last_triggered_step(self._iter_count - 1) 
Example #9
Source File: hooks.py    From conv_seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    if not self.is_chief or self._done:
      return

    step_done = run_values.results
    if self._active:
      tf.logging.info("Captured full trace at step %s", step_done)
      # Create output directory
      gfile.MakeDirs(self._output_dir)

      # Save run metadata
      trace_path = os.path.join(self._output_dir, "run_meta")
      with gfile.GFile(trace_path, "wb") as trace_file:
        trace_file.write(run_values.run_metadata.SerializeToString())
        tf.logging.info("Saved run_metadata to %s", trace_path)

      # Save timeline
      timeline_path = os.path.join(self._output_dir, "timeline.json")
      with gfile.GFile(timeline_path, "w") as timeline_file:
        tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
        tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
        timeline_file.write(tl_chrome)
        tf.logging.info("Saved timeline to %s", timeline_path)

      # Save tfprof op log
      tf.contrib.tfprof.tfprof_logger.write_op_log(
          graph=tf.get_default_graph(),
          log_dir=self._output_dir,
          run_meta=run_values.run_metadata)
      tf.logging.info("Saved op log to %s", self._output_dir)
      self._active = False
      self._done = True

    self._active = (step_done >= self.params["step"]) 
Example #10
Source File: vocab.py    From seq2seq with Apache License 2.0
def get_vocab_info(vocab_path):
  """Creates a `VocabInfo` instance that contains the vocabulary size and
    the special vocabulary for the given file.

  Args:
    vocab_path: Path to a vocabulary file with one word per line.

  Returns:
    A VocabInfo tuple.
  """
  with gfile.GFile(vocab_path) as file:
    vocab_size = sum(1 for _ in file)
  special_vocab = get_special_vocab(vocab_size)
  return VocabInfo(vocab_path, vocab_size, special_vocab) 
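A hedged sketch of how get_vocab_info is typically called; the file name and contents are made up.

# vocab.txt (hypothetical), one word per line:
#   the
#   cat
#   sat
vocab_info = get_vocab_info('vocab.txt')
# vocab_info.vocab_size == 3; vocab_info.special_vocab comes from get_special_vocab(3)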
Example #11
Source File: utils.py    From seq2seq with Apache License 2.0
def dump(self, model_dir):
    """Dumps the options to a file in the model directory.

    Args:
      model_dir: Path to the model directory. The options will be
      dumped into a file in this directory.
    """
    gfile.MakeDirs(model_dir)
    options_dict = {
        "model_class": self.model_class,
        "model_params": self.model_params,
    }

    with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
      file.write(json.dumps(options_dict).encode("utf-8")) 
Example #12
Source File: hooks.py    From seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    if not self.is_chief or self._done:
      return

    step_done = run_values.results
    if self._active:
      tf.logging.info("Captured full trace at step %s", step_done)
      # Create output directory
      gfile.MakeDirs(self._output_dir)

      # Save run metadata
      trace_path = os.path.join(self._output_dir, "run_meta")
      with gfile.GFile(trace_path, "wb") as trace_file:
        trace_file.write(run_values.run_metadata.SerializeToString())
        tf.logging.info("Saved run_metadata to %s", trace_path)

      # Save timeline
      timeline_path = os.path.join(self._output_dir, "timeline.json")
      with gfile.GFile(timeline_path, "w") as timeline_file:
        tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
        tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
        timeline_file.write(tl_chrome)
        tf.logging.info("Saved timeline to %s", timeline_path)

      # Save tfprof op log
      tf.contrib.tfprof.tfprof_logger.write_op_log(
          graph=tf.get_default_graph(),
          log_dir=self._output_dir,
          run_meta=run_values.run_metadata)
      tf.logging.info("Saved op log to %s", self._output_dir)
      self._active = False
      self._done = True

    self._active = (step_done >= self.params["step"]) 
Example #13
Source File: hooks.py    From seq2seq with Apache License 2.0
def after_run(self, _run_context, run_values):
    result_dict, step = run_values.results
    self._iter_count = step

    if not self._should_trigger:
      return None

    # Convert dict of lists to list of dicts
    result_dicts = [
        dict(zip(result_dict, t)) for t in zip(*result_dict.values())
    ]

    # Print results
    result_str = ""
    result_str += "Prediction followed by Target @ Step {}\n".format(step)
    result_str += ("=" * 100) + "\n"
    for result in result_dicts:
      target_len = result["target_len"]
      predicted_slice = result["predicted_tokens"][:target_len - 1]
      target_slice = result["target_words"][1:target_len]
      result_str += self._target_delimiter.encode("utf-8").join(
          predicted_slice).decode("utf-8") + "\n"
      result_str += self._target_delimiter.encode("utf-8").join(
          target_slice).decode("utf-8") + "\n\n"
    result_str += ("=" * 100) + "\n\n"
    tf.logging.info(result_str)
    if self._sample_dir:
      filepath = os.path.join(self._sample_dir,
                              "samples_{:06d}.txt".format(step))
      with gfile.GFile(filepath, "w") as file:
        file.write(result_str)
    self._timer.update_last_triggered_step(self._iter_count - 1) 
Example #14
Source File: hooks.py    From seq2seq with Apache License 2.0
def begin(self):
    # Dump to file on the chief worker
    if self.is_chief:
      opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
      opts['dump_to_file'] = os.path.abspath(self._filename)
      tf.contrib.tfprof.model_analyzer.print_model_analysis(
          tf.get_default_graph(), tfprof_options=opts)

    # Print the model analysis
    with gfile.GFile(self._filename) as file:
      tf.logging.info(file.read()) 
Example #15
Source File: hooks_test.py    From seq2seq with Apache License 2.0
def test_begin(self):
    model_dir = tempfile.mkdtemp()
    outfile = tempfile.NamedTemporaryFile()
    tf.get_variable("weigths", [128, 128])
    hook = hooks.PrintModelAnalysisHook(
        params={}, model_dir=model_dir, run_config=tf.contrib.learn.RunConfig())
    hook.begin()

    with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
      file_contents = file.read().strip()

    self.assertEqual(file_contents.decode(), "_TFProfRoot (--/16.38k params)\n"
                     "  weigths (128x128, 16.38k/16.38k params)")
    outfile.close() 
Example #16
Source File: example_config_test.py    From seq2seq with Apache License 2.0
def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
  """Loads model from a configuration file"""
  with gfile.GFile(config_path) as config_file:
    config = yaml.load(config_file)
  model_cls = locate(config["model"]) or getattr(models, config["model"])
  model_params = config["model_params"]
  if hparam_overrides:
    model_params.update(hparam_overrides)
  # Change the max decode length to make the test run faster
  model_params["decoder.params"]["max_decode_length"] = 5
  model_params["vocab_source"] = vocab_file
  model_params["vocab_target"] = vocab_file
  return model_cls(params=model_params, mode=mode) 
Example #17
Source File: decode_text.py    From seq2seq with Apache License 2.0
def _get_unk_mapping(filename):
  """Reads a file that specifies a mapping from source to target tokens.
  The file must contain lines of the form <source>\t<target>.

  Args:
    filename: path to the mapping file

  Returns:
    A dictionary that maps from source -> target tokens.
  """
  with gfile.GFile(filename, "r") as mapping_file:
    lines = mapping_file.readlines()
    mapping = dict([_.split("\t")[0:2] for _ in lines])
    mapping = {k.strip(): v.strip() for k, v in mapping.items()}
  return mapping 
Example #18
Source File: embedding.py    From texar with Apache License 2.0
def load_word2vec(filename, vocab, word_vecs):
    """Loads embeddings in the word2vec binary format which has a header line
    containing the number of vectors and their dimensionality (two integers),
    followed with number-of-vectors lines each of which is formatted as
    '<word-string> <embedding-vector>'.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename, "rb") as fin:
        header = fin.readline()
        vocab_size, vector_size = [int(s) for s in header.split()]
        if vector_size != word_vecs.shape[1]:
            raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                             (vector_size, word_vecs.shape[1]))
        binary_len = np.dtype('float32').itemsize * vector_size
        for _ in np.arange(vocab_size):
            chars = []
            while True:
                char = fin.read(1)
                if char == b' ':
                    break
                if char != b'\n':
                    chars.append(char)
            word = b''.join(chars)
            word = tf.compat.as_text(word)
            if word in vocab:
                word_vecs[vocab[word]] = np.fromstring(
                    fin.read(binary_len), dtype='float32')
            else:
                fin.read(binary_len)
    return word_vecs 
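A hedged usage sketch, assuming a word2vec binary file; the path and the toy vocabulary are made up. The second dimension of word_vecs must match the dimensionality declared in the file header, or the function raises ValueError.

import numpy as np

vocab = {'the': 0, 'cat': 1}                              # hypothetical token -> row index map
word_vecs = np.zeros([len(vocab), 300], dtype='float32')  # 300 must match the file's vector size
word_vecs = load_word2vec('/path/to/vectors.bin', vocab, word_vecs)  # hypothetical path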
Example #19
Source File: profile.py    From conv_seq2seq with Apache License 2.0
def load_metadata(model_dir):
  """Loads RunMetadata, Graph and OpLog from files
  """
  # Import RunMetadata
  run_meta_path = os.path.join(model_dir, "metadata/run_meta")
  run_meta = tf.RunMetadata()
  if gfile.Exists(run_meta_path):
    with gfile.GFile(run_meta_path, "rb") as file:
      run_meta.MergeFromString(file.read())
    print("Loaded RunMetadata from {}".format(run_meta_path))
  else:
    print("RunMetadata does not exist a {}. Skipping.".format(run_meta_path))

  # Import Graph
  graph_def_path = os.path.join(model_dir, "graph.pbtxt")
  graph = tf.Graph()
  if gfile.Exists(graph_def_path):
    with graph.as_default():
      _register_function_ops(CUSTOM_OP_FUNCTIONS)
      graph_def = tf.GraphDef()
      with gfile.GFile(graph_def_path, "rb") as file:
        text_format.Parse(file.read(), graph_def)
      tf.import_graph_def(graph_def, name="")
      print("Loaded Graph from {}".format(graph_def_path))
  else:
    print("Graph does not exist a {}. Skipping.".format(graph_def_path))

  # Import OpLog
  op_log_path = os.path.join(model_dir, "metadata/tfprof_log")
  op_log = tfprof_log_pb2.OpLog()
  if gfile.Exists(op_log_path):
    with gfile.GFile(op_log_path, "rb") as file:
      op_log.MergeFromString(file.read())
      print("Loaded OpLog from {}".format(op_log_path))
  else:
    print("OpLog does not exist a {}. Skipping.".format(op_log_path))

  return run_meta, graph, op_log 
Example #20
Source File: embedding.py    From Counterfactual-StoryRW with MIT License
def load_word2vec(filename, vocab, word_vecs):
    """Loads embeddings in the word2vec binary format which has a header line
    containing the number of vectors and their dimensionality (two integers),
    followed with number-of-vectors lines each of which is formatted as
    '<word-string> <embedding-vector>'.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename, "rb") as fin:
        header = fin.readline()
        vocab_size, vector_size = [int(s) for s in header.split()]
        if vector_size != word_vecs.shape[1]:
            raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                             (vector_size, word_vecs.shape[1]))
        binary_len = np.dtype('float32').itemsize * vector_size
        for _ in np.arange(vocab_size):
            chars = []
            while True:
                char = fin.read(1)
                if char == b' ':
                    break
                if char != b'\n':
                    chars.append(char)
            word = b''.join(chars)
            word = tf.compat.as_text(word)
            if word in vocab:
                word_vecs[vocab[word]] = np.fromstring(
                    fin.read(binary_len), dtype='float32')
            else:
                fin.read(binary_len)
    return word_vecs 
Example #21
Source File: utils.py    From active-learning with Apache License 2.0
def get_mldata(data_dir, name):
  """Loads data from data_dir.

  Looks for the file in data_dir.
  Assumes that data is in pickle format with dictionary fields data and target.


  Args:
    data_dir: directory to look in
    name: dataset name, assumes data is saved in the save_dir with filename
      <name>.pkl
  Returns:
    data and targets
  Raises:
    NameError: dataset not found in data folder.
  """
  dataname = name
  if dataname == "checkerboard":
    X, y = create_checker_unbalanced(split=[1./5, 4./5], n=10000, grid_size=4)
  else:
    filename = os.path.join(data_dir, dataname + ".pkl")
    if not gfile.Exists(filename):
      raise NameError("ERROR: dataset not available")
    data = pickle.load(gfile.GFile(filename, "rb"))  # pickle data must be read in binary mode
    X = data["data"]
    y = data["target"]
    if "keras" in dataname:
      X = X / 255
      y = y.flatten()
  return X, y 
Example #22
Source File: hooks_test.py    From reaction_prediction_seq2seq with Apache License 2.0
def test_begin(self):
    model_dir = tempfile.mkdtemp()
    outfile = tempfile.NamedTemporaryFile()
    tf.get_variable("weigths", [128, 128])
    hook = hooks.PrintModelAnalysisHook(params={}, model_dir=model_dir)
    hook.begin()

    with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
      file_contents = file.read().strip()

    self.assertEqual(file_contents.decode(), "_TFProfRoot (--/16.38k params)\n"
                     "  weigths (128x128, 16.38k/16.38k params)")
    outfile.close() 
Example #23
Source File: utils.py    From active-learning with Apache License 2.0
def __init__(self, filename):
    self.terminal = sys.stdout
    self.log = gfile.GFile(filename, "w") 
Example #24
Source File: example_config_test.py    From reaction_prediction_seq2seq with Apache License 2.0
def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
  """Loads model from a configuration file"""
  with gfile.GFile(config_path) as config_file:
    config = yaml.load(config_file)
  model_cls = locate(config["model"]) or getattr(models, config["model"])
  model_params = config["model_params"]
  if hparam_overrides:
    model_params.update(hparam_overrides)
  # Change the max decode length to make the test run faster
  model_params["decoder.params"]["max_decode_length"] = 5
  model_params["vocab_source"] = vocab_file
  model_params["vocab_target"] = vocab_file
  return model_cls(params=model_params, mode=mode) 
Example #25
Source File: selfplay.py    From training with Apache License 2.0
def run_game(load_file, selfplay_dir=None, holdout_dir=None,
             sgf_dir=None, holdout_pct=0.05):
    """Takes a played game and record results and game data."""
    if sgf_dir is not None:
        minimal_sgf_dir = os.path.join(sgf_dir, 'clean')
        full_sgf_dir = os.path.join(sgf_dir, 'full')
        utils.ensure_dir_exists(minimal_sgf_dir)
        utils.ensure_dir_exists(full_sgf_dir)
    if selfplay_dir is not None:
        utils.ensure_dir_exists(selfplay_dir)
        utils.ensure_dir_exists(holdout_dir)

    with utils.logged_timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)

    with utils.logged_timer("Playing game"):
        player = play(network)

    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
    game_data = player.extract_data()
    if sgf_dir is not None:
        with gfile.GFile(os.path.join(minimal_sgf_dir, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=False))
        with gfile.GFile(os.path.join(full_sgf_dir, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf())

    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)

    if selfplay_dir is not None:
        # Hold out a fraction (holdout_pct) of games for validation.
        if random.random() < holdout_pct:
            fname = os.path.join(holdout_dir,
                                 "{}.tfrecord.zz".format(output_name))
        else:
            fname = os.path.join(selfplay_dir,
                                 "{}.tfrecord.zz".format(output_name))

        preprocessing.write_tf_examples(fname, tf_examples) 
Example #26
Source File: add_model.py    From training with Apache License 2.0
def copy_to_gcs(src, dst):
    assert gfile.Exists(src)
    assert not gfile.Exists(dst)

    with gfile.GFile(src, "rb") as src_f, gfile.GFile(dst, "wb") as dst_f:
        shutil.copyfileobj(src_f, dst_f) 
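A hedged usage note: because gfile handles both local paths and GCS paths, the helper above can copy a local file into a bucket (or between buckets); the paths below are made up.

copy_to_gcs('/tmp/model-000123.pb', 'gs://my-bucket/models/model-000123.pb')  # hypothetical paths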
Example #27
Source File: evaluator_ringmaster_wrapper.py    From training with Apache License 2.0
def copy_to_gcs(src, dst):
    assert gfile.Exists(src), src
    assert not gfile.Exists(dst), dst

    print("Saving to", dst)
    with gfile.GFile(src, "rb") as src_f, gfile.GFile(dst, "wb") as dst_f:
        shutil.copyfileobj(src_f, dst_f) 
Example #28
Source File: prepare_bigquery.py    From training with Apache License 2.0
def extract_data(filename):
    with gfile.GFile(filename) as f:
        contents = f.read()
    root_node = sgf_wrapper.get_sgf_root_node(contents)
    game_data = extract_game_data(filename, root_node)
    move_data = extract_move_data(
        root_node,
        game_data['worker_id'],
        game_data['completed_time'],
        game_data['board_size'])
    return game_data, move_data 
Example #29
Source File: prepare_bigquery.py    From training with Apache License 2.0
def extract_holdout_model(model):
    game_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'games', model)
    move_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'moves', model)
    # Ensure the parent directories of the output files exist.
    gfile.MakeDirs(os.path.dirname(game_output_path))
    gfile.MakeDirs(os.path.dirname(move_output_path))

    with gfile.GFile(game_output_path, 'w') as game_f, \
            gfile.GFile(move_output_path, 'w') as move_f:
        for sgf_name in tqdm(get_sgf_names(model)):
            game_data, move_data = extract_data(sgf_name)
            game_f.write(json.dumps(game_data) + '\n')
            for move_datum in move_data:
                move_f.write(json.dumps(move_datum) + '\n') 
Example #30
Source File: embedding.py    From texar with Apache License 2.0
def load_glove(filename, vocab, word_vecs):
    """Loads embeddings in the glove text format in which each line is
    '<word-string> <embedding-vector>'. Dimensions of the embedding vector
    are separated with whitespace characters.

    Args:
        filename (str): Path to the embedding file.
        vocab (dict): A dictionary that maps token strings to integer index.
            Tokens not in :attr:`vocab` are not read.
        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`
            which is updated as reading from the file.

    Returns:
        The updated :attr:`word_vecs`.
    """
    with gfile.GFile(filename) as fin:
        for line in fin:
            vec = line.strip().split()
            if len(vec) == 0:
                continue
            word, vec = vec[0], vec[1:]
            word = tf.compat.as_text(word)
            if word not in vocab:
                continue
            if len(vec) != word_vecs.shape[1]:
                raise ValueError("Inconsistent word vector sizes: %d vs %d" %
                                 (len(vec), word_vecs.shape[1]))
            word_vecs[vocab[word]] = np.array([float(v) for v in vec])
    return word_vecs
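A hedged usage sketch mirroring the word2vec example above; the GloVe file path and toy vocabulary are made up, and the second dimension of word_vecs must match the dimensionality of the vectors in the file.

import numpy as np

vocab = {'the': 0, 'cat': 1}                              # hypothetical token -> row index map
word_vecs = np.zeros([len(vocab), 100], dtype='float32')  # 100 must match the GloVe vector size
word_vecs = load_glove('/path/to/glove.100d.txt', vocab, word_vecs)  # hypothetical path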