Python tensorflow.example() Examples
The following are 15 code examples of tensorflow.example().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: feature_transforms.py From pydatalab with Apache License 2.0 | 6 votes |
def read_vocab_file(file_path):
    """Load a vocabulary CSV into parallel token and count lists.

    Args:
      file_path: Path to a headerless CSV whose rows have the form
        "token,example_count".

    Returns:
      A `(vocab, ex_count)` pair: the tokens as strings and the example
      counts converted to ints, both in file order.
    """
    # Everything is read as text so numeric-looking tokens stay strings, and
    # NA detection is switched off so a token such as 'NA' is not turned
    # into NaN.
    read_options = dict(
        header=None,
        names=['vocab', 'count'],
        dtype=str,
        na_filter=False)

    with file_io.FileIO(file_path, 'r') as vocab_file:
        frame = pd.read_csv(vocab_file, **read_options)

    tokens = frame['vocab'].tolist()
    counts = frame['count'].astype(int).tolist()
    return tokens, counts
Example #2
Source File: test_training.py From pydatalab with Apache License 2.0 | 6 votes |
def _run_training_transform(self, problem_type, model_type, extra_args=None):
    """Runs training starting with transformed tf.example files.

    Builds a `python -m trainer.task` command line, logs it at debug level
    and executes it through the shell from inside CODE_PATH.

    Args:
      problem_type: 'regression' or 'classification'
      model_type: 'linear' or 'dnn'
      extra_args: optional list of strings appended to the trainer command
        line. Defaults to no extra arguments.

    Raises:
      subprocess.CalledProcessError: if the command exits with a non-zero
        status (raised by subprocess.check_call).
    """
    # A None default avoids sharing one mutable list object across calls.
    if extra_args is None:
        extra_args = []

    cmd = ['cd %s && ' % CODE_PATH,
           'python -m trainer.task',
           '--train=' + os.path.join(self._transform_output, 'features_train*'),
           '--eval=' + os.path.join(self._transform_output, 'features_eval*'),
           '--job-dir=' + self._train_output,
           '--analysis=' + self._analysis_output,
           '--model=%s_%s' % (model_type, problem_type),
           '--train-batch-size=100',
           '--eval-batch-size=50',
           '--max-steps=' + str(self._max_steps)] + extra_args

    command_line = ' '.join(cmd)
    self._logger.debug('Running subprocess: %s \n\n' % command_line)
    # NOTE(review): shell=True is needed for the leading `cd ... &&`; every
    # argument is interpolated unquoted, so callers must only pass trusted
    # values in extra_args.
    subprocess.check_call(command_line, shell=True)
Example #3
Source File: transform.py From pydatalab with Apache License 2.0 | 5 votes |
def serialize_example(transformed_json_data, info_dict):
    """Build a tf.train.Example from transformed data and serialize it.

    Args:
      transformed_json_data: dict mapping feature name to the list of values
        for that feature.
      info_dict: dict mapping feature name to a dict with a 'dtype' entry
        (tf.int64, tf.float32 or tf.string). Must have exactly the same keys
        as transformed_json_data.

    Returns:
      The serialized tf.train.Example holding transformed_json_data.

    Raises:
      ValueError: if the two dicts have different keys, or a feature's dtype
        is not one of the three supported types.
    """
    import six
    import tensorflow as tf

    data_keys = list(six.iterkeys(transformed_json_data))
    info_keys = list(six.iterkeys(info_dict))
    if sorted(data_keys) != sorted(info_keys):
        raise ValueError('Keys do not match %s, %s' % (data_keys, info_keys))

    def _int64_feature(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    def _float_feature(values):
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))

    def _bytes_feature(values):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

    # Checked in order with `==` (not a dict lookup) so dtype comparison
    # semantics stay exactly those of the dtype object itself.
    converters = (
        (tf.int64, _int64_feature),
        (tf.float32, _float_feature),
        (tf.string, _bytes_feature),
    )

    feature_by_name = {}
    for name, info in six.iteritems(info_dict):
        for supported_dtype, make_feature in converters:
            if info['dtype'] == supported_dtype:
                feature_by_name[name] = make_feature(transformed_json_data[name])
                break
        else:
            raise ValueError('Unsupported data type %s' % info['dtype'])

    proto = tf.train.Example(features=tf.train.Features(feature=feature_by_name))
    return proto.SerializeToString()
Example #4
Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0 | 5 votes |
def _check_tensorflow():
    """Raise ImportError when the module-level name `tf` is None."""
    if tf is not None:
        return

    message = ('Failed to import `tensorflow`. '
               'Please install `tensorflow`. '
               'For example with `pip install tensorflow`.')
    raise ImportError(message)
Example #5
Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0 | 5 votes |
def _create_intent_dict(training_data):
    """Map each distinct intent to a stable integer index.

    Args:
        training_data: object exposing `intent_examples`, an iterable of
            items whose `.get("intent")` returns the intent label.

    Returns:
        A dict `{intent: index}` where indices follow the sorted order of
        the distinct intents, so the mapping does not depend on the order of
        the examples.
    """
    # Set comprehension: de-duplicates without first building a throwaway list.
    distinct_intents = {example.get("intent")
                        for example in training_data.intent_examples}
    return {intent: idx for idx, intent in enumerate(sorted(distinct_intents))}
Example #6
Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0 | 5 votes |
def persist(self, file_name: Text, model_dir: Text):
    """Persist this model into the passed directory.

    Exports the estimator through `export_savedmodel` into `model_dir` and
    pickles `inv_intent_dict` and `encoded_all_intents` into files named
    after `file_name` in the same directory.

    Args:
        file_name: prefix used for the two pickle file names.
        model_dir: directory receiving the exported model and the pickles.

    Returns:
        `{"classifier_file": None}` when there is no estimator to persist,
        otherwise `{"file": file_name}` (the metadata needed to load the
        model again).
    """
    if self.estimator is None:
        return {"classifier_file": None}

    # build feature spec for tf.example parsing
    feature_spec = tf.feature_column.make_parse_example_spec(self.feature_columns)
    # build tf.example parser
    serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
    # export tf model; the returned export path is not needed afterwards, so
    # it is no longer decoded into an unused local.
    self.estimator.export_savedmodel(model_dir, serving_input_receiver_fn)

    inv_intent_path = os.path.join(model_dir, file_name + "_inv_intent_dict.pkl")
    with io.open(inv_intent_path, 'wb') as f:
        pickle.dump(self.inv_intent_dict, f)

    encoded_intents_path = os.path.join(model_dir, file_name + "_encoded_all_intents.pkl")
    with io.open(encoded_intents_path, 'wb') as f:
        pickle.dump(self.encoded_all_intents, f)

    return {"file": file_name}
Example #7
Source File: tagging_data_lib.py From models with Apache License 2.0 | 5 votes |
def add_word_and_label_id(self, word, label_id):
    """Append `word` to `self.words` and `label_id` to `self.label_ids`."""
    words = self.words
    words.append(word)
    label_ids = self.label_ids
    label_ids.append(label_id)
Example #8
Source File: tagging_data_lib.py From models with Apache License 2.0 | 5 votes |
def _read_one_file(file_name, label_list):
    """Reads one file and returns a list of `InputExample` instances.

    Each non-empty line is `<token>\\t<label>` (train/dev) or just `<token>`
    (test); an empty line ends the current sentence.

    Args:
      file_name: path of the file to read.
      label_list: sequence of label strings; a label's position in this
        sequence is used as its label id.

    Returns:
      A list of `InputExample`, one per sentence that has at least one word.
      Sentence ids start at 0 and increase by one per emitted sentence.
    """
    # Context manager so the file handle is closed once the lines are read.
    with tf.io.gfile.GFile(file_name, "r") as reader:
        lines = reader.readlines()

    examples = []
    label_id_map = {label: i for i, label in enumerate(label_list)}
    sentence_id = 0
    example = InputExample(sentence_id=0)
    for line in lines:
        line = line.strip("\n")
        if line:
            # The format is: <token>\t<label> for train/dev set and <token> for test.
            items = line.split("\t")
            assert len(items) == 2 or len(items) == 1
            token = items[0].strip()

            # Assign a dummy label_id for test set
            label_id = label_id_map[items[1].strip()] if len(items) == 2 else 0
            example.add_word_and_label_id(token, label_id)
        else:
            # Empty line indicates a new sentence. Consecutive empty lines do
            # not produce empty examples or consume sentence ids.
            if example.words:
                examples.append(example)
                sentence_id += 1
                example = InputExample(sentence_id=sentence_id)

    # Flush the last sentence when the file does not end with an empty line.
    if example.words:
        examples.append(example)
    return examples
Example #9
Source File: tagging_data_lib.py From models with Apache License 2.0 | 5 votes |
def _tokenize_example(example, max_length, tokenizer, text_preprocessing=None):
    """Tokenizes words and breaks long example into short ones.

    Args:
      example: an `InputExample` with parallel `words` and `label_ids`.
      max_length: maximum length of an output example, including the two
        slots reserved for the [CLS] and [SEP] tokens.
      tokenizer: object whose `tokenize(word)` returns a list of subwords.
      text_preprocessing: optional callable applied to each word before it
        is tokenized.

    Returns:
      A list of `InputExample` sharing the input's sentence_id, each holding
      at most `max_length - 2` subwords.

    Raises:
      ValueError: if the example has words and any label id is negative.
    """
    # Needs additional [CLS] and [SEP] tokens.
    max_length = max_length - 2
    new_examples = []
    new_example = InputExample(sentence_id=example.sentence_id)

    # Validate the labels once instead of re-scanning them for every word.
    # The `example.words` guard keeps the old behavior of never validating
    # an example that has no words.
    if example.words and any(x < 0 for x in example.label_ids):
        raise ValueError("Unexpected negative label_id: %s" % example.label_ids)

    for i, word in enumerate(example.words):
        if text_preprocessing:
            word = text_preprocessing(word)
        subwords = tokenizer.tokenize(word)
        # A non-empty word that yields no subwords, or more than fit in one
        # example, is replaced by a single unknown token.
        if (not subwords or len(subwords) > max_length) and word:
            subwords = [_UNK_TOKEN]

        if len(subwords) + len(new_example.words) > max_length:
            # Start a new example.
            new_examples.append(new_example)
            new_example = InputExample(sentence_id=example.sentence_id)

        for j, subword in enumerate(subwords):
            # Use the real label for the first subword, and pad label for
            # the remainings.
            subword_label = example.label_ids[i] if j == 0 else _PADDING_LABEL_ID
            new_example.add_word_and_label_id(subword, subword_label)

    if new_example.words:
        new_examples.append(new_example)

    return new_examples
Example #10
Source File: tagging_data_lib.py From models with Apache License 2.0 | 5 votes |
def _convert_single_example(example, max_seq_length, tokenizer):
    """Build a `tf.train.Example` from one `InputExample`.

    The words are wrapped in "[CLS]" / "[SEP]", converted to ids by
    `tokenizer.convert_tokens_to_ids`, and all per-token lists are padded
    until `input_ids` reaches `max_seq_length`. Nothing is truncated here.
    """
    tokens = ["[CLS]"] + list(example.words) + ["[SEP]"]
    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The two special tokens carry the padding label.
    label_ids = [_PADDING_LABEL_ID] + list(example.label_ids) + [_PADDING_LABEL_ID]

    token_count = len(input_ids)
    segment_ids = [0] * token_count
    input_mask = [1] * token_count

    # Pad up to the sequence length; padded positions get mask 0.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)
        label_ids.append(_PADDING_LABEL_ID)

    def _int64_feature(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

    int_columns = (
        ("input_ids", input_ids),
        ("input_mask", input_mask),
        ("segment_ids", segment_ids),
        ("label_ids", label_ids),
        ("sentence_id", [example.sentence_id]),
    )
    features = collections.OrderedDict(
        (column, _int64_feature(values)) for column, values in int_columns)

    return tf.train.Example(features=tf.train.Features(feature=features))
Example #11
Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0 | 4 votes |
def process(self, message, **kwargs):
    # type: (Message, **Any) -> None
    """Set the most likely intent and an intent ranking on `message`.

    The message's "text_features" are wrapped in a serialized
    tf.train.Example (float feature 'a_in') and passed to `self.predictor`.
    The first row of the returned 'scores' is used as per-intent
    confidence.

    When there is no predictor, or the feature vector is empty, the message
    receives the default intent `{"name": None, "confidence": 0.0}` and an
    empty ranking.
    """
    intent = {"name": None, "confidence": 0.0}
    intent_ranking = []

    if self.predictor is None:
        logger.error("There is no trained tf.session: "
                     "component is either not trained or "
                     "didn't receive enough training data")
    else:
        X = message.get("text_features").tolist()

        # convert input x to tf.feature with float feature spec
        feature = {'a_in': tf.train.Feature(float_list=tf.train.FloatList(value=X))}
        # build tf.example for prediction and serialize it to string
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        examples = [example.SerializeToString()]

        # Make predictions.
        result_dict = self.predictor({'inputs': examples})
        scores = result_dict['scores'][0]
        max_score = np.max(scores)
        max_index = np.argmax(scores)

        # Only report a label when the feature vector is non-empty.
        if len(X) > 0:
            intent = {
                "name": self.inv_intent_dict[max_index],
                "confidence": float(max_score)
            }
            # Rank all intents by score first and truncate afterwards.
            # Truncating before sorting kept only the first
            # INTENT_RANKING_LENGTH intents by index, which could drop the
            # best-scoring intent from the ranking.
            ranked = sorted(enumerate(scores),
                            key=lambda pair: pair[1],
                            reverse=True)
            intent_ranking = [{"name": self.inv_intent_dict[intent_idx],
                               "confidence": float(score)}
                              for intent_idx, score
                              in ranked[:INTENT_RANKING_LENGTH]]

    message.set("intent", intent, add_to_output=True)
    message.set("intent_ranking", intent_ranking, add_to_output=True)
Example #12
Source File: parse_sdf_utils.py From deep-molecular-massspec with Apache License 2.0 | 4 votes |
def dict_to_tfexample(mol_dict):
    """Copy the entries of a molecule dictionary into a tf.train.Example.

    Args:
      mol_dict : dictionary containing molecule info, keyed by the
        fmap_constants names and by CircularFingerprintKey objects.

    Returns:
      example : tf.train.Example holding the mol_dict values as float,
        int64 or bytes features.
    """
    example = tf.train.Example()
    features = example.features.feature

    def _floats(key):
        return features[key].float_list.value

    def _ints(key):
        return features[key].int64_list.value

    _floats(fmap_constants.ATOM_WEIGHTS).extend(
        mol_dict[fmap_constants.ATOM_WEIGHTS])
    _ints(fmap_constants.ATOM_IDS).extend(
        mol_dict[fmap_constants.ATOM_IDS])
    _ints(fmap_constants.ADJACENCY_MATRIX).extend(
        mol_dict[fmap_constants.ADJACENCY_MATRIX])
    _floats(fmap_constants.MOLECULE_WEIGHT).append(
        mol_dict[fmap_constants.MOLECULE_WEIGHT])
    _floats(fmap_constants.DENSE_MASS_SPEC).extend(
        mol_dict[fmap_constants.DENSE_MASS_SPEC])

    # Text entries are stored as UTF-8 encoded single-value bytes features.
    text_keys = (
        fmap_constants.INCHIKEY,
        fmap_constants.MOLECULAR_FORMULA,
        fmap_constants.NAME,
        fmap_constants.SMILES,
    )
    for text_key in text_keys:
        features[text_key].bytes_list.value.append(
            mol_dict[text_key].encode('utf-8'))

    # The ground-truth index is optional.
    truth_key = fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY
    if truth_key in mol_dict:
        _ints(truth_key).append(mol_dict[truth_key])

    # One float feature per (length, radius, type) fingerprint combination,
    # stored under the string form of its key.
    for fp_len in ms_constants.NUM_CIRCULAR_FP_BITS_LIST:
        for rad in ms_constants.CIRCULAR_FP_RADII_LIST:
            for fp_type in fmap_constants.FP_TYPE_LIST:
                fp_key = ms_constants.CircularFingerprintKey(fp_type, fp_len, rad)
                _floats(str(fp_key)).extend(mol_dict[fp_key])

    return example
Example #13
Source File: parse_sdf_utils.py From deep-molecular-massspec with Apache License 2.0 | 4 votes |
def write_dicts_to_example(mol_list,
                           record_path_name,
                           max_atoms,
                           max_mass_spec_peak_loc,
                           true_library_array_path_name=None):
    """Write one serialized tf.train.Example per molecule to a TFRecord file.

    Each molecule is converted with make_mol_dict and dict_to_tfexample and
    written to a ZLIB-compressed record file.

    Args:
      mol_list : list of rdkit.Mol objects
      record_path_name : file name for storing tf record
      max_atoms : passed through to make_mol_dict.
      max_mass_spec_peak_loc : passed through to make_mol_dict; also the
        number of columns of the saved spectra array.
      true_library_array_path_name: optional path for storing the np.array
        of dense spectra.

    Returns:
      None. Side effects:
      - Writes the tf.Record file at record_path_name.
      - If true_library_array_path_name is set, each molecule's dict also
        records its index, and an np.array of shape
        (len(mol_list), max_mass_spec_peak_loc) is saved to that path.
    """
    options = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB)

    save_library = bool(true_library_array_path_name)
    spectra_matrix = None
    if save_library:
        spectra_matrix = np.zeros((len(mol_list), max_mass_spec_peak_loc))

    def _mol_dict_for(idx, mol):
        # When the library array is requested, tag the dict with its row
        # index and copy its dense spectrum into that row.
        mol_dict = make_mol_dict(mol, max_atoms, max_mass_spec_peak_loc)
        if save_library:
            mol_dict[fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY] = idx
            spectra_matrix[idx, :] = mol_dict[fmap_constants.DENSE_MASS_SPEC]
        return mol_dict

    with tf.python_io.TFRecordWriter(record_path_name, options) as writer:
        for idx, mol in enumerate(mol_list):
            serialized = dict_to_tfexample(_mol_dict_for(idx, mol)).SerializeToString()
            writer.write(serialized)

    if save_library:
        with tf.gfile.Open(true_library_array_path_name, 'w') as f:
            np.save(f, spectra_matrix)
Example #14
Source File: model_agnostic_predict.py From model-analysis with Apache License 2.0 | 4 votes |
def __new__(cls, label_keys: List[Text], prediction_keys: List[Text],
            feature_spec: Dict[Text, Any]):
    """Validates the arguments and creates a ModelAgnosticConfig instance.

    Args:
      label_keys: keys of the input examples to treat as labels. Must not be
        empty, and every key must appear in feature_spec.
      prediction_keys: keys of the input examples to treat as predictions.
        Must not be empty, and every key must appear in feature_spec.
      feature_spec: dict describing how to parse the input examples, keyed
        by feature name. Must not be empty.

    Returns:
      A ModelAgnosticConfig instance.

    Raises:
      ValueError: if any argument is empty, or a prediction/label key is
        missing from feature_spec.
    """
    required_arguments = (
        (label_keys, 'ModelAgnosticConfig must have label keys set.'),
        (prediction_keys, 'ModelAgnosticConfig must have prediction keys set.'),
        (feature_spec, 'ModelAgnosticConfig must have feature_spec set.'),
    )
    for value, message in required_arguments:
        if not value:
            raise ValueError(message)

    # Prediction keys are checked before label keys.
    key_groups = (
        ('Prediction key %s not defined in feature_spec.', prediction_keys),
        ('Label key %s not defined in feature_spec.', label_keys),
    )
    for template, keys in key_groups:
        for key in keys:
            if key not in feature_spec:
                raise ValueError(template % key)

    return super(ModelAgnosticConfig, cls).__new__(
        cls,
        label_keys=label_keys,
        prediction_keys=prediction_keys,
        feature_spec=feature_spec)
Example #15
Source File: model_agnostic_predict.py From model-analysis with Apache License 2.0 | 4 votes |
def get_fpls_from_examples(self, input_example_bytes_list: List[bytes]
                           ) -> List[Any]:
    """Generates FPLs from serialized examples using a ModelAgnostic graph.

    Args:
      input_example_bytes_list: the serialized tf.example protos to be
        parsed by the graph.

    Returns:
      A list of FeaturesPredictionsLabels, one per input example. Every key
      is stored in `features`; keys listed in the config's label_keys /
      prediction_keys are additionally stored in `labels` / `predictions`.

    Raises:
      ValueError: if the parsed keys do not all contain the same number of
        examples.
    """
    # Call the graph via the created session callable _get_features_fn and
    # get the tensor representation of the features.
    features = self._get_features_fn(input_example_bytes_list)

    split_features = {}
    num_examples = 0

    # Split the features by the example keys. Also verify that each example
    # key has the same number of total examples.
    for key, value in features.items():
        split_features[key] = util.split_tensor_value(value)
        if num_examples == 0:
            num_examples = len(split_features[key])
        elif num_examples != len(split_features[key]):
            # Both values must be passed as one tuple; formatting with `key`
            # alone raised TypeError instead of this ValueError.
            raise ValueError(
                'Different keys unexpectedly had different number of '
                'examples. Key %s unexpectedly had %s elements.' %
                (key, len(split_features[key])))

    # Sort out the examples into individual FPLs: one example -> one FPL.
    # Sort them into Features, Predictions, or Labels according to the input
    # config.
    result = []
    for i in range(num_examples):
        labels = {}
        predictions = {}
        example_features = {}
        for key in split_features:
            if key in self._config.label_keys:
                labels[key] = {encoding.NODE_SUFFIX: split_features[key][i]}
            if key in self._config.prediction_keys:
                predictions[key] = {encoding.NODE_SUFFIX: split_features[key][i]}
            example_features[key] = {encoding.NODE_SUFFIX: split_features[key][i]}

        result.append(
            types.FeaturesPredictionsLabels(
                input_ref=i,
                features=example_features,
                predictions=predictions,
                labels=labels))

    return result