Python data.example_generator() Examples
The following are 15 code examples of data.example_generator().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module data, or try the search function.
Example #1
Source File: batcher.py From TransferRL with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue.

    Loops until the input generator is exhausted. In single_pass mode,
    exhaustion sets ``self._finished_reading`` and the loop stops; otherwise
    exhaustion is treated as an error. Examples whose abstract yields no
    sentences are skipped.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            # Built-in next() works on both Python 2.6+ and Python 3; the
            # .next() method only exists on Python 2 iterators.
            (article, abstract) = next(input_gen)  # article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        if not abstract_sentences:
            continue

        example = Example(article, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #2
Source File: batcher.py From TransferRL with MIT License | 6 votes |
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Examples with an empty article or abstract are skipped with a warning.
    When ``self._single_pass`` is set, the first ``self._decode_after``
    non-empty examples are skipped as well (the original comment describes
    these as already decoded docs).

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator

    Yields:
      (article_text, abstract_text) tuples, taken from the first value of the
      'article' and 'abstract' bytes_list features.
    """
    cnt = 0  # how many examples have been skipped so far in single_pass mode
    while True:
        try:
            # Built-in next() instead of the Python-2-only .next() method.
            e = next(example_generator)  # e is a tf.Example
        except StopIteration:
            # End the generator explicitly: under PEP 479 a StopIteration that
            # escapes a generator body is converted to RuntimeError, which the
            # consumer's `except StopIteration` would not catch.
            return
        try:
            article_text = e.features.feature['article'].bytes_list.value[0]  # saved under the key 'article' in the data files
            abstract_text = e.features.feature['abstract'].bytes_list.value[0]  # saved under the key 'abstract' in the data files
        except ValueError:
            tf.logging.error('Failed to get article or abstract from example')
            continue
        if len(article_text) == 0 or len(abstract_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
            tf.logging.warning('Found an example with empty article text. Skipping it.')
            continue
        if self._single_pass and cnt < self._decode_after:  # skip already decoded docs
            cnt += 1
            continue
        yield (article_text, abstract_text)
Example #3
Source File: batcher.py From RLSeq2Seq with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue.

    Loops until the input generator is exhausted. In single_pass mode,
    exhaustion sets ``self._finished_reading`` and the loop stops; otherwise
    exhaustion is treated as an error.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            # Built-in next() works on both Python 2.6+ and Python 3; the
            # .next() method only exists on Python 2 iterators.
            (article, abstract) = next(input_gen)  # article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        example = Example(article, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #4
Source File: batcher.py From RLSeq2Seq with MIT License | 6 votes |
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Examples with an empty article are skipped with a warning. When
    ``self._single_pass`` is set, the first ``self._decode_after`` non-empty
    examples are skipped as well (the original comment describes these as
    already decoded docs).

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator

    Yields:
      (article_text, abstract_text) tuples, taken from the first value of the
      'article' and 'abstract' bytes_list features.
    """
    cnt = 0  # how many examples have been skipped so far in single_pass mode
    while True:
        try:
            # Built-in next() instead of the Python-2-only .next() method.
            e = next(example_generator)  # e is a tf.Example
        except StopIteration:
            # End the generator explicitly: under PEP 479 a StopIteration that
            # escapes a generator body is converted to RuntimeError, which the
            # consumer's `except StopIteration` would not catch.
            return
        try:
            article_text = e.features.feature['article'].bytes_list.value[0]  # saved under the key 'article' in the data files
            abstract_text = e.features.feature['abstract'].bytes_list.value[0]  # saved under the key 'abstract' in the data files
        except ValueError:
            tf.logging.error('Failed to get article or abstract from example')
            continue
        if len(article_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
            tf.logging.warning('Found an example with empty article text. Skipping it.')
            continue
        if self._single_pass and cnt < self._decode_after:  # skip already decoded docs
            cnt += 1
            continue
        yield (article_text, abstract_text)
Example #5
Source File: batcher.py From MAX-Text-Summarizer with Apache License 2.0 | 6 votes |
def fill_example_queue(self):
    """Pull article texts from the data files and enqueue them as Examples.

    Each item produced by ``self.text_generator`` is used as both the first and
    second argument of ``Example`` (this variant reads no separate abstract).
    The loop ends once the generator is exhausted in single_pass mode.

    Raises:
      Exception: if the generator is exhausted while single_pass mode is off.
    """
    raw_examples = data.example_generator(self._data_path, self._single_pass)
    articles = self.text_generator(raw_examples)

    while True:
        try:
            article = next(articles)  # read the next article from file
        except StopIteration:
            # No more examples are available.
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if not self._single_pass:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")
            tf.logging.info(
                "single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
            self._finished_reading = True
            break

        # The article stands in for the abstract as well.
        self._example_queue.put(Example(article, article, self._vocab, self._hps))
Example #6
Source File: batcher.py From MAX-Text-Summarizer with Apache License 2.0 | 6 votes |
def text_generator(self, example_generator):
    """Generates article text from tf.Example.

    Examples with an empty article are skipped with a warning.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator

    Yields:
      The decoded article text, taken from the first value of the 'article'
      bytes_list feature.
    """
    while True:
        try:
            e = next(example_generator)  # e is a tf.Example
        except StopIteration:
            # End the generator explicitly: under PEP 479 a StopIteration that
            # escapes a generator body is converted to RuntimeError, which the
            # consumer's `except StopIteration` would not catch.
            return
        try:
            # the article text was saved under the key 'article' in the data files
            article_text = e.features.feature['article'].bytes_list.value[0].decode()
        except ValueError:
            tf.logging.error('Failed to get article or abstract from example')
            continue
        if len(article_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
            tf.logging.warning('Found an example with empty article text. Skipping it.')
        else:
            yield (article_text)
Example #7
Source File: batcher.py From Reinforce-Paraphrase-Generation with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads (article, abstract) pairs and places them on the example queue as Examples.

    The abstract is wrapped in a one-element list rather than being split into
    sentences. The loop ends once the generator is exhausted in single_pass
    mode, after setting ``self._finished_reading``.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            (article, abstract) = next(input_gen)  # article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            if self._single_pass:
                self._finished_reading = True
                break
            # The original had an unreachable `break` after this raise; removed.
            raise Exception("single_pass mode is off but the example generator is out of data; error.")

        abstract_sentences = [abstract]
        example = Example(article, abstract_sentences, self._vocab)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #8
Source File: batcher.py From Reinforce-Paraphrase-Generation with MIT License | 6 votes |
def text_generator(self, example_generator):
    """Yield decoded (article, abstract) text pairs from a stream of tf.Examples.

    Examples that raise ValueError while being read or decoded are skipped, as
    are examples whose article text is empty. The generator finishes when
    ``example_generator`` is exhausted.

    Args:
      example_generator: an iterator of tf.Examples. See data.example_generator
    """
    while True:
        try:
            record = next(example_generator)  # a tf.Example
            features = record.features.feature
            # Texts are stored under the keys 'article' and 'abstract'.
            article = features['article'].bytes_list.value[0].decode()
            abstract = features['abstract'].bytes_list.value[0].decode()
        except StopIteration:
            # Input exhausted: finish the generator.
            return
        except ValueError:
            # Could not read this example; move on to the next one.
            continue

        # See https://github.com/abisee/pointer-generator/issues/1
        if len(article) > 0:
            yield (article, abstract)
Example #9
Source File: batcher.py From pointer_summarizer with Apache License 2.0 | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue.

    Loops until the input generator is exhausted. In single_pass mode,
    exhaustion sets ``self._finished_reading`` and the loop stops; otherwise
    exhaustion is treated as an error.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            # Built-in next() works on both Python 2.6+ and Python 3; the
            # .next() method only exists on Python 2 iterators.
            (article, abstract) = next(input_gen)  # article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        example = Example(article, abstract_sentences, self._vocab)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #10
Source File: batcher.py From unified-summarization with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue.

    Each item from ``self.text_generator`` is an (article, abstract,
    extract_ids) triple. The article and abstract are split into sentences
    with ``data.document2sents``, and extract_ids is parsed from a
    comma-separated string into a list of ints.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            # Built-in next() works on both Python 2.6+ and Python 3; the
            # .next() method only exists on Python 2 iterators.
            (article, abstract, extract_ids) = next(input_gen)  # read the next example from file.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        article_sentences = [sent.strip() for sent in data.document2sents(article)]
        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.document2sents(abstract)]
        extract_ids = [int(i) for i in extract_ids.split(',')]
        example = Example(article_sentences, extract_ids, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #11
Source File: batcher.py From unified-summarization with MIT License | 6 votes |
def text_generator(self, example_generator):
    """Generates article text, abstract text and extract ids from tf.Example.

    Examples with an empty article are skipped with a warning.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator

    Yields:
      (article_text, abstract_text, extract_ids_str) tuples, taken from the
      first value of the 'article', 'abstract' and 'extract_ids' bytes_list
      features.
    """
    while True:
        try:
            # Built-in next() instead of the Python-2-only .next() method.
            e = next(example_generator)  # e is a tf.Example
        except StopIteration:
            # End the generator explicitly: under PEP 479 a StopIteration that
            # escapes a generator body is converted to RuntimeError, which the
            # consumer's `except StopIteration` would not catch.
            return
        try:
            article_text = e.features.feature['article'].bytes_list.value[0]  # saved under the key 'article' in the data files
            abstract_text = e.features.feature['abstract'].bytes_list.value[0]  # saved under the key 'abstract' in the data files
            extract_ids_str = e.features.feature['extract_ids'].bytes_list.value[0]
        except ValueError:
            tf.logging.error('Failed to get article or abstract from example')
            continue
        if len(article_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
            tf.logging.warning('Found an example with empty article text. Skipping it.')
        else:
            yield (article_text, abstract_text, extract_ids_str)
Example #12
Source File: batcher.py From pointer-generator with Apache License 2.0 | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue.

    Loops until the input generator is exhausted. In single_pass mode,
    exhaustion sets ``self._finished_reading`` and the loop stops; otherwise
    exhaustion is treated as an error.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            # Built-in next() works on both Python 2.6+ and Python 3; the
            # .next() method only exists on Python 2 iterators.
            (article, abstract) = next(input_gen)  # article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        example = Example(article, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #13
Source File: batcher.py From pointer-generator with Apache License 2.0 | 6 votes |
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Examples with an empty article are skipped with a warning.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator

    Yields:
      (article_text, abstract_text) tuples, taken from the first value of the
      'article' and 'abstract' bytes_list features.
    """
    while True:
        try:
            # Built-in next() instead of the Python-2-only .next() method.
            e = next(example_generator)  # e is a tf.Example
        except StopIteration:
            # End the generator explicitly: under PEP 479 a StopIteration that
            # escapes a generator body is converted to RuntimeError, which the
            # consumer's `except StopIteration` would not catch.
            return
        try:
            article_text = e.features.feature['article'].bytes_list.value[0]  # saved under the key 'article' in the data files
            abstract_text = e.features.feature['abstract'].bytes_list.value[0]  # saved under the key 'abstract' in the data files
        except ValueError:
            tf.logging.error('Failed to get article or abstract from example')
            continue
        if len(article_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
            tf.logging.warning('Found an example with empty article text. Skipping it.')
        else:
            yield (article_text, abstract_text)
Example #14
Source File: batcher.py From pointer_summarizer with Apache License 2.0 | 5 votes |
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Examples with an empty article are silently skipped.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator

    Yields:
      (article_text, abstract_text) tuples, taken from the first value of the
      'article' and 'abstract' bytes_list features.
    """
    while True:
        try:
            # Built-in next() instead of the Python-2-only .next() method.
            e = next(example_generator)  # e is a tf.Example
        except StopIteration:
            # End the generator explicitly: under PEP 479 a StopIteration that
            # escapes a generator body is converted to RuntimeError, which the
            # consumer's `except StopIteration` would not catch.
            return
        try:
            article_text = e.features.feature['article'].bytes_list.value[0]  # saved under the key 'article' in the data files
            abstract_text = e.features.feature['abstract'].bytes_list.value[0]  # saved under the key 'abstract' in the data files
        except ValueError:
            tf.logging.error('Failed to get article or abstract from example')
            continue
        if len(article_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
            continue
        yield (article_text, abstract_text)
Example #15
Source File: batch_reader.py From long-summarization with Apache License 2.0 | 4 votes |
def _fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue.

    Items are filtered before an Example is built: they are skipped when the
    article text contains the long underscore filler string, when there are
    fewer than two sections, when ``len(article_text)`` exceeds
    ``self._hps.max_article_sents``, or when the abstract word count falls
    outside ``[self._hps.min_abstract_len, self._hps.max_abstract_len]``.
    Examples flagged ``discard`` are counted but not enqueued.

    Raises:
      Exception: if the generator runs out of data while single_pass mode is off.
    """
    input_gen = self.text_generator(
        data.example_generator(self._data_path, self._single_pass))
    cnt = 0   # Examples built so far
    fail = 0  # Examples built but flagged as discard

    while True:
        try:
            # read the next example from file.
            (article_id, article_text, abstract_sents, labels,
             section_names, sections) = six.next(input_gen)
        except StopIteration:  # if there are no more examples:
            tf.logging.info(
                "The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info(
                    "single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception(
                    "single_pass mode is off but the example generator is out of data; error.")

        # Strip the sentence start/end tags from each abstract sentence.
        abstract_sentences = [
            e.replace(data.SENTENCE_START, '').replace(data.SENTENCE_END, '').strip()
            for e in abstract_sents]

        if "_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _" in article_text:
            continue

        # at least 2 sections, some articles do not have sections.
        # Test for missing/empty first so that a None value is skipped instead
        # of raising TypeError from len().
        if not sections or len(sections) <= 1:
            continue

        # do not process articles that are too long
        if len(article_text) > self._hps.max_article_sents:
            continue

        # Do not process documents with unusually long or short abstracts
        abst_len = len(' '.join(abstract_sentences).split())
        if abst_len > self._hps.max_abstract_len or abst_len < self._hps.min_abstract_len:
            continue

        # Process into an Example.
        example = Example(article_text, abstract_sentences, article_id, sections,
                          section_names, labels, self._vocab, self._hps)

        cnt += 1
        if example.discard:
            fail += 1
        else:
            # place the Example in the example queue.
            self._example_queue.put(example)

        if cnt % 100 == 0:
            print('total in queue: {} of {}'.format(cnt - fail, cnt))