Python data.example_generator() Examples

The following are 15 code examples of data.example_generator(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data , or try the search function .
Example #1
Source File: batcher.py    From TransferRL with MIT License 6 votes vote down vote up
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""

    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
      try:
        (article, abstract) = input_gen.next() # read the next example from file. article and abstract are both strings.
      except StopIteration: # if there are no more examples:
        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        if self._single_pass:
          tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
          self._finished_reading = True
          break
        else:
          raise Exception("single_pass mode is off but the example generator is out of data; error.")

      abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
      if abstract_sentences is None or len(abstract_sentences) == 0: continue
      example = Example(article, abstract_sentences, self._vocab, self._hps) # Process into an Example.
      self._example_queue.put(example) # place the Example in the example queue. 
Example #2
Source File: batcher.py    From TransferRL with MIT License 6 votes vote down vote up
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator"""
    cnt = 0
    while True:
      e = example_generator.next() # e is a tf.Example
      try:
        article_text = e.features.feature['article'].bytes_list.value[0] # the article text was saved under the key 'article' in the data files
        abstract_text = e.features.feature['abstract'].bytes_list.value[0] # the abstract text was saved under the key 'abstract' in the data files
      except ValueError:
        tf.logging.error('Failed to get article or abstract from example')
        continue
      if len(article_text)==0 or len(abstract_text)==0: # See https://github.com/abisee/pointer-generator/issues/1
        tf.logging.warning('Found an example with empty article text. Skipping it.')
      else:
        if self._single_pass and cnt < self._decode_after: #skip already decoded docs
          cnt +=1
          continue
        yield (article_text, abstract_text) 
Example #3
Source File: batcher.py    From RLSeq2Seq with MIT License 6 votes vote down vote up
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""

    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
      try:
        (article, abstract) = input_gen.next() # read the next example from file. article and abstract are both strings.
      except StopIteration: # if there are no more examples:
        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        if self._single_pass:
          tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
          self._finished_reading = True
          break
        else:
          raise Exception("single_pass mode is off but the example generator is out of data; error.")

      abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
      example = Example(article, abstract_sentences, self._vocab, self._hps) # Process into an Example.
      self._example_queue.put(example) # place the Example in the example queue. 
Example #4
Source File: batcher.py    From RLSeq2Seq with MIT License 6 votes vote down vote up
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator"""
    cnt = 0
    while True:
      e = example_generator.next() # e is a tf.Example
      try:
        article_text = e.features.feature['article'].bytes_list.value[0] # the article text was saved under the key 'article' in the data files
        abstract_text = e.features.feature['abstract'].bytes_list.value[0] # the abstract text was saved under the key 'abstract' in the data files
      except ValueError:
        tf.logging.error('Failed to get article or abstract from example')
        continue
      if len(article_text)==0: # See https://github.com/abisee/pointer-generator/issues/1
        tf.logging.warning('Found an example with empty article text. Skipping it.')
      else:
        if self._single_pass and cnt < self._decode_after: #skip already decoded docs
          cnt +=1
          continue
        yield (article_text, abstract_text) 
Example #5
Source File: batcher.py    From MAX-Text-Summarizer with Apache License 2.0 6 votes vote down vote up
def fill_example_queue(self):
        """Reads data from file and processes into Examples which are then placed into the example queue."""

        input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

        while True:
            try:
                # (article, abstract) = next(input_gen) # read the next example from file. article and abstract are both strings.
                (article) = next(input_gen)  # read the next example from file. article and abstract are both strings.
            except StopIteration:  # if there are no more examples:
                tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
                if self._single_pass:
                    tf.logging.info(
                        "single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                    self._finished_reading = True
                    break
                else:
                    raise Exception("single_pass mode is off but the example generator is out of data; error.")

            # abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
            example = Example(article, article, self._vocab, self._hps)  # Process into an Example.
            self._example_queue.put(example)  # place the Example in the example queue. 
Example #6
Source File: batcher.py    From MAX-Text-Summarizer with Apache License 2.0 6 votes vote down vote up
def text_generator(self, example_generator):
        """Generates article and abstract text from tf.Example.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator"""
        while True:
            e = next(example_generator)  # e is a tf.Example
            try:
                article_text = e.features.feature['article'].bytes_list.value[
                    0].decode()  # the article text was saved under the key 'article' in the data files
                # abstract_text = e.features.feature['abstract'].bytes_list.value[0].decode() # the abstract text was saved under the key 'abstract' in the data files
            except ValueError:
                tf.logging.error('Failed to get article or abstract from example')
                continue
            if len(article_text) == 0:  # See https://github.com/abisee/pointer-generator/issues/1
                tf.logging.warning('Found an example with empty article text. Skipping it.')
            else:
                # yield (article_text, abstract_text)
                yield (article_text) 
Example #7
Source File: batcher.py    From Reinforce-Paraphrase-Generation with MIT License 6 votes vote down vote up
def fill_example_queue(self):
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))
    while True:
      try:
        (article, abstract) = next(input_gen) # read the next example from file. article and abstract are both strings.
      except StopIteration: # if there are no more examples:
        if self._single_pass:
          self._finished_reading = True
          break
        else:
          raise Exception("single_pass mode is off but the example generator is out of data; error.")
          break

#      abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
#      abstract = str(abstract, encoding='utf8')
      abstract_sentences = [abstract]
      example = Example(article, abstract_sentences, self._vocab) # Process into an Example.
      self._example_queue.put(example) # place the Example in the example queue. 
Example #8
Source File: batcher.py    From Reinforce-Paraphrase-Generation with MIT License 6 votes vote down vote up
def text_generator(self, example_generator):
    while True:
      try:
        e = next(example_generator) # e is a tf.Example
        article_text = e.features.feature['article'].bytes_list.value[0].decode() # the article text was saved under the key 'article' in the data files
        abstract_text = e.features.feature['abstract'].bytes_list.value[0].decode() # the abstract text was saved under the key 'abstract' in the data files
      except ValueError:
#        tf.logging.error('Failed to get article or abstract from example')
        continue
      except StopIteration:
#        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        break
      if len(article_text)==0: # See https://github.com/abisee/pointer-generator/issues/1
#        tf.logging.warning('Found an example with empty article text. Skipping it.')
        continue
      else:
        yield (article_text, abstract_text) 
Example #9
Source File: batcher.py    From pointer_summarizer with Apache License 2.0 6 votes vote down vote up
def fill_example_queue(self):
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
      try:
        (article, abstract) = input_gen.next() # read the next example from file. article and abstract are both strings.
      except StopIteration: # if there are no more examples:
        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        if self._single_pass:
          tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
          self._finished_reading = True
          break
        else:
          raise Exception("single_pass mode is off but the example generator is out of data; error.")

      abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
      example = Example(article, abstract_sentences, self._vocab) # Process into an Example.
      self._example_queue.put(example) # place the Example in the example queue. 
Example #10
Source File: batcher.py    From unified-summarization with MIT License 6 votes vote down vote up
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""

    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
      try:
        (article, abstract, extract_ids) = input_gen.next() # read the next example from file. article and abstract are both strings.
      except StopIteration: # if there are no more examples:
        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        if self._single_pass:
          tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
          self._finished_reading = True
          break
        else:
          raise Exception("single_pass mode is off but the example generator is out of data; error.")

      article_sentences = [sent.strip() for sent in data.document2sents(article)]
      abstract_sentences = [sent.strip() for sent in data.document2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
      extract_ids = extract_ids.split(',')
      extract_ids = [int(i) for i in extract_ids]
      example = Example(article_sentences, extract_ids, abstract_sentences, self._vocab, self._hps) # Process into an Example.
      self._example_queue.put(example) # place the Example in the example queue. 
Example #11
Source File: batcher.py    From unified-summarization with MIT License 6 votes vote down vote up
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator"""
    while True:
      e = example_generator.next() # e is a tf.Example
      try:
        article_text = e.features.feature['article'].bytes_list.value[0] # the article text was saved under the key 'article' in the data files
        abstract_text = e.features.feature['abstract'].bytes_list.value[0] # the abstract text was saved under the key 'abstract' in the data files
        extract_ids_str = e.features.feature['extract_ids'].bytes_list.value[0]
      except ValueError:
        tf.logging.error('Failed to get article or abstract from example')
        continue
      if len(article_text)==0: # See https://github.com/abisee/pointer-generator/issues/1
        tf.logging.warning('Found an example with empty article text. Skipping it.')
      else:
        yield (article_text, abstract_text, extract_ids_str) 
Example #12
Source File: batcher.py    From pointer-generator with Apache License 2.0 6 votes vote down vote up
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""

    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))

    while True:
      try:
        (article, abstract) = input_gen.next() # read the next example from file. article and abstract are both strings.
      except StopIteration: # if there are no more examples:
        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        if self._single_pass:
          tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
          self._finished_reading = True
          break
        else:
          raise Exception("single_pass mode is off but the example generator is out of data; error.")

      abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)] # Use the <s> and </s> tags in abstract to get a list of sentences.
      example = Example(article, abstract_sentences, self._vocab, self._hps) # Process into an Example.
      self._example_queue.put(example) # place the Example in the example queue. 
Example #13
Source File: batcher.py    From pointer-generator with Apache License 2.0 6 votes vote down vote up
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Args:
      example_generator: a generator of tf.Examples from file. See data.example_generator"""
    while True:
      e = example_generator.next() # e is a tf.Example
      try:
        article_text = e.features.feature['article'].bytes_list.value[0] # the article text was saved under the key 'article' in the data files
        abstract_text = e.features.feature['abstract'].bytes_list.value[0] # the abstract text was saved under the key 'abstract' in the data files
      except ValueError:
        tf.logging.error('Failed to get article or abstract from example')
        continue
      if len(article_text)==0: # See https://github.com/abisee/pointer-generator/issues/1
        tf.logging.warning('Found an example with empty article text. Skipping it.')
      else:
        yield (article_text, abstract_text) 
Example #14
Source File: batcher.py    From pointer_summarizer with Apache License 2.0 5 votes vote down vote up
def text_generator(self, example_generator):
    while True:
      e = example_generator.next() # e is a tf.Example
      try:
        article_text = e.features.feature['article'].bytes_list.value[0] # the article text was saved under the key 'article' in the data files
        abstract_text = e.features.feature['abstract'].bytes_list.value[0] # the abstract text was saved under the key 'abstract' in the data files
      except ValueError:
        tf.logging.error('Failed to get article or abstract from example')
        continue
      if len(article_text)==0: # See https://github.com/abisee/pointer-generator/issues/1
        #tf.logging.warning('Found an example with empty article text. Skipping it.')
        continue
      else:
        yield (article_text, abstract_text) 
Example #15
Source File: batch_reader.py    From long-summarization with Apache License 2.0 4 votes vote down vote up
def _fill_example_queue(self):
        """Reads data from file and processes into Examples which are then placed into the example queue."""

        input_gen = self.text_generator(
            data.example_generator(self._data_path, self._single_pass))
        cnt = 0
        fail = 0
        while True:
            try:
                # read the next example from file. article and abstract are
                # both strings.
                (article_id, article_text, abstract_sents, labels,
                 section_names, sections) = six.next(input_gen)
            except StopIteration:  # if there are no more examples:
                tf.logging.info(
                    "The example generator for this example queue filling thread has exhausted data.")
                if self._single_pass:
                    tf.logging.info(
                        "single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                    self._finished_reading = True
                    break
                else:
                    raise Exception(
                        "single_pass mode is off but the example generator is out of data; error.")

            # Use the <s> and </s> tags in abstract to get a list of sentences.
#       abstract_sentences = [sent.strip() for sent in data.abstract2sents(''.join(abstract_sents))]
            abstract_sentences = [e.replace(data.SENTENCE_START, '').replace(data.SENTENCE_END, '').strip()
                                  for e in abstract_sents]

            
            # at least 2 sections, some articles do not have sections
            if "_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _" in article_text:
              continue 
            
            if len(sections) <= 1:
              continue
            
            if not sections or len(sections) == 0:
              continue
            # do not process that are too long
            if len(article_text) > self._hps.max_article_sents:
              continue
              
            # Do not process documents with unusually long or short abstracts
            abst_len = len(' '.join(abstract_sentences).split())
            if abst_len > self._hps.max_abstract_len or\
                    abst_len < self._hps.min_abstract_len:
                continue
            
            # Process into an Example.
            example = Example(article_text, abstract_sentences, article_id, sections, section_names, labels,
                              self._vocab, self._hps)
            # place the Example in the example queue.
            if example.discard:
              fail += 1
            cnt += 1
            if example is not None and not example.discard:
              self._example_queue.put(example)
            if cnt % 100 == 0:
              print('total in queue: {} of {}'.format(cnt - fail, cnt))