Python data.abstract2sents() Examples
The following are 7 code examples of data.abstract2sents().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module data, or try the search function.
Example #1
Source File: batcher.py From TransferRL with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))
    while True:
        try:
            # FIX: use the next() builtin instead of the Python-2-only
            # input_gen.next(), which raises AttributeError on Python 3.
            (article, abstract) = next(input_gen)  # read the next example from file. article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        # Skip examples with an empty abstract. The comprehension above can never
        # yield None, so a plain truthiness check replaces the original
        # `is None or len(...) == 0` test with identical effect.
        if not abstract_sentences:
            continue
        example = Example(article, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #2
Source File: batcher.py From RLSeq2Seq with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))
    while True:
        try:
            # FIX: use the next() builtin instead of the Python-2-only
            # input_gen.next(), which raises AttributeError on Python 3.
            (article, abstract) = next(input_gen)  # read the next example from file. article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        example = Example(article, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #3
Source File: batcher.py From MAX-Text-Summarizer with Apache License 2.0 | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    generator = self.text_generator(
        data.example_generator(self._data_path, self._single_pass))
    while True:
        try:
            # Each yielded item is a single article string; there is no
            # separate abstract in this variant.
            article_text = next(generator)
        except StopIteration:
            # Generator is exhausted: stop the thread in single-pass mode,
            # otherwise this is an error.
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if not self._single_pass:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")
            tf.logging.info(
                "single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
            self._finished_reading = True
            break

        # The article text is passed as both source and target to Example
        # (no abstract2sents splitting happens in this variant).
        self._example_queue.put(
            Example(article_text, article_text, self._vocab, self._hps))
Example #4
Source File: batcher.py From Reinforce-Paraphrase-Generation with MIT License | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))
    while True:
        try:
            (article, abstract) = next(input_gen)  # read the next example from file. article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            if self._single_pass:
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")
            # FIX: removed an unreachable `break` that followed the raise above
            # (dead code -- raise always exits this branch).

        # abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]  # Use the <s> and </s> tags in abstract to get a list of sentences.
        # abstract = str(abstract, encoding='utf8')
        # The whole abstract is treated as a single "sentence" here.
        abstract_sentences = [abstract]
        example = Example(article, abstract_sentences, self._vocab)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #5
Source File: batcher.py From pointer_summarizer with Apache License 2.0 | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))
    while True:
        try:
            # FIX: use the next() builtin instead of the Python-2-only
            # input_gen.next(), which raises AttributeError on Python 3.
            (article, abstract) = next(input_gen)  # read the next example from file. article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        example = Example(article, abstract_sentences, self._vocab)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #6
Source File: batcher.py From pointer-generator with Apache License 2.0 | 6 votes |
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    input_gen = self.text_generator(data.example_generator(self._data_path, self._single_pass))
    while True:
        try:
            # FIX: use the next() builtin instead of the Python-2-only
            # input_gen.next(), which raises AttributeError on Python 3.
            (article, abstract) = next(input_gen)  # read the next example from file. article and abstract are both strings.
        except StopIteration:  # if there are no more examples:
            tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        # Use the <s> and </s> tags in abstract to get a list of sentences.
        abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
        example = Example(article, abstract_sentences, self._vocab, self._hps)  # Process into an Example.
        self._example_queue.put(example)  # place the Example in the example queue.
Example #7
Source File: batch_reader.py From long-summarization with Apache License 2.0 | 4 votes |
def _fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    input_gen = self.text_generator(
        data.example_generator(self._data_path, self._single_pass))
    cnt = 0   # total examples processed (kept + discarded)
    fail = 0  # examples flagged as discard by Example
    while True:
        try:
            # read the next example from file. article and abstract are
            # both strings.
            (article_id, article_text, abstract_sents, labels,
             section_names, sections) = six.next(input_gen)
        except StopIteration:  # if there are no more examples:
            tf.logging.info(
                "The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                tf.logging.info(
                    "single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception(
                    "single_pass mode is off but the example generator is out of data; error.")
        # Use the <s> and </s> tags in abstract to get a list of sentences.
        # abstract_sentences = [sent.strip() for sent in data.abstract2sents(''.join(abstract_sents))]
        # Strip the <s>/</s> sentence markers from each abstract sentence instead
        # of splitting on them.
        abstract_sentences = [e.replace(data.SENTENCE_START, '').replace(
            data.SENTENCE_END, '').strip() for e in abstract_sents]
        # Skip articles containing this underscore run (presumably a redaction /
        # table artifact in the corpus -- TODO confirm against the dataset).
        if "_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _" in article_text:
            continue
        # at least 2 sections, some articles do not have sections
        if len(sections) <= 1:
            continue
        # NOTE(review): redundant with the check above (len <= 1 already covers
        # empty); kept byte-identical.
        if not sections or len(sections) == 0:
            continue
        # do not process that are too long
        # NOTE(review): this compares the CHARACTER length of article_text against
        # a hyperparameter named max_article_sents (a sentence count?) -- looks
        # suspicious; confirm intended units before relying on it.
        if len(article_text) > self._hps.max_article_sents:
            continue
        # Do not process documents with unusually long or short abstracts
        abst_len = len(' '.join(abstract_sentences).split())
        if abst_len > self._hps.max_abstract_len or\
                abst_len < self._hps.min_abstract_len:
            continue
        # Process into an Example.
        example = Example(article_text, abstract_sentences, article_id, sections,
                          section_names, labels, self._vocab, self._hps)
        # place the Example in the example queue.
        if example.discard:
            fail += 1
        cnt += 1
        if example is not None and not example.discard:
            self._example_queue.put(example)
        if cnt % 100 == 0:
            print('total in queue: {} of {}'.format(cnt - fail, cnt))