Python nltk.tokenize.treebank.TreebankWordDetokenizer() Examples
The following are 7 code examples of nltk.tokenize.treebank.TreebankWordDetokenizer().
Each example lists its original project and source file.
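For reference, a minimal sketch of what the detokenizer does (assuming NLTK is installed; the sample sentence is purely illustrative):

from nltk.tokenize.treebank import TreebankWordTokenizer, TreebankWordDetokenizer

# Tokenize into Penn-Treebank-style tokens, e.g. ['Do', "n't", 'detokenize', ...]
tokens = TreebankWordTokenizer().tokenize("Don't detokenize this, please!")

# Rejoin the tokens into a surface string close to the original sentence.
text = TreebankWordDetokenizer().detokenize(tokens)
print(text)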
Example #1
Source File: treebank_encoder.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License
def __init__(self, *args, **kwargs):
    if 'tokenize' in kwargs:
        raise TypeError('``TreebankEncoder`` does not take keyword argument ``tokenize``.')

    if 'detokenize' in kwargs:
        raise TypeError('``TreebankEncoder`` does not take keyword argument ``detokenize``.')

    try:
        import nltk

        # Required for moses
        nltk.download('perluniprops')
        nltk.download('nonbreaking_prefixes')

        from nltk.tokenize.treebank import TreebankWordTokenizer
        from nltk.tokenize.treebank import TreebankWordDetokenizer
    except ImportError:
        print("Please install NLTK. "
              "See the docs at http://nltk.org for more information.")
        raise

    super().__init__(
        *args,
        tokenize=TreebankWordTokenizer().tokenize,
        detokenize=TreebankWordDetokenizer().detokenize,
        **kwargs)
Example #2
Source File: solver.py From neural_chat with MIT License
def __init__(self, config, train_data_loader, eval_data_loader, vocab, is_train=True, model=None):
    self.config = config
    self.epoch_i = 0
    self.train_data_loader = train_data_loader
    self.eval_data_loader = eval_data_loader
    self.vocab = vocab
    self.is_train = is_train
    self.model = model
    self.detokenizer = Detok()

    if config.emotion or config.infersent or config.context_input_only:
        self.botmoji = Botmoji()
        self.botsent = Botsent(config.dataset_dir.joinpath('train'), version=1, explained_var=0.95)

    # Info for saving epoch metrics to a csv file
    if self.config.mode == 'train':
        self.pandas_path = os.path.join(config.save_path, "metrics.csv")
        self.outfile_dict = {k: getattr(config, k) for k in OUTPUT_FILE_PARAMS}
        self.df = pd.DataFrame()

    self.save_priming_sentences()
Example #3
Source File: solver.py From neural_chat with MIT License
def __init__(self, config, train_data_loader, eval_data_loader, vocab, is_train=True, model=None):
    self.config = config
    self.epoch_i = 0
    self.train_data_loader = train_data_loader
    self.eval_data_loader = eval_data_loader
    self.vocab = vocab
    self.is_train = is_train
    self.model = model
    self.detokenizer = Detok()

    if config.emotion or config.infersent or config.context_input_only:
        self.botmoji = Botmoji()
        self.botsent = Botsent(config.dataset_dir.joinpath('train'), version=1, explained_var=0.95)

    # Info for saving epoch metrics to a csv file
    if self.config.mode == 'train':
        self.pandas_path = os.path.join(config.save_path, "metrics.csv")
        self.outfile_dict = {k: getattr(config, k) for k in OUTPUT_FILE_PARAMS}
        self.df = pd.DataFrame()

    self.save_priming_sentences()
Example #4
Source File: utils.py From ConvLab with MIT License
def get_detokenize():
    return lambda x: TreebankWordDetokenizer().detokenize(x)
Example #5
Source File: utils.py From NeuralDialog-LaRL with Apache License 2.0
def get_detokenize():
    return lambda x: TreebankWordDetokenizer().detokenize(x)
Example #6
Source File: utils.py From NeuralDialog-ZSDG with Apache License 2.0
def get_dekenize():
    return lambda x: TreebankWordDetokenizer().detokenize(x)
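Examples #4 through #6 all wrap the detokenizer in a small factory function that returns a callable. A brief usage sketch of that pattern (the token list below is a made-up illustration):

from nltk.tokenize.treebank import TreebankWordDetokenizer

def get_detokenize():
    return lambda x: TreebankWordDetokenizer().detokenize(x)

detok = get_detokenize()
# The returned callable joins a list of tokens back into a string,
# attaching punctuation without a leading space.
print(detok(["hello", ",", "world", "!"]))  # roughly: hello, world!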
Example #7
Source File: cnndm.py From nlp-recipes with MIT License
def detokenize(line):
    """
    Detokenizes the processed CNN/DM dataset to recover the original dataset,
    e.g. converts "-LRB-" back to "(" and "-RRB-" back to ")".
    """
    line = line.strip().replace("``", '"').replace("''", '"').replace("`", "'")
    twd = TreebankWordDetokenizer()
    s_list = [
        twd.detokenize(x.strip().split(" "), convert_parentheses=True)
        for x in line.split("<S_SEP>")
    ]
    return " ".join(s_list)
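A hedged illustration of how this function might be called: the input line is invented, but it follows the CNN/DM conventions the code handles (Penn-Treebank escapes such as -LRB-/-RRB- and the <S_SEP> sentence separator), and the exact spacing of the output depends on NLTK's detokenizer rules:

from nltk.tokenize.treebank import TreebankWordDetokenizer

sample = "officials -LRB- unnamed -RRB- said `` no comment '' . <S_SEP> the decision is final ."
print(detokenize(sample))
# Expected to read roughly: officials (unnamed) said "no comment". the decision is final.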