Python rasa_nlu.training_data.load_data() Examples
The following are 30 code examples of rasa_nlu.training_data.load_data().
You may also want to check out all available functions and classes of the module rasa_nlu.training_data.
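Before the examples, here is a minimal sketch of the API they all share: load_data() parses a training-data file (Rasa JSON or Markdown, among other supported formats) and returns a TrainingData object. The file path below is a placeholder, and the printed attributes are the ones the tests further down assert against.

from rasa_nlu.training_data import load_data

# placeholder path -- point this at your own NLU training data file
td = load_data('data/nlu.md')

# TrainingData collects the parsed examples and metadata that the
# snippets below inspect
print(td.intents)                 # set of intent names, e.g. {'greet', 'goodbye'}
print(td.entities)                # set of entity names, e.g. {'location', 'cuisine'}
print(len(td.training_examples))  # all labelled examples
print(td.entity_synonyms)         # synonym mapping, e.g. {'veggie': 'vegetarian'}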
Example #1
Source File: bot.py From rasa_core with Apache License 2.0

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name="current")
    return model_directory
Example #2
Source File: bot.py From rasa_core with Apache License 2.0

# agent.load_data is awaited here, so the function must be a coroutine
async def train_dialogue(domain_file="domain.yml",
                         model_path="models/dialogue",
                         training_data_file="data/stories.md"):
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=3),
                            MappingPolicy(),
                            RestaurantPolicy(batch_size=100,
                                             epochs=400,
                                             validation_split=0.2)])

    training_data = await agent.load_data(training_data_file)
    agent.train(training_data)
    agent.persist(model_path)
    return agent
Example #3
Source File: trainer.py From weather-bot with MIT License

def train_dialogue(domain_file="domain.yml",
                   model_path="models/dialogue",
                   training_data_file="data/stories.md"):
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=3), KerasPolicy()])
    training_data = agent.load_data(training_data_file)
    agent.train(training_data,
                epochs=400,
                batch_size=100,
                validation_split=0.2)
    agent.persist(model_path)
    return agent
Example #4
Source File: test_featurizers.py From Rasa_NLU_Chi with Apache License 2.0

def test_spacy_featurizer_casing(spacy_nlp):
    from rasa_nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lower casing the spacy nlp component does when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \
            "Vectors are unequal for texts '{}' and '{}'".format(
                e.text, e.text.capitalize())
Example #5
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_demo_data(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42
    assert len(td.entity_examples) == 11

    assert td.entity_synonyms == {'Chines': 'chinese',
                                  'Chinese': 'chinese',
                                  'chines': 'chinese',
                                  'vegg': 'vegetarian',
                                  'veggie': 'vegetarian'}

    assert td.regex_features == [{"name": "greet", "pattern": "hey[^\s]*"},
                                 {"name": "zipcode", "pattern": "[0-9]{5}"}]
Example #6
Source File: test_multitenancy.py From Rasa_NLU_Chi with Apache License 2.0

def train_models(component_builder, data):
    # Retrain different multitenancy models
    def train(cfg_name, project_name):
        from rasa_nlu.train import create_persistor
        from rasa_nlu import training_data

        cfg = config.load(cfg_name)
        trainer = Trainer(cfg, component_builder)
        training_data = training_data.load_data(data)

        trainer.train(training_data)
        trainer.persist("test_projects", project_name=project_name)

    train("sample_configs/config_spacy.yml", "test_project_spacy_sklearn")
    train("sample_configs/config_mitie.yml", "test_project_mitie")
    train("sample_configs/config_mitie_sklearn.yml",
          "test_project_mitie_sklearn")
Example #7
Source File: test_interpreter.py From Rasa_NLU_Chi with Apache License 2.0

def test_interpreter(pipeline_template, component_builder, tmpdir):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(
        component_builder,
        "data/examples/rasa/demo-rasa.json",
        tmpdir.strpath,
        _conf)

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name'] or
                result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to
        # require the exact entities to be found
        for entity in result['entities']:
            assert entity['entity'] in td.entities
Example #8
Source File: test_featurizers.py From rasa_nlu with Apache License 2.0

def test_spacy_featurizer_casing(spacy_nlp):
    from rasa_nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lower casing the spacy nlp component does when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \
            "Vectors are unequal for texts '{}' and '{}'".format(
                e.text, e.text.capitalize())
Example #9
Source File: test_training_data.py From rasa_nlu with Apache License 2.0

def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v
                            for k, v in td.entity_synonyms.items()
                            if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}
    # The order changes based on different computers hence the grouping
    assert {td.lookup_tables[0]['name'],
            td.lookup_tables[1]['name']} == {'location', 'cuisine'}
    assert {len(td.lookup_tables[0]['elements']),
            len(td.lookup_tables[1]['elements'])} == {4, 6}
Example #10
Source File: time_train_test.py From rasa_lookup_demo with Apache License 2.0

def train_model():
    # trains a model and times it
    t = time()
    # training_data = load_data('demo_train.md')
    training_data = load_data("data/company_train_lookup.json")
    td_load_time = time() - t

    trainer = Trainer(config.load("config.yaml"))
    t = time()
    trainer.train(training_data)
    train_time = time() - t

    clear_model_dir()
    t = time()
    # Returns the directory the model is stored in
    model_directory = trainer.persist("./tmp/models")
    persist_time = time() - t

    return td_load_time, train_time, persist_time
Example #11
Source File: test_interpreter.py From rasa_nlu with Apache License 2.0

def test_interpreter(pipeline_template, component_builder, tmpdir):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(
        component_builder,
        "data/examples/rasa/demo-rasa.json",
        tmpdir.strpath,
        _conf)

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name'] or
                result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to
        # require the exact entities to be found
        for entity in result['entities']:
            assert entity['entity'] in td.entities
Example #12
Source File: test_training_data.py From rasa_nlu with Apache License 2.0

def test_luis_data():
    td = training_data.load_data('data/examples/luis/demo-restaurants.json')
    assert len(td.entity_examples) == 8
    assert len(td.intent_examples) == 28
    assert len(td.training_examples) == 28
    assert td.entity_synonyms == {}
    assert td.intents == {"affirm", "goodbye", "greet", "inform"}
    assert td.entities == {"location", "cuisine"}
Example #13
Source File: convert.py From Rasa_NLU_Chi with Apache License 2.0

def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == 'md':
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output)
Example #14
Source File: evaluate.py From Rasa_NLU_Chi with Apache License 2.0

def run_evaluation(data_path, model_path,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_predictions = get_intent_predictions(interpreter, test_data)
        logger.info("Intent evaluation results:")
        evaluate_intents(intent_targets, intent_predictions)

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        evaluate_entities(entity_targets, entity_predictions,
                          tokens, extractors)
Example #15
Source File: train.py From Rasa_NLU_Chi with Apache License 2.0

def do_train(cfg,  # type: RasaNLUModelConfig
             data,  # type: Text
             path=None,  # type: Optional[Text]
             project=None,  # type: Optional[Text]
             fixed_model_name=None,  # type: Optional[Text]
             storage=None,  # type: Optional[Text]
             component_builder=None,  # type: Optional[ComponentBuilder]
             url=None,  # type: Optional[Text]
             **kwargs  # type: Any
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)
    if url is not None:
        training_data = load_data_from_url(url, cfg.language)
    else:
        training_data = load_data(data, cfg.language)
    interpreter = trainer.train(training_data, **kwargs)
    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None
    return trainer, interpreter, persisted_path
Example #16
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_wit_data():
    td = training_data.load_data('data/examples/wit/demo-flights.json')
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"}
Example #17
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v
                            for k, v in td.entity_synonyms.items()
                            if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}
Example #18
Source File: convert.py From rasa_nlu with Apache License 2.0

def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == 'md':
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output)
Example #19
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_data_merging(files):
    td_reference = training_data.load_data(files[0])
    td = training_data.load_data(files[1])
    assert len(td.entity_examples) == len(td_reference.entity_examples)
    assert len(td.intent_examples) == len(td_reference.intent_examples)
    assert len(td.training_examples) == len(td_reference.training_examples)
    assert td.intents == td_reference.intents
    assert td.entities == td_reference.entities
    assert td.entity_synonyms == td_reference.entity_synonyms
    assert td.regex_features == td_reference.regex_features
Example #20
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_markdown_single_sections():
    td_regex_only = training_data.load_data(
        'data/test/markdown_single_sections/regex_only.md')
    assert td_regex_only.regex_features == [{"name": "greet",
                                             "pattern": "hey[^\s]*"}]

    td_syn_only = training_data.load_data(
        'data/test/markdown_single_sections/synonyms_only.md')
    assert td_syn_only.entity_synonyms == {'Chines': 'chinese',
                                           'Chinese': 'chinese'}
Example #21
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_multiword_entities():
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "New York City"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(entities[0],
                                                      example.text,
                                                      tokens)
        assert start == 4
        assert end == 7
Example #22
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_nonascii_entities():
    data = """
{
  "luis_schema_version": "2.0",
  "utterances" : [
    {
      "text": "I am looking for a ßäæ ?€ö) item",
      "intent": "unk",
      "entities": [
        {
          "entity": "description",
          "startPos": 19,
          "endPos": 26
        }
      ]
    }
  ]
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        entity = entities[0]
        assert entity["value"] == "ßäæ ?€ö)"
        assert entity["start"] == 19
        assert entity["end"] == 27
        assert entity["entity"] == "description"
Example #23
Source File: test_training_data.py From Rasa_NLU_Chi with Apache License 2.0

def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format,
                          language)
    td = training_data.load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = training_data.load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original
    # file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json',
                          language)
    rto = training_data.load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms

    # If the above assert fails - this can be used
    # to dump to the file and diff using git
    # with io.open(gold_standard_file) as f:
    #     f.write(td.as_json(indent=2))
Example #24
Source File: test_evaluation.py From Rasa_NLU_Chi with Apache License 2.0

def test_drop_intents_below_freq():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    clean_td = drop_intents_below_freq(td, 0)
    assert clean_td.intents == {'affirm', 'goodbye', 'greet',
                                'restaurant_search'}

    clean_td = drop_intents_below_freq(td, 10)
    assert clean_td.intents == {'affirm', 'restaurant_search'}
Example #25
Source File: visualize.py From rasa_core with Apache License 2.0

# agent.visualize is awaited here, so the function must be a coroutine
async def visualize(config_path: Text, domain_path: Text, stories_path: Text,
                    nlu_data_path: Text, output_path: Text,
                    max_history: int):
    from rasa.core.agent import Agent
    from rasa.core import config

    policies = config.load(config_path)

    agent = Agent(domain_path, policies=policies)

    # this is optional, only needed if the `/greet` type of
    # messages in the stories should be replaced with actual
    # messages (e.g. `hello`)
    if nlu_data_path is not None:
        from rasa_nlu.training_data import load_data

        nlu_data_path = load_data(nlu_data_path)
    else:
        nlu_data_path = None

    logger.info("Starting to visualize stories...")
    await agent.visualize(stories_path, output_path,
                          max_history,
                          nlu_training_data=nlu_data_path)

    full_output_path = "file://{}".format(os.path.abspath(output_path))
    logger.info("Finished graph creation. Saved into {}".format(
        full_output_path))

    import webbrowser
    webbrowser.open(full_output_path)
Example #26
Source File: trainer.py From weather-bot with MIT License

def train_nlu():
    training_data = load_data('data/nlu-data.md')
    trainer = Trainer(config.load("nlu-config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name="current")
    return model_directory
Example #27
Source File: bot.py From rasa_bot with Apache License 2.0

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu.config import RasaNLUModelConfig
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    training_data = load_data("data/nlu.json")
    trainer = Trainer(config.load("data/nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist("models/",
                                      project_name="ivr",
                                      fixed_model_name="demo")
    return model_directory
Example #28
Source File: test_training_data.py From rasa_nlu with Apache License 2.0

def test_wit_data():
    td = training_data.load_data('data/examples/wit/demo-flights.json')
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"}
Example #29
Source File: test_training_data.py From rasa_nlu with Apache License 2.0

def test_lookup_table_json():
    lookup_fname = 'data/test/lookup_tables/plates.txt'
    td_lookup = training_data.load_data(
        'data/test/lookup_tables/lookup_table.json')
    assert td_lookup.lookup_tables[0]['name'] == 'plates'
    assert td_lookup.lookup_tables[0]['elements'] == lookup_fname
    assert td_lookup.lookup_tables[1]['name'] == 'drinks'
    assert td_lookup.lookup_tables[1]['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate']
Example #30
Source File: test_training_data.py From rasa_nlu with Apache License 2.0

def test_lookup_table_md():
    lookup_fname = 'data/test/lookup_tables/plates.txt'
    td_lookup = training_data.load_data(
        'data/test/lookup_tables/lookup_table.md')
    assert td_lookup.lookup_tables[0]['name'] == 'plates'
    assert td_lookup.lookup_tables[0]['elements'] == lookup_fname
    assert td_lookup.lookup_tables[1]['name'] == 'drinks'
    assert td_lookup.lookup_tables[1]['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate']