Java Code Examples for burlap.domain.singleagent.gridworld.GridWorldDomain#setTf()
The following examples show how to use
burlap.domain.singleagent.gridworld.GridWorldDomain#setTf().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0 | 6 votes |
public GridWorldDQN(String solverFile, double gamma) { //create the domain gwdg = new GridWorldDomain(11, 11); gwdg.setMapToFourRooms(); rf = new UniformCostRF(); tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION)); gwdg.setRf(rf); gwdg.setTf(tf); domain = gwdg.generateDomain(); goalCondition = new TFGoalCondition(tf); //set up the initial state of the task initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //set up the state hashing system for tabular algorithms hashingFactory = new SimpleHashableStateFactory(); //set up the environment for learners algorithms env = new SimulatedEnvironment(domain, initialState); dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma); }
Example 2
Source File: BasicBehavior.java From burlap_examples with MIT License | 6 votes |
public BasicBehavior(){ gwdg = new GridWorldDomain(11, 11); gwdg.setMapToFourRooms(); tf = new GridWorldTerminalFunction(10, 10); gwdg.setTf(tf); goalCondition = new TFGoalCondition(tf); domain = gwdg.generateDomain(); initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); hashingFactory = new SimpleHashableStateFactory(); env = new SimulatedEnvironment(domain, initialState); // VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap())); // observer.initGUI(); // env.addObservers(observer); }
Example 3
Source File: VITutorial.java From burlap_examples with MIT License | 5 votes |
public static void main(String [] args){ GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setTf(new GridWorldTerminalFunction(10, 10)); gwd.setMapToFourRooms(); //only go in intended directon 80% of the time gwd.setProbSucceedTransitionDynamics(0.8); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //setup vi with 0.99 discount factor, a value //function initialization that initializes all states to value 0, and which will //run for 30 iterations over the state space VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(0.0), 30); //run planning from our initial state Policy p = vi.planFromState(s); //evaluate the policy with one roll out visualize the trajectory Episode ea = PolicyUtils.rollout(p, s, domain.getModel()); Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea)); }
Example 4
Source File: QLTutorial.java From burlap_examples with MIT License | 5 votes |
public static void main(String[] args) { GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setMapToFourRooms(); gwd.setProbSucceedTransitionDynamics(0.8); gwd.setTf(new GridWorldTerminalFunction(10, 10)); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //create environment SimulatedEnvironment env = new SimulatedEnvironment(domain, s); //create Q-learning QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(), 0.1, 0.1); //run Q-learning and store results in a list List<Episode> episodes = new ArrayList<Episode>(1000); for(int i = 0; i < 1000; i++){ episodes.add(agent.runLearningEpisode(env)); env.resetEnvironment(); } Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, episodes); }
Example 5
Source File: PlotTest.java From burlap_examples with MIT License | 2 votes |
public static void main(String [] args){ GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world gw.setMapToFourRooms(); //four rooms layout gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate //ends when the agent reaches a location final TerminalFunction tf = new SinglePFTF( PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION)); //reward function definition final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1); gw.setTf(tf); gw.setRf(rf); final OOSADomain domain = gw.generateDomain(); //generate the grid world domain //setup initial state GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //initial state generator final ConstantStateGenerator sg = new ConstantStateGenerator(s); //set up the state hashing system for looking up states final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory(); /** * Create factory for Q-learning agent */ LearningAgentFactory qLearningFactory = new LearningAgentFactory() { public String getAgentName() { return "Q-learning"; } public LearningAgent generateAgent() { return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1); } }; //define learning environment SimulatedEnvironment env = new SimulatedEnvironment(domain, sg); //define experiment LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory); exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE, PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD); //start experiment exp.startExperiment(); }