burlap.mdp.singleagent.environment.SimulatedEnvironment Java Examples
The following examples show how to use
burlap.mdp.singleagent.environment.SimulatedEnvironment.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
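Before the individual examples, here is a minimal, self-contained sketch of the typical lifecycle: construct a SimulatedEnvironment from a domain and an initial state, step it with actions chosen by a policy, and reset it between episodes. This is an illustrative sketch rather than code from any of the projects below; the domain, initial state, policy, and the 200-step cap are placeholder inputs.

import burlap.behavior.policy.Policy;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;

public class SimulatedEnvironmentSketch {

    //runs a few episodes of the given (assumed already-learned) policy in a SimulatedEnvironment
    public static void runEpisodes(SADomain domain, State initialState, Policy policy) {
        SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
        for (int episode = 0; episode < 10; episode++) {
            int steps = 0;
            while (!env.isInTerminalState() && steps < 200) {
                Action a = policy.action(env.currentObservation()); //query the policy at the current state
                EnvironmentOutcome eo = env.executeAction(a);       //eo.r is the reward, eo.op the next state
                steps++;
            }
            env.resetEnvironment(); //start the next episode from the initial state again
        }
    }
}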
Example #1
Source File: Main.java From cs7641-assignment4 with MIT License

/**
 * Runs a learning experiment and shows some cool charts. Apparently, this is only useful for
 * Q-Learning, so I only call this method when Q-Learning is selected and the appropriate flag
 * is enabled.
 */
private static void learningExperimenter(Problem problem, LearningAgent agent, SimulatedEnvironment simulatedEnvironment) {
    LearningAlgorithmExperimenter experimenter = new LearningAlgorithmExperimenter(simulatedEnvironment, 10,
            problem.getNumberOfIterations(Algorithm.QLearning), new LearningAgentFactory() {

        public String getAgentName() {
            return Algorithm.QLearning.getTitle();
        }

        public LearningAgent generateAgent() {
            return agent;
        }
    });

    /*
     * Try different PerformanceMetric values below to display different charts.
     */
    experimenter.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);
    experimenter.startExperiment();
}
Example #2
Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0

public GridWorldDQN(String solverFile, double gamma) {

    //create the domain
    gwdg = new GridWorldDomain(11, 11);
    gwdg.setMapToFourRooms();
    rf = new UniformCostRF();
    tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
    gwdg.setRf(rf);
    gwdg.setTf(tf);
    domain = gwdg.generateDomain();

    goalCondition = new TFGoalCondition(tf);

    //set up the initial state of the task
    initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //set up the state hashing system for tabular algorithms
    hashingFactory = new SimpleHashableStateFactory();

    //set up the environment for learning algorithms
    env = new SimulatedEnvironment(domain, initialState);

    dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);
}
Example #3
Source File: GradientDescentSarsaLam.java From burlap with Apache License 2.0

/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("Planning requires a model, but none is provided.");
    }

    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    int eCount = 0;
    do{
        this.runLearningEpisode(env);
        eCount++;
    }while(eCount < numEpisodesForPlanning && maxWeightChangeInLastEpisode > maxWeightChangeForPlanningTermination);

    return new GreedyQPolicy(this);
}
Example #4
Source File: QLearning.java From burlap with Apache License 2.0

/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("QLearning (and its subclasses) cannot execute planFromState because a model is not specified.");
    }

    SimulatedEnvironment env = new SimulatedEnvironment(this.domain, initialState);

    int eCount = 0;
    do{
        this.runLearningEpisode(env, this.maxEpisodeSize);
        eCount++;
    }while(eCount < numEpisodesForPlanning && maxQChangeInLastEpisode > maxQChangeForPlanningTermination);

    return new GreedyQPolicy(this);
}
Example #5
Source File: BasicBehavior.java From burlap_examples with MIT License

public BasicBehavior(){
    gwdg = new GridWorldDomain(11, 11);
    gwdg.setMapToFourRooms();
    tf = new GridWorldTerminalFunction(10, 10);
    gwdg.setTf(tf);
    goalCondition = new TFGoalCondition(tf);
    domain = gwdg.generateDomain();

    initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
    hashingFactory = new SimpleHashableStateFactory();
    env = new SimulatedEnvironment(domain, initialState);

//    VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//    observer.initGUI();
//    env.addObservers(observer);
}
Example #6
Source File: ExampleGridWorld.java From burlap_examples with MIT License

public static void main(String [] args){

    ExampleGridWorld gen = new ExampleGridWorld();
    gen.setGoalLocation(10, 10);
    SADomain domain = gen.generateDomain();

    State initialState = new EXGridState(0, 0);
    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    Visualizer v = gen.getVisualizer();
    VisualExplorer exp = new VisualExplorer(domain, env, v);

    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("a", ACTION_WEST, "");

    exp.initGUI();
}
Example #7
Source File: Main.java From cs7641-assignment4 with MIT License

/**
 * Here is where the magic happens. This is where I loop through the specific number
 * of episodes (iterations) and run the specific algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms. The specific details are specified through the
 * PlannerFactory interface.
 *
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory,
        State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
    ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
    SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
    Planner planner = null;
    Policy policy = null;

    for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
        long startTime = System.nanoTime();
        planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
        policy = planner.planFromState(initialState);

        /*
         * If we haven't converged, following the policy will lead the agent wandering around
         * and it might never reach the goal. To avoid this, we need to set the maximum number
         * of steps to take before terminating the policy rollout. I decided to set this maximum
         * at the number of grid locations in our map (width * width). This should give the
         * agent plenty of room to wander around.
         *
         * The smaller this number is, the faster the algorithm will run.
         */
        int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

        Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
        analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
    }

    if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
        learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
    }

    if (SHOW_VISUALIZATION && planner != null && policy != null) {
        visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
    }
}
Example #8
Source File: IRLExample.java From burlap_examples with MIT License

/**
 * Creates a visual explorer that you can use to record trajectories. Use the "`" key to reset to a random initial state.
 * Use the wasd keys to move north, south, east, and west, respectively. To enable recording,
 * first open up the shell and type: "rec -b" (you only need to type this once). Then you can move in the explorer as normal.
 * Each demonstration begins after an environment reset.
 * After each demonstration that you want to keep, go back to the shell and type "rec -r".
 * If you reset the environment before you type that,
 * the episode will be discarded. To temporarily view the episodes you've created, in the shell type "episode -v". To actually record your
 * episodes to file, type "rec -w path/to/save/directory base_file_name". For example, "rec -w irl_demos demo".
 * A recommendation for examples is to record two demonstrations that both go to the pink cell while avoiding blue ones
 * and do so from two different start locations on the left (if you keep resetting the environment, it will change where the agent starts).
 */
public void launchExplorer(){
    SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.sg);
    VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800);
    exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
    exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
    exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
    exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");

    //exp.enableEpisodeRecording("r", "f", "irlDemo");

    exp.initGUI();
}
Example #9
Source File: MCVideo.java From burlap_examples with MIT License

public static void main(String[] args) {

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    NormalizedVariableFeatures features = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
    FourierBasis fb = new FourierBasis(features, 4);

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain,
            new MCState(mcGen.physParams.valleyPos(), 0));
    EnvironmentServer envServ = new EnvironmentServer(env, vob);

    for(int i = 0; i < 100; i++){
        PolicyUtils.rollout(p, envServ);
        envServ.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #10
Source File: QLTutorial.java From burlap_examples with MIT License

public static void main(String[] args) {

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();
    gwd.setProbSucceedTransitionDynamics(0.8);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));

    SADomain domain = gwd.generateDomain();

    //get initial state with agent in 0,0
    State s = new GridWorldState(new GridAgent(0, 0));

    //create environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

    //create Q-learning
    QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(), 0.1, 0.1);

    //run Q-learning and store results in a list
    List<Episode> episodes = new ArrayList<Episode>(1000);
    for(int i = 0; i < 1000; i++){
        episodes.add(agent.runLearningEpisode(env));
        env.resetEnvironment();
    }

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
Example #11
Source File: ActorCritic.java From burlap with Apache License 2.0

public void planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("Planning requires a model, but none is provided.");
    }

    SimulatedEnvironment env = new SimulatedEnvironment(this.model, initialState);

    for(int i = 0; i < numEpisodesForPlanning; i++){
        this.runLearningEpisode(env, this.maxEpisodeSize);
    }
}
Example #12
Source File: BFSMarkovOptionModel.java From burlap with Apache License 2.0

@Override
public EnvironmentOutcome sample(State s, Action a) {
    if(!(a instanceof Option)){
        return model.sample(s, a);
    }

    Option o = (Option)a;
    SimulatedEnvironment env = new SimulatedEnvironment(model, s);
    return o.control(env, discount);
}
Example #13
Source File: TigerDomain.java From burlap with Apache License 2.0

/**
 * Main method for interacting with the tiger domain via an {@link EnvironmentShell}.
 * By default, the TerminalExplorer interacts with the partially observable environment ({@link burlap.mdp.singleagent.pomdp.SimulatedPOEnvironment}),
 * which means you only get to see the observations that the agent would. However, if you set the first command-line argument
 * to be "h", then the explorer will explore the underlying fully observable MDP states.
 * @param args either empty or ["h"]; provide "h" to explore the underlying fully observable tiger MDP.
 */
public static void main(String [] args){

    TigerDomain dgen = new TigerDomain(false);
    PODomain domain = (PODomain)dgen.generateDomain();

    StateGenerator tigerGenerator = TigerDomain.randomSideStateGenerator(0.5);

    Environment observableEnv = new SimulatedEnvironment(domain, tigerGenerator);
    Environment poEnv = new SimulatedPOEnvironment(domain, tigerGenerator);

    Environment envTouse = poEnv;
    if(args.length > 0 && args[0].equals("h")){
        envTouse = observableEnv;
    }

    EnvironmentShell shell = new EnvironmentShell(domain, envTouse);
    shell.start();
}
Example #14
Source File: ExampleOOGridWorld.java From burlap_examples with MIT License

public static void main(String [] args){

    ExampleOOGridWorld gen = new ExampleOOGridWorld();
    OOSADomain domain = gen.generateDomain();
    State initialState = new GenericOOState(new ExGridAgent(0, 0), new EXGridLocation(10, 10, "loc0"));

    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    Visualizer v = gen.getVisualizer();
    VisualExplorer exp = new VisualExplorer(domain, env, v);

    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("a", ACTION_WEST, "");

    exp.initGUI();
}
Example #15
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License

public static void MCLSPIRBF(){

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();
    MCState s = new MCState(mcGen.physParams.valleyPos(), 0.);

    NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    RBFFeatures rbf = new RBFFeatures(inputFeatures, true);
    FlatStateGridder gridder = new FlatStateGridder()
            .gridDimension("x", mcGen.physParams.xmin, mcGen.physParams.xmax, 5)
            .gridDimension("v", mcGen.physParams.vmin, mcGen.physParams.vmax, 5);

    List<State> griddedStates = gridder.gridState(s);
    DistanceMetric metric = new EuclideanDistance();
    for(State g : griddedStates){
        rbf.addRBF(new GaussianRBF(inputFeatures.features(g), metric, 0.2));
    }

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(rbf, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    env.addObservers(vob);

    for(int i = 0; i < 5; i++){
        PolicyUtils.rollout(p, env);
        env.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #16
Source File: OptionsExample.java From burlap_examples with MIT License

public static Episode optionExecuteResult(SADomain domain, Option o, State s){
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    EnvironmentOptionOutcome eo = o.control(env, 0.99);
    return eo.episode;
}
Example #17
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License

public static void MCLSPIFB(){

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
    FourierBasis fb = new FourierBasis(inputFeatures, 4);

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain,
            new MCState(mcGen.physParams.valleyPos(), 0.));
    env.addObservers(vob);

    for(int i = 0; i < 5; i++){
        PolicyUtils.rollout(p, env);
        env.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #18
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License

public static void LLSARSA(){

    LunarLanderDomain lld = new LunarLanderDomain();
    OOSADomain domain = lld.generateDomain();

    LLState s = new LLState(new LLAgent(5, 0, 0), new LLBlock.LLPad(75, 95, 0, 10, "pad"));

    ConcatenatedObjectFeatures inputFeatures = new ConcatenatedObjectFeatures()
            .addObjectVectorizion(LunarLanderDomain.CLASS_AGENT, new NumericVariableFeatures());

    int nTilings = 5;
    double resolution = 10.;

    double xWidth = (lld.getXmax() - lld.getXmin()) / resolution;
    double yWidth = (lld.getYmax() - lld.getYmin()) / resolution;
    double velocityWidth = 2 * lld.getVmax() / resolution;
    double angleWidth = 2 * lld.getAngmax() / resolution;

    TileCodingFeatures tilecoding = new TileCodingFeatures(inputFeatures);
    tilecoding.addTilingsForAllDimensionsWithWidths(
            new double []{xWidth, yWidth, velocityWidth, velocityWidth, angleWidth},
            nTilings,
            TilingArrangement.RANDOM_JITTER);

    double defaultQ = 0.5;
    DifferentiableStateActionValue vfa = tilecoding.generateVFA(defaultQ/nTilings);
    GradientDescentSarsaLam agent = new GradientDescentSarsaLam(domain, 0.99, vfa, 0.02, 0.5);

    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    List<Episode> episodes = new ArrayList<Episode>();
    for(int i = 0; i < 5000; i++){
        Episode ea = agent.runLearningEpisode(env);
        episodes.add(ea);
        System.out.println(i + ": " + ea.maxTimeStep());
        env.resetEnvironment();
    }

    Visualizer v = LLVisualizer.getVisualizer(lld.getPhysParams());
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
Example #19
Source File: PolicyUtils.java From burlap with Apache License 2.0

/**
 * This method will return an episode that results from following the given policy from state s. The episode will terminate
 * when the policy reaches a terminal state.
 * @param p the {@link Policy} to roll out
 * @param s the state from which to roll out the policy
 * @param model the model from which to sample
 * @return an {@link Episode} object that records the events from following the policy.
 */
public static Episode rollout(Policy p, State s, SampleModel model){
    return rollout(p, new SimulatedEnvironment(model, s));
}
Example #20
Source File: PolicyUtils.java From burlap with Apache License 2.0

/**
 * This method will return an episode that results from following the given policy from state s. The episode will terminate
 * when the policy reaches a terminal state or when the number of steps surpasses maxSteps.
 * @param p the {@link Policy} to roll out
 * @param s the state from which to roll out the policy
 * @param model the model from which to sample state transitions
 * @param maxSteps the maximum number of steps to take before terminating the policy rollout.
 * @return an {@link Episode} object that records the events from following the policy.
 */
public static Episode rollout(Policy p, State s, SampleModel model, int maxSteps){
    return rollout(p, new SimulatedEnvironment(model, s), maxSteps);
}
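For reference, a typical call of the bounded rollout above looks like the following. This is a usage sketch: policy, initialState, and domain stand in for whatever the caller has already built, and 200 is an arbitrary step cap.

//roll the policy out in an internally created SimulatedEnvironment for at most 200 steps
Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), 200);
System.out.println("steps taken: " + episode.maxTimeStep());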
Example #21
Source File: PlotTest.java From burlap_examples with MIT License

public static void main(String [] args){

    GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
    gw.setMapToFourRooms(); //four rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

    //ends when the agent reaches a location
    final TerminalFunction tf = new SinglePFTF(
            PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

    //reward function definition
    final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

    gw.setTf(tf);
    gw.setRf(rf);

    final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

    //setup initial state
    GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //initial state generator
    final ConstantStateGenerator sg = new ConstantStateGenerator(s);

    //set up the state hashing system for looking up states
    final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

    /**
     * Create factory for Q-learning agent
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    //define learning environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

    //define experiment
    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory);

    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    //start experiment
    exp.startExperiment();
}
Example #22
Source File: VisualExplorer.java From burlap with Apache License 2.0

/**
 * Initializes with a domain and initial state, automatically creating a {@link burlap.mdp.singleagent.environment.SimulatedEnvironment}
 * as the environment with which to interact. The created {@link burlap.mdp.singleagent.environment.SimulatedEnvironment} will
 * have a {@link burlap.mdp.singleagent.common.NullRewardFunction} and {@link burlap.mdp.auxiliary.common.NullTermination} functions set.
 * @param domain the domain to explore
 * @param painter the 2D state visualizer
 * @param baseState the initial state from which to explore
 */
public VisualExplorer(SADomain domain, Visualizer painter, State baseState){
    Environment env = new SimulatedEnvironment(domain, baseState);
    this.init(domain, env, painter, 800, 800);
}
Example #23
Source File: EnvironmentShell.java From burlap with Apache License 2.0

/**
 * Creates a shell for a {@link SimulatedEnvironment} rooted at the input state using std in and std out.
 * @param domain the BURLAP domain
 * @param s the initial state for the simulated environment that will be created.
 */
public EnvironmentShell(SADomain domain, State s){
    this(domain, new SimulatedEnvironment(domain, s), System.in, System.out);
}
Example #24
Source File: PlannerFactory.java From cs7641-assignment4 with MIT License
Planner createPlanner(int episodeIndex, SADomain domain, HashableStateFactory hashingFactory, SimulatedEnvironment simulatedEnvironment);
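The interface above only declares the factory method, so for context here is one hypothetical way it could be implemented; this sketch is not the assignment's actual code. It lazily builds a BURLAP QLearning agent (which implements both Planner and LearningAgent) and advances it by one learning episode on the shared SimulatedEnvironment each call, with the 0.99/0.3/0.1 hyperparameters chosen arbitrarily.

//Hypothetical implementation sketch, not taken from cs7641-assignment4
PlannerFactory qLearningPlannerFactory = new PlannerFactory() {

    private QLearning agent; //reused across calls so learning accumulates between episodes

    public Planner createPlanner(int episodeIndex, SADomain domain, HashableStateFactory hashingFactory,
            SimulatedEnvironment simulatedEnvironment) {
        if (agent == null) {
            agent = new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
        agent.runLearningEpisode(simulatedEnvironment); //learn from one more episode of experience
        simulatedEnvironment.resetEnvironment();        //put the environment back at its initial state
        return agent; //QLearning implements Planner, so it can be returned directly
    }
};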