burlap.behavior.singleagent.learning.LearningAgent Java Examples
The following examples show how to use
burlap.behavior.singleagent.learning.LearningAgent.
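All of the examples below follow the same basic pattern: a LearningAgent is driven by repeatedly calling runLearningEpisode against an Environment (optionally capped at a maximum number of steps), and the environment is reset between episodes. The following is a minimal sketch of that loop, assuming the agent and environment have already been constructed elsewhere; it illustrates the calling pattern and is not one of the original project files.

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.singleagent.environment.Environment;

// Minimal sketch of the common usage pattern (illustration only, not from the projects below).
public class LearningAgentSketch {

    public static void runEpisodes(LearningAgent agent, Environment env, int numEpisodes) {
        for (int i = 0; i < numEpisodes; i++) {
            // run one learning episode; the agent updates its own solution as it acts
            Episode e = agent.runLearningEpisode(env);
            System.out.println("episode " + i + " finished after " + e.maxTimeStep() + " steps");
            // return the environment to its initial state before the next episode
            env.resetEnvironment();
        }
    }
}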
Example #1
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * Runs a learning experiment and shows some cool charts. Apparently, this is only useful for
 * Q-Learning, so I only call this method when Q-Learning is selected and the appropriate flag
 * is enabled.
 */
private static void learningExperimenter(Problem problem, LearningAgent agent, SimulatedEnvironment simulatedEnvironment) {
    LearningAlgorithmExperimenter experimenter = new LearningAlgorithmExperimenter(simulatedEnvironment, 10,
            problem.getNumberOfIterations(Algorithm.QLearning), new LearningAgentFactory() {

        public String getAgentName() {
            return Algorithm.QLearning.getTitle();
        }

        public LearningAgent generateAgent() {
            return agent;
        }
    });

    /*
     * Try different PerformanceMetric values below to display different charts.
     */
    experimenter.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);
    experimenter.startExperiment();
}
Example #2
Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0
/**
 * Runs a trial for an agent generated by the given factory when interpreting trial length as a number of total steps.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runStepBoundTrial(LearningAgentFactory agentFactory){

    //temporarily disable plotter data collection to avoid possible contamination for any actions taken by the agent generation
    //(e.g., if there is pre-test training)
    this.plotter.toggleDataCollection(false);

    LearningAgent agent = agentFactory.generateAgent();

    this.plotter.toggleDataCollection(true); //turn it back on to begin

    this.plotter.startNewTrial();

    int stepsRemaining = this.trialLength;
    while(stepsRemaining > 0){
        Episode ea = agent.runLearningEpisode(this.environmentSever, stepsRemaining);
        stepsRemaining -= ea.numTimeSteps()-1; //-1 because we want to subtract the number of actions, not the number of states seen
        this.plotter.endEpisode();
        this.environmentSever.resetEnvironment();
    }

    this.plotter.endTrial();
}
Example #3
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. This method loops through the specified number of episodes
 * (iterations) and runs the chosen algorithm. To keep things nice and clean, I use this method
 * to run all three algorithms; the algorithm-specific details are supplied through the
 * PlannerFactory interface.
 *
 * This method collects all the information from the algorithm and packs it into an Analysis
 * instance that later gets dumped to the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
    ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
    SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
    Planner planner = null;
    Policy policy = null;

    for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
        long startTime = System.nanoTime();
        planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
        policy = planner.planFromState(initialState);

        /*
         * If we haven't converged, following the policy will lead the agent wandering around
         * and it might never reach the goal. To avoid this, we need to set the maximum number
         * of steps to take before terminating the policy rollout. I decided to set this maximum
         * at the number of grid locations in our map (width * width). This should give the
         * agent plenty of room to wander around.
         *
         * The smaller this number is, the faster the algorithm will run.
         */
        int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

        Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
        analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
    }

    if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
        learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
    }

    if (SHOW_VISUALIZATION && planner != null && policy != null) {
        visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
    }
}
Example #4
Source File: LearningAgentToSGAgentInterface.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain The stochastic games {@link burlap.mdp.stochasticgames.SGDomain} in which this agent will interact.
 * @param learningAgent the {@link burlap.behavior.singleagent.learning.LearningAgent} that will handle this {@link SGAgent}'s control.
 * @param agentName the name of the agent
 * @param agentType the {@link SGAgentType} for the agent defining its action space
 */
public LearningAgentToSGAgentInterface(SGDomain domain, LearningAgent learningAgent, String agentName, SGAgentType agentType){
    this.init(domain, agentName, agentType);
    this.learningAgent = learningAgent;

    //if the wrapped learning agent is also an MDPSolver, give it a single-agent domain containing this agent's action types
    if(this.learningAgent instanceof MDPSolver){
        SADomain sadomain = new SADomain();
        for(ActionType actionType : agentType.actions){
            sadomain.addActionType(actionType);
        }
        ((MDPSolver) this.learningAgent).setDomain(sadomain);
    }
}
Example #5
Source File: BasicBehavior.java From burlap_examples with MIT License
public void qLearningExample(String outputPath){

    LearningAgent agent = new QLearning(domain, 0.99, hashingFactory, 0., 1.);

    //run learning for 50 episodes
    for(int i = 0; i < 50; i++){
        Episode e = agent.runLearningEpisode(env);

        e.write(outputPath + "ql_" + i);
        System.out.println(i + ": " + e.maxTimeStep());

        //reset environment for next learning episode
        env.resetEnvironment();
    }

    simpleValueFunctionVis((ValueFunction)agent, new GreedyQPolicy((QProvider) agent));
}
Example #6
Source File: BasicBehavior.java From burlap_examples with MIT License
public void sarsaLearningExample(String outputPath){

    LearningAgent agent = new SarsaLam(domain, 0.99, hashingFactory, 0., 0.5, 0.3);

    //run learning for 50 episodes
    for(int i = 0; i < 50; i++){
        Episode e = agent.runLearningEpisode(env);

        e.write(outputPath + "sarsa_" + i);
        System.out.println(i + ": " + e.maxTimeStep());

        //reset environment for next learning episode
        env.resetEnvironment();
    }
}
Example #7
Source File: BasicBehavior.java From burlap_examples with MIT License
public void experimentAndPlotter(){

    //different reward function for more structured performance plots
    ((FactoredModel)domain.getModel()).setRf(new GoalBasedRF(this.goalCondition, 5.0, -0.1));

    /**
     * Create factories for Q-learning agent and SARSA agent to compare
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-Learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    LearningAgentFactory sarsaLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "SARSA";
        }

        public LearningAgent generateAgent() {
            return new SarsaLam(domain, 0.99, hashingFactory, 0.0, 0.1, 1.);
        }
    };

    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory, sarsaLearningFactory);
    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    exp.startExperiment();
    exp.writeStepAndEpisodeDataToCSV("expData");
}
Example #8
Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0
/**
 * Runs a trial for an agent generated by the given factory when interpreting trial length as a number of episodes.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runEpisodeBoundTrial(LearningAgentFactory agentFactory){

    //temporarily disable plotter data collection to avoid possible contamination for any actions taken by the agent generation
    //(e.g., if there is pre-test training)
    this.plotter.toggleDataCollection(false);

    LearningAgent agent = agentFactory.generateAgent();

    this.plotter.toggleDataCollection(true); //turn it back on to begin

    this.plotter.startNewTrial();

    for(int i = 0; i < this.trialLength; i++){
        agent.runLearningEpisode(this.environmentSever);
        this.plotter.endEpisode();
        this.environmentSever.resetEnvironment();
    }

    this.plotter.endTrial();
}
Example #9
Source File: PlotTest.java From burlap_examples with MIT License
public static void main(String [] args){

    GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
    gw.setMapToFourRooms(); //four rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

    //ends when the agent reaches a location
    final TerminalFunction tf = new SinglePFTF(
            PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

    //reward function definition
    final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

    gw.setTf(tf);
    gw.setRf(rf);

    final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

    //setup initial state
    GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //initial state generator
    final ConstantStateGenerator sg = new ConstantStateGenerator(s);

    //set up the state hashing system for looking up states
    final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

    /**
     * Create factory for Q-learning agent
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    //define learning environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

    //define experiment
    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory);
    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    //start experiment
    exp.startExperiment();
}