burlap.behavior.singleagent.learning.LearningAgent Java Examples
The following examples show how to use
burlap.behavior.singleagent.learning.LearningAgent.
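All of the examples below follow the same basic pattern: a LearningAgent is driven by repeatedly calling runLearningEpisode against an Environment (optionally capped at a maximum number of steps), and the environment is reset between episodes. The following is a minimal sketch of that loop, assuming the agent and environment have already been constructed elsewhere; it illustrates the calling pattern and is not one of the original project files.

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.singleagent.environment.Environment;

// Minimal sketch of the common usage pattern (illustration only, not from the projects below).
public class LearningAgentSketch {

    public static void runEpisodes(LearningAgent agent, Environment env, int numEpisodes) {
        for (int i = 0; i < numEpisodes; i++) {
            // run one learning episode; the agent updates its own solution as it acts
            Episode e = agent.runLearningEpisode(env);
            System.out.println("episode " + i + " finished after " + e.maxTimeStep() + " steps");
            // return the environment to its initial state before the next episode
            env.resetEnvironment();
        }
    }
}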
Example #1
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * Runs a learning experiment and shows some cool charts. Apparently, this is only useful for
 * Q-Learning, so I only call this method when Q-Learning is selected and the appropriate flag
 * is enabled.
 */
private static void learningExperimenter(Problem problem, LearningAgent agent, SimulatedEnvironment simulatedEnvironment) {
    LearningAlgorithmExperimenter experimenter = new LearningAlgorithmExperimenter(simulatedEnvironment, 10,
            problem.getNumberOfIterations(Algorithm.QLearning), new LearningAgentFactory() {

        public String getAgentName() {
            return Algorithm.QLearning.getTitle();
        }

        public LearningAgent generateAgent() {
            return agent;
        }
    });

    /*
     * Try different PerformanceMetric values below to display different charts.
     */
    experimenter.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);
    experimenter.startExperiment();
}
Example #2
Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0
/**
 * Runs a trial for an agent generated by the given factory when interpreting trial length as a number of total steps.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runStepBoundTrial(LearningAgentFactory agentFactory){

    //temporarily disable plotter data collection to avoid possible contamination for any actions taken by the agent generation
    //(e.g., if there is pre-test training)
    this.plotter.toggleDataCollection(false);

    LearningAgent agent = agentFactory.generateAgent();

    this.plotter.toggleDataCollection(true); //turn it back on to begin

    this.plotter.startNewTrial();

    int stepsRemaining = this.trialLength;
    while(stepsRemaining > 0){
        Episode ea = agent.runLearningEpisode(this.environmentSever, stepsRemaining);
        stepsRemaining -= ea.numTimeSteps()-1; //-1 because we want to subtract the number of actions, not the number of states seen
        this.plotter.endEpisode();
        this.environmentSever.resetEnvironment();
    }

    this.plotter.endTrial();
}
Example #3
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. This method loops through the specified number of episodes
 * (iterations) and runs the chosen algorithm. To keep things nice and clean, I use this method
 * to run all three algorithms; the algorithm-specific details are supplied through the
 * PlannerFactory interface.
 *
 * This method collects all the information from the algorithm and packs it into an Analysis
 * instance that later gets dumped to the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
    ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
    SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
    Planner planner = null;
    Policy policy = null;

    for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
        long startTime = System.nanoTime();
        planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
        policy = planner.planFromState(initialState);

        /*
         * If we haven't converged, following the policy will lead the agent wandering around
         * and it might never reach the goal. To avoid this, we need to set the maximum number
         * of steps to take before terminating the policy rollout. I decided to set this maximum
         * at the number of grid locations in our map (width * width). This should give the
         * agent plenty of room to wander around.
         *
         * The smaller this number is, the faster the algorithm will run.
         */
        int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

        Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
        analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
    }

    if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
        learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
    }

    if (SHOW_VISUALIZATION && planner != null && policy != null) {
        visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
    }
}
Example #4
Source File: LearningAgentToSGAgentInterface.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain The stochastic games {@link burlap.mdp.stochasticgames.SGDomain} in which this agent will interact.
 * @param learningAgent the {@link burlap.behavior.singleagent.learning.LearningAgent} that will handle this {@link SGAgent}'s control.
 * @param agentName the name of the agent
 * @param agentType the {@link SGAgentType} for the agent defining its action space
 */
public LearningAgentToSGAgentInterface(SGDomain domain, LearningAgent learningAgent, String agentName, SGAgentType agentType){
    this.init(domain, agentName, agentType);
    this.learningAgent = learningAgent;

    //if the wrapped learning agent is also an MDPSolver, give it a single-agent domain containing this agent's action types
    if(this.learningAgent instanceof MDPSolver){
        SADomain sadomain = new SADomain();
        for(ActionType actionType : agentType.actions){
            sadomain.addActionType(actionType);
        }
        ((MDPSolver) this.learningAgent).setDomain(sadomain);
    }
}
Example #5
Source File: BasicBehavior.java From burlap_examples with MIT License
public void qLearningExample(String outputPath){

    LearningAgent agent = new QLearning(domain, 0.99, hashingFactory, 0., 1.);

    //run learning for 50 episodes
    for(int i = 0; i < 50; i++){
        Episode e = agent.runLearningEpisode(env);

        e.write(outputPath + "ql_" + i);
        System.out.println(i + ": " + e.maxTimeStep());

        //reset environment for next learning episode
        env.resetEnvironment();
    }

    simpleValueFunctionVis((ValueFunction)agent, new GreedyQPolicy((QProvider) agent));
}
Example #6
Source File: BasicBehavior.java From burlap_examples with MIT License
public void sarsaLearningExample(String outputPath){

    LearningAgent agent = new SarsaLam(domain, 0.99, hashingFactory, 0., 0.5, 0.3);

    //run learning for 50 episodes
    for(int i = 0; i < 50; i++){
        Episode e = agent.runLearningEpisode(env);

        e.write(outputPath + "sarsa_" + i);
        System.out.println(i + ": " + e.maxTimeStep());

        //reset environment for next learning episode
        env.resetEnvironment();
    }
}
Example #7
Source File: BasicBehavior.java From burlap_examples with MIT License
public void experimentAndPlotter(){

    //different reward function for more structured performance plots
    ((FactoredModel)domain.getModel()).setRf(new GoalBasedRF(this.goalCondition, 5.0, -0.1));

    /**
     * Create factories for Q-learning agent and SARSA agent to compare
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-Learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    LearningAgentFactory sarsaLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "SARSA";
        }

        public LearningAgent generateAgent() {
            return new SarsaLam(domain, 0.99, hashingFactory, 0.0, 0.1, 1.);
        }
    };

    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory, sarsaLearningFactory);
    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    exp.startExperiment();
    exp.writeStepAndEpisodeDataToCSV("expData");
}
Example #8
Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0
/**
 * Runs a trial for an agent generated by the given factory when interpreting trial length as a number of episodes.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runEpisodeBoundTrial(LearningAgentFactory agentFactory){

    //temporarily disable plotter data collection to avoid possible contamination for any actions taken by the agent generation
    //(e.g., if there is pre-test training)
    this.plotter.toggleDataCollection(false);

    LearningAgent agent = agentFactory.generateAgent();

    this.plotter.toggleDataCollection(true); //turn it back on to begin

    this.plotter.startNewTrial();

    for(int i = 0; i < this.trialLength; i++){
        agent.runLearningEpisode(this.environmentSever);
        this.plotter.endEpisode();
        this.environmentSever.resetEnvironment();
    }

    this.plotter.endTrial();
}
Example #9
Source File: PlotTest.java From burlap_examples with MIT License
public static void main(String [] args){

    GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
    gw.setMapToFourRooms(); //four rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

    //ends when the agent reaches a location
    final TerminalFunction tf = new SinglePFTF(
            PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

    //reward function definition
    final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

    gw.setTf(tf);
    gw.setRf(rf);

    final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

    //setup initial state
    GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //initial state generator
    final ConstantStateGenerator sg = new ConstantStateGenerator(s);

    //set up the state hashing system for looking up states
    final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

    /**
     * Create factory for Q-learning agent
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    //define learning environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

    //define experiment
    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory);
    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    //start experiment
    exp.startExperiment();
}