burlap.behavior.singleagent.learning.LearningAgentFactory Java Exaples

Source File: Main.java From cs7641-assignment4 with MIT License

6 votes

/**
 * Runs a learning experiment and shows some cool charts. Apparently, this is only useful for
 * Q-Learning, so I only call this method when Q-Learning is selected and the appropriate flag
 * is enabled.
 */
private static void learningExperimenter(Problem problem, LearningAgent agent, SimulatedEnvironment simulatedEnvironment) {
	LearningAlgorithmExperimenter experimenter = new LearningAlgorithmExperimenter(simulatedEnvironment, 10, problem.getNumberOfIterations(Algorithm.QLearning), new LearningAgentFactory() {

		public String getAgentName() {
			return Algorithm.QLearning.getTitle();
		}

		public LearningAgent generateAgent() {
			return agent;
		}
	});

	/*
	 * Try different PerformanceMetric values below to display different charts.
	 */
	experimenter.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE, PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);
	experimenter.startExperiment();
}

Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0

6 votes

/**
 * Runs a trial for an agent generated by the given factor when interpreting trial length as a number of total steps.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runStepBoundTrial(LearningAgentFactory agentFactory){
	
	//temporarily disable plotter data collection to avoid possible contamination for any actions taken by the agent generation
	//(e.g., if there is pre-test training)
	this.plotter.toggleDataCollection(false);
	
	LearningAgent agent = agentFactory.generateAgent();
	
	this.plotter.toggleDataCollection(true); //turn it back on to begin
	
	this.plotter.startNewTrial();
	
	int stepsRemaining = this.trialLength;
	while(stepsRemaining > 0){
		Episode ea = agent.runLearningEpisode(this.environmentSever, stepsRemaining);
		stepsRemaining -= ea.numTimeSteps()-1; //-1  because we want to subtract the number of actions, not the number of states seen
		this.plotter.endEpisode();
		this.environmentSever.resetEnvironment();
	}
	
	this.plotter.endTrial();
	
}

Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0

5 votes

/**
 * Initializes.
 * The trialLength will be interpreted as the number of episodes, but it can be reinterpreted as a total number of steps per trial using the
 * {@link #toggleTrialLengthInterpretation(boolean)}.
 * @param testEnvironment the test {@link burlap.mdp.singleagent.environment.Environment} in which experiments will be performed.
 * @param nTrials the number of trials
 * @param trialLength the length of the trials (by default in episodes, but can be intereted as maximum step length)
 * @param agentFactories factories to generate the agents to be tested.
 */
public LearningAlgorithmExperimenter(Environment testEnvironment, int nTrials, int trialLength, LearningAgentFactory...agentFactories){
	
	if(agentFactories.length == 0){
		throw new RuntimeException("Zero agent factories provided. At least one must be given for an experiment");
	}
	
	this.testEnvironment = testEnvironment;
	this.nTrials = nTrials;
	this.trialLength = trialLength;
	this.agentFactories = agentFactories;
}

Source File: BasicBehavior.java From burlap_examples with MIT License

4 votes

public void experimentAndPlotter(){

		//different reward function for more structured performance plots
		((FactoredModel)domain.getModel()).setRf(new GoalBasedRF(this.goalCondition, 5.0, -0.1));

		/**
		 * Create factories for Q-learning agent and SARSA agent to compare
		 */
		LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

			public String getAgentName() {
				return "Q-Learning";
			}


			public LearningAgent generateAgent() {
				return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
			}
		};

		LearningAgentFactory sarsaLearningFactory = new LearningAgentFactory() {

			public String getAgentName() {
				return "SARSA";
			}


			public LearningAgent generateAgent() {
				return new SarsaLam(domain, 0.99, hashingFactory, 0.0, 0.1, 1.);
			}
		};

		LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory, sarsaLearningFactory);
		exp.setUpPlottingConfiguration(500, 250, 2, 1000,
				TrialMode.MOST_RECENT_AND_AVERAGE,
				PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
				PerformanceMetric.AVERAGE_EPISODE_REWARD);

		exp.startExperiment();
		exp.writeStepAndEpisodeDataToCSV("expData");

	}

Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0

4 votes

/**
 * Runs a trial for an agent generated by the given factory when interpreting trial length as a number of episodes.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runEpisodeBoundTrial(LearningAgentFactory agentFactory){
	
	//temporarily disable plotter data collection to avoid possible contamination for any actions taken by the agent generation
	//(e.g., if there is pre-test training)
	this.plotter.toggleDataCollection(false);

	LearningAgent agent = agentFactory.generateAgent();
	
	this.plotter.toggleDataCollection(true); //turn it back on to begin
	
	this.plotter.startNewTrial();
	
	for(int i = 0; i < this.trialLength; i++){
		agent.runLearningEpisode(this.environmentSever);
		this.plotter.endEpisode();
		this.environmentSever.resetEnvironment();
	}
	
	this.plotter.endTrial();
	
}

Source File: PlotTest.java From burlap_examples with MIT License

2 votes

public static void main(String [] args){

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

		//ends when the agent reaches a location
		final TerminalFunction tf = new SinglePFTF(
				PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

		//reward function definition
		final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

		gw.setTf(tf);
		gw.setRf(rf);


		final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state
		GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));



		//initial state generator
		final ConstantStateGenerator sg = new ConstantStateGenerator(s);


		//set up the state hashing system for looking up states
		final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();


		/**
		 * Create factory for Q-learning agent
		 */
		LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

			public String getAgentName() {
				return "Q-learning";
			}

			public LearningAgent generateAgent() {
				return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
			}
		};

		//define learning environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

		//define experiment
		LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env,
				10, 100, qLearningFactory);

		exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
				PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);


		//start experiment
		exp.startExperiment();


	}

burlap.behavior.singleagent.learning.LearningAgentFactory Java Examples