burlap.mdp.auxiliary.common.ConstantStateGenerator Java Examples
The following examples show how to use
burlap.mdp.auxiliary.common.ConstantStateGenerator.
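ConstantStateGenerator implements BURLAP's burlap.mdp.auxiliary.StateGenerator interface and simply returns the state it was given at construction on every generateState() call, which makes it the standard way to pin an environment or world to a fixed start state. Before the project examples, here is a minimal sketch of that contract; the GridWorldState used as the input is only an illustrative choice, since any State implementation works.

import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.auxiliary.StateGenerator;
import burlap.mdp.auxiliary.common.ConstantStateGenerator;
import burlap.mdp.core.state.State;

public class ConstantStateGeneratorSketch {

    public static void main(String[] args) {
        // any State implementation works; a one-agent grid world state is used for illustration
        State initialState = new GridWorldState(new GridAgent(0, 0));

        // the generator captures the state once at construction...
        StateGenerator sg = new ConstantStateGenerator(initialState);

        // ...and every generateState() call returns that same captured state
        State first = sg.generateState();
        State second = sg.generateState();
        System.out.println(first.equals(second)); // expected: true
    }
}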
Example #1
Source File: LSPI.java From burlap with Apache License 2.0
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("LSPI cannot execute planFromState because the reward function and/or terminal function for planning have not been set. Use the initializeForPlanning method to set them.");
    }

    if(planningCollector == null){
        this.planningCollector = new SARSCollector.UniformRandomSARSCollector(this.actionTypes);
    }
    this.dataset = this.planningCollector.collectNInstances(new ConstantStateGenerator(initialState), this.model, this.numSamplesForPlanning, Integer.MAX_VALUE, this.dataset);
    return this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
}
Example #2
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. In this method is where I loop through the specific number
 * of episodes (iterations) and run the specific algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms. The specific details are specified through the
 * PlannerFactory interface.
 *
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
    ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
    SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
    Planner planner = null;
    Policy policy = null;

    for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
        long startTime = System.nanoTime();
        planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
        policy = planner.planFromState(initialState);

        /*
         * If we haven't converged, following the policy will lead the agent wandering around
         * and it might never reach the goal. To avoid this, we need to set the maximum number
         * of steps to take before terminating the policy rollout. I decided to set this maximum
         * at the number of grid locations in our map (width * width). This should give the
         * agent plenty of room to wander around.
         *
         * The smaller this number is, the faster the algorithm will run.
         */
        int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

        Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
        analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
    }

    if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
        learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
    }

    if (SHOW_VISUALIZATION && planner != null && policy != null) {
        visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
    }
}
Example #3
Source File: SimulatedEnvironment.java From burlap with Apache License 2.0
/**
 * Creates a simulated environment with a fixed initial state: the input state is wrapped
 * in a ConstantStateGenerator, so every environment reset returns to it.
 */
public SimulatedEnvironment(SADomain domain, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;
    if(domain.getModel() == null){
        throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
    }
    this.model = domain.getModel();
}
Example #4
Source File: SimulatedEnvironment.java From burlap with Apache License 2.0
/**
 * Sets the current state of the environment. If no state generator has been set yet,
 * a ConstantStateGenerator for this state is created so that resets have a state to return to.
 */
@Override
public void setCurStateTo(State s) {
    if(this.stateGenerator == null){
        this.stateGenerator = new ConstantStateGenerator(s);
    }
    this.curState = s;
}
Example #5
Source File: SimulatedEnvironment.java From burlap with Apache License 2.0
public SimulatedEnvironment(SampleModel model, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;
    this.model = model;
}
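Examples #3 through #5 show the main reason ConstantStateGenerator exists: SimulatedEnvironment keeps a StateGenerator around so that resetEnvironment() can produce a fresh start state, and wrapping a single state in a constant generator makes every reset return to that exact state. Here is a short sketch of that behavior; it uses the two-argument constructor from Example #3 and assumes GridWorldDomain's default reward and terminal functions are acceptable for domain generation.

import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;
import burlap.mdp.singleagent.oo.OOSADomain;

public class ResetToConstantStateSketch {

    public static void main(String[] args) {
        GridWorldDomain gw = new GridWorldDomain(11, 11); // default reward/terminal functions assumed
        OOSADomain domain = gw.generateDomain();
        State initialState = new GridWorldState(new GridAgent(0, 0));

        // the SADomain + State constructor (Example #3) wraps the state in a ConstantStateGenerator
        SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

        // after any amount of interaction, resetting asks the generator for a new start state,
        // which for a ConstantStateGenerator is always the state captured at construction
        env.resetEnvironment();
        System.out.println(env.currentObservation());
    }
}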
Example #6
Source File: GameEpisode.java From burlap with Apache License 2.0
public static void main(String[] args) {

    GridGame gg = new GridGame();
    OOSGDomain domain = gg.generateDomain();

    State s = GridGame.getTurkeyInitialState();

    JointRewardFunction jr = new GridGame.GGJointRewardFunction(domain);
    TerminalFunction tf = new GridGame.GGTerminalFunction(domain);

    World world = new World(domain, jr, tf, new ConstantStateGenerator(s));
    DPrint.toggleCode(world.getDebugId(), false);

    SGAgent ragent1 = new RandomSGAgent();
    SGAgent ragent2 = new RandomSGAgent();

    SGAgentType type = new SGAgentType("agent", domain.getActionTypes());

    world.join(ragent1);
    world.join(ragent2);

    GameEpisode ga = world.runGame(20);

    System.out.println(ga.maxTimeStep());

    String serialized = ga.serialize();
    System.out.println(serialized);

    GameEpisode read = GameEpisode.parse(serialized);
    System.out.println(read.maxTimeStep());
    System.out.println(read.state(0).toString());
}
Example #7
Source File: PlotTest.java From burlap_examples with MIT License
public static void main(String [] args){

    GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
    gw.setMapToFourRooms(); //four rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

    //ends when the agent reaches a location
    final TerminalFunction tf = new SinglePFTF(
            PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

    //reward function definition
    final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

    gw.setTf(tf);
    gw.setRf(rf);

    final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

    //setup initial state
    GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //initial state generator
    final ConstantStateGenerator sg = new ConstantStateGenerator(s);

    //set up the state hashing system for looking up states
    final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

    /**
     * Create factory for Q-learning agent
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    //define learning environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

    //define experiment
    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory);
    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    //start experiment
    exp.startExperiment();
}
Example #8
Source File: World.java From burlap with Apache License 2.0
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param initialState the initial state of the world every time a new game starts
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, State initialState){
    this.init(domain, domain.getJointActionModel(), jr, tf, new ConstantStateGenerator(initialState), new IdentityStateMapping());
}