burlap.mdp.singleagent.environment.SimulatedEnvironment Java Examples
The following examples show how to use
burlap.mdp.singleagent.environment.SimulatedEnvironment.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
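Before the individual examples, here is a minimal, self-contained sketch of the typical lifecycle: construct a SimulatedEnvironment from a domain and an initial state, step it with actions chosen by a policy, and reset it between episodes. This is an illustrative sketch rather than code from any of the projects below; the domain, initial state, policy, and the 200-step cap are placeholder inputs.

import burlap.behavior.policy.Policy;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;

public class SimulatedEnvironmentSketch {

    //runs a few episodes of the given (assumed already-learned) policy in a SimulatedEnvironment
    public static void runEpisodes(SADomain domain, State initialState, Policy policy) {
        SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
        for (int episode = 0; episode < 10; episode++) {
            int steps = 0;
            while (!env.isInTerminalState() && steps < 200) {
                Action a = policy.action(env.currentObservation()); //query the policy at the current state
                EnvironmentOutcome eo = env.executeAction(a);       //eo.r is the reward, eo.op the next state
                steps++;
            }
            env.resetEnvironment(); //start the next episode from the initial state again
        }
    }
}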
Example #1
Source File: Main.java From cs7641-assignment4 with MIT License

/**
 * Runs a learning experiment and shows some cool charts. Apparently, this is only useful for
 * Q-Learning, so I only call this method when Q-Learning is selected and the appropriate flag
 * is enabled.
 */
private static void learningExperimenter(Problem problem, LearningAgent agent, SimulatedEnvironment simulatedEnvironment) {
    LearningAlgorithmExperimenter experimenter = new LearningAlgorithmExperimenter(simulatedEnvironment, 10,
            problem.getNumberOfIterations(Algorithm.QLearning), new LearningAgentFactory() {

        public String getAgentName() {
            return Algorithm.QLearning.getTitle();
        }

        public LearningAgent generateAgent() {
            return agent;
        }
    });

    /*
     * Try different PerformanceMetric values below to display different charts.
     */
    experimenter.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);
    experimenter.startExperiment();
}
Example #2
Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0

public GridWorldDQN(String solverFile, double gamma) {

    //create the domain
    gwdg = new GridWorldDomain(11, 11);
    gwdg.setMapToFourRooms();
    rf = new UniformCostRF();
    tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
    gwdg.setRf(rf);
    gwdg.setTf(tf);
    domain = gwdg.generateDomain();

    goalCondition = new TFGoalCondition(tf);

    //set up the initial state of the task
    initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //set up the state hashing system for tabular algorithms
    hashingFactory = new SimpleHashableStateFactory();

    //set up the environment for learning algorithms
    env = new SimulatedEnvironment(domain, initialState);

    dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);
}
Example #3
Source File: GradientDescentSarsaLam.java From burlap with Apache License 2.0

/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("Planning requires a model, but none is provided.");
    }

    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    int eCount = 0;
    do{
        this.runLearningEpisode(env);
        eCount++;
    }while(eCount < numEpisodesForPlanning && maxWeightChangeInLastEpisode > maxWeightChangeForPlanningTermination);

    return new GreedyQPolicy(this);
}
Example #4
Source File: QLearning.java From burlap with Apache License 2.0

/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("QLearning (and its subclasses) cannot execute planFromState because a model is not specified.");
    }

    SimulatedEnvironment env = new SimulatedEnvironment(this.domain, initialState);

    int eCount = 0;
    do{
        this.runLearningEpisode(env, this.maxEpisodeSize);
        eCount++;
    }while(eCount < numEpisodesForPlanning && maxQChangeInLastEpisode > maxQChangeForPlanningTermination);

    return new GreedyQPolicy(this);
}
Example #5
Source File: BasicBehavior.java From burlap_examples with MIT License

public BasicBehavior(){
    gwdg = new GridWorldDomain(11, 11);
    gwdg.setMapToFourRooms();
    tf = new GridWorldTerminalFunction(10, 10);
    gwdg.setTf(tf);
    goalCondition = new TFGoalCondition(tf);
    domain = gwdg.generateDomain();

    initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
    hashingFactory = new SimpleHashableStateFactory();
    env = new SimulatedEnvironment(domain, initialState);

//    VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//    observer.initGUI();
//    env.addObservers(observer);
}
Example #6
Source File: ExampleGridWorld.java From burlap_examples with MIT License

public static void main(String [] args){

    ExampleGridWorld gen = new ExampleGridWorld();
    gen.setGoalLocation(10, 10);
    SADomain domain = gen.generateDomain();

    State initialState = new EXGridState(0, 0);
    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    Visualizer v = gen.getVisualizer();
    VisualExplorer exp = new VisualExplorer(domain, env, v);

    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("a", ACTION_WEST, "");

    exp.initGUI();
}
Example #7
Source File: Main.java From cs7641-assignment4 with MIT License

/**
 * Here is where the magic happens. This is where I loop through the specific number
 * of episodes (iterations) and run the specific algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms. The specific details are specified through the
 * PlannerFactory interface.
 *
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory,
        State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
    ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
    SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
    Planner planner = null;
    Policy policy = null;

    for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
        long startTime = System.nanoTime();
        planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
        policy = planner.planFromState(initialState);

        /*
         * If we haven't converged, following the policy will lead the agent wandering around
         * and it might never reach the goal. To avoid this, we need to set the maximum number
         * of steps to take before terminating the policy rollout. I decided to set this maximum
         * at the number of grid locations in our map (width * width). This should give the
         * agent plenty of room to wander around.
         *
         * The smaller this number is, the faster the algorithm will run.
         */
        int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

        Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
        analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
    }

    if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
        learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
    }

    if (SHOW_VISUALIZATION && planner != null && policy != null) {
        visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
    }
}
Example #8
Source File: IRLExample.java From burlap_examples with MIT License

/**
 * Creates a visual explorer that you can use to record trajectories. Use the "`" key to reset to a random initial state.
 * Use the wasd keys to move north, south, east, and west, respectively. To enable recording,
 * first open up the shell and type: "rec -b" (you only need to type this once). Then you can move in the explorer as normal.
 * Each demonstration begins after an environment reset.
 * After each demonstration that you want to keep, go back to the shell and type "rec -r".
 * If you reset the environment before you type that,
 * the episode will be discarded. To temporarily view the episodes you've created, in the shell type "episode -v". To actually record your
 * episodes to file, type "rec -w path/to/save/directory base_file_name". For example, "rec -w irl_demos demo".
 * A recommendation for examples is to record two demonstrations that both go to the pink cell while avoiding blue ones
 * and do so from two different start locations on the left (if you keep resetting the environment, it will change where the agent starts).
 */
public void launchExplorer(){
    SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.sg);
    VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800);
    exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
    exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
    exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
    exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");

    //exp.enableEpisodeRecording("r", "f", "irlDemo");

    exp.initGUI();
}
Example #9
Source File: MCVideo.java From burlap_examples with MIT License

public static void main(String[] args) {

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    NormalizedVariableFeatures features = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
    FourierBasis fb = new FourierBasis(features, 4);

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain,
            new MCState(mcGen.physParams.valleyPos(), 0));
    EnvironmentServer envServ = new EnvironmentServer(env, vob);

    for(int i = 0; i < 100; i++){
        PolicyUtils.rollout(p, envServ);
        envServ.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #10
Source File: QLTutorial.java From burlap_examples with MIT License

public static void main(String[] args) {

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();
    gwd.setProbSucceedTransitionDynamics(0.8);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));

    SADomain domain = gwd.generateDomain();

    //get initial state with agent in 0,0
    State s = new GridWorldState(new GridAgent(0, 0));

    //create environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

    //create Q-learning
    QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(), 0.1, 0.1);

    //run Q-learning and store results in a list
    List<Episode> episodes = new ArrayList<Episode>(1000);
    for(int i = 0; i < 1000; i++){
        episodes.add(agent.runLearningEpisode(env));
        env.resetEnvironment();
    }

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
Example #11
Source File: ActorCritic.java From burlap with Apache License 2.0

public void planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("Planning requires a model, but none is provided.");
    }

    SimulatedEnvironment env = new SimulatedEnvironment(this.model, initialState);

    for(int i = 0; i < numEpisodesForPlanning; i++){
        this.runLearningEpisode(env, this.maxEpisodeSize);
    }
}
Example #12
Source File: BFSMarkovOptionModel.java From burlap with Apache License 2.0

@Override
public EnvironmentOutcome sample(State s, Action a) {
    if(!(a instanceof Option)){
        return model.sample(s, a);
    }

    Option o = (Option)a;
    SimulatedEnvironment env = new SimulatedEnvironment(model, s);
    return o.control(env, discount);
}
Example #13
Source File: TigerDomain.java From burlap with Apache License 2.0

/**
 * Main method for interacting with the tiger domain via an {@link EnvironmentShell}.
 * By default, the TerminalExplorer interacts with the partially observable environment ({@link burlap.mdp.singleagent.pomdp.SimulatedPOEnvironment}),
 * which means you only get to see the observations that the agent would. However, if you set the first command-line argument
 * to be "h", then the explorer will explore the underlying fully observable MDP states.
 * @param args either empty or ["h"]; provide "h" to explore the underlying fully observable tiger MDP.
 */
public static void main(String [] args){

    TigerDomain dgen = new TigerDomain(false);
    PODomain domain = (PODomain)dgen.generateDomain();

    StateGenerator tigerGenerator = TigerDomain.randomSideStateGenerator(0.5);

    Environment observableEnv = new SimulatedEnvironment(domain, tigerGenerator);
    Environment poEnv = new SimulatedPOEnvironment(domain, tigerGenerator);

    Environment envTouse = poEnv;
    if(args.length > 0 && args[0].equals("h")){
        envTouse = observableEnv;
    }

    EnvironmentShell shell = new EnvironmentShell(domain, envTouse);
    shell.start();
}
Example #14
Source File: ExampleOOGridWorld.java From burlap_examples with MIT License

public static void main(String [] args){

    ExampleOOGridWorld gen = new ExampleOOGridWorld();
    OOSADomain domain = gen.generateDomain();
    State initialState = new GenericOOState(new ExGridAgent(0, 0), new EXGridLocation(10, 10, "loc0"));

    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    Visualizer v = gen.getVisualizer();
    VisualExplorer exp = new VisualExplorer(domain, env, v);

    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("a", ACTION_WEST, "");

    exp.initGUI();
}
Example #15
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License

public static void MCLSPIRBF(){

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();
    MCState s = new MCState(mcGen.physParams.valleyPos(), 0.);

    NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    RBFFeatures rbf = new RBFFeatures(inputFeatures, true);
    FlatStateGridder gridder = new FlatStateGridder()
            .gridDimension("x", mcGen.physParams.xmin, mcGen.physParams.xmax, 5)
            .gridDimension("v", mcGen.physParams.vmin, mcGen.physParams.vmax, 5);

    List<State> griddedStates = gridder.gridState(s);
    DistanceMetric metric = new EuclideanDistance();
    for(State g : griddedStates){
        rbf.addRBF(new GaussianRBF(inputFeatures.features(g), metric, 0.2));
    }

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(rbf, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    env.addObservers(vob);

    for(int i = 0; i < 5; i++){
        PolicyUtils.rollout(p, env);
        env.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #16
Source File: OptionsExample.java From burlap_examples with MIT License

public static Episode optionExecuteResult(SADomain domain, Option o, State s){
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    EnvironmentOptionOutcome eo = o.control(env, 0.99);
    return eo.episode;
}
Example #17
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License

public static void MCLSPIFB(){

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
    FourierBasis fb = new FourierBasis(inputFeatures, 4);

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain,
            new MCState(mcGen.physParams.valleyPos(), 0.));
    env.addObservers(vob);

    for(int i = 0; i < 5; i++){
        PolicyUtils.rollout(p, env);
        env.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #18
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License

public static void LLSARSA(){

    LunarLanderDomain lld = new LunarLanderDomain();
    OOSADomain domain = lld.generateDomain();

    LLState s = new LLState(new LLAgent(5, 0, 0), new LLBlock.LLPad(75, 95, 0, 10, "pad"));

    ConcatenatedObjectFeatures inputFeatures = new ConcatenatedObjectFeatures()
            .addObjectVectorizion(LunarLanderDomain.CLASS_AGENT, new NumericVariableFeatures());

    int nTilings = 5;
    double resolution = 10.;

    double xWidth = (lld.getXmax() - lld.getXmin()) / resolution;
    double yWidth = (lld.getYmax() - lld.getYmin()) / resolution;
    double velocityWidth = 2 * lld.getVmax() / resolution;
    double angleWidth = 2 * lld.getAngmax() / resolution;

    TileCodingFeatures tilecoding = new TileCodingFeatures(inputFeatures);
    tilecoding.addTilingsForAllDimensionsWithWidths(
            new double []{xWidth, yWidth, velocityWidth, velocityWidth, angleWidth},
            nTilings,
            TilingArrangement.RANDOM_JITTER);

    double defaultQ = 0.5;
    DifferentiableStateActionValue vfa = tilecoding.generateVFA(defaultQ/nTilings);
    GradientDescentSarsaLam agent = new GradientDescentSarsaLam(domain, 0.99, vfa, 0.02, 0.5);

    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    List<Episode> episodes = new ArrayList<Episode>();
    for(int i = 0; i < 5000; i++){
        Episode ea = agent.runLearningEpisode(env);
        episodes.add(ea);
        System.out.println(i + ": " + ea.maxTimeStep());
        env.resetEnvironment();
    }

    Visualizer v = LLVisualizer.getVisualizer(lld.getPhysParams());
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
Example #19
Source File: PolicyUtils.java From burlap with Apache License 2.0

/**
 * This method will return an episode that results from following the given policy from state s. The episode will terminate
 * when the policy reaches a terminal state.
 * @param p the {@link Policy} to roll out
 * @param s the state from which to roll out the policy
 * @param model the model from which to sample
 * @return an {@link Episode} object that records the events from following the policy.
 */
public static Episode rollout(Policy p, State s, SampleModel model){
    return rollout(p, new SimulatedEnvironment(model, s));
}
Example #20
Source File: PolicyUtils.java From burlap with Apache License 2.0

/**
 * This method will return an episode that results from following the given policy from state s. The episode will terminate
 * when the policy reaches a terminal state or when the number of steps surpasses maxSteps.
 * @param p the {@link Policy} to roll out
 * @param s the state from which to roll out the policy
 * @param model the model from which to sample state transitions
 * @param maxSteps the maximum number of steps to take before terminating the policy rollout.
 * @return an {@link Episode} object that records the events from following the policy.
 */
public static Episode rollout(Policy p, State s, SampleModel model, int maxSteps){
    return rollout(p, new SimulatedEnvironment(model, s), maxSteps);
}
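For reference, a typical call of the bounded rollout above looks like the following. This is a usage sketch: policy, initialState, and domain stand in for whatever the caller has already built, and 200 is an arbitrary step cap.

//roll the policy out in an internally created SimulatedEnvironment for at most 200 steps
Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), 200);
System.out.println("steps taken: " + episode.maxTimeStep());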
Example #21
Source File: PlotTest.java From burlap_examples with MIT License

public static void main(String [] args){

    GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
    gw.setMapToFourRooms(); //four rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate

    //ends when the agent reaches a location
    final TerminalFunction tf = new SinglePFTF(
            PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

    //reward function definition
    final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

    gw.setTf(tf);
    gw.setRf(rf);

    final OOSADomain domain = gw.generateDomain(); //generate the grid world domain

    //setup initial state
    GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    //initial state generator
    final ConstantStateGenerator sg = new ConstantStateGenerator(s);

    //set up the state hashing system for looking up states
    final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

    /**
     * Create factory for Q-learning agent
     */
    LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

        public String getAgentName() {
            return "Q-learning";
        }

        public LearningAgent generateAgent() {
            return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
    };

    //define learning environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

    //define experiment
    LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory);

    exp.setUpPlottingConfiguration(500, 250, 2, 1000,
            TrialMode.MOST_RECENT_AND_AVERAGE,
            PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE,
            PerformanceMetric.AVERAGE_EPISODE_REWARD);

    //start experiment
    exp.startExperiment();
}
Example #22
Source File: VisualExplorer.java From burlap with Apache License 2.0

/**
 * Initializes with a domain and initial state, automatically creating a {@link burlap.mdp.singleagent.environment.SimulatedEnvironment}
 * as the environment with which to interact. The created {@link burlap.mdp.singleagent.environment.SimulatedEnvironment} will
 * have a {@link burlap.mdp.singleagent.common.NullRewardFunction} and {@link burlap.mdp.auxiliary.common.NullTermination} functions set.
 * @param domain the domain to explore
 * @param painter the 2D state visualizer
 * @param baseState the initial state from which to explore
 */
public VisualExplorer(SADomain domain, Visualizer painter, State baseState){
    Environment env = new SimulatedEnvironment(domain, baseState);
    this.init(domain, env, painter, 800, 800);
}
Example #23
Source File: EnvironmentShell.java From burlap with Apache License 2.0

/**
 * Creates a shell for a {@link SimulatedEnvironment} rooted at the input state using std in and std out.
 * @param domain the BURLAP domain
 * @param s the initial state for the simulated environment that will be created.
 */
public EnvironmentShell(SADomain domain, State s){
    this(domain, new SimulatedEnvironment(domain, s), System.in, System.out);
}
Example #24
Source File: PlannerFactory.java From cs7641-assignment4 with MIT License
Planner createPlanner(int episodeIndex, SADomain domain, HashableStateFactory hashingFactory, SimulatedEnvironment simulatedEnvironment);
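The interface above only declares the factory method, so for context here is one hypothetical way it could be implemented; this sketch is not the assignment's actual code. It lazily builds a BURLAP QLearning agent (which implements both Planner and LearningAgent) and advances it by one learning episode on the shared SimulatedEnvironment each call, with the 0.99/0.3/0.1 hyperparameters chosen arbitrarily.

//Hypothetical implementation sketch, not taken from cs7641-assignment4
PlannerFactory qLearningPlannerFactory = new PlannerFactory() {

    private QLearning agent; //reused across calls so learning accumulates between episodes

    public Planner createPlanner(int episodeIndex, SADomain domain, HashableStateFactory hashingFactory,
            SimulatedEnvironment simulatedEnvironment) {
        if (agent == null) {
            agent = new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
        }
        agent.runLearningEpisode(simulatedEnvironment); //learn from one more episode of experience
        simulatedEnvironment.resetEnvironment();        //put the environment back at its initial state
        return agent; //QLearning implements Planner, so it can be returned directly
    }
};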