burlap.behavior.singleagent.auxiliary.StateReachability Java Examples
The following examples show how to use
burlap.behavior.singleagent.auxiliary.StateReachability.
Each example is drawn from an open-source project; the source file and license are noted above each example.
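In every example below, the work is done by StateReachability.getReachableStates, which enumerates all states reachable from a seed state under a domain's actions and returns them as a list, or by StateReachability.getReachableHashedStates, which returns the same set as hashed states. Here is a minimal, self-contained sketch of the basic call, assuming BURLAP 3's GridWorldDomain; the 11x11 size and the ReachabilitySketch class name are illustrative, not taken from any example below.

import java.util.List;

import burlap.behavior.singleagent.auxiliary.StateReachability;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class ReachabilitySketch {
	public static void main(String[] args) {
		//build a small empty grid world; the 11x11 size is illustrative
		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		SADomain domain = gwd.generateDomain();

		//seed state: agent in the bottom-left corner
		State seed = new GridWorldState(new GridAgent(0, 0));

		//enumerate every state reachable from the seed under the domain's actions
		List<State> reachable = StateReachability.getReachableStates(
				seed, domain, new SimpleHashableStateFactory());
		System.out.println(reachable.size() + " reachable states");
	}
}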
Example #1
Source File: CommandReachable.java From burlapcraft with GNU Lesser General Public License v3.0
@Override
public void processCommand(ICommandSender p_71515_1_, String[] p_71515_2_) {
	//build the Minecraft domain and read the current dungeon state
	MinecraftDomainGenerator mdg = new MinecraftDomainGenerator();
	SADomain domain = mdg.generateDomain();
	State in = MinecraftStateGeneratorHelper.getCurrentState(BurlapCraft.currentDungeon);

	//enumerate every state reachable from the current state
	List<State> reachable = StateReachability.getReachableStates(in, domain, new SimpleHashableStateFactory());

	//print each reachable agent configuration, then the total count
	for(State s : reachable){
		OOState os = (OOState)s;
		BCAgent a = (BCAgent)os.object(CLASS_AGENT);
		System.out.println(a.x + ", " + a.y + ", " + a.z + ", " + a.rdir + ", " + a.vdir + ", " + a.selected);
	}
	System.out.println(reachable.size());
}
Example #2
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * This method takes care of visualizing the grid, rewards, and specific policy on a nice
 * BURLAP-predefined GUI. I found this very useful to understand how the algorithm was working.
 */
private static void visualize(Problem map, ValueFunction valueFunction, Policy policy, State initialState,
		SADomain domain, HashableStateFactory hashingFactory, String title) {
	List<State> states = StateReachability.getReachableStates(initialState, domain, hashingFactory);

	//width is passed for both dimensions, presumably because the assignment's maps are square
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(states,
			map.getWidth(), map.getWidth(), valueFunction, policy);
	gui.setTitle(title);
	gui.setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
	gui.initGUI();
}
Example #3
Source File: VITutorial.java From burlap_examples with MIT License
public void performReachabilityFrom(State seedState){

	Set<HashableState> hashedStates = StateReachability.getReachableHashedStates(seedState, this.domain, this.hashingFactory);

	//initialize the value function for all states
	for(HashableState hs : hashedStates){
		if(!this.valueFunction.containsKey(hs)){
			this.valueFunction.put(hs, this.vinit.value(hs.s()));
		}
	}
}
Example #4
Source File: AnalysisRunner.java From omscs-cs7641-machine-learning-assignment-4 with GNU Lesser General Public License v3.0
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p, State initialState,
		Domain domain, HashableStateFactory hashingFactory, String title){

	List<State> allStates = StateReachability.getReachableStates(initialState, (SADomain)domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(
			allStates, valueFunction, p);
	gui.setTitle(title);
	gui.initGUI();
}
Example #5
Source File: IRLExample.java From burlap_examples with MIT License
/**
 * Runs MLIRL on the trajectories stored in the "irlDemo" directory and then visualizes the learned reward function.
 */
public void runIRL(String pathToEpisodes){

	//create reward function features to use
	LocationFeatures features = new LocationFeatures(this.domain, 5);

	//create a reward function that is linear with respect to those features and has small random
	//parameter values to start
	LinearStateDifferentiableRF rf = new LinearStateDifferentiableRF(features, 5);
	for(int i = 0; i < rf.numParameters(); i++){
		rf.setParameter(i, RandomFactory.getMapped(0).nextDouble()*0.2 - 0.1);
	}

	//load our saved demonstrations from disk
	List<Episode> episodes = Episode.readEpisodes(pathToEpisodes);

	//use either DifferentiableVI or DifferentiableSparseSampling for planning. The latter enables receding horizon IRL,
	//but you will probably want to use a fairly large horizon for this kind of reward function.
	double beta = 10;
	//DifferentiableVI dplanner = new DifferentiableVI(this.domain, rf, 0.99, beta, new SimpleHashableStateFactory(), 0.01, 100);
	DifferentiableSparseSampling dplanner = new DifferentiableSparseSampling(this.domain, rf, 0.99, new SimpleHashableStateFactory(), 10, -1, beta);

	dplanner.toggleDebugPrinting(false);

	//define the IRL problem
	MLIRLRequest request = new MLIRLRequest(domain, dplanner, episodes, rf);
	request.setBoltzmannBeta(beta);

	//run MLIRL on it
	MLIRL irl = new MLIRL(request, 0.1, 0.1, 10);
	irl.performIRL();

	//get all states in the domain so we can visualize the learned reward function for them
	List<State> allStates = StateReachability.getReachableStates(basicState(), this.domain, new SimpleHashableStateFactory());

	//get a standard grid world value function visualizer, but give it StateRewardFunctionValue which returns the
	//reward value received upon reaching each state which will thereby let us render the reward function that is
	//learned rather than the value function for it.
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(
			allStates,
			5,
			5,
			new RewardValueProjection(rf),
			new GreedyQPolicy((QProvider) request.getPlanner())
	);

	gui.initGUI();
}
Example #6
Source File: BasicBehavior.java From burlap_examples with MIT License
public void manualValueFunctionVis(ValueFunction valueFunction, Policy p){

	List<State> allStates = StateReachability.getReachableStates(initialState, domain, hashingFactory);

	//define color function
	LandmarkColorBlendInterpolation rb = new LandmarkColorBlendInterpolation();
	rb.addNextLandMark(0., Color.RED);
	rb.addNextLandMark(1., Color.BLUE);

	//define a 2D painter of state values, specifying which attributes correspond to the x and y coordinates of the canvas
	StateValuePainter2D svp = new StateValuePainter2D(rb);
	svp.setXYKeys("agent:x", "agent:y", new VariableDomain(0, 11), new VariableDomain(0, 11), 1, 1);

	//create our ValueFunctionVisualizer that paints for all states
	//using the ValueFunction source and the state value painter we defined
	ValueFunctionVisualizerGUI gui = new ValueFunctionVisualizerGUI(allStates, svp, valueFunction);

	//define a policy painter that uses arrow glyphs for each of the grid world actions
	PolicyGlyphPainter2D spp = new PolicyGlyphPainter2D();
	spp.setXYKeys("agent:x", "agent:y", new VariableDomain(0, 11), new VariableDomain(0, 11), 1, 1);
	spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_NORTH, new ArrowActionGlyph(0));
	spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_SOUTH, new ArrowActionGlyph(1));
	spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_EAST, new ArrowActionGlyph(2));
	spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_WEST, new ArrowActionGlyph(3));
	spp.setRenderStyle(PolicyGlyphPainter2D.PolicyGlyphRenderStyle.DISTSCALED);

	//add our policy renderer to it
	gui.setSpp(spp);
	gui.setPolicy(p);

	//set the background color for places where states are not rendered to grey
	gui.setBgColor(Color.GRAY);

	//start it
	gui.initGUI();
}
Example #7
Source File: RandomStartStateGenerator.java From burlap with Apache License 2.0
/**
 * Will discover the reachable states from which to randomly select. Reachable states found using a
 * {@link SimpleHashableStateFactory} with identifier dependence.
 * @param domain the domain from which states will be drawn.
 * @param seedState the seed state from which the reachable states will be found.
 */
public RandomStartStateGenerator(SADomain domain, State seedState) {
	HashableStateFactory hashFactory = new SimpleHashableStateFactory(false);
	this.reachableStates = StateReachability.getReachableStates(seedState, domain, hashFactory);
	this.random = new Random();
}
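A hedged usage sketch, assuming this class implements BURLAP's StateGenerator interface (whose generateState() method would here draw one of the precomputed reachable states at random); the domain and seedState variables are assumed to be defined elsewhere:

//usage sketch: precompute reachability once, then draw random start states
RandomStartStateGenerator startGen = new RandomStartStateGenerator(domain, seedState);
State randomStart = startGen.generateState(); //one of the precomputed reachable states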
Example #8
Source File: BasicBehavior.java From burlap_examples with MIT License
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p){

	//collect all states reachable from the initial state and render the value function
	//and policy over the 11x11 grid world
	List<State> allStates = StateReachability.getReachableStates(initialState, domain, hashingFactory);
	ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(allStates, 11, 11, valueFunction, p);
	gui.initGUI();
}
Example #9
Source File: RandomStartStateGenerator.java From burlap with Apache License 2.0
/**
 * Will discover reachable states from which to randomly select.
 * @param domain the domain from which states will be drawn.
 * @param seedState the seed state from which the reachable states will be found.
 * @param hashFactory the hash factory to use for the reachability analysis.
 */
public RandomStartStateGenerator(SADomain domain, State seedState, HashableStateFactory hashFactory) {
	this.reachableStates = StateReachability.getReachableStates(seedState, domain, hashFactory);
	this.random = new Random();
}