burlap.domain.singleagent.gridworld.state.GridWorldState Java Examples

Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0

6 votes

public GridWorldDQN(String solverFile, double gamma) {

        //create the domain
        gwdg = new GridWorldDomain(11, 11);
        gwdg.setMapToFourRooms();
        rf = new UniformCostRF();
        tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
        gwdg.setRf(rf);
        gwdg.setTf(tf);
        domain = gwdg.generateDomain();

        goalCondition = new TFGoalCondition(tf);

        //set up the initial state of the task
        initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

        //set up the state hashing system for tabular algorithms
        hashingFactory = new SimpleHashableStateFactory();

        //set up the environment for learners algorithms
        env = new SimulatedEnvironment(domain, initialState);

        dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);
    }

Source File: IRLExample.java From burlap_examples with MIT License

6 votes

/**
 * Builds the fixed example state: the agent at (0,0) together with nine named grid locations.
 * The third constructor argument of each location is presumably its cell type (confirm against
 * {@code GridLocation}); four locations use the values 1-4 and the remaining five use 0.
 * @return the newly constructed grid world state
 */
protected State basicState(){

	return new GridWorldState(
			new GridAgent(0, 0),

			// locations carrying the values 1 through 4
			new GridLocation(0, 0, 1, "loc0"),
			new GridLocation(0, 4, 2, "loc1"),
			new GridLocation(4, 4, 3, "loc2"),
			new GridLocation(4, 0, 4, "loc3"),

			// locations carrying the value 0
			new GridLocation(1, 0, 0, "loc4"),
			new GridLocation(1, 2, 0, "loc5"),
			new GridLocation(1, 4, 0, "loc6"),
			new GridLocation(3, 1, 0, "loc7"),
			new GridLocation(3, 3, 0, "loc8")
	);
}

Source File: BasicBehavior.java From burlap_examples with MIT License

6 votes

/**
 * Sets up the shared task used by the examples in this class: an 11x11 four-rooms grid world
 * whose terminal function is built from the coordinates (10,10), an initial state with the agent
 * at (0,0), a state hashing factory, and a simulated environment.
 */
public BasicBehavior(){
	// domain generator with the four-rooms map
	gwdg = new GridWorldDomain(11, 11);
	gwdg.setMapToFourRooms();

	// terminal function built from (10,10); the goal condition wraps the same function
	tf = new GridWorldTerminalFunction(10, 10);
	gwdg.setTf(tf);
	goalCondition = new TFGoalCondition(tf);

	// the terminal function is registered before the domain is generated
	domain = gwdg.generateDomain();

	// agent at (0,0) and a single location object "loc0" at (10,10)
	initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
	hashingFactory = new SimpleHashableStateFactory();

	env = new SimulatedEnvironment(domain, initialState);

	// Uncomment to watch the agent live while algorithms interact with the environment:
//	VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//	observer.initGUI();
//	env.addObservers(observer);
}

Source File: TestHashing.java From burlap with Apache License 2.0

6 votes

/**
 * Checks a {@code SimpleHashableStateFactory} constructed with {@code false} (the
 * identifier-dependent configuration, per this test's name): the reachable state set must hold
 * 104 states, and adding a renamed copy of every state must double that to 208, i.e. renamed
 * copies hash as distinct states.
 * <p>
 * The checks throw {@link AssertionError} explicitly rather than using the {@code assert}
 * keyword, because {@code assert} statements are skipped unless the JVM runs with {@code -ea},
 * which would let this test pass without verifying anything.
 */
@Test
public void testSimpleHashFactoryIdentifierDependent() {
	SADomain domain = (SADomain)this.gridWorldTest.getDomain();
	State startState = this.gridWorldTest.generateState();
	HashableStateFactory factory = new SimpleHashableStateFactory(false);
	Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
	if (hashedStates.size() != 104) {
		throw new AssertionError("expected 104 reachable states but found " + hashedStates.size());
	}

	// hash a renamed copy of every reachable state
	Set<HashableState> renamedStates = new HashSet<HashableState>();
	for (HashableState state : hashedStates) {
		State source = state.s();
		State renamed = this.renameObjects((GridWorldState)source.copy());
		HashableState renamedHashed = factory.hashState(renamed);
		renamedStates.add(renamedHashed);
	}

	// no renamed state may collapse onto an original one
	hashedStates.addAll(renamedStates);
	if (hashedStates.size() != 208) {
		throw new AssertionError("expected 208 states after adding renamed copies but found " + hashedStates.size());
	}
}

Source File: HelloGridWorld.java From burlap_examples with MIT License

6 votes

/**
 * Opens an interactive visual explorer on an 11x11 four-rooms grid world whose transition
 * dynamics are configured with a 0.8 success probability. The w/s/a/d keys are bound to the
 * north/south/west/east actions.
 * @param args command line arguments (not read)
 */
public static void main(String[] args) {

	// build the domain
	GridWorldDomain gw = new GridWorldDomain(11,11);
	gw.setMapToFourRooms();
	gw.setProbSucceedTransitionDynamics(0.8);
	SADomain domain = gw.generateDomain();

	// initial state: agent at (0,0), location "loc0" at (10,10)
	State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

	// explorer rendered with the grid world visualizer for this map
	VisualExplorer exp = new VisualExplorer(domain, GridWorldVisualizer.getVisualizer(gw.getMap()), s);

	// key -> action bindings, registered in w, s, a, d order
	String[][] keyBindings = {
			{"w", GridWorldDomain.ACTION_NORTH},
			{"s", GridWorldDomain.ACTION_SOUTH},
			{"a", GridWorldDomain.ACTION_WEST},
			{"d", GridWorldDomain.ACTION_EAST}
	};
	for (String[] binding : keyBindings) {
		exp.addKeyAction(binding[0], binding[1], "");
	}

	exp.initGUI();

}

Source File: TestHashing.java From burlap with Apache License 2.0

6 votes

/**
 * Checks a default-constructed {@code SimpleHashableStateFactory} (the identifier-independent
 * configuration, per this test's name): the reachable state set must hold 104 states, and adding
 * a renamed copy of every state must leave the size at 104, i.e. renamed copies hash as equal to
 * their originals.
 * <p>
 * The checks throw {@link AssertionError} explicitly rather than using the {@code assert}
 * keyword, because {@code assert} statements are skipped unless the JVM runs with {@code -ea},
 * which would let this test pass without verifying anything.
 */
@Test
public void testSimpleHashFactoryIdentifierIndependent() {
	SADomain domain = (SADomain)this.gridWorldTest.getDomain();
	State startState = this.gridWorldTest.generateState();
	HashableStateFactory factory = new SimpleHashableStateFactory();
	Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
	if (hashedStates.size() != 104) {
		throw new AssertionError("expected 104 reachable states but found " + hashedStates.size());
	}

	// hash a renamed copy of every reachable state
	Set<HashableState> renamedStates = new HashSet<HashableState>();
	for (HashableState state : hashedStates) {
		State source = state.s();
		State renamed = this.renameObjects((GridWorldState)source.copy());
		HashableState renamedHashed = factory.hashState(renamed);
		renamedStates.add(renamedHashed);
	}

	// every renamed state must collapse onto its original, so the set must not grow
	hashedStates.addAll(renamedStates);
	if (hashedStates.size() != 104) {
		throw new AssertionError("expected 104 states after adding renamed copies but found " + hashedStates.size());
	}
}

Source File: GridWorldDomain.java From burlap with Apache License 2.0

6 votes

/**
 * Attempts to move the agent by the given offsets, taking into account the grid bounds and the
 * wall codes stored in {@code map}. If the move is blocked the agent keeps its current position.
 * The agent of the input state is modified in place.
 * @param s the current state; must be a {@link GridWorldState}
 * @param xd the attempted change in the agent's X position (added to the current x)
 * @param yd the attempted change in the agent's Y position (added to the current y)
 * @return input state s, after modification
 */
protected State move(State s, int xd, int yd){

	GridWorldState gws = (GridWorldState)s;

	//current agent position
	int ax = gws.agent.x;
	int ay = gws.agent.y;

	//candidate position after applying the offsets
	int nx = ax+xd;
	int ny = ay+yd;

	//hit wall, so do not change position
	//The bounds checks come first so that short-circuit evaluation guards every map[nx][ny] access.
	//Blocking rules as coded: target cell value 1 always blocks; values 3 or 4 block positive-x moves
	//when on the current cell and negative-x moves when on the target cell; values 2 or 4 do the same for y.
	if(nx < 0 || nx >= map.length || ny < 0 || ny >= map[0].length || map[nx][ny] == 1 ||
			(xd > 0 && (map[ax][ay] == 3 || map[ax][ay] == 4)) || (xd < 0 && (map[nx][ny] == 3 || map[nx][ny] == 4)) ||
			(yd > 0 && (map[ax][ay] == 2 || map[ax][ay] == 4)) || (yd < 0 && (map[nx][ny] == 2 || map[nx][ny] == 4)) ){
		nx = ax;
		ny = ay;
	}

	//write the resulting position through the agent returned by touchAgent()
	//(presumably a writable copy owned by this state -- confirm against GridWorldState)
	GridAgent nagent = gws.touchAgent();
	nagent.x = nx;
	nagent.y = ny;

	return s;
}

Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0

5 votes

/**
 * Writes the state into {@code input} as a vector that is zero everywhere except for a single 1
 * at index {@code y * width + x}, where (x, y) is the agent's position and width is taken from
 * the domain generator.
 *
 * @param state the state to encode; must be a {@code GridWorldState}
 * @param input the buffer that is cleared and then written to
 */
@Override
public void vectorizeState(State state, FloatPointer input) {
    GridWorldState gridState = (GridWorldState) state;

    final int rowLength = gwdg.getWidth();

    // clear the buffer before setting the single active entry
    input.fill(0);

    ObjectInstance agentObj = gridState.object(GridWorldDomain.CLASS_AGENT);
    final int col = (Integer)agentObj.get(GridWorldDomain.VAR_X);
    final int row = (Integer)agentObj.get(GridWorldDomain.VAR_Y);

    // flat index of the agent's cell
    final long hotIndex = (long)(row*rowLength + col);
    input.put(hotIndex, 1);
}

Source File: TestHashing.java From burlap with Apache License 2.0

5 votes

/**
 * Gives every location returned by {@code deepTouchLocations()} a fresh random name and returns
 * the same state instance.
 * @param s the state whose locations are renamed in place
 * @return the input state {@code s}
 */
public State renameObjects(GridWorldState s) {
	SecureRandom rng = new SecureRandom();
	for (GridLocation location : s.deepTouchLocations()) {
		// 130 random bits rendered in base 32
		location.setName(new BigInteger(130, rng).toString(32));
	}
	return s;
}

Source File: TestHashing.java From burlap with Apache License 2.0

5 votes

/**
 * Creates a state with a default-constructed agent and {@code width} locations placed along the
 * anti-diagonal: location {@code i} is at {@code (i, width - 1 - i)} and is named {@code "loc" + i}.
 * @param domain not used by this method
 * @param width the number of locations to add
 * @return the newly built state
 */
public State generateLargeGW(SADomain domain, int width) {

	GridWorldState diagonal = new GridWorldState(new GridAgent());

	// walk x upwards while y walks downwards from width - 1
	for (int col = 0, row = width - 1; col < width; col++, row--) {
		diagonal.locations.add(new GridLocation(col, row, "loc"+col));
	}
	return diagonal;
}

Source File: TestHashing.java From burlap with Apache License 2.0

5 votes

/**
 * Collects hashed copies of {@code state} in which the agent (and optionally every location) has
 * been moved to a uniformly random position in {@code [0, width)} on both axes. Sampling stops
 * once {@code numStates} distinct hashed states have been collected, or once the running count of
 * iterations that began with an unchanged set size exceeds 100.
 *
 * @param domain not used by this method
 * @param state template state; it is copied for every sample
 * @param factory factory used to hash each sampled state
 * @param width exclusive upper bound for the random x and y coordinates
 * @param numStates the number of distinct hashed states to aim for
 * @param moveLocations if true, every location is also given a random position
 * @return the set of distinct hashed states that were collected
 */
public Set<HashableState> generateRandomStates(SADomain domain, State state, HashableStateFactory factory, int width, int numStates, boolean moveLocations) {
	Set<HashableState> collected = new HashSet<HashableState>();
	Random rng = new Random();
	int stalls = 0;
	int lastSize = 0;
	while (collected.size() < numStates) {
		int currentSize = collected.size();

		// an iteration that starts with the same size as the previous one counts as a stall
		if (currentSize == lastSize) {
			stalls++;
		}
		if (stalls > 100) {
			break;
		}
		lastSize = currentSize;

		// progress output at every multiple of 10000 collected states
		if (lastSize > 0 && lastSize % 10000 == 0) {
			System.out.println("\t" + lastSize);
		}

		// sample a new agent position (x is drawn before y)
		GridWorldState candidate = (GridWorldState)state.copy();
		candidate.touchAgent().x = rng.nextInt(width);
		candidate.agent.y = rng.nextInt(width);

		if (moveLocations) {
			for (GridLocation location : candidate.deepTouchLocations()) {
				location.x = rng.nextInt(width);
				location.y = rng.nextInt(width);
			}
		}
		collected.add(factory.hashState(candidate));
	}
	return collected;
}

Source File: TestHashing.java From burlap with Apache License 2.0

5 votes

/**
 * Hashes one copy of {@code state} for every agent position in the {@code width} x {@code width}
 * square of coordinates starting at (0,0).
 *
 * @param domain not used by this method
 * @param state template state; it is copied for every position
 * @param factory factory used to hash each generated state
 * @param width the number of x and y coordinates to enumerate
 * @return the set of hashed states
 */
public Set<HashableState> generateStates(SADomain domain, State state, HashableStateFactory factory, int width) {
	Set<HashableState> result = new HashSet<HashableState>();
	for (int x = 0; x < width; x++) {
		for (int y = 0; y < width; y++) {
			GridWorldState placed = (GridWorldState)state.copy();
			placed.touchAgent().x = x;
			placed.agent.y = y;
			result.add(factory.hashState(placed));
		}
	}
	return result;
}

Source File: TestHashing.java From burlap with Apache License 2.0

5 votes

/**
 * Checks a {@code SimpleHashableStateFactory} constructed with {@code false} (the
 * identifier-dependent configuration, per this test's name) on a state with 100 locations:
 * adding a renamed copy of every reachable state must exactly double the size of the state set.
 * The number of distinct hash codes is reported on stderr as a diagnostic only; hash collisions
 * do not fail the test.
 * <p>
 * The final check throws {@link AssertionError} explicitly rather than using the {@code assert}
 * keyword, because {@code assert} statements are skipped unless the JVM runs with {@code -ea},
 * which would let this test pass without verifying anything.
 */
@Test
public void testSimpleHashFactoryLargeStateIdentifierDependent() {
	SADomain domain = (SADomain)this.gridWorldTest.getDomain();
	State startState = this.generateLargeGW(domain, 100);
	HashableStateFactory factory = new SimpleHashableStateFactory(false);
	Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
	int size = hashedStates.size();

	// count distinct hash codes to expose collisions
	Set<Integer> hashes = new HashSet<Integer>();
	for (HashableState hs : hashedStates) {
		hashes.add(hs.hashCode());
	}
	// reported once; the original repeated this exact line a second time when the counts differed
	System.err.println("Hashed states: " + hashedStates.size() + ", hashes: " + hashes.size());

	// hash a renamed copy of every reachable state
	Set<HashableState> renamedStates = new HashSet<HashableState>();
	for (HashableState state : hashedStates) {
		State source = state.s();
		State renamed = this.renameObjects((GridWorldState)source.copy());
		HashableState renamedHashed = factory.hashState(renamed);
		renamedStates.add(renamedHashed);
	}

	// no renamed state may collapse onto an original one
	hashedStates.addAll(renamedStates);
	if (hashedStates.size() != size * 2) {
		throw new AssertionError("expected " + (size * 2) + " states after adding renamed copies but found " + hashedStates.size());
	}

}

Source File: TestPlanning.java From burlap with Apache License 2.0

5 votes

/**
 * Plans with A* from a state with the agent at (0,0) and one location at (10,10), using the
 * negated Manhattan distance between the agent and the first location as the heuristic, then
 * rolls out the resulting policy and passes the episode to {@code evaluateEpisode}.
 */
@Test
public void testAStar() {
	GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

	Heuristic mdistHeuristic = new Heuristic() {

		@Override
		public double h(State s) {

			GridWorldState gws = (GridWorldState)s;
			GridAgent agent = gws.agent;
			GridLocation target = gws.locations.get(0);

			// per-axis distance between the agent and the first location
			int dx = Math.abs(agent.x - target.x);
			int dy = Math.abs(agent.y - target.y);

			// Manhattan distance, returned negated
			double mdist = dx + dy;
			return -mdist;
		}
	};

	// A* is given the domain, the goal condition, the hashing factory and the heuristic
	DeterministicPlanner planner = new AStar(domain, goalCondition,
		hashingFactory, mdistHeuristic);
	planner.planFromState(initialState);
	Policy p = new SDPlannerPolicy(planner);

	// execute the planned policy in the domain model and check the episode
	Episode analysis = PolicyUtils.rollout(p, initialState, domain.getModel());
	this.evaluateEpisode(analysis, true);
}

Source File: TestPlanning.java From burlap with Apache License 2.0

5 votes

/**
 * Plans with depth-first search from a state with the agent at (0,0) and one location at (10,10),
 * rolls out the resulting policy and passes the episode to {@code evaluateEpisode}.
 */
@Test
public void testDFS() {
	GridWorldState start = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

	// the trailing -1 and true are DFS options (presumably depth limit and a search flag; confirm against DFS)
	DeterministicPlanner dfs = new DFS(this.domain, this.goalCondition, this.hashingFactory, -1 , true);
	dfs.planFromState(start);

	// execute the planned policy in the domain model and check the episode
	Policy plannedPolicy = new SDPlannerPolicy(dfs);
	Episode episode = rollout(plannedPolicy, start, domain.getModel());
	this.evaluateEpisode(episode);
}

Source File: TestPlanning.java From burlap with Apache License 2.0

5 votes

/**
 * Plans with breadth-first search from a state with the agent at (0,0) and one location at
 * (10,10), rolls out the resulting policy and passes the episode to {@code evaluateEpisode}
 * with its second argument set to {@code true}.
 */
@Test
public void testBFS() {
	GridWorldState start = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

	DeterministicPlanner bfs = new BFS(this.domain, this.goalCondition, this.hashingFactory);
	bfs.planFromState(start);

	// execute the planned policy in the domain model and check the episode
	Policy plannedPolicy = new SDPlannerPolicy(bfs);
	Episode episode = rollout(plannedPolicy, start, domain.getModel());
	this.evaluateEpisode(episode, true);
}

Source File: GridWorldRewardFunction.java From burlap with Apache License 2.0

5 votes

/**
 * Looks up the reward for the agent's position in the resulting state.
 * @param s the previous state (not read)
 * @param a the action taken (not read)
 * @param sprime the resulting state; must be a {@code GridWorldState}
 * @return the reward matrix entry at the agent's (x, y) position in {@code sprime}
 * @throws RuntimeException if that position lies outside the width x height reward matrix
 */
@Override
public double reward(State s, Action a, State sprime) {

	GridWorldState next = (GridWorldState)sprime;
	int x = next.agent.x;
	int y = next.agent.y;

	boolean xInRange = x >= 0 && x < this.width;
	boolean yInRange = y >= 0 && y < this.height;
	if(!(xInRange && yInRange)){
		throw new RuntimeException("GridWorld reward matrix is only defined for a " + this.width + "x" + 
				this.height +" world, but the agent transitioned to position (" + x + "," + y + "), which is outside the bounds.");
	}

	return this.rewardMatrix[x][y];
}

Source File: QLTutorial.java From burlap_examples with MIT License

5 votes

/**
 * Runs 1000 learning episodes of {@code QLTutorial} on an 11x11 four-rooms grid world and then
 * opens an episode sequence visualizer over the recorded episodes.
 * @param args command line arguments (not read)
 */
public static void main(String[] args) {

	// domain: four rooms, 0.8 transition success probability, terminal function built from (10,10)
	GridWorldDomain gwd = new GridWorldDomain(11, 11);
	gwd.setMapToFourRooms();
	gwd.setProbSucceedTransitionDynamics(0.8);
	gwd.setTf(new GridWorldTerminalFunction(10, 10));

	SADomain domain = gwd.generateDomain();

	// initial state: agent at (0,0), no location objects
	State s = new GridWorldState(new GridAgent(0, 0));

	// simulated environment rooted at the initial state
	SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

	// the learner; the numeric arguments are forwarded to the QLTutorial constructor
	QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
			new ConstantValueFunction(), 0.1, 0.1);

	// record each learning episode, resetting the environment after every one
	final int numEpisodes = 1000;
	List<Episode> episodes = new ArrayList<Episode>(numEpisodes);
	for(int episode = 0; episode < numEpisodes; episode++){
		episodes.add(agent.runLearningEpisode(env));
		env.resetEnvironment();
	}

	// browse the recorded episodes
	Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
	new EpisodeSequenceVisualizer(v, domain, episodes);

}

Source File: VITutorial.java From burlap_examples with MIT License

5 votes

/**
 * Plans with {@code VITutorial} on an 11x11 four-rooms grid world, rolls out the resulting
 * policy once from the initial state, and opens an episode sequence visualizer on that episode.
 * @param args command line arguments (not read)
 */
public static void main(String [] args){

	GridWorldDomain gwd = new GridWorldDomain(11, 11);
	gwd.setTf(new GridWorldTerminalFunction(10, 10));
	gwd.setMapToFourRooms();

	// transition dynamics configured with a 0.8 success probability
	gwd.setProbSucceedTransitionDynamics(0.8);

	SADomain domain = gwd.generateDomain();

	// initial state: agent at (0,0), no location objects
	State s = new GridWorldState(new GridAgent(0, 0));

	// planner arguments: 0.99, a hashing factory, a constant value function built with 0.0, and 30
	// (presumably discount factor and iteration count; confirm against the VITutorial constructor)
	VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
			new ConstantValueFunction(0.0), 30);

	// plan from the initial state
	Policy p = vi.planFromState(s);

	// one rollout of the planned policy in the domain model
	Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

	// visualize the single trajectory
	Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
	new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));

}

Source File: GridWorldTerminalFunction.java From burlap with Apache License 2.0

4 votes

/**
 * Reports whether the agent's position is one of the registered terminal positions.
 * @param s the state to test; must be a {@code GridWorldState}
 * @return true if {@code terminalPositions} contains the agent's (x, y) pair
 */
@Override
public boolean isTerminal(State s) {
	GridWorldState gws = (GridWorldState)s;
	IntPair position = new IntPair(gws.agent.x, gws.agent.y);
	return this.terminalPositions.contains(position);
}

Source File: GridWorldDomain.java From burlap with Apache License 2.0

4 votes

/**
 * Launches an explorer on an 11x11 four-rooms grid world with the agent at (0,0) and a location
 * at (10,10). A visual explorer is opened by default (or when the first argument is "v"); pass
 * "t" as the first argument to start a terminal shell instead. In the visual explorer, w-a-s-d
 * move the agent.
 * @param args command line args; only the first one is inspected
 */
public static void main(String[] args) {

	GridWorldDomain gwdg = new GridWorldDomain(11, 11);
	gwdg.setMapToFourRooms();
	//gwdg.setProbSucceedTransitionDynamics(0.75);

	SADomain d = gwdg.generateDomain();

	GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

	// only an explicit "t" selects the terminal shell; "v", anything else, or no argument is visual
	boolean terminalMode = args.length > 0 && args[0].equals("t");

	if(terminalMode){
		EnvironmentShell shell = new EnvironmentShell(d, s);
		shell.start();
		return;
	}

	Visualizer v = GridWorldVisualizer.getVisualizer(gwdg.getMap());
	VisualExplorer exp = new VisualExplorer(d, v, s);

	// bind w/s/a/d to north/south/west/east
	exp.addKeyAction("w", ACTION_NORTH, "");
	exp.addKeyAction("s", ACTION_SOUTH, "");
	exp.addKeyAction("a", ACTION_WEST, "");
	exp.addKeyAction("d", ACTION_EAST, "");

	exp.initGUI();
}

Source File: TestGridWorld.java From burlap with Apache License 2.0

4 votes

/**
 * Creates the test start state: agent at (0,0) and a single location named "location0" at (10,10).
 * @return the newly built state
 */
public State generateState() {
	return new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "location0"));
}

Source File: Episode.java From burlap with Apache License 2.0

4 votes

/**
 * Demonstrates an episode round trip: rolls out a random policy on an 11x11 grid world from
 * agent position (1,3) (the rollout is given the extra argument 30), serializes the episode,
 * prints it, parses it back and prints a few properties of the parsed episode.
 * @param args command line arguments (not read)
 */
public static void main(String[] args) {
	GridWorldDomain gwd = new GridWorldDomain(11, 11);
	SADomain domain = gwd.generateDomain();
	State start = new GridWorldState(new GridAgent(1, 3));

	// roll out a random policy
	Policy randomPolicy = new RandomPolicy(domain);
	Episode original = PolicyUtils.rollout(randomPolicy, start, domain.getModel(), 30);

	// serialize and print, followed by a separator
	String serialized = original.serialize();
	System.out.println(serialized);
	System.out.println("\n\n");

	// parse the text back and print a summary of the result
	Episode parsed = Episode.parseEpisode(serialized);
	System.out.println(parsed.actionString());
	System.out.println(parsed.state(0).toString());
	System.out.println(parsed.actionSequence.size());
	System.out.println(parsed.stateSequence.size());

}

Source File: BasicBehavior.java From burlap_examples with MIT License

4 votes

/**
 * Plans with A* from the initial state, using the negated Manhattan distance from the agent to
 * (10,10) as the heuristic, then rolls out the resulting policy and writes the episode to
 * {@code outputPath + "astar"}.
 * @param outputPath prefix of the path the episode is written to
 */
public void AStarExample(String outputPath){

	Heuristic mdistHeuristic = new Heuristic() {

		public double h(State s) {
			GridAgent agent = ((GridWorldState)s).agent;

			// per-axis distance to the fixed target cell (10,10)
			int dx = Math.abs(agent.x-10);
			int dy = Math.abs(agent.y-10);

			// Manhattan distance, returned negated
			double mdist = dx + dy;
			return -mdist;
		}
	};

	DeterministicPlanner planner = new AStar(domain, goalCondition, hashingFactory, mdistHeuristic);
	Policy p = planner.planFromState(initialState);

	// execute the planned policy in the domain model and persist the episode
	Episode episode = PolicyUtils.rollout(p, initialState, domain.getModel());
	episode.write(outputPath + "astar");

}

Source File: IRLExample.java From burlap_examples with MIT License

3 votes

/**
 * Produces a copy of the source state in which the agent's y coordinate is replaced by a random
 * value in {@code [0, height)} drawn from the default {@code RandomFactory} generator.
 * @return the randomized copy
 */
public State generateState() {

	GridWorldState sampled = (GridWorldState)this.sourceState.copy();

	// draw the row first, then write it into the copy's agent
	int row = RandomFactory.getDefault().nextInt(this.height);
	sampled.touchAgent().y = row;

	return sampled;
}

Source File: OptionsExample.java From burlap_examples with MIT License

3 votes

/**
 * Creates two room options for each of the four rooms of an 11x11 four-rooms grid world,
 * executes each option from a corner state of its room, and opens an episode sequence
 * visualizer over the eight resulting episodes.
 */
public static void testOptions(){

	GridWorldDomain gwd = new GridWorldDomain(11, 11);
	gwd.setMapToFourRooms();
	SADomain domain = gwd.generateDomain();

	// room options; the numeric arguments are forwarded to createRoomOption unchanged
	// (presumably a target cell followed by the room's bounding box; confirm against createRoomOption)
	Option swToNorth = createRoomOption("swToNorth", domain, 1, 5, 0, 0, 4, 4);
	Option swToEast = createRoomOption("swToEast", domain, 5, 1, 0, 0, 4, 4);

	Option seToWest = createRoomOption("seToWest", domain, 5, 1, 6, 0, 10, 3);
	Option seToNorth = createRoomOption("seToNorth", domain, 8, 4, 6, 0, 10, 3);

	Option neToSouth = createRoomOption("neToSouth", domain, 8, 4, 6, 5, 10, 10);
	Option neToWest = createRoomOption("neToWest", domain, 5, 8, 6, 5, 10, 10);

	Option nwToEast = createRoomOption("nwToEast", domain, 5, 8, 0, 6, 4, 10);
	Option nwToSouth = createRoomOption("nwToSouth", domain, 1, 5, 0, 6, 4, 10);

	List<Episode> episodes = new ArrayList<Episode>();

	// start at (0,0)
	episodes.add(optionExecuteResult(domain, swToNorth, new GridWorldState(0, 0)));
	episodes.add(optionExecuteResult(domain, swToEast, new GridWorldState(0, 0)));

	// start at (10,0)
	episodes.add(optionExecuteResult(domain, seToWest, new GridWorldState(10, 0)));
	episodes.add(optionExecuteResult(domain, seToNorth, new GridWorldState(10, 0)));

	// start at (10,10)
	episodes.add(optionExecuteResult(domain, neToSouth, new GridWorldState(10, 10)));
	episodes.add(optionExecuteResult(domain, neToWest, new GridWorldState(10, 10)));

	// start at (0,10)
	episodes.add(optionExecuteResult(domain, nwToEast, new GridWorldState(0, 10)));
	episodes.add(optionExecuteResult(domain, nwToSouth, new GridWorldState(0, 10)));

	// the visualizer is constructed for its effect only; no reference is kept
	Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
	new EpisodeSequenceVisualizer(v, domain, episodes);

}

Source File: PlotTest.java From burlap_examples with MIT License

2 votes

/**
 * Runs a plotted learning experiment with a Q-learning agent on an 11x11 four-rooms grid world
 * whose transition dynamics are configured with a 0.8 success probability. The task terminates
 * according to the "at location" propositional function and is rewarded through a
 * {@code GoalBasedRF} built with the values 5 and -0.1.
 * @param args command line arguments (not read)
 */
public static void main(String [] args){

	// domain generator
	GridWorldDomain gw = new GridWorldDomain(11,11);
	gw.setMapToFourRooms();
	gw.setProbSucceedTransitionDynamics(0.8);

	// terminal function driven by the "at location" propositional function
	final TerminalFunction tf = new SinglePFTF(
			PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));

	// reward function built from the same terminal condition
	final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

	gw.setTf(tf);
	gw.setRf(rf);

	// both functions are registered before the domain is generated
	final OOSADomain domain = gw.generateDomain();

	// every trial starts from the same state: agent at (0,0), location "loc0" at (10,10)
	GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
	final ConstantStateGenerator sg = new ConstantStateGenerator(s);

	// state hashing used by the learner to look up states
	final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory();

	// factory handing a fresh Q-learning agent to the experimenter
	LearningAgentFactory qLearningFactory = new LearningAgentFactory() {

		public String getAgentName() {
			return "Q-learning";
		}

		public LearningAgent generateAgent() {
			return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1);
		}
	};

	// learning environment whose initial states come from the generator
	SimulatedEnvironment env = new SimulatedEnvironment(domain, sg);

	// experiment configured with 10 and 100 (presumably trials and trial length; confirm against
	// LearningAlgorithmExperimenter)
	LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env,
			10, 100, qLearningFactory);

	// plot cumulative steps per episode and average episode reward
	exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE,
			PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD);

	// run it
	exp.startExperiment();

}

burlap.domain.singleagent.gridworld.state.GridWorldState Java Examples