Java Code Examples for burlap.mdp.singleagent.environment.Environment#currentObservation()
The following examples, drawn from open-source projects that use BURLAP, show how to use burlap.mdp.singleagent.environment.Environment#currentObservation(). Each example notes its source file, originating project, and license.
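For orientation, here is a minimal, self-contained sketch of the call in isolation. The domain setup is only an illustrative assumption (an 11x11 four-rooms grid world with the agent starting at (0, 0)); any Environment implementation exposes currentObservation() the same way, returning the environment's current state from the agent's perspective.

import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;

public class CurrentObservationDemo {
    public static void main(String[] args) {
        //illustrative domain choice: an 11x11 four-rooms grid world
        GridWorldDomain gwd = new GridWorldDomain(11, 11);
        gwd.setMapToFourRooms();
        SADomain domain = gwd.generateDomain();

        //a simulated environment whose current state starts at the given initial state
        SimulatedEnvironment env = new SimulatedEnvironment(domain,
                new GridWorldState(new GridAgent(0, 0)));

        //currentObservation() returns the environment's current state as seen by the agent
        State obs = env.currentObservation();
        System.out.println(obs);
    }
}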
Example 1
Source File: QLTutorial.java From burlap_examples with MIT License
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    //initialize our episode object with the initial state of the environment
    Episode e = new Episode(env.currentObservation());

    //behave until a terminal state or max steps is reached
    State curState = env.currentObservation();
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        //select an action
        Action a = this.learningPolicy.action(curState);

        //take the action and observe outcome
        EnvironmentOutcome eo = env.executeAction(a);

        //record result
        e.transition(eo);

        //get the max Q value of the resulting state if it's not terminal, 0 otherwise
        double maxQ = eo.terminated ? 0. : this.value(eo.op);

        //update the old Q-value
        QValue oldQ = this.storedQ(curState, a);
        oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);

        //update state pointer to next environment state observed
        curState = eo.op;
        steps++;
    }

    return e;
}
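A brief usage sketch for the learning method above: in practice such an agent is driven by running repeated episodes against the same Environment and resetting it between episodes. The driver class below is hypothetical; passing -1 for maxSteps lets each episode run until a terminal state, matching the loop condition in the example.

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.singleagent.environment.Environment;

public class LearningLoopDemo {

    //hypothetical driver loop; 'agent' could be the QLTutorial agent defined above
    public static void run(LearningAgent agent, Environment env) {
        for(int i = 0; i < 1000; i++){
            Episode e = agent.runLearningEpisode(env, -1); // -1: no step cap, run until a terminal state
            env.resetEnvironment();                        // restore the environment to its initial state
            System.out.println("episode " + i + ": " + e.numTimeSteps() + " time steps");
        }
    }
}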
Example 2
Source File: PolicyUtils.java From burlap with Apache License 2.0
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env){
    Episode ea = new Episode(env.currentObservation());

    do{
        followAndRecordPolicy(p, env, ea);
    }while(!env.isInTerminalState());

    return ea;
}
Example 3
Source File: PolicyUtils.java From burlap with Apache License 2.0
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached or when the provided number of steps has been taken.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @param numSteps the maximum number of steps to take in the environment.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env, int numSteps){
    Episode ea = new Episode(env.currentObservation());

    int nSteps;
    do{
        followAndRecordPolicy(p, env, ea);
        nSteps = ea.numTimeSteps();
    }while(!env.isInTerminalState() && nSteps < numSteps);

    return ea;
}
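A short, hypothetical usage sketch for the two rollout overloads above: the policy and environment are assumed to already exist, and resetEnvironment() is called so the second, step-capped rollout starts again from the environment's initial state.

import burlap.behavior.policy.Policy;
import burlap.behavior.policy.PolicyUtils;
import burlap.behavior.singleagent.Episode;
import burlap.mdp.singleagent.environment.Environment;

public class RolloutDemo {

    //hypothetical helper illustrating the two overloads shown above
    public static void demo(Policy p, Environment env) {
        Episode full = PolicyUtils.rollout(p, env);        // follow p until a terminal state
        env.resetEnvironment();                            // start again from the initial state
        Episode capped = PolicyUtils.rollout(p, env, 100); // follow p for at most 100 steps
        System.out.println(full.numTimeSteps() + " / " + capped.numTimeSteps());
    }
}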
Example 4
Source File: Option.java From burlap with Apache License 2.0
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
    Random rand = RandomFactory.getMapped(0);

    State initial = env.currentObservation();
    State cur = initial;

    Episode episode = new Episode(cur);
    Episode history = new Episode(cur);
    double roll;
    double pT;
    int nsteps = 0;
    double r = 0.;
    double cd = 1.;
    do{
        Action a = o.policy(cur, history);
        EnvironmentOutcome eo = env.executeAction(a);
        nsteps++;
        r += cd*eo.r;
        cur = eo.op;
        cd *= discount;

        history.transition(a, eo.op, eo.r);

        AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
        episode.transition(annotatedAction, eo.op, r);

        pT = o.probabilityOfTermination(eo.op, history);
        roll = rand.nextDouble();

    }while(roll > pT && !env.isInTerminalState());

    EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

    return eoo;
}
Example 5
Source File: RemoveStateObjectCommand.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    List<String> args = (List<String>)oset.nonOptionArguments();
    if(oset.has("h")){
        os.println("[-v] objectName\nRemoves an OO-MDP object instance with name objectName" +
                "from the current state of the environment. The environment must implement StateSettableEnvironment " +
                "for this operation to work.\n\n" +
                "-v print the new Environment state after completion.");
        return 0;
    }

    StateSettableEnvironment senv = (StateSettableEnvironment) EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
    if(senv == null){
        os.println("Cannot remove object from environment state, because the environment does not implement StateSettableEnvironment");
        return 0;
    }

    if(args.size() != 1){
        return -1;
    }

    State s = env.currentObservation();
    if(!(s instanceof MutableOOState)){
        os.println("Cannot remove object from state, because state is not a MutableOOState");
        return 0;
    }

    ((MutableOOState)s).removeObject(args.get(0));
    senv.setCurStateTo(s);

    if(oset.has("v")){
        os.println(env.currentObservation().toString());
    }

    return 1;
}
Example 6
Source File: ApproximateQLearning.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode e = new Episode(initialState);

    int eStepCounter = 0;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        //check state
        State curState = stateMapping.mapState(env.currentObservation());

        //select action
        Action a = this.learningPolicy.action(curState);

        //take action
        EnvironmentOutcome eo = env.executeAction(a);

        //save outcome in memory
        this.memory.addExperience(eo);

        //record transition and manage option case
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;
        this.totalSteps += stepInc;
        e.transition(a, eo.op, eo.r);

        //perform learners
        List<EnvironmentOutcome> samples = this.memory.sampleExperiences(this.numReplay);
        this.updateQFunction(samples);

        //update stale function
        this.stepsSinceStale++;
        if(this.stepsSinceStale >= this.staleDuration){
            this.updateStaleFunction();
        }
    }

    this.totalEpisodes++;

    return e;
}
Example 7
Source File: ListActionsCommand.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("[s]\nCommand to list applicable and executable actions for the current environment observation.\n" +
                "-n: list the name of all known actions (no parameters specified), regardless of whether they are applicable in the current observation\n" +
                "-s: query applicable actions w.r.t. a POMDP hidden state, rather than environment observation. Environment must extend SimulatedPOEnvironment");
        return 0;
    }

    if(oset.has("n")){
        for(ActionType a : ((SADomain)shell.getDomain()).getActionTypes()){
            os.println(a.typeName());
        }
        return 0;
    }

    State qs = env.currentObservation();
    if(oset.has("s")){
        if(!(env instanceof SimulatedPOEnvironment)){
            os.println("Cannot query applicable actions with respect to POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
            return 0;
        }
        qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
    }

    List<Action> actions = ActionUtils.allApplicableActionsForTypes(((SADomain)shell.getDomain()).getActionTypes(), qs);
    for(Action ga : actions){
        os.println(ga.toString());
    }

    return 0;
}
Example 8
Source File: SetVarCommand.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    List<String> args = (List<String>)oset.nonOptionArguments();
    if(oset.has("h")){
        os.println("[-v] [key value]+ \nSets the values for one or more state variables in an " +
                "environment state. Requires one or more key value pairs." +
                "The environment must implement StateSettableEnvironment and the states must be MutableState instances\n\n" +
                "-v print the new Environment state after completion.");
        return 0;
    }

    StateSettableEnvironment senv = (StateSettableEnvironment) EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
    if(senv == null){
        os.println("Cannot set object values for environment states, because the environment does not implement StateSettableEnvironment");
        return 0;
    }

    if(args.size() % 2 != 0 && args.size() < 3){
        return -1;
    }

    State s = env.currentObservation();
    if(!(s instanceof MutableState)){
        os.println("Cannot modify state values, because the state does not implement MutableState");
    }

    for(int i = 0; i < args.size(); i+=2){
        try{
            ((MutableState)s).set(args.get(i), args.get(i+1));
        }catch(Exception e){
            os.println("Could not set key " + args.get(i) + " to value " + args.get(i+1) + ". Aborting.");
            return 0;
        }
    }

    senv.setCurStateTo(s);

    if(oset.has("v")){
        os.println(senv.currentObservation().toString());
    }

    return 1;
}
Example 9
Source File: ListPropFunctions.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("[s]\nCommand to list all true (or false) grounded propositional function for the current environment observation.\n" +
                "-f: list false grounded propositional functions, rather than true ones. " +
                "-n: list the name of all propositional functions, rather than grounded evaluations\n" +
                "-s: evaluate propositional functions on POMDP environment hidden state, rather than environment observation. Environment must extend SimulatedPOEnvironment");
        return 0;
    }

    if(!(shell.getDomain() instanceof OODomain)){
        os.println("cannot query propositional functions because the domain is not an OODomain");
        return 0;
    }

    if(oset.has("n")){
        for(PropositionalFunction pf : ((OODomain)shell.getDomain()).propFunctions()){
            os.println(pf.getName());
        }
        return 0;
    }

    State qs = env.currentObservation();
    if(oset.has("s")){
        if(!(env instanceof SimulatedPOEnvironment)){
            os.println("Cannot query applicable actions with respect to POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
            return 0;
        }
        qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
    }

    List<GroundedProp> gps = PropositionalFunction.allGroundingsFromList(((OODomain)shell.getDomain()).propFunctions(), (OOState)qs);
    for(GroundedProp gp : gps){
        if(gp.isTrue((OOState)qs) == !oset.has("f")){
            os.println(gp.toString());
        }
    }

    return 0;
}
Example 10
Source File: DeepQTester.java From burlap_caffe with Apache License 2.0
@Override
public Episode runTestEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode e = new Episode(initialState);

    int eStepCounter = 0;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        //check state
        State curState = stateMapping.mapState(env.currentObservation());

        //select action
        Action a = this.policy.action(curState);

        //take action
        EnvironmentOutcome eo = env.executeAction(a);

        //save outcome in memory
        this.memory.addExperience(eo);

        //record transition and manage option case
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;
        e.transition(a, eo.op, eo.r);
    }

    return e;
}
Example 11
Source File: ActorCritic.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    Episode ea = new Episode(initialState);

    State curState = initialState;

    this.critic.startEpisode(curState);
    this.actor.startEpisode(curState);

    int timeSteps = 0;
    while(!env.isInTerminalState() && (timeSteps < maxSteps || maxSteps == -1)){

        Action ga = this.actor.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(eo);

        double critique = this.critic.critique(eo);
        this.actor.update(eo, critique);

        curState = env.currentObservation();
        timeSteps++;
    }

    this.critic.endEpisode();
    this.actor.endEpisode();

    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}
Example 12
Source File: ARTDP.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    Episode ea = new Episode(initialState);

    State curState = initialState;
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){
        Action ga = policy.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        this.model.updateModel(eo);

        this.modelPlanner.performBellmanUpdateOn(eo.o);

        curState = env.currentObservation();
        steps++;
    }

    return ea;
}
Example 13
Source File: QLearning.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode ea = new Episode(initialState);
    HashableState curState = this.stateHash(initialState);
    eStepCounter = 0;

    maxQChangeInLastEpisode = 0.;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        Action action = learningPolicy.action(curState.s());
        QValue curQ = this.getQ(curState, action);

        EnvironmentOutcome eo;
        if(!(action instanceof Option)){
            eo = env.executeAction(action);
        }
        else{
            eo = ((Option)action).control(env, this.gamma);
        }

        HashableState nextState = this.stateHash(eo.op);
        double maxQ = 0.;

        if(!eo.terminated){
            maxQ = this.getMaxQ(nextState);
        }

        //manage option specifics
        double r = eo.r;
        double discount = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).discount : this.gamma;
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;

        if(!(action instanceof Option) || !this.shouldDecomposeOptions){
            ea.transition(action, nextState.s(), r);
        }
        else{
            ea.appendAndMergeEpisodeAnalysis(((EnvironmentOptionOutcome)eo).episode);
        }

        double oldQ = curQ.q;

        //update Q-value
        curQ.q = curQ.q + this.learningRate.pollLearningRate(this.totalNumberOfSteps, curState.s(), action) * (r + (discount * maxQ) - curQ.q);

        double deltaQ = Math.abs(oldQ - curQ.q);
        if(deltaQ > maxQChangeInLastEpisode){
            maxQChangeInLastEpisode = deltaQ;
        }

        //move on polling environment for its current state in case it changed during processing
        curState = this.stateHash(env.currentObservation());
        this.totalNumberOfSteps++;
    }

    return ea;
}
Example 14
Source File: PotentialShapedRMax.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    this.modelPlanner.initializePlannerIn(initialState);

    Episode ea = new Episode(initialState);

    Policy policy = this.createUnmodeledFavoredPolicy();

    State curState = initialState;
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        Action ga = policy.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        boolean modeledTerminal = this.model.terminal(eo.op);

        if(!this.model.transitionIsModeled(curState, ga)
                || (!KWIKModel.Helper.stateTransitionsModeled(model, this.getActionTypes(), eo.op) && !modeledTerminal)){
            this.model.updateModel(eo);
            if(this.model.transitionIsModeled(curState, ga) || (eo.terminated != modeledTerminal && modeledTerminal != this.model.terminal(eo.op))){
                this.modelPlanner.modelChanged(curState);
                policy = this.createUnmodeledFavoredPolicy();
            }
        }

        curState = env.currentObservation();
        steps++;
    }

    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}