Java Code Examples for burlap.mdp.singleagent.environment.Environment#currentObservation()
The following examples, drawn from open-source projects that use BURLAP, show how to use burlap.mdp.singleagent.environment.Environment#currentObservation(). Each example notes its source file, originating project, and license.
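For orientation, here is a minimal, self-contained sketch of the call in isolation. The domain setup is only an illustrative assumption (an 11x11 four-rooms grid world with the agent starting at (0, 0)); any Environment implementation exposes currentObservation() the same way, returning the environment's current state from the agent's perspective.

import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;

public class CurrentObservationDemo {
    public static void main(String[] args) {
        //illustrative domain choice: an 11x11 four-rooms grid world
        GridWorldDomain gwd = new GridWorldDomain(11, 11);
        gwd.setMapToFourRooms();
        SADomain domain = gwd.generateDomain();

        //a simulated environment whose current state starts at the given initial state
        SimulatedEnvironment env = new SimulatedEnvironment(domain,
                new GridWorldState(new GridAgent(0, 0)));

        //currentObservation() returns the environment's current state as seen by the agent
        State obs = env.currentObservation();
        System.out.println(obs);
    }
}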
Example 1
Source File: QLTutorial.java From burlap_examples with MIT License
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    //initialize our episode object with the initial state of the environment
    Episode e = new Episode(env.currentObservation());

    //behave until a terminal state or max steps is reached
    State curState = env.currentObservation();
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        //select an action
        Action a = this.learningPolicy.action(curState);

        //take the action and observe outcome
        EnvironmentOutcome eo = env.executeAction(a);

        //record result
        e.transition(eo);

        //get the max Q value of the resulting state if it's not terminal, 0 otherwise
        double maxQ = eo.terminated ? 0. : this.value(eo.op);

        //update the old Q-value
        QValue oldQ = this.storedQ(curState, a);
        oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);

        //update state pointer to next environment state observed
        curState = eo.op;
        steps++;
    }

    return e;
}
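A brief usage sketch for the learning method above: in practice such an agent is driven by running repeated episodes against the same Environment and resetting it between episodes. The driver class below is hypothetical; passing -1 for maxSteps lets each episode run until a terminal state, matching the loop condition in the example.

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.singleagent.environment.Environment;

public class LearningLoopDemo {

    //hypothetical driver loop; 'agent' could be the QLTutorial agent defined above
    public static void run(LearningAgent agent, Environment env) {
        for(int i = 0; i < 1000; i++){
            Episode e = agent.runLearningEpisode(env, -1); // -1: no step cap, run until a terminal state
            env.resetEnvironment();                        // restore the environment to its initial state
            System.out.println("episode " + i + ": " + e.numTimeSteps() + " time steps");
        }
    }
}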
Example 2
Source File: PolicyUtils.java From burlap with Apache License 2.0
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env){
    Episode ea = new Episode(env.currentObservation());

    do{
        followAndRecordPolicy(p, env, ea);
    }while(!env.isInTerminalState());

    return ea;
}
Example 3
Source File: PolicyUtils.java From burlap with Apache License 2.0
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached or when the provided number of steps has been taken.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @param numSteps the maximum number of steps to take in the environment.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env, int numSteps){
    Episode ea = new Episode(env.currentObservation());

    int nSteps;
    do{
        followAndRecordPolicy(p, env, ea);
        nSteps = ea.numTimeSteps();
    }while(!env.isInTerminalState() && nSteps < numSteps);

    return ea;
}
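A short, hypothetical usage sketch for the two rollout overloads above: the policy and environment are assumed to already exist, and resetEnvironment() is called so the second, step-capped rollout starts again from the environment's initial state.

import burlap.behavior.policy.Policy;
import burlap.behavior.policy.PolicyUtils;
import burlap.behavior.singleagent.Episode;
import burlap.mdp.singleagent.environment.Environment;

public class RolloutDemo {

    //hypothetical helper illustrating the two overloads shown above
    public static void demo(Policy p, Environment env) {
        Episode full = PolicyUtils.rollout(p, env);        // follow p until a terminal state
        env.resetEnvironment();                            // start again from the initial state
        Episode capped = PolicyUtils.rollout(p, env, 100); // follow p for at most 100 steps
        System.out.println(full.numTimeSteps() + " / " + capped.numTimeSteps());
    }
}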
Example 4
Source File: Option.java From burlap with Apache License 2.0
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
    Random rand = RandomFactory.getMapped(0);

    State initial = env.currentObservation();
    State cur = initial;

    Episode episode = new Episode(cur);
    Episode history = new Episode(cur);
    double roll;
    double pT;
    int nsteps = 0;
    double r = 0.;
    double cd = 1.;
    do{
        Action a = o.policy(cur, history);
        EnvironmentOutcome eo = env.executeAction(a);
        nsteps++;
        r += cd*eo.r;
        cur = eo.op;
        cd *= discount;

        history.transition(a, eo.op, eo.r);

        AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
        episode.transition(annotatedAction, eo.op, r);

        pT = o.probabilityOfTermination(eo.op, history);
        roll = rand.nextDouble();

    }while(roll > pT && !env.isInTerminalState());

    EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

    return eoo;
}
Example 5
Source File: RemoveStateObjectCommand.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    List<String> args = (List<String>)oset.nonOptionArguments();
    if(oset.has("h")){
        os.println("[-v] objectName\nRemoves an OO-MDP object instance with name objectName" +
                "from the current state of the environment. The environment must implement StateSettableEnvironment " +
                "for this operation to work.\n\n" +
                "-v print the new Environment state after completion.");
        return 0;
    }

    StateSettableEnvironment senv = (StateSettableEnvironment) EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
    if(senv == null){
        os.println("Cannot remove object from environment state, because the environment does not implement StateSettableEnvironment");
        return 0;
    }

    if(args.size() != 1){
        return -1;
    }

    State s = env.currentObservation();
    if(!(s instanceof MutableOOState)){
        os.println("Cannot remove object from state, because state is not a MutableOOState");
        return 0;
    }

    ((MutableOOState)s).removeObject(args.get(0));
    senv.setCurStateTo(s);

    if(oset.has("v")){
        os.println(env.currentObservation().toString());
    }

    return 1;
}
Example 6
Source File: ApproximateQLearning.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode e = new Episode(initialState);

    int eStepCounter = 0;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        //check state
        State curState = stateMapping.mapState(env.currentObservation());

        //select action
        Action a = this.learningPolicy.action(curState);

        //take action
        EnvironmentOutcome eo = env.executeAction(a);

        //save outcome in memory
        this.memory.addExperience(eo);

        //record transition and manage option case
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;
        this.totalSteps += stepInc;
        e.transition(a, eo.op, eo.r);

        //perform learners
        List<EnvironmentOutcome> samples = this.memory.sampleExperiences(this.numReplay);
        this.updateQFunction(samples);

        //update stale function
        this.stepsSinceStale++;
        if(this.stepsSinceStale >= this.staleDuration){
            this.updateStaleFunction();
        }
    }

    this.totalEpisodes++;

    return e;
}
Example 7
Source File: ListActionsCommand.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("[s]\nCommand to list applicable and executable actions for the current environment observation.\n" +
                "-n: list the name of all known actions (no parameters specified), regardless of whether they are applicable in the current observation\n" +
                "-s: query applicable actions w.r.t. a POMDP hidden state, rather than environment observation. Environment must extend SimulatedPOEnvironment");
        return 0;
    }

    if(oset.has("n")){
        for(ActionType a : ((SADomain)shell.getDomain()).getActionTypes()){
            os.println(a.typeName());
        }
        return 0;
    }

    State qs = env.currentObservation();
    if(oset.has("s")){
        if(!(env instanceof SimulatedPOEnvironment)){
            os.println("Cannot query applicable actions with respect to POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
            return 0;
        }
        qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
    }

    List<Action> actions = ActionUtils.allApplicableActionsForTypes(((SADomain)shell.getDomain()).getActionTypes(), qs);
    for(Action ga : actions){
        os.println(ga.toString());
    }

    return 0;
}
Example 8
Source File: SetVarCommand.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    List<String> args = (List<String>)oset.nonOptionArguments();
    if(oset.has("h")){
        os.println("[-v] [key value]+ \nSets the values for one or more state variables in an " +
                "environment state. Requires one or more key value pairs." +
                "The environment must implement StateSettableEnvironment and the states must be MutableState instances\n\n" +
                "-v print the new Environment state after completion.");
        return 0;
    }

    StateSettableEnvironment senv = (StateSettableEnvironment) EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
    if(senv == null){
        os.println("Cannot set object values for environment states, because the environment does not implement StateSettableEnvironment");
        return 0;
    }

    if(args.size() % 2 != 0 && args.size() < 3){
        return -1;
    }

    State s = env.currentObservation();
    if(!(s instanceof MutableState)){
        os.println("Cannot modify state values, because the state does not implement MutableState");
    }

    for(int i = 0; i < args.size(); i+=2){
        try{
            ((MutableState)s).set(args.get(i), args.get(i+1));
        }catch(Exception e){
            os.println("Could not set key " + args.get(i) + " to value " + args.get(i+1) + ". Aborting.");
            return 0;
        }
    }

    senv.setCurStateTo(s);

    if(oset.has("v")){
        os.println(senv.currentObservation().toString());
    }

    return 1;
}
Example 9
Source File: ListPropFunctions.java From burlap with Apache License 2.0
@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("[s]\nCommand to list all true (or false) grounded propositional function for the current environment observation.\n" +
                "-f: list false grounded propositional functions, rather than true ones. " +
                "-n: list the name of all propositional functions, rather than grounded evaluations\n" +
                "-s: evaluate propositional functions on POMDP environment hidden state, rather than environment observation. Environment must extend SimulatedPOEnvironment");
        return 0;
    }

    if(!(shell.getDomain() instanceof OODomain)){
        os.println("cannot query propositional functions because the domain is not an OODomain");
        return 0;
    }

    if(oset.has("n")){
        for(PropositionalFunction pf : ((OODomain)shell.getDomain()).propFunctions()){
            os.println(pf.getName());
        }
        return 0;
    }

    State qs = env.currentObservation();
    if(oset.has("s")){
        if(!(env instanceof SimulatedPOEnvironment)){
            os.println("Cannot query applicable actions with respect to POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
            return 0;
        }
        qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
    }

    List<GroundedProp> gps = PropositionalFunction.allGroundingsFromList(((OODomain)shell.getDomain()).propFunctions(), (OOState)qs);
    for(GroundedProp gp : gps){
        if(gp.isTrue((OOState)qs) == !oset.has("f")){
            os.println(gp.toString());
        }
    }

    return 0;
}
Example 10
Source File: DeepQTester.java From burlap_caffe with Apache License 2.0
@Override
public Episode runTestEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode e = new Episode(initialState);

    int eStepCounter = 0;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        //check state
        State curState = stateMapping.mapState(env.currentObservation());

        //select action
        Action a = this.policy.action(curState);

        //take action
        EnvironmentOutcome eo = env.executeAction(a);

        //save outcome in memory
        this.memory.addExperience(eo);

        //record transition and manage option case
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;
        e.transition(a, eo.op, eo.r);
    }

    return e;
}
Example 11
Source File: ActorCritic.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    Episode ea = new Episode(initialState);

    State curState = initialState;

    this.critic.startEpisode(curState);
    this.actor.startEpisode(curState);

    int timeSteps = 0;
    while(!env.isInTerminalState() && (timeSteps < maxSteps || maxSteps == -1)){

        Action ga = this.actor.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(eo);

        double critique = this.critic.critique(eo);
        this.actor.update(eo, critique);

        curState = env.currentObservation();
        timeSteps++;
    }

    this.critic.endEpisode();
    this.actor.endEpisode();

    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}
Example 12
Source File: ARTDP.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    Episode ea = new Episode(initialState);

    State curState = initialState;
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){
        Action ga = policy.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        this.model.updateModel(eo);

        this.modelPlanner.performBellmanUpdateOn(eo.o);

        curState = env.currentObservation();
        steps++;
    }

    return ea;
}
Example 13
Source File: QLearning.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode ea = new Episode(initialState);
    HashableState curState = this.stateHash(initialState);
    eStepCounter = 0;

    maxQChangeInLastEpisode = 0.;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        Action action = learningPolicy.action(curState.s());
        QValue curQ = this.getQ(curState, action);

        EnvironmentOutcome eo;
        if(!(action instanceof Option)){
            eo = env.executeAction(action);
        }
        else{
            eo = ((Option)action).control(env, this.gamma);
        }

        HashableState nextState = this.stateHash(eo.op);
        double maxQ = 0.;

        if(!eo.terminated){
            maxQ = this.getMaxQ(nextState);
        }

        //manage option specifics
        double r = eo.r;
        double discount = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).discount : this.gamma;
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;

        if(!(action instanceof Option) || !this.shouldDecomposeOptions){
            ea.transition(action, nextState.s(), r);
        }
        else{
            ea.appendAndMergeEpisodeAnalysis(((EnvironmentOptionOutcome)eo).episode);
        }

        double oldQ = curQ.q;

        //update Q-value
        curQ.q = curQ.q + this.learningRate.pollLearningRate(this.totalNumberOfSteps, curState.s(), action) * (r + (discount * maxQ) - curQ.q);

        double deltaQ = Math.abs(oldQ - curQ.q);
        if(deltaQ > maxQChangeInLastEpisode){
            maxQChangeInLastEpisode = deltaQ;
        }

        //move on polling environment for its current state in case it changed during processing
        curState = this.stateHash(env.currentObservation());
        this.totalNumberOfSteps++;
    }

    return ea;
}
Example 14
Source File: PotentialShapedRMax.java From burlap with Apache License 2.0
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    this.modelPlanner.initializePlannerIn(initialState);

    Episode ea = new Episode(initialState);

    Policy policy = this.createUnmodeledFavoredPolicy();

    State curState = initialState;
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        Action ga = policy.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        boolean modeledTerminal = this.model.terminal(eo.op);

        if(!this.model.transitionIsModeled(curState, ga)
                || (!KWIKModel.Helper.stateTransitionsModeled(model, this.getActionTypes(), eo.op) && !modeledTerminal)){
            this.model.updateModel(eo);
            if(this.model.transitionIsModeled(curState, ga) || (eo.terminated != modeledTerminal && modeledTerminal != this.model.terminal(eo.op))){
                this.modelPlanner.modelChanged(curState);
                policy = this.createUnmodeledFavoredPolicy();
            }
        }

        curState = env.currentObservation();
        steps++;
    }

    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}