burlap.mdp.core.action.Action Java Examples
The following examples show how to use
burlap.mdp.core.action.Action.
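Before the per-project examples, here is a minimal, self-contained sketch of the interface itself. It uses SimpleAction, BURLAP's basic unparameterized Action implementation; the class name ActionNameDemo and the action name "north" are illustrative placeholders, not part of any project below.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.SimpleAction;

public class ActionNameDemo {

    public static void main(String[] args) {
        // an unparameterized action is identified purely by its name
        Action north = new SimpleAction("north");
        System.out.println(north.actionName());                      // prints "north"

        // equality is based on the action name, which is why the examples
        // below dispatch on actionName() or equals()
        System.out.println(north.equals(new SimpleAction("north"))); // true
        System.out.println(north.copy().actionName());               // a copy keeps the same name
    }
}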
Example #1
Source File: ExampleOOGridWorld.java From burlap_examples with MIT License

protected int actionDir(Action a){
    int adir = -1;
    if(a.actionName().equals(ACTION_NORTH)){
        adir = 0;
    }
    else if(a.actionName().equals(ACTION_SOUTH)){
        adir = 1;
    }
    else if(a.actionName().equals(ACTION_EAST)){
        adir = 2;
    }
    else if(a.actionName().equals(ACTION_WEST)){
        adir = 3;
    }
    return adir;
}
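The direction index returned by actionDir is typically translated into a coordinate displacement before the move is applied. The helper below is a hedged illustration of that pattern; the method name movementDelta and the delta table are assumptions, not part of the ExampleOOGridWorld source.

// Hypothetical helper: maps the direction index returned by actionDir(Action)
// to a (dx, dy) displacement on the grid.
protected int[] movementDelta(int adir){
    switch(adir){
        case 0:  return new int[]{0, 1};   // north
        case 1:  return new int[]{0, -1};  // south
        case 2:  return new int[]{1, 0};   // east
        case 3:  return new int[]{-1, 0};  // west
        default: return new int[]{0, 0};   // unknown action: no movement
    }
}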
Example #2
Source File: BeliefAgent.java From burlap with Apache License 2.0

/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
    Episode ea = new Episode();
    ea.initializeInState(this.environment.currentObservation());
    int c = 0;
    while(!this.environment.isInTerminalState() && c < maxSteps){
        Action ga = this.getAction(this.curBelief);
        EnvironmentOutcome eo = environment.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        //update our belief
        this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);

        c++;
    }

    return ea;
}
Example #3
Source File: ExampleGridWorld.java From burlap_examples with MIT License

protected int actionDir(Action a){
    int adir = -1;
    if(a.actionName().equals(ACTION_NORTH)){
        adir = 0;
    }
    else if(a.actionName().equals(ACTION_SOUTH)){
        adir = 1;
    }
    else if(a.actionName().equals(ACTION_EAST)){
        adir = 2;
    }
    else if(a.actionName().equals(ACTION_WEST)){
        adir = 3;
    }
    return adir;
}
Example #4
Source File: QLTutorial.java From burlap_examples with MIT License

@Override
public List<QValue> qValues(State s) {

    //first get hashed state
    HashableState sh = this.hashingFactory.hashState(s);

    //check if we already have stored values
    List<QValue> qs = this.qValues.get(sh);

    //create and add initialized Q-values if we don't have them stored for this state
    if(qs == null){
        List<Action> actions = this.applicableActions(s);
        qs = new ArrayList<QValue>(actions.size());
        //create a Q-value for each action
        for(Action a : actions){
            //add q with initialized value
            qs.add(new QValue(s, a, this.qinit.qValue(s, a)));
        }
        //store this for later
        this.qValues.put(sh, qs);
    }

    return qs;
}
Example #5
Source File: LinearDiffRFVInit.java From burlap with Apache License 2.0

@Override
public double reward(State s, Action a, State sprime) {
    double [] features;
    if(this.rfFeaturesAreForNextState){
        features = this.rfFvGen.features(sprime);
    }
    else{
        features = this.rfFvGen.features(s);
    }
    double sum = 0.;
    for(int i = 0; i < features.length; i++){
        sum += features[i] * this.parameters[i];
    }
    return sum;
}
Example #6
Source File: QLearning.java From burlap with Apache License 2.0

/**
 * Returns the {@link QLearningStateNode} object stored for the given hashed state. If no {@link QLearningStateNode} object
 * is stored, then it is created and has its Q-values initialized using this object's {@link burlap.behavior.valuefunction.QFunction} data member.
 * @param s the hashed state for which to get the {@link QLearningStateNode} object
 * @return the {@link QLearningStateNode} object stored for the given hashed state
 */
protected QLearningStateNode getStateNode(HashableState s){

    QLearningStateNode node = qFunction.get(s);

    if(node == null){
        node = new QLearningStateNode(s);
        List<Action> gas = this.applicableActions(s.s());
        if(gas.isEmpty()){
            gas = this.applicableActions(s.s());
            throw new RuntimeErrorException(new Error("No possible actions in this state, cannot continue Q-learning"));
        }
        for(Action ga : gas){
            node.addQValue(ga, qInitFunction.qValue(s.s(), ga));
        }

        qFunction.put(s, node);
    }

    return node;
}
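getStateNode is called internally while QLearning learns, creating and initializing Q-values for each newly encountered state. The following is a minimal sketch of driving QLearning in BURLAP's grid world domain; the particular constructor arguments, goal location, and episode/step counts are illustrative choices, not mandated by this example.

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.tdmethods.QLearning;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldTerminalFunction;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class QLearningDriver {

    public static void main(String[] args) {
        // 11x11 four-rooms grid world with a goal in the far corner; values are illustrative
        GridWorldDomain gwd = new GridWorldDomain(11, 11);
        gwd.setMapToFourRooms();
        gwd.setTf(new GridWorldTerminalFunction(10, 10));
        SADomain domain = gwd.generateDomain();

        SimulatedEnvironment env = new SimulatedEnvironment(domain,
                new GridWorldState(new GridAgent(0, 0)));

        // gamma = 0.99, tabular state hashing, Q-values initialized to 0, learning rate 1.0
        QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 1.);

        // each learning episode exercises getStateNode as new states are encountered
        for(int i = 0; i < 50; i++){
            Episode e = agent.runLearningEpisode(env, 200);
            env.resetEnvironment();
        }
    }
}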
Example #7
Source File: UCTStateNode.java From burlap with Apache License 2.0

/**
 * Initializes the UCT state node.
 * @param s the state that this node wraps
 * @param d the depth of the node
 * @param actionTypes the possible OO-MDP actions that can be taken
 * @param constructor a {@link UCTActionNode} factory that can be used to create ActionNodes for each of the actions.
 */
public UCTStateNode(HashableState s, int d, List <ActionType> actionTypes, UCTActionConstructor constructor){

    state = s;
    depth = d;
    n = 0;

    actionNodes = new ArrayList<UCTActionNode>();
    List<Action> actions = ActionUtils.allApplicableActionsForTypes(actionTypes, s.s());
    for(Action a : actions){
        UCTActionNode an = constructor.generate(a);
        actionNodes.add(an);
    }
}
Example #8
Source File: ActionControllerPlaceBlock.java From burlapcraft with GNU Lesser General Public License v3.0

@Override
public int executeAction(Action a) {
    System.out.println("Place Block");
    HelperActions.placeBlock();
    return this.delayMS;
}
Example #9
Source File: MinecraftModel.java From burlapcraft with GNU Lesser General Public License v3.0

@Override
public State sample(State s, Action a) {

    GenericOOState gs = (GenericOOState)s.copy();

    String aname = a.actionName();
    if(aname.equals(HelperNameSpace.ACTION_MOVE)){
        simMove(gs);
    }
    else if(aname.equals(HelperNameSpace.ACTION_ROTATE_LEFT)){
        simRotate(gs, HelperNameSpace.RotDirection.size - 1);
    }
    else if(aname.equals(HelperNameSpace.ACTION_ROTATE_RIGHT)){
        simRotate(gs, 1);
    }
    else if(aname.equals(HelperNameSpace.ACTION_AHEAD)){
        simPitch(gs, 0);
    }
    else if(aname.equals(HelperNameSpace.ACTION_DOWN_ONE)){
        simPitch(gs, HelperNameSpace.VertDirection.size - 1);
    }
    else if(aname.equals(HelperNameSpace.ACTION_PLACE_BLOCK)){
        simPlace(gs);
    }
    else if(aname.equals(HelperNameSpace.ACTION_DEST_BLOCK)){
        simDestroy(gs);
    }
    else if(aname.equals(HelperNameSpace.ACTION_CHANGE_ITEM)){
        simChangeItem(gs);
    }
    else{
        throw new RuntimeException("MinecraftModel is not defined for action " + aname);
    }

    return gs;
}
Example #10
Source File: TitForTat.java From burlap with Apache License 2.0

/**
 * Initializes with the specified cooperate and defect actions for both players.
 * @param domain the domain in which this agent will play.
 * @param coop the cooperate action for both players
 * @param defect the defect action for both players
 */
public TitForTatAgentFactory(SGDomain domain, Action coop, Action defect){
    this.domain = domain;
    this.myCoop = coop;
    this.myDefect = defect;
    this.opponentCoop = coop;
    this.opponentDefect = defect;
}
Example #11
Source File: UCTTreeWalkPolicy.java From burlap with Apache License 2.0

@Override
public double actionProb(State s, Action a) {
    if(this.action(s).equals(a)){
        return 1.;
    }
    return 0.;
}
Example #12
Source File: GraphDefinedDomain.java From burlap with Apache License 2.0

@Override
public List<Action> allApplicableActions(State s) {
    Action a = new GraphAction(aId);
    if(applicableInState(s)){
        return Arrays.asList(a);
    }
    return new ArrayList<Action>();
}
Example #13
Source File: QLTutorial.java From burlap_examples with MIT License

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    //initialize our episode object with the initial state of the environment
    Episode e = new Episode(env.currentObservation());

    //behave until a terminal state or max steps is reached
    State curState = env.currentObservation();
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        //select an action
        Action a = this.learningPolicy.action(curState);

        //take the action and observe outcome
        EnvironmentOutcome eo = env.executeAction(a);

        //record result
        e.transition(eo);

        //get the max Q value of the resulting state if it's not terminal, 0 otherwise
        double maxQ = eo.terminated ? 0. : this.value(eo.op);

        //update the old Q-value
        QValue oldQ = this.storedQ(curState, a);
        oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);

        //update state pointer to next environment state observed
        curState = eo.op;
        steps++;
    }

    return e;
}
Example #14
Source File: KWIKModel.java From burlap with Apache License 2.0

public static List<Action> unmodeledActions(KWIKModel model, List<ActionType> actionTypes, State s){
    List<Action> actions = ActionUtils.allApplicableActionsForTypes(actionTypes, s);
    List<Action> unmodeled = new ArrayList<Action>(actions.size());
    for(Action a : actions){
        if(!model.transitionIsModeled(s, a)){
            unmodeled.add(a);
        }
    }
    return unmodeled;
}
Example #15
Source File: FourierBasis.java From burlap with Apache License 2.0

@Override
public FourierBasis copy() {
    FourierBasis fb = new FourierBasis(this.inputFeatures, this.order, this.maxNonZeroCoefficients);
    fb.numStateVariables = this.numStateVariables;
    fb.coefficientVectors = new ArrayList<short[]>(this.coefficientVectors);
    fb.actionFeatureMultiplier = new HashMap<Action, Integer>(this.actionFeatureMultiplier);
    return fb;
}
Example #16
Source File: PolicyFromJointPolicy.java From burlap with Apache License 2.0

@Override
public Action action(State s) {
    if(!this.synchronizeJointActionSelectionAmongAgents){
        return ((JointAction)this.jointPolicy.action(s)).action(this.actingAgent);
    }
    else{
        return this.jointPolicy.getAgentSynchronizedActionSelection(this.actingAgent, s);
    }
}
Example #17
Source File: SDPlannerPolicy.java From burlap with Apache License 2.0

@Override
public double actionProb(State s, Action a) {
    if(a.equals(this.action(s))){
        return 1.;
    }
    return 0.;
}
Example #18
Source File: FrostbiteRF.java From burlap with Apache License 2.0

@Override
public double reward(State s, Action a, State sprime) {
    if (inWater.someGroundingIsTrue((OOState)sprime))
        return lostReward;
    if (iglooBuilt.someGroundingIsTrue((OOState)sprime) && onIce.someGroundingIsTrue((OOState)s))
        return goalReward;
    if (numberPlatformsActive((FrostbiteState)s) != numberPlatformsActive((FrostbiteState)sprime))
        return activatedPlatformReward;
    return defaultReward;
}
Example #19
Source File: EquilibriumPlayingSGAgent.java From burlap with Apache License 2.0

@Override
public Action action(State s) {

    List<Action> myActions = ActionUtils.allApplicableActionsForTypes(this.agentType.actions, s);
    BimatrixTuple bimatrix = this.constructBimatrix(s, myActions);
    solver.solve(bimatrix.rowPayoffs, bimatrix.colPayoffs);
    double [] strategy = solver.getLastComputedRowStrategy();
    Action selection = myActions.get(this.sampleStrategy(strategy));

    return selection;
}
Example #20
Source File: StaticWeightedAStar.java From burlap with Apache License 2.0

@Override
public double computeF(PrioritizedSearchNode parentNode, Action generatingAction, HashableState successorState, double r) {
    double cumR = 0.;
    if(parentNode != null){
        double pCumR = cumulatedRewardMap.get(parentNode.s);
        cumR = pCumR + r;
    }

    double H = heuristic.h(successorState.s());
    lastComputedCumR = cumR;
    double F = cumR + (this.epsilonP1*H);

    return F;
}
Example #21
Source File: ManualAgentsCommands.java From burlap with Apache License 2.0

@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {

    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("agentName actionName [actionParam*]\n" +
                "Sets the action for manual agent named agentName to the action with the name actionName. If the action " +
                "is a parameterized action, then the parameters must also be specified.");
        return 0;
    }

    List<String> args = (List<String>)oset.nonOptionArguments();
    if(args.size() < 2){
        return -1;
    }

    String agentName = args.get(0);
    String aname = args.get(1);

    ActionType action = ((SGDomain)shell.getDomain()).getActionType(aname);
    if(action == null){
        os.println("Cannot set action to " + aname + " because that action name is not known.");
        return 0;
    }

    Action ga = action.associatedAction(this.actionArgs(args));

    ManualSGAgent agent = manualAgents.get(agentName);
    if(agent == null){
        os.println("No manual agent named " + agentName);
        return 0;
    }

    agent.setNextAction(ga);

    return 0;
}
Example #22
Source File: ApproximateQLearning.java From burlap with Apache License 2.0

@Override
public List<QValue> qValues(State s) {
    s = this.stateMapping.mapState(s);
    List<Action> actions = this.applicableActions(s);
    List<QValue> qs = new ArrayList<QValue>(actions.size());
    for(Action a : actions){
        QValue q = new QValue(s, a, this.qValue(s, a));
        qs.add(q);
    }
    return qs;
}
Example #23
Source File: TabularModel.java From burlap with Apache License 2.0

@Override
public boolean transitionIsModeled(State s, Action ga) {
    StateActionNode san = this.getStateActionNode(this.hashingFactory.hashState(s), ga);
    if(san == null){
        return false;
    }
    if(san.nTries < this.nConfident){
        return false;
    }
    return true;
}
Example #24
Source File: Visualizer.java From burlap with Apache License 2.0

/**
 * Updates the state and action for the {@link burlap.visualizer.StateRenderLayer} and {@link burlap.visualizer.StateActionRenderLayer}; then repaints.
 * @param s the {@link State} to be painted.
 * @param a the {@link Action} to be painted.
 */
public void updateStateAction(State s, Action a){
    this.srender.updateState(s);
    if(this.sarender != null) {
        this.sarender.updateRenderedStateAction(s, a);
    }
    repaint();
}
Example #25
Source File: DQN.java From burlap_caffe with Apache License 2.0

@Override
public double evaluate(State state, Action action) {
    FloatBlob output = qValuesForState(state);
    int a = actionSet.map(action);
    return output.data_at(0, a, 0, 0);
}
Example #26
Source File: CPClassicModel.java From burlap with Apache License 2.0

@Override
public State sample(State s, Action a) {

    s = s.copy();

    if(a.actionName().equals(CartPoleDomain.ACTION_RIGHT)){
        return moveClassicModel(s, 1);
    }
    else if(a.actionName().equals(CartPoleDomain.ACTION_LEFT)){
        return moveClassicModel(s, -1);
    }
    throw new RuntimeException("Unknown action " + a.actionName());
}
Example #27
Source File: BoltzmannPolicyGradient.java From burlap with Apache License 2.0

/**
 * Computes the gradient of a Boltzmann policy using the given differentiable valueFunction.
 * @param s the input state of the policy gradient
 * @param a the action whose policy probability gradient is being queried
 * @param planner the differentiable {@link DifferentiableQFunction} valueFunction
 * @param beta the Boltzmann beta parameter. This parameter is the inverse of the Boltzmann temperature. As beta becomes larger, the policy becomes more deterministic. Should lie in [0, +infty].
 * @return the gradient of the policy.
 */
public static FunctionGradient computeBoltzmannPolicyGradient(State s, Action a, DifferentiableQFunction planner, double beta){

    //get q objects
    List<QValue> Qs = ((QProvider)planner).qValues(s);
    double [] qs = new double[Qs.size()];
    for(int i = 0; i < Qs.size(); i++){
        qs[i] = Qs.get(i).q;
    }

    //find matching action index
    int aind = -1;
    for(int i = 0; i < Qs.size(); i++){
        if(Qs.get(i).a.equals(a)){
            aind = i;
            break;
        }
    }

    if(aind == -1){
        throw new RuntimeException("Error in computing BoltzmannPolicyGradient: Could not find query action in Q-value list.");
    }

    FunctionGradient [] qGradients = new FunctionGradient[qs.length];
    for(int i = 0; i < qs.length; i++){
        qGradients[i] = planner.qGradient(s, Qs.get(i).a);
    }

    FunctionGradient policyGradient = computePolicyGradient(qs, qGradients, aind, beta);

    return policyGradient;
}
Example #28
Source File: DenseLinearVFA.java From burlap with Apache License 2.0

@Override
public double evaluate(State s, Action a) {
    this.currentStateFeatures = this.stateFeatures.features(s);
    this.currentActionOffset = this.getActionOffset(a);
    int indOff = this.currentActionOffset*this.currentStateFeatures.length;
    double val = 0;
    for(int i = 0; i < this.currentStateFeatures.length; i++){
        val += this.currentStateFeatures[i] * this.stateActionWeights[i+indOff];
    }
    this.currentValue = val;
    this.currentGradient = null;
    this.lastState = s;
    return this.currentValue;
}
Example #29
Source File: GreedyDeterministicQPolicy.java From burlap with Apache License 2.0

@Override
public Action action(State s) {

    List<QValue> qValues = this.qplanner.qValues(s);

    double maxQV = Double.NEGATIVE_INFINITY;
    QValue maxQ = null;
    for(QValue q : qValues){
        if(q.q > maxQV){
            maxQV = q.q;
            maxQ = q;
        }
    }

    return maxQ.a;
}
Example #30
Source File: BFSMarkovOptionModel.java From burlap with Apache License 2.0

@Override
public EnvironmentOutcome sample(State s, Action a) {
    if(!(a instanceof Option)){
        return model.sample(s, a);
    }

    Option o = (Option)a;
    SimulatedEnvironment env = new SimulatedEnvironment(model, s);
    return o.control(env, discount);
}