burlap.mdp.singleagent.model.RewardFunction Java Examples
The following examples show how to use burlap.mdp.singleagent.model.RewardFunction.
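For context, RewardFunction is a single-method interface: reward(State s, Action a, State sprime) returns the reward for taking action a in state s and arriving in sprime (see Example #3 below for the signature in use). A minimal sketch of a direct implementation follows; the +100/-1 reward scheme and the isAtGoal helper are hypothetical, for illustration only.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.model.RewardFunction;

public class SketchGoalRF implements RewardFunction {

    @Override
    public double reward(State s, Action a, State sprime) {
        //hypothetical goal bonus: +100 on reaching the goal, -1 step cost otherwise
        return this.isAtGoal(sprime) ? 100. : -1.;
    }

    //hypothetical helper; a real implementation would inspect the state's variables
    protected boolean isAtGoal(State s) {
        return false; //placeholder
    }
}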
Example #1
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License
public static void IPSS(){

    //define the inverted pendulum task: failure beyond PI/8 radians from vertical
    InvertedPendulum ip = new InvertedPendulum();
    ip.physParams.actionNoise = 0.;
    RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
    TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
    ip.setRf(rf);
    ip.setTf(tf);
    SADomain domain = ip.generateDomain();

    State initialState = new InvertedPendulumState();

    //plan with sparse sampling and follow the greedy policy over its Q-estimates
    SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
    ss.setForgetPreviousPlanResults(true);
    ss.toggleDebugPrinting(false);
    Policy p = new GreedyQPolicy(ss);

    //roll out the policy for up to 500 steps and visualize the episode
    Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
    System.out.println("Num steps: " + e.maxTimeStep());
    Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));

}
Example #2
Source File: ExampleGridWorld.java From burlap_examples with MIT License
@Override
public SADomain generateDomain() {

    SADomain domain = new SADomain();

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    GridWorldStateModel smodel = new GridWorldStateModel();
    RewardFunction rf = new ExampleRF(this.goalx, this.goaly);
    TerminalFunction tf = new ExampleTF(this.goalx, this.goaly);

    domain.setModel(new FactoredModel(smodel, rf, tf));

    return domain;
}
Example #3
Source File: ApprenticeshipLearning.java From burlap with Apache License 2.0
/**
 * Generates an anonymous instance of a reward function derived from a FeatureMapping
 * and associated feature weights.
 * Computes (w^(i))^T phi from step 4 in section 3.
 * @param featureFunctions the feature mapping of states to features
 * @param featureWeights the weights given to each feature
 * @return an anonymous instance of RewardFunction
 */
public static RewardFunction generateRewardFunction(
        DenseStateFeatures featureFunctions, FeatureWeights featureWeights) {
    final DenseStateFeatures newFeatureFunctions = featureFunctions;
    final FeatureWeights newFeatureWeights = new FeatureWeights(featureWeights);
    return new RewardFunction() {

        @Override
        public double reward(State state, Action a, State sprime) {
            double[] featureWeightValues = newFeatureWeights.getWeights();
            double sumReward = 0;
            double[] fv = newFeatureFunctions.features(state);
            for (int i = 0; i < fv.length; ++i) {
                sumReward += featureWeightValues[i] * fv[i];
            }
            return sumReward;
        }

    };
}
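The generated function can then be queried like any other RewardFunction. A hypothetical usage sketch; featureFunctions, weights, and the states s and sprime are assumed to be constructed elsewhere:

RewardFunction learnedRF = ApprenticeshipLearning.generateRewardFunction(featureFunctions, weights);
double r = learnedRF.reward(s, a, sprime); //returns w^T phi(s): the state's features, weighted and summed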
Example #4
Source File: InvertedPendulum.java From burlap with Apache License 2.0
@Override
public SADomain generateDomain() {

    SADomain domain = new SADomain();

    IPPhysicsParams cphys = this.physParams.copy();
    IPModel smodel = new IPModel(cphys);

    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;

    if(rf == null){
        rf = new InvertedPendulumRewardFunction();
    }
    if(tf == null){
        tf = new InvertedPendulumTerminalFunction();
    }

    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    domain.addActionType(new UniversalActionType(ACTION_LEFT))
            .addActionType(new UniversalActionType(ACTION_RIGHT))
            .addActionType(new UniversalActionType(ACTION_NO_FORCE));

    return domain;
}
Example #5
Source File: BlocksWorld.java From burlap with Apache License 2.0
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    domain.addStateClass(CLASS_BLOCK, BlocksWorldBlock.class);

    domain.addActionType(new StackActionType(ACTION_STACK))
            .addActionType(new UnstackActionType(ACTION_UNSTACK));

    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;

    if(rf == null){
        rf = new NullRewardFunction();
    }
    if(tf == null){
        tf = new NullTermination();
    }

    BWModel smodel = new BWModel();
    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    return domain;
}
Example #6
Source File: RewardValueProjection.java From burlap with Apache License 2.0
/**
 * Initializes. Note that if projectionType is ONESTEP a runtime exception will be thrown, because projecting a
 * one-step value requires the {@link burlap.mdp.core.Domain} to enumerate the actions and transition dynamics.
 * Use the {@link #RewardValueProjection(RewardFunction, RewardProjectionType, SADomain)} constructor instead.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType){
    this.rf = rf;
    this.projectionType = projectionType;
    if(projectionType == RewardProjectionType.ONESTEP){
        throw new RuntimeException("If the reward function depends on a 1-step transition (e.g., from a source state " +
                "to a target state), then the Domain is needed to evaluate the transition dynamics. Use the " +
                "RewardValueProjection(RewardFunction, RewardProjectionType, Domain) constructor instead.");
    }
}
Example #7
Source File: RewardValueProjection.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.mdp.core.Domain} in which the {@link RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, SADomain domain){
    this.rf = rf;
    this.projectionType = projectionType;
    this.domain = domain;
    if(this.projectionType == RewardProjectionType.ONESTEP){
        this.oneStepBellmanPlanner = new SparseSampling(domain, 1., new SimpleHashableStateFactory(), 1, -1);
        this.oneStepBellmanPlanner.setModel(new CustomRewardNoTermModel(domain.getModel(), rf));
        this.oneStepBellmanPlanner.toggleDebugPrinting(false);
        this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
    }
}
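Taken together, Examples #6 and #7 suggest the following construction pattern. A hedged sketch: rf and domain are assumed to be built elsewhere, and SOURCESTATE is an assumed member of RewardProjectionType (only ONESTEP is confirmed by the examples above):

//projections that read only the reward function need no domain
RewardValueProjection stateProj = new RewardValueProjection(rf, RewardProjectionType.SOURCESTATE);

//one-step projections must evaluate transition dynamics, so pass the domain
RewardValueProjection oneStepProj = new RewardValueProjection(rf, RewardProjectionType.ONESTEP, domain);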
Example #8
Source File: PotentialShapedRF.java From burlap with Apache License 2.0
/**
 * Initializes the shaping with the objective reward function, the potential function, and the discount of the MDP.
 * @param baseRF the objective task reward function.
 * @param potentialFunction the potential function to use.
 * @param discount the discount factor of the MDP.
 */
public PotentialShapedRF(RewardFunction baseRF, PotentialFunction potentialFunction, double discount) {
    super(baseRF);
    this.potentialFunction = potentialFunction;
    this.discount = discount;
}
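Potential-based shaping adds gamma * Phi(s') - Phi(s) to the base reward, which speeds learning without changing the optimal policy. A minimal sketch of wiring one up; the potentialValue method name follows BURLAP's PotentialFunction interface, and baseRF and the distanceToGoal helper are hypothetical:

PotentialFunction potential = new PotentialFunction() {
    @Override
    public double potentialValue(State s) {
        //hypothetical: potential grows as the agent nears the goal
        return -distanceToGoal(s);
    }
};
RewardFunction shaped = new PotentialShapedRF(baseRF, potential, 0.99);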
Example #9
Source File: MountainCar.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #10
Source File: CartPoleDomain.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #11
Source File: MountainCar.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #12
Source File: LunarLanderDomain.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #13
Source File: LunarLanderDomain.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #14
Source File: LunarLanderDomain.java From burlap with Apache License 2.0
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    List<Double> thrustValuesTemp = this.thrustValues;
    if(thrustValuesTemp.isEmpty()){
        thrustValuesTemp.add(0.32);
        thrustValuesTemp.add(-physParams.gravity);
    }

    domain.addStateClass(CLASS_AGENT, LLAgent.class)
            .addStateClass(CLASS_PAD, LLBlock.LLPad.class)
            .addStateClass(CLASS_OBSTACLE, LLBlock.LLObstacle.class);

    //make copy of physics parameters
    LLPhysicsParams cphys = this.physParams.copy();

    //add actions
    domain.addActionType(new UniversalActionType(ACTION_TURN_LEFT))
            .addActionType(new UniversalActionType(ACTION_TURN_RIGHT))
            .addActionType(new UniversalActionType(ACTION_IDLE))
            .addActionType(new ThrustType(thrustValues));

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    LunarLanderModel smodel = new LunarLanderModel(cphys);
    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;
    if(rf == null){
        rf = new LunarLanderRF(domain);
    }
    if(tf == null){
        tf = new LunarLanderTF(domain);
    }

    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    return domain;
}
Example #15
Source File: GridWorldDomain.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #16
Source File: GridWorldDomain.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #17
Source File: GraphDefinedDomain.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #18
Source File: GraphDefinedDomain.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #19
Source File: FrostbiteDomain.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #20
Source File: FrostbiteDomain.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #21
Source File: FrostbiteDomain.java From burlap with Apache License 2.0
/**
 * Creates a new frostbite domain.
 *
 * @return the generated domain object
 */
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    domain.addStateClass(CLASS_AGENT, FrostbiteAgent.class)
            .addStateClass(CLASS_IGLOO, FrostbiteIgloo.class)
            .addStateClass(CLASS_PLATFORM, FrostbitePlatform.class);

    //add actions
    domain.addActionType(new UniversalActionType(ACTION_NORTH))
            .addActionType(new UniversalActionType(ACTION_SOUTH))
            .addActionType(new UniversalActionType(ACTION_EAST))
            .addActionType(new UniversalActionType(ACTION_WEST))
            .addActionType(new UniversalActionType(ACTION_IDLE));

    //add pfs
    List<PropositionalFunction> pfs = this.generatePFs();
    for(PropositionalFunction pf : pfs){
        domain.addPropFunction(pf);
    }

    FrostbiteModel smodel = new FrostbiteModel(scale);
    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;
    if(rf == null){
        rf = new FrostbiteRF(domain);
    }
    if(tf == null){
        tf = new FrostbiteTF(domain);
    }

    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    return domain;
}
Example #22
Source File: InvertedPendulum.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #23
Source File: InvertedPendulum.java From burlap with Apache License 2.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #24
Source File: CartPoleDomain.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #25
Source File: BlockDude.java From burlap with Apache License 2.0
public RewardFunction getRf() { return rf; }
Example #26
Source File: ExampleOOGridWorld.java From burlap_examples with MIT License
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    domain.addStateClass(CLASS_AGENT, ExGridAgent.class)
            .addStateClass(CLASS_LOCATION, EXGridLocation.class);

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    OOGridWorldStateModel smodel = new OOGridWorldStateModel();
    RewardFunction rf = new SingleGoalPFRF(domain.propFunction(PF_AT), 100, -1);
    TerminalFunction tf = new SinglePFTF(domain.propFunction(PF_AT));

    domain.setModel(new FactoredModel(smodel, rf, tf));

    return domain;
}
Example #27
Source File: MinecraftEnvironment.java From burlapcraft with GNU Lesser General Public License v3.0
public void setRewardFunction(RewardFunction rf) { this.rewardFunction = rf; }
Example #28
Source File: MinecraftDomainGenerator.java From burlapcraft with GNU Lesser General Public License v3.0
public RewardFunction getRf() { return rf; }
Example #29
Source File: MinecraftDomainGenerator.java From burlapcraft with GNU Lesser General Public License v3.0
public void setRf(RewardFunction rf) { this.rf = rf; }
Example #30
Source File: CustomRewardModel.java From burlap with Apache License 2.0
public CustomRewardModel(SampleModel model, RewardFunction rewardFunction) {
    this.model = model;
    this.rewardFunction = rewardFunction;
}