burlap.mdp.singleagent.SADomain Java Examples
The following examples show how to use
burlap.mdp.singleagent.SADomain.
Each example references the original project and source file it was taken from.
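Most of the examples below follow the same construction pattern: create an SADomain, register its action types, and attach a model. The following minimal sketch illustrates that pattern. It is modeled on Example #5 below; GridWorldStateModel, ExampleRF, and ExampleTF come from the burlap_examples project and stand in for whatever state model, reward function, and terminal function your own domain uses.

// Minimal construction sketch (mirrors Example #5); the state model, reward
// function, and terminal function are placeholders from burlap_examples.
SADomain domain = new SADomain();
domain.addActionTypes(
        new UniversalActionType("north"),
        new UniversalActionType("south"),
        new UniversalActionType("east"),
        new UniversalActionType("west"));
domain.setModel(new FactoredModel(
        new GridWorldStateModel(),   // the transition dynamics
        new ExampleRF(10, 10),       // reward function with the goal at (10, 10)
        new ExampleTF(10, 10)));     // terminal function with the goal at (10, 10)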
Example #1
Source File: TestHashing.java From burlap with Apache License 2.0
@Test
public void testSimpleHashFactoryIdentifierIndependent() {
    SADomain domain = (SADomain)this.gridWorldTest.getDomain();
    State startState = this.gridWorldTest.generateState();
    HashableStateFactory factory = new SimpleHashableStateFactory();
    Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
    assert(hashedStates.size() == 104);

    Set<HashableState> renamedStates = new HashSet<HashableState>();
    for (HashableState state : hashedStates) {
        State source = state.s();
        State renamed = this.renameObjects((GridWorldState)source.copy());
        HashableState renamedHashed = factory.hashState(renamed);
        renamedStates.add(renamedHashed);
    }
    hashedStates.addAll(renamedStates);
    assert(hashedStates.size() == 104);
}
Example #2
Source File: DifferentiableSparseSampling.java From burlap with Apache License 2.0
/**
 * Initializes. The model of this planner will automatically be set to a {@link CustomRewardModel} using the provided reward function.
 * @param domain the problem domain
 * @param rf the differentiable reward function
 * @param gamma the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h the planning horizon
 * @param c how many samples from the transition dynamics to use. Set to -1 to use the full (unsampled) transition dynamics.
 * @param boltzBeta the Boltzmann beta parameter for the differentiable Boltzmann (softmax) backup equation. The larger the value the more deterministic, the closer to 1 the softer.
 */
public DifferentiableSparseSampling(SADomain domain, DifferentiableRF rf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
    this.solverInit(domain, gamma, hashingFactory);
    this.h = h;
    this.c = c;
    this.rf = rf;
    this.boltzBeta = boltzBeta;
    this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
    this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();
    this.rfDim = rf.numParameters();

    this.vinit = new VanillaDiffVinit(new ConstantValueFunction(), rf);

    this.model = new CustomRewardModel(domain.getModel(), rf);

    this.operator = new DifferentiableSoftmaxOperator(boltzBeta);

    this.debugCode = 6368290;
}
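A rough instantiation sketch for this constructor is shown below; the helper method, the supplied domain and reward function, and the chosen parameter values are illustrative assumptions rather than code from the project above.

// Illustrative only: 'domain' and 'rf' are assumed to be built elsewhere.
static DifferentiableSparseSampling buildPlanner(SADomain domain, DifferentiableRF rf) {
    return new DifferentiableSparseSampling(
            domain,
            rf,
            0.99,                              // gamma: discount factor
            new SimpleHashableStateFactory(),  // state equality / hashing
            4,                                 // h: planning horizon
            -1,                                // c = -1: use the full transition dynamics
            10.0);                             // boltzBeta: softmax backup parameter
}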
Example #3
Source File: ExampleGridWorld.java From burlap_examples with MIT License
public static void main(String [] args){
    ExampleGridWorld gen = new ExampleGridWorld();
    gen.setGoalLocation(10, 10);

    SADomain domain = gen.generateDomain();

    State initialState = new EXGridState(0, 0);
    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);

    Visualizer v = gen.getVisualizer();
    VisualExplorer exp = new VisualExplorer(domain, env, v);

    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("a", ACTION_WEST, "");

    exp.initGUI();
}
Example #4
Source File: TestHashing.java From burlap with Apache License 2.0
@Test
public void testSimpleHashFactoryIdentifierDependent() {
    SADomain domain = (SADomain)this.gridWorldTest.getDomain();
    State startState = this.gridWorldTest.generateState();
    HashableStateFactory factory = new SimpleHashableStateFactory(false);
    Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
    assert(hashedStates.size() == 104);

    Set<HashableState> renamedStates = new HashSet<HashableState>();
    for (HashableState state : hashedStates) {
        State source = state.s();
        State renamed = this.renameObjects((GridWorldState)source.copy());
        HashableState renamedHashed = factory.hashState(renamed);
        renamedStates.add(renamedHashed);
    }
    hashedStates.addAll(renamedStates);
    assert(hashedStates.size() == 208);
}
Example #5
Source File: ExampleGridWorld.java From burlap_examples with MIT License
@Override
public SADomain generateDomain() {

    SADomain domain = new SADomain();

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    GridWorldStateModel smodel = new GridWorldStateModel();
    RewardFunction rf = new ExampleRF(this.goalx, this.goaly);
    TerminalFunction tf = new ExampleTF(this.goalx, this.goaly);

    domain.setModel(new FactoredModel(smodel, rf, tf));

    return domain;
}
Example #6
Source File: CommandReachable.java From burlapcraft with GNU Lesser General Public License v3.0
@Override
public void processCommand(ICommandSender p_71515_1_, String[] p_71515_2_) {
    MinecraftDomainGenerator mdg = new MinecraftDomainGenerator();
    SADomain domain = mdg.generateDomain();

    State in = MinecraftStateGeneratorHelper.getCurrentState(BurlapCraft.currentDungeon);
    List<State> reachable = StateReachability.getReachableStates(in, domain, new SimpleHashableStateFactory());
    for(State s : reachable){
        OOState os = (OOState)s;
        BCAgent a = (BCAgent)os.object(CLASS_AGENT);
        System.out.println(a.x + ", " + a.y + ", " + a.z + ", " + a.rdir + ", " + a.vdir + ", " + a.selected);
    }
    System.out.println(reachable.size());
}
Example #7
Source File: MDPSolver.java From burlap with Apache License 2.0
@Override
public void solverInit(SADomain domain, double gamma, HashableStateFactory hashingFactory){
    this.gamma = gamma;
    this.hashingFactory = hashingFactory;
    this.setDomain(domain);
}
Example #8
Source File: PolicyIteration.java From burlap with Apache License 2.0
/**
 * Initializes the valueFunction.
 * @param domain the domain in which to plan
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use
 * @param maxDelta when the maximum change in the value function is smaller than this value, policy evaluation will terminate. Similarly, when the maximum value function change between policy iterations is smaller than this value, planning will terminate.
 * @param maxEvaluationIterations when the number of iterations of value iteration used to evaluate a policy exceeds this value, policy evaluation will terminate.
 * @param maxPolicyIterations when the number of policy iterations passes this value, planning will terminate.
 */
public PolicyIteration(SADomain domain, double gamma, HashableStateFactory hashingFactory, double maxDelta, int maxEvaluationIterations, int maxPolicyIterations){
    this.DPPInit(domain, gamma, hashingFactory);

    this.maxEvalDelta = maxDelta;
    this.maxPIDelta = maxDelta;
    this.maxIterations = maxEvaluationIterations;
    this.maxPolicyIterations = maxPolicyIterations;

    this.evaluativePolicy = new GreedyQPolicy(this.getCopyOfValueFunction());
}
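As a hedged usage sketch of this constructor (the domain, initial state, and the threshold/iteration values below are illustrative assumptions, not taken from the source above):

// Illustrative only: 'domain' and 'initialState' are assumed to exist.
PolicyIteration pi = new PolicyIteration(
        domain,
        0.99,                              // gamma: discount factor
        new SimpleHashableStateFactory(),
        0.001,                             // maxDelta for evaluation and policy change
        100,                               // max value iteration sweeps per evaluation
        100);                              // max policy iterations
Policy p = pi.planFromState(initialState);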
Example #9
Source File: DFS.java From burlap with Apache License 2.0
/**
 * Constructor of DFS with specification of depth limit, whether to maintain a closed list that affects exploration, and whether paths
 * generated by options should be explored first.
 * @param domain the domain in which to plan
 * @param gc indicates the goal states
 * @param hashingFactory the state hashing factory to use
 * @param maxDepth depth limit of DFS. -1 specifies no limit.
 * @param maintainClosed whether to maintain a closed list or not
 * @param optionsFirst whether to explore paths generated by options first.
 */
protected void DFSInit(SADomain domain, StateConditionTest gc, HashableStateFactory hashingFactory, int maxDepth, boolean maintainClosed, boolean optionsFirst){
    this.deterministicPlannerInit(domain, gc, hashingFactory);
    this.maxDepth = maxDepth;
    this.maintainClosed = maintainClosed;
    if(optionsFirst){
        this.setOptionsFirst();
    }

    rand = RandomFactory.getMapped(0);
}
Example #10
Source File: QLTutorial.java From burlap_examples with MIT License
public static void main(String[] args) {

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();
    gwd.setProbSucceedTransitionDynamics(0.8);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));

    SADomain domain = gwd.generateDomain();

    //get initial state with agent in 0,0
    State s = new GridWorldState(new GridAgent(0, 0));

    //create environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

    //create Q-learning
    QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(), 0.1, 0.1);

    //run Q-learning and store results in a list
    List<Episode> episodes = new ArrayList<Episode>(1000);
    for(int i = 0; i < 1000; i++){
        episodes.add(agent.runLearningEpisode(env));
        env.resetEnvironment();
    }

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
Example #11
Source File: SimulatedEnvironment.java From burlap with Apache License 2.0
public SimulatedEnvironment(SADomain domain, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;
    if(domain.getModel() == null){
        throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
    }
    this.model = domain.getModel();
}
Example #12
Source File: PolicyIteration.java From burlap with Apache License 2.0
/**
 * Initializes the valueFunction.
 * @param domain the domain in which to plan
 * @param gamma the discount factor
 * @param hashingFactory the state hashing factory to use
 * @param maxPIDelta when the maximum value function change between policy iterations is smaller than this value, planning will terminate.
 * @param maxEvalDelta when the maximum change in the value function is smaller than this value, policy evaluation will terminate.
 * @param maxEvaluationIterations when the number of iterations of value iteration used to evaluate a policy exceeds this value, policy evaluation will terminate.
 * @param maxPolicyIterations when the number of policy iterations passes this value, planning will terminate.
 */
public PolicyIteration(SADomain domain, double gamma, HashableStateFactory hashingFactory, double maxPIDelta, double maxEvalDelta, int maxEvaluationIterations, int maxPolicyIterations){
    this.DPPInit(domain, gamma, hashingFactory);

    this.maxEvalDelta = maxEvalDelta;
    this.maxPIDelta = maxPIDelta;
    this.maxIterations = maxEvaluationIterations;
    this.maxPolicyIterations = maxPolicyIterations;

    this.evaluativePolicy = new GreedyQPolicy(this.getCopyOfValueFunction());
}
Example #13
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * Here is where the magic happens. In this method is where I loop through the specific number
 * of episodes (iterations) and run the specific algorithm. To keep things nice and clean, I use
 * this method to run all three algorithms. The specific details are specified through the
 * PlannerFactory interface.
 *
 * This method collects all the information from the algorithm and packs it in an Analysis
 * instance that later gets dumped on the console.
 */
private static void runAlgorithm(Analysis analysis, Problem problem, SADomain domain, HashableStateFactory hashingFactory, State initialState, PlannerFactory plannerFactory, Algorithm algorithm) {
    ConstantStateGenerator constantStateGenerator = new ConstantStateGenerator(initialState);
    SimulatedEnvironment simulatedEnvironment = new SimulatedEnvironment(domain, constantStateGenerator);
    Planner planner = null;
    Policy policy = null;

    for (int episodeIndex = 1; episodeIndex <= problem.getNumberOfIterations(algorithm); episodeIndex++) {
        long startTime = System.nanoTime();
        planner = plannerFactory.createPlanner(episodeIndex, domain, hashingFactory, simulatedEnvironment);
        policy = planner.planFromState(initialState);

        /*
         * If we haven't converged, following the policy will lead the agent wandering around
         * and it might never reach the goal. To avoid this, we need to set the maximum number
         * of steps to take before terminating the policy rollout. I decided to set this maximum
         * at the number of grid locations in our map (width * width). This should give the
         * agent plenty of room to wander around.
         *
         * The smaller this number is, the faster the algorithm will run.
         */
        int maxNumberOfSteps = problem.getWidth() * problem.getWidth();

        Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), maxNumberOfSteps);
        analysis.add(episodeIndex, episode.rewardSequence, episode.numTimeSteps(), (long) (System.nanoTime() - startTime) / 1000000);
    }

    if (algorithm == Algorithm.QLearning && USE_LEARNING_EXPERIMENTER) {
        learningExperimenter(problem, (LearningAgent) planner, simulatedEnvironment);
    }

    if (SHOW_VISUALIZATION && planner != null && policy != null) {
        visualize(problem, (ValueFunction) planner, policy, initialState, domain, hashingFactory, algorithm.getTitle());
    }
}
Example #14
Source File: VITutorial.java From burlap_examples with MIT License
public static void main(String [] args){

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));
    gwd.setMapToFourRooms();

    //only go in intended direction 80% of the time
    gwd.setProbSucceedTransitionDynamics(0.8);

    SADomain domain = gwd.generateDomain();

    //get initial state with agent in 0,0
    State s = new GridWorldState(new GridAgent(0, 0));

    //setup vi with 0.99 discount factor, a value
    //function initialization that initializes all states to value 0, and which will
    //run for 30 iterations over the state space
    VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(0.0), 30);

    //run planning from our initial state
    Policy p = vi.planFromState(s);

    //evaluate the policy with one rollout and visualize the trajectory
    Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));
}
Example #15
Source File: MCVideo.java From burlap_examples with MIT License
public static void main(String[] args) {

    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();

    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

    NormalizedVariableFeatures features = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
    FourierBasis fb = new FourierBasis(features, 4);

    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain,
            new MCState(mcGen.physParams.valleyPos(), 0));
    EnvironmentServer envServ = new EnvironmentServer(env, vob);

    for(int i = 0; i < 100; i++){
        PolicyUtils.rollout(p, envServ);
        envServ.resetEnvironment();
    }

    System.out.println("Finished");
}
Example #16
Source File: ActionSet.java From burlap_caffe with Apache License 2.0
public ActionSet(SADomain domain) {
    List<Action> actionList = ActionUtils.allApplicableActionsForTypes(domain.getActionTypes(), null);
    size = actionList.size();
    actions = new Action[size];
    actionList.toArray(actions);

    initActionMap();
}
Example #17
Source File: LearningAgentToSGAgentInterface.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain The stochastic games {@link burlap.mdp.stochasticgames.SGDomain} in which this agent will interact.
 * @param learningAgent the {@link burlap.behavior.singleagent.learning.LearningAgent} that will handle this {@link SGAgent}'s control.
 * @param agentName the name of the agent
 * @param agentType the {@link SGAgentType} for the agent defining its action space
 */
public LearningAgentToSGAgentInterface(SGDomain domain, LearningAgent learningAgent, String agentName, SGAgentType agentType){
    this.init(domain, agentName, agentType);
    this.learningAgent = learningAgent;
    if(this.learningAgent instanceof MDPSolver){
        SADomain sadomain = new SADomain();
        for(ActionType actionType : agentType.actions){
            sadomain.addActionType(actionType);
        }
        ((MDPSolver) this.learningAgent).setDomain(sadomain);
    }
}
Example #18
Source File: Main.java From cs7641-assignment4 with MIT License
/**
 * This method takes care of visualizing the grid, rewards, and specific policy on a nice
 * BURLAP-predefined GUI. I found this very useful to understand how the algorithm was working.
 */
private static void visualize(Problem map, ValueFunction valueFunction, Policy policy, State initialState, SADomain domain, HashableStateFactory hashingFactory, String title) {
    List<State> states = StateReachability.getReachableStates(initialState, domain, hashingFactory);
    ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(states, map.getWidth(), map.getWidth(), valueFunction, policy);
    gui.setTitle(title);
    gui.setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
    gui.initGUI();
}
Example #19
Source File: StateEnumerator.java From burlap with Apache License 2.0
/**
 * Finds all states that are reachable from an input state and enumerates them
 * @param from the state from which all reachable states should be searched
 */
public void findReachableStatesAndEnumerate(State from){
    Set<HashableState> reachable = StateReachability.getReachableHashedStates(from, (SADomain)this.domain, this.hashingFactory);
    for(HashableState sh : reachable){
        this.getEnumeratedID(sh);
    }
}
Example #20
Source File: RewardValueProjection.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.mdp.core.Domain} in which the {@link RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, SADomain domain){
    this.rf = rf;
    this.projectionType = projectionType;
    this.domain = domain;
    if(this.projectionType == RewardProjectionType.ONESTEP){
        this.oneStepBellmanPlanner = new SparseSampling(domain, 1., new SimpleHashableStateFactory(), 1, -1);
        this.oneStepBellmanPlanner.setModel(new CustomRewardNoTermModel(domain.getModel(), rf));
        this.oneStepBellmanPlanner.toggleDebugPrinting(false);
        this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
    }
}
Example #21
Source File: BlocksWorld.java From burlap with Apache License 2.0
/**
 * Main method for exploring the domain. The initial state will have 3 red blocks starting on the table. By default this method will launch the visual explorer.
 * Pass a "t" argument to use the terminal explorer.
 * @param args process arguments
 */
public static void main(String [] args){

    BlocksWorld bw = new BlocksWorld();
    SADomain domain = bw.generateDomain();

    State s = getNewState(3);

    int expMode = 1;
    if(args.length > 0){
        if(args[0].equals("v")){
            expMode = 1;
        }
        else if(args[0].equals("t")){
            expMode = 0;
        }
    }

    if(expMode == 0){
        EnvironmentShell shell = new EnvironmentShell(domain, s);
        shell.start();
    }
    else if(expMode == 1){
        VisualExplorer exp = new VisualExplorer(domain, BlocksWorldVisualizer.getVisualizer(24), s);
        exp.initGUI();
    }
}
Example #22
Source File: MultipleIntentionsMLIRLRequest.java From burlap with Apache License 2.0
/**
 * Initializes
 * @param domain the domain of the problem
 * @param plannerFactory A {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientPlannerFactory} that produces {@link DifferentiableQFunction} objects.
 * @param expertEpisodes the expert trajectories
 * @param rf the {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.DifferentiableRF} model to use.
 * @param k the number of clusters
 */
public MultipleIntentionsMLIRLRequest(SADomain domain, QGradientPlannerFactory plannerFactory, List<Episode> expertEpisodes, DifferentiableRF rf, int k) {
    super(domain, null, expertEpisodes, rf);
    this.plannerFactory = plannerFactory;
    this.k = k;
    if(this.plannerFactory != null) {
        this.setPlanner((Planner) plannerFactory.generateDifferentiablePlannerForRequest(this));
    }
}
Example #23
Source File: RLGlueDomain.java From burlap with Apache License 2.0
@Override
public Domain generateDomain() {

    Domain domain = new SADomain();

    if(theTaskSpec.getNumDiscreteActionDims() != 1 || theTaskSpec.getNumContinuousActionDims() > 0){
        throw new RuntimeException("Can only create domains with one discrete action dimension");
    }

    for(int i = 0; i < theTaskSpec.getDiscreteActionRange(0).getRangeSize(); i++){
        new RLGlueActionType(domain, i);
    }

    return domain;
}
Example #24
Source File: TestHashing.java From burlap with Apache License 2.0
public Set<HashableState> generateRandomStates(SADomain domain, State state, HashableStateFactory factory, int width, int numStates, boolean moveLocations) {
    Set<HashableState> hashedStates = new HashSet<HashableState>();
    Random random = new Random();
    int misses = 0;
    int prevSize = 0;
    while (hashedStates.size() < numStates) {
        if (hashedStates.size() == prevSize) {
            misses++;
        }
        if (misses > 100) {
            break;
        }
        prevSize = hashedStates.size();
        if (prevSize > 0 && prevSize % 10000 == 0) {
            System.out.println("\t" + prevSize);
        }

        GridWorldState copy = (GridWorldState)state.copy();
        copy.touchAgent().x = random.nextInt(width);
        copy.agent.y = random.nextInt(width);

        if (moveLocations) {
            List<GridLocation> locations = copy.deepTouchLocations();
            for(GridLocation loc : locations){
                loc.x = random.nextInt(width);
                loc.y = random.nextInt(width);
            }
        }
        hashedStates.add(factory.hashState(copy));
    }
    return hashedStates;
}
Example #25
Source File: BeliefSparseSampling.java From burlap with Apache License 2.0
/**
 * Initializes the planner.
 * @param domain the POMDP domain
 * @param discount the discount factor
 * @param hashingFactory the Belief MDP {@link burlap.statehashing.HashableStateFactory} that {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use.
 * @param h the height of the {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} tree.
 * @param c the number of samples {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use. Set to -1 to use the full BeliefMDP transition dynamics.
 */
public BeliefSparseSampling(PODomain domain, double discount, HashableStateFactory hashingFactory, int h, int c){
    this.solverInit(domain, discount, hashingFactory);
    BeliefMDPGenerator bdgen = new BeliefMDPGenerator(domain);
    this.beliefMDP = (SADomain)bdgen.generateDomain();

    this.mdpPlanner = new SparseSampling(this.beliefMDP, discount, hashingFactory, h, Math.max(1, c));
    if(c < 1){
        this.mdpPlanner.setComputeExactValueFunction(true);
    }
}
Example #26
Source File: TabularModel.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param sourceDomain the source domain whose actions will be modeled.
 * @param hashingFactory the hashing factory to index states
 * @param nConfident the number of observed transitions to be confident in the model's prediction.
 */
public TabularModel(SADomain sourceDomain, HashableStateFactory hashingFactory, int nConfident){
    this.sourceDomain = sourceDomain;
    this.hashingFactory = hashingFactory;
    this.stateNodes = new HashMap<HashableState, TabularModel.StateNode>();
    this.terminalStates = new HashSet<HashableState>();
    this.nConfident = nConfident;
}
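A short usage sketch, assuming an existing SADomain; the value of nConfident here is an arbitrary illustrative choice:

// Illustrative only: 'domain' is assumed to be an existing SADomain.
TabularModel model = new TabularModel(
        domain,
        new SimpleHashableStateFactory(),
        5);   // require 5 observed transitions before trusting a prediction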
Example #27
Source File: StateReachability.java From burlap with Apache License 2.0
/**
 * Returns the list of {@link State} objects that are reachable from a source state.
 * @param from the source state
 * @param inDomain the domain of the state
 * @param usingHashFactory the state hashing factory to use for indexing states and testing equality.
 * @return the list of {@link State} objects that are reachable from a source state.
 */
public static List<State> getReachableStates(State from, SADomain inDomain, HashableStateFactory usingHashFactory){
    Set<HashableState> hashed = getReachableHashedStates(from, inDomain, usingHashFactory);
    List<State> states = new ArrayList<State>(hashed.size());
    for(HashableState sh : hashed){
        states.add(sh.s());
    }
    return states;
}
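A typical call site looks like the ones in Examples #6 and #18 above; here is a minimal hedged sketch, with 'initialState' and 'domain' assumed to exist:

// Illustrative only: enumerate everything reachable from 'initialState'.
List<State> reachable = StateReachability.getReachableStates(
        initialState, domain, new SimpleHashableStateFactory());
System.out.println("Reachable states: " + reachable.size());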
Example #28
Source File: TestHashing.java From burlap with Apache License 2.0
@Test
public void testSimpleHashFactory() {
    SADomain domain = (SADomain)this.gridWorldTest.getDomain();
    State startState = this.gridWorldTest.generateState();
    HashableStateFactory factory = new SimpleHashableStateFactory();
    Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
    assert(hashedStates.size() == 104);
}
Example #29
Source File: InvertedPendulum.java From burlap with Apache License 2.0
@Override
public SADomain generateDomain() {

    SADomain domain = new SADomain();

    IPPhysicsParams cphys = this.physParams.copy();
    IPModel smodel = new IPModel(cphys);

    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;

    if(rf == null){
        rf = new InvertedPendulumRewardFunction();
    }
    if(tf == null){
        tf = new InvertedPendulumTerminalFunction();
    }

    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    domain.addActionType(new UniversalActionType(ACTION_LEFT))
            .addActionType(new UniversalActionType(ACTION_RIGHT))
            .addActionType(new UniversalActionType(ACTION_NO_FORCE));

    return domain;
}
Example #30
Source File: ApproximateQLearning.java From burlap with Apache License 2.0
/**
 * Initializes.
 * @param domain the learning domain
 * @param gamma the discount factor
 * @param vfa the value function approximation to use
 * @param stateMapping the state mapping to use to process a state observation from the environment
 */
public ApproximateQLearning(SADomain domain, double gamma, ParametricFunction.ParametricStateActionFunction vfa, StateMapping stateMapping) {
    this.vfa = vfa;
    this.staleVfa = vfa;
    this.learningPolicy = new EpsilonGreedy(this, 0.1);
    this.stateMapping = stateMapping;
    this.solverInit(domain, gamma, null);
}