burlap.domain.singleagent.gridworld.GridWorldDomain Java Examples
The following examples show how to use
burlap.domain.singleagent.gridworld.GridWorldDomain.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0 | 6 votes |
public GridWorldDQN(String solverFile, double gamma) { //create the domain gwdg = new GridWorldDomain(11, 11); gwdg.setMapToFourRooms(); rf = new UniformCostRF(); tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION)); gwdg.setRf(rf); gwdg.setTf(tf); domain = gwdg.generateDomain(); goalCondition = new TFGoalCondition(tf); //set up the initial state of the task initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //set up the state hashing system for tabular algorithms hashingFactory = new SimpleHashableStateFactory(); //set up the environment for learners algorithms env = new SimulatedEnvironment(domain, initialState); dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma); }
Example #2
Source File: HelloGridWorld.java From burlap_examples with MIT License | 6 votes |
public static void main(String[] args) { GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world gw.setMapToFourRooms(); //four rooms layout gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate SADomain domain = gw.generateDomain(); //generate the grid world domain //setup initial state State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //create visualizer and explorer Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap()); VisualExplorer exp = new VisualExplorer(domain, v, s); //set control keys to use w-s-a-d exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, ""); exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, ""); exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, ""); exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, ""); exp.initGUI(); }
Example #3
Source File: BasicBehavior.java From burlap_examples with MIT License | 6 votes |
public BasicBehavior(){ gwdg = new GridWorldDomain(11, 11); gwdg.setMapToFourRooms(); tf = new GridWorldTerminalFunction(10, 10); gwdg.setTf(tf); goalCondition = new TFGoalCondition(tf); domain = gwdg.generateDomain(); initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); hashingFactory = new SimpleHashableStateFactory(); env = new SimulatedEnvironment(domain, initialState); // VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap())); // observer.initGUI(); // env.addObservers(observer); }
Example #4
Source File: TestHashing.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Generates random states on a square grid world, hashes them with the given factory and
 * prints how many hashable states versus distinct hash codes were produced.
 *
 * @param factory         hashing factory under test
 * @param width           side length of the square grid world
 * @param numRandomStates forwarded to {@code generateRandomStates}
 * @param moveLocObjects  forwarded to {@code generateRandomStates}
 */
public void testSimpleHashFactoryLargeState(HashableStateFactory factory, int width, int numRandomStates, boolean moveLocObjects) {
    GridWorldDomain gridWorld = new GridWorldDomain(width, width);
    SADomain domain = (SADomain) gridWorld.generateDomain();

    State start = this.generateLargeGW(domain, width);
    Set<HashableState> hashedStates =
            this.generateRandomStates(domain, start, factory, width, numRandomStates, moveLocObjects);

    // Collect the distinct hash codes so they can be compared with the state count.
    Set<Integer> distinctHashes = new HashSet<Integer>();
    for (HashableState hashed : hashedStates) {
        int code = hashed.hashCode();
        distinctHashes.add(code);
    }

    String report = "Hashed states: " + hashedStates.size() + ", hashes: " + distinctHashes.size();
    System.out.println(report);
}
Example #5
Source File: Main.java From cs7641-assignment4 with MIT License | 5 votes |
/**
 * Shows the grid, the value function and the given policy in BURLAP's predefined
 * grid world value function GUI. Useful for understanding how an algorithm behaved.
 *
 * @param map            problem whose width bounds the visualization
 * @param valueFunction  value function to render
 * @param policy         policy to render
 * @param initialState   state from which reachable states are enumerated
 * @param domain         domain used for the reachability analysis
 * @param hashingFactory hashing factory used for the reachability analysis
 * @param title          window title
 */
private static void visualize(Problem map, ValueFunction valueFunction, Policy policy, State initialState, SADomain domain, HashableStateFactory hashingFactory, String title) {
    List<State> reachable = StateReachability.getReachableStates(initialState, domain, hashingFactory);

    // NOTE(review): the width is used for both dimensions; presumably maps are square. Confirm.
    int maxX = map.getWidth();
    int maxY = map.getWidth();

    ValueFunctionVisualizerGUI window =
            GridWorldDomain.getGridWorldValueFunctionVisualization(reachable, maxX, maxY, valueFunction, policy);
    window.setTitle(title);
    window.setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
    window.initGUI();
}
Example #6
Source File: TestGridWorld.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Asserts that each grid world propositional function has exactly one grounding in the
 * given state and that the grounding evaluates to the expected truth value.
 *
 * @param s              state to evaluate; must be an {@code OOState}
 * @param expectedValues expected truth values, in the order: at-location, wall-north,
 *                       wall-south, wall-east, wall-west
 */
public void assertPFs(State s, boolean[] expectedValues) {
    OOState os = (OOState) s;

    // Index i of this table lines up with index i of expectedValues.
    String[] pfNames = {
        GridWorldDomain.PF_AT_LOCATION,
        GridWorldDomain.PF_WALL_NORTH,
        GridWorldDomain.PF_WALL_SOUTH,
        GridWorldDomain.PF_WALL_EAST,
        GridWorldDomain.PF_WALL_WEST
    };

    for (int i = 0; i < pfNames.length; i++) {
        PropositionalFunction pf = domain.propFunction(pfNames[i]);
        List<GroundedProp> groundings = pf.allGroundings(os);
        Assert.assertEquals(1, groundings.size());
        Assert.assertEquals(expectedValues[i], groundings.get(0).isTrue((OOState) s));
    }
}
Example #7
Source File: TestGridWorld.java From burlap with Apache License 2.0 | 5 votes |
@Before public void setup() { this.gw = new GridWorldDomain(11,11); gw.setMapToFourRooms(); gw.setProbSucceedTransitionDynamics(1.0); this.domain = gw.generateDomain(); //generate the grid world domain }
Example #8
Source File: TestPlanning.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Builds the planning fixture: an 11x11 four-rooms grid world with a {@code UniformCostRF}
 * reward function, a terminal function over the {@code PF_AT_LOCATION} proposition, the
 * matching goal condition and a simple hashing factory.
 */
@Before
public void setup() {
    this.gw = new GridWorldDomain(11, 11);
    this.gw.setMapToFourRooms();
    this.gw.setRf(new UniformCostRF());

    // Termination is driven by the "at location" propositional function.
    PropositionalFunction atLocation = PropositionalFunction.findPF(gw.generatePfs(), PF_AT_LOCATION);
    TerminalFunction terminal = new SinglePFTF(atLocation);
    this.gw.setTf(terminal);

    this.domain = this.gw.generateDomain();
    this.goalCondition = new TFGoalCondition(terminal);
    this.hashingFactory = new SimpleHashableStateFactory();
}
Example #9
Source File: AnalysisRunner.java From omscs-cs7641-machine-learning-assignment-4 with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Opens the standard grid world value function visualization for every state reachable
 * from the given initial state.
 *
 * @param valueFunction  value function to render
 * @param p              policy to render
 * @param initialState   state from which reachable states are enumerated
 * @param domain         domain to analyze; cast to {@code SADomain}
 * @param hashingFactory hashing factory used for the reachability analysis
 * @param title          window title
 */
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p, State initialState, Domain domain, HashableStateFactory hashingFactory, String title) {
    SADomain saDomain = (SADomain) domain;
    List<State> reachable = StateReachability.getReachableStates(initialState, saDomain, hashingFactory);

    ValueFunctionVisualizerGUI window =
            GridWorldDomain.getGridWorldValueFunctionVisualization(reachable, valueFunction, p);
    window.setTitle(title);
    window.initGUI();
}
Example #10
Source File: QLTutorial.java From burlap_examples with MIT License | 5 votes |
public static void main(String[] args) { GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setMapToFourRooms(); gwd.setProbSucceedTransitionDynamics(0.8); gwd.setTf(new GridWorldTerminalFunction(10, 10)); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //create environment SimulatedEnvironment env = new SimulatedEnvironment(domain, s); //create Q-learning QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(), 0.1, 0.1); //run Q-learning and store results in a list List<Episode> episodes = new ArrayList<Episode>(1000); for(int i = 0; i < 1000; i++){ episodes.add(agent.runLearningEpisode(env)); env.resetEnvironment(); } Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, episodes); }
Example #11
Source File: VITutorial.java From burlap_examples with MIT License | 5 votes |
public static void main(String [] args){ GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setTf(new GridWorldTerminalFunction(10, 10)); gwd.setMapToFourRooms(); //only go in intended directon 80% of the time gwd.setProbSucceedTransitionDynamics(0.8); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //setup vi with 0.99 discount factor, a value //function initialization that initializes all states to value 0, and which will //run for 30 iterations over the state space VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(0.0), 30); //run planning from our initial state Policy p = vi.planFromState(s); //evaluate the policy with one roll out visualize the trajectory Episode ea = PolicyUtils.rollout(p, s, domain.getModel()); Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea)); }
Example #12
Source File: IRLExample.java From burlap_examples with MIT License | 5 votes |
/** * Creates a visual explorer that you can use to to record trajectories. Use the "`" key to reset to a random initial state * Use the wasd keys to move north south, east, and west, respectively. To enable recording, * first open up the shell and type: "rec -b" (you only need to type this one). Then you can move in the explorer as normal. * Each demonstration begins after an environment reset. * After each demonstration that you want to keep, go back to the shell and type "rec -r" * If you reset the environment before you type that, * the episode will be discarded. To temporarily view the episodes you've created, in the shell type "episode -v". To actually record your * episodes to file, type "rec -w path/to/save/directory base_file_name" For example "rec -w irl_demos demo" * A recommendation for examples is to record two demonstrations that both go to the pink cell while avoiding blue ones * and do so from two different start locations on the left (if you keep resetting the environment, it will change where the agent starts). */ public void launchExplorer(){ SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.sg); VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800); exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, ""); exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, ""); exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, ""); exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, ""); //exp.enableEpisodeRecording("r", "f", "irlDemo"); exp.initGUI(); }
Example #13
Source File: IRLExample.java From burlap_examples with MIT License | 5 votes |
/**
 * Creates the IRL example's 5x5 empty-map grid world with five location types, a
 * {@code LeftSideGen} state generator seeded with the basic state, and a visualizer for
 * the map.
 */
public IRLExample() {
    this.gwd = new GridWorldDomain(5, 5);
    this.gwd.setNumberOfLocationTypes(5);
    this.gwd.makeEmptyMap();
    this.domain = this.gwd.generateDomain();

    // The state generator is built from the basic state.
    State baseState = this.basicState();
    this.sg = new LeftSideGen(5, baseState);

    this.v = GridWorldVisualizer.getVisualizer(this.gwd.getMap());
}
Example #14
Source File: GridWorldDQN.java From burlap_caffe with Apache License 2.0 | 5 votes |
/**
 * Encodes a grid world state as a one-hot vector: the input is zero-filled and a 1 is
 * written at index {@code y * width + x} of the agent's position.
 *
 * @param state state to encode; must be a {@code GridWorldState}
 * @param input buffer that receives the encoding
 */
@Override
public void vectorizeState(State state, FloatPointer input) {
    GridWorldState gridState = (GridWorldState) state;
    int rowLength = gwdg.getWidth();

    // Clear everything before marking the agent's cell.
    input.fill(0);

    ObjectInstance agentObject = gridState.object(GridWorldDomain.CLASS_AGENT);
    int column = (Integer) agentObject.get(GridWorldDomain.VAR_X);
    int row = (Integer) agentObject.get(GridWorldDomain.VAR_Y);

    long cellIndex = (long) (row * rowLength + column);
    input.put(cellIndex, 1);
}
Example #15
Source File: BasicBehavior.java From burlap_examples with MIT License | 4 votes |
public void manualValueFunctionVis(ValueFunction valueFunction, Policy p){ List<State> allStates = StateReachability.getReachableStates(initialState, domain, hashingFactory); //define color function LandmarkColorBlendInterpolation rb = new LandmarkColorBlendInterpolation(); rb.addNextLandMark(0., Color.RED); rb.addNextLandMark(1., Color.BLUE); //define a 2D painter of state values, specifying which attributes correspond to the x and y coordinates of the canvas StateValuePainter2D svp = new StateValuePainter2D(rb); svp.setXYKeys("agent:x", "agent:y", new VariableDomain(0, 11), new VariableDomain(0, 11), 1, 1); //create our ValueFunctionVisualizer that paints for all states //using the ValueFunction source and the state value painter we defined ValueFunctionVisualizerGUI gui = new ValueFunctionVisualizerGUI(allStates, svp, valueFunction); //define a policy painter that uses arrow glyphs for each of the grid world actions PolicyGlyphPainter2D spp = new PolicyGlyphPainter2D(); spp.setXYKeys("agent:x", "agent:y", new VariableDomain(0, 11), new VariableDomain(0, 11), 1, 1); spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_NORTH, new ArrowActionGlyph(0)); spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_SOUTH, new ArrowActionGlyph(1)); spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_EAST, new ArrowActionGlyph(2)); spp.setActionNameGlyphPainter(GridWorldDomain.ACTION_WEST, new ArrowActionGlyph(3)); spp.setRenderStyle(PolicyGlyphPainter2D.PolicyGlyphRenderStyle.DISTSCALED); //add our policy renderer to it gui.setSpp(spp); gui.setPolicy(p); //set the background color for places where states are not rendered to grey gui.setBgColor(Color.GRAY); //start it gui.initGUI(); }
Example #16
Source File: IRLExample.java From burlap_examples with MIT License | 4 votes |
public LocationFeatures(OODomain domain, int numLocations){ this.numLocations = numLocations; this.inLocationPF = domain.propFunction(GridWorldDomain.PF_AT_LOCATION); }
Example #17
Source File: IRLExample.java From burlap_examples with MIT License | 4 votes |
/**
 * Runs MLIRL on the demonstration episodes read from {@code pathToEpisodes} and then
 * visualizes the learned reward function.
 *
 * @param pathToEpisodes path handed to {@code Episode.readEpisodes} to load the demonstrations
 */
public void runIRL(String pathToEpisodes) {

    // Reward function features.
    LocationFeatures features = new LocationFeatures(this.domain, 5);

    // Reward function that is linear in those features, started from small random
    // parameter values in [-0.1, 0.1).
    LinearStateDifferentiableRF rf = new LinearStateDifferentiableRF(features, 5);
    for (int param = 0; param < rf.numParameters(); param++) {
        double initialValue = RandomFactory.getMapped(0).nextDouble() * 0.2 - 0.1;
        rf.setParameter(param, initialValue);
    }

    // Load the saved demonstrations from disk.
    List<Episode> demonstrations = Episode.readEpisodes(pathToEpisodes);

    // Either DifferentiableVI or DifferentiableSparseSampling can be used for planning.
    // The latter enables receding horizon IRL, but you will probably want a fairly large
    // horizon for this kind of reward function.
    double beta = 10;
    //DifferentiableVI dplanner = new DifferentiableVI(this.domain, rf, 0.99, beta, new SimpleHashableStateFactory(), 0.01, 100);
    DifferentiableSparseSampling dplanner = new DifferentiableSparseSampling(
            this.domain, rf, 0.99, new SimpleHashableStateFactory(), 10, -1, beta);
    dplanner.toggleDebugPrinting(false);

    // Define the IRL problem.
    MLIRLRequest request = new MLIRLRequest(domain, dplanner, demonstrations, rf);
    request.setBoltzmannBeta(beta);

    // Run MLIRL on it.
    MLIRL irl = new MLIRL(request, 0.1, 0.1, 10);
    irl.performIRL();

    // Enumerate the reachable states so the learned reward function can be shown for them.
    List<State> reachable = StateReachability.getReachableStates(
            basicState(), this.domain, new SimpleHashableStateFactory());

    // Use the standard grid world value function visualizer, but feed it a projection of
    // the reward function so that the learned rewards are rendered rather than the value
    // function for them.
    RewardValueProjection rewardAsValue = new RewardValueProjection(rf);
    GreedyQPolicy greedyPolicy = new GreedyQPolicy((QProvider) request.getPlanner());
    ValueFunctionVisualizerGUI window = GridWorldDomain.getGridWorldValueFunctionVisualization(
            reachable, 5, 5, rewardAsValue, greedyPolicy);

    window.initGUI();
}
Example #18
Source File: Episode.java From burlap with Apache License 2.0 | 4 votes |
/**
 * Round-trip demo for episode serialization: rolls out a random policy for up to 30 steps
 * on an 11x11 grid world, prints the serialized episode, parses it back and prints a few
 * properties of the parsed copy.
 *
 * @param args unused
 */
public static void main(String[] args) {
    GridWorldDomain gridWorld = new GridWorldDomain(11, 11);
    SADomain domain = gridWorld.generateDomain();

    State start = new GridWorldState(new GridAgent(1, 3));
    Policy randomPolicy = new RandomPolicy(domain);
    Episode original = PolicyUtils.rollout(randomPolicy, start, domain.getModel(), 30);

    // Serialize and show the textual form.
    String serialized = original.serialize();
    System.out.println(serialized);

    System.out.println("\n\n");

    // Parse it back and inspect the result.
    Episode parsed = Episode.parseEpisode(serialized);
    System.out.println(parsed.actionString());
    System.out.println(parsed.state(0).toString());
    System.out.println(parsed.actionSequence.size());
    System.out.println(parsed.stateSequence.size());
}
Example #19
Source File: GridAgent.java From burlap with Apache License 2.0 | 4 votes |
/**
 * Returns the object class name of this instance.
 *
 * @return {@code GridWorldDomain.CLASS_AGENT}
 */
@Override
public String className() {
    return GridWorldDomain.CLASS_AGENT;
}
Example #20
Source File: GridLocation.java From burlap with Apache License 2.0 | 4 votes |
/**
 * Returns the object class name of this instance.
 *
 * @return {@code GridWorldDomain.CLASS_LOCATION}
 */
@Override
public String className() {
    return GridWorldDomain.CLASS_LOCATION;
}
Example #21
Source File: TestGridWorld.java From burlap with Apache License 2.0 | 4 votes |
public void testGridWorld(State s) { ActionType northActionType = domain.getAction(GridWorldDomain.ACTION_NORTH); ActionType eastActionType = domain.getAction(GridWorldDomain.ACTION_EAST); ActionType southActionType = domain.getAction(GridWorldDomain.ACTION_SOUTH); ActionType westActionType = domain.getAction(GridWorldDomain.ACTION_WEST); List<Action> northActions = northActionType.allApplicableActions(s); Assert.assertEquals(1, northActions.size()); List<Action> eastActions = eastActionType.allApplicableActions(s); Assert.assertEquals(1, eastActions.size()); List<Action> southActions = southActionType.allApplicableActions(s); Assert.assertEquals(1, southActions.size()); List<Action> westActions = westActionType.allApplicableActions(s); Assert.assertEquals(1, westActions.size()); Action north = northActions.get(0); Action south = southActions.get(0); Action east = eastActions.get(0); Action west = westActions.get(0); // AtLocation, WallNorth, WallSouth, WallEast, WallWest this.assertPFs(s, new boolean[] {false, false, true, false, true}); s = domain.getModel().sample(s, north).op; this.assertPFs(s, new boolean[] {false, false, false, false, true}); s = domain.getModel().sample(s, east).op; this.assertPFs(s, new boolean[] {false, false, false, false, false}); s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, north).op; this.assertPFs(s, new boolean[] {false, false, false, true, true}); s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, east).op; s = domain.getModel().sample(s, east).op; s = domain.getModel().sample(s, east).op; this.assertPFs(s, new boolean[] {false, false, true, true, false}); s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, east).op; this.assertPFs(s, new boolean[] {false, true, true, false, false}); s = domain.getModel().sample(s, east).op; s = 
domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, north).op; this.assertPFs(s, new boolean[] {false, true, false, false, true}); s = domain.getModel().sample(s, east).op; s = domain.getModel().sample(s, south).op; s = domain.getModel().sample(s, north).op; s = domain.getModel().sample(s, west).op; this.assertPFs(s, new boolean[] {false, true, false, false, true}); s = domain.getModel().sample(s, east).op; s = domain.getModel().sample(s, east).op; s = domain.getModel().sample(s, east).op; s = domain.getModel().sample(s, east).op; this.assertPFs(s, new boolean[] {true, true, false, true, false}); }
Example #22
Source File: BasicBehavior.java From burlap_examples with MIT License | 3 votes |
/**
 * Opens the standard 11x11 grid world value function visualization for every state
 * reachable from this example's initial state.
 *
 * @param valueFunction value function to render
 * @param p             policy to render
 */
public void simpleValueFunctionVis(ValueFunction valueFunction, Policy p) {
    List<State> reachable = StateReachability.getReachableStates(initialState, domain, hashingFactory);

    ValueFunctionVisualizerGUI window =
            GridWorldDomain.getGridWorldValueFunctionVisualization(reachable, 11, 11, valueFunction, p);
    window.initGUI();
}
Example #23
Source File: OptionsExample.java From burlap_examples with MIT License | 3 votes |
/**
 * Creates eight room options on an 11x11 four-rooms grid world (two per room), executes
 * each of them from a corner state of its room and opens an episode sequence visualizer
 * over the resulting episodes.
 */
public static void testOptions() {

    GridWorldDomain gridWorld = new GridWorldDomain(11, 11);
    gridWorld.setMapToFourRooms();
    SADomain domain = gridWorld.generateDomain();

    // Two options per room; the numeric arguments are passed to createRoomOption as-is.
    Option swToNorth = createRoomOption("swToNorth", domain, 1, 5, 0, 0, 4, 4);
    Option swToEast = createRoomOption("swToEast", domain, 5, 1, 0, 0, 4, 4);

    Option seToWest = createRoomOption("seToWest", domain, 5, 1, 6, 0, 10, 3);
    Option seToNorth = createRoomOption("seToNorth", domain, 8, 4, 6, 0, 10, 3);

    Option neToSouth = createRoomOption("neToSouth", domain, 8, 4, 6, 5, 10, 10);
    Option neToWest = createRoomOption("neToWest", domain, 5, 8, 6, 5, 10, 10);

    Option nwToEast = createRoomOption("nwToEast", domain, 5, 8, 0, 6, 4, 10);
    Option nwToSouth = createRoomOption("nwToSouth", domain, 1, 5, 0, 6, 4, 10);

    // Execute every option from a corner state and keep the resulting episodes.
    List<Episode> results = new ArrayList<Episode>();

    results.add(optionExecuteResult(domain, swToNorth, new GridWorldState(0, 0)));
    results.add(optionExecuteResult(domain, swToEast, new GridWorldState(0, 0)));
    results.add(optionExecuteResult(domain, seToWest, new GridWorldState(10, 0)));
    results.add(optionExecuteResult(domain, seToNorth, new GridWorldState(10, 0)));
    results.add(optionExecuteResult(domain, neToSouth, new GridWorldState(10, 10)));
    results.add(optionExecuteResult(domain, neToWest, new GridWorldState(10, 10)));
    results.add(optionExecuteResult(domain, nwToEast, new GridWorldState(0, 10)));
    results.add(optionExecuteResult(domain, nwToSouth, new GridWorldState(0, 10)));

    Visualizer visualizer = GridWorldVisualizer.getVisualizer(gridWorld.getMap());
    EpisodeSequenceVisualizer episodeViewer = new EpisodeSequenceVisualizer(visualizer, domain, results);
}
Example #24
Source File: PlotTest.java From burlap_examples with MIT License | 2 votes |
public static void main(String [] args){ GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world gw.setMapToFourRooms(); //four rooms layout gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate //ends when the agent reaches a location final TerminalFunction tf = new SinglePFTF( PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION)); //reward function definition final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1); gw.setTf(tf); gw.setRf(rf); final OOSADomain domain = gw.generateDomain(); //generate the grid world domain //setup initial state GridWorldState s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //initial state generator final ConstantStateGenerator sg = new ConstantStateGenerator(s); //set up the state hashing system for looking up states final SimpleHashableStateFactory hashingFactory = new SimpleHashableStateFactory(); /** * Create factory for Q-learning agent */ LearningAgentFactory qLearningFactory = new LearningAgentFactory() { public String getAgentName() { return "Q-learning"; } public LearningAgent generateAgent() { return new QLearning(domain, 0.99, hashingFactory, 0.3, 0.1); } }; //define learning environment SimulatedEnvironment env = new SimulatedEnvironment(domain, sg); //define experiment LearningAlgorithmExperimenter exp = new LearningAlgorithmExperimenter(env, 10, 100, qLearningFactory); exp.setUpPlottingConfiguration(500, 250, 2, 1000, TrialMode.MOST_RECENT_AND_AVERAGE, PerformanceMetric.CUMULATIVE_STEPS_PER_EPISODE, PerformanceMetric.AVERAGE_EPISODE_REWARD); //start experiment exp.startExperiment(); }