burlap.behavior.singleagent.Episode Java Examples
The following examples show how to use
burlap.behavior.singleagent.Episode.
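Before the examples, here is a minimal, self-contained sketch of the core Episode API that the snippets below rely on: constructing an Episode from an environment's initial observation, recording transitions with transition(), and reading the trajectory back with numTimeSteps(), action(), reward(), and discountedReturn(). The grid world setup mirrors the QLTutorial and VITutorial examples below; the RandomPolicy used for action selection and the import paths are assumptions based on BURLAP 3 rather than something taken from these examples, so treat this as an illustrative sketch, not canonical usage.

import burlap.behavior.policy.Policy;
import burlap.behavior.policy.RandomPolicy;
import burlap.behavior.singleagent.Episode;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;

public class EpisodeBasics {

    public static void main(String[] args) {

        //build a small grid world and a simulated environment (setup mirrors the tutorial examples below)
        GridWorldDomain gwd = new GridWorldDomain(11, 11);
        gwd.setMapToFourRooms();
        SADomain domain = gwd.generateDomain();
        State s = new GridWorldState(new GridAgent(0, 0));
        SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

        //start an Episode from the environment's initial observation
        Episode ep = new Episode(env.currentObservation());

        //act for a few steps (here with a random policy, an assumption for illustration) and record each transition
        Policy p = new RandomPolicy(domain);
        for(int i = 0; i < 5 && !env.isInTerminalState(); i++){
            Action a = p.action(env.currentObservation());
            EnvironmentOutcome eo = env.executeAction(a);
            ep.transition(a, eo.op, eo.r); //record action, resulting state, and reward
        }

        //read the recorded trajectory back out
        System.out.println("Time steps (states) recorded: " + ep.numTimeSteps());
        for(int t = 0; t < ep.numTimeSteps() - 1; t++){
            System.out.println(t + ": " + ep.action(t) + " -> reward " + ep.reward(t + 1));
        }
        System.out.println("Undiscounted return: " + ep.discountedReturn(1.0));
    }
}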
Example #1
Source File: BeliefAgent.java From burlap with Apache License 2.0 | 6 votes |
/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that recorded the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
    Episode ea = new Episode();
    ea.initializeInState(this.environment.currentObservation());
    int c = 0;
    while(!this.environment.isInTerminalState() && c < maxSteps){
        Action ga = this.getAction(this.curBelief);
        EnvironmentOutcome eo = environment.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        //update our belief
        this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);

        c++;
    }

    return ea;
}
Example #2
Source File: LearningAlgorithmExperimenter.java From burlap with Apache License 2.0 | 6 votes |
/**
 * Runs a trial for an agent generated by the given factory when interpreting trial length as a number of total steps.
 * @param agentFactory the agent factory used to generate the agent to test.
 */
protected void runStepBoundTrial(LearningAgentFactory agentFactory){

    //temporarily disable plotter data collection to avoid possible contamination from any actions taken during agent generation
    //(e.g., if there is pre-test training)
    this.plotter.toggleDataCollection(false);

    LearningAgent agent = agentFactory.generateAgent();

    this.plotter.toggleDataCollection(true); //turn it back on to begin

    this.plotter.startNewTrial();

    int stepsRemaining = this.trialLength;
    while(stepsRemaining > 0){
        Episode ea = agent.runLearningEpisode(this.environmentSever, stepsRemaining);
        stepsRemaining -= ea.numTimeSteps()-1; //-1 because we want to subtract the number of actions, not the number of states seen
        this.plotter.endEpisode();
        this.environmentSever.resetEnvironment();
    }

    this.plotter.endTrial();

}
Example #3
Source File: ContinuousDomainTutorial.java From burlap_examples with MIT License | 6 votes |
public static void IPSS(){

    InvertedPendulum ip = new InvertedPendulum();
    ip.physParams.actionNoise = 0.;
    RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
    TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
    ip.setRf(rf);
    ip.setTf(tf);
    SADomain domain = ip.generateDomain();

    State initialState = new InvertedPendulumState();

    SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
    ss.setForgetPreviousPlanResults(true);
    ss.toggleDebugPrinting(false);
    Policy p = new GreedyQPolicy(ss);

    Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
    System.out.println("Num steps: " + e.maxTimeStep());

    Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));

}
Example #4
Source File: LSPI.java From burlap with Apache License 2.0 | 6 votes |
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    Episode ea = maxSteps != -1 ? PolicyUtils.rollout(this.learningPolicy, env, maxSteps) : PolicyUtils.rollout(this.learningPolicy, env);

    this.updateDatasetWithLearningEpisode(ea);

    if(this.shouldRereunPolicyIteration(ea)){
        this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
        this.numStepsSinceLastLearningPI = 0;
    }
    else{
        this.numStepsSinceLastLearningPI += ea.numTimeSteps()-1;
    }

    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}
Example #5
Source File: MLIRL.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Computes and returns the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
 * @param ea the trajectory
 * @param weight the weight to assign the trajectory
 * @return the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
 */
public double logLikelihoodOfTrajectory(Episode ea, double weight){
    double logLike = 0.;
    Policy p = new BoltzmannQPolicy((QProvider)this.request.getPlanner(), 1./this.request.getBoltzmannBeta());
    for(int i = 0; i < ea.numTimeSteps()-1; i++){
        this.request.getPlanner().planFromState(ea.state(i));
        double actProb = p.actionProb(ea.state(i), ea.action(i));
        logLike += Math.log(actProb);
    }
    logLike *= weight;
    return logLike;
}
Example #6
Source File: MLIRL.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Computes and returns the gradient of the log-likelihood of all trajectories
 * @return the gradient of the log-likelihood of all trajectories
 */
public FunctionGradient logLikelihoodGradient(){
    HashedAggregator<Integer> gradientSum = new HashedAggregator<Integer>();

    double [] weights = this.request.getEpisodeWeights();
    List<Episode> exampleTrajectories = this.request.getExpertEpisodes();

    for(int i = 0; i < exampleTrajectories.size(); i++){
        Episode ea = exampleTrajectories.get(i);
        double weight = weights[i];

        for(int t = 0; t < ea.numTimeSteps()-1; t++){
            this.request.getPlanner().planFromState(ea.state(t));
            FunctionGradient policyGrad = this.logPolicyGrad(ea.state(t), ea.action(t));
            //weigh it by trajectory strength
            for(FunctionGradient.PartialDerivative pd : policyGrad.getNonZeroPartialDerivatives()){
                double newVal = pd.value * weight;
                gradientSum.add(pd.parameterId, newVal);
            }
        }
    }

    FunctionGradient gradient = new FunctionGradient.SparseGradient(gradientSum.size());
    for(Map.Entry<Integer, Double> e : gradientSum.entrySet()){
        gradient.put(e.getKey(), e.getValue());
    }

    return gradient;
}
Example #7
Source File: MultipleIntentionsMLIRLRequest.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Initializes
 * @param domain the domain of the problem
 * @param plannerFactory A {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientPlannerFactory} that produces {@link DifferentiableQFunction} objects.
 * @param expertEpisodes the expert trajectories
 * @param rf the {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.DifferentiableRF} model to use.
 * @param k the number of clusters
 */
public MultipleIntentionsMLIRLRequest(SADomain domain, QGradientPlannerFactory plannerFactory, List<Episode> expertEpisodes, DifferentiableRF rf, int k) {
    super(domain, null, expertEpisodes, rf);
    this.plannerFactory = plannerFactory;
    this.k = k;
    if(this.plannerFactory != null) {
        this.setPlanner((Planner) plannerFactory.generateDifferentiablePlannerForRequest(this));
    }
}
Example #8
Source File: TestPlanning.java From burlap with Apache License 2.0 | 5 votes |
@Test
public void testBFS() {
    GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

    DeterministicPlanner planner = new BFS(this.domain, this.goalCondition, this.hashingFactory);
    planner.planFromState(initialState);
    Policy p = new SDPlannerPolicy(planner);
    Episode analysis = rollout(p, initialState, domain.getModel());
    this.evaluateEpisode(analysis, true);
}
Example #9
Source File: MacroAction.java From burlap with Apache License 2.0 | 5 votes |
@Override
public double probabilityOfTermination(State s, Episode history) {
    if(history.actionSequence.size() >= actionSequence.size()){
        return 1.;
    }
    return 0.;
}
Example #10
Source File: TestPlanning.java From burlap with Apache License 2.0 | 5 votes |
public void evaluateEpisode(Episode analysis, Boolean expectOptimal) {
    if (expectOptimal) {
        Assert.assertEquals(this.gw.getHeight() + this.gw.getWidth() - 1, analysis.stateSequence.size());
        Assert.assertEquals(analysis.stateSequence.size()-1, analysis.actionSequence.size());
        Assert.assertEquals(analysis.actionSequence.size(), analysis.rewardSequence.size());
        Assert.assertEquals(-analysis.actionSequence.size(), analysis.discountedReturn(1.0), TestPlanning.delta);
    }

    Assert.assertEquals(true, domain.getModel().terminal(analysis.stateSequence.get(analysis.stateSequence.size()-1)));
    Assert.assertEquals(true, this.goalCondition.satisfies(analysis.stateSequence.get(analysis.stateSequence.size()-1)));
}
Example #11
Source File: PolicyUtils.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached or when the provided number of steps has been taken.
 * @param p the {@link Policy}
 * @param env The {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @param numSteps the maximum number of steps to take in the environment.
 * @return An {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env, int numSteps){
    Episode ea = new Episode(env.currentObservation());

    int nSteps;
    do{
        followAndRecordPolicy(p, env, ea);
        nSteps = ea.numTimeSteps();
    }while(!env.isInTerminalState() && nSteps < numSteps);

    return ea;
}
Example #12
Source File: TestPlanning.java From burlap with Apache License 2.0 | 5 votes |
@Test
public void testAStar() {
    GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

    Heuristic mdistHeuristic = new Heuristic() {

        @Override
        public double h(State s) {
            GridAgent agent = ((GridWorldState)s).agent;
            GridLocation location = ((GridWorldState)s).locations.get(0);

            //get agent position
            int ax = agent.x;
            int ay = agent.y;

            //get location position
            int lx = location.x;
            int ly = location.y;

            //compute Manhattan distance
            double mdist = Math.abs(ax-lx) + Math.abs(ay-ly);

            return -mdist;
        }
    };

    //provide A* the heuristic as well as the reward function so that it can keep
    //track of the actual cost
    DeterministicPlanner planner = new AStar(domain, goalCondition, hashingFactory, mdistHeuristic);
    planner.planFromState(initialState);
    Policy p = new SDPlannerPolicy(planner);

    Episode analysis = PolicyUtils.rollout(p, initialState, domain.getModel());
    this.evaluateEpisode(analysis, true);
}
Example #13
Source File: LSPI.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Updates this object's {@link SARSData} to include the results of a learning episode.
 * @param ea the learning episode as an {@link Episode} object.
 */
protected void updateDatasetWithLearningEpisode(Episode ea){
    if(this.dataset == null){
        this.dataset = new SARSData(ea.numTimeSteps()-1);
    }
    for(int i = 0; i < ea.numTimeSteps()-1; i++){
        this.dataset.add(ea.state(i), ea.action(i), ea.reward(i+1), ea.state(i+1));
    }
}
Example #14
Source File: RTDP.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Performs Bellman updates only after a rollout is complete and in reverse order
 * @param initialState the initial state from which to plan
 */
protected void batchRTDP(State initialState){

    int totalStates = 0;

    int consecutiveSmallDeltas = 0;
    for(int i = 0; i < numRollouts; i++){

        Episode ea = PolicyUtils.rollout(rollOutPolicy, initialState, model, maxDepth);

        LinkedList<HashableState> orderedStates = new LinkedList<HashableState>();
        for(State s : ea.stateSequence){
            orderedStates.addFirst(this.stateHash(s));
        }

        double delta = this.performOrderedBellmanUpdates(orderedStates);
        totalStates += orderedStates.size();
        DPrint.cl(debugCode, "Pass: " + i + "; Num states: " + orderedStates.size() + " (total: " + totalStates + ")");

        if(delta < this.maxDelta){
            consecutiveSmallDeltas++;
            if(consecutiveSmallDeltas >= this.minNumRolloutsWithSmallValueChange){
                break;
            }
        }
        else{
            consecutiveSmallDeltas = 0;
        }
    }

}
Example #15
Source File: Option.java From burlap with Apache License 2.0 | 5 votes |
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
    Random rand = RandomFactory.getMapped(0);

    State initial = env.currentObservation();
    State cur = initial;

    Episode episode = new Episode(cur);
    Episode history = new Episode(cur);
    double roll;
    double pT;
    int nsteps = 0;
    double r = 0.;
    double cd = 1.;
    do{
        Action a = o.policy(cur, history);
        EnvironmentOutcome eo = env.executeAction(a);
        nsteps++;
        r += cd*eo.r;
        cur = eo.op;
        cd *= discount;

        history.transition(a, eo.op, eo.r);

        AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
        episode.transition(annotatedAction, eo.op, r);

        pT = o.probabilityOfTermination(eo.op, history);
        roll = rand.nextDouble();

    }while(roll > pT && !env.isInTerminalState());

    EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

    return eoo;
}
Example #16
Source File: QLTutorial.java From burlap_examples with MIT License | 5 votes |
public static void main(String[] args) {

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();
    gwd.setProbSucceedTransitionDynamics(0.8);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));

    SADomain domain = gwd.generateDomain();

    //get initial state with agent in 0,0
    State s = new GridWorldState(new GridAgent(0, 0));

    //create environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

    //create Q-learning
    QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(), 0.1, 0.1);

    //run Q-learning and store results in a list
    List<Episode> episodes = new ArrayList<Episode>(1000);
    for(int i = 0; i < 1000; i++){
        episodes.add(agent.runLearningEpisode(env));
        env.resetEnvironment();
    }

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, episodes);

}
Example #17
Source File: QLTutorial.java From burlap_examples with MIT License | 5 votes |
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    //initialize our episode object with the initial state of the environment
    Episode e = new Episode(env.currentObservation());

    //behave until a terminal state or max steps is reached
    State curState = env.currentObservation();
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        //select an action
        Action a = this.learningPolicy.action(curState);

        //take the action and observe outcome
        EnvironmentOutcome eo = env.executeAction(a);

        //record result
        e.transition(eo);

        //get the max Q value of the resulting state if it's not terminal, 0 otherwise
        double maxQ = eo.terminated ? 0. : this.value(eo.op);

        //update the old Q-value
        QValue oldQ = this.storedQ(curState, a);
        oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);

        //update state pointer to next environment state observed
        curState = eo.op;
        steps++;

    }

    return e;
}
Example #18
Source File: SubgoalOption.java From burlap with Apache License 2.0 | 5 votes |
@Override
public double probabilityOfTermination(State s, Episode history) {
    if(terminationStates.satisfies(s) || !policy.definedFor(s)){
        return 1.;
    }
    return 0.;
}
Example #19
Source File: VITutorial.java From burlap_examples with MIT License | 5 votes |
public static void main(String [] args){

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));
    gwd.setMapToFourRooms();

    //only go in the intended direction 80% of the time
    gwd.setProbSucceedTransitionDynamics(0.8);

    SADomain domain = gwd.generateDomain();

    //get initial state with agent in 0,0
    State s = new GridWorldState(new GridAgent(0, 0));

    //set up VI with a 0.99 discount factor, a value
    //function initialization that initializes all states to value 0, and which will
    //run for 30 iterations over the state space
    VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(0.0), 30);

    //run planning from our initial state
    Policy p = vi.planFromState(s);

    //evaluate the policy with one rollout and visualize the trajectory
    Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));

}
Example #20
Source File: MLIRL.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Computes and returns the log-likelihood of all expert trajectories under the current reward function parameters.
 * @return the log-likelihood of all expert trajectories under the current reward function parameters.
 */
public double logLikelihood(){

    double [] weights = this.request.getEpisodeWeights();
    List<Episode> exampleTrajectories = this.request.getExpertEpisodes();

    double sum = 0.;
    for(int i = 0; i < exampleTrajectories.size(); i++){
        sum += this.logLikelihoodOfTrajectory(exampleTrajectories.get(i), weights[i]);
    }

    return sum;

}
Example #21
Source File: SubgoalOption.java From burlap with Apache License 2.0 | 5 votes |
@Override
public List<ActionProb> policyDistribution(State s, Episode history) {
    if(!(policy instanceof EnumerablePolicy)){
        throw new RuntimeException("SubgoalOption cannot return policy distribution because underlying policy is not an EnumerablePolicy");
    }
    return ((EnumerablePolicy)policy).policyDistribution(s);
}
Example #22
Source File: BeliefAgent.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Causes the agent to act until the environment reaches a termination condition. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @return an {@link Episode} that recorded the observation, action, and reward sequence.
 */
public Episode actUntilTerminal(){
    Episode ea = new Episode();
    ea.initializeInState(this.environment.currentObservation());
    while(!this.environment.isInTerminalState()){
        Action ga = this.getAction(this.curBelief);
        EnvironmentOutcome eo = environment.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        //update our belief
        this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);

    }

    return ea;
}
Example #23
Source File: LSPI.java From burlap with Apache License 2.0 | 5 votes |
/**
 * Returns whether LSPI should be rerun given the latest learning episode results. Default behavior is to return true
 * if the number of learning episode steps plus the number of steps since the last run is greater than the {@link #minNewStepsForLearningPI} threshold.
 * @param ea the most recent learning episode
 * @return true if LSPI should be rerun; false otherwise.
 */
protected boolean shouldRereunPolicyIteration(Episode ea){
    if(this.numStepsSinceLastLearningPI+ea.numTimeSteps()-1 > this.minNewStepsForLearningPI){
        return true;
    }
    return false;
}
Example #24
Source File: TestPlanning.java From burlap with Apache License 2.0 | 5 votes |
@Test
public void testDFS() {
    GridWorldState initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, 0, "loc0"));

    DeterministicPlanner planner = new DFS(this.domain, this.goalCondition, this.hashingFactory, -1, true);
    planner.planFromState(initialState);
    Policy p = new SDPlannerPolicy(planner);
    Episode analysis = rollout(p, initialState, domain.getModel());
    this.evaluateEpisode(analysis);
}
Example #25
Source File: PotentialShapedRMax.java From burlap with Apache License 2.0 | 4 votes |
@Override
public Episode runLearningEpisode(Environment env) {
    return this.runLearningEpisode(env, -1);
}
Example #26
Source File: LSPI.java From burlap with Apache License 2.0 | 4 votes |
@Override
public Episode runLearningEpisode(Environment env) {
    return this.runLearningEpisode(env, -1);
}
Example #27
Source File: ApproximateQLearning.java From burlap with Apache License 2.0 | 4 votes |
@Override
public Episode runLearningEpisode(Environment env) {
    return this.runLearningEpisode(env, -1);
}
Example #28
Source File: ApprenticeshipLearning.java From burlap with Apache License 2.0 | 4 votes |
/**
 * Returns the initial state of a randomly chosen episode analysis
 * @param episodes the expert demonstrations
 * @return a random episode's initial state
 */
public static State getInitialState(List<Episode> episodes) {
    Random rando = new Random();
    Episode randomEpisode = episodes.get(rando.nextInt(episodes.size()));
    return randomEpisode.state(0);
}
Example #29
Source File: TestBlockDude.java From burlap with Apache License 2.0 | 4 votes |
public void testDude(State s) {
    TerminalFunction tf = new BlockDudeTF();
    StateConditionTest sc = new TFGoalCondition(tf);

    AStar astar = new AStar(domain, sc, new SimpleHashableStateFactory(), new NullHeuristic());
    astar.toggleDebugPrinting(false);
    astar.planFromState(s);

    Policy p = new SDPlannerPolicy(astar);

    Episode ea = PolicyUtils.rollout(p, s, domain.getModel(), 100);

    State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
    Assert.assertEquals(true, tf.isTerminal(lastState));
    Assert.assertEquals(true, sc.satisfies(lastState));
    Assert.assertEquals(-94.0, ea.discountedReturn(1.0), 0.001);

    /*
    BlockDude constructor = new BlockDude();
    Domain d = constructor.generateDomain();

    List<Integer> px = new ArrayList<Integer>();
    List<Integer> ph = new ArrayList<Integer>();

    ph.add(15);
    ph.add(3);
    ph.add(3);
    ph.add(3);
    ph.add(0);
    ph.add(0);
    ph.add(0);
    ph.add(1);
    ph.add(2);
    ph.add(0);
    ph.add(2);
    ph.add(3);
    ph.add(2);
    ph.add(2);
    ph.add(3);
    ph.add(3);
    ph.add(15);

    State o = BlockDude.getCleanState(d, px, ph, 6);
    o = BlockDude.setAgent(o, 9, 3, 1, 0);
    o = BlockDude.setExit(o, 1, 0);

    o = BlockDude.setBlock(o, 0, 5, 1);
    o = BlockDude.setBlock(o, 1, 6, 1);
    o = BlockDude.setBlock(o, 2, 14, 3);
    o = BlockDude.setBlock(o, 3, 16, 4);
    o = BlockDude.setBlock(o, 4, 17, 4);
    o = BlockDude.setBlock(o, 5, 17, 5);

    TerminalFunction tf = new SinglePFTF(d.getPropFunction(BlockDude.PFATEXIT));
    StateConditionTest sc = new SinglePFSCT(d.getPropFunction(BlockDude.PFATEXIT));

    RewardFunction rf = new UniformCostRF();

    AStar astar = new AStar(d, rf, sc, new DiscreteStateHashFactory(), new NullHeuristic());
    astar.toggleDebugPrinting(false);
    astar.planFromState(o);

    Policy p = new SDPlannerPolicy(astar);

    EpisodeAnalysis ea = p.evaluateBehavior(o, rf, tf, 100);

    State lastState = ea.stateSequence.get(ea.stateSequence.size() - 1);
    Assert.assertEquals(true, tf.isTerminal(lastState));
    Assert.assertEquals(true, sc.satisfies(lastState));
    Assert.assertEquals(-94.0, ea.getDiscountedReturn(1.0), 0.001);
    */
}
Example #30
Source File: PotentialShapedRMax.java From burlap with Apache License 2.0 | 4 votes |
public List<Episode> getAllStoredLearningEpisodes() {
    return episodeHistory;
}