burlap.debugtools.RandomFactory Java Examples
The following examples show how to use burlap.debugtools.RandomFactory.
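RandomFactory is BURLAP's debug utility for handing out shared java.util.Random instances: a single default instance, plus instances keyed by an integer id so that components requesting the same id receive the same object. Below is a minimal sketch of the two accessors used throughout the examples on this page, getDefault() and getMapped(int); the demo class name and printed output are illustrative assumptions, not part of BURLAP.

import java.util.Random;

import burlap.debugtools.RandomFactory;

public class RandomFactoryDemo {

    public static void main(String[] args) {

        //the shared default Random instance
        Random defaultRand = RandomFactory.getDefault();
        System.out.println("default roll: " + defaultRand.nextDouble());

        //the Random instance mapped to id 0; other code calling getMapped(0)
        //receives this same instance, as most of the examples below do
        Random mapped0 = RandomFactory.getMapped(0);
        System.out.println("mapped(0) roll: " + mapped0.nextInt(10));

        //a different id (as StochasticTree and UCT use below) gives a
        //separate Random stream that is not shared with id 0
        Random mappedOther = RandomFactory.getMapped(2347636);
        System.out.println("mapped(2347636) roll: " + mappedOther.nextDouble());
    }
}

Because the instances are shared, any component that draws from getMapped(0) advances the sequence seen by every other component using that id.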
Example #1
Source File: IPModel.java From burlap with Apache License 2.0

@Override
public State sample(State s, Action a) {
    s = s.copy();

    double baseForce = 0.;
    if(a.actionName().equals(CartPoleDomain.ACTION_LEFT)){
        baseForce = -physParams.actionForce;
    }
    else if(a.actionName().equals(CartPoleDomain.ACTION_RIGHT)){
        baseForce = physParams.actionForce;
    }

    double roll = RandomFactory.getMapped(0).nextDouble() * (2 * physParams.actionNoise) - physParams.actionNoise;
    double force = baseForce + roll;

    return updateState(s, force);
}
Example #2
Source File: PolicyUtils.java From burlap with Apache License 2.0

/**
 * This is a helper method for stochastic policies. If the policy is stochastic, then rather than
 * having the policy define both the {@link Policy#action(State)} method and the
 * {@link EnumerablePolicy#policyDistribution(State)} method, the object needs only to define the
 * {@link EnumerablePolicy#policyDistribution(State)} method, and the {@link Policy#action(State)} method
 * can simply return the result of this method to sample an action.
 * @param p the {@link EnumerablePolicy}
 * @param s the input state from which an action should be selected.
 * @return an {@link Action} to take
 */
public static Action sampleFromActionDistribution(EnumerablePolicy p, State s){
    Random rand = RandomFactory.getMapped(0);
    double roll = rand.nextDouble();
    List<ActionProb> probs = p.policyDistribution(s);
    if(probs == null || probs.isEmpty()){
        throw new PolicyUndefinedException();
    }

    double sump = 0.;
    for(ActionProb ap : probs){
        sump += ap.pSelection;
        if(roll < sump){
            return ap.ga;
        }
    }

    throw new RuntimeException("Tried to sample policy action distribution, but it did not sum to 1.");
}
Example #3
Source File: FullStateModel.java From burlap with Apache License 2.0

/**
 * Method to implement the {@link SampleStateModel#sample(State, Action)} method when the
 * {@link FullStateModel#stateTransitions(State, Action)} method is implemented. Operates by calling
 * the {@link FullStateModel#stateTransitions(State, Action)} method, rolling a random number, and selecting a
 * transition according to the probability specified by {@link FullStateModel#stateTransitions(State, Action)}.
 * @param model the {@link FullStateModel} with the implemented {@link FullStateModel#stateTransitions(State, Action)} method.
 * @param s the input state
 * @param a the action to be applied in the input state
 * @return a sampled state transition ({@link State}).
 */
public static State sampleByEnumeration(FullStateModel model, State s, Action a){

    List<StateTransitionProb> tps = model.stateTransitions(s, a);
    double roll = RandomFactory.getMapped(0).nextDouble();
    double sum = 0;
    for(StateTransitionProb tp : tps){
        sum += tp.p;
        if(roll < sum){
            return tp.s;
        }
    }

    throw new RuntimeException("Transition probabilities did not sum to one, they summed to " + sum);
}
Example #4
Source File: SARSCollector.java From burlap with Apache License 2.0

@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {

    if(intoDataset == null){
        intoDataset = new SARSData();
    }

    State curState = s;
    int nsteps = 0;
    boolean terminated = model.terminal(s);
    while(!terminated && nsteps < maxSteps){

        List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
        Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
        EnvironmentOutcome eo = model.sample(curState, ga);
        intoDataset.add(curState, ga, eo.r, eo.op);
        curState = eo.op;
        terminated = eo.terminated;
        nsteps++;
    }

    return intoDataset;
}
Example #5
Source File: SARSCollector.java From burlap with Apache License 2.0

@Override
public SARSData collectDataFrom(Environment env, int maxSteps, SARSData intoDataset) {

    if(intoDataset == null){
        intoDataset = new SARSData();
    }

    int nsteps = 0;
    while(!env.isInTerminalState() && nsteps < maxSteps){
        List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, env.currentObservation());
        Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
        EnvironmentOutcome eo = env.executeAction(ga);
        intoDataset.add(eo.o, eo.a, eo.r, eo.op);
        nsteps++;
    }

    return intoDataset;
}
Example #6
Source File: BoundedRTDP.java From burlap with Apache License 2.0

/**
 * Returns the maximum Q-value entry for the given state with ties broken randomly.
 * @param s the query state for the Q-value
 * @return the maximum Q-value entry for the given state with ties broken randomly.
 */
protected QValue maxQ(State s){

    List<QValue> qs = this.qValues(s);
    double max = Double.NEGATIVE_INFINITY;
    List<QValue> maxQs = new ArrayList<QValue>(qs.size());
    for(QValue q : qs){
        if(q.q == max){
            maxQs.add(q);
        }
        else if(q.q > max){
            max = q.q;
            maxQs.clear();
            maxQs.add(q);
        }
    }

    //return random max
    int rint = RandomFactory.getMapped(0).nextInt(maxQs.size());

    return maxQs.get(rint);
}
Example #7
Source File: BoundedRTDP.java From burlap with Apache License 2.0

/**
 * Selects a next state for expansion when action a is applied in state s, choosing the possible next state
 * with the largest margin between its lower and upper bounds. Ties are broken randomly.
 * @param s the source state of the transition
 * @param a the action applied in the source state
 * @return a {@link StateSelectionAndExpectedGap} object holding the next state to be expanded and the expected margin size of this transition.
 */
protected StateSelectionAndExpectedGap getNextStateByMaxMargin(State s, Action a){

    List<TransitionProb> tps = ((FullModel)model).transitions(s, a);
    double sum = 0.;
    double maxGap = Double.NEGATIVE_INFINITY;
    List<HashableState> maxStates = new ArrayList<HashableState>(tps.size());
    for(TransitionProb tp : tps){
        HashableState nsh = this.hashingFactory.hashState(tp.eo.op);
        double gap = this.getGap(nsh);
        sum += tp.p*gap;
        if(gap == maxGap){
            maxStates.add(nsh);
        }
        else if(gap > maxGap){
            maxStates.clear();
            maxStates.add(nsh);
            maxGap = gap;
        }
    }

    int rint = RandomFactory.getMapped(0).nextInt(maxStates.size());
    StateSelectionAndExpectedGap select = new StateSelectionAndExpectedGap(maxStates.get(rint), sum);

    return select;
}
Example #8
Source File: BoundedRTDP.java From burlap with Apache License 2.0

/**
 * Selects a next state for expansion when action a is applied in state s by randomly sampling from the
 * transition dynamics, weighted by the margin between the lower and upper bound value functions.
 * @param s the source state of the transition
 * @param a the action applied in the source state
 * @return a {@link StateSelectionAndExpectedGap} object holding the next state to be expanded and the expected margin size of this transition.
 */
protected StateSelectionAndExpectedGap getNextStateBySampling(State s, Action a){

    List<TransitionProb> tps = ((FullModel)model).transitions(s, a);
    double sum = 0.;
    double [] weightedGap = new double[tps.size()];
    HashableState[] hashedStates = new HashableState[tps.size()];
    for(int i = 0; i < tps.size(); i++){
        TransitionProb tp = tps.get(i);
        HashableState nsh = this.hashingFactory.hashState(tp.eo.op);
        hashedStates[i] = nsh;
        double gap = this.getGap(nsh);
        weightedGap[i] = tp.p*gap;
        sum += weightedGap[i];
    }

    double roll = RandomFactory.getMapped(0).nextDouble();
    double cumSum = 0.;
    for(int i = 0; i < weightedGap.length; i++){
        cumSum += weightedGap[i]/sum;
        if(roll < cumSum){
            StateSelectionAndExpectedGap select = new StateSelectionAndExpectedGap(hashedStates[i], sum);
            return select;
        }
    }

    throw new RuntimeException("Error: probabilities in state selection did not sum to 1.");
}
Example #9
Source File: TigerObservations.java From burlap with Apache License 2.0

@Override
public State sample(State state, Action action){
    //override for faster sampling
    if(action.actionName().equals(TigerDomain.ACTION_LEFT) || action.actionName().equals(TigerDomain.ACTION_RIGHT)){
        return this.observationReset();
    }
    else if(action.actionName().equals(TigerDomain.ACTION_LISTEN)){
        String tigerVal = (String)state.get(TigerDomain.VAR_DOOR);
        double r = RandomFactory.getMapped(0).nextDouble();
        if(r < this.listenAccuracy){
            if(tigerVal.equals(TigerDomain.VAL_LEFT)){
                return this.observationLeft();
            }
            else{
                return this.observationRight();
            }
        }
        else{
            //noisy listen; reverse direction
            if(tigerVal.equals(TigerDomain.VAL_LEFT)){
                return this.observationRight();
            }
            else{
                return this.observationLeft();
            }
        }
    }
    else if(action.actionName().equals(TigerDomain.ACTION_DO_NOTHING)){
        return this.observationNothing();
    }

    throw new RuntimeException("Unknown action " + action.actionName() + "; cannot return observation sample.");
}
Example #10
Source File: TigerDomain.java From burlap with Apache License 2.0

/**
 * Returns a {@link burlap.mdp.auxiliary.StateGenerator} that generates a hidden tiger state with the tiger
 * on the left side some of the time, and on the right side otherwise. The probability of the left side is
 * specified with the argument probLeft.
 * @param probLeft the probability that a state with the tiger on the left side will be generated
 * @return a {@link burlap.mdp.auxiliary.StateGenerator}
 */
public static StateGenerator randomSideStateGenerator(final double probLeft){
    return new StateGenerator() {
        @Override
        public State generateState() {
            double roll = RandomFactory.getMapped(0).nextDouble();
            return roll < probLeft ? new TigerState(VAL_LEFT) : new TigerState(VAL_RIGHT);
        }
    };
}
Example #11
Source File: MCRandomStateGenerator.java From burlap with Apache License 2.0

/**
 * Initializes for the {@link MountainCar} {@link Domain} object for which states will be generated.
 * By default, the random x and velocity ranges will be the full range used by the domain.
 * @param params the mountain car physics parameters specifying the boundaries
 */
public MCRandomStateGenerator(MountainCar.MCPhysicsParams params){
    this.xmin = params.xmin;
    this.xmax = params.xmax;
    this.vmin = params.vmin;
    this.vmax = params.vmax;

    this.rand = RandomFactory.getMapped(0);
}
Example #12
Source File: TabularBeliefState.java From burlap with Apache License 2.0

@Override
public State sample() {

    double sumProb = 0.;
    double r = RandomFactory.getMapped(0).nextDouble();
    for(Map.Entry<Integer, Double> e : this.beliefValues.entrySet()){
        sumProb += e.getValue();
        if(r < sumProb){
            return this.stateEnumerator.getStateForEnumerationId(e.getKey());
        }
    }

    throw new RuntimeException("Error; could not sample from belief state because the beliefs did not sum to 1; they summed to: " + sumProb);
}
Example #13
Source File: ObservationUtilities.java From burlap with Apache License 2.0

/**
 * A helper method for easily implementing the {@link ObservationFunction#sample(State, Action)} method that
 * samples an observation by first getting all non-zero probability observations, as returned by the
 * {@link DiscreteObservationFunction#probabilities(State, Action)} method, and then sampling from the
 * enumerated distribution. Note that enumerating all observation probabilities may be computationally
 * inefficient; therefore, it may be better to directly implement the {@link ObservationFunction#sample(State, Action)}
 * method with efficient domain-specific code.
 * @param of the {@link ObservationFunction} to use.
 * @param state the true MDP state
 * @param action the action that led to the MDP state
 * @return an observation represented with a {@link State}.
 */
public static State sampleByEnumeration(DiscreteObservationFunction of, State state, Action action){

    List<ObservationProbability> obProbs = of.probabilities(state, action);
    Random rand = RandomFactory.getMapped(0);
    double r = rand.nextDouble();
    double sumProb = 0.;
    for(ObservationProbability op : obProbs){
        sumProb += op.p;
        if(r < sumProb){
            return op.observation;
        }
    }

    throw new RuntimeException("Could not sample observation because observation probabilities did not sum to 1; they summed to " + sumProb);
}
Example #14
Source File: FullModel.java From burlap with Apache License 2.0

/**
 * Method to implement the {@link SampleModel#sample(State, Action)} method when the
 * {@link FullModel#transitions(State, Action)} method is implemented. Operates by calling
 * the {@link FullModel#transitions(State, Action)} method, rolling a random number, and selecting a
 * transition according to the probability specified by {@link FullModel#transitions(State, Action)}.
 * @param model the {@link FullModel} with the implemented {@link FullModel#transitions(State, Action)} method.
 * @param s the input state
 * @param a the action to be applied in the input state
 * @return a sampled transition ({@link EnvironmentOutcome}).
 */
public static EnvironmentOutcome sampleByEnumeration(FullModel model, State s, Action a){

    List<TransitionProb> tps = model.transitions(s, a);
    double roll = RandomFactory.getMapped(0).nextDouble();
    double sum = 0;
    for(TransitionProb tp : tps){
        sum += tp.p;
        if(roll < sum){
            return tp.eo;
        }
    }

    throw new RuntimeException("Transition probabilities did not sum to one, they summed to " + sum);
}
Example #15
Source File: Tournament.java From burlap with Apache License 2.0

/**
 * Runs the tournament
 */
public void runTournament(){

    selector.resetMatchSelections();

    List<MatchEntry> match;
    while((match = selector.getNextMatch()) != null){

        World w = worldGenerator.generateWorld();

        //shuffle entrants
        Collections.shuffle(match, RandomFactory.getMapped(0));

        Map<String, Integer> agentNameToId = new HashMap<String, Integer>();

        //have the matched agents join the world
        for(MatchEntry me : match){
            SGAgent a = agents.get(me.agentId).generateAgent("agent" + me.agentId, me.agentType);
            w.join(a);
            agentNameToId.put(a.agentName(), me.agentId);
            DPrint.c(debugId, me.agentId + " ");
        }
        DPrint.cl(debugId, "");

        //run the game
        for(int i = 0; i < this.numGames; i++){
            w.runGame(maxStages);
        }

        //record results
        for(Entry<String, Integer> an : agentNameToId.entrySet()){
            int aId = an.getValue();
            double gameCumR = w.getCumulativeRewardForAgent(an.getKey());
            double tournCumR = tournamentCumulatedReward.get(aId);
            tournamentCumulatedReward.set(aId, gameCumR + tournCumR);
        }
    }
}
Example #16
Source File: DFS.java From burlap with Apache License 2.0

/**
 * Constructor of DFS with specification of depth limit, whether to maintain a closed list that affects exploration, and whether paths
 * generated by options should be explored first.
 * @param domain the domain in which to plan
 * @param gc indicates the goal states
 * @param hashingFactory the state hashing factory to use
 * @param maxDepth depth limit of DFS. -1 specifies no limit.
 * @param maintainClosed whether to maintain a closed list or not
 * @param optionsFirst whether to explore paths generated by options first.
 */
protected void DFSInit(SADomain domain, StateConditionTest gc, HashableStateFactory hashingFactory, int maxDepth, boolean maintainClosed, boolean optionsFirst){
    this.deterministicPlannerInit(domain, gc, hashingFactory);
    this.maxDepth = maxDepth;
    this.maintainClosed = maintainClosed;
    if(optionsFirst){
        this.setOptionsFirst();
    }

    rand = RandomFactory.getMapped(0);
}
Example #17
Source File: UnmodeledFavoredPolicy.java From burlap with Apache License 2.0

@Override
public Action action(State s) {

    List<Action> unmodeled = KWIKModel.Helper.unmodeledActions(model, allActionTypes, s);

    if(!unmodeled.isEmpty()){
        return unmodeled.get(RandomFactory.getMapped(0).nextInt(unmodeled.size()));
    }

    return this.sourcePolicy.action(s);
}
Example #18
Source File: Option.java From burlap with Apache License 2.0

public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
    Random rand = RandomFactory.getMapped(0);

    State initial = env.currentObservation();
    State cur = initial;

    Episode episode = new Episode(cur);
    Episode history = new Episode(cur);
    double roll;
    double pT;
    int nsteps = 0;
    double r = 0.;
    double cd = 1.;
    do{
        Action a = o.policy(cur, history);
        EnvironmentOutcome eo = env.executeAction(a);
        nsteps++;
        r += cd*eo.r;
        cur = eo.op;
        cd *= discount;

        history.transition(a, eo.op, eo.r);

        AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
        episode.transition(annotatedAction, eo.op, r);

        pT = o.probabilityOfTermination(eo.op, history);
        roll = rand.nextDouble();

    }while(roll > pT && !env.isInTerminalState());

    EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

    return eoo;
}
Example #19
Source File: FrameExperienceMemory.java From burlap_caffe with Apache License 2.0

public List<FrameExperience> sampleFrameExperiences(int n) {
    List<FrameExperience> samples;

    if(this.size == 0){
        return new ArrayList<>();
    }

    if(this.alwaysIncludeMostRecent){
        n--;
    }

    if(this.size < n){
        samples = new ArrayList<>(this.size);
        for(int i = 0; i < this.size; i++){
            samples.add(this.experiences[i]);
        }
        return samples;
    }
    else{
        samples = new ArrayList<>(Math.max(n, 1));
        Random r = RandomFactory.getMapped(0);
        for(int i = 0; i < n; i++) {
            int sind = r.nextInt(this.size);
            samples.add(this.experiences[sind]);
        }
    }

    if(this.alwaysIncludeMostRecent){
        FrameExperience eo;
        if(next > 0) {
            eo = this.experiences[next - 1];
        }
        else if(size > 0){
            eo = this.experiences[this.experiences.length - 1];
        }
        else{
            throw new RuntimeException("FixedSizeMemory getting most recent fails because memory is size 0.");
        }
        samples.add(eo);
    }

    return samples;
}
Example #20
Source File: EpsilonGreedy.java From burlap with Apache License 2.0

/**
 * Initializes with the QProvider to use and the value of epsilon, where epsilon is the probability of taking a random action.
 * @param planner the QProvider to use
 * @param epsilon the probability of taking a random action.
 */
public EpsilonGreedy(QProvider planner, double epsilon) {
    qplanner = planner;
    this.epsilon = epsilon;
    rand = RandomFactory.getMapped(0);
}
Example #21
Source File: EpsilonGreedy.java From burlap with Apache License 2.0

/**
 * Initializes with the value of epsilon, where epsilon is the probability of taking a random action.
 * @param epsilon the probability of taking a random action.
 */
public EpsilonGreedy(double epsilon) {
    qplanner = null;
    this.epsilon = epsilon;
    rand = RandomFactory.getMapped(0);
}
Example #22
Source File: StochasticTree.java From burlap with Apache License 2.0

/**
 * Initializes the tree data structures
 */
protected void init(){
    root = null;
    nodeMap = new HashMap<T, StochasticTree<T>.STNode>();
    rand = RandomFactory.getMapped(2347636);
}
Example #23
Source File: GridGameStandardMechanics.java From burlap with Apache License 2.0

/**
 * Initializes the mechanics for the given domain and sets the semi-wall pass through probability to 0.5.
 * @param d the domain object
 */
public GridGameStandardMechanics(Domain d){
    rand = RandomFactory.getMapped(0);
    domain = d;
    pMoveThroughSWall = 0.5;
}
Example #24
Source File: GridGameStandardMechanics.java From burlap with Apache License 2.0

/**
 * Initializes the mechanics for the given domain and sets the semi-wall pass through probability to semiWallPassThroughProb.
 * @param d the domain object
 * @param semiWallPassThroughProb the probability that an agent will pass through a semi-wall.
 */
public GridGameStandardMechanics(Domain d, double semiWallPassThroughProb){
    rand = RandomFactory.getMapped(0);
    domain = d;
    pMoveThroughSWall = semiWallPassThroughProb;
}
Example #25
Source File: GreedyQPolicy.java From burlap with Apache License 2.0

/**
 * Initializes with a QProvider
 * @param planner the QProvider to use
 */
public GreedyQPolicy(QProvider planner){
    qplanner = planner;
    rand = RandomFactory.getMapped(0);
}
Example #26
Source File: GreedyQPolicy.java From burlap with Apache License 2.0

public GreedyQPolicy(){
    qplanner = null;
    rand = RandomFactory.getMapped(0);
}
Example #27
Source File: IRLExample.java From burlap_examples with MIT License

/**
 * Runs MLIRL on the trajectories stored in the "irlDemo" directory and then visualizes the learned reward function.
 */
public void runIRL(String pathToEpisodes){

    //create reward function features to use
    LocationFeatures features = new LocationFeatures(this.domain, 5);

    //create a reward function that is linear with respect to those features and has small random
    //parameter values to start
    LinearStateDifferentiableRF rf = new LinearStateDifferentiableRF(features, 5);
    for(int i = 0; i < rf.numParameters(); i++){
        rf.setParameter(i, RandomFactory.getMapped(0).nextDouble()*0.2 - 0.1);
    }

    //load our saved demonstrations from disk
    List<Episode> episodes = Episode.readEpisodes(pathToEpisodes);

    //use either DifferentiableVI or DifferentiableSparseSampling for planning. The latter enables receding horizon IRL,
    //but you will probably want to use a fairly large horizon for this kind of reward function.
    double beta = 10;
    //DifferentiableVI dplanner = new DifferentiableVI(this.domain, rf, 0.99, beta, new SimpleHashableStateFactory(), 0.01, 100);
    DifferentiableSparseSampling dplanner = new DifferentiableSparseSampling(this.domain, rf, 0.99, new SimpleHashableStateFactory(), 10, -1, beta);

    dplanner.toggleDebugPrinting(false);

    //define the IRL problem
    MLIRLRequest request = new MLIRLRequest(domain, dplanner, episodes, rf);
    request.setBoltzmannBeta(beta);

    //run MLIRL on it
    MLIRL irl = new MLIRL(request, 0.1, 0.1, 10);
    irl.performIRL();

    //get all states in the domain so we can visualize the learned reward function for them
    List<State> allStates = StateReachability.getReachableStates(basicState(), this.domain, new SimpleHashableStateFactory());

    //get a standard grid world value function visualizer, but give it StateRewardFunctionValue which returns the
    //reward value received upon reaching each state, which will thereby let us render the reward function that is
    //learned rather than the value function for it.
    ValueFunctionVisualizerGUI gui = GridWorldDomain.getGridWorldValueFunctionVisualization(
            allStates,
            5,
            5,
            new RewardValueProjection(rf),
            new GreedyQPolicy((QProvider) request.getPlanner())
    );

    gui.initGUI();
}
Example #28
Source File: UCT.java From burlap with Apache License 2.0

protected void UCTInit(SADomain domain, double gamma, HashableStateFactory hashingFactory, int horizon, int nRollouts, int explorationBias){

    this.solverInit(domain, gamma, hashingFactory);
    this.maxHorizon = horizon;
    this.maxRollOutsFromRoot = nRollouts;
    this.explorationBias = explorationBias;

    goalCondition = null;

    rand = RandomFactory.getMapped(589449);
}
Example #29
Source File: RandomSGAgent.java From burlap with Apache License 2.0

@Override
public Action action(State s) {

    List<Action> gsas = ActionUtils.allApplicableActionsForTypes(this.agentType.actions, s);

    int r = RandomFactory.getMapped(0).nextInt(gsas.size());
    Action gsa = gsas.get(r);

    return gsa;
}
Example #30
Source File: IRLExample.java From burlap_examples with MIT License

public State generateState() {
    GridWorldState s = (GridWorldState)this.sourceState.copy();
    int h = RandomFactory.getDefault().nextInt(this.height);
    s.touchAgent().y = h;
    return s;
}