org.deeplearning4j.rl4j.policy.DQNPolicy Java Examples

The following examples show how to use org.deeplearning4j.rl4j.policy.DQNPolicy. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MalmoExample.java, from Java-Deep-Learning-Cookbook (MIT License), 6 votes
/**
 * Trains a convolutional Q-learning agent on the Malmo environment returned by
 * {@code createMDP()}, then serializes the learned policy to
 * {@code cliffwalk_pixel.policy}.
 *
 * <p>The environment is closed in a {@code finally} block so it is released even
 * when training, policy retrieval, or saving throws.
 *
 * @throws MalmoConnectionError propagated from the Malmo calls made here
 * @throws IOException propagated from the calls made here (presumably from saving the policy)
 */
public static void malmoCliffWalk() throws MalmoConnectionError, IOException {
    // NOTE(review): the original comment said training data is recorded in rl4j-data,
    // but the flag passed here is false -- confirm which behavior is intended.
    DataManager manager = new DataManager(false);

    MalmoEnv mdp = createMDP();
    try {
        // define the training
        QLearningDiscreteConv<MalmoBox> dql =
                new QLearningDiscreteConv<MalmoBox>(mdp, MALMO_NET, MALMO_HPROC, MALMO_QL, manager);

        // train
        dql.train();

        // get the final policy
        DQNPolicy<MalmoBox> pol = dql.getPolicy();

        // serialize and save (serialization showcase, but not required)
        pol.save("cliffwalk_pixel.policy");
    } finally {
        // always release the environment, including on failure paths
        mdp.close();
    }
}
 
Example #2
Source File: MalmoExample.java, from Java-Deep-Learning-Cookbook (MIT License), 6 votes
/**
 * Loads the policy stored in {@code cliffwalk_pixel.policy}, plays it for a fixed
 * number of episodes on the Malmo environment, and logs each episode's reward
 * followed by the average.
 *
 * <p>The environment is closed in a {@code finally} block so it is released even
 * when loading the policy or playing an episode throws.
 *
 * @throws MalmoConnectionError propagated from the Malmo calls made here
 * @throws IOException propagated from the calls made here (presumably from loading the policy)
 */
public static void loadMalmoCliffWalk() throws MalmoConnectionError, IOException {
    // Single source of truth for both the loop bound and the average's divisor.
    final int episodes = 10;

    // NOTE(review): the meaning of the 10000 argument is defined by createMDP(int),
    // which is not visible here.
    MalmoEnv mdp = createMDP(10000);

    double rewards = 0;
    try {
        // load the previous agent
        DQNPolicy<MalmoBox> pol = DQNPolicy.load("cliffwalk_pixel.policy");

        // evaluate the agent; a fresh history processor is created for every episode
        for (int i = 0; i < episodes; i++) {
            double reward = pol.play(mdp, new HistoryProcessor(MALMO_HPROC));
            rewards += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }
    } finally {
        // always release the environment, including on failure paths
        mdp.close();
    }

    Logger.getAnonymousLogger().info("average: " + rewards / episodes);
}
 
Example #3
Source File: MalmoExample.java, from Java-Deep-Learning-Cookbook (MIT License), 6 votes
/**
 * Trains a convolutional Q-learning agent on the Malmo environment returned by
 * {@code createMDP()}, then serializes the learned policy to
 * {@code cliffwalk_pixel.policy}.
 *
 * <p>The environment is closed in a {@code finally} block so it is released even
 * when training, policy retrieval, or saving throws.
 *
 * @throws MalmoConnectionError propagated from the Malmo calls made here
 * @throws IOException propagated from the calls made here (presumably from saving the policy)
 */
public static void malmoCliffWalk() throws MalmoConnectionError, IOException {
    // NOTE(review): the original comment said training data is recorded in rl4j-data,
    // but the flag passed here is false -- confirm which behavior is intended.
    DataManager manager = new DataManager(false);

    MalmoEnv mdp = createMDP();
    try {
        // define the training
        QLearningDiscreteConv<MalmoBox> dql =
                new QLearningDiscreteConv<MalmoBox>(mdp, MALMO_NET, MALMO_HPROC, MALMO_QL, manager);

        // train
        dql.train();

        // get the final policy
        DQNPolicy<MalmoBox> pol = dql.getPolicy();

        // serialize and save (serialization showcase, but not required)
        pol.save("cliffwalk_pixel.policy");
    } finally {
        // always release the environment, including on failure paths
        mdp.close();
    }
}
 
Example #4
Source File: MalmoExample.java, from Java-Deep-Learning-Cookbook (MIT License), 6 votes
/**
 * Loads the policy stored in {@code cliffwalk_pixel.policy}, plays it for a fixed
 * number of episodes on the Malmo environment, and logs each episode's reward
 * followed by the average.
 *
 * <p>The environment is closed in a {@code finally} block so it is released even
 * when loading the policy or playing an episode throws.
 *
 * @throws MalmoConnectionError propagated from the Malmo calls made here
 * @throws IOException propagated from the calls made here (presumably from loading the policy)
 */
public static void loadMalmoCliffWalk() throws MalmoConnectionError, IOException {
    // Single source of truth for both the loop bound and the average's divisor.
    final int episodes = 10;

    // NOTE(review): the meaning of the 10000 argument is defined by createMDP(int),
    // which is not visible here.
    MalmoEnv mdp = createMDP(10000);

    double rewards = 0;
    try {
        // load the previous agent
        DQNPolicy<MalmoBox> pol = DQNPolicy.load("cliffwalk_pixel.policy");

        // evaluate the agent; a fresh history processor is created for every episode
        for (int i = 0; i < episodes; i++) {
            double reward = pol.play(mdp, new HistoryProcessor(MALMO_HPROC));
            rewards += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }
    } finally {
        // always release the environment, including on failure paths
        mdp.close();
    }

    Logger.getAnonymousLogger().info("average: " + rewards / episodes);
}
 
Example #5
Source File: QLearningDiscrete.java, from deeplearning4j (Apache License 2.0), 5 votes
/**
 * Creates a discrete Q-learning trainer backed by the given network.
 *
 * <p>Stores the configuration, wraps the environment in a {@code LegacyMDPWrapper},
 * builds a {@code DQNPolicy} over the network, and layers an {@code EpsGreedy}
 * policy on top of it.
 *
 * @param mdp              the environment to learn on
 * @param dqn              the Q-network used by the policies
 * @param conf             learning configuration; supplies the update-start and minimum-epsilon values
 * @param epsilonNbStep    forwarded unchanged to the {@code EpsGreedy} policy
 * @param learningBehavior stored on this instance
 * @param random           forwarded unchanged to the {@code EpsGreedy} policy
 */
public QLearningDiscrete(MDP<O, Integer, DiscreteSpace> mdp, IDQN dqn, QLearningConfiguration conf,
                         int epsilonNbStep, ILearningBehavior<Integer> learningBehavior, Random random) {
    this.configuration = conf;

    // The field holds a wrapped view of the environment; the exploration policy
    // built below still receives the unwrapped constructor argument.
    this.mdp = new LegacyMDPWrapper<>(mdp, null);

    // Store the network before building the policy, which is created from getQNetwork()
    // (presumably an accessor for the field assigned here).
    this.qNetwork = dqn;
    this.policy = new DQNPolicy(getQNetwork());
    this.egPolicy = new EpsGreedy(
            this.policy,
            mdp,
            conf.getUpdateStart(),
            epsilonNbStep,
            random,
            conf.getMinEpsilon(),
            this);

    this.learningBehavior = learningBehavior;
}
 
Example #6
Source File: AsyncNStepQLearningThreadDiscrete.java, from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Builds an {@code EpsGreedy} policy that wraps a {@code DQNPolicy} over the given network.
 *
 * @param nn the network the inner {@code DQNPolicy} is built from
 * @return the new {@code EpsGreedy} policy, configured from this instance's
 *         configuration, environment and random source
 */
public Policy<Integer> getPolicy(IDQN nn) {
    // Built first, matching the argument evaluation order of the nested form.
    final DQNPolicy greedyPolicy = new DQNPolicy(nn);

    return new EpsGreedy(
            greedyPolicy,
            getMdp(),
            configuration.getUpdateStart(),
            configuration.getEpsilonNbStep(),
            rnd,
            configuration.getMinEpsilon(),
            this);
}
 
Example #7
Source File: AsyncNStepQLearningDiscrete.java, from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Creates a new {@code DQNPolicy} over the network returned by {@code getNeuralNet()}.
 *
 * @return a freshly constructed policy; a new instance is created on every call
 */
public IPolicy<Integer> getPolicy() {
    final DQNPolicy<OBSERVATION> greedyPolicy = new DQNPolicy<OBSERVATION>(getNeuralNet());
    return greedyPolicy;
}