org.deeplearning4j.rl4j.policy.DQNPolicy Java Examples
The following examples show how to use
org.deeplearning4j.rl4j.policy.DQNPolicy.
Example #1
Source File: MalmoExample.java From Java-Deep-Learning-Cookbook with MIT License
public static void malmoCliffWalk() throws MalmoConnectionError, IOException {
    // record the training data in rl4j-data in a new folder (save)
    DataManager manager = new DataManager(false);
    MalmoEnv mdp = createMDP();

    // define the training
    QLearningDiscreteConv<MalmoBox> dql =
            new QLearningDiscreteConv<MalmoBox>(mdp, MALMO_NET, MALMO_HPROC, MALMO_QL, manager);

    // train
    dql.train();

    // get the final policy
    DQNPolicy<MalmoBox> pol = dql.getPolicy();

    // serialize and save (serialization showcase, but not required)
    pol.save("cliffwalk_pixel.policy");

    // close the mdp
    mdp.close();
}
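The MALMO_NET, MALMO_HPROC, and MALMO_QL constants referenced here are defined elsewhere in MalmoExample.java and are not shown in this snippet. As a rough sketch of the kind of hyperparameter object MALMO_QL could be, assuming RL4J's older QLearning.QLConfiguration constructor (the values below are illustrative placeholders, not the cookbook's actual settings):

QLearning.QLConfiguration MALMO_QL_SKETCH = new QLearning.QLConfiguration(
        123,     // random seed
        200,     // max steps per epoch
        100000,  // max total training steps
        50000,   // max size of the experience replay buffer
        32,      // mini-batch size
        500,     // target network update frequency (hard update)
        10,      // no-op warm-up steps before learning starts
        0.01,    // reward scaling factor
        0.99,    // gamma (discount factor)
        1.0,     // td-error clipping
        0.1f,    // minimum epsilon
        10000,   // steps over which epsilon is annealed
        true     // use double DQN
);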
Example #2
Source File: MalmoExample.java From Java-Deep-Learning-Cookbook with MIT License
public static void loadMalmoCliffWalk() throws MalmoConnectionError, IOException {
    MalmoEnv mdp = createMDP(10000);

    // load the previous agent
    DQNPolicy<MalmoBox> pol = DQNPolicy.load("cliffwalk_pixel.policy");

    // evaluate the agent
    double rewards = 0;
    for (int i = 0; i < 10; i++) {
        double reward = pol.play(mdp, new HistoryProcessor(MALMO_HPROC));
        rewards += reward;
        Logger.getAnonymousLogger().info("Reward: " + reward);
    }

    // Clean up
    mdp.close();
    Logger.getAnonymousLogger().info("average: " + rewards / 10);
}
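A hypothetical driver tying the two methods together end to end (this main is an assumption for illustration, not part of the cookbook source):

public static void main(String[] args) throws MalmoConnectionError, IOException {
    malmoCliffWalk();     // train against Malmo and save cliffwalk_pixel.policy
    loadMalmoCliffWalk(); // reload the serialized policy and evaluate it over 10 episodes
}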
Example #3
Source File: QLearningDiscrete.java From deeplearning4j with Apache License 2.0
public QLearningDiscrete(MDP<O, Integer, DiscreteSpace> mdp, IDQN dqn, QLearningConfiguration conf,
                         int epsilonNbStep, ILearningBehavior<Integer> learningBehavior, Random random) {
    this.configuration = conf;
    this.mdp = new LegacyMDPWrapper<>(mdp, null);
    qNetwork = dqn;
    // the greedy policy acts on the Q-network's outputs...
    policy = new DQNPolicy(getQNetwork());
    // ...and EpsGreedy wraps it to add annealed random exploration during training
    egPolicy = new EpsGreedy(policy, mdp, conf.getUpdateStart(), epsilonNbStep, random,
            conf.getMinEpsilon(), this);
    this.learningBehavior = learningBehavior;
}
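The EpsGreedy wrapper is what makes training exploratory: with some probability epsilon it samples a random action, and otherwise it delegates to the wrapped, greedy DQNPolicy. A minimal sketch of that decision rule, assuming RL4J's ActionSpace.randomAction() and Policy.nextAction(INDArray) (illustrative pseudologic, not the actual EpsGreedy source):

// Illustrative epsilon-greedy rule; epsilon anneals from 1.0 toward minEpsilon over epsilonNbStep steps
public Integer nextAction(INDArray input) {
    if (random.nextDouble() < getEpsilon()) {
        return mdp.getActionSpace().randomAction(); // explore: uniform random action
    }
    return policy.nextAction(input);                // exploit: greedy w.r.t. the Q-network
}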
Example #4
Source File: AsyncNStepQLearningThreadDiscrete.java From deeplearning4j with Apache License 2.0
public Policy<Integer> getPolicy(IDQN nn) {
    // wrap the greedy DQN policy in epsilon-greedy exploration for this learner thread
    return new EpsGreedy(new DQNPolicy(nn), getMdp(), configuration.getUpdateStart(),
            configuration.getEpsilonNbStep(), rnd, configuration.getMinEpsilon(), this);
}
Example #5
Source File: AsyncNStepQLearningDiscrete.java From deeplearning4j with Apache License 2.0
public IPolicy<Integer> getPolicy() {
    return new DQNPolicy<OBSERVATION>(getNeuralNet());
}
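As with the Malmo examples above, the returned policy can be replayed directly against an environment. A hypothetical usage sketch, where learning and mdp stand in for an AsyncNStepQLearningDiscrete instance and its MDP (placeholder names, not from the source):

learning.train();
DQNPolicy<OBSERVATION> policy = (DQNPolicy<OBSERVATION>) learning.getPolicy();
double reward = policy.play(mdp);   // run one greedy episode and return the cumulative reward
policy.save("async-nstep.policy");  // optional: serialize for later reuse, as in Example #1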