Python Examples of sklearn.preprocessing.StandardScaler

Source File: main.py From transferlearning with MIT License

14 votes

def classify_1nn(data_train, data_test):
    '''
    Train a 1-nearest-neighbour classifier on one CSV and score it on another.

    Each CSV is loaded with ``np.loadtxt`` (comma delimiter); the last column
    is used as the label and all preceding columns as features.

    Inputs: data_train, data_test: train and test csv file path
    Outputs: (ypred, acc) - predictions for the test rows and their accuracy
    '''
    from sklearn.metrics import accuracy_score
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import StandardScaler

    src = np.loadtxt(data_train, delimiter=',')
    tar = np.loadtxt(data_test, delimiter=',')

    # Split features / labels for the source (train) and target (test) sets.
    Xs, Ys = src[:, :-1], src[:, -1]
    Xt, Yt = tar[:, :-1], tar[:, -1]

    # Each set is scaled by its own freshly fitted scaler. with_mean=0 is
    # falsy, so only the std scaling is requested here.
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)

    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)

    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc

Source File: test_invalid.py From mabwiser with Apache License 2.0

7 votes

def test_invalid_log_format(self):
        """Simulator construction must raise TypeError for log_format 7 and None."""
        rng = np.random.RandomState(seed=7)
        n_rows = 10

        # One assertRaises scope per invalid value, in the original order; all
        # arguments are still built inside the scope.
        for bad_log_format in (7, None):
            with self.assertRaises(TypeError):
                example = ("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))
                decisions = [rng.randint(0, 2) for _ in range(n_rows)]
                rewards = [rng.randint(0, 100) for _ in range(n_rows)]
                contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]
                Simulator(bandits=[example],
                          decisions=decisions,
                          rewards=rewards,
                          contexts=contexts,
                          scaler=StandardScaler(), test_size=0.4, batch_size=0,
                          is_ordered=True, seed=7, log_format=bad_log_format)

Source File: test_simulator.py From mabwiser with Apache License 2.0

7 votes

def test_contextual_quick(self):
        """Run the simulator with ``is_quick=True`` over all configured bandits.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 20

        mabs = [MAB([0, 1], lp, cp) for cp in TestSimulator.nps for lp in TestSimulator.lps]
        mabs.extend(MAB([0, 1], para) for para in TestSimulator.parametric)
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7, is_quick=True)
        sim.run()

        # Both result containers must be non-empty after the run.
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))

Source File: data_utils.py From CalibrationNN with GNU General Public License v3.0

6 votes

def pca(self, **kwargs):
        """Fit a StandardScaler + PCA pipeline on this object's matrix.

        Recognised keyword arguments:
            n_components: forwarded to ``PCA``; defaults to 0.995.
            dates: if given, passed to ``self.to_matrix``.
            file: if given, the fitted pipeline is passed to ``tofile`` with it.

        Returns the fitted pipeline, which is also stored on ``self._pipeline``.
        """
        n_components = kwargs.get('n_components', 0.995)
        mat = self.to_matrix(kwargs['dates']) if 'dates' in kwargs else self.to_matrix()

        steps = [('scaler', StandardScaler()), ('pca', PCA(n_components=n_components))]
        self._pipeline = Pipeline(steps)
        self._pipeline.fit(mat)

        if 'file' in kwargs:
            tofile(kwargs['file'], self._pipeline)

        return self._pipeline

Source File: test_StandardScaler.py From differential-privacy-library with MIT License

6 votes

def test_accountant(self):
        """Check the budget totals reported by accountants around StandardScaler fits.

        Covers an accountant passed explicitly and one used as a context
        manager; a second fit inside the (1.5, 0) accountant must raise
        BudgetError.
        """
        from diffprivlib.accountant import BudgetAccountant

        explicit_acc = BudgetAccountant()
        data = np.random.rand(10, 5)

        scaler = StandardScaler(epsilon=1, bounds=(0, 1), accountant=explicit_acc)
        scaler.fit(data)
        self.assertEqual((1, 0), explicit_acc.total())

        with BudgetAccountant(1.5, 0) as scoped_acc:
            scaler = StandardScaler(epsilon=1, bounds=(0, 1))
            scaler.fit(data)
            self.assertEqual((1, 0), scoped_acc.total())

            # A second fit is expected to be rejected.
            with self.assertRaises(BudgetError):
                scaler.fit(data)

        # The explicitly passed accountant must still report the same total.
        self.assertEqual((1, 0), explicit_acc.total())

Source File: test_invalid.py From mabwiser with Apache License 2.0

6 votes

def test_invalid_test_size(self):
        """Simulator must raise TypeError for test_size=1 and ValueError for test_size=50.0."""
        rng = np.random.RandomState(seed=7)
        n_rows = 10

        # (expected exception, offending test_size), checked in the original order.
        cases = ((TypeError, 1), (ValueError, 50.0))
        for expected_error, bad_test_size in cases:
            with self.assertRaises(expected_error):
                example = ("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))
                decisions = [rng.randint(0, 2) for _ in range(n_rows)]
                rewards = [rng.randint(0, 100) for _ in range(n_rows)]
                contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]
                Simulator(bandits=[example],
                          decisions=decisions,
                          rewards=rewards,
                          contexts=contexts,
                          scaler=StandardScaler(), test_size=bad_test_size, batch_size=0,
                          is_ordered=True, seed=7)

Source File: test_invalid.py From mabwiser with Apache License 2.0

6 votes

def test_invalid_batch_size(self):
        """Simulator must raise TypeError for batch_size=0.5 and ValueError for batch_size=10."""
        rng = np.random.RandomState(seed=7)
        n_rows = 10

        # (expected exception, offending batch_size), checked in the original order.
        cases = ((TypeError, 0.5), (ValueError, 10))
        for expected_error, bad_batch_size in cases:
            with self.assertRaises(expected_error):
                example = ("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))
                decisions = [rng.randint(0, 2) for _ in range(n_rows)]
                rewards = [rng.randint(0, 100) for _ in range(n_rows)]
                contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]
                Simulator(bandits=[example],
                          decisions=decisions,
                          rewards=rewards,
                          contexts=contexts,
                          scaler=StandardScaler(), test_size=0.4, batch_size=bad_batch_size,
                          is_ordered=True, seed=7)

Source File: test_StandardScaler.py From differential-privacy-library with MIT License

6 votes

def test_similar_results(self):
        """Compare the fitted statistics of this StandardScaler with ``sk_pp.StandardScaler``.

        Both scalers are fitted on the same random (100000, 5) matrix; ``mean_``
        and ``var_`` are compared with ``np.allclose`` and ``n_samples_seen_``
        must match exactly.
        """
        global_seed(314159)

        X = np.random.rand(100000, 5)

        dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
        dp_ss.fit(X)

        sk_ss = sk_pp.StandardScaler()
        sk_ss.fit(X)

        # NOTE(review): rtol=1 makes the closeness check very permissive -
        # confirm this tolerance is intended.
        for attr in ("mean_", "var_"):
            dp_value = getattr(dp_ss, attr)
            sk_value = getattr(sk_ss, attr)
            self.assertTrue(np.allclose(dp_value, sk_value, rtol=1, atol=1e-4),
                            "Arrays %s and %s should be close" % (dp_value, sk_value))

        self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))

Source File: test_simulator.py From mabwiser with Apache License 2.0

6 votes

def test_contextual_unordered(self):
        """Run the simulator with ``is_ordered=False`` over all configured bandits.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 20

        mabs = [MAB([0, 1], lp, cp) for cp in TestSimulator.nps for lp in TestSimulator.lps]
        mabs.extend(MAB([0, 1], para) for para in TestSimulator.parametric)
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=0,
                        is_ordered=False, seed=7)
        sim.run()

        # Both result containers must be non-empty after the run.
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))

Source File: test_simulator.py From mabwiser with Apache License 2.0

6 votes

def test_contextual_online_quick(self):
        """Run the simulator with ``batch_size=5`` and ``is_quick=True`` over all bandits.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 100

        mabs = [MAB([0, 1], lp, cp) for cp in TestSimulator.nps for lp in TestSimulator.lps]
        mabs.extend(MAB([0, 1], para) for para in TestSimulator.parametric)
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7, is_quick=True)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        # The max-stats entry of bandit '0' must expose a 'total' key.
        first_bandit_max = sim.bandit_to_arm_to_stats_max['0']
        self.assertTrue('total' in first_bandit_max.keys())

Source File: instruments.py From CalibrationNN with GNU General Public License v3.0

6 votes

def random_normal_draw(history, nb_samples, **kwargs):
    """Draw correlated normal samples shaped like the rows of ``history``.

    The history is standardised, the matrix square root of its empirical
    covariance is applied to standard-normal draws, and the result is mapped
    back through the scaler's inverse transform.

    Arguments:
        history: numpy 2D array, with history along axis=0 and parameters
            along axis=1
        nb_samples: number of samples to draw
        **kwargs: accepted but not used by this function

    Returns:
        numpy 2D array, with samples along axis=0 and parameters along axis=1
    """
    scaler = StandardScaler()
    scaler.fit(history)
    standardised = scaler.transform(history)

    # Keep only the real part of the matrix square root of the covariance.
    cov_root = sqrtm(empirical_covariance(standardised)).real

    # Noise is generated as (parameters, samples) so one dot product applies
    # the correlation; it is transposed afterwards to (samples, parameters).
    n_params = history.shape[-1]
    noise = np.random.standard_normal((n_params, nb_samples))
    correlated = np.transpose(np.dot(cov_root, noise))

    return scaler.inverse_transform(correlated)

Source File: test_simulator.py From mabwiser with Apache License 2.0

6 votes

def test_contextual_online(self):
        """Run the simulator with ``batch_size=5`` over all configured bandits.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 100

        mabs = [MAB([0, 1], lp, cp) for cp in TestSimulator.nps for lp in TestSimulator.lps]
        mabs.extend(MAB([0, 1], para) for para in TestSimulator.parametric)
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        # The max-stats entry of bandit '0' must expose a 'total' key.
        first_bandit_max = sim.bandit_to_arm_to_stats_max['0']
        self.assertTrue('total' in first_bandit_max.keys())

Source File: test_simulator.py From mabwiser with Apache License 2.0

6 votes

def test_contextual_offline_run_n_jobs(self):
        """Run the simulator with ``batch_size=0`` over bandits built with ``n_jobs=2``.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 20

        mabs = [MAB([0, 1], lp, cp, n_jobs=2)
                for cp in TestSimulator.nps for lp in TestSimulator.lps]
        mabs.extend(MAB([0, 1], para, n_jobs=2) for para in TestSimulator.parametric)
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7)
        sim.run()

        # Both result containers must be non-empty after the run.
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))

Source File: test_simulator.py From mabwiser with Apache License 2.0

6 votes

def test_contextual_offline(self):
        """Run the simulator with ``batch_size=0`` over all configured bandits.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...

        The simulator is run and its result containers are asserted non-empty.
        Previously the simulator was only constructed, so the test verified
        nothing beyond construction.
        """
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(20)],
                        rewards=[rng.randint(0, 2) for _ in range(20)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7)
        # Execute the simulation and check that statistics and predictions
        # were actually produced.
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))

Source File: test_examples.py From mabwiser with Apache License 2.0

6 votes

def test_simulator_mixed(self):
        """Simulate one bandit with a Radius neighborhood policy and one without.

        After the run, the confusion matrices and predictions must be truthy.
        """
        size = 100
        n_jobs = 1

        # NOTE(review): random.randint is inclusive, so decisions may contain 2
        # while the arms are [0, 1] - confirm this is intended.
        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        random_radius = MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)
        random_plain = MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)
        mixed = [('RandomRadius', random_radius), ('Random', random_plain)]

        sim = Simulator(mixed, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                        batch_size=0, seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)

Source File: test_examples.py From mabwiser with Apache License 2.0

6 votes

def test_simulator_hyper_parameter(self):
        """Simulate UCB1 bandits whose Radius policy uses radius 6, 7, 8 and 9.

        After the run, the confusion matrices and predictions must be truthy.
        """
        size = 100
        n_jobs = 1

        # NOTE(review): random.randint is inclusive, so decisions may contain 2
        # while the arms are [0, 1] - confirm this is intended.
        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        # One named bandit per candidate radius.
        hyper_parameter_tuning = [
            ('Radius' + str(radius),
             MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius), n_jobs=n_jobs))
            for radius in range(6, 10)
        ]

        sim = Simulator(hyper_parameter_tuning, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                        batch_size=0, seed=123456, is_quick=True)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)

Source File: test_linucb.py From mabwiser with Apache License 2.0

6 votes

def test_unused_arm_scaled2(self):
        """LinUCB on standardised contexts must predict arm 4 for both rows.

        Arm 4 is listed in ``arms`` but never appears in ``decisions``.
        """
        history_rows = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                        [1, 3, 1, 1, 1], [0, 0, 0, 0, 0], [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                        [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]]
        context_history = np.array(history_rows, dtype='float64')
        predict_rows = np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64')

        # Fit the scaler on the history, then reuse it for the prediction rows.
        scaler = StandardScaler()
        scaled_contexts = scaler.fit_transform(context_history)
        scaled_predict = scaler.transform(predict_rows)

        predict_kwargs = dict(arms=[1, 2, 3, 4],
                              decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                              rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                              learning_policy=LearningPolicy.LinUCB(alpha=1),
                              context_history=scaled_contexts,
                              contexts=scaled_predict,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [4, 4])

Source File: DataModule.py From sgd-influence with MIT License

6 votes

def fetch(self, n_tr, n_val, n_test, seed=0):
        """Load the data and return train / validation / test splits.

        The data from ``self.load()`` is split twice with ``train_test_split``:
        first into ``n_tr`` training rows and ``n_val + n_test`` held-out rows
        (random_state=seed), then the held-out rows into ``n_val`` and
        ``n_test`` (random_state=seed + 1).

        If ``self.normalize`` is truthy, a StandardScaler fitted on the training
        features is applied to all three feature sets. If ``self.append_one``
        is truthy, a column of ones is appended to each feature set.

        Returns:
            ((x_tr, y_tr), (x_val, y_val), (x_test, y_test))
        """
        features, targets = self.load()

        # Stage 1: carve the training rows out of the full data.
        x_tr, x_rest, y_tr, y_rest = train_test_split(
            features, targets, train_size=n_tr, test_size=n_val+n_test, random_state=seed)
        # Stage 2: divide the remainder into validation and test rows.
        x_val, x_test, y_val, y_test = train_test_split(
            x_rest, y_rest, train_size=n_val, test_size=n_test, random_state=seed+1)

        if self.normalize:
            scaler = StandardScaler()
            scaler.fit(x_tr)
            x_tr, x_val, x_test = [scaler.transform(part) for part in (x_tr, x_val, x_test)]

        if self.append_one:
            x_tr = np.c_[x_tr, np.ones(n_tr)]
            x_val = np.c_[x_val, np.ones(n_val)]
            x_test = np.c_[x_test, np.ones(n_test)]

        train_set = (x_tr, y_tr)
        val_set = (x_val, y_val)
        test_set = (x_test, y_test)
        return train_set, val_set, test_set

Source File: test_simulator.py From mabwiser with Apache License 2.0

6 votes

def test_contextual_unordered_online(self):
        """Run the simulator with ``is_ordered=False`` and ``batch_size=5`` over all bandits.

        Bandits are one MAB per (TestSimulator.nps, TestSimulator.lps) pair,
        followed by one per TestSimulator.parametric entry, named "0", "1", ...
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 100

        mabs = [MAB([0, 1], lp, cp) for cp in TestSimulator.nps for lp in TestSimulator.lps]
        mabs.extend(MAB([0, 1], para) for para in TestSimulator.parametric)
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=5,
                        is_ordered=False, seed=7)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        # The max-stats entry of bandit '0' must expose a 'total' key.
        first_bandit_max = sim.bandit_to_arm_to_stats_max['0']
        self.assertTrue('total' in first_bandit_max.keys())

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_nearest(self):
        """UCB1 with a KNearest(k=5) neighborhood must predict arms [5, 1].

        Afterwards the bandit is partially fitted on the predictions and arm 6
        is added and checked for presence.
        """
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for, and the rewards used for the partial fit.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.UCB1(alpha=1.25),
                              neighborhood_policy=NeighborhoodPolicy.KNearest(k=5),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [5, 1])

        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        new_arm = 6
        mab.add_arm(new_arm)
        self.assertTrue(new_arm in mab.arms)
        self.assertTrue(new_arm in mab._imp.arm_to_expectation.keys())

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_linucb_radius(self):
        """LinUCB(alpha=1.25) with a Radius(radius=1) neighborhood must predict arms [1, 2]."""
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.LinUCB(alpha=1.25),
                              neighborhood_policy=NeighborhoodPolicy.Radius(radius=1),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [1, 2])

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_radius(self):
        """UCB1 with a Radius(radius=5) neighborhood must predict arms [4, 4].

        Afterwards the bandit is partially fitted on the predictions and arm 6
        is added and checked for presence.
        """
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for, and the rewards used for the partial fit.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.UCB1(alpha=1.25),
                              neighborhood_policy=NeighborhoodPolicy.Radius(radius=5),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [4, 4])

        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        new_arm = 6
        mab.add_arm(new_arm)
        self.assertTrue(new_arm in mab.arms)
        self.assertTrue(new_arm in mab._imp.arm_to_expectation.keys())

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_lints_knearest(self):
        """LinTS(alpha=1) with a KNearest(k=4) neighborhood must predict arms [1, 2]."""
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.LinTS(alpha=1),
                              neighborhood_policy=NeighborhoodPolicy.KNearest(k=4),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [1, 2])

Source File: conv1d_autoencoder.py From keras-anomaly-detection with MIT License

5 votes

def preprocess_data(csv_data):
    """Turn the raw data frame into a feature matrix and a label vector.

    The 'Class' and 'Time' columns are dropped from the features and the
    'Amount' column is standardised with a freshly fitted StandardScaler.

    Arguments:
        csv_data: pandas DataFrame containing at least the 'Class', 'Time'
            and 'Amount' columns.

    Returns:
        (features, y_true): the remaining columns as a numpy array, and the
        values of the 'Class' column as a numpy array.
    """
    credit_card_data = csv_data.drop(labels=['Class', 'Time'], axis=1)

    # StandardScaler expects a 2-D input, hence the reshape; the (n, 1) result
    # is flattened again so it is assigned back as an ordinary 1-D column.
    scaled_amount = StandardScaler().fit_transform(
        credit_card_data['Amount'].values.reshape(-1, 1))
    credit_card_data['Amount'] = scaled_amount.ravel()

    # `.as_matrix()` was removed in pandas 1.0; `.values` yields the same
    # numpy array on both old and new pandas versions.
    credit_card_np_data = credit_card_data.values
    y_true = csv_data['Class'].values
    return credit_card_np_data, y_true

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_lints_radius(self):
        """LinTS(alpha=0.5) with a Radius(radius=1) neighborhood must predict arms [1, 2]."""
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.LinTS(alpha=0.5),
                              neighborhood_policy=NeighborhoodPolicy.Radius(radius=1),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [1, 2])

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_lints(self):
        """LinTS(alpha=1.5) without a neighborhood policy must predict arms [5, 2].

        Afterwards the bandit is partially fitted on the predictions and arm 6
        is added and checked for presence.
        """
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for, and the rewards used for the partial fit.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.LinTS(alpha=1.5),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [5, 2])

        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        new_arm = 6
        mab.add_arm(new_arm)
        self.assertTrue(new_arm in mab.arms)
        self.assertTrue(new_arm in mab._imp.arm_to_expectation.keys())

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_linucb(self):
        """LinUCB(alpha=1.25) without a neighborhood policy must predict arms [5, 2].

        Afterwards the bandit is partially fitted on the predictions and arm 6
        is added and checked for presence.
        """
        feature_cols = ['age', 'click_rate', 'subscriber']

        # Historical data: chosen ad, observed revenue and three context features.
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Contexts to predict for, and the rewards used for the partial fit.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Fit the scaler on the training features and reuse it for the test contexts.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        predict_kwargs = dict(arms=[1, 2, 3, 4, 5],
                              decisions=train_df['ad'],
                              rewards=train_df['revenues'],
                              learning_policy=LearningPolicy.LinUCB(alpha=1.25),
                              context_history=train,
                              contexts=test,
                              seed=123456,
                              num_run=1,
                              is_predict=True)
        arms, mab = self.predict(**predict_kwargs)

        self.assertEqual(arms, [5, 2])

        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        new_arm = 6
        mab.add_arm(new_arm)
        self.assertTrue(new_arm in mab.arms)
        self.assertTrue(new_arm in mab._imp.arm_to_expectation.keys())

Source File: test_invalid.py From mabwiser with Apache License 2.0

5 votes

def test_invalid_simulator_stats_scope(self):
        """``_set_stats`` must raise ValueError when called with the scope 'validation'."""
        rng = np.random.RandomState(seed=7)
        n_rows = 10

        decisions = np.array([rng.randint(0, 2) for _ in range(n_rows)])
        rewards = np.array([rng.randint(0, 100) for _ in range(n_rows)])

        example = ("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]
        sim = Simulator(bandits=[example],
                        decisions=decisions,
                        rewards=rewards,
                        contexts=contexts,
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7)

        with self.assertRaises(ValueError):
            sim._set_stats('validation', decisions, rewards)

Source File: test_simulator.py From mabwiser with Apache License 2.0

5 votes

def test_plot_min_net_online(self, mock_show):
        """Run a single-bandit simulation with ``batch_size=5`` and call ``plot('min', False)``.

        ``mock_show`` is accepted but not used in the body; it is presumably
        injected by a patch decorator outside this snippet.
        """
        rng = np.random.RandomState(seed=7)
        n_rows = 20

        example = ("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))
        # Draw order (decisions, rewards, contexts) is kept so the seeded data is unchanged.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 100) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=[example],
                        decisions=decisions,
                        rewards=rewards,
                        contexts=contexts,
                        scaler=StandardScaler(), test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7)
        sim.run()
        sim.plot('min', False)

Source File: test_examples.py From mabwiser with Apache License 2.0

5 votes

def test_simulator_contextual(self):
        """Simulate five learning policies, each paired with a Radius(10) neighborhood.

        After the run, the confusion matrices and predictions must be truthy.
        """
        size = 100
        n_jobs = 1

        # NOTE(review): random.randint is inclusive, so decisions may contain 2
        # while the arms are [0, 1] - confirm this is intended.
        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        def binarize(decision, reward):
            # Arm 0 counts rewards up to 50; any other arm counts rewards from 220.
            return reward <= 50 if decision == 0 else reward >= 220

        def radius_mab(learning_policy):
            # Every bandit gets its own Radius(10) neighborhood policy instance.
            return MAB([0, 1], learning_policy, NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)

        contextual_mabs = [
            ('Random', radius_mab(LearningPolicy.Random())),
            ('UCB1', radius_mab(LearningPolicy.UCB1(1))),
            ('ThompsonSampling', radius_mab(LearningPolicy.ThompsonSampling(binarize))),
            ('EpsilonGreedy', radius_mab(LearningPolicy.EpsilonGreedy(epsilon=.15))),
            ('Softmax', radius_mab(LearningPolicy.Softmax())),
        ]

        sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                        batch_size=0, seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)

Python sklearn.preprocessing.StandardScaler() Examples