Python sklearn.preprocessing.StandardScaler() Examples

The following are 30 code examples of sklearn.preprocessing.StandardScaler(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
Example #1
Source File: main.py    From transferlearning with MIT License 14 votes vote down vote up
def classify_1nn(data_train, data_test):
    '''
    Classification using 1NN

    Both files are read with ``np.loadtxt`` as comma-delimited numeric
    tables; the last column of each is used as the label and all preceding
    columns as features. Each feature matrix is scaled to unit variance
    (without centering) by its own ``StandardScaler`` before the 1-NN model
    is fitted on the training data and evaluated on the test data.

    Inputs: data_train, data_test: train and test csv file path
    Outputs: yprediction and accuracy
    '''
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler

    src = np.loadtxt(data_train, delimiter=',')
    tar = np.loadtxt(data_test, delimiter=',')
    Xs, Ys = src[:, :-1], src[:, -1]
    Xt, Yt = tar[:, :-1], tar[:, -1]

    # with_mean / with_std are boolean flags. Passing 0 / 1 only worked while
    # scikit-learn did not validate constructor parameters; releases with
    # parameter validation reject non-bool values when fit is called.
    # NOTE(review): source and target are scaled by independently fitted
    # scalers, as in the original code -- confirm this is intended.
    Xs = StandardScaler(with_mean=False, with_std=True).fit_transform(Xs)
    Xt = StandardScaler(with_mean=False, with_std=True).fit_transform(Xt)

    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc
Example #2
Source File: test_invalid.py    From mabwiser with Apache License 2.0 7 votes vote down vote up
def test_invalid_log_format(self):
        """Expect TypeError from Simulator for each unsupported ``log_format`` value."""
        rng = np.random.RandomState(seed=7)

        # The same generator is shared across both attempts, so the random
        # inputs of the second construction continue the first one's stream.
        for bad_format in (7, None):
            with self.assertRaises(TypeError):
                Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                          decisions=[rng.randint(0, 2) for _ in range(10)],
                          rewards=[rng.randint(0, 100) for _ in range(10)],
                          contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                          scaler=StandardScaler(), test_size=0.4, batch_size=0,
                          is_ordered=True, seed=7, log_format=bad_format)
Example #3
Source File: test_simulator.py    From mabwiser with Apache License 2.0 7 votes vote down vote up
def test_contextual_quick(self):
        """Run a contextual simulation with ``is_quick=True`` and check it yields results."""
        rng = np.random.RandomState(seed=7)

        # One MAB per (nps, lps) combination followed by one per parametric
        # policy; each is named by its position in the list.
        mabs = [MAB([0, 1], lp, cp)
                for cp in TestSimulator.nps
                for lp in TestSimulator.lps]
        mabs += [MAB([0, 1], para) for para in TestSimulator.parametric]
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) matters for reproducibility.
        n_rows = 20
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7, is_quick=True)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
Example #4
Source File: data_utils.py    From CalibrationNN with GNU General Public License v3.0 6 votes vote down vote up
def pca(self, **kwargs):
        """Fit a StandardScaler -> PCA pipeline on this object's matrix.

        Keyword Args:
            n_components: forwarded to ``PCA``; 0.995 is used when omitted.
            dates: when present, passed to ``self.to_matrix`` to build the
                input matrix; otherwise ``self.to_matrix()`` is called bare.
            file: when present, the fitted pipeline is handed to ``tofile``
                together with this value.

        Returns:
            The fitted pipeline, which is also stored on ``self._pipeline``.
        """
        n_components = kwargs.get('n_components', 0.995)
        mat = self.to_matrix(kwargs['dates']) if 'dates' in kwargs else self.to_matrix()

        steps = [('scaler', StandardScaler()),
                 ('pca', PCA(n_components=n_components))]
        self._pipeline = Pipeline(steps)
        self._pipeline.fit(mat)

        if 'file' in kwargs:
            tofile(kwargs['file'], self._pipeline)

        return self._pipeline
Example #5
Source File: test_StandardScaler.py    From differential-privacy-library with MIT License 6 votes vote down vote up
def test_accountant(self):
        """Check how fitting the scaler is recorded on budget accountants.

        Covers an explicitly supplied accountant, an accountant entered as a
        context manager, and the error expected once that accountant's
        budget would be exceeded.
        """
        from diffprivlib.accountant import BudgetAccountant
        acc = BudgetAccountant()

        X = np.random.rand(10, 5)
        # A scaler given an explicit accountant: one fit at epsilon=1 is
        # expected to show up as a total of (1, 0) on that accountant.
        ss = StandardScaler(epsilon=1, bounds=(0, 1), accountant=acc)
        ss.fit(X)
        self.assertEqual((1, 0), acc.total())

        # Inside the context manager the scaler is built without an
        # accountant argument, yet the spend is expected to land on acc2.
        with BudgetAccountant(1.5, 0) as acc2:
            ss = StandardScaler(epsilon=1, bounds=(0, 1))
            ss.fit(X)
            self.assertEqual((1, 0), acc2.total())

            # A second epsilon=1 fit against the (1.5, 0) accountant is
            # expected to be refused.
            with self.assertRaises(BudgetError):
                ss.fit(X)

        # The first accountant's total is expected to be unchanged by the
        # fits performed inside the context manager.
        self.assertEqual((1, 0), acc.total())
Example #6
Source File: test_invalid.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_invalid_test_size(self):
        """Expect Simulator to reject an int ``test_size`` (TypeError) and 50.0 (ValueError)."""
        rng = np.random.RandomState(seed=7)

        # (expected exception, offending test_size); the generator is shared,
        # so the second case continues the random stream of the first.
        cases = ((TypeError, 1), (ValueError, 50.0))
        for expected_error, bad_test_size in cases:
            with self.assertRaises(expected_error):
                Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                          decisions=[rng.randint(0, 2) for _ in range(10)],
                          rewards=[rng.randint(0, 100) for _ in range(10)],
                          contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                          scaler=StandardScaler(), test_size=bad_test_size, batch_size=0,
                          is_ordered=True, seed=7)
Example #7
Source File: test_invalid.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_invalid_batch_size(self):
        """Expect Simulator to reject ``batch_size=0.5`` (TypeError) and ``batch_size=10`` (ValueError)."""
        rng = np.random.RandomState(seed=7)

        # (expected exception, offending batch_size); the generator is shared,
        # so the second case continues the random stream of the first.
        cases = ((TypeError, 0.5), (ValueError, 10))
        for expected_error, bad_batch_size in cases:
            with self.assertRaises(expected_error):
                Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                          decisions=[rng.randint(0, 2) for _ in range(10)],
                          rewards=[rng.randint(0, 100) for _ in range(10)],
                          contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                          scaler=StandardScaler(), test_size=0.4, batch_size=bad_batch_size,
                          is_ordered=True, seed=7)
Example #8
Source File: test_StandardScaler.py    From differential-privacy-library with MIT License 6 votes vote down vote up
def test_similar_results(self):
        """Compare the fitted statistics of the scaler under test with ``sk_pp.StandardScaler``.

        Both scalers are fitted on the same 100000 x 5 random matrix; their
        ``mean_`` and ``var_`` must satisfy ``np.allclose`` and their
        ``n_samples_seen_`` must match exactly.
        """
        global_seed(314159)

        X = np.random.rand(100000, 5)

        dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
        dp_ss.fit(X)

        sk_ss = sk_pp.StandardScaler()
        sk_ss.fit(X)

        # NOTE(review): rtol=1 makes the closeness check very permissive --
        # kept as in the original test.
        for stat in ("mean_", "var_"):
            dp_value = getattr(dp_ss, stat)
            sk_value = getattr(sk_ss, stat)
            self.assertTrue(np.allclose(dp_value, sk_value, rtol=1, atol=1e-4),
                            "Arrays %s and %s should be close" % (dp_value, sk_value))

        same_count = dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_
        self.assertTrue(np.all(same_count))
Example #9
Source File: test_simulator.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_contextual_unordered(self):
        """Run a contextual simulation with ``is_ordered=False`` and check it yields results."""
        rng = np.random.RandomState(seed=7)

        # Bandits are named "0", "1", ... in creation order: first every
        # (nps, lps) combination, then every parametric policy.
        bandits = [(str(idx), MAB([0, 1], lp, cp))
                   for idx, (cp, lp) in enumerate(
                       (cp, lp) for cp in TestSimulator.nps for lp in TestSimulator.lps)]
        first_parametric = len(bandits)
        bandits.extend((str(idx), MAB([0, 1], para))
                       for idx, para in enumerate(TestSimulator.parametric, start=first_parametric))

        # Draw order (decisions, rewards, contexts) matters for reproducibility.
        n_rows = 20
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=0,
                        is_ordered=False, seed=7)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
Example #10
Source File: test_simulator.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_contextual_online_quick(self):
        """Run a batched (``batch_size=5``) simulation with ``is_quick=True`` and check its outputs."""
        rng = np.random.RandomState(seed=7)

        # One MAB per (nps, lps) combination followed by one per parametric
        # policy; each is named by its position in the list.
        mabs = [MAB([0, 1], lp, cp)
                for cp in TestSimulator.nps
                for lp in TestSimulator.lps]
        mabs += [MAB([0, 1], para) for para in TestSimulator.parametric]
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) matters for reproducibility.
        n_rows = 100
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7, is_quick=True)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        # The first bandit's max-stats mapping must contain a 'total' entry.
        self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys())
Example #11
Source File: instruments.py    From CalibrationNN with GNU General Public License v3.0 6 votes vote down vote up
def random_normal_draw(history, nb_samples, **kwargs):
    """Draw correlated normal samples shaped after ``history``.

    The history is standardized, the matrix square root of its empirical
    covariance is applied to independent standard-normal draws, and the
    result is mapped back through the scaler's inverse transform.

    Arguments:
        history: numpy 2D array, with history along axis=0 and parameters
            along axis=1
        nb_samples: number of samples to draw
        **kwargs: accepted but not used by this function

    Returns:
        numpy 2D array, with samples along axis=0 and parameters along axis=1
    """
    scaler = StandardScaler()
    scaler.fit(history)
    cov_root = sqrtm(empirical_covariance(scaler.transform(history))).real

    # Independent draws are laid out as (parameters, samples) so that the
    # covariance root can be applied with a single matrix product.
    n_params = history.shape[-1]
    independent = np.random.standard_normal((n_params, nb_samples))
    correlated = np.dot(cov_root, independent)

    # Back to (samples, parameters) before undoing the standardization.
    return scaler.inverse_transform(np.transpose(correlated))
Example #12
Source File: test_simulator.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_contextual_online(self):
        """Run a batched (``batch_size=5``) ordered simulation and check its outputs."""
        rng = np.random.RandomState(seed=7)

        # Bandits are named "0", "1", ... in creation order: first every
        # (nps, lps) combination, then every parametric policy.
        bandits = [(str(idx), MAB([0, 1], lp, cp))
                   for idx, (cp, lp) in enumerate(
                       (cp, lp) for cp in TestSimulator.nps for lp in TestSimulator.lps)]
        first_parametric = len(bandits)
        bandits.extend((str(idx), MAB([0, 1], para))
                       for idx, para in enumerate(TestSimulator.parametric, start=first_parametric))

        # Draw order (decisions, rewards, contexts) matters for reproducibility.
        n_rows = 100
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        # The first bandit's max-stats mapping must contain a 'total' entry.
        self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys())
Example #13
Source File: test_simulator.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_contextual_offline_run_n_jobs(self):
        """Run an offline (``batch_size=0``) simulation whose bandits are built with ``n_jobs=2``."""
        rng = np.random.RandomState(seed=7)

        # One MAB per (nps, lps) combination followed by one per parametric
        # policy; each is named by its position in the list.
        mabs = [MAB([0, 1], lp, cp, n_jobs=2)
                for cp in TestSimulator.nps
                for lp in TestSimulator.lps]
        mabs += [MAB([0, 1], para, n_jobs=2) for para in TestSimulator.parametric]
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) matters for reproducibility.
        n_rows = 20
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
Example #14
Source File: test_simulator.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_contextual_offline(self):
        """Run an offline (``batch_size=0``) ordered simulation and check it yields results.

        Bandits cover every (nps, lps) combination plus every parametric
        policy defined on ``TestSimulator``, named "0", "1", ... in order.
        """
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(20)],
                        rewards=[rng.randint(0, 2) for _ in range(20)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7)
        # Previously the simulator was only constructed, so the test could
        # never fail on simulation behaviour. Run it and apply the same
        # checks as the sibling simulator tests.
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
Example #15
Source File: test_examples.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_simulator_mixed(self):
        """Simulate one neighborhood-based and one plain Random bandit side by side."""
        size = 100
        n_jobs = 1

        # Inputs come from the module-level ``random`` stream; the draw order
        # (decisions, rewards, contexts) is kept as-is.
        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        radius_bandit = MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)
        plain_bandit = MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)
        mixed = [('RandomRadius', radius_bandit), ('Random', plain_bandit)]

        sim = Simulator(mixed, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5,
                        is_ordered=False, batch_size=0, seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)
Example #16
Source File: test_examples.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_simulator_hyper_parameter(self):
        """Simulate UCB1 bandits that differ only in neighborhood radius (6 through 9)."""
        size = 100
        n_jobs = 1

        # Inputs come from the module-level ``random`` stream; the draw order
        # (decisions, rewards, contexts) is kept as-is.
        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        # One named bandit per candidate radius.
        hyper_parameter_tuning = [
            ('Radius' + str(radius),
             MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius), n_jobs=n_jobs))
            for radius in range(6, 10)
        ]

        sim = Simulator(hyper_parameter_tuning, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5,
                        is_ordered=False, batch_size=0, seed=123456,
                        is_quick=True)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)
Example #17
Source File: test_linucb.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_unused_arm_scaled2(self):
        """LinUCB on standardized contexts: arm 4 never occurs in the decisions, yet both predictions are expected to be arm 4."""
        history_rows = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                        [1, 3, 1, 1, 1], [0, 0, 0, 0, 0], [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                        [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]]
        query_rows = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]
        context_history = np.array(history_rows, dtype='float64')

        # Fit the scaler on the history only; the query rows reuse its statistics.
        scaler = StandardScaler()
        scaled_history = scaler.fit_transform(context_history)
        scaled_query = scaler.transform(np.array(query_rows, dtype='float64'))

        arms, _ = self.predict(arms=[1, 2, 3, 4],
                               decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                               rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                               learning_policy=LearningPolicy.LinUCB(alpha=1),
                               context_history=scaled_history,
                               contexts=scaled_query,
                               seed=123456,
                               num_run=1,
                               is_predict=True)

        self.assertEqual(arms, [4, 4])
Example #18
Source File: DataModule.py    From sgd-influence with MIT License 6 votes vote down vote up
def fetch(self, n_tr, n_val, n_test, seed=0):
        """Load the data and return train / validation / test splits.

        Args:
            n_tr: training size passed to ``train_test_split``.
            n_val: validation size passed to ``train_test_split``.
            n_test: test size passed to ``train_test_split``.
            seed: random state of the first split; the second uses ``seed + 1``.

        Returns:
            ``(x_tr, y_tr), (x_val, y_val), (x_test, y_test)``. When
            ``self.normalize`` is set, features are standardized with a
            scaler fitted on the training features only; when
            ``self.append_one`` is set, a column of ones is appended to each
            feature matrix.
        """
        features, labels = self.load()

        # First split off the training part; the remainder holds val + test
        # and is divided by the second split.
        x_tr, x_rest, y_tr, y_rest = train_test_split(
            features, labels, train_size=n_tr, test_size=n_val + n_test, random_state=seed)
        x_val, x_test, y_val, y_test = train_test_split(
            x_rest, y_rest, train_size=n_val, test_size=n_test, random_state=seed + 1)

        if self.normalize:
            scaler = StandardScaler()
            scaler.fit(x_tr)
            x_tr, x_val, x_test = [scaler.transform(part) for part in (x_tr, x_val, x_test)]

        if self.append_one:
            sized_parts = ((x_tr, n_tr), (x_val, n_val), (x_test, n_test))
            x_tr, x_val, x_test = [np.c_[part, np.ones(count)] for part, count in sized_parts]

        return (x_tr, y_tr), (x_val, y_val), (x_test, y_test)
Example #19
Source File: test_simulator.py    From mabwiser with Apache License 2.0 6 votes vote down vote up
def test_contextual_unordered_online(self):
        """Run a batched (``batch_size=5``) simulation with ``is_ordered=False`` and check its outputs."""
        rng = np.random.RandomState(seed=7)

        # One MAB per (nps, lps) combination followed by one per parametric
        # policy; each is named by its position in the list.
        mabs = [MAB([0, 1], lp, cp)
                for cp in TestSimulator.nps
                for lp in TestSimulator.lps]
        mabs += [MAB([0, 1], para) for para in TestSimulator.parametric]
        bandits = [(str(idx), mab) for idx, mab in enumerate(mabs)]

        # Draw order (decisions, rewards, contexts) matters for reproducibility.
        n_rows = 100
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 2) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(5)] for _ in range(n_rows)]

        sim = Simulator(bandits=bandits, decisions=decisions, rewards=rewards,
                        contexts=contexts, scaler=StandardScaler(),
                        test_size=0.4, batch_size=5,
                        is_ordered=False, seed=7)
        sim.run()

        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        # The first bandit's max-stats mapping must contain a 'total' entry.
        self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys())
Example #20
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_nearest(self):
        """UCB1 with a 5-nearest-neighbour policy: predict, update online, then add an arm."""
        feature_cols = ['age', 'click_rate', 'subscriber']
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Two new contexts to predict for, and the revenue observed for them.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Standardize with statistics learned from the training contexts only.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(arms=[1, 2, 3, 4, 5],
                                 decisions=train_df['ad'],
                                 rewards=train_df['revenues'],
                                 learning_policy=LearningPolicy.UCB1(alpha=1.25),
                                 neighborhood_policy=NeighborhoodPolicy.KNearest(k=5),
                                 context_history=train,
                                 contexts=test,
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)
        self.assertEqual(arms, [5, 1])

        # Feed the predictions back as decisions, then register a new arm.
        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)
        mab.add_arm(6)

        self.assertTrue(6 in mab.arms)
        self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
Example #21
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_linucb_radius(self):
        """LinUCB (alpha=1.25) with a radius-1 neighborhood policy is expected to predict arms [1, 2]."""
        feature_cols = ['age', 'click_rate', 'subscriber']
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Two new contexts to predict for.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Standardize with statistics learned from the training contexts only.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, _ = self.predict(arms=[1, 2, 3, 4, 5],
                               decisions=train_df['ad'],
                               rewards=train_df['revenues'],
                               learning_policy=LearningPolicy.LinUCB(alpha=1.25),
                               neighborhood_policy=NeighborhoodPolicy.Radius(radius=1),
                               context_history=train,
                               contexts=test,
                               seed=123456,
                               num_run=1,
                               is_predict=True)

        self.assertEqual(arms, [1, 2])
Example #22
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_radius(self):
        """UCB1 with a radius-5 neighborhood policy: predict, update online, then add an arm."""
        feature_cols = ['age', 'click_rate', 'subscriber']
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Two new contexts to predict for, and the revenue observed for them.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Standardize with statistics learned from the training contexts only.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(arms=[1, 2, 3, 4, 5],
                                 decisions=train_df['ad'],
                                 rewards=train_df['revenues'],
                                 learning_policy=LearningPolicy.UCB1(alpha=1.25),
                                 neighborhood_policy=NeighborhoodPolicy.Radius(radius=5),
                                 context_history=train,
                                 contexts=test,
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)
        self.assertEqual(arms, [4, 4])

        # Feed the predictions back as decisions, then register a new arm.
        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)
        mab.add_arm(6)

        self.assertTrue(6 in mab.arms)
        self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
Example #23
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_lints_knearest(self):
        """LinTS (alpha=1) with a 4-nearest-neighbour policy is expected to predict arms [1, 2]."""
        feature_cols = ['age', 'click_rate', 'subscriber']
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Two new contexts to predict for.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Standardize with statistics learned from the training contexts only.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, _ = self.predict(arms=[1, 2, 3, 4, 5],
                               decisions=train_df['ad'],
                               rewards=train_df['revenues'],
                               learning_policy=LearningPolicy.LinTS(alpha=1),
                               neighborhood_policy=NeighborhoodPolicy.KNearest(k=4),
                               context_history=train,
                               contexts=test,
                               seed=123456,
                               num_run=1,
                               is_predict=True)

        self.assertEqual(arms, [1, 2])
Example #24
Source File: conv1d_autoencoder.py    From keras-anomaly-detection with MIT License 5 votes vote down vote up
def preprocess_data(csv_data):
    """Split a credit-card DataFrame into a feature array and a label array.

    The 'Class' and 'Time' columns are dropped from the features, and the
    'Amount' column is replaced by its standardized values.

    Args:
        csv_data: DataFrame containing at least the 'Class', 'Time' and
            'Amount' columns.

    Returns:
        Tuple ``(features, labels)`` of numpy arrays, where ``labels`` is
        taken from the 'Class' column.
    """
    credit_card_data = csv_data.drop(labels=['Class', 'Time'], axis=1)
    # StandardScaler expects a 2D input, hence the single-column reshape.
    amount = credit_card_data['Amount'].values.reshape(-1, 1)
    credit_card_data['Amount'] = StandardScaler().fit_transform(amount)
    # DataFrame.as_matrix() / Series.as_matrix() were removed in pandas 1.0;
    # .values returns the same ndarray and exists in old and new releases.
    credit_card_np_data = credit_card_data.values
    y_true = csv_data['Class'].values
    return credit_card_np_data, y_true
Example #25
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_lints_radius(self):
        """LinTS (alpha=0.5) with a radius-1 neighborhood policy is expected to predict arms [1, 2]."""
        feature_cols = ['age', 'click_rate', 'subscriber']
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Two new contexts to predict for.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Standardize with statistics learned from the training contexts only.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, _ = self.predict(arms=[1, 2, 3, 4, 5],
                               decisions=train_df['ad'],
                               rewards=train_df['revenues'],
                               learning_policy=LearningPolicy.LinTS(alpha=0.5),
                               neighborhood_policy=NeighborhoodPolicy.Radius(radius=1),
                               context_history=train,
                               contexts=test,
                               seed=123456,
                               num_run=1,
                               is_predict=True)

        self.assertEqual(arms, [1, 2])
Example #26
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_lints(self):
        """LinTS (alpha=1.5) without a neighborhood policy: predict, update online, then add an arm."""
        feature_cols = ['age', 'click_rate', 'subscriber']
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0],
        })

        # Two new contexts to predict for, and the revenue observed for them.
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Standardize with statistics learned from the training contexts only.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(arms=[1, 2, 3, 4, 5],
                                 decisions=train_df['ad'],
                                 rewards=train_df['revenues'],
                                 learning_policy=LearningPolicy.LinTS(alpha=1.5),
                                 context_history=train,
                                 contexts=test,
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)
        self.assertEqual(arms, [5, 2])

        # Feed the predictions back as decisions, then register a new arm.
        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)
        mab.add_arm(6)

        self.assertTrue(6 in mab.arms)
        self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
Example #27
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_linucb(self):
        # LinUCB example: predict an ad for two unseen users from standardized
        # context features, feed the outcome back, then add a new arm.
        feature_cols = ['age', 'click_rate', 'subscriber']

        train_df = pd.DataFrame({'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
                                 'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
                                 'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
                                 'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                                                0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
                                 'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]}
                                )

        # Test data for new prediction
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
        test_df_revenue = pd.Series([7, 13])

        # Scale the data: the scaler is fitted on the training contexts only and its
        # statistics are reused for the test contexts. Selecting the same column list
        # for both frames guarantees the feature order matches.
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[feature_cols], dtype='float64'))
        test = scaler.transform(np.asarray(test_df[feature_cols], dtype='float64'))

        arms, mab = self.predict(arms=[1, 2, 3, 4, 5],
                                 decisions=train_df['ad'],
                                 rewards=train_df['revenues'],
                                 learning_policy=LearningPolicy.LinUCB(alpha=1.25),
                                 context_history=train,
                                 contexts=test,
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertEqual(arms, [5, 2])

        # Feed the predicted arms and their revenues back into the model
        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        # A newly added arm must be listed on the MAB and in the implementation's
        # arm_to_expectation mapping. assertIn reports both operands on failure.
        mab.add_arm(6)
        self.assertIn(6, mab.arms)
        self.assertIn(6, mab._imp.arm_to_expectation)
Example #28
Source File: test_invalid.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_invalid_simulator_stats_scope(self):
        # Simulator._set_stats is expected to raise ValueError for the scope 'validation'.
        n_rows, n_features = 10, 5
        rng = np.random.RandomState(seed=7)

        # Draw order (decisions, rewards, contexts) is kept so the seeded stream
        # yields the same values as before.
        decisions = np.array([rng.randint(0, 2) for _ in range(n_rows)])
        rewards = np.array([rng.randint(0, 100) for _ in range(n_rows)])
        contexts = [[rng.rand() for _ in range(n_features)] for _ in range(n_rows)]

        bandits = [("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))]
        sim = Simulator(bandits=bandits,
                        decisions=decisions,
                        rewards=rewards,
                        contexts=contexts,
                        scaler=StandardScaler(),
                        test_size=0.4,
                        batch_size=0,
                        is_ordered=True,
                        seed=7)

        # Only the _set_stats call sits inside the assertion context; construction
        # of the simulator itself must succeed.
        with self.assertRaises(ValueError):
            sim._set_stats('validation', decisions, rewards)
Example #29
Source File: test_simulator.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_plot_min_net_online(self, mock_show):
        # Smoke test: run a simulation with batch_size=5, then call plot('min', False).
        # NOTE(review): mock_show is presumably injected by a patch decorator that is
        # not visible here — confirm against the enclosing test class.
        n_rows, n_features = 20, 5
        rng = np.random.RandomState(seed=7)

        # Draw order (decisions, rewards, contexts) is kept so the seeded stream
        # yields the same values as before.
        decisions = [rng.randint(0, 2) for _ in range(n_rows)]
        rewards = [rng.randint(0, 100) for _ in range(n_rows)]
        contexts = [[rng.rand() for _ in range(n_features)] for _ in range(n_rows)]

        bandits = [("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))]
        sim = Simulator(bandits=bandits,
                        decisions=decisions,
                        rewards=rewards,
                        contexts=contexts,
                        scaler=StandardScaler(),
                        test_size=0.4,
                        batch_size=5,
                        is_ordered=True,
                        seed=7)

        sim.run()
        sim.plot('min', False)
Example #30
Source File: test_examples.py    From mabwiser with Apache License 2.0 5 votes vote down vote up
def test_simulator_contextual(self):
        # Run the simulator over five contextual bandits that all use a Radius(10)
        # neighborhood policy, then check that confusion matrices and predictions
        # were recorded.
        size = 100
        n_jobs = 1

        # Random inputs; generation order is kept so the ``random`` stream is
        # consumed exactly as before.
        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        def binarize(decision, reward):
            # Decision 0 is True for rewards up to 50; any other decision is True from 220 up.
            return reward <= 50 if decision == 0 else reward >= 220

        def radius_mab(learning_policy):
            # Each bandit gets arms [0, 1] and its own Radius(10) neighborhood policy.
            return MAB([0, 1], learning_policy, NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)

        contextual_mabs = [
            ('Random', radius_mab(LearningPolicy.Random())),
            ('UCB1', radius_mab(LearningPolicy.UCB1(1))),
            ('ThompsonSampling', radius_mab(LearningPolicy.ThompsonSampling(binarize))),
            ('EpsilonGreedy', radius_mab(LearningPolicy.EpsilonGreedy(epsilon=.15))),
            ('Softmax', radius_mab(LearningPolicy.Softmax())),
        ]

        sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                        scaler=StandardScaler(),
                        test_size=0.5,
                        is_ordered=False,
                        batch_size=0,
                        seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)