Python sklearn.utils.validation.column_or_1d() Examples
The following are 21 code examples of sklearn.utils.validation.column_or_1d().
You may also want to check out all available functions/classes of the module sklearn.utils.validation.
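Before the project examples, a minimal sketch of what column_or_1d itself does: it ravels a shape-(n, 1) column to shape (n,), optionally warning about the conversion, and rejects anything wider:

import numpy as np
from sklearn.utils.validation import column_or_1d

y = np.array([[1], [2], [3]])            # shape (3, 1)
print(column_or_1d(y).shape)             # (3,) -- silently ravelled
print(column_or_1d(y, warn=True).shape)  # (3,), emits a DataConversionWarning first
# column_or_1d(np.ones((3, 2)))          # would raise ValueError: bad input shape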
Example #1
Source File: utils.py From pmdarima with MIT License
def is_constant(x):
    """Test ``x`` for constancy.

    Determine whether a vector is composed of all of the same elements
    and nothing else.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        The time series vector.

    Examples
    --------
    >>> import numpy as np
    >>> x = np.array([1, 2, 3])
    >>> y = np.ones(3)
    >>> [is_constant(x), is_constant(y)]
    [False, True]
    """
    x = column_or_1d(x)  # type: np.ndarray
    return (x == x[0]).all()
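Because is_constant routes its input through column_or_1d, a (n, 1) column vector works just as well as a flat array. A quick standalone check (re-declaring the function so the snippet runs on its own):

import numpy as np
from sklearn.utils.validation import column_or_1d

def is_constant(x):
    x = column_or_1d(x)
    return (x == x[0]).all()

print(is_constant(np.array([[3.0], [3.0], [3.0]])))  # True -- the column is ravelled first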
Example #2
Source File: Estimator.py From tbats with MIT License
def _validate(self, y):
    """Validates input time series. Also adjusts box_cox if necessary."""
    try:
        y = c1d(check_array(y, ensure_2d=False, force_all_finite=True,
                            ensure_min_samples=1, copy=True,
                            dtype=np.float64))  # type: np.ndarray
    except Exception as validation_exception:
        self.context.get_exception_handler().exception(
            "y series is invalid", error.InputArgsException,
            previous_exception=validation_exception
        )
        return False
    if np.any(y <= 0):
        if self.use_box_cox is True:
            self.context.get_exception_handler().warn(
                "Box-Cox transformation (use_box_cox) was forced to True "
                "but there are negative values in input series. "
                "Setting use_box_cox to False.",
                error.InputArgsWarning
            )
        self.use_box_cox = False
    return y
Example #3
Source File: transformations.py From keras-pandas with MIT License
def fit(self, y):
    """Fit label encoder

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Target values.

    Returns
    -------
    self : returns an instance of self.
    """
    y = column_or_1d(y, warn=True)
    y = numpy.append(y, ['UNK'])
    self.classes_ = numpy.unique(y)
    return self
Example #4
Source File: transformations.py From keras-pandas with MIT License
def fit_transform(self, y, **kwargs):
    """Fit label encoder and return encoded labels

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.
    **kwargs

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    y = column_or_1d(y, warn=True)
    y = numpy.append(y, ['UNK'])
    self.classes_, y = numpy.unique(y, return_inverse=True)
    return y
Example #5
Source File: transformations.py From keras-pandas with MIT License
def transform(self, y):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)
    y = numpy.array(list(map(lambda x: x if x in self.classes_ else 'UNK', y)))
    classes = numpy.unique(y)
    if len(numpy.intersect1d(classes, self.classes_)) < len(classes):
        diff = numpy.setdiff1d(classes, self.classes_)
        raise ValueError("y contains new labels: %s" % str(diff))
    return numpy.searchsorted(self.classes_, y)
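The three keras-pandas methods above implement a single trick: reserve an explicit 'UNK' class at fit time so that labels never seen before can still be encoded at transform time. A minimal standalone sketch of the same idea (class name hypothetical):

import numpy as np
from sklearn.utils.validation import column_or_1d

class UNKLabelEncoder:
    def fit(self, y):
        y = column_or_1d(y, warn=True)
        self.classes_ = np.unique(np.append(y, ['UNK']))
        return self

    def transform(self, y):
        y = column_or_1d(y, warn=True)
        # Map anything not seen during fit to the 'UNK' sentinel.
        y = np.array([v if v in self.classes_ else 'UNK' for v in y])
        return np.searchsorted(self.classes_, y)

enc = UNKLabelEncoder().fit(['cat', 'dog'])
print(enc.transform(['dog', 'ferret']))  # 'ferret' maps to the index of 'UNK'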
Example #6
Source File: ConditionMortalityPredictor.py From CDSS with GNU General Public License v3.0
def _select_features(self):
    # Use FeatureSelector to prune all but 100 variables.
    fs = FeatureSelector(algorithm=FeatureSelector.RECURSIVE_ELIMINATION,
                         problem=FeatureSelector.CLASSIFICATION)
    fs.set_input_matrix(self._X_train, column_or_1d(self._y_train))
    num_features_to_select = int(0.01 * len(self._X_train.columns.values))
    fs.select(k=num_features_to_select)

    # Enumerate eliminated features pre-transformation.
    self._feature_ranks = fs.compute_ranks()
    for i in range(len(self._feature_ranks)):
        if self._feature_ranks[i] > num_features_to_select:
            self._eliminated_features.append(self._X_train.columns[i])

    self._X_train = fs.transform_matrix(self._X_train)
    self._X_test = fs.transform_matrix(self._X_test)
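FeatureSelector is a CDSS-specific wrapper; the recursive-elimination step it performs corresponds to scikit-learn's RFE. A rough standalone equivalent (the estimator choice here is an assumption, not what CDSS uses internally):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.utils.validation import column_or_1d

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
rfe = RFE(LogisticRegression(max_iter=1000), n_features_to_select=5)
rfe.fit(X, column_or_1d(y.reshape(-1, 1)))      # mirror the pipeline's 1d coercion
eliminated = np.where(rfe.ranking_ > 1)[0]      # kept features have ranking_ == 1
print(rfe.transform(X).shape, len(eliminated))  # (200, 5) 15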
Example #7
Source File: SupervisedLearningPipeline.py From CDSS with GNU General Public License v3.0
def _train_predictor(self, problem, classes=None, hyperparams=None):
    if problem == SupervisedLearningPipeline.CLASSIFICATION:
        if 'bifurcated' in hyperparams['algorithm']:
            learning_class = BifurcatedSupervisedClassifier
            # Strip 'bifurcated-' from algorithm for SupervisedClassifier.
            hyperparams['algorithm'] = '-'.join(hyperparams['algorithm'].split('-')[1:])
        else:
            learning_class = SupervisedClassifier
        self._predictor = learning_class(classes, hyperparams)
    elif problem == SupervisedLearningPipeline.REGRESSION:
        learning_class = Regressor
        # Note: bare `algorithm` was undefined here; read it from hyperparams.
        self._predictor = learning_class(algorithm=hyperparams['algorithm'])
    status = self._predictor.train(self._X_train, column_or_1d(self._y_train),
                                   groups=self._patIds_train)
    return status
Example #8
Source File: TestClassifierAnalyzer.py From CDSS with GNU General Public License v3.0
def setUp(self):
    log.level = logging.ERROR

    # Use simple classifier and test case for testing non-ROC analyses.
    X = RANDOM_10_TEST_CASE['X']
    y = RANDOM_10_TEST_CASE['y']
    self._list_classifier = ListPredictor([0, 1])
    self._lc_analyzer = ClassifierAnalyzer(self._list_classifier, X, y)

    # Use ml classifier and complex test case.
    X = RANDOM_100_TEST_CASE['X']
    y = RANDOM_100_TEST_CASE['y']
    # Generate train/test split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123456789)
    # Train logistic regression model.
    hyperparams = {
        'algorithm': SupervisedClassifier.REGRESS_AND_ROUND,
        'random_state': 123456789
    }
    self._ml_classifier = SupervisedClassifier([0, 1], hyperparams)
    self._ml_classifier.train(X_train, column_or_1d(y_train))
    self._ml_analyzer = ClassifierAnalyzer(self._ml_classifier, X_test, y_test)
Example #9
Source File: array.py From pmdarima with MIT License
def check_endog(y, dtype=DTYPE, copy=True, force_all_finite=False):
    """Wrapper for ``check_array`` and ``column_or_1d`` from sklearn

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        The 1d endogenous array.

    dtype : string, type or None (default=np.float64)
        Data type of result. If None, the dtype of the input is preserved.
        If "numeric", dtype is preserved unless array.dtype is object.

    copy : bool, optional (default=True)
        Whether a forced copy will be triggered. If copy=False, a copy might
        still be triggered by a conversion.

    force_all_finite : bool, optional (default=False)
        Whether to raise an error on np.inf and np.nan in an array. The
        possibilities are:

        - True: Force all values of array to be finite.
        - False: accept both np.inf and np.nan in array.

    Returns
    -------
    y : np.ndarray, shape=(n_samples,)
        A 1d numpy ndarray
    """
    return column_or_1d(
        check_array(y, ensure_2d=False,
                    force_all_finite=force_all_finite,
                    copy=copy, dtype=dtype))  # type: np.ndarray
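What the wrapper buys you, in two calls: check_array validates and converts the values, and column_or_1d then guarantees shape (n,). A dependency-free sketch of the same composition (function name hypothetical):

import numpy as np
import pandas as pd
from sklearn.utils import check_array
from sklearn.utils.validation import column_or_1d

def check_endog_sketch(y, dtype=np.float64, copy=True):
    # check_array validates/converts; column_or_1d guarantees shape (n,).
    return column_or_1d(check_array(y, ensure_2d=False, dtype=dtype, copy=copy))

print(check_endog_sketch(pd.Series([1, 2, 3])).dtype)         # float64
print(check_endog_sketch(np.arange(4).reshape(-1, 1)).shape)  # (4,) -- the column is ravelled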
Example #10
Source File: BoxCox.py From tbats with MIT License
def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)):
    y = c1d(check_array(y, ensure_2d=False, force_all_finite=True,
                        ensure_min_samples=1, copy=False,
                        dtype=np.float64))  # type: np.ndarray
    if lam is None:
        lam = find_box_cox_lambda(y, seasonal_periods=seasonal_periods, bounds=bounds)
    if lam <= 0 and np.any(y <= 0):
        raise error.InputArgsException('y must have only positive values for box-cox transformation.')
    if np.isclose(0.0, lam):
        return np.log(y)
    return (np.sign(y) * (np.abs(y) ** lam) - 1) / lam
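For strictly positive input and lam > 0 the formula above reduces to the textbook Box-Cox transform, so it can be sanity-checked against scipy (a quick verification sketch, not part of tbats):

import numpy as np
from scipy import stats

y = np.array([1.0, 2.0, 3.0])
lam = 0.5
ours = (np.sign(y) * np.abs(y) ** lam - 1) / lam
print(np.allclose(ours, stats.boxcox(y, lmbda=lam)))     # True
print(np.allclose(np.log(y), stats.boxcox(y, lmbda=0)))  # lam == 0 falls back to log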
Example #11
Source File: BoxCox.py From tbats with MIT License
def find_box_cox_lambda(y, seasonal_periods=None, bounds=(-1, 2)):
    y = c1d(check_array(y, ensure_2d=False, force_all_finite=True,
                        ensure_min_samples=1, copy=False,
                        dtype=np.float64))  # type: np.ndarray
    guerrero = Guerrero()
    return guerrero.find_lambda(y, seasonal_periods=seasonal_periods, bounds=bounds)
Example #12
Source File: Estimator.py From tbats with MIT License
def _normalize_seasonal_periods_to_type(self, seasonal_periods, dtype):
    """Validates seasonal periods and normalizes them

    Normalization ensures periods are of proper type, unique and sorted.
    """
    if seasonal_periods is not None:
        try:
            seasonal_periods = c1d(check_array(seasonal_periods, ensure_2d=False,
                                               force_all_finite=True,
                                               ensure_min_samples=0,
                                               copy=True, dtype=dtype))
        except Exception as validation_exception:
            self.context.get_exception_handler().exception(
                "seasonal_periods definition is invalid",
                error.InputArgsException,
                previous_exception=validation_exception)
        seasonal_periods = np.unique(seasonal_periods)
        if len(seasonal_periods[np.where(seasonal_periods <= 1)]) > 0:
            self.context.get_exception_handler().warn(
                "All seasonal periods should be values greater than 1. "
                "Ignoring all seasonal period values that do not meet this condition.",
                error.InputArgsWarning
            )
            seasonal_periods = seasonal_periods[np.where(seasonal_periods > 1)]
        seasonal_periods.sort()
        if len(seasonal_periods) == 0:
            seasonal_periods = None
    return seasonal_periods
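Stripped of the error handling, the normalization boils down to three numpy steps, shown here in isolation:

import numpy as np

periods = np.array([7.0, 1.0, 365.25, 7.0, 0.5])
periods = np.unique(periods)     # dedupe (np.unique also sorts)
periods = periods[periods > 1]   # drop degenerate periods <= 1
print(periods)                   # [  7.   365.25]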
Example #13
Source File: utils.py From AIF360 with Apache License 2.0
def check_inputs(X, y, sample_weight=None, ensure_2d=True):
    """Input validation for debiasing algorithms.

    Checks all inputs for consistent length, validates shapes (optional for X),
    and returns an array of all ones if sample_weight is ``None``.

    Args:
        X (array-like): Input data.
        y (array-like, shape = (n_samples,)): Target values.
        sample_weight (array-like, optional): Sample weights.
        ensure_2d (bool, optional): Whether to raise a ValueError if X is
            not 2D.

    Returns:
        tuple:

            * **X** (`array-like`) -- Validated X. Unchanged.
            * **y** (`array-like`) -- Validated y. Possibly converted to 1D
              if not a :class:`pandas.Series`.
            * **sample_weight** (`array-like`) -- Validated sample_weight.
              If no sample_weight is provided, returns a consistent-length
              array of ones.
    """
    if ensure_2d and X.ndim != 2:
        raise ValueError("Expected X to be 2D, got ndim == {} instead.".format(X.ndim))
    if not isinstance(y, pd.Series):  # don't cast Series -> ndarray
        y = column_or_1d(y)
    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
    else:
        sample_weight = np.ones(X.shape[0])
    check_consistent_length(X, y, sample_weight)
    return X, y, sample_weight
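A sketch of the default-weight behavior, re-created inline with the same public sklearn helpers so it runs without AIF360:

import numpy as np
import pandas as pd
from sklearn.utils import check_consistent_length
from sklearn.utils.validation import column_or_1d

X = pd.DataFrame({'a': [1, 2, 3]})
y = column_or_1d(np.array([[0], [1], [0]]))  # column vector coerced to shape (3,)
sample_weight = np.ones(X.shape[0])          # default: every sample weighted 1.0
check_consistent_length(X, y, sample_weight)
print(y.shape, sample_weight)                # (3,) [1. 1. 1.]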
Example #14
Source File: SupervisedClassifier.py From CDSS with GNU General Public License v3.0
def _maybe_reshape_y(self, y):
    # If necessary, reshape y from (n_samples, 1) to (n_samples, )
    try:
        num_cols = y.shape[1]
        y = column_or_1d(y)
        log.debug('Reshaped y to 1d.')
    except IndexError:
        log.debug('Did not need to reshape y to 1d.')
    return y
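The try/except works because indexing shape[1] on a 1d ndarray raises IndexError, so only genuinely 2d input reaches the column_or_1d call. A two-case demonstration:

import numpy as np

for y in (np.zeros((5, 1)), np.zeros(5)):
    try:
        y.shape[1]  # succeeds only for the 2d column
        print('would reshape', y.shape, '->', np.ravel(y).shape)
    except IndexError:
        print('already 1d:', y.shape)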
Example #15
Source File: encoders.py From sagemaker-scikit-learn-extension with Apache License 2.0
def inverse_transform(self, y):
    """Transform labels back to original encoding.

    If ``self.fill_unseen_labels`` is ``True``, use ``self.fill_label_value`` for unseen values.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Encoded label values.

    Returns
    -------
    y_decoded : numpy array of shape [n_samples]
        Label values.
    """
    check_is_fitted(self, "classes_")
    y = column_or_1d(y, warn=True)
    if y.dtype.kind not in ("i", "u"):
        try:
            # np.float/np.int aliases were removed in NumPy >= 1.24; use builtins.
            y = y.astype(float).astype(int)
        except ValueError:
            raise ValueError("`y` contains values not convertible to integer.")
    # inverse transform of empty array is empty array
    if _num_samples(y) == 0:
        return np.array([])
    labels = np.arange(len(self.classes_))
    diff = np.setdiff1d(y, labels)
    # `if diff` would raise for arrays with more than one element; test size instead.
    if diff.size and not self.fill_unseen_labels:
        raise ValueError("y contains previously unseen labels: %s" % str(diff))
    y_decoded = [self.classes_[idx] if idx in labels else self.fill_label_value for idx in y]
    return y_decoded
Example #16
Source File: encoders.py From sagemaker-scikit-learn-extension with Apache License 2.0
def transform(self, y):
    """Transform labels to normalized encoding.

    If ``self.fill_unseen_labels`` is ``True``, use ``self.fill_encoded_label_value`` for unseen
    values. Seen labels are encoded with value between 0 and n_classes-1. Unseen labels are
    encoded with ``self.fill_encoded_label_value`` with a default value of n_classes.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Label values.

    Returns
    -------
    y_encoded : array-like of shape [n_samples]
        Encoded label values.
    """
    check_is_fitted(self, "classes_")
    y = column_or_1d(y, warn=True)

    # transform of empty array is empty array
    if _num_samples(y) == 0:
        return np.array([])

    if self.fill_unseen_labels:
        _, mask = _encode_check_unknown(y, self.classes_, return_mask=True)
        y_encoded = np.searchsorted(self.classes_, y)
        fill_encoded_label_value = self.fill_encoded_label_value or len(self.classes_)
        y_encoded[~mask] = fill_encoded_label_value
    else:
        _, y_encoded = _encode(y, uniques=self.classes_, encode=True)
    return y_encoded
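_encode and _encode_check_unknown are private scikit-learn helpers (relocated in newer releases), but the fill-unseen branch can be reproduced with public numpy calls alone (a sketch):

import numpy as np

classes = np.array(['a', 'b', 'c'])  # sorted classes_ learned at fit time
y = np.array(['b', 'zzz', 'a'])
mask = np.isin(y, classes)           # which labels were seen during fit?
y_encoded = np.searchsorted(classes, y)
y_encoded[~mask] = len(classes)      # unseen -> n_classes, the default fill value
print(y_encoded)                     # [1 3 0]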
Example #17
Source File: encoders.py From sagemaker-scikit-learn-extension with Apache License 2.0
def fit(self, y):
    """Fit label encoder.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Label values.

    Returns
    -------
    self : RobustLabelEncoder.
    """
    y = column_or_1d(y, warn=True)
    self.classes_ = self._check_labels_and_sort() or _encode(y)
    return self
Example #18
Source File: array.py From pmdarima with MIT License
def as_series(x):
    """Cast as pandas Series.

    Cast an iterable to a Pandas Series object. Note that the index will
    simply be a positional ``arange`` and cannot be set in this function.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        The 1d array to cast as a Pandas Series.

    Examples
    --------
    >>> as_series([1, 2, 3])
    0    1
    1    2
    2    3
    dtype: int64

    >>> as_series(as_series((1, 2, 3)))
    0    1
    1    2
    2    3
    dtype: int64

    >>> import pandas as pd
    >>> as_series(pd.Series([4, 5, 6], index=['a', 'b', 'c']))
    a    4
    b    5
    c    6
    dtype: int64

    Returns
    -------
    s : pd.Series
        A pandas Series object.
    """
    if isinstance(x, pd.Series):
        return x
    return pd.Series(column_or_1d(x))
Example #19
Source File: SupervisedLearningPipeline.py From CDSS with GNU General Public License v3.0
def _select_features(self, problem, percent_features_to_select, algorithm, features_to_keep=None):
    # Initialize FeatureSelector.
    fs = FeatureSelector(problem=problem, algorithm=algorithm, random_state=self._random_state)
    fs.set_input_matrix(self._X_train, column_or_1d(self._y_train))
    num_features_to_select = int(percent_features_to_select * len(self._X_train.columns.values))

    # Parse features_to_keep.
    if features_to_keep is None:
        features_to_keep = []

    # Select features.
    fs.select(k=num_features_to_select)

    # Enumerate eliminated features pre-transformation.
    feature_ranks = fs.compute_ranks()
    for i in range(len(feature_ranks)):
        if feature_ranks[i] > num_features_to_select:
            # If in features_to_keep, pretend it wasn't eliminated.
            if self._X_train.columns[i] not in features_to_keep:
                self._eliminated_features.append(self._X_train.columns[i])

    # Hack: rather than making FeatureSelector handle the concept of
    # kept features, just copy the data here and add it back to the
    # transformed matrices.
    # Rather than looping, do this individually so that we can skip if
    # transformed X already has the feature.
    if features_to_keep:
        kept_X_train_feature = self._X_train[features_to_keep].copy()
        log.debug('kept_X_train_feature.shape: %s' % str(kept_X_train_feature.shape))
        self._X_train = fs.transform_matrix(self._X_train)
        for feature in features_to_keep:
            if feature not in self._X_train:
                self._X_train = self._X_train.merge(kept_X_train_feature[[feature]],
                                                    left_index=True, right_index=True)

        kept_X_test_feature = self._X_test[features_to_keep].copy()
        log.debug('kept_X_test_feature.shape: %s' % str(kept_X_test_feature.shape))
        self._X_test = fs.transform_matrix(self._X_test)
        for feature in features_to_keep:
            if feature not in self._X_test:
                self._X_test = self._X_test.merge(kept_X_test_feature[[feature]],
                                                  left_index=True, right_index=True)
    if not features_to_keep:
        # Even if there is no feature to keep, still need to
        # perform transform_matrix to drop most low-rank features
        self._X_train = fs.transform_matrix(self._X_train)
        self._X_test = fs.transform_matrix(self._X_test)
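The stash-and-merge-back trick for features_to_keep is independent of FeatureSelector; with any pandas feature matrix it reduces to the pattern below (column names hypothetical):

import pandas as pd

X = pd.DataFrame({'f1': [1, 2], 'f2': [3, 4], 'keep_me': [5, 6]})
features_to_keep = ['keep_me']
kept = X[features_to_keep].copy()  # stash before the selector drops columns
X_selected = X[['f1']]             # stand-in for fs.transform_matrix(X)
for feature in features_to_keep:
    if feature not in X_selected:  # `in` on a DataFrame checks column names
        X_selected = X_selected.merge(kept[[feature]], left_index=True, right_index=True)
print(list(X_selected.columns))    # ['f1', 'keep_me']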
Example #20
Source File: SupervisedLearner.py From CDSS with GNU General Public License v3.0
def run(self):
    file_organizer = Syst.FileOrganizerLocal(working_folderpath=self.working_folderpath)
    raw_matrix_train, raw_matrix_test = Utils.split_rows(self.input_matrix)

    X_train_raw, y_train = Utils.split_Xy(raw_matrix_train, ylabel=self.ylabel)
    feature_processing_pipeline = Pipeline(
        memory=None,  # file_organizer.cached_pipeline_filepath,
        steps=[
            ('impute_features', Clas.FeatureImputer()),
            ('remove_features', Clas.FeatureRemover()),
            ('select_features', Clas.Select_Features())
        ]
    )
    X_train_processed = feature_processing_pipeline.fit_transform(X_train_raw, y_train)

    predictor = SupervisedClassifier(
        classes=[0, 1],
        hyperparams={
            'algorithm': 'random-forest',
            'hyperparam_strategy': SupervisedClassifier.EXHAUSTIVE_SEARCH,
            'max_iter': 1024
        }
    )
    status = predictor.train(X_train_processed, column_or_1d(y_train))

    X_test_raw, y_test = Utils.split_Xy(raw_matrix_test, ylabel=self.ylabel)
    X_test_processed = feature_processing_pipeline.transform(X_test_raw)
    y_test_pred_proba = predictor.predict_probability(X_test_processed)[:, 1]

    res_df = pd.DataFrame({'actual': y_test, 'predict': y_test_pred_proba})
    res_df.to_csv(file_organizer.get_output_filepath())

    '''TODO'''
    from scripts.LabTestAnalysis.lab_statistics.stats_utils import get_confusion_metrics
    from sklearn.metrics import roc_auc_score
    AUC = roc_auc_score(y_test, y_test_pred_proba)
    sensitivity, specificity, LR_p, LR_n, PPV, NPV = get_confusion_metrics(
        actual_labels=y_test.values, predict_probas=y_test_pred_proba, threshold=0.5)
    print("AUC: %s, sensitivity: %s, specificity: %s, LR_p: %s, LR_n: %s, PPV: %s, NPV: %s."
          % (AUC, sensitivity, specificity, LR_p, LR_n, PPV, NPV))
Example #21
Source File: Model.py From tbats with MIT License
def _fit_to_observations(self, y, starting_x):
    """Fits model with starting x to time series"""
    self.warnings = []
    self.is_fitted = False
    if self.validate_input:
        try:
            y = c1d(check_array(y, ensure_2d=False, force_all_finite=True,
                                ensure_min_samples=1, copy=True,
                                dtype=np.float64))  # type: np.ndarray
        except Exception as validation_exception:
            self.context.get_exception_handler().exception(
                "y series is invalid", error.InputArgsException,
                previous_exception=validation_exception)
    self.y = y
    yw = self._boxcox(y)

    matrix_builder = self.matrix
    w = matrix_builder.make_w_vector()
    g = matrix_builder.make_g_vector()
    F = matrix_builder.make_F_matrix()

    # initialize matrices
    yw_hat = np.asarray([0.0] * len(y))
    # x = np.matrix(np.zeros((len(params.x0), len(yw) + 1)))
    x = starting_x

    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        try:
            for t in range(0, len(y)):
                yw_hat[t] = w @ x
                e = yw[t] - yw_hat[t]
                x = F @ x + g * e
        except RuntimeWarning:
            # calculation issues, values close to max float value
            self.add_warning('Numeric calculation issues detected. Model is not usable.')
            self.is_fitted = False
            return self

    # store fit results
    self.x_last = x
    self.resid_boxcox = yw - yw_hat
    try:
        self.y_hat = self._inv_boxcox(yw_hat)
    except RuntimeWarning:
        self.add_warning('Box-Cox related numeric calculation issues detected. Model is not usable.')
        self.is_fitted = False
        return self
    self.resid = self.y - self.y_hat
    self.is_fitted = True
    self.aic = self.calculate_aic()
    return self
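The loop at the heart of _fit_to_observations is a linear innovations state-space filter: predict yw_hat[t] = w @ x, measure the innovation e = yw[t] - yw_hat[t], then update the state x = F @ x + g * e. A stripped-down sketch of just that recursion, using a toy one-dimensional local-level model rather than tbats matrices:

import numpy as np

# Toy local-level model: scalar state, w = F = 1, smoothing gain g.
w = np.array([1.0])
F = np.array([[1.0]])
g = np.array([0.3])
x = np.array([0.0])       # starting state (the method's starting_x)

y = np.array([1.0, 1.2, 0.9, 1.1])
y_hat = np.zeros_like(y)
for t in range(len(y)):
    y_hat[t] = w @ x      # one-step-ahead prediction
    e = y[t] - y_hat[t]   # innovation (prediction error)
    x = F @ x + g * e     # state update
print(y_hat)              # each prediction moves a fraction g toward the last observation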