Python statsmodels.api.RLM Examples
The following are 7 code examples of statsmodels.api.RLM(). Each example is taken from an open-source project; the source file and license are noted above each snippet. You may also want to check out all available functions/classes of the module statsmodels.api.
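Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic RLM workflow: build a design matrix with an intercept, fit with a Huber norm, and read off the robust estimates. The synthetic data and variable names are illustrative only.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 1.0 + 2.0 * x + rng.normal(size=100)
y[:5] += 10                                   # a few gross outliers

X = sm.add_constant(x)                        # add an intercept column
results = sm.RLM(y, X, M=sm.robust.norms.HuberT()).fit()
print(results.params)                         # robust coefficient estimates
print(results.bse)                            # robust standard errors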
Example #1
Source File: causal_search.py From whynot with MIT License
def __init__(self, y, x, z, data, alpha):
    self.regression = sm.RLM(data[y], data[x + z])
    self.result = self.regression.fit()
    self.coefficient = self.result.params[x][0]
    confidence_interval = self.result.conf_int(alpha=alpha / 2.0)
    self.upper = confidence_interval[1][x][0]
    self.lower = confidence_interval[0][x][0]
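For context on the indexing above: with pandas inputs, conf_int() typically returns a DataFrame whose integer columns 0 and 1 hold the lower and upper bounds for each regressor. A hedged standalone sketch of the same access pattern (the column names here are made up for illustration):

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(1)
data = pd.DataFrame(rng.normal(size=(200, 3)), columns=['treatment', 'z1', 'z2'])
data['outcome'] = 1.5 * data['treatment'] + 0.5 * data['z1'] + rng.normal(size=200)

result = sm.RLM(data['outcome'], data[['treatment', 'z1', 'z2']]).fit()
ci = result.conf_int(alpha=0.05)              # DataFrame indexed by regressor name
lower, upper = ci.loc['treatment', 0], ci.loc['treatment', 1]
print(result.params['treatment'], lower, upper)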
Example #2
Source File: smoothers.py From plotnine with GNU General Public License v2.0
def rlm(data, xseq, **params):
    """
    Fit RLM
    """
    if params['formula']:
        return rlm_formula(data, xseq, **params)

    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)

    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.RLM, sm.RLM.fit)
    model = sm.RLM(data['y'], X, **init_kwargs)
    results = model.fit(**fit_kwargs)

    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        warnings.warn("Confidence intervals are not yet implemented "
                      "for RLM smoothing.", PlotnineWarning)

    return data
Example #3
Source File: smoothers.py From plotnine with GNU General Public License v2.0
def rlm_formula(data, xseq, **params):
    """
    Fit RLM using a formula
    """
    eval_env = params['enviroment']  # note: key spelling matches plotnine's params dict
    formula = params['formula']
    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.RLM, sm.RLM.fit)
    model = smf.rlm(
        formula,
        data,
        eval_env=eval_env,
        **init_kwargs
    )
    results = model.fit(**fit_kwargs)

    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(data)

    if params['se']:
        warnings.warn("Confidence intervals are not yet implemented "
                      "for RLM smoothing.", PlotnineWarning)

    return data
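A small standalone sketch of the formula interface used in rlm_formula, assuming nothing beyond statsmodels, pandas, and numpy: smf.rlm builds the design matrix from a formula string, and predict() accepts a DataFrame with the same column names, which is how the smoother evaluates the fit on the xseq grid. The data below is synthetic and purely illustrative.

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

df = pd.DataFrame({'x': np.linspace(0, 10, 50)})
df['y'] = 3.0 * df['x'] + np.random.randn(50)

results = smf.rlm('y ~ x', data=df).fit()
grid = pd.DataFrame({'x': np.linspace(0, 10, 200)})
yhat = results.predict(grid)                  # predictions on a fine grid
print(yhat[:5])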
Example #4
Source File: test_generic_methods.py From vnpy_crypto with MIT License
def setup(self):
    # fit for each test, because results will be changed by test
    x = self.exog
    np.random.seed(987689)
    y = x.sum(1) + np.random.randn(x.shape[0])
    self.results = sm.RLM(y, self.exog).fit()
Example #5
Source File: test_shrink_pickle.py From vnpy_crypto with MIT License
def setup(self):
    # fit for each test, because results will be changed by test
    x = self.exog
    np.random.seed(987689)
    y = x.sum(1) + np.random.randn(x.shape[0])
    self.results = sm.RLM(y, self.exog).fit()
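Both fixtures above re-fit the model per test because the tests mutate the results object. For context (an assumption based on the file name, not code taken from the test itself), test_shrink_pickle concerns saving results with the data arrays stripped out; a minimal sketch of that save/load round trip:

import numpy as np
import statsmodels.api as sm

np.random.seed(987689)
exog = sm.add_constant(np.random.randn(100, 3))
endog = exog.sum(1) + np.random.randn(100)

results = sm.RLM(endog, exog).fit()
results.save('rlm_results.pickle', remove_data=True)   # shrink the pickle by dropping data arrays
restored = sm.load('rlm_results.pickle')
print(restored.params)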
Example #6
Source File: plant_analysis.py From OpenOA with BSD 3-Clause "New" or "Revised" License
def plot_reanalysis_gross_energy_data(self, outlier_thres):
    """
    Make a plot of normalized 30-day gross energy vs wind speed for each reanalysis product,
    including an R2 measure.

    :param outlier_thres (float): outlier threshold (typical range of 1 to 4) which adjusts
        outlier detection sensitivity
    :return: matplotlib.pyplot object
    """
    import matplotlib.pyplot as plt
    valid_monthly = self._monthly.df
    project = self._plant
    plt.figure(figsize=(9, 9))

    # Loop through each reanalysis product and make a scatterplot of monthly wind speed vs plant energy
    for p in np.arange(0, len(list(project._reanalysis._product.keys()))):
        col_name = list(project._reanalysis._product.keys())[p]  # Reanalysis column in monthly data frame

        x = sm.add_constant(valid_monthly[col_name])  # Define 'x'-values (constant needed for regression function)
        y = valid_monthly['gross_energy_gwh'] * 30 / valid_monthly['num_days_expected']  # Normalize energy data to 30 days

        # Robust linear regression with the HuberT norm (threshold set by outlier_thres)
        rlm = sm.RLM(y, x, M=sm.robust.norms.HuberT(t=outlier_thres))
        rlm_results = rlm.fit()

        # Get R2 from valid data (points the Huber algorithm left at full weight)
        r2 = np.corrcoef(x.loc[rlm_results.weights == 1, col_name],
                         y[rlm_results.weights == 1])[0, 1]

        # Plot results
        plt.subplot(2, 2, p + 1)
        plt.plot(x.loc[rlm_results.weights != 1, col_name], y[rlm_results.weights != 1],
                 'rx', label='Outlier')
        plt.plot(x.loc[rlm_results.weights == 1, col_name], y[rlm_results.weights == 1],
                 '.', label='Valid data')
        plt.title(col_name + ', R2=' + str(np.round(r2, 3)))
        plt.xlabel('Wind speed (m/s)')
        plt.ylabel('30-day normalized gross energy (GWh)')

    plt.tight_layout()
    return plt
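The pattern above relies on the Huber weights: observations the norm leaves at full weight (exactly 1) are treated as valid, and anything down-weighted (weight < 1) is flagged as an outlier. A minimal sketch of that idea on synthetic data (not OpenOA's), with an illustrative threshold of t=2:

import numpy as np
import statsmodels.api as sm

np.random.seed(0)
wind_speed = np.random.uniform(4, 12, 60)
energy = 0.8 * wind_speed + np.random.normal(0, 0.3, 60)
energy[:3] *= 0.2                                       # inject a few outliers

X = sm.add_constant(wind_speed)
rlm_results = sm.RLM(energy, X, M=sm.robust.norms.HuberT(t=2)).fit()

outlier_mask = rlm_results.weights < 1                  # down-weighted points
print('flagged outliers:', np.where(outlier_mask)[0])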
Example #7
Source File: plant_analysis.py From OpenOA with BSD 3-Clause "New" or "Revised" License
def filter_outliers(self, reanal, outlier_thresh, comb_loss_thresh):
    """
    This function filters outliers based on:
        1. The reanalysis product
        2. The Huber parameter, which controls sensitivity of outlier detection in robust linear regression
        3. The combined availability and curtailment loss criteria

    There are only 300 combinations of outlier removals:
    (3 reanalysis products x 10 outlier threshold values x 10 combined loss thresholds)

    Therefore, we use a memoized function to store the regression data in a dictionary for each
    combination as it comes up in the Monte Carlo simulation. This saves significant computational
    time by not having to run robust linear regression for each Monte Carlo iteration.

    Args:
        reanal(:obj:`string`): The name of the reanalysis product
        outlier_thresh(:obj:`float`): The Huber parameter controlling sensitivity of outlier detection
        comb_loss_thresh(:obj:`float`): The combined availability and curtailment monthly loss threshold

    Returns:
        :obj:`pandas.DataFrame`: Filtered monthly data ready for linear regression
    """
    # Check if valid data has already been calculated and stored. If so, just return it
    if (reanal, outlier_thresh, comb_loss_thresh) in self.outlier_filtering:
        valid_data = self.outlier_filtering[(reanal, outlier_thresh, comb_loss_thresh)]
        return valid_data

    # If valid data hasn't yet been stored in dictionary, determine the valid data
    df = self._monthly.df

    # First set of filters checking combined losses and if the NaN data flag was on
    df_sub = df.loc[((df['availability_pct'] + df['curtailment_pct']) < comb_loss_thresh)
                    & (df['nan_flag'] == False)]

    # Now perform robust linear regression using the Huber algorithm to flag outliers
    X = sm.add_constant(df_sub[reanal])  # Reanalysis data with constant column
    y = df_sub['gross_energy_gwh']  # Energy data

    # Perform robust linear regression
    rlm = sm.RLM(y, X, M=sm.robust.norms.HuberT(outlier_thresh))
    rlm_results = rlm.fit()

    # Define valid data as points for which the Huber algorithm returned a weight of 1
    valid_data = df_sub.loc[rlm_results.weights == 1,
                            [reanal, 'energy_gwh', 'availability_gwh',
                             'curtailment_gwh', 'num_days_expected']]

    # Update the dictionary
    self.outlier_filtering[(reanal, outlier_thresh, comb_loss_thresh)] = valid_data

    # Return result
    return valid_data
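The caching described in the docstring is plain dictionary memoization keyed on the parameter combination. A stripped-down sketch of that pattern, detached from the class (the helper name and the compute callback are hypothetical):

outlier_filtering = {}

def memoized_filter(reanal, outlier_thresh, comb_loss_thresh, compute):
    """Return cached filtered data for this combination, computing it once if needed."""
    key = (reanal, outlier_thresh, comb_loss_thresh)
    if key not in outlier_filtering:
        outlier_filtering[key] = compute(reanal, outlier_thresh, comb_loss_thresh)
    return outlier_filtering[key]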