2021-03-14
阅读量:
7463
AttributeError:'feature_select' object has no attribute 'select_list' 代码不明白为什么报错
def get_kind(x: pd.Series, diff_limit: int = 10):
x = x.astype('str')
x = x.str.extract(r'(^(\-|)(?=.*\d)\d*(?:\.\d*)?$)')[0]
x.dropna(inplace=True)
if x.nunique() > diff_limit:
kind = 'numeric'
else:
kind = 'categorical'
return kind
class feature_select(BaseEstimator, TransformerMixin):
def __init__(self,
num_list: list = None,
cate_list: list = None,
num_method: str='sys',
cate_method: str='sys',
diff_num: int = 10,
pos_label: str = 1,
show_df: bool = False,
):
self.num_list = num_list
self.cate_list = cate_list
self.num_method= num_method
self.cate_method= cate_method
self.diff_num = diff_num
self.pos_label = pos_label
self.show_df = show_df
self.selete_list = []
def fit (self, X, y=None):
X=X.copy()
from scipy import stats
if self.num_list is None:
self.num_list = []
for col in X.columns:
kind = get_kind(x=X[col], diff_limit=self.diff_num)
if kind == 'numeric':
self.num_list.append(col)
if self.cate_list is None:
self.cate_list = []
for col in X.columns:
kind = get_kind(x=X[col], diff_limit=self.diff_num)
if kind == 'categorical':
self.cate_list.append(col)
X['y']=y
yes=X[X['y'] == self.pos_label]
yes.reset_index(drop=True,inplace=True)
no=X[X['y'] != self.pos_label]
no.reset_index(drop=True,inplace=True)
del X['y']
sys_cate_list, kf_list, kf_p_list =[],[],[]
sys_num_list, t_list, p_value_list, anova_f_list, anova_p_list=[],[],[],[],[]
if self.cate_method == 'sys' or self.show_df is True:
for obj in self.cate_list:
value_list = list(X[obj].unique())
value_sum = 0
for value in value_list:
support_yes = (yes[yes[obj] == value].shape[0] +1)/ ( yes.shape[0]+1)
support_no = (no[no[obj] == value].shape[0] +1) / (no.shape[0]+1)
confidence_yes = support_yes / (support_yes + support_no)
value_sum += abs(2 * confidence_yes - 1) * (X[X[obj] == value].shape[0] / X.shape[0])
sys_cate_list.append(value_sum)
if value_sum >=0.1:
self.select_list.append(obj)
if self.cate_method == 'kf' or self.show_df is True:
for obj in self.cate_list:
df_obj=pd.get_dummies(X[obj],prefix=obj)
df_obj['result']=y
df_obj=df_obj,groupby('result').sum()
obs = df_obj.values
kf = stats.chi2_contingency(obs)
'''
chi2: The test statistic
p: p-value
dof: Degrees of freedom
expected: The expected frequencies, based on the marginal sums of the table.
'''
chi2, p, dof, expect = kf
kf_list.append(chi2)
kf_p_list.append(p)
if p < 0.05:
self.select_list.append(obj)
if self.num_method == 'sys' or self.show_df is True:
for num in self.num_list:
mean_c1 = no[num].mean()
std_c1 = no[num].std()
mean_c2 = yes[num].mean()
std_c2 = yes[num].std()
value_sum=abs(mean_c1 - mean_c2) / (std_c1 + std_c2) * 2
sys_num_list.append(value_sum)
if value_sum >=0.1:
self.select_list.append(num)
if self.num_method == 't' or self.show_df is True:
for num in num_list:
t_t, t_p = stats.ttest_ind(yes[num], no[num], equal_var=False, nan_policy='omit') # 'omit'忽略nan值执行计算
t_list.append(t_t)
p_value_list.append(t_p)
if t_p < 0.05:
self.select_list.append(num)
if self.num_method == 'anova' or self.show_df is True:
for num in num_list:
anova_f, anova_p = stats.f_oneway(yes[num], no[num])
anova_f_list.append(anova_f)
anova_p_list.append(anova_p)
if anova_p < 0.05:
self.select_list.append(num)
if self.show_df is True:
dic1 = {'categorical': self.cate_list, 'importance_': sys_cate_list, 'Kf-Value': kf_list, 'Kf_P-Value': kf_p_list, }
df = pd.DataFrame(dic1, columns=['categorical', 'importance_', 'Kf-Value', 'Kf_P-Value'])
df.sort_values(by='Kf_P-Value', inplace=True)
print(df,'\n')
dic2 = {'numeric':self. num_list, 'importance_': sys_num_list, 'T-Value': t_list, 'P-value': p_value_list, 'Anova-F-Value': anova_f_list, 'Anova-P-value': anova_p_list}
df = pd.DataFrame(dic2, columns=['numeric', 'importance_', 'T-Value', 'P-value', 'Anova-F-Value', 'Anova-P-value'])
df.sort_values(by='Anova-P-value', inplace=True)
print(df,'\n')
self.select_list= list(set(self.select_list))
print('After select attr:', self.select_list)
return self
def transform(self,X):
X=X.copy()
print('attr select success!')
return X[self.select_list]
fs=feature_select(pos_label=’yes’,show_df=True)
x_train=fs.fit_transform(x_train, y_train)fit的时候报错,提示如下:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)in1 fs=feature_select(pos_label='1',show_df=True)
----> 2 x_=fs.fit_transform(x_, y_)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
700 else:
701 # fit method of arity 2 (supervised transformation)
--> 702 return self.fit(X, y, **fit_params).transform(X)
703
704in fit(self, X, y)
56 sys_cate_list.append(value_sum)
57 if value_sum >=0.1:
---> 58 self.select_list.append(obj)
59
60 if self.cate_method == 'kf' or self.show_df is True:
AttributeError: 'feature_select' object has no attribute 'select_list'
68.3351
1
4
关注作者
收藏
评论(4)
发表评论
推荐帖子
0条评论
0条评论
0条评论

