注意:此页面搜索的是所有试题
国家开放大学大数据预处理复习题
请写出下方代码的功能
# Pearson-correlation feature filter: compute the correlation between the
# target train_y and every column of train_x, then flag the strong ones.
# NOTE(review): assumes train_x is a pandas DataFrame and pearsonr is
# scipy.stats.pearsonr -- confirm against the surrounding script.
# dtype is given explicitly: an empty Series without one defaults to object
# dtype in pandas >= 2.0 (and emitted a warning in 1.x).
pearson = pd.Series(name="pearson correlation", dtype=float)
for i in train_x:
    # Element [0] of the pearsonr result is the correlation coefficient;
    # the p-value is discarded.
    pearson[i] = pearsonr(train_y, train_x[i])[0]
# Boolean mask: True where the absolute correlation exceeds 0.5.
var_cor = pearson.abs() > 0.5
请写出下方代码的功能
# Tree-based feature ranking: fit a gradient-boosting classifier on all
# features and rank them by the importance scores the fitted model exposes.
model_all = GradientBoostingClassifier(random_state=0)
model_all.fit(X=train_x, y=train_y)
feature_imp = pd.Series(model_all.feature_importances_, index=train_x.columns)
# Sort once (highest importance first) and reuse for both the top-8
# selection and the printed ranking.
imp_sorted = feature_imp.sort_values(ascending=False)
var_tree = imp_sorted.head(8).index
print(imp_sorted)
print("\n结果为:\n%s" % var_tree.values)
请写出下方代码的功能
# Equal-width binning: pd.cut with an integer `bins` splits the value range
# of `price` into 5 intervals of equal width.
# NOTE(review): `price` is presumably car_data["price"] -- it is not defined
# in this snippet; confirm against the surrounding script.
bin_1 = pd.cut(price, bins=5)
# Put each original price next to the interval it was assigned to.
d1 = {"price": car_data["price"], "bin": bin_1}
p1 = pd.DataFrame(data=d1)
# Show the first 20 rows, then how many values fell into each interval.
print("等宽分箱结果:\n%s" % p1[0:20])
print("等宽分箱频数分布:\n%s" % bin_1.value_counts())
# Pearson-correlation feature filter: compute the correlation between the
# target train_y and every column of train_x, then flag the strong ones.
# NOTE(review): assumes train_x is a pandas DataFrame and pearsonr is
# scipy.stats.pearsonr -- confirm against the surrounding script.
# dtype is given explicitly: an empty Series without one defaults to object
# dtype in pandas >= 2.0 (and emitted a warning in 1.x).
pearson = pd.Series(name="pearson correlation", dtype=float)
for i in train_x:
    # Element [0] of the pearsonr result is the correlation coefficient;
    # the p-value is discarded.
    pearson[i] = pearsonr(train_y, train_x[i])[0]
# Boolean mask: True where the absolute correlation exceeds 0.5.
var_cor = pearson.abs() > 0.5
请写出下方代码的功能
# Tree-based feature ranking: fit a gradient-boosting classifier on all
# features and rank them by the importance scores the fitted model exposes.
model_all = GradientBoostingClassifier(random_state=0)
model_all.fit(X=train_x, y=train_y)
feature_imp = pd.Series(model_all.feature_importances_, index=train_x.columns)
# Sort once (highest importance first) and reuse for both the top-8
# selection and the printed ranking.
imp_sorted = feature_imp.sort_values(ascending=False)
var_tree = imp_sorted.head(8).index
print(imp_sorted)
print("\n结果为:\n%s" % var_tree.values)
请写出下方代码的功能
# Equal-width binning: pd.cut with an integer `bins` splits the value range
# of `price` into 5 intervals of equal width.
# NOTE(review): `price` is presumably car_data["price"] -- it is not defined
# in this snippet; confirm against the surrounding script.
bin_1 = pd.cut(price, bins=5)
# Put each original price next to the interval it was assigned to.
d1 = {"price": car_data["price"], "bin": bin_1}
p1 = pd.DataFrame(data=d1)
# Show the first 20 rows, then how many values fell into each interval.
print("等宽分箱结果:\n%s" % p1[0:20])
print("等宽分箱频数分布:\n%s" % bin_1.value_counts())