注意
点击 这里 下载完整示例代码或通过Binder在浏览器中运行此示例
度量标准¶
Auto-sklearn 支持各种内置度量标准,可在API 中的度量标准部分找到。然而,也可以定义自己的度量标准并用它来拟合和评估模型。以下示例展示了如何为分类问题使用内置和自定义度量标准。
import numpy as np
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
import autosklearn.metrics
自定义度量标准¶
def accuracy(solution, prediction):
    """Return the fraction of predictions that equal the true labels.

    Args:
        solution: Ground-truth labels (array-like).
        prediction: Predicted labels, same length as ``solution``.

    Returns:
        The mean of the element-wise equality, a value in ``[0, 1]``.
    """
    # Coerce to arrays so that plain lists are compared element by element
    # instead of collapsing to a single ``True``/``False``.
    solution = np.asarray(solution)
    prediction = np.asarray(prediction)
    return np.mean(solution == prediction)
def error(solution, prediction):
    """Return the fraction of predictions that differ from the true labels.

    Args:
        solution: Ground-truth labels (array-like).
        prediction: Predicted labels, same length as ``solution``.

    Returns:
        The mean of the element-wise inequality, a value in ``[0, 1]``
        where ``0`` means every prediction was correct.
    """
    # Coerce to arrays so that plain lists are compared element by element
    # instead of collapsing to a single ``True``/``False``.
    solution = np.asarray(solution)
    prediction = np.asarray(prediction)
    return np.mean(solution != prediction)
def accuracy_wk(solution, prediction, extra_argument):
    """Return the accuracy; demonstrates a metric taking an extra argument.

    Args:
        solution: Ground-truth labels (array-like).
        prediction: Predicted labels, same length as ``solution``.
        extra_argument: Additional value handed to the metric. This example
            expects it to be ``None`` and asserts so.

    Returns:
        The fraction of positions where ``prediction`` equals ``solution``.
    """
    # Sanity check that the extra argument arrives with the expected value.
    assert extra_argument is None
    # Coerce to arrays so that plain lists are compared element by element.
    solution = np.asarray(solution)
    prediction = np.asarray(prediction)
    return np.mean(solution == prediction)
def error_wk(solution, prediction, extra_argument):
    """Return the error rate; demonstrates a metric taking an extra argument.

    Args:
        solution: Ground-truth labels (array-like).
        prediction: Predicted labels, same length as ``solution``.
        extra_argument: Additional value handed to the metric. This example
            expects it to be ``None`` and asserts so.

    Returns:
        The fraction of positions where ``prediction`` differs from
        ``solution``.
    """
    # Sanity check that the extra argument arrives with the expected value.
    assert extra_argument is None
    # Coerce to arrays so that plain lists are compared element by element.
    solution = np.asarray(solution)
    prediction = np.asarray(prediction)
    return np.mean(solution != prediction)
def metric_which_needs_x(solution, prediction, X_data, consider_col, val_threshold):
    """Return the accuracy restricted to rows selected by a feature threshold.

    Only the samples whose value in column ``consider_col`` of ``X_data`` is
    strictly greater than ``val_threshold`` contribute to the score.

    Args:
        solution: Ground-truth labels (array-like), one per row of ``X_data``.
        prediction: Predicted labels, same length as ``solution``.
        X_data: Two-dimensional feature matrix; must not be ``None``.
        consider_col: Index of the column used to select rows.
        val_threshold: Rows with a column value above this are kept.

    Returns:
        The fraction of selected rows whose prediction matches the solution.
        If no row passes the threshold the mean is taken over an empty
        selection and the result is ``nan``.
    """
    assert X_data is not None
    # Coerce so that list inputs can be boolean-indexed and column-sliced.
    solution = np.asarray(solution)
    prediction = np.asarray(prediction)
    X_data = np.asarray(X_data)
    # Boolean mask over rows: True where the chosen feature exceeds the threshold.
    rel_idx = X_data[:, consider_col] > val_threshold
    return np.mean(solution[rel_idx] == prediction[rel_idx])
数据加载¶
# Fetch the breast-cancer data directly as a (features, labels) pair.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Hold out a test split; the fixed random_state keeps the split repeatable.
split = sklearn.model_selection.train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split
打印可用度量标准列表¶
# List the names of the built-in metrics, one bullet per metric, first for
# classification and then for regression.
for header, metric_names in (
    (
        "Available CLASSIFICATION metrics autosklearn.metrics.*:",
        autosklearn.metrics.CLASSIFICATION_METRICS,
    ),
    (
        "Available REGRESSION autosklearn.metrics.*:",
        autosklearn.metrics.REGRESSION_METRICS,
    ),
):
    print(header)
    print("\t*" + "\n\t*".join(metric_names))
Available CLASSIFICATION metrics autosklearn.metrics.*:
*accuracy
*balanced_accuracy
*roc_auc
*average_precision
*log_loss
*precision_macro
*precision_micro
*precision_samples
*precision_weighted
*recall_macro
*recall_micro
*recall_samples
*recall_weighted
*f1_macro
*f1_micro
*f1_samples
*f1_weighted
Available REGRESSION autosklearn.metrics.*:
*mean_absolute_error
*mean_squared_error
*root_mean_squared_error
*mean_squared_log_error
*median_absolute_error
*r2
第一个示例:使用预定义的准确率指标¶
# Example 1: search and evaluate with the accuracy scorer shipped with
# auto-sklearn.
print("#" * 80, "Use predefined accuracy metric", sep="\n")

scorer = autosklearn.metrics.accuracy
automl_settings = dict(time_left_for_this_task=60, seed=1, metric=scorer)
cls = autosklearn.classification.AutoSklearnClassifier(**automl_settings)
cls.fit(X_train, y_train)

# Evaluate the held-out predictions with the same scorer given to the search.
predictions = cls.predict(X_test)
score = scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {scorer.name}")
################################################################################
Use predefined accuracy metric
Accuracy score 0.951 using accuracy
第二个示例:使用自定义准确率指标¶
# Example 2: wrap the hand-written ``accuracy`` function into a scorer and
# hand that scorer to auto-sklearn.
print("#" * 80, "Use self defined accuracy metric", sep="\n")

# The metric works on hard label predictions, so neither class probabilities
# nor decision thresholds are requested.
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu",
    score_func=accuracy,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)

automl_settings = dict(time_left_for_this_task=60, seed=1, metric=accuracy_scorer)
cls = autosklearn.classification.AutoSklearnClassifier(**automl_settings)
cls.fit(X_train, y_train)

# Evaluate on the held-out split with the custom scorer.
predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
################################################################################
Use self defined accuracy metric
Accuracy score 0.958 using accu
第三个示例:使用自定义错误率指标¶
# Example 3: use a hand-written error rate, i.e. a metric to be minimised.
print("#" * 80)
print("Use self defined error metric")
# An error rate is a loss: its best value is 0 and smaller is better, hence
# optimum=0 together with greater_is_better=False.
error_rate = autosklearn.metrics.make_scorer(
    name="error",
    score_func=error,
    optimum=0,
    greater_is_better=False,
    needs_proba=False,
    needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)
# Bind the predictions of THIS classifier to the local name. Storing them as
# an attribute on ``cls`` would leave ``predictions`` pointing at the result
# of an earlier example, and the score below would describe the wrong model.
predictions = cls.predict(X_test)
# NOTE(review): a scorer built with greater_is_better=False appears to return
# the sign-flipped value, so the printed error is expected to be <= 0.
score = error_rate(y_test, predictions)
print(f"Error score {score:.3f} using {error_rate.name:s}")
################################################################################
Use self defined error metric
Error score -0.042 using error
第四个示例:使用带有附加参数的自定义准确率指标¶
# Example 4: a custom accuracy whose score function takes an extra argument.
print("#" * 80, "Use self defined accuracy with additional argument", sep="\n")

# ``extra_argument`` is not a scorer option; it is an additional keyword
# intended for ``accuracy_wk``, which asserts that it receives ``None``.
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_add",
    score_func=accuracy_wk,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)

automl_settings = dict(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=accuracy_scorer,
)
cls = autosklearn.classification.AutoSklearnClassifier(**automl_settings)
cls.fit(X_train, y_train)

# Evaluate on the held-out split with the custom scorer.
predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
################################################################################
Use self defined accuracy with additional argument
Accuracy score 0.958 using accu_add
第五个示例:使用带有附加参数的自定义错误率指标¶
# Example 5: a custom error rate whose score function takes an extra argument.
print("#" * 80)
print("Use self defined error with additional argument")
# An error rate is a loss: its best value is 0 and smaller is better, so
# greater_is_better must be False. Declaring it True while optimum=0 would
# make the search prefer models with a HIGHER error.
# ``extra_argument`` is an additional keyword intended for ``error_wk``, which
# asserts that it receives ``None``.
error_rate = autosklearn.metrics.make_scorer(
    name="error_add",
    score_func=error_wk,
    optimum=0,
    greater_is_better=False,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
# NOTE(review): a scorer built with greater_is_better=False appears to return
# the sign-flipped value, so the printed error is expected to be <= 0.
score = error_rate(y_test, predictions)
print(f"Error score {score:.3f} using {error_rate.name:s}")
################################################################################
Use self defined error with additional argument
[WARNING] [2022-09-20 09:06:56,340:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:06:59,761:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:03,267:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:04,490:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:08,583:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:09,506:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:11,881:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:15,907:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:20,128:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:25,022:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:28,498:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:33,277:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:37,278:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:38,197:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
Error score 0.615 using error_add
第六个示例:使用带有附加参数且需要 X_data 的度量标准¶
# Example 6: finally, *Auto-sklearn* also supports metrics that require the
# input data (aka X_data) to compute a value. This can be useful if one only
# cares about the score on a subset of the data.
#
# needs_X=True requests that the feature matrix is handed to the score
# function; ``consider_col`` and ``val_threshold`` are additional keywords
# intended for ``metric_which_needs_x``.
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_X",
    score_func=metric_which_needs_x,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_X=True,
    needs_threshold=False,
    consider_col=1,
    val_threshold=18.8,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
# Call the raw score function on the test split, passing the test features
# and the same column/threshold that were configured on the scorer.
score = metric_which_needs_x(
    y_test,
    predictions,
    X_data=X_test,
    consider_col=1,
    val_threshold=18.8,
)
# The value is an accuracy (higher is better), so it is labelled as such.
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
[WARNING] [2022-09-20 09:08:26,830:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:28,209:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:29,449:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:31,978:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:33,021:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
Error score 0.919 using accu_X
脚本总运行时间: ( 5 分钟 47.306 秒)