度量标准

Auto-sklearn 支持各种内置度量标准,可在API 中的度量标准部分找到。然而,也可以定义自己的度量标准并用它来拟合和评估模型。以下示例展示了如何为分类问题使用内置和自定义度量标准。

import numpy as np

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification
import autosklearn.metrics

自定义度量标准

def accuracy(solution, prediction):
    # custom function defining accuracy
    return np.mean(solution == prediction)


def error(solution, prediction):
    # custom function defining error
    return np.mean(solution != prediction)


def accuracy_wk(solution, prediction, extra_argument):
    # custom function defining accuracy and accepting an additional argument
    assert extra_argument is None
    return np.mean(solution == prediction)


def error_wk(solution, prediction, extra_argument):
    # custom function defining error and accepting an additional argument
    assert extra_argument is None
    return np.mean(solution != prediction)


def metric_which_needs_x(solution, prediction, X_data, consider_col, val_threshold):
    # custom function defining accuracy
    assert X_data is not None
    rel_idx = X_data[:, consider_col] > val_threshold
    return np.mean(solution[rel_idx] == prediction[rel_idx])

数据加载

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

第一个示例:使用预定义的准确率指标

print("#" * 80)
print("Use predefined accuracy metric")
scorer = autosklearn.metrics.accuracy
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {scorer.name}")
################################################################################
Use predefined accuracy metric
Accuracy score 0.951 using accuracy

第二个示例:使用自定义准确率指标

print("#" * 80)
print("Use self defined accuracy metric")
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu",
    score_func=accuracy,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
################################################################################
Use self defined accuracy metric
Accuracy score 0.958 using accu

第三个示例:使用自定义错误率指标

print("#" * 80)
print("Use self defined error metric")
error_rate = autosklearn.metrics.make_scorer(
    name="error",
    score_func=error,
    optimum=0,
    greater_is_better=False,
    needs_proba=False,
    needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)

cls.predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
print(f"Error score {score:.3f} using {error_rate.name:s}")
################################################################################
Use self defined error metric
Error score -0.042 using error

第四个示例:使用带有附加参数的自定义准确率指标

print("#" * 80)
print("Use self defined accuracy with additional argument")
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_add",
    score_func=accuracy_wk,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30, seed=1, metric=accuracy_scorer
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
################################################################################
Use self defined accuracy with additional argument
Accuracy score 0.958 using accu_add

第五个示例:使用带有附加参数的自定义准确率指标

print("#" * 80)
print("Use self defined error with additional argument")
error_rate = autosklearn.metrics.make_scorer(
    name="error_add",
    score_func=error_wk,
    optimum=0,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
print(f"Error score {score:.3f} using {error_rate.name:s}")
################################################################################
Use self defined error with additional argument
[WARNING] [2022-09-20 09:06:56,340:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:06:59,761:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:03,267:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:04,490:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:08,583:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:09,506:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:11,881:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:15,907:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:20,128:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:25,022:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:28,498:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:33,277:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:37,278:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:07:38,197:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
Error score 0.615 using error_add

第六个示例:使用带有附加参数且需要xdata的度量标准

"""
Finally, *Auto-sklearn* also support metric that require the train data (aka X_data) to
compute a value. This can be useful if one only cares about the score on a subset of the
data.
"""

accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_X",
    score_func=metric_which_needs_x,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_X=True,
    needs_threshold=False,
    consider_col=1,
    val_threshold=18.8,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = metric_which_needs_x(
    y_test,
    predictions,
    X_data=X_test,
    consider_col=1,
    val_threshold=18.8,
)
print(f"Error score {score:.3f} using {accuracy_scorer.name:s}")
[WARNING] [2022-09-20 09:08:26,830:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:28,209:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:29,449:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:31,978:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2022-09-20 09:08:33,021:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
Error score 0.919 using accu_X

脚本总运行时间: ( 5 分钟 47.306 秒)

画廊由Sphinx-Gallery生成