注意
点击此处下载完整示例代码,或通过 Binder 在浏览器中运行此示例
逐次减半¶
这个高级示例演示了如何与 SMAC 回调函数交互,并从运行中获取相关信息(例如迭代次数)。具体而言,它展示了如何选择 SMAC 所使用的强化(intensification)策略,本例中选用的是逐次减半(SuccessiveHalving)。
由此得到的是 BOHB 算法的一个变体:它用逐次减半代替了 Hyperband,因此可以缩写为 BOSH。若要得到原本的 BOHB 算法,只需导入 Hyperband 并将其用作强化策略即可。
from pprint import pprint
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
定义一个实例化 SuccessiveHalving 的回调函数¶
def get_smac_object_callback(budget_type):
    """Create a SMAC factory callback that uses SuccessiveHalving.

    Args:
        budget_type: Value stored under the ``"budget_type"`` key of the
            keyword arguments handed to the target-algorithm runner
            (the examples on this page pass ``"iterations"``).

    Returns:
        The ``get_smac_object`` function defined below, suitable for the
        ``get_smac_object_callback`` argument of ``AutoSklearnClassifier``.
    """

    def get_smac_object(
        scenario_dict,
        seed,
        ta,
        ta_kwargs,
        metalearning_configurations,
        n_jobs,
        dask_client,
        multi_objective_algorithm,
        multi_objective_kwargs,
    ):
        """Build a ``SMAC4AC`` object with SuccessiveHalving as intensifier.

        ``multi_objective_algorithm`` and ``multi_objective_kwargs`` are
        accepted but ignored, as SH does not yet support multi-objective
        optimization.

        Raises:
            ValueError: If ``n_jobs`` is greater than 1, or ``dask_client``
                is truthy and ``dask_client.nthreads()`` has more than one
                entry.
        """
        # Check the parallelism guard first so a misconfiguration is reported
        # before the comparatively heavy SMAC imports are attempted.
        if n_jobs > 1 or (dask_client and len(dask_client.nthreads()) > 1):
            raise ValueError(
                "Please make sure to guard the code invoking Auto-sklearn by "
                "`if __name__ == '__main__'` and remove this exception."
            )

        # Imported lazily so that merely defining the callback does not
        # require SMAC to be importable.
        from smac.facade.smac_ac_facade import SMAC4AC
        from smac.intensification.successive_halving import SuccessiveHalving
        from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost
        from smac.scenario.scenario import Scenario

        scenario = Scenario(scenario_dict)

        # Put the default configuration in front of the meta-learning
        # suggestions; without suggestions, pass None as the initial design.
        if len(metalearning_configurations) > 0:
            default_config = scenario.cs.get_default_configuration()
            initial_configurations = [default_config] + metalearning_configurations
        else:
            initial_configurations = None

        # Work on a shallow copy so the caller's dict is not modified.
        runner_kwargs = dict(ta_kwargs)
        runner_kwargs["budget_type"] = budget_type

        return SMAC4AC(
            scenario=scenario,
            rng=seed,
            runhistory2epm=RunHistory2EPM4LogCost,
            tae_runner=ta,
            tae_runner_kwargs=runner_kwargs,
            initial_configurations=initial_configurations,
            run_id=seed,
            intensifier=SuccessiveHalving,
            intensifier_kwargs={
                "initial_budget": 10.0,
                "max_budget": 100,
                "eta": 2,
                "min_chall": 1,
            },
            n_jobs=n_jobs,
            dask_client=dask_client,
        )

    return get_smac_object
数据加载¶
# Load the breast-cancer dataset as a (features, labels) pair and split it
# into training and test parts, shuffling with a fixed seed (random_state=1).
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    shuffle=True,
    random_state=1,
)
构建并拟合分类器¶
# Configure auto-sklearn so that SMAC is created through the
# SuccessiveHalving callback, using "iterations" as the budget type.
automl = autosklearn.classification.AutoSklearnClassifier(
    get_smac_object_callback=get_smac_object_callback("iterations"),
    # Overall time budget for the task and per-run limit.
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp",
    disable_evaluator_output=False,
    # "holdout" with a train_size of 0.67 is already the default for
    # AutoSklearnClassifier; it is spelled out here only to make the
    # example explicit.
    resampling_strategy="holdout",
    resampling_strategy_arguments={"train_size": 0.67},
    # Restrict the search space to these classifiers and skip feature
    # preprocessing.
    include={
        "feature_preprocessor": ["no_preprocessing"],
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
    },
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Show the models that ended up in the final ensemble.
pprint(automl.show_models(), indent=4)

predictions = automl.predict(X_test)

# Summarize the auto-sklearn run (e.g. number of runs and how many of them
# hit the time limit), then report accuracy on the held-out test split.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
warnings.warn(
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d22a6610>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d18a4040>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0b408e0>,
'model_id': 2,
'rank': 1,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
4: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aa5b0>,
'cost': 0.07801418439716312,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d624bfd0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aae50>,
'model_id': 4,
'rank': 2,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
5: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61d3e80>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62e1dc0>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61d3310>,
'model_id': 5,
'rank': 3,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False, l2_regularization=1e-10,
learning_rate=0.16262682406125173, max_iter=64,
max_leaf_nodes=66, n_iter_no_change=0,
random_state=1, validation_fraction=None,
warm_start=True)},
6: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b362e0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62cd0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b36b80>,
'model_id': 6,
'rank': 4,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.609412172481434e-10,
learning_rate=0.05972079854295879, max_iter=64,
max_leaf_nodes=4, min_samples_leaf=2,
n_iter_no_change=14, random_state=1,
validation_fraction=None, warm_start=True)},
7: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1aa6e80>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0d67910>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1aa6190>,
'model_id': 7,
'rank': 5,
'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=1.3716748930467322e-05, warm_start=True)},
8: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c382b0>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d248c790>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c38ac0>,
'model_id': 8,
'rank': 9,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
min_samples_split=4, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
9: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23022b0>,
'cost': 0.014184397163120588,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d442c940>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23020d0>,
'model_id': 9,
'rank': 10,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
n_estimators=64, n_jobs=1, random_state=1,
warm_start=True)},
10: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1e27100>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d24bb8e0>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1e27eb0>,
'model_id': 10,
'rank': 11,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=5.027708640006448e-08,
learning_rate=0.09750328007832798, max_iter=64,
max_leaf_nodes=1234, min_samples_leaf=25,
n_iter_no_change=1, random_state=1,
validation_fraction=0.08300813783286698,
warm_start=True)},
11: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45af8b0>,
'cost': 0.014184397163120588,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1af2d90>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45af970>,
'model_id': 11,
'rank': 14,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False,
l2_regularization=1.0945814167023392e-10,
learning_rate=0.11042628136263043, max_iter=256,
max_leaf_nodes=30, min_samples_leaf=22,
n_iter_no_change=0, random_state=1,
validation_fraction=None, warm_start=True)},
12: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45e0370>,
'cost': 0.04255319148936165,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3ef7b50>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e85070>,
'model_id': 12,
'rank': 15,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=1, min_samples_leaf=6,
min_samples_split=13, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
13: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45bdc10>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0e5b550>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45bddf0>,
'model_id': 13,
'rank': 16,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=2.506856350040198e-06,
learning_rate=0.04634380160611007, max_iter=64,
max_leaf_nodes=11, min_samples_leaf=41,
n_iter_no_change=17, random_state=1,
validation_fraction=None, warm_start=True)},
14: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b26eb0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ce652e20>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b260a0>,
'model_id': 14,
'rank': 17,
'sklearn_classifier': ExtraTreesClassifier(bootstrap=True, max_features=3, min_samples_leaf=2,
min_samples_split=3, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
16: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d0f45400>,
'cost': 0.049645390070921946,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d457f820>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0f45f70>,
'model_id': 16,
'rank': 18,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=12,
min_samples_leaf=15, min_samples_split=6,
n_estimators=64, n_jobs=1, random_state=1,
warm_start=True)},
17: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2a2d90>,
'cost': 0.099290780141844,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d11a2e20>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb2a21f0>,
'model_id': 17,
'rank': 19,
'sklearn_classifier': SGDClassifier(alpha=9.410144741041167e-05, average=True,
eta0=0.0018055343233337954, learning_rate='constant', loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=0.05082904256838701, warm_start=True)}}
auto-sklearn results:
Dataset name: breast_cancer
Metric: accuracy
Best validation score: 0.985816
Number of target algorithm runs: 21
Number of successful target algorithm runs: 21
Number of crashed target algorithm runs: 0
Number of target algorithms that exceeded the time limit: 0
Number of target algorithms that exceeded the memory limit: 0
Accuracy score 0.9440559440559441
我们也可以将交叉验证与逐次减半结合使用¶
# Reload the data and redo the seeded train/test split so this section can
# be read on its own.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    shuffle=True,
    random_state=1,
)

# Same setup as the holdout run, but with cross-validation as the resampling
# strategy and a separate temporary folder.
automl = autosklearn.classification.AutoSklearnClassifier(
    get_smac_object_callback=get_smac_object_callback("iterations"),
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_01",
    disable_evaluator_output=False,
    resampling_strategy="cv",
    include={
        "feature_preprocessor": ["no_preprocessing"],
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
    },
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Display the final ensemble that auto-sklearn assembled.
pprint(automl.show_models(), indent=4)

# Refit on the training data before predicting on the test split.
automl.refit(X_train, y_train)
predictions = automl.predict(X_test)

# Summarize the auto-sklearn run (e.g. number of runs and how many of them
# hit the time limit), then report accuracy on the held-out test split.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
warnings.warn(
{ 2: { 'cost': 0.046948356807511755,
'ensemble_weight': 0.1,
'estimators': [ { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2165340>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61cd4f0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d21655b0>,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1ef6250>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d60015e0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1ef6910>,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35a60>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6169c40>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d351f0>,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce624a00>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f45430>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624dc0>,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c0e400>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d17c88e0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c0e7c0>,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)}],
'model_id': 2,
'rank': 1,
'voting_model': VotingClassifier(estimators=None, voting='soft')},
4: { 'cost': 0.08215962441314555,
'ensemble_weight': 0.22,
'estimators': [ { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2c8490>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3f33b50>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb2c8520>,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23027c0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45bd760>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23023d0>,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc79ac70>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6140a30>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc79abb0>,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45d05e0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1aa6cd0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45d0ca0>,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b2c1f0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45eea30>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3b2ca60>,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
tol=0.0002600768160857831, warm_start=True)}],
'model_id': 4,
'rank': 2,
'voting_model': VotingClassifier(estimators=None, voting='soft')},
6: { 'cost': 0.04694835680751174,
'ensemble_weight': 0.06,
'estimators': [ { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d25bf9d0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61e5220>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d25bf610>,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.609412172481434e-10,
learning_rate=0.05972079854295879, max_iter=64,
max_leaf_nodes=4, min_samples_leaf=2,
n_iter_no_change=14, random_state=1,
validation_fraction=None, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d248c970>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0fd7520>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0ccba90>,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.609412172481434e-10,
learning_rate=0.05972079854295879, max_iter=64,
max_leaf_nodes=4, min_samples_leaf=2,
n_iter_no_change=14, random_state=1,
validation_fraction=None, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b72e0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d18a4f40>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23b7880>,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.609412172481434e-10,
learning_rate=0.05972079854295879, max_iter=64,
max_leaf_nodes=4, min_samples_leaf=2,
n_iter_no_change=14, random_state=1,
validation_fraction=None, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e68c40>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3dcb3a0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e68760>,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.609412172481434e-10,
learning_rate=0.05972079854295879, max_iter=64,
max_leaf_nodes=4, min_samples_leaf=2,
n_iter_no_change=14, random_state=1,
validation_fraction=None, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3aeeac0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5fe5e80>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3aee730>,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.609412172481434e-10,
learning_rate=0.05972079854295879, max_iter=64,
max_leaf_nodes=4, min_samples_leaf=2,
n_iter_no_change=14, random_state=1,
validation_fraction=None, warm_start=True)}],
'model_id': 6,
'rank': 3,
'voting_model': VotingClassifier(estimators=None, voting='soft')},
7: { 'cost': 0.035211267605633784,
'ensemble_weight': 0.12,
'estimators': [ { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce624880>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0caab80>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624100>,
'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=1.3716748930467322e-05, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c62c40>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3c54520>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c62460>,
'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=1.3716748930467322e-05, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce64b1c0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c15850>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce64b5b0>,
'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=1.3716748930467322e-05, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d624b8e0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3f40520>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d624b280>,
'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=1.3716748930467322e-05, warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2143880>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca561370>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d21437f0>,
'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
max_iter=128, penalty='l1', random_state=1,
tol=1.3716748930467322e-05, warm_start=True)}],
'model_id': 7,
'rank': 4,
'voting_model': VotingClassifier(estimators=None, voting='soft')},
8: { 'cost': 0.039906103286385,
'ensemble_weight': 0.24,
'estimators': [ { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d24bbeb0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05e9b261c0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d248c3a0>,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
min_samples_split=4, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f6fc10>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f2e0a0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3f6f4c0>,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
min_samples_split=4, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1efea90>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d448de20>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1efebe0>,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
min_samples_split=4, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d245a040>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1a8ddf0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d245a7c0>,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
min_samples_split=4, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
{ 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c36df0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca916040>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c36220>,
'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
min_samples_split=4, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)}],
'model_id': 8,
'rank': 5,
'voting_model': VotingClassifier(estimators=None, voting='soft')},
9: { 'cost': 0.030516431924882622,
'ensemble_weight': 0.2,
'estimators': [ { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aaca0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1417310>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aafa0>,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
n_estimators=128, n_jobs=1, random_state=1,
warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b4ad30>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3e6cb50>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3b4aa30>,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
n_estimators=128, n_jobs=1, random_state=1,
warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c40580>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5fa2ca0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c404c0>,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
n_estimators=128, n_jobs=1, random_state=1,
warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb1911f0>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca701940>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb191130>,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
n_estimators=128, n_jobs=1, random_state=1,
warm_start=True)},
{ 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca81ce20>,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca9405e0>,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca81cd60>,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
n_estimators=128, n_jobs=1, random_state=1,
warm_start=True)}],
'model_id': 9,
'rank': 7,
'voting_model': VotingClassifier(estimators=None, voting='soft')}}
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode = stats.mode(array)
auto-sklearn results:
Dataset name: breast_cancer
Metric: accuracy
Best validation score: 0.971831
Number of target algorithm runs: 10
Number of successful target algorithm runs: 9
Number of crashed target algorithm runs: 0
Number of target algorithms that exceeded the time limit: 1
Number of target algorithms that exceeded the memory limit: 0
Accuracy score 0.958041958041958
使用带有逐次减半的迭代拟合交叉验证¶
# Example: successive halving combined with the "cv-iterative-fit" resampling
# strategy; "iterations" is handed to the callback factory as the budget type.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    random_state=1,
    shuffle=True,
)

# Restrict the search to these classifiers and to the "no_preprocessing"
# feature-preprocessor choice.
included_components = {
    "classifier": [
        "extra_trees",
        "gradient_boosting",
        "random_forest",
        "sgd",
        "passive_aggressive",
    ],
    "feature_preprocessor": ["no_preprocessing"],
}

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_cv_02",
    disable_evaluator_output=False,
    resampling_strategy="cv-iterative-fit",
    include=included_components,
    get_smac_object_callback=get_smac_object_callback("iterations"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Display the final ensemble that auto-sklearn put together.
final_ensemble = automl.show_models()
pprint(final_ensemble, indent=4)

# Refit the ensemble members on the training split before predicting.
automl.refit(X_train, y_train)
predictions = automl.predict(X_test)

# Report run statistics (e.g. how many runs succeeded or timed out),
# followed by the accuracy on the held-out test split.
print(automl.sprint_statistics())
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", test_accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
warnings.warn(
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1219fa0>,
'cost': 0.046948356807511755,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62850>,
'ensemble_weight': 0.32,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1219430>,
'model_id': 2,
'rank': 1,
'sklearn_classifier': None},
3: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d438e580>,
'cost': 0.05164319248826292,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45d0e20>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d438eaf0>,
'model_id': 3,
'rank': 2,
'sklearn_classifier': None},
4: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e59880>,
'cost': 0.11267605633802817,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d448ae80>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3dcbd30>,
'model_id': 4,
'rank': 3,
'sklearn_classifier': None},
5: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d5fe5f40>,
'cost': 0.035211267605633804,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62970>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5fe5670>,
'model_id': 5,
'rank': 4,
'sklearn_classifier': None},
6: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca2e0ca0>,
'cost': 0.04694835680751174,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0fd78b0>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca2e0a90>,
'model_id': 6,
'rank': 5,
'sklearn_classifier': None},
7: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aa5b0>,
'cost': 0.03286384976525822,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ad4490>,
'ensemble_weight': 0.36,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aa7f0>,
'model_id': 7,
'rank': 6,
'sklearn_classifier': None}}
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode = stats.mode(array)
auto-sklearn results:
Dataset name: breast_cancer
Metric: accuracy
Best validation score: 0.967136
Number of target algorithm runs: 7
Number of successful target algorithm runs: 6
Number of crashed target algorithm runs: 0
Number of target algorithms that exceeded the time limit: 1
Number of target algorithms that exceeded the memory limit: 0
Accuracy score 0.972027972027972
接下来,我们将看到在 Auto-sklearn 中使用子样本作为预算¶
# Example: successive halving with "subsample" handed to the callback
# factory as the budget type.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    random_state=1,
    shuffle=True,
)

# "holdout" with a train_size of 0.67 is already the default setting of
# AutoSklearnClassifier; it is spelled out here purely for demonstration.
holdout_arguments = {"train_size": 0.67}

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_03",
    disable_evaluator_output=False,
    resampling_strategy="holdout",
    resampling_strategy_arguments=holdout_arguments,
    get_smac_object_callback=get_smac_object_callback("subsample"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Display the final ensemble that auto-sklearn put together.
final_ensemble = automl.show_models()
pprint(final_ensemble, indent=4)

predictions = automl.predict(X_test)

# Report run statistics (e.g. how many runs succeeded or timed out),
# followed by the accuracy on the held-out test split.
print(automl.sprint_statistics())
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", test_accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
warnings.warn(
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b26370>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4240ee0>,
'ensemble_weight': 0.12,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6043d90>,
'model_id': 2,
'rank': 4,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
3: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35100>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62ef100>,
'ensemble_weight': 0.14,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d352e0>,
'model_id': 3,
'rank': 5,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
beta_2=0.9, early_stopping=True,
hidden_layer_sizes=(115, 115, 115),
learning_rate_init=0.00018009776276177523, max_iter=32,
n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
4: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb155af0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc052400>,
'ensemble_weight': 0.12,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624ee0>,
'model_id': 4,
'rank': 6,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.00021148999718383549, beta_1=0.999,
beta_2=0.9, hidden_layer_sizes=(113, 113, 113),
learning_rate_init=0.0007452270241186694, max_iter=32,
n_iter_no_change=32, random_state=1, validation_fraction=0.0,
verbose=0, warm_start=True)},
5: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b7b20>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d11a22e0>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0ccb550>,
'model_id': 5,
'rank': 7,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
6: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2a21c0>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d2302df0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc69a460>,
'model_id': 6,
'rank': 8,
'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
early_stopping=True, hidden_layer_sizes=(101, 101),
learning_rate_init=0.0004684917334431039, max_iter=32,
n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
7: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1e35a00>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62234f0>,
'ensemble_weight': 0.14,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5ff7580>,
'model_id': 7,
'rank': 9,
'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
8: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d237eee0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d13ffd00>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb27b550>,
'model_id': 8,
'rank': 10,
'sklearn_classifier': RandomForestClassifier(max_features=3, min_samples_leaf=2, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
9: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2413eb0>,
'cost': 0.07801418439716312,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c007f0>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d618dd00>,
'model_id': 9,
'rank': 11,
'sklearn_classifier': ExtraTreesClassifier(max_features=6, min_samples_split=10, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
10: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d6140b20>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3e089a0>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1e7a130>,
'model_id': 10,
'rank': 12,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=4, min_samples_split=6,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)}}
auto-sklearn results:
Dataset name: breast_cancer
Metric: accuracy
Best validation score: 0.978723
Number of target algorithm runs: 12
Number of successful target algorithm runs: 12
Number of crashed target algorithm runs: 0
Number of target algorithms that exceeded the time limit: 0
Number of target algorithms that exceeded the memory limit: 0
Accuracy score 0.9440559440559441
混合预算方法¶
最后,还有一种混合(mixed)预算类型:在可能的情况下使用迭代次数作为预算,否则使用子样本。
# Example: successive halving with "mixed" handed to the callback factory
# as the budget type.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    random_state=1,
    shuffle=True,
)

# "holdout" with a train_size of 0.67 is already the default setting of
# AutoSklearnClassifier; it is spelled out here purely for demonstration.
holdout_arguments = {"train_size": 0.67}

# Restrict the search to these classifiers.
included_components = {
    "classifier": ["extra_trees", "gradient_boosting", "random_forest", "sgd"],
}

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_04",
    disable_evaluator_output=False,
    resampling_strategy="holdout",
    resampling_strategy_arguments=holdout_arguments,
    include=included_components,
    get_smac_object_callback=get_smac_object_callback("mixed"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Display the final ensemble that auto-sklearn put together.
final_ensemble = automl.show_models()
pprint(final_ensemble, indent=4)

predictions = automl.predict(X_test)

# Report run statistics (e.g. how many runs succeeded or timed out),
# followed by the accuracy on the held-out test split.
print(automl.sprint_statistics())
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", test_accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
warnings.warn(
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d6873430>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc27dcd0>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6873f70>,
'model_id': 2,
'rank': 1,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
4: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b7190>,
'cost': 0.014184397163120588,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d19ad490>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23b7400>,
'model_id': 4,
'rank': 5,
'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
6: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce6245e0>,
'cost': 0.04255319148936165,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cb1d66d0>,
'ensemble_weight': 0.12,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d13ffb50>,
'model_id': 6,
'rank': 6,
'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=64,
n_jobs=1, random_state=1, warm_start=True)},
9: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e44730>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61cd820>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e44790>,
'model_id': 9,
'rank': 7,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=0.005326508887463406,
learning_rate=0.060800813211425456, max_iter=64,
max_leaf_nodes=6, min_samples_leaf=5,
n_iter_no_change=5, random_state=1,
validation_fraction=None, warm_start=True)},
11: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ad40a0>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05f240fd30>,
'ensemble_weight': 0.26,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3ad4cd0>,
'model_id': 11,
'rank': 8,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.387912939529945e-10,
learning_rate=0.30755227194768237, max_iter=64,
max_leaf_nodes=60, min_samples_leaf=39,
n_iter_no_change=18, random_state=1,
validation_fraction=None, warm_start=True)},
14: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b265b0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0ccb940>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b26f70>,
'model_id': 14,
'rank': 9,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=448, min_samples_leaf=2,
min_samples_split=20, n_estimators=64, n_jobs=1,
random_state=1, warm_start=True)},
16: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f33c40>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3d350a0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6403e20>,
'model_id': 16,
'rank': 10,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=8.057778875694463e-05,
learning_rate=0.09179220974965213, max_iter=64,
max_leaf_nodes=200, n_iter_no_change=18,
random_state=1,
validation_fraction=0.14295295806077554,
warm_start=True)},
17: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ff3280>,
'cost': 0.07801418439716312,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ef67c0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62ef3a0>,
'model_id': 17,
'rank': 11,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=16, n_estimators=64,
n_jobs=1, random_state=1, warm_start=True)}}
auto-sklearn results:
Dataset name: breast_cancer
Metric: accuracy
Best validation score: 0.985816
Number of target algorithm runs: 19
Number of successful target algorithm runs: 19
Number of crashed target algorithm runs: 0
Number of target algorithms that exceeded the time limit: 0
Number of target algorithms that exceeded the memory limit: 0
Accuracy score 0.951048951048951
脚本总运行时间: ( 3 分钟 10.654 秒)