逐次减半

这个高级示例演示了如何与 SMAC 回调函数交互,并从运行中获取相关信息(例如迭代次数)。特别地,它展示了如何选择 SMAC 所使用的强化(intensification)策略,本例中使用的是逐次减半(SuccessiveHalving)。

由此得到的是 BOHB 算法的一个变体:它用逐次减半代替了 Hyperband,可以缩写为 BOSH。若要得到原本的 BOHB 算法,只需导入 Hyperband 并将其用作强化(intensification)策略即可。

from pprint import pprint

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

定义一个实例化 SuccessiveHalving 的回调函数

def get_smac_object_callback(budget_type):
    """Create the SMAC factory that is handed to auto-sklearn.

    The returned closure builds a ``SMAC4AC`` facade that uses
    ``SuccessiveHalving`` as its intensifier.

    Parameters
    ----------
    budget_type
        Value stored under ``ta_kwargs["budget_type"]`` before the target
        algorithm runner keyword arguments are passed on to SMAC.

    Returns
    -------
    callable
        ``get_smac_object``; see its signature for the arguments it expects.
    """

    def get_smac_object(
        scenario_dict,
        seed,
        ta,
        ta_kwargs,
        metalearning_configurations,
        n_jobs,
        dask_client,
        multi_objective_algorithm,
        multi_objective_kwargs,
    ):
        """Assemble a ``SMAC4AC`` object driven by SuccessiveHalving.

        ``multi_objective_algorithm`` is ignored because SH does not yet
        support multi-objective optimization; ``multi_objective_kwargs`` is
        not used in this function either.

        Note that ``ta_kwargs`` is modified in place: its ``"budget_type"``
        entry is overwritten with the ``budget_type`` captured by the
        enclosing function.

        Raises
        ------
        ValueError
            If ``n_jobs`` is greater than 1, or if a dask client whose
            ``nthreads()`` mapping has more than one entry is supplied.
        """
        # The SMAC imports are kept local so they are only resolved when the
        # callback is actually invoked.
        from smac.facade.smac_ac_facade import SMAC4AC
        from smac.intensification.successive_halving import SuccessiveHalving
        from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost
        from smac.scenario.scenario import Scenario

        # Short-circuits exactly like the inline condition: the dask client
        # is only queried when n_jobs alone does not already trigger the error.
        wants_parallelism = n_jobs > 1 or (
            dask_client and len(dask_client.nthreads()) > 1
        )
        if wants_parallelism:
            raise ValueError(
                "Please make sure to guard the code invoking Auto-sklearn by "
                "`if __name__ == '__main__'` and remove this exception."
            )

        smac_scenario = Scenario(scenario_dict)

        # With meta-learning suggestions available, start from the default
        # configuration followed by those suggestions; otherwise pass None.
        warm_start_configs = (
            [smac_scenario.cs.get_default_configuration()]
            + metalearning_configurations
            if len(metalearning_configurations) > 0
            else None
        )

        ta_kwargs["budget_type"] = budget_type

        successive_halving_settings = {
            "initial_budget": 10.0,
            "max_budget": 100,
            "eta": 2,
            "min_chall": 1,
        }

        return SMAC4AC(
            scenario=smac_scenario,
            rng=seed,
            run_id=seed,
            runhistory2epm=RunHistory2EPM4LogCost,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            initial_configurations=warm_start_configs,
            intensifier=SuccessiveHalving,
            intensifier_kwargs=successive_halving_settings,
            n_jobs=n_jobs,
            dask_client=dask_client,
        )

    return get_smac_object

数据加载

# Fetch the breast-cancer dataset as a (features, labels) pair.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Shuffled train/test split with a fixed seed so the example is repeatable.
# No explicit test_size/train_size is passed, so scikit-learn's defaults apply.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    shuffle=True,
    random_state=1,
)

构建并拟合分类器

# Configure auto-sklearn to search with the SuccessiveHalving-based SMAC
# object built by the callback, using "iterations" as the budget type.
automl = autosklearn.classification.AutoSklearnClassifier(
    # Time limits.
    time_left_for_this_task=40,
    per_run_time_limit=10,
    # Output handling.
    tmp_folder="/tmp/autosklearn_sh_example_tmp",
    disable_evaluator_output=False,
    # Holdout with a train_size of 0.67 is already what
    # AutoSklearnClassifier uses by default; it is spelled out here purely
    # for demonstration.
    resampling_strategy="holdout",
    resampling_strategy_arguments={"train_size": 0.67},
    # Components the search is restricted to.
    include={
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
        "feature_preprocessor": ["no_preprocessing"],
    },
    get_smac_object_callback=get_smac_object_callback("iterations"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Show the models that ended up in the final ensemble.
ensemble_models = automl.show_models()
pprint(ensemble_models, indent=4)

predictions = automl.predict(X_test)

# Summary of the auto-sklearn run, e.g. how many target algorithm runs were
# performed and how many of them failed with a time out.
run_summary = automl.sprint_statistics()
print(run_summary)

accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d22a6610>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d18a4040>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0b408e0>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aa5b0>,
           'cost': 0.07801418439716312,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d624bfd0>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aae50>,
           'model_id': 4,
           'rank': 2,
           'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61d3e80>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62e1dc0>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61d3310>,
           'model_id': 5,
           'rank': 3,
           'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False, l2_regularization=1e-10,
                               learning_rate=0.16262682406125173, max_iter=64,
                               max_leaf_nodes=66, n_iter_no_change=0,
                               random_state=1, validation_fraction=None,
                               warm_start=True)},
    6: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b362e0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62cd0>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b36b80>,
           'model_id': 6,
           'rank': 4,
           'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1aa6e80>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0d67910>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1aa6190>,
           'model_id': 7,
           'rank': 5,
           'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
    8: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c382b0>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d248c790>,
           'ensemble_weight': 0.08,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c38ac0>,
           'model_id': 8,
           'rank': 9,
           'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23022b0>,
           'cost': 0.014184397163120588,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d442c940>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23020d0>,
           'model_id': 9,
           'rank': 10,
           'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=64, n_jobs=1, random_state=1,
                     warm_start=True)},
    10: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1e27100>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d24bb8e0>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1e27eb0>,
            'model_id': 10,
            'rank': 11,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=5.027708640006448e-08,
                               learning_rate=0.09750328007832798, max_iter=64,
                               max_leaf_nodes=1234, min_samples_leaf=25,
                               n_iter_no_change=1, random_state=1,
                               validation_fraction=0.08300813783286698,
                               warm_start=True)},
    11: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45af8b0>,
            'cost': 0.014184397163120588,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1af2d90>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45af970>,
            'model_id': 11,
            'rank': 14,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False,
                               l2_regularization=1.0945814167023392e-10,
                               learning_rate=0.11042628136263043, max_iter=256,
                               max_leaf_nodes=30, min_samples_leaf=22,
                               n_iter_no_change=0, random_state=1,
                               validation_fraction=None, warm_start=True)},
    12: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45e0370>,
            'cost': 0.04255319148936165,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3ef7b50>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e85070>,
            'model_id': 12,
            'rank': 15,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=1, min_samples_leaf=6,
                       min_samples_split=13, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
    13: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45bdc10>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0e5b550>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45bddf0>,
            'model_id': 13,
            'rank': 16,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.506856350040198e-06,
                               learning_rate=0.04634380160611007, max_iter=64,
                               max_leaf_nodes=11, min_samples_leaf=41,
                               n_iter_no_change=17, random_state=1,
                               validation_fraction=None, warm_start=True)},
    14: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b26eb0>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ce652e20>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b260a0>,
            'model_id': 14,
            'rank': 17,
            'sklearn_classifier': ExtraTreesClassifier(bootstrap=True, max_features=3, min_samples_leaf=2,
                     min_samples_split=3, n_estimators=64, n_jobs=1,
                     random_state=1, warm_start=True)},
    16: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d0f45400>,
            'cost': 0.049645390070921946,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d457f820>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0f45f70>,
            'model_id': 16,
            'rank': 18,
            'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=12,
                       min_samples_leaf=15, min_samples_split=6,
                       n_estimators=64, n_jobs=1, random_state=1,
                       warm_start=True)},
    17: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2a2d90>,
            'cost': 0.099290780141844,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d11a2e20>,
            'ensemble_weight': 0.08,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb2a21f0>,
            'model_id': 17,
            'rank': 19,
            'sklearn_classifier': SGDClassifier(alpha=9.410144741041167e-05, average=True,
              eta0=0.0018055343233337954, learning_rate='constant', loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=0.05082904256838701, warm_start=True)}}
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.985816
  Number of target algorithm runs: 21
  Number of successful target algorithm runs: 21
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.9440559440559441

我们也可以将交叉验证与逐次减半结合使用

# Reload and split the data so this section can be read on its own.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    shuffle=True,
    random_state=1,
)

# Same search setup as the holdout run, but evaluated with cross-validation
# and written to a separate temporary folder.
automl = autosklearn.classification.AutoSklearnClassifier(
    # Time limits.
    time_left_for_this_task=40,
    per_run_time_limit=10,
    # Output handling.
    tmp_folder="/tmp/autosklearn_sh_example_tmp_01",
    disable_evaluator_output=False,
    resampling_strategy="cv",
    # Components the search is restricted to.
    include={
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
        "feature_preprocessor": ["no_preprocessing"],
    },
    get_smac_object_callback=get_smac_object_callback("iterations"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Show the final ensemble constructed by auto-sklearn.
ensemble_models = automl.show_models()
pprint(ensemble_models, indent=4)

# NOTE(review): presumably the refit is needed because the models fitted
# during cross-validation were not trained on the full training set --
# confirm against the auto-sklearn documentation.
automl.refit(X_train, y_train)
predictions = automl.predict(X_test)

# Summary of the auto-sklearn run, e.g. how many target algorithm runs were
# performed and how many of them failed with a time out.
run_summary = automl.sprint_statistics()
print(run_summary)

accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'cost': 0.046948356807511755,
           'ensemble_weight': 0.1,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2165340>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61cd4f0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d21655b0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1ef6250>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d60015e0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1ef6910>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35a60>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6169c40>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d351f0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce624a00>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f45430>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624dc0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c0e400>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d17c88e0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c0e7c0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)}],
           'model_id': 2,
           'rank': 1,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    4: {   'cost': 0.08215962441314555,
           'ensemble_weight': 0.22,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2c8490>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3f33b50>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb2c8520>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23027c0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45bd760>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23023d0>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc79ac70>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6140a30>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc79abb0>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45d05e0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1aa6cd0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45d0ca0>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b2c1f0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45eea30>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3b2ca60>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)}],
           'model_id': 4,
           'rank': 2,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    6: {   'cost': 0.04694835680751174,
           'ensemble_weight': 0.06,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d25bf9d0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61e5220>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d25bf610>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d248c970>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0fd7520>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0ccba90>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b72e0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d18a4f40>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23b7880>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e68c40>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3dcb3a0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e68760>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3aeeac0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5fe5e80>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3aee730>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)}],
           'model_id': 6,
           'rank': 3,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    7: {   'cost': 0.035211267605633784,
           'ensemble_weight': 0.12,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce624880>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0caab80>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624100>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c62c40>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3c54520>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c62460>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce64b1c0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c15850>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce64b5b0>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d624b8e0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3f40520>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d624b280>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2143880>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca561370>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d21437f0>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)}],
           'model_id': 7,
           'rank': 4,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    8: {   'cost': 0.039906103286385,
           'ensemble_weight': 0.24,
           'estimators': [   {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d24bbeb0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05e9b261c0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d248c3a0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f6fc10>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f2e0a0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3f6f4c0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1efea90>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d448de20>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1efebe0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d245a040>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1a8ddf0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d245a7c0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c36df0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca916040>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c36220>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)}],
           'model_id': 8,
           'rank': 5,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    9: {   'cost': 0.030516431924882622,
           'ensemble_weight': 0.2,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aaca0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1417310>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aafa0>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b4ad30>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3e6cb50>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3b4aa30>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c40580>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5fa2ca0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c404c0>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb1911f0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca701940>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb191130>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca81ce20>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca9405e0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca81cd60>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)}],
           'model_id': 9,
           'rank': 7,
           'voting_model': VotingClassifier(estimators=None, voting='soft')}}
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.971831
  Number of target algorithm runs: 10
  Number of successful target algorithm runs: 9
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.958041958041958

使用带有逐次减半的迭代拟合交叉验证

# Load the breast cancer data and hold out a test split for the final score.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
split = sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True)
X_train, X_test, y_train, y_test = split

# Components handed to auto-sklearn through ``include``.
included_components = {
    "classifier": [
        "extra_trees",
        "gradient_boosting",
        "random_forest",
        "sgd",
        "passive_aggressive",
    ],
    "feature_preprocessor": ["no_preprocessing"],
}

# The callback factory records "iterations" as the budget type for the runs.
iterations_callback = get_smac_object_callback("iterations")

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_cv_02",
    disable_evaluator_output=False,
    resampling_strategy="cv-iterative-fit",
    include=included_components,
    get_smac_object_callback=iterations_callback,
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Show the final ensemble that auto-sklearn assembled.
ensemble_members = automl.show_models()
pprint(ensemble_members, indent=4)

# Refit on the training split, then predict on the held-out test split.
automl.refit(X_train, y_train)
predictions = automl.predict(X_test)

# Report run statistics (e.g. number of target algorithm runs and how many
# of them hit the time limit), followed by the test accuracy.
run_statistics = automl.sprint_statistics()
print(run_statistics)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1219fa0>,
           'cost': 0.046948356807511755,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62850>,
           'ensemble_weight': 0.32,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1219430>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': None},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d438e580>,
           'cost': 0.05164319248826292,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45d0e20>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d438eaf0>,
           'model_id': 3,
           'rank': 2,
           'sklearn_classifier': None},
    4: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e59880>,
           'cost': 0.11267605633802817,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d448ae80>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3dcbd30>,
           'model_id': 4,
           'rank': 3,
           'sklearn_classifier': None},
    5: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d5fe5f40>,
           'cost': 0.035211267605633804,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62970>,
           'ensemble_weight': 0.08,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5fe5670>,
           'model_id': 5,
           'rank': 4,
           'sklearn_classifier': None},
    6: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca2e0ca0>,
           'cost': 0.04694835680751174,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0fd78b0>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca2e0a90>,
           'model_id': 6,
           'rank': 5,
           'sklearn_classifier': None},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aa5b0>,
           'cost': 0.03286384976525822,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ad4490>,
           'ensemble_weight': 0.36,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aa7f0>,
           'model_id': 7,
           'rank': 6,
           'sklearn_classifier': None}}
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.967136
  Number of target algorithm runs: 7
  Number of successful target algorithm runs: 6
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.972027972027972

接下来,我们来看如何在 Auto-sklearn 中使用子样本作为预算。

# Load the breast cancer data and hold out a test split for the final score.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
split = sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True)
X_train, X_test, y_train, y_test = split

# "holdout" with a train_size of 0.67 is already what AutoSklearnClassifier
# uses by default; both values are spelled out here purely for demonstration.
holdout_arguments = {"train_size": 0.67}

# The callback factory records "subsample" as the budget type for the runs.
subsample_callback = get_smac_object_callback("subsample")

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_03",
    disable_evaluator_output=False,
    resampling_strategy="holdout",
    resampling_strategy_arguments=holdout_arguments,
    get_smac_object_callback=subsample_callback,
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Show the final ensemble that auto-sklearn assembled.
ensemble_members = automl.show_models()
pprint(ensemble_members, indent=4)

predictions = automl.predict(X_test)

# Report run statistics (e.g. number of target algorithm runs and how many
# of them hit the time limit), followed by the test accuracy.
run_statistics = automl.sprint_statistics()
print(run_statistics)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b26370>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4240ee0>,
           'ensemble_weight': 0.12,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6043d90>,
           'model_id': 2,
           'rank': 4,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35100>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62ef100>,
           'ensemble_weight': 0.14,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d352e0>,
           'model_id': 3,
           'rank': 5,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
              beta_2=0.9, early_stopping=True,
              hidden_layer_sizes=(115, 115, 115),
              learning_rate_init=0.00018009776276177523, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb155af0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc052400>,
           'ensemble_weight': 0.12,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624ee0>,
           'model_id': 4,
           'rank': 6,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.00021148999718383549, beta_1=0.999,
              beta_2=0.9, hidden_layer_sizes=(113, 113, 113),
              learning_rate_init=0.0007452270241186694, max_iter=32,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b7b20>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d11a22e0>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0ccb550>,
           'model_id': 5,
           'rank': 7,
           'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2a21c0>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d2302df0>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc69a460>,
           'model_id': 6,
           'rank': 8,
           'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
              early_stopping=True, hidden_layer_sizes=(101, 101),
              learning_rate_init=0.0004684917334431039, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1e35a00>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62234f0>,
           'ensemble_weight': 0.14,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5ff7580>,
           'model_id': 7,
           'rank': 9,
           'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    8: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d237eee0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d13ffd00>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb27b550>,
           'model_id': 8,
           'rank': 10,
           'sklearn_classifier': RandomForestClassifier(max_features=3, min_samples_leaf=2, n_estimators=512,
                       n_jobs=1, random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2413eb0>,
           'cost': 0.07801418439716312,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c007f0>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d618dd00>,
           'model_id': 9,
           'rank': 11,
           'sklearn_classifier': ExtraTreesClassifier(max_features=6, min_samples_split=10, n_estimators=512,
                     n_jobs=1, random_state=1, warm_start=True)},
    10: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d6140b20>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3e089a0>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1e7a130>,
            'model_id': 10,
            'rank': 12,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=4, min_samples_split=6,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)}}
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.978723
  Number of target algorithm runs: 12
  Number of successful target algorithm runs: 12
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.9440559440559441

混合预算方法

最后,还有一种混合预算类型:在可能的情况下使用迭代次数作为预算,否则使用子样本。

# Load the breast cancer data and hold out a test split for the final score.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
split = sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True)
X_train, X_test, y_train, y_test = split

# Classifiers handed to auto-sklearn through ``include``.
included_components = {
    "classifier": ["extra_trees", "gradient_boosting", "random_forest", "sgd"]
}

# "holdout" with a train_size of 0.67 is already what AutoSklearnClassifier
# uses by default; both values are spelled out here purely for demonstration.
holdout_arguments = {"train_size": 0.67}

# The callback factory records "mixed" as the budget type for the runs.
mixed_callback = get_smac_object_callback("mixed")

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_04",
    disable_evaluator_output=False,
    resampling_strategy="holdout",
    resampling_strategy_arguments=holdout_arguments,
    include=included_components,
    get_smac_object_callback=mixed_callback,
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Show the final ensemble that auto-sklearn assembled.
ensemble_members = automl.show_models()
pprint(ensemble_members, indent=4)

predictions = automl.predict(X_test)

# Report run statistics (e.g. number of target algorithm runs and how many
# of them hit the time limit), followed by the test accuracy.
run_statistics = automl.sprint_statistics()
print(run_statistics)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", accuracy)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d6873430>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc27dcd0>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6873f70>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b7190>,
           'cost': 0.014184397163120588,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d19ad490>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23b7400>,
           'model_id': 4,
           'rank': 5,
           'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce6245e0>,
           'cost': 0.04255319148936165,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cb1d66d0>,
           'ensemble_weight': 0.12,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d13ffb50>,
           'model_id': 6,
           'rank': 6,
           'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=64,
                     n_jobs=1, random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e44730>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61cd820>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e44790>,
           'model_id': 9,
           'rank': 7,
           'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=0.005326508887463406,
                               learning_rate=0.060800813211425456, max_iter=64,
                               max_leaf_nodes=6, min_samples_leaf=5,
                               n_iter_no_change=5, random_state=1,
                               validation_fraction=None, warm_start=True)},
    11: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ad40a0>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05f240fd30>,
            'ensemble_weight': 0.26,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3ad4cd0>,
            'model_id': 11,
            'rank': 8,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.387912939529945e-10,
                               learning_rate=0.30755227194768237, max_iter=64,
                               max_leaf_nodes=60, min_samples_leaf=39,
                               n_iter_no_change=18, random_state=1,
                               validation_fraction=None, warm_start=True)},
    14: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b265b0>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0ccb940>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b26f70>,
            'model_id': 14,
            'rank': 9,
            'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=448, min_samples_leaf=2,
                     min_samples_split=20, n_estimators=64, n_jobs=1,
                     random_state=1, warm_start=True)},
    16: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f33c40>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3d350a0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6403e20>,
            'model_id': 16,
            'rank': 10,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=8.057778875694463e-05,
                               learning_rate=0.09179220974965213, max_iter=64,
                               max_leaf_nodes=200, n_iter_no_change=18,
                               random_state=1,
                               validation_fraction=0.14295295806077554,
                               warm_start=True)},
    17: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ff3280>,
            'cost': 0.07801418439716312,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ef67c0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62ef3a0>,
            'model_id': 17,
            'rank': 11,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=16, n_estimators=64,
                       n_jobs=1, random_state=1, warm_start=True)}}
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.985816
  Number of target algorithm runs: 19
  Number of successful target algorithm runs: 19
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.951048951048951

脚本总运行时间:(3 分钟 10.654 秒)

图库由 Sphinx-Gallery 生成