In [1]:
from sklearn_benchmarks.reporting.hp_match import HpMatchReporting
from sklearn_benchmarks.utils import default_run_dir, default_report_config
from pathlib import Path
import pandas as pd
# Lift pandas' display limits (column width, column count, row count) by
# setting each of them to None, so frames shown in the report are not truncated.
for _display_option in ('display.max_colwidth', 'display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

ONNX Runtime vs. scikit-learn

In [2]:
# Default report inputs. Both names are rebound by the "# Parameters" cell
# that follows (presumably injected by papermill — TODO confirm), so these
# values only matter when the notebook is run without that cell.
run_dir = default_run_dir()
report_config = default_report_config("onnx_vs_scikit_learn")
In [3]:
# Parameters
# Hyperparameter-name groups that several estimator entries have in common.
# They are tuples and are expanded into fresh lists below, so every entry of
# the config still owns its own independent list object.
_knn_hp = ("n_neighbors", "algorithm", "n_jobs")
_kmeans_hp = ("init", "algorithm", "n_clusters", "max_iter", "n_init", "tol")
_hgb_hp = (
    "learning_rate",
    "n_iter_no_change",
    "max_leaf_nodes",
    "max_bins",
    "min_samples_leaf",
    "max_iter",
)

# Hyperparameter names listed per benchmarked estimator; an empty list means
# no names are listed for that entry.
_estimator_hp = {
    "sklearn_KNeighborsClassifier_brute_force": list(_knn_hp),
    "sklearnex_KNeighborsClassifier_brute_force": [],
    "sklearn_KNeighborsClassifier_kd_tree": list(_knn_hp),
    "sklearnex_KNeighborsClassifier_kd_tree": [],
    "sklearn_KMeans_tall": list(_kmeans_hp),
    "sklearnex_KMeans_tall": [],
    "sklearn_KMeans_short": list(_kmeans_hp),
    "sklearnex_KMeans_short": [],
    "sklearn_LogisticRegression": [
        "penalty",
        "dual",
        "tol",
        "C",
        "fit_intercept",
        "intercept_scaling",
        "class_weight",
        "random_state",
        "solver",
        "max_iter",
        "multi_class",
        "verbose",
        "warm_start",
        "n_jobs",
        "l1_ratio",
    ],
    "sklearnex_LogisticRegression": [],
    "sklearn_LinearRegression": [],
    "sklearnex_LinearRegression": [],
    "sklearn_Ridge": ["alpha"],
    "sklearnex_Ridge": [],
    "sklearn_TSNE": [],
    "sklearnex_TSNE": [],
    "sklearn_PCA": ["n_components"],
    "sklearnex_PCA": [],
    "sklearn_HistGradientBoostingClassifier_best": list(_hgb_hp),
    # Same names as the "_best" variant, plus early_stopping.
    "sklearn_HistGradientBoostingClassifier": [*_hgb_hp, "early_stopping"],
    "lightgbm": [
        "learning_rate",
        "num_leaves",
        "max_bin",
        "min_data_in_leaf",
        "bagging_fraction",
        "feature_fraction_bynode",
        "n_estimators",
    ],
    "xgboost": [
        "validate_parameters",
        "tree_method",
        "use_label_encoder",
        "learning_rate",
        "max_leaves",
        "max_bin",
        "min_child_weight",
        "subsample",
        "colsample_bynode",
        "n_estimators",
    ],
    # The two catboost entries differ only by "max_leaves" (lossguide only).
    "catboost_lossguide": [
        "allow_writing_files",
        "grow_policy",
        "bootstrap_type",
        "learning_rate",
        "max_leaves",
        "border_count",
        "min_data_in_leaf",
        "subsample",
        "rsm",
        "max_depth",
        "iterations",
    ],
    "catboost_symmetric": [
        "allow_writing_files",
        "grow_policy",
        "bootstrap_type",
        "learning_rate",
        "border_count",
        "min_data_in_leaf",
        "subsample",
        "rsm",
        "max_depth",
        "iterations",
    ],
}

# Estimators that get a section in this report.
_reported_estimators = [
    {
        "title": "KNeighborsClassifier (brute force)",
        "name": "KNeighborsClassifier_brute_force",
        "split_bars_by_parameters": ["n_jobs"],
    },
    {
        "title": "HistGradientBoostingClassifier",
        "name": "HistGradientBoostingClassifier_best",
    },
]

# Full report configuration; it is expanded into keyword arguments of
# HpMatchReporting further down the notebook.
report_config = {
    "log_scale": True,
    "benchmarking_estimator_parameters": _estimator_hp,
    "estimators": _reported_estimators,
    "other_library": "onnx",
    "parameters": {"n_cols": 2},
}

# Directory holding the results of the benchmark run being reported.
run_dir = "./results/local/20220316T125621/"
In [4]:
# run_dir was assigned a plain string in the Parameters cell; rebind it as a
# pathlib.Path before it is handed to HpMatchReporting.
run_dir = Path(run_dir)
In [5]:
# Every top-level key of report_config is passed to HpMatchReporting as a
# keyword argument, together with the run directory; make_report() then
# produces the report shown in this cell's output.
reporting = HpMatchReporting(**report_config, run_dir=run_dir)
reporting.make_report()

We assume here that there is a perfect match between the hyperparameters of both libraries. For a given set of parameters and a given dataset, we compute the speed-up as (time scikit-learn) / (time onnx). For instance, a speed-up of 2 means that onnx is twice as fast as scikit-learn for that set of parameters and that dataset.

Benchmark environment information

System

python 3.8.12 | packaged by conda-forge | (default, Jan 30 2022, 23:42:07) [GCC 9.4.0]
executable /usr/share/miniconda/envs/sklbench/bin/python
machine Linux-5.11.0-1028-azure-x86_64-with-glibc2.10

Dependencies

pip 22.0.4
setuptools 60.9.3
sklearn 1.0.2
numpy 1.22.3
scipy 1.8.0
Cython None
pandas 1.4.1
matplotlib 3.5.1
joblib 1.1.0
threadpoolctl 3.1.0

Threadpool

user_api internal_api prefix filepath version threading_layer architecture num_threads
0 blas openblas libopenblas /usr/share/miniconda/envs/sklbench/lib/libopenblasp-r0.3.18.so 0.3.18 pthreads SkylakeX 2
1 openmp openmp libgomp /usr/share/miniconda/envs/sklbench/lib/libgomp.so.1.0.0 None NaN NaN 2

Cpu_count

cpu_count 2
physical_cpu_count 2