from sklearn_benchmarks.reporting.hp_match import HpMatchReporting
from sklearn_benchmarks.utils import default_run_dir, default_report_config
from pathlib import Path
import pandas as pd
# Remove pandas' display limits (cell width, column count, row count) by
# setting each option to None, so the tables rendered below are not truncated.
for _display_option in (
    "display.max_colwidth",
    "display.max_columns",
    "display.max_rows",
):
    pd.set_option(_display_option, None)

scikit-learn-intelex (Intel® oneAPI) vs. scikit-learn

# Initial values taken from the sklearn_benchmarks helpers: the default run
# directory, and the report configuration looked up with the key
# "scikit_learn_intelex_vs_scikit_learn".
# NOTE(review): both names are reassigned further down in this file (the
# "# Parameters" section and the hard-coded results path), so these look like
# fallbacks for when no parameters are supplied — confirm.
run_dir = default_run_dir()
report_config = default_report_config("scikit_learn_intelex_vs_scikit_learn")

# Parameters
def _build_report_config():
    """Assemble the report configuration for the sklearnex-vs-sklearn report.

    The returned mapping has five keys, in this order: ``log_scale``,
    ``benchmarking_estimator_parameters``, ``estimators``, ``other_library``
    and ``parameters``. Hyper-parameter names shared by several estimator
    keys are written once as tuples and copied into a fresh list per key, so
    no two entries share a mutable object.
    """
    knn = ("n_neighbors", "algorithm", "n_jobs")
    kmeans = ("init", "algorithm", "n_clusters", "max_iter", "n_init", "tol")
    logistic_regression = (
        "penalty",
        "dual",
        "tol",
        "C",
        "fit_intercept",
        "intercept_scaling",
        "class_weight",
        "random_state",
        "solver",
        "max_iter",
        "multi_class",
        "verbose",
        "warm_start",
        "n_jobs",
        "l1_ratio",
    )
    hist_gradient_boosting = (
        "learning_rate",
        "n_iter_no_change",
        "max_leaf_nodes",
        "max_bins",
        "min_samples_leaf",
        "max_iter",
    )
    catboost_head = (
        "allow_writing_files",
        "grow_policy",
        "bootstrap_type",
        "learning_rate",
    )
    catboost_tail = (
        "border_count",
        "min_data_in_leaf",
        "subsample",
        "rsm",
        "max_depth",
        "iterations",
    )

    # Estimators that exist in both libraries: the sklearn entry lists the
    # hyper-parameter names, the matching sklearnex entry is always empty.
    paired = (
        ("KNeighborsClassifier_brute_force", knn),
        ("KNeighborsClassifier_kd_tree", knn),
        ("KMeans_tall", kmeans),
        ("KMeans_short", kmeans),
        ("LogisticRegression", logistic_regression),
        ("LinearRegression", ()),
        ("Ridge", ("alpha",)),
        ("TSNE", ()),
        ("PCA", ("n_components",)),
    )
    estimator_parameters = {}
    for estimator_name, tuned in paired:
        estimator_parameters[f"sklearn_{estimator_name}"] = list(tuned)
        estimator_parameters[f"sklearnex_{estimator_name}"] = []

    # Gradient-boosting entries have no sklearnex counterpart in this mapping.
    estimator_parameters["sklearn_HistGradientBoostingClassifier_best"] = list(
        hist_gradient_boosting
    )
    estimator_parameters["sklearn_HistGradientBoostingClassifier"] = [
        *hist_gradient_boosting,
        "early_stopping",
    ]
    estimator_parameters["lightgbm"] = [
        "learning_rate",
        "num_leaves",
        "max_bin",
        "min_data_in_leaf",
        "bagging_fraction",
        "feature_fraction_bynode",
        "n_estimators",
    ]
    estimator_parameters["xgboost"] = [
        "validate_parameters",
        "tree_method",
        "use_label_encoder",
        "learning_rate",
        "max_leaves",
        "max_bin",
        "min_child_weight",
        "subsample",
        "colsample_bynode",
        "n_estimators",
    ]
    # The two catboost variants differ only by "max_leaves" (lossguide only).
    estimator_parameters["catboost_lossguide"] = [
        *catboost_head,
        "max_leaves",
        *catboost_tail,
    ]
    estimator_parameters["catboost_symmetric"] = [*catboost_head, *catboost_tail]

    # Report sections, in display order; only the two KNeighborsClassifier
    # sections carry a "split_bars_by_parameters" entry.
    estimators = [
        dict(
            title="KNeighborsClassifier (brute force)",
            name="KNeighborsClassifier_brute_force",
            split_bars_by_parameters=["n_jobs"],
        ),
        dict(
            title="KNeighborsClassifier (KD tree)",
            name="KNeighborsClassifier_kd_tree",
            split_bars_by_parameters=["n_jobs"],
        ),
        dict(title="KMeans (tall)", name="KMeans_tall"),
        dict(title="KMeans (short)", name="KMeans_short"),
        dict(title="Logistic Regression", name="LogisticRegression"),
        dict(title="LinearRegression", name="LinearRegression"),
        dict(title="Ridge", name="Ridge"),
        dict(title="TSNE", name="TSNE"),
        dict(title="PCA", name="PCA"),
    ]

    return dict(
        log_scale=True,
        benchmarking_estimator_parameters=estimator_parameters,
        estimators=estimators,
        other_library="sklearnex",
        parameters={"n_cols": 2},
    )


report_config = _build_report_config()
# Results directory of the benchmark run this report is generated from,
# wrapped in a Path before being handed to the reporting class.
run_dir = Path("./results/local/20220316T125621/")

# Every key of report_config is forwarded as a keyword argument, together
# with the run directory; make_report() then produces the report.
reporting = HpMatchReporting(run_dir=run_dir, **report_config)
reporting.make_report()

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	n_jobs	n_neighbors	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
0	fit	100000	100000	100	1.055	0.020	0.076	-1	1	0.054	0.001	19.418	19.419	Download	Download
3	fit	100000	100000	100	1.045	0.016	0.077	-1	5	0.047	0.000	22.378	22.379	Download	Download
6	fit	100000	100000	100	1.040	0.009	0.077	1	100	0.047	0.000	21.978	21.978	Download	Download
9	fit	100000	100000	100	1.047	0.022	0.076	-1	100	0.053	0.000	19.765	19.765	Download	Download
12	fit	100000	100000	100	1.061	0.021	0.075	1	5	0.047	0.000	22.473	22.474	Download	Download
15	fit	100000	100000	100	1.041	0.016	0.077	1	1	0.053	0.000	19.699	19.700	Download	Download
18	fit	100000	100000	2	0.042	0.001	0.038	-1	1	0.009	0.000	4.910	4.913	Download	Download
21	fit	100000	100000	2	0.049	0.001	0.033	-1	5	0.009	0.000	5.647	5.651	Download	Download
24	fit	100000	100000	2	0.047	0.000	0.034	1	100	0.009	0.000	5.492	5.496	Download	Download
27	fit	100000	100000	2	0.047	0.000	0.034	-1	100	0.009	0.000	5.433	5.437	Download	Download
30	fit	100000	100000	2	0.047	0.000	0.034	1	5	0.009	0.000	5.563	5.566	Download	Download
33	fit	100000	100000	2	0.048	0.000	0.034	1	1	0.009	0.000	5.598	5.601	Download	Download

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	latency	n_jobs	n_neighbors	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
1	predict	100000	1000	100	2.425	0.121	0.000	0.002	-1	1	0.172	0.001	14.106	14.107	Download	Download
2	predict	100000	1	100	0.024	0.002	0.000	0.024	-1	1	0.009	0.000	2.579	2.581	Download	Download
4	predict	100000	1000	100	3.015	0.064	0.000	0.003	-1	5	0.175	0.001	17.201	17.201	Download	Download
5	predict	100000	1	100	0.027	0.002	0.000	0.027	-1	5	0.010	0.002	2.707	2.765	Download	Download
7	predict	100000	1000	100	1.912	0.006	0.000	0.002	1	100	0.210	0.002	9.123	9.124	Download	Download
8	predict	100000	1	100	0.021	0.000	0.000	0.021	1	100	0.009	0.000	2.273	2.274	Download	Download
10	predict	100000	1000	100	3.229	0.141	0.000	0.003	-1	100	0.212	0.002	15.249	15.250	Download	Download
11	predict	100000	1	100	0.026	0.003	0.000	0.026	-1	100	0.009	0.000	2.793	2.796	Download	Download
13	predict	100000	1000	100	1.835	0.010	0.000	0.002	1	5	0.175	0.001	10.510	10.510	Download	Download
14	predict	100000	1	100	0.020	0.000	0.000	0.020	1	5	0.009	0.000	2.156	2.157	Download	Download
16	predict	100000	1000	100	1.209	0.015	0.001	0.001	1	1	0.171	0.000	7.067	7.067	Download	Download
17	predict	100000	1	100	0.020	0.001	0.000	0.020	1	1	0.009	0.000	2.260	2.262	Download	Download
19	predict	100000	1000	2	1.879	0.033	0.000	0.002	-1	1	0.026	0.000	72.738	72.740	Download	Download
20	predict	100000	1	2	0.006	0.004	0.000	0.006	-1	1	0.001	0.000	7.577	7.758	Download	Download
22	predict	100000	1000	2	2.675	0.083	0.000	0.003	-1	5	0.028	0.000	96.998	97.003	Download	Download
23	predict	100000	1	2	0.005	0.002	0.000	0.005	-1	5	0.001	0.000	6.811	6.914	Download	Download
25	predict	100000	1000	2	1.886	0.005	0.000	0.002	1	100	0.062	0.000	30.497	30.498	Download	Download
26	predict	100000	1	2	0.003	0.000	0.000	0.003	1	100	0.001	0.000	2.891	2.927	Download	Download
28	predict	100000	1000	2	2.611	0.033	0.000	0.003	-1	100	0.062	0.001	42.091	42.102	Download	Download
29	predict	100000	1	2	0.007	0.003	0.000	0.007	-1	100	0.001	0.000	7.636	7.735	Download	Download
31	predict	100000	1000	2	1.900	0.009	0.000	0.002	1	5	0.027	0.000	69.657	69.659	Download	Download
32	predict	100000	1	2	0.003	0.000	0.000	0.003	1	5	0.001	0.000	3.328	3.393	Download	Download
34	predict	100000	1000	2	1.133	0.006	0.000	0.001	1	1	0.026	0.000	43.041	43.047	Download	Download
35	predict	100000	1	2	0.002	0.000	0.000	0.002	1	1	0.001	0.000	2.275	2.317	Download	Download

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	n_jobs	n_neighbors	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
0	fit	1000000	1000000	10	2.944	0.047	0.027	-1	1	0.738	0.009	3.988	3.988	Download	Download
3	fit	1000000	1000000	10	2.963	0.055	0.027	-1	5	0.733	0.006	4.040	4.040	Download	Download
6	fit	1000000	1000000	10	3.000	0.090	0.027	1	100	0.741	0.011	4.048	4.049	Download	Download
9	fit	1000000	1000000	10	3.005	0.055	0.027	-1	100	0.738	0.006	4.074	4.074	Download	Download
12	fit	1000000	1000000	10	2.982	0.076	0.027	1	5	0.743	0.014	4.013	4.013	Download	Download
15	fit	1000000	1000000	10	3.061	0.021	0.026	1	1	0.733	0.007	4.177	4.177	Download	Download
18	fit	1000	1000	2	0.001	0.000	0.029	-1	1	0.001	0.000	0.583	0.588	Download	Download
21	fit	1000	1000	2	0.001	0.000	0.029	-1	5	0.001	0.000	0.583	0.589	Download	Download
24	fit	1000	1000	2	0.001	0.000	0.029	1	100	0.001	0.000	0.559	0.567	Download	Download
27	fit	1000	1000	2	0.001	0.000	0.030	-1	100	0.001	0.000	0.574	0.582	Download	Download
30	fit	1000	1000	2	0.001	0.000	0.029	1	5	0.001	0.000	0.582	0.589	Download	Download
33	fit	1000	1000	2	0.001	0.000	0.030	1	1	0.001	0.000	0.550	0.561	Download	Download

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	latency	n_jobs	n_neighbors	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
1	predict	1000000	1000	10	0.459	0.006	0.000	0.000	-1	1	0.112	0.002	4.093	4.093	Download	Download
2	predict	1000000	1	10	0.003	0.000	0.000	0.003	-1	1	0.000	0.000	9.831	10.984	Download	Download
4	predict	1000000	1000	10	0.795	0.013	0.000	0.001	-1	5	0.201	0.001	3.961	3.961	Download	Download
5	predict	1000000	1	10	0.003	0.001	0.000	0.003	-1	5	0.000	0.000	9.240	10.343	Download	Download
7	predict	1000000	1000	10	5.027	0.042	0.000	0.005	1	100	0.581	0.003	8.656	8.656	Download	Download
8	predict	1000000	1	10	0.003	0.001	0.000	0.003	1	100	0.001	0.000	4.175	4.512	Download	Download
10	predict	1000000	1000	10	2.666	0.067	0.000	0.003	-1	100	0.586	0.004	4.552	4.552	Download	Download
11	predict	1000000	1	10	0.005	0.001	0.000	0.005	-1	100	0.001	0.000	8.340	8.900	Download	Download
13	predict	1000000	1000	10	1.476	0.020	0.000	0.001	1	5	0.200	0.003	7.381	7.382	Download	Download
14	predict	1000000	1	10	0.001	0.000	0.000	0.001	1	5	0.000	0.000	3.605	4.007	Download	Download
16	predict	1000000	1000	10	0.790	0.002	0.000	0.001	1	1	0.112	0.001	7.039	7.039	Download	Download
17	predict	1000000	1	10	0.001	0.000	0.000	0.001	1	1	0.000	0.000	3.449	3.812	Download	Download
19	predict	1000	1000	2	0.022	0.001	0.001	0.000	-1	1	0.001	0.000	41.536	42.603	Download	Download
20	predict	1000	1	2	0.002	0.000	0.000	0.002	-1	1	0.000	0.000	16.203	18.885	Download	Download
22	predict	1000	1000	2	0.023	0.000	0.001	0.000	-1	5	0.001	0.000	28.599	29.065	Download	Download
23	predict	1000	1	2	0.002	0.000	0.000	0.002	-1	5	0.000	0.000	15.828	18.891	Download	Download
25	predict	1000	1000	2	0.035	0.003	0.000	0.000	1	100	0.005	0.000	7.700	7.709	Download	Download
26	predict	1000	1	2	0.001	0.000	0.000	0.001	1	100	0.000	0.000	3.829	4.538	Download	Download
28	predict	1000	1000	2	0.033	0.000	0.000	0.000	-1	100	0.005	0.000	7.076	7.094	Download	Download
29	predict	1000	1	2	0.002	0.000	0.000	0.002	-1	100	0.000	0.000	14.958	17.871	Download	Download
31	predict	1000	1000	2	0.021	0.000	0.001	0.000	1	5	0.001	0.000	27.813	28.300	Download	Download
32	predict	1000	1	2	0.001	0.000	0.000	0.001	1	5	0.000	0.000	4.287	4.871	Download	Download
34	predict	1000	1000	2	0.020	0.000	0.001	0.000	1	1	0.001	0.000	37.010	38.215	Download	Download
35	predict	1000	1	2	0.001	0.000	0.000	0.001	1	1	0.000	0.000	4.031	4.773	Download	Download

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	n_iter	init	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
0	fit	1000000	1000000	2	0.494	0.004	0.032	30	random	0.369	0.023	1.337	1.340	Download	Download
3	fit	1000000	1000000	2	0.556	0.005	0.029	30	k-means++	0.407	0.014	1.364	1.365	Download	Download
6	fit	1000000	1000000	100	4.773	0.225	0.168	30	random	2.859	0.014	1.670	1.670	Download	Download
9	fit	1000000	1000000	100	4.902	0.057	0.163	30	k-means++	3.031	0.008	1.618	1.618	Download	Download

scikit-learn-intelex (Intel® oneAPI) vs. scikit-learn¶

KNeighborsClassifier (brute force) ¶

Speedup barplots ¶

Raw results ¶

KNeighborsClassifier (KD tree) ¶

Speedup barplots ¶

Raw results ¶

KMeans (tall) ¶

Speedup barplots ¶

Raw results ¶

KMeans (short) ¶

Speedup barplots ¶

Mismatches between validation scores¶

Raw results ¶

Logistic Regression ¶

Speedup barplots ¶

Raw results ¶

LinearRegression ¶

Speedup barplots ¶

Raw results ¶

Ridge ¶

Speedup barplots ¶

Raw results ¶

TSNE ¶

Speedup barplots ¶

Raw results ¶

PCA ¶

Speedup barplots ¶

Raw results ¶

Benchmark environment information¶

System¶

Dependencies¶

Threadpool¶

Cpu_count¶

	estimator	library	diff_adjusted_rand_scores	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	latency	n_iter	algorithm	init	max_iter	n_clusters	n_init	tol	adjusted_rand_score_sklearn	mean_duration_sklearnex	std_duration_sklearnex	adjusted_rand_score_sklearnex	speedup	std_speedup
7	sklearn_KMeans_short	sklearn	0.074768	predict	10000	1000	100	0.001314	0.000299	0.608838	0.000001	20	full	random	20	300	1	1.000000e-16	0.222701	0.001151	0.000208	0.297469	1.141138	1.159636
10	sklearn_KMeans_short	sklearn	0.076616	predict	10000	1000	100	0.001395	0.000322	0.573650	0.000001	20	full	k-means++	20	300	1	1.000000e-16	0.408030	0.001152	0.000190	0.331414	1.210810	1.227103

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	latency	n_iter	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
0	fit	1000000	1000000	100	10.724	0.013	0.075	0.000	[20]	2.011	0.008	5.332	5.332	Download	Download
3	fit	1000	1000	10000	0.813	0.056	0.098	0.001	[27]	0.869	0.060	0.935	0.938	Download	Download

	function	n_samples_train	n_samples	n_features	mean_duration_sklearn	std_duration_sklearn	iteration_throughput	latency	mean_duration_sklearnex	std_duration_sklearnex	speedup	std_speedup	sklearn_profiling	sklearnex_profiling
0	fit	100000	100000	100	0.480	0.005	0.167	0.0	0.027	0.000	17.870	17.871	Download	Download
1	fit	10000	10000	1000	0.444	0.005	0.180	0.0	0.187	0.003	2.372	2.373	Download	Download


python	3.8.12 | packaged by conda-forge | (default, Jan 30 2022, 23:42:07) [GCC 9.4.0]
executable	/usr/share/miniconda/envs/sklbench/bin/python
machine	Linux-5.11.0-1028-azure-x86_64-with-glibc2.10


pip	22.0.4
setuptools	60.9.3
sklearn	1.0.2
numpy	1.22.3
scipy	1.8.0
Cython	None
pandas	1.4.1
matplotlib	3.5.1
joblib	1.1.0
threadpoolctl	3.1.0

	user_api	internal_api	prefix	filepath	version	threading_layer	architecture	num_threads
0	blas	openblas	libopenblas	/usr/share/miniconda/envs/sklbench/lib/libopenblasp-r0.3.18.so	0.3.18	pthreads	SkylakeX	2
1	openmp	openmp	libgomp	/usr/share/miniconda/envs/sklbench/lib/libgomp.so.1.0.0	None	NaN	NaN	2


cpu_count	2
physical_cpu_count	2