In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
# Render every figure in this notebook at 100 dots per inch.
plt.rcParams.update({"figure.dpi": 100})
In [2]:
# The environment description is stored as JSON despite the .txt extension.
with open("benchmarks/env_info.txt") as env_file:
    env_info = json.loads(env_file.read())

# Pretty-print the parsed content so the benchmark environment is visible
# alongside the figures.
env_info_pretty = json.dumps(env_info, indent=2)
print(env_info_pretty)
{
  "system_info": {
    "python": "3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05)  [GCC 9.3.0]",
    "executable": "/usr/share/miniconda/envs/cython_experimentations/bin/python",
    "machine": "Linux-5.4.0-1047-azure-x86_64-with-glibc2.10"
  },
  "dependencies_info": {
    "pip": "21.1.2",
    "setuptools": "49.6.0.post20210108",
    "sklearn": "0.24.2",
    "numpy": "1.20.3",
    "scipy": "1.6.3",
    "Cython": "0.29.23",
    "pandas": "1.2.4",
    "matplotlib": "3.4.2",
    "joblib": "1.0.1",
    "threadpoolctl": "2.1.0"
  },
  "threadpool_info": [
    {
      "filepath": "/usr/share/miniconda/envs/cython_experimentations/lib/libopenblasp-r0.3.15.so",
      "prefix": "libopenblas",
      "user_api": "blas",
      "internal_api": "openblas",
      "version": "0.3.15",
      "num_threads": 2,
      "threading_layer": "pthreads"
    },
    {
      "filepath": "/usr/share/miniconda/envs/cython_experimentations/lib/libgomp.so.1.0.0",
      "prefix": "libgomp",
      "user_api": "openmp",
      "internal_api": "openmp",
      "version": null,
      "num_threads": 2
    }
  ],
  "cpu_count": 2
}
In [3]:
# Location of the benchmark results, relative to the notebook's working directory.
RESULTS_FILE_PATH = "benchmarks/results/pairwise_dist.csv"

# Load the raw benchmark results into a DataFrame.
df = pd.read_csv(filepath_or_buffer=RESULTS_FILE_PATH)
In [4]:
# Cast the size columns to unsigned 32-bit integers.
# Each column is listed exactly once: a repeated label makes `df[cols]`
# select duplicate columns, and assigning such a frame back is redundant at
# best and can fail depending on the pandas version.
cols = ["n_samples", "n_features"]
df[cols] = df[cols].astype(np.uint32)
In [5]:
# One group per distinct (n_samples, n_features) pair.
group_keys = ["n_samples", "n_features"]
df_grouped = df.groupby(by=group_keys)
In [6]:
# Human-readable y-axis labels, keyed by the results column being plotted.
y_labels = {
    "time_elapsed": "Time elapsed (in s)",
    "throughput": "Throughput (in GB/s)",
}
In [7]:
# Map each thread pool's "user_api" tag to its thread count, so the lookup
# does not depend on the position of the entries in `threadpool_info`.
_num_threads_by_api = {}
for _pool in env_info["threadpool_info"]:
    # setdefault keeps the first entry found for a given API.
    _num_threads_by_api.setdefault(_pool["user_api"], _pool["num_threads"])

# A missing pool raises KeyError here instead of silently reading the
# thread count of an unrelated library.
OPENBLAS_NUM_THREADS = _num_threads_by_api["blas"]
OMP_NUM_THREADS = _num_threads_by_api["openmp"]
In [8]:
# Draw one figure per (n_samples, n_features) group, with two side-by-side
# bar plots: elapsed time on the left and throughput on the right.
# The per-group frame is bound to `group_df` (not `df`) so the loop does not
# overwrite the full results frame loaded earlier in the notebook.
for vals, group_df in df_grouped:
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    for col, ax in zip(["time_elapsed", "throughput"], axes):
        sns.barplot(x="implementation", y=col, hue="implementation", data=group_df, ax=ax)
        ax.set_ylabel(y_labels[col])
        # The tick labels already name each implementation.
        ax.set_xlabel("")
    title = "Pairwise distances, dtype=np.float64, "
    title += f"OMP_NUM_THREADS={OMP_NUM_THREADS}, OPENBLAS_NUM_THREADS={OPENBLAS_NUM_THREADS}\n"
    # `vals` is the group key: the (n_samples, n_features) pair.
    title += "n_samples=%s, n_features=%s" % vals
    fig.suptitle(title, fontsize=16)