Thomas J. Fan
@thomasjpfan
github.com/thomasjpfan/pydata-nyc-2023-scikit-learn-array-api
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda_np = LinearDiscriminantAnalysis()
lda_np.fit(X_np, y_np)

y_pred_np = lda_np.predict(X_np)
type(y_pred_np)
# <class 'numpy.ndarray'>
import sklearn
import torch

sklearn.set_config(array_api_dispatch=True)

X_torch_cpu, y_torch_cpu = torch.asarray(X_np), torch.asarray(y_np)

lda = LinearDiscriminantAnalysis()
lda.fit(X_torch_cpu, y_torch_cpu)

type(lda.predict(X_torch_cpu))
# <class 'torch.Tensor'>
import sklearn

with sklearn.config_context(array_api_dispatch=True):
    X_torch_cuda = torch.asarray(X_np, device="cuda")
    y_torch_cuda = torch.asarray(y_np, device="cuda")

    lda = LinearDiscriminantAnalysis()
    lda.fit(X_torch_cuda, y_torch_cuda)

    type(lda.predict(X_torch_cuda))
    # <class 'torch.Tensor'>
Benchmark hardware: 16-core AMD 5950X CPU and NVIDIA RTX 3090 GPU
def func(x, y):
    out = np.mean(x, axis=0) - 2 * np.std(y, axis=0)
    return out
def func(x, y):
    xp = array_namespace(x, y)
    out = xp.mean(x, axis=0) - 2 * xp.std(y, axis=0)
    return out
import numpy.array_api as xp
import cupy.array_api as xp
import numpy as np
import cupy as cp
import cupy
import cupy.array_api as xp

sklearn.set_config(array_api_dispatch=True)

X_cp, y_cp = cupy.asarray(...), cupy.asarray(...)
X_xp, y_xp = xp.asarray(X_cp), xp.asarray(y_cp)

lda = LinearDiscriminantAnalysis()
lda.fit(X_xp, y_xp)
array_api_compat
from array_api_compat import array_namespace

def func(x, y):
    xp = array_namespace(x, y)
    out = xp.mean(x, axis=0) - 2 * xp.std(y, axis=0)
    return out
Extends:
- NumPy ndarray
- CuPy ndarray
- PyTorch Tensors
- numpy.array_api
- cupy.array_api
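A minimal sketch of what this buys you (assuming numpy, torch, and array_api_compat are installed): the same array_namespace helper resolves a compatible namespace for NumPy arrays and PyTorch tensors alike.

import numpy as np
import torch
from array_api_compat import array_namespace

x_np = np.asarray([1.0, 2.0, 3.0])
x_torch = torch.asarray([1.0, 2.0, 3.0])

# array_namespace inspects its inputs and returns the matching
# Array API-compatible namespace for each library.
xp_np = array_namespace(x_np)
xp_torch = array_namespace(x_torch)

# The same spec-compliant call works through either namespace.
print(xp_np.mean(x_np, axis=0))        # 2.0
print(xp_torch.mean(x_torch, axis=0))  # tensor(2.)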
import torch

sklearn.set_config(array_api_dispatch=True)

X_torch_cpu, y_torch_cpu = torch.asarray(...), torch.asarray(...)

lda = LinearDiscriminantAnalysis()
lda.fit(X_torch_cpu, y_torch_cpu)
import numpy as np

y_sum = y.sum(axis=0)
from array_api_compat import array_namespace

xp = array_namespace(y)
y_sum = xp.sum(y, axis=0)
import numpy as np

y = (X.mean(axis=1) > 1.0).any()
xp = array_namespace(X)
y = xp.any(xp.mean(X, axis=1) > 1.0)
import numpy as np

C = np.dot(A, B)
@ is more restrictive than np.dot
C = A @ B
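As an illustrative aside (not from the slides): np.dot accepts zero-dimensional operands, while the @ operator requires inputs with at least one dimension, which is the behavior the standard settles on.

import numpy as np

a = np.asarray(2.0)
b = np.asarray(3.0)

print(np.dot(a, b))  # 6.0 -- np.dot accepts 0-d operands
# a @ b              # raises: matmul needs operands with at least 1 dimension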
import numpy as np

uniques = np.unique(x)
uniques, counts = np.unique(x, return_counts=True)
xp = array_namespace(x)

uniques = xp.unique_values(x)
uniques, counts = xp.unique_counts(x)
import numpy as np

x_nanmax = np.nanmax(x, axis=1)
def xp_nanmax(X, axis=None):
    xp = array_namespace(X)
    if is_numpy_namespace(xp):
        return xp.asarray(numpy.nanmax(X, axis=axis))

    # Implement using Array API standard (simplified)
    mask = xp.isnan(X)
    inf_ = xp.asarray(-xp.inf, device=device(X))
    X_nanmax = xp.max(xp.where(mask, inf_, X), axis=axis)
    return X_nanmax
import numpy as np

x = np.asarray([[1, 2], [4, 5], [4, 1]])
x[[0, 2]]
# array([[1, 2],
#        [4, 1]])
take was added in the 2022.12 revision of the standard:

import numpy.array_api as xp

x = xp.asarray([[1, 2], [4, 5], [4, 1]])
xp.take(x, xp.asarray([0, 2]), axis=0)
# Array([[1, 2],
#        [4, 1]], dtype=int64)
import numpy as np

x = np.asarray([[1, 2, 3], [4, 5, 6]])
x[1]
# array([4, 5, 6])
import numpy.array_api as xp

x = xp.asarray([[1, 2, 3], [4, 5, 6]])
x[1]
# IndexError
x[1, :]
# array([4, 5, 6])
import numpy as np

rng = np.random.default_rng()
x = rng.standard_normal(size=10)
import numpy as np
from array_api_compat import array_namespace, device

rng = np.random.default_rng()
x_np = rng.standard_normal(size=10)

# Move the NumPy result into x's namespace, on x's device
xp = array_namespace(x)
x_xp = xp.asarray(x_np, device=device(x))
rng = np.random.default_rng()
x = rng.standard_normal(size=(10_000, 10_000))

x_c = np.asarray(x, order="C")
x_f = np.asarray(x, order="F")

%%timeit
_ = x_c.sum(axis=0)
# 36.3 ms ± 1.44 ms per loop

%%timeit
_ = x_f.sum(axis=0)
# 18.8 ms ± 131 µs per loop
import numpy as np

x1 = np.asarray([[1, 2], [4, 5]])
x2 = np.asarray([[1, 2]], dtype=np.float32)

x1 + x2
# array([[2., 4.],
#        [5., 7.]])
x1 = xp.asarray([[1, 2], [4, 5]])
x2 = xp.asarray([[1, 2]], dtype=xp.float32)

x1 + x2
# TypeError: int64 and float32 cannot be type promoted together
x1 = xp.asarray([[1, 2], [4, 5]], dtype=xp.float32)
x2 = xp.asarray([[1, 2]], dtype=xp.float32)

x1 + x2
# Array([[2., 4.],
#        [5., 7.]], dtype=float32)
import numpy as np

x1 = np.asarray([[1, 2, 3]])
x2 = 1.0

x1 + x2
# array([[2., 3., 4.]])
import numpy.array_api as xp

x1 = xp.asarray([[1, 2, 3]])
x2 = 1.0

x1 + x2
# TypeError: Python float scalars can only be promoted with floating-point arrays.
import numpy.array_api as xp

x1 = xp.asarray([[1, 2, 3]], dtype=xp.float32)
x2 = 1.0

x1 + x2
# Array([[2., 3., 4.]], dtype=float32)
import numpy as np

y = np.linspace(2.0, 3.0, num=10)
from array_api_compat import device

xp = array_namespace(x)
y = xp.linspace(2.0, 3.0, num=10, device=device(x))
RandomForestClassifier
RandomForestRegressor
HistGradientBoostingClassifier
HistGradientBoostingRegressor
LogisticRegression
PoissonRegressor
def func(a, b):
    xp = array_namespace(a, b)
    c = xp.sum(a, axis=1) + xp.sum(b, axis=1)

    c = numpy.asarray(c)
    d = compiled_code_that_only_works_with_numpy(c)
    d = xp.asarray(d)
    return d
def func(a, b, plugin):
    xp = array_namespace(a, b)
    c = xp.sum(a, axis=1) + xp.sum(b, axis=1)

    d = plugin.dispatch_to_library(c)

    e = xp.mean(d, axis=0)
    return e
def erf(x):
    if is_numpy(x):
        import scipy.special
        return scipy.special.erf(x)
    elif is_cupy(x):
        import cupyx.scipy.special
        return cupyx.scipy.special.erf(x)
    elif is_pytorch(x):
        import torch
        return torch.special.erf(x)
    else:
        ...
Benchmark hardware: 16-core AMD 5950X CPU and NVIDIA RTX 3090 GPU