I want to implement some fast convex analysis operations - proximal operators and the like. I'm new to Cython and thought that this would be the right tool for the job. I have implementations both in pure Python and in Cython (mwe_py.py
and mwe_c.pyx
below). However, when I compare them, the Python + Numpy version is significantly faster than the Cython version. Why is this? I have tried using memoryviews, which are supposed to allow for faster indexing/operations; however, the performance difference is very pronounced! Any advice on how to fix mwe_c.pyx
below to approach "optimal" Cython code would be greatly appreciated.
import pyximport; pyximport.install(language_level=3)
import mwe_c
import mwe_py
import numpy as np
from time import time
n = 100000
nreps = 10000
x = np.random.randn(n)
z = np.random.randn(n)
tau = 1.0
t0 = time()
for _ in range(nreps):
out = mwe_c.prox_translation(mwe_c.prox_two_norm, x, z, tau)
t1 = time()
print(t1 - t0)
t0 = time()
for _ in range(nreps):
out = mwe_py.prox_translation(mwe_py.prox_two_norm, x, z, tau)
t1 = time()
print(t1 - t0)
which gives the outputs, respectively:
10.76103401184082 # (seconds)
5.988733291625977 # (seconds)
Below is mwe_py.py
:
import numpy.linalg as la
def proj_two_norm(x):
"""projection onto l2 unit ball"""
X = la.norm(x)
if X <= 1:
return x
return x / X
def prox_two_norm(x, tau):
"""proximal mapping of l2 norm with parameter tau"""
return x - proj_two_norm(x / tau)
def prox_translation(prox_func, x, z, tau=None):
"""Compute prox(f(. - z))(x) where prox_func(x, tau) is prox(tau * f)(x)."""
if tau is None:
tau = 1.0
return z + prox_func(x - z, tau)
And here, at last, is mwe_c.pyx
:
import numpy as np
cimport numpy as np
cdef double [::1] aasubtract(double [::1] x, double [::1] z):
cdef unsigned int i, m = len(x), n = len(z);
assert m == n, f"vectors must have the same length"
cdef double [::1] out = np.copy(x);
for i in range(n):
out[i] -= z[i]
return out
cdef double [::1] vsdivide(double [::1] x, double tau):
"""Divide an array by a scalar element-wise."""
cdef:
unsigned int i, n = len(x);
double [::1] out = np.copy(x);
for i in range(n):
out[i] /= tau
return out
cdef double two_norm(double [::1] x):
cdef:
double out = 0.0;
unsigned int i, n=len(x);
for i in range(n):
out = out + x[i]**2
out = out **.5
return out
cdef double [::1] proj_two_norm(double [::1] x):
"""project x onto the unit two ball."""
cdef double x_norm = two_norm(x);
cdef unsigned int i, n = len(x);
cdef double [::1] p = np.copy(x);
if x_norm <= 1:
return p
for i in range(n):
p[i] = p[i] / x_norm
return p
cpdef double [::1] prox_two_norm(double [::1] x, double tau):
"""double [::1] prox_two_norm(double [::1] x, double tau)"""
cdef unsigned int i, n = len(x);
cdef double [::1] out = x.copy(), Px = x.copy();
Px = proj_two_norm(vsdivide(Px, tau));
for i in range(n):
out[i] = out[i] - Px[i]
return out
cpdef prox_translation(
prox_func,
double [::1] x,
double [::1] z,
double tau=1.0
):
cdef:
unsigned int i, n = len(x);
double [::1] out = prox_func(aasubtract(x, z), tau);
for i in range(n):
out[i] += z[i];
return out
wraparound
,boundscheck
andcdivision
. This should increase the performance of your Cython code. However, I don't expect it to be faster than your numpy code for reasons Jérôme gave in his nice answer. – Electrotherapy