Threaded FFT in Enthought Python
Asked Answered
C

3

5

Fast Fourier Transforms (FFTs) in Numpy/SciPy are not threaded. Enthought Python is shipped with the Intel MKL numerical library, which is capable of threaded FFTs. How does one get access to these routines?

Centner answered 1/8, 2012 at 5:14 Comment(0)
C
2

New and improved version which handles arbitrary strides in the input and output arrays. By default this one is now not-in-place and creates a new array. It mimics the Numpy FFT routines except that it has a different normalisation.

''' Wrapper to MKL FFT routines '''

import numpy as _np
import ctypes as _ctypes

mkl = _ctypes.cdll.LoadLibrary("mk2_rt.dll")
_DFTI_COMPLEX = _ctypes.c_int(32)
_DFTI_DOUBLE = _ctypes.c_int(36)
_DFTI_PLACEMENT = _ctypes.c_int(11)
_DFTI_NOT_INPLACE = _ctypes.c_int(44)
_DFTI_INPUT_STRIDES = _ctypes.c_int(12)
_DFTI_OUTPUT_STRIDES = _ctypes.c_int(13)

def fft2(a, out=None):
    ''' 
Forward two-dimensional double-precision complex-complex FFT.
Uses the Intel MKL libraries distributed with Enthought Python.
Normalisation is different from Numpy!
By default, allocates new memory like 'a' for output data.
Returns the array containing output data.
'''

    assert a.dtype == _np.complex128
    assert len(a.shape) == 2

    inplace = False

    if out is a:
        inplace = True

    elif out is not None:
        assert out.dtype == _np.complex128
        assert a.shape == out.shape
        assert not _np.may_share_memory(a, out)

    else:
        out = _np.empty_like(a)

    Desc_Handle = _ctypes.c_void_p(0)
    dims = (_ctypes.c_int*2)(*a.shape)

    mkl.DftiCreateDescriptor(_ctypes.byref(Desc_Handle), _DFTI_DOUBLE, _DFTI_COMPLEX, _ctypes.c_int(2), dims )

    #Set input strides if necessary
    if not a.flags['C_CONTIGUOUS']:
        in_strides = (_ctypes.c_int*3)(0, a.strides[0]/16, a.strides[1]/16)
        mkl.DftiSetValue(Desc_Handle, _DFTI_INPUT_STRIDES, _ctypes.byref(in_strides))

    if inplace:
        #Inplace FFT
        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p) )

    else:
        #Not-inplace FFT
        mkl.DftiSetValue(Desc_Handle, _DFTI_PLACEMENT, _DFTI_NOT_INPLACE)

        #Set output strides if necessary
        if not out.flags['C_CONTIGUOUS']:
            out_strides = (_ctypes.c_int*3)(0, out.strides[0]/16, out.strides[1]/16)
            mkl.DftiSetValue(Desc_Handle, _DFTI_OUTPUT_STRIDES, _ctypes.byref(out_strides))

        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p), out.ctypes.data_as(_ctypes.c_void_p) )

    mkl.DftiFreeDescriptor(_ctypes.byref(Desc_Handle))

    return out

def ifft2(a, out=None):
    ''' 
Backward two-dimensional double-precision complex-complex FFT.
Uses the Intel MKL libraries distributed with Enthought Python.
Normalisation is different from Numpy!
By default, allocates new memory like 'a' for output data.
Returns the array containing output data.
'''

    assert a.dtype == _np.complex128
    assert len(a.shape) == 2

    inplace = False

    if out is a:
        inplace = True

    elif out is not None:
        assert out.dtype == _np.complex128
        assert a.shape == out.shape
        assert not _np.may_share_memory(a, out)

    else:
        out = _np.empty_like(a)

    Desc_Handle = _ctypes.c_void_p(0)
    dims = (_ctypes.c_int*2)(*a.shape)

    mkl.DftiCreateDescriptor(_ctypes.byref(Desc_Handle), _DFTI_DOUBLE, _DFTI_COMPLEX, _ctypes.c_int(2), dims )

    #Set input strides if necessary
    if not a.flags['C_CONTIGUOUS']:
        in_strides = (_ctypes.c_int*3)(0, a.strides[0]/16, a.strides[1]/16)
        mkl.DftiSetValue(Desc_Handle, _DFTI_INPUT_STRIDES, _ctypes.byref(in_strides))

    if inplace:
        #Inplace FFT
        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p) )

    else:
        #Not-inplace FFT
        mkl.DftiSetValue(Desc_Handle, _DFTI_PLACEMENT, _DFTI_NOT_INPLACE)

        #Set output strides if necessary
        if not out.flags['C_CONTIGUOUS']:
            out_strides = (_ctypes.c_int*3)(0, out.strides[0]/16, out.strides[1]/16)
            mkl.DftiSetValue(Desc_Handle, _DFTI_OUTPUT_STRIDES, _ctypes.byref(out_strides))

        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p), out.ctypes.data_as(_ctypes.c_void_p) )

    mkl.DftiFreeDescriptor(_ctypes.byref(Desc_Handle))

    return out
Centner answered 9/4, 2014 at 2:37 Comment(0)
C
3

The following code works for me with Enthought 7.3-1 (64-bit) on Windows 7 Ultimate 64-bit. I haven't benchmarked it but it certainly uses all cores at once rather than just one.

from ctypes import *

class Mkl_Fft:
    c_double_p = POINTER(c_double)

    def __init__(self,num_threads=8):
        self.dfti = cdll.LoadLibrary("mk2_rt.dll")
        self.dfti.MKL_Set_Num_Threads(num_threads)
        self.Create = self.dfti.DftiCreateDescriptor_d_md
        self.Commit = self.dfti.DftiCommitDescriptor
        self.ComputeForward = self.dfti.DftiComputeForward

    def fft(self,a):
        Desc_Handle = c_void_p(0)
        dims = (c_int*2)(*a.shape)
        DFTI_COMPLEX = c_int(32)
        rank = 2

        self.Create(byref(Desc_Handle), DFTI_COMPLEX, rank, dims )
        self.Commit(Desc_Handle)
        self.ComputeForward(Desc_Handle, a.ctypes.data_as(self.c_double_p) )

Usage:

import numpy as np
a = np.ones( (32,32), dtype = complex128 )
fft = Mkl_Fft()
fft.fft(a)
Centner answered 1/8, 2012 at 5:14 Comment(0)
C
2

A cleaner version of my original answer is as follows:

from ctypes import *

mkl = cdll.LoadLibrary("mk2_rt.dll")
c_double_p = POINTER(c_double)
DFTI_COMPLEX = c_int(32)
DFTI_DOUBLE = c_int(36)

def fft2(a):
    Desc_Handle = c_void_p(0)
    dims = (c_int*2)(*a.shape)

    mkl.DftiCreateDescriptor(byref(Desc_Handle), DFTI_DOUBLE, DFTI_COMPLEX, 2, dims )
    mkl.DftiCommitDescriptor(Desc_Handle)
    mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(c_void_p) )
    mkl.DftiFreeDescriptor(byref(Desc_Handle))

    return a

def ifft2(a):
    Desc_Handle = c_void_p(0)
    dims = (c_int*2)(*a.shape)

    mkl.DftiCreateDescriptor(byref(Desc_Handle), DFTI_DOUBLE, DFTI_COMPLEX, 2, dims )
    mkl.DftiCommitDescriptor(Desc_Handle)
    mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(c_void_p) )
    mkl.DftiFreeDescriptor(byref(Desc_Handle))

    return a
Centner answered 24/10, 2013 at 0:27 Comment(0)
C
2

New and improved version which handles arbitrary strides in the input and output arrays. By default this one is now not-in-place and creates a new array. It mimics the Numpy FFT routines except that it has a different normalisation.

''' Wrapper to MKL FFT routines '''

import numpy as _np
import ctypes as _ctypes

mkl = _ctypes.cdll.LoadLibrary("mk2_rt.dll")
_DFTI_COMPLEX = _ctypes.c_int(32)
_DFTI_DOUBLE = _ctypes.c_int(36)
_DFTI_PLACEMENT = _ctypes.c_int(11)
_DFTI_NOT_INPLACE = _ctypes.c_int(44)
_DFTI_INPUT_STRIDES = _ctypes.c_int(12)
_DFTI_OUTPUT_STRIDES = _ctypes.c_int(13)

def fft2(a, out=None):
    ''' 
Forward two-dimensional double-precision complex-complex FFT.
Uses the Intel MKL libraries distributed with Enthought Python.
Normalisation is different from Numpy!
By default, allocates new memory like 'a' for output data.
Returns the array containing output data.
'''

    assert a.dtype == _np.complex128
    assert len(a.shape) == 2

    inplace = False

    if out is a:
        inplace = True

    elif out is not None:
        assert out.dtype == _np.complex128
        assert a.shape == out.shape
        assert not _np.may_share_memory(a, out)

    else:
        out = _np.empty_like(a)

    Desc_Handle = _ctypes.c_void_p(0)
    dims = (_ctypes.c_int*2)(*a.shape)

    mkl.DftiCreateDescriptor(_ctypes.byref(Desc_Handle), _DFTI_DOUBLE, _DFTI_COMPLEX, _ctypes.c_int(2), dims )

    #Set input strides if necessary
    if not a.flags['C_CONTIGUOUS']:
        in_strides = (_ctypes.c_int*3)(0, a.strides[0]/16, a.strides[1]/16)
        mkl.DftiSetValue(Desc_Handle, _DFTI_INPUT_STRIDES, _ctypes.byref(in_strides))

    if inplace:
        #Inplace FFT
        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p) )

    else:
        #Not-inplace FFT
        mkl.DftiSetValue(Desc_Handle, _DFTI_PLACEMENT, _DFTI_NOT_INPLACE)

        #Set output strides if necessary
        if not out.flags['C_CONTIGUOUS']:
            out_strides = (_ctypes.c_int*3)(0, out.strides[0]/16, out.strides[1]/16)
            mkl.DftiSetValue(Desc_Handle, _DFTI_OUTPUT_STRIDES, _ctypes.byref(out_strides))

        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p), out.ctypes.data_as(_ctypes.c_void_p) )

    mkl.DftiFreeDescriptor(_ctypes.byref(Desc_Handle))

    return out

def ifft2(a, out=None):
    ''' 
Backward two-dimensional double-precision complex-complex FFT.
Uses the Intel MKL libraries distributed with Enthought Python.
Normalisation is different from Numpy!
By default, allocates new memory like 'a' for output data.
Returns the array containing output data.
'''

    assert a.dtype == _np.complex128
    assert len(a.shape) == 2

    inplace = False

    if out is a:
        inplace = True

    elif out is not None:
        assert out.dtype == _np.complex128
        assert a.shape == out.shape
        assert not _np.may_share_memory(a, out)

    else:
        out = _np.empty_like(a)

    Desc_Handle = _ctypes.c_void_p(0)
    dims = (_ctypes.c_int*2)(*a.shape)

    mkl.DftiCreateDescriptor(_ctypes.byref(Desc_Handle), _DFTI_DOUBLE, _DFTI_COMPLEX, _ctypes.c_int(2), dims )

    #Set input strides if necessary
    if not a.flags['C_CONTIGUOUS']:
        in_strides = (_ctypes.c_int*3)(0, a.strides[0]/16, a.strides[1]/16)
        mkl.DftiSetValue(Desc_Handle, _DFTI_INPUT_STRIDES, _ctypes.byref(in_strides))

    if inplace:
        #Inplace FFT
        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p) )

    else:
        #Not-inplace FFT
        mkl.DftiSetValue(Desc_Handle, _DFTI_PLACEMENT, _DFTI_NOT_INPLACE)

        #Set output strides if necessary
        if not out.flags['C_CONTIGUOUS']:
            out_strides = (_ctypes.c_int*3)(0, out.strides[0]/16, out.strides[1]/16)
            mkl.DftiSetValue(Desc_Handle, _DFTI_OUTPUT_STRIDES, _ctypes.byref(out_strides))

        mkl.DftiCommitDescriptor(Desc_Handle)
        mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p), out.ctypes.data_as(_ctypes.c_void_p) )

    mkl.DftiFreeDescriptor(_ctypes.byref(Desc_Handle))

    return out
Centner answered 9/4, 2014 at 2:37 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.