I'm looking for an efficient way to implement morphological image dilation with a square kernel in TensorFlow. The obvious ways of doing it appear to be extremely inefficient compared to what is achievable, as OpenCV demonstrates. See the results below, produced by running the source code pasted at the bottom: even the fastest TensorFlow method is about 30x slower than OpenCV. These timings are from a MacBook Air with an M1 chip.
Dilation of 640x480 image with a 25x25 kernel took:
0.61ms using opencv
545.40ms using tf.nn.max_pool2d
228.66ms using tf.nn.dilation2d naively
17.63ms using tf.nn.dilation2d with row-col
Question: Does anyone know of a way to do image-dilation with TensorFlow that isn't extremely inefficient?
Source Code for current solutions:
import numpy as np
import cv2
import tensorflow as tf
import time
def tf_dilate(heatmap, width: int, method: str = 'rowcol'):
    """Dilate the heatmap with a square kernel.

    Args:
        heatmap: 2-D tensor. It is indexed as ``heatmap[None, :, :, None]`` to
            add the batch and channel axes the TensorFlow ops are called with.
        width: Side length of the square kernel.
        method: Which implementation to use:
            ``'maxpool'`` - ``tf.nn.max_pool2d`` with a ``width`` window;
            ``'naive_dilate'`` - one ``tf.nn.dilation2d`` call with a
            ``width`` x ``width`` all-zero filter;
            ``'rowcol_dilate'`` (alias ``'rowcol'``, the default) - a
            ``1 x width`` dilation followed by a ``width x 1`` dilation.

    Returns:
        The dilated result with the added batch and channel axes removed.

    Raises:
        NotImplementedError: If ``method`` is not one of the names above.
    """
    # 'rowcol' was the declared default but matched no branch, so calling the
    # function without an explicit method always raised. Accept it as an alias.
    if method == 'rowcol':
        method = 'rowcol_dilate'
    if method not in ('maxpool', 'naive_dilate', 'rowcol_dilate'):
        raise NotImplementedError(f'No method {method}')

    batched = heatmap[None, :, :, None]

    if method == 'maxpool':
        pooled = tf.nn.max_pool2d(batched, ksize=width, padding='SAME', strides=(1, 1))
        return pooled[0, :, :, 0]

    def _flat_dilation(image, kernel_height: int, kernel_width: int):
        # An all-zero filter adds nothing to the input, so the dilation is a
        # plain sliding-window maximum over the filter's footprint.
        return tf.nn.dilation2d(
            image,
            filters=tf.zeros((kernel_height, kernel_width, 1), dtype=heatmap.dtype),
            strides=(1, 1, 1, 1),
            padding="SAME",
            data_format="NHWC",
            dilations=(1, 1, 1, 1),
        )

    if method == 'naive_dilate':
        return _flat_dilation(batched, width, width)[0, :, :, 0]

    # Row-column decomposition: the maximum over a square window equals the
    # maximum along rows followed by the maximum along columns.
    row_dilation = _flat_dilation(batched, 1, width)
    full_dilation = _flat_dilation(row_dilation, width, 1)
    return full_dilation[0, :, :, 0]
def test_dilation_options(img_shape=(480, 640), kernel_size=25, n_runs: int = 5):
    """Benchmark OpenCV against the ``tf_dilate`` methods and check they agree.

    A random non-negative float32 image is dilated with a square kernel by
    ``cv2.dilate`` and by each ``tf_dilate`` method. Every TensorFlow result
    must equal the OpenCV result exactly; the timings are then printed.

    Args:
        img_shape: ``(height, width)`` of the random test image.
        kernel_size: Side length of the square dilation kernel.
        n_runs: Number of timed repetitions per method (at least one is always
            performed); the fastest repetition is reported.

    Raises:
        AssertionError: If a TensorFlow result differs from the OpenCV result.
    """
    img = np.random.randn(*img_shape).astype(np.float32)**2
    structuring_element = np.ones((kernel_size, kernel_size), dtype=np.float32)
    tf_image = tf.constant(img, dtype=tf.float32)

    def run_once(version: str):
        if version == 'opencv':
            return cv2.dilate(img, kernel=structuring_element)
        # Convert inside the measured call so the timer cannot stop before the
        # result has actually been produced.
        return tf_dilate(tf_image, width=kernel_size, method=version).numpy()

    def get_result_and_time(version: str):
        # Untimed warm-up: the first call to an op typically pays one-time
        # setup costs that would otherwise dominate a single measurement.
        result = run_once(version)
        best = float('inf')
        for _ in range(max(1, n_runs)):
            # perf_counter is monotonic and intended for short intervals,
            # unlike time.time().
            t_start = time.perf_counter()
            result = run_once(version)
            best = min(best, time.perf_counter() - t_start)
        return best, result

    t_opencv, result_opencv = get_result_and_time('opencv')
    t_maxpool, result_maxpool = get_result_and_time('maxpool')
    t_naive_dilate, result_naive_dilate = get_result_and_time('naive_dilate')
    t_rowcol_dilate, result_rowcol_dilate = get_result_and_time('rowcol_dilate')

    assert np.array_equal(result_opencv, result_maxpool), "Maxpool result did not match opencv result"
    assert np.array_equal(result_opencv, result_naive_dilate), "Naive dilation result did not match opencv result"
    assert np.array_equal(result_opencv, result_rowcol_dilate), "Row-col dilation result did not match opencv result"

    print(f'Dilation of {img_shape[1]}x{img_shape[0]} image with a {kernel_size}x{kernel_size} kernel took: '
          f'\n {t_opencv*1000:.2f}ms using opencv'
          f'\n {t_maxpool*1000:.2f}ms using tf.nn.max_pool2d'
          f'\n {t_naive_dilate*1000:.2f}ms using tf.nn.dilation2d naively'
          f'\n {t_rowcol_dilate*1000:.2f}ms using tf.nn.dilation2d with row-col'
          )
# Run the benchmark only when this file is executed as a script, not on import.
if "__main__" == __name__:
    test_dilation_options()
width-1
comparisons per pixel. – Calculus