Use Vulkan VkImage as a CUDA cuArray
Asked Answered
C

1

14

What is the correct way of using a Vulkan VkImage as a CUDA cuArray?

I've been trying to follow some examples, however I get a CUDA_ERROR_INVALID_VALUE on a call to cuExternalMemoryGetMappedMipmappedArray()

To provide the information in an ordered way.

I'm using CUDA 10.1

Base code comes from https://github.com/SaschaWillems/Vulkan, in particular I'm using the 01 - Vulkan Gears demo, enriched with the saveScreenshot method 09 - Capturing screenshots

Instead of saving the snapshot image to a file, I'll be sending the snapshot image into CUDA as a CUarray.

I've enabled the following instance and device extensions:

        std::vector<const char*> instanceExtensions = {
                VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
                VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
                VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
                VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME };

        std::vector<const char*> deviceExtensions = { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
                VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
                VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
                VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME };

I have a VkImage, created as follows:

        // Create the linear tiled destination image to copy to and to read the memory from
        VkImageCreateInfo imageCreateCI(vks::initializers::imageCreateInfo());
        imageCreateCI.imageType = VK_IMAGE_TYPE_2D;
        // Note that vkCmdBlitImage (if supported) will also do format conversions if the swapchain color format would differ
        imageCreateCI.format = VK_FORMAT_R8G8B8A8_UNORM;
        imageCreateCI.extent.width = width;
        imageCreateCI.extent.height = height;
        imageCreateCI.extent.depth = 1;
        imageCreateCI.arrayLayers = 1;
        imageCreateCI.mipLevels = 1;
        imageCreateCI.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        imageCreateCI.samples = VK_SAMPLE_COUNT_1_BIT;
        imageCreateCI.tiling = VK_IMAGE_TILING_LINEAR;
        imageCreateCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
        imageCreateCI.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;

        VkExternalMemoryImageCreateInfoKHR extImageCreateInfo = {};

        /*
         * Indicate that the memory backing this image will be exported in an
         * fd. In some implementations, this may affect the call to
         * GetImageMemoryRequirements() with this image.
         */
        extImageCreateInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR;
        extImageCreateInfo.handleTypes |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;

        imageCreateCI.pNext = &extImageCreateInfo;

        // Create the image
        VkImage dstImage;
        VK_CHECK_RESULT(vkCreateImage(device, &imageCreateCI, nullptr, &dstImage));
        // Create memory to back up the image
        VkMemoryRequirements memRequirements;
        VkMemoryAllocateInfo memAllocInfo(vks::initializers::memoryAllocateInfo());
        VkDeviceMemory dstImageMemory;
        vkGetImageMemoryRequirements(device, dstImage, &memRequirements);
        memAllocInfo.allocationSize = memRequirements.size;
        // Memory must be host visible to copy from
        memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

        VkExportMemoryAllocateInfoKHR exportInfo = {};
        exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
        exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;

        memAllocInfo.pNext = &exportInfo;


        VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &dstImageMemory));
        VK_CHECK_RESULT(vkBindImageMemory(device, dstImage, dstImageMemory, 0));

From there I'll:

Get the Vulkan Memory Handler:

int CuEncoderImpl::getVulkanMemoryHandle(VkDevice device,
        VkDeviceMemory memory) {
    // Get handle to memory of the VkImage

    int fd = -1;
    VkMemoryGetFdInfoKHR fdInfo = { };
    fdInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
    fdInfo.memory = memory;
    fdInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;

    auto func = (PFN_vkGetMemoryFdKHR) vkGetDeviceProcAddr(device,
            "vkGetMemoryFdKHR");

    if (!func) {
        printf("Failed to locate function vkGetMemoryFdKHR\n");
        return -1;
    }

    VkResult r = func(device, &fdInfo, &fd);
    if (r != VK_SUCCESS) {
        printf("Failed executing vkGetMemoryFdKHR [%d]\n", r);
        return -1;
    }

    return fd;

}

Import the memory:

    CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc = { };
    memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD;
    memDesc.handle.fd = getVulkanMemoryHandle(device, memory);
    memDesc.size = extent.width*extent.height*4;

    CUDA_DRVAPI_CALL(cuImportExternalMemory(&externalMem, &memDesc));

And map the memory: This is the step that it is failing.

CUarray CuEncoderImpl::getCUDAArrayFromExternalMemory(const VkExtent3D &extent,const CUexternalMemory &m_extMem) {
    CUmipmappedArray m_mipmapArray;
    CUresult result = CUDA_SUCCESS;
    CUarray array;

    CUDA_ARRAY3D_DESCRIPTOR arrayDesc = { };
    arrayDesc.Width = extent.width;
    arrayDesc.Height = extent.height;
    arrayDesc.Depth = 0;
    arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT32;
    arrayDesc.NumChannels = 4;
    arrayDesc.Flags = CUDA_ARRAY3D_SURFACE_LDST;

    CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = { };
    mipmapArrayDesc.arrayDesc = arrayDesc;
    mipmapArrayDesc.numLevels = 1;
    mipmapArrayDesc.offset = 0;

    CUDA_DRVAPI_CALL(cuExternalMemoryGetMappedMipmappedArray(&m_mipmapArray, m_extMem, &mipmapArrayDesc));

    CUDA_DRVAPI_CALL(cuMipmappedArrayGetLevel(&array, m_mipmapArray, 0));
    return array;
}

I've been trying multiple combinations of the parameters, but failed so far. The error point to an invalid parameter, but I'm not sure how to find what's wrong.

Only thing that had worked is to map the Vulkan image memory to a host buffer and then copying it into the CUDA array... but I guess that's expensive and I'd like to avoid it if possible.

Cuneiform answered 29/3, 2019 at 20:12 Comment(1)
@talonmies Vulkan-CUDA interoperability is a feature of CUDA 10, see devblogs.nvidia.com/cuda-10-features-revealedCuneiform
C
16

For the record, I finally got this to work.

Some notes and the modifications I had to do to the code listed in the question:

  1. Vulkan-CUDA interoperability is advertised as a feature of CUDA 10, see CUDA 10 Features revealed
  2. The tiling of the image that is going to be mapped had to be `VK_IMAGE_TILING_OPTIMAL
        imageCreateCI.tiling = VK_IMAGE_TILING_OPTIMAL;
  1. The memory for that image must be allocated with the VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
        memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
  1. The memory descriptor when importing the memory should use the memory size that was returned in the memory requirements (size below is memRequirements.size from the code creating the image):
    CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc = { };
    memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD;
    memDesc.handle.fd = getVulkanMemoryHandle(device, memory);
    memDesc.size = size;

    CUDA_DRVAPI_CALL(cuImportExternalMemory(&externalMem, &memDesc));
  1. Finally the mapped array is described as being CU_AD_FORMAT_UNSIGNED_INT8 with four channels and with a CUDA_ARRAY3D_COLOR_ATTACHMENT
    CUDA_ARRAY3D_DESCRIPTOR arrayDesc = { };
    arrayDesc.Width = extent.width;
    arrayDesc.Height = extent.height;
    arrayDesc.Depth = 0;
    arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
    arrayDesc.NumChannels = 4;
    arrayDesc.Flags = CUDA_ARRAY3D_COLOR_ATTACHMENT;

    CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = { };
    mipmapArrayDesc.arrayDesc = arrayDesc;
    mipmapArrayDesc.numLevels = 1;
    mipmapArrayDesc.offset = 0;

    CUDA_DRVAPI_CALL(cuExternalMemoryGetMappedMipmappedArray(&m_mipmapArray, m_extMem, &mipmapArrayDesc));

After those changes, I was able to get it to work. I few the changes were glaring mistakes on my side (like the size), a few things I found carefully re-reading the documentation for the 100th time, others were guesses at hints in the documentation and, finally, a lot of trial and error.

Cuneiform answered 6/4, 2019 at 9:35 Comment(3)
Many thanks to you, I managed to run that in less than a day :)Alida
Though, I am not sure about one thing. Which destruction/deallocation functions we must call in this pipeline? I guess, cudaFreeMipmappedArray and cudaDestroyExternalMemory, but not cudaFreeArray on a mipmap level?Alida
Adding my contribution here, as the code above almost worked for me, but was failing a bit randomly (image had weird artifacts), but not always. Anyway, I realized the memory allocation was giving me an offset, which is not used here. So I changed that: memDesc.size = offset + size; // Not really sure about this one mipmapArrayDesc.offset = offset; // From allocated memoryAffiche

© 2022 - 2024 — McMap. All rights reserved.