2 // https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
5 # define EXPORT __declspec(dllexport)
13 #include <cuda_runtime_api.h>
14 #include <nppi_filtering_functions.h>
16 EXPORT int nppif_main()
19 * 8-bit unsigned single-channel 1D row convolution.
21 const int simgrows = 32;
22 const int simgcols = 32;
23 Npp8u *d_pSrc, *d_pDst;
24 const int nMaskSize = 3;
26 oROI.width = simgcols - nMaskSize;
27 oROI.height = simgrows;
28 const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
29 const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
30 const int simgpix = simgrows * simgcols;
31 const int dimgpix = oROI.width * oROI.height;
32 const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
33 const int nDstStep = oROI.width * sizeof(d_pDst[0]);
35 const int nDivisor = 1;
36 const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
38 const Npp32s nAnchor = 2;
39 cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
40 if (err != cudaSuccess) {
41 fprintf(stderr, "Cuda error %d\n", __LINE__);
44 err = cudaMalloc((void**)&d_pDst, dimgsize);
45 if (err != cudaSuccess) {
46 fprintf(stderr, "Cuda error %d\n", __LINE__);
49 err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
50 if (err != cudaSuccess) {
51 fprintf(stderr, "Cuda error %d\n", __LINE__);
54 // set image to pixval initially
55 err = cudaMemset(d_pSrc, pixval, simgsize);
56 if (err != cudaSuccess) {
57 fprintf(stderr, "Cuda error %d\n", __LINE__);
60 err = cudaMemset(d_pDst, 0, dimgsize);
61 if (err != cudaSuccess) {
62 fprintf(stderr, "Cuda error %d\n", __LINE__);
65 err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
66 cudaMemcpyHostToDevice);
67 if (err != cudaSuccess) {
68 fprintf(stderr, "Cuda error %d\n", __LINE__);
73 nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
74 nMaskSize, nAnchor, nDivisor);
75 assert(ret == NPP_NO_ERROR);
76 Npp8u* h_imgres = new Npp8u[dimgpix];
77 err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
78 if (err != cudaSuccess) {
79 fprintf(stderr, "Cuda error %d\n", __LINE__);
83 for (int i = 0; i < dimgpix; i++) {
84 if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
85 fprintf(stderr, "h_imgres at index %d failed to match\n", i);