CUDA debug invalid kernel image error -
I wrote the following CUDA kernel and am trying to load it as a module:
#include <stdio.h>

// Element-wise vector addition: c[i] = a[i] + b[i].
//
// extern "C" disables C++ name mangling so the kernel is emitted under the
// plain name "vadd" in the generated PTX.
//
// Each thread handles the single element at its global 1D index. The kernel
// takes no length parameter and performs no bounds check, so the launch must
// supply exactly one thread per element (gridDim.x * blockDim.x == element
// count); any surplus thread would access memory out of range.
// The device-side printf is a debugging aid only.
extern "C" {
__global__ void vadd(const float *a, const float *b, float *c) {
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    printf("thread id %d\n", i);
    c[i] = a[i] + b[i];
}
}
I compile it to PTX code using the following command:
nvcc -ptx -arch=sm_20 vadd.cu
When trying to load the file as a module using cuModuleLoad, I get a
CUDA 200 error (invalid kernel image). How can I find out what is wrong with the kernel image? I have tried ptxas,
and according to that, the generated PTX code is fine.
Edit: this is the code I am using to load the module:
#include "cuda.h"
#include <cassert>
#include <dlfcn.h>
#include <stdio.h>

// Reports a failed driver-API call: prints the numeric CUresult code and then
// aborts through assert. Does nothing when err is CUDA_SUCCESS.
void check(CUresult err) {
    if (err != CUDA_SUCCESS) {
        printf("error %i\n", err);
    }
    assert(err == CUDA_SUCCESS);
}

// Loads libcuda.so at run time, resolves the driver-API entry points by name
// with dlsym, creates a context on device 0 and tries to load "vadd.ptx" as a
// module. Returns 0 on success; any failing step aborts via check()/assert.
int main(int argc, char **argv) {
    void *cuda = dlopen("libcuda.so", RTLD_NOW | RTLD_DEEPBIND | RTLD_GLOBAL);
    assert(cuda != NULL);

    printf("cuInit\n");
    // cuInit takes an `unsigned int Flags` argument that must be 0; declare the
    // pointer with the real signature so a defined value is actually passed.
    CUresult (*init)(unsigned int) =
        (CUresult (*)(unsigned int)) dlsym(cuda, "cuInit");
    assert(init != NULL);
    check(init(0));

    printf("cuDeviceGet\n");
    CUresult (*deviceGet)(CUdevice *, int) =
        (CUresult (*)(CUdevice *, int)) dlsym(cuda, "cuDeviceGet");
    assert(deviceGet != NULL);
    CUdevice device;
    check(deviceGet(&device, 0));

    printf("cuCtxCreate\n");
    // NOTE(review): the symbol is looked up by its literal name, so the
    // cuda.h #define that maps cuCtxCreate to cuCtxCreate_v2 does not apply
    // here. The unversioned entry point hands back a legacy API context;
    // look up "cuCtxCreate_v2" instead if a current-API context is required
    // (cuCtxGetApiVersion reports which one was created).
    CUresult (*ctxCreate)(CUcontext *, unsigned int, CUdevice) =
        (CUresult (*)(CUcontext *, unsigned int, CUdevice)) dlsym(cuda, "cuCtxCreate");
    assert(ctxCreate != NULL);
    CUcontext context;
    check(ctxCreate(&context, 0, device));

    printf("cuModuleLoad\n");
    CUresult (*moduleLoad)(CUmodule *, const char *) =
        (CUresult (*)(CUmodule *, const char *)) dlsym(cuda, "cuModuleLoad");
    assert(moduleLoad != NULL);
    CUmodule mod;
    check(moduleLoad(&mod, "vadd.ptx"));

    return 0;
}
This is related to the question "Why does cuCtxCreate create an old context?": using cuCtxCreate
directly gives you an old API context (v3.1), which is incompatible with the usage of printf.
You can check the API version with cuCtxGetApiVersion.
If you switch to cuCtxCreate_v2,
which is what gets used through the #define
statements in cuda.h,
you'll get a more recent API context.
In order to spot the discrepancy, I ran the sample with LD_DEBUG=symbols
and compared it with a version that uses the CUDA API directly (since that one runs the sample PTX). Comparing the symbol resolutions, the big difference is the call to cuCtxCreate
:
cuCtxCreate(...)
symbol=cuCtxCreate_v2; lookup in file=./test [0]
symbol=cuCtxCreate_v2; lookup in file=/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0]
... whereas in the original code, using dlsym(..., "cuCtxCreate")
is mapped directly to the unversioned cuCtxCreate
symbol.
Comments
Post a Comment