c++ - using a pointer to vector<T>::data() for cublasSgemm -


i trying use vector::data() pointer when using cudamalloc, cudamemcpy, , cublassgemm can't seem work. if not mistaken, vector::data() should return pointer actual array stored in memory vector should same having t* aarray pointer array of type t stored in memory. using latter work, not data() pointer.

here code working on:

matrix<t> matrix<t>::cudaprod(matrix<t>&a,matrix<t>&b, matrix<t>&c) { c = matrix<t>(a.height, b.width); //resizing of vector of elements matrix c //a[m][n]*b[n][k]=c[m][k] int m = a.height; int n = b.height; int k = b.width; float alpha = 1.0f; float beta = 0.0f;  t* d_a = a.getpointer(); t* d_b = b.getpointer(); t* d_c = c.getpointer();  cudamalloc(&d_a,a.size); cudamalloc(&d_b,b.size); cudamalloc(&d_c,c.size);  cudamemcpy(d_a,a.getpointer(),a.size,cudamemcpyhosttodevice); cudamemcpy(d_b,b.getpointer(),b.size,cudamemcpyhosttodevice);  cublashandle_t handle;  cublasstatus_t status = cublascreate(&handle);  if (status != cublas_status_success)  {     std::cerr << "!!!! cublas initialization error\n"; }  status = cublassgemm(handle,cublas_op_n,cublas_op_n,k,m,n,&alpha,d_b,k,d_a,n,&beta,d_c,k);  if (status != cublas_status_success)  {     std::cerr << "!!!! kernel execution error.\n"; }  status = cublasdestroy(handle); if (status != cublas_status_success)  {     std::cerr << "!!!! shutdown error (a)\n"; }  cudamemcpy(c.getpointer(), d_c, c.size,cudamemcpydevicetohost);  cudafree(d_a); cudafree(d_b); cudafree(d_c); 

the getpointer() member function returns vector::data() of vector of elements matrix object. size vector element's size in memory.

the vector of matrix c returns zeros when using data() pointer, , returns product of matrix , b when using t* aarray pointers without vectors.

is possible use vectors store array of elements , data() pointer initialize device copy of array, or forced use c style array storage on host? also, have tried using thrust::device_vector , works stay away creating raw_pointer_casts.

thanks help!

edit: having trouble copy , pasting, here complete example:

#include <cuda.h> #include <cuda_runtime.h> #include <cuda_device_runtime_api.h> #include <cublas_v2.h> #include <vector> #include <iostream>  using namespace std;  template<typename t> class matrix { public: ~matrix(); matrix(); matrix(int rows, int columns); int width; int height; int stride; size_t size;  t &getelement(int row, int column); void setelement(int row, int column, t value); void setelements(vector<t> value); vector<t>& getelements(); t* getpointer(); matrix<t> cudaprod(matrix<t>&a,matrix<t>&b, matrix<t>&c); private: vector<t> elements; t* firstelement; };  template<typename t> matrix<t>::~matrix() { }  template<typename t> matrix<t>::matrix() { }  template<typename t> matrix<t>::matrix(int rows, int columns) { height = rows; width = columns; stride = columns; //in row major order equal # of columns elements.resize(rows*columns); firstelement = elements.data(); size = height*width*sizeof(t); }  template<typename t> t &matrix<t>::getelement(int row, int column) { return elements[row*width + column]; //row major order return }  template<typename t> vector<t>& matrix<t>::getelements() { return elements; //row major order return }  template<typename t> void matrix<t>::setelement(int row, int column, t value) { elements[row*width + column] = value; //row major order return }  template<typename t> void matrix<t>::setelements(vector<t> value) { elements = value; }  template<typename t> t* matrix<t>::getpointer() { return firstelement; }   template<typename t> //matrix multiplication using cuda matrix<t> matrix<t>::cudaprod(matrix<t>&a,matrix<t>&b, matrix<t>&c) { c = matrix<t>(a.height, b.width); //a[m][n]*b[n][k]=c[m][k] int m = a.height; int n = b.height; int k = b.width; float alpha = 1.0f; float beta = 0.0f;   //thrust usage  /*thrust::device_vector<t> d_a = a.getelements(); t* d_a = thrust::raw_pointer_cast(&d_a[0]); thrust::device_vector<t> d_b = b.getelements(); t* d_b = thrust::raw_pointer_cast(&d_b[0]); thrust::device_vector<t> d_c = c.getelements(); t* d_c = thrust::raw_pointer_cast(&d_c[0]);*/  t* d_a = a.getpointer(); t* d_b = b.getpointer(); t* d_c = c.getpointer();  cudamalloc(&d_a,a.size); cudamalloc(&d_b,b.size); cudamalloc(&d_c,c.size);  cudamemcpy(d_a,a.getpointer(),a.size,cudamemcpyhosttodevice); cudamemcpy(d_b,b.getpointer(),b.size,cudamemcpyhosttodevice); cudamemcpy(d_c,c.getpointer(),c.size,cudamemcpyhosttodevice);  cublashandle_t handle;  cublasstatus_t status = cublascreate(&handle);  if (status != cublas_status_success)  {     std::cerr << "!!!! cublas initialization error\n"; }  status = cublassgemm(handle,cublas_op_n,cublas_op_n,k,m,n,&alpha,d_b,k,d_a,n,&beta,d_c,k);  if (status != cublas_status_success)  {     std::cerr << "!!!! kernel execution error.\n"; }  status = cublasdestroy(handle); if (status != cublas_status_success)  {     std::cerr << "!!!! shutdown error (a)\n"; }  //thrust::copy(d_c.begin(), d_c.end(), c.getelements().begin());  cudamemcpy(c.getpointer(), d_c, c.size,cudamemcpydevicetohost);  cudafree(d_a); cudafree(d_b); cudafree(d_c);  return c;  }  int main() {     matrix<float> a(2,2); matrix<float> b(2,2); matrix<float> c;  vector<float> ae(4,2); vector<float> be(4,4); a.setelements(ae); b.setelements(be);  c = c.cudaprod(a, b, c);  //function call cudaprod()  for(int row = 0; row < a.height; ++row) {     for(int col = 0; col < a.width; ++col)     {                cout<<a.getelement(row, col)<<" "; //h_c stored on device in column major order, need switch row major order      }      printf("\n"); } printf("\n");  for(int row = 0; row < b.height; ++row) {     for(int col = 0; col < b.width; ++col)     {                cout<<b.getelement(row, col)<<" "; //h_c stored on device in column major order, need switch row major order      }      printf("\n"); } printf("\n");  for(int row = 0; row < c.height; ++row) {     for(int col = 0; col < c.width; ++col)     {                cout<<c.getelement(row, col)<<" "; //h_c stored on device in column major order, need switch row major order      }      printf("\n"); } printf("\n"); } 

from std::vector::data documentation, data() returns both const , non-const qualified pointers, depending on fact vector qualified const or not. quoting documentation

if vector object const-qualified, function returns pointer const value_type. otherwise, returns pointer value_type.

accordingly, using

firstelement = elements.data(); 

in matrix constructor fine read/write data.

the main problem code declaring c in main, passing reference c cudaprod method , internally using

c = matrix<t>(a.height, b.width); 

which redeclare matrix.

if change definition of cudaprod method to

template<typename t> void cudaprod(matrix<t>&a,matrix<t>&b, matrix<t>&c) 

remove the

return c; 

statement , allocate space c in main as

matrix<float> c(2,2); vector<float> ce(4,10); c.setelements(ce); 

your code should work correctly.


Comments

Popular posts from this blog

android - Get AccessToken using signpost OAuth without opening a browser (Two legged Oauth) -

org.mockito.exceptions.misusing.InvalidUseOfMatchersException: mockito -

google shop client API returns 400 bad request error while adding an item -