Jump to content

cudaMemcpy access violation location

RexLee
void _init_gaussian_blur(const int _row, const int _col, uchar4 * _original_image, uchar4 *_blurred_image, const double *const _gauss_array, const int _gauss_array_size)
{
	uchar4 *dev_original_image;
	uchar4 *dev_blurred_image;
	unsigned char *dev_b_channel;
	unsigned char *dev_g_channel;
	unsigned char *dev_r_channel;
	unsigned char *dev_blurred_b_channel;
	unsigned char *dev_blurred_g_channel;
	unsigned char *dev_blurred_r_channel;

	int block_x = ceil((double)_row / 32);
	int block_y = ceil((double)_col / 32);

	cudaMalloc((void**)&dev_original_image, sizeof(uchar4) * 2000 * 2000);
	cudaMalloc((void**)&dev_blurred_image, sizeof(uchar4) * 2000 * 2000);
	cudaMalloc((void**)&dev_b_channel, sizeof(unsigned char) * 2000 * 2000);
	cudaMalloc((void**)&dev_g_channel, sizeof(unsigned char) * 2000 * 2000);
	cudaMalloc((void**)&dev_r_channel, sizeof(unsigned char) * 2000 * 2000);
	cudaMemcpy(dev_original_image, _original_image, sizeof(uchar4) * 2000 * 2000, cudaMemcpyHostToDevice);
	
	std::ofstream test_out;
	test_out.open("wtest.txt");
	for (int i = 0; i < _row; ++i)
	{
		for (int j = 0; j < _col; ++j)
			test_out << static_cast<int>(_original_image[i * _col + j].x) << " ";
		test_out << "\n";
	}
	test_out.close();
	_seperate_channel<<<dim3(block_x, block_y, 1), dim3(32, 32, 1)>>>(_row, _col, dev_original_image, dev_b_channel, dev_g_channel, dev_r_channel);
	cudaDeviceSynchronize();
	cudaFree(dev_original_image);

	cudaMalloc((void**)&dev_blurred_b_channel, sizeof(unsigned char) * 2000 * 2000);
	_calc_channel_blur<<<dim3(block_x, block_y, 1), dim3(32, 32, 1)>>>(_row, _col, dev_b_channel, _gauss_array, _gauss_array_size, dev_blurred_b_channel);
	cudaDeviceSynchronize();
	cudaFree(dev_b_channel);

	cudaMalloc((void**)&dev_blurred_g_channel, sizeof(unsigned char) * 2000 * 2000);
	_calc_channel_blur << <dim3(block_x, block_y, 1), dim3(32, 32, 1) >> >(_row, _col, dev_g_channel, _gauss_array, _gauss_array_size, dev_blurred_g_channel);
	cudaDeviceSynchronize();
	cudaFree(dev_g_channel);

	cudaMalloc((void**)&dev_blurred_r_channel, sizeof(unsigned char) * 2000 * 2000);
	_calc_channel_blur << <dim3(block_x, block_y, 1), dim3(32, 32, 1) >> >(_row, _col, dev_r_channel, _gauss_array, _gauss_array_size, dev_blurred_r_channel);
	cudaDeviceSynchronize();
	cudaFree(dev_r_channel);

	_combine_channel << <dim3(block_x, block_y, 1), dim3(32, 32, 1) >> > (_row, _col, dev_blurred_b_channel, dev_blurred_g_channel, dev_blurred_r_channel, dev_blurred_image);
	cudaMemcpy(_blurred_image, dev_blurred_image, sizeof(uchar4) * 2000 * 2000, cudaMemcpyDeviceToHost);
	cudaFree(dev_blurred_b_channel);
	cudaFree(dev_blurred_g_channel);
	cudaFree(dev_blurred_r_channel);
	cudaFree(dev_blurred_image);
}

The problem occurs in line     cudaMemcpy(dev_original_image, _original_image, sizeof(uchar4) * 2000 * 2000, cudaMemcpyHostToDevice);.

When running the program an access violation reading location error occurs.

I printed the _original_image to a txt and it looks fine so I don't know what the problem is.

Link to comment
Share on other sites

Link to post
Share on other sites

How are you creating _original_image? And you should add some error checking to make sure the cudaMallocs aren't failing.

1474412270.2748842

Link to comment
Share on other sites

Link to post
Share on other sites

int main()
{
  	cv::Mat image;
	cv::Mat imageRGBA;
	image = cv::imread(file_name.c_str(), CV_LOAD_IMAGE_COLOR);
	.
    .
    .
  	uchar4 *original_image;
  	original_image = (uchar4*)malloc(sizeof(uchar4) * 2000 * 2000);
  	_convert_to_uchar(file_name, &original_image, &row, &col, image, imageRGBA);
  	.
    .
    .
}
void _convert_to_uchar(const std::string _file_name, uchar4 **_original_image, int **_row, int **_col, cv::Mat &_image, cv::Mat &_imageRGBA)
{
	**_row = _image.rows;
	**_col = _image.cols;
	cv::cvtColor(_image, _imageRGBA, CV_BGR2BGRA);
	*_original_image = (uchar4*)_imageRGBA.ptr<unsigned char>(0);
}

 

Link to comment
Share on other sites

Link to post
Share on other sites

You malloc memory for original_image and then, in the _convert_to_uchar function, overwrite the pointer in this line:

 

*_original_image = (uchar4*)_imageRGBA.ptr<unsigned char>(0);

That's at least a leak.

I don't know the specifics about the libraries you're using, but since the pointer no longer points to your allocated memory but to whatever the  cv::Mat instance returned, using the pointer afterwards could lead to the access violation.

 

Perhaps you are meant to copy (perhaps with conversion, see below) the data from the buffer pointed to by the cv::Mat instance to your own buffer ?

 

Additional smells:

-uchar4 and unsigned char are 2 different things, yet you seem to think you can get away with a simple cast. Are you sure this will work? uchar4 looks like a struct from what i can find online, structs can have padding bytes for alignment in memory. Are you really really sure you do not need to convert the data in some way?

 

-Why are you using double pointers for _row and _col? That's a complicated construct for simple int's.

 

Link to comment
Share on other sites

Link to post
Share on other sites

Yeah the code is kinda of messy right now. The code worked after I made another uchar4* called reload_image and used a for loop to copy all the information from _original_image. 

Link to comment
Share on other sites

Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

×