
TOPIC: Haar-like feature extraction

Haar-like feature extraction 3 years 9 months ago #38

  • jenn_tm
  • New Member
  • Posts: 1
  • Karma: 0
Hi, I am pursuing a project on car detection in a given image, and I am new to CUDA. For this I need to extract Haar-like features on a subwindow of a given size (20 x 40) to detect cars irrespective of their size (Viola-Jones uses this for face detection). I am having some trouble translating this code from C to CUDA using shared memory. I was wondering if there is any chance you could give me a hand with this project. Thank you in advance.
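For reference, the only identity I rely on: with an integral image ii (one row and one column larger than the input, as OpenCV produces it), the sum of any rectangle costs four lookups. A minimal host-side sketch of that identity (rectSum is just an illustrative name of mine):

// Sum of the w x h rectangle whose top-left pixel is (x, y), taken from
// a CV_32S integral image ii of size (rows+1) x (cols+1).
static inline int rectSum(const cv::Mat& ii, int x, int y, int w, int h)
{
	return ii.at<int>(y + h, x + w) - ii.at<int>(y, x + w)
	     - ii.at<int>(y + h, x)     + ii.at<int>(y, x);
}

A two-rectangle Haar filter is then just the difference of two such sums over adjacent rectangles.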


I know that there are some libraries, and even the new OpenCV supports this function, but I want to understand the CUDA execution model. For now I am just using GpuMat and the integral image from the OpenCV GPU module (2.3.10). The problem I am facing is that my code does not give me anything but 0, and I do not know how to save the 166,000 features for the horizontal and vertical filters. Does it depend on the threads? I am only using threads for reading the integral image (662 x 701). I should be grateful if you would consider helping me with this issue.
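From what I have read, the usual answer to the "where do the features go" question is to give every thread its own disjoint slice of the output array, computed from its global id, so threads never collide. This is only my sketch of the idea, not tested code; kernel_save and FEATS_PER_POS are names I made up, and the filter bank is shrunk for illustration:

#define FEATS_PER_POS 8   // matches the tiny h <= 4, w <= 2 bank below (assumption)

__global__ void kernel_save(cv::gpu::PtrStepSz<int> mat, int* feat)
{
	unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
	if (x >= mat.cols || y >= mat.rows) return;

	// Each position (= thread) owns a disjoint slice of 'feat',
	// so no two threads ever write to the same slot.
	unsigned int base = (y * mat.cols + x) * FEATS_PER_POS;
	int cont = 0;
	for (int h = 1; h <= 4; ++h)          // tiny filter bank, illustration only
		for (int w = 1; w <= 2; ++w)
			if (x + 2*w < mat.cols && y + h < mat.rows)
				feat[base + cont++] =            // right rectangle minus left rectangle
				      mat.ptr(y+h)[x+2*w] -     mat.ptr(y)[x+2*w]
				- 2 * mat.ptr(y+h)[x+w]   + 2 * mat.ptr(y)[x+w]
				+     mat.ptr(y+h)[x]     -     mat.ptr(y)[x];
}

The catch is that feat then needs rows * cols * FEATS_PER_POS ints, so for the full 166,000-feature bank I would probably have to evaluate one 20 x 40 window per thread rather than one pixel per thread.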

This is what I have so far:

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <cuda.h>
#include <cuda_runtime.h>

using namespace cv;
using namespace cv::gpu;
using namespace std;



#define BLOCK_DIM 32   // 32 x 32 = 1024 threads per block: needs compute capability >= 2.0

__global__ void kernel_X2(cv::gpu::PtrStepSz<int> mat, cv::gpu::PtrStepSz<int> mat1, int* feat){
	// mat is the integral image; mat1 is currently unused.

	unsigned int xIndex = blockIdx.x * BLOCK_DIM + threadIdx.x;
	unsigned int yIndex = blockIdx.y * BLOCK_DIM + threadIdx.y;

	// One tile of the integral image; the extra column avoids shared-memory bank conflicts.
	__shared__ int shMem[BLOCK_DIM][BLOCK_DIM+1];

	unsigned int shY = threadIdx.y;
	unsigned int shX = threadIdx.x;

	int cont = 0;

	// Stage this block's tile of the integral image in shared memory.
	if (xIndex < mat.cols && yIndex < mat.rows)
		shMem[shY][shX] = mat.ptr(yIndex)[xIndex];

	// __syncthreads() must be reached by every thread of the block,
	// so it cannot sit inside the boundary check above.
	__syncthreads();

	if (xIndex < mat.cols && yIndex < mat.rows)
	{
		// Horizontal two-rectangle Haar filter of height h and width 2*w,
		// anchored at (shX, shY) inside the tile.
		for (int h = 1; h <= BLOCK_DIM; ++h)
		{
			for (int w = 1; w <= (BLOCK_DIM/2); ++w)
			{
				// Both corners must stay inside the tile (&& rather than ||),
				// and strictly so: valid shared indices run 0..BLOCK_DIM-1.
				if (shX + 2*w < BLOCK_DIM && shY + h < BLOCK_DIM){
					// Right rectangle sum minus left rectangle sum,
					// each evaluated from four integral-image corners.
					feat[cont] = -  shMem[shY][shX]       +   shMem[shY+h][shX]
					           + 2*shMem[shY][shX+w]     - 2*shMem[shY+h][shX+w]
					           -   shMem[shY][shX+2*w]   +   shMem[shY+h][shX+2*w];
					// NOTE: cont is local to each thread, so all threads race
					// to write the same feat[0..] slots.
					cont++;
				}
			}
		}
	}
}



int main(void)
{
	Mat image, host_res;
	int *result, *result_d;

	GpuMat gpu_img, gpu_imgint, gpu_res;
	image = imread("damnit.jpg", IMREAD_GRAYSCALE);
	if (image.empty()) {
		fprintf(stderr, "could not read the input image\n");
		return 1;
	}

	// The integral image is one row and one column larger than the input.
	Size size(image.cols + 1, image.rows + 1);
	host_res.create(size, CV_32SC1);
	gpu_res.create(size, CV_32SC1);
	gpu_img.upload(image);
	gpu_res.upload(host_res);
	gpu::integral(gpu_img, gpu_imgint);

	// Allocate the result buffer on host and device, zero-initialised.
	result = (int*)calloc(512, sizeof(int));
	cudaMalloc(&result_d, 512 * sizeof(int));

	// Copy the zeroed buffer from host to device.
	cudaMemcpy(result_d, result, 512 * sizeof(int), cudaMemcpyHostToDevice);

	int num_elements_x = size.width;
	int num_elements_y = size.height;

	// One thread per integral-image element, in BLOCK_DIM x BLOCK_DIM blocks;
	// the grid is rounded up so the borders are covered.
	dim3 grid_size((num_elements_x + BLOCK_DIM - 1) / BLOCK_DIM,
	               (num_elements_y + BLOCK_DIM - 1) / BLOCK_DIM);
	dim3 block_size(BLOCK_DIM, BLOCK_DIM, 1);
	kernel_X2<<<grid_size, block_size>>>(gpu_imgint, gpu_res, result_d);
	cudaMemcpy(result, result_d, 512 * sizeof(int), cudaMemcpyDeviceToHost);
	//gpu_res.download(host_res);
	//imshow("imagen", host_res);
	//waitKey();

	for (int j = 0; j < 512; j++)
		printf("%2d\n", result[j]);

	cudaFree(result_d);
	free(result);
	return 0;
}
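One thing I still plan to try, since I only ever get zeros: checking whether the kernel actually launched and finished. As far as I know these are the standard CUDA runtime calls for that, placed right after the launch:

	kernel_X2<<<grid_size, block_size>>>(gpu_imgint, gpu_res, result_d);
	cudaError_t err = cudaGetLastError();   // reports launch/configuration errors
	if (err != cudaSuccess)
		fprintf(stderr, "launch failed: %s\n", cudaGetErrorString(err));
	err = cudaDeviceSynchronize();          // reports errors raised during execution
	if (err != cudaSuccess)
		fprintf(stderr, "kernel failed: %s\n", cudaGetErrorString(err));

If the kernel fails, the device-to-host copy never overwrites the calloc'd buffer, which would explain seeing nothing but zeros.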


Regards,


jenn_tm