1. cutCreateTimer : 單位 ms ->據說在GPU上比較準(我自己實際測好像也是如此)
#include <cutil_inline.h>
....
// Measure a region of work with the cutil timer, which is started and stopped
// from the host. Every cutil call is wrapped in cutilCheckError so a failing
// timer call is reported instead of silently ignored.
// create and start timer
unsigned int timer = 0;
cutilCheckError(cutCreateTimer(&timer));
cutilCheckError(cutStartTimer(timer));
....
// Kernel launches return to the host before the GPU has finished, so wait for
// all outstanding device work here; otherwise the timer may be stopped while
// the GPU is still running and only the launch overhead gets measured.
// (cudaThreadSynchronize is the spelling that exists in every toolkit that
// ships cutil; on CUDA 4.0 and later cudaDeviceSynchronize is its replacement.)
cutilSafeCall(cudaThreadSynchronize());
// stop and destroy timer
cutilCheckError(cutStopTimer(timer));
// Read the value before deleting the timer; it is printed in milliseconds.
printf("GPU Processing time: %f (ms) \n", cutGetTimerValue(timer));
cutilCheckError(cutDeleteTimer(timer));
2. cuda event: 單位 ms ->還在測試準度...感覺沒上面來的精準
.....
// Time regions of GPU work with a pair of CUDA events: gpu0 marks the start
// and gpu1 marks the end of each measured region.
cudaEvent_t gpu0,gpu1;
// Create each event exactly once; creating the same event twice overwrites
// the first handle and leaks it.
cudaEventCreate(&gpu0);
cudaEventCreate(&gpu1);
...
// Start
cudaEventRecord(gpu0, 0);
.... // your program
// stop
cudaEventRecord(gpu1, 0);
// This wait is required: cudaEventRecord only enqueues the event, and
// cudaEventElapsedTime reports cudaErrorNotReady if the stop event has not
// actually completed yet.
cudaEventSynchronize(gpu1);
// Elapsed time between the two events, in milliseconds. Declared once and
// reused by the second measurement below.
float gpu_time = 0.0f;
cudaEventElapsedTime(&gpu_time, gpu0, gpu1);
printf(" %f " ,gpu_time);
...
// Start (second measurement: the same two events are simply recorded again)
cudaEventRecord(gpu0, 0);
.... // your program
// stop
cudaEventRecord(gpu1, 0);
// Required for the same reason as above: the stop event must have completed
// before the elapsed time can be queried.
cudaEventSynchronize(gpu1);
cudaEventElapsedTime(&gpu_time, gpu0, gpu1);
printf(" %f " ,gpu_time);
// Release the events once no further measurements are needed.
cudaEventDestroy(gpu0);
cudaEventDestroy(gpu1);