将vdma的基址映射到虚拟地址空间上,在linux系统下就可以直接通过指针访问vdma的各个寄存器
/* Map 64 KiB (0x10000) of the device starting at baseAddr. mmap works in whole
 * pages, so a length of 65535 would be rounded up anyway; spell out the size
 * that is actually mapped. */
handle->vdmaVirtualAddress = (unsigned int*)mmap(NULL, 0x10000, PROT_READ | PROT_WRITE, MAP_SHARED, handle->vdmaHandler, (off_t)handle->baseAddr);
这个函数的返回值就是申请到的vdma基址的虚拟地址,这个地址加上寄存器offset就可以用来配置各个寄存器了,在使用之前要校验下申请的地址是不是有效的
/* mmap reports failure with MAP_FAILED (not NULL); bail out before the pointer is used. */
if (handle->vdmaVirtualAddress == MAP_FAILED) {
    /* perror appends ": <error text>\n" itself, so the message must not end in a newline. */
    perror("vdmaVirtualAddress mapping for absolute memory access failed");
    return -1;
}
然后还要申请图像存放的地址,这里根据vdma的运行方式可以配置多个地址存放多幅图像数据。
下面是我的主程序,仅供参考:
/*
 * Seconds elapsed between two gettimeofday() samples.
 * The arithmetic mirrors the inline computation it replaces: the microsecond
 * difference is formed in integer math, then divided as a float.
 */
static float elapsed_seconds(const struct timeval &from, const struct timeval &to)
{
    float usec = 1000000 * (to.tv_sec - from.tv_sec) + (to.tv_usec - from.tv_usec);
    return usec / 1000000;
}

/*
 * Test driver: loads INPUT_IMAGE, converts it to YUV 4:2:2, copies it into
 * the first VDMA frame buffer, starts the VDMA, waits for the filter to
 * finish, reads the result back from the second frame buffer and writes it
 * to OUTPUT_IMAGE. When SW_GENERATE is set, the same image is first processed
 * with opencv_sobel() and written to OUTPUT_IMAGE_GOLDEN.
 *
 * Returns the result of image_compare(OUTPUT_IMAGE, OUTPUT_IMAGE_GOLDEN),
 * or -1 if the input image is unusable, the filter fails to initialise, or
 * the optional frame-buffer copy check fails.
 */
int main() {
    // Frame geometry the frame-buffer copies below are sized for.
    enum { FRAME_WIDTH = 640, FRAME_HEIGHT = 480, FRAME_BYTES_PER_PIXEL = 2 };
    const size_t frame_bytes = FRAME_WIDTH * FRAME_HEIGHT * FRAME_BYTES_PER_PIXEL;

    struct timeval tstart, tend, hls_start, hls_end;

    Mat src_rgb = imread(INPUT_IMAGE, 1);
    if (src_rgb.empty()) {
        printf("failed to load input image\n");
        return -1;
    }
    // The memcpy calls below move exactly frame_bytes; any other image size
    // would read or write past the end of the Mat buffers.
    if (src_rgb.cols != FRAME_WIDTH || src_rgb.rows != FRAME_HEIGHT) {
        printf("input image must be %dx%d, got %dx%d\n",
               FRAME_WIDTH, FRAME_HEIGHT, src_rgb.cols, src_rgb.rows);
        return -1;
    }

    Mat src_yuv(src_rgb.rows, src_rgb.cols, CV_8UC2);
    Mat dst_yuv(src_rgb.rows, src_rgb.cols, CV_8UC2);
    Mat dst_rgb(src_rgb.rows, src_rgb.cols, CV_8UC3);

    // convert to yuv format
    cvtcolor_rgb2yuv422(src_rgb, src_yuv);
    // IplImage headers over the Mats, used for the imageData pointers below.
    IplImage src = src_yuv;
    IplImage dst = dst_yuv;

#if SW_GENERATE
    printf("opencv software processing\n");
    // time the software path (initialisation included)
    gettimeofday(&tstart, NULL);
    opencv_sobel_init();
    opencv_sobel(&src, &dst);
    gettimeofday(&tend, NULL);
    printf("soft used time is %f\n", elapsed_seconds(tstart, tend));
    cvtColor(dst_yuv, dst_rgb, CV_YUV2BGR_YUYV);
    imwrite(OUTPUT_IMAGE_GOLDEN, dst_rgb);
#endif

    if (init_filter() != XST_SUCCESS) {
        printf("filter init faild!");
        // Do not go on to program and wait on a filter that never initialised.
        return -1;
    }
    set_reg_filter();

    // Setup VDMA handle and memory-mapped ranges
    // (arguments after the base address are presumably width, height and
    // bytes per pixel -- confirm against vdma_setup's prototype)
    vdma_setup(&handle, 0x43000000, FRAME_WIDTH, FRAME_HEIGHT, FRAME_BYTES_PER_PIXEL,
               0x1f400000, 0x1f800000, 0x1fc00000);

    gettimeofday(&tstart, NULL);
    memcpy(handle.fb1VirtualAddress, (uchar *)src.imageData, frame_bytes);

#if MEMCPY_CHECK
    printf("memcpy checking \n");
    bool copy_mismatch = false;
    const int row_bytes = FRAME_WIDTH * FRAME_BYTES_PER_PIXEL;
    for (int i = 0; i < src_yuv.rows; i++) {        // row 480
        for (int j = 0; j < src_yuv.cols; j++) {    // col 640, 2 bytes each
            Vec2b pix = src_yuv.at<Vec2b>(i, j);
            int offset = j * FRAME_BYTES_PER_PIXEL + i * row_bytes;
            if (handle.fb1VirtualAddress[offset] != pix.val[0] ||
                handle.fb1VirtualAddress[offset + 1] != pix.val[1]) {
                copy_mismatch = true;
            }
        }
    }
    if (copy_mismatch) {
        printf("img copy error");
        // A failed copy is an error; do not report success to the caller.
        return -1;
    }
    printf("memcpy check result FB2:(ORI)\n");
    for (int j = 512; j < 512 + 20; j++) {
        printf(" %02x", handle.fb2VirtualAddress[j]);
    }
    printf("\n");
#endif

    gettimeofday(&hls_start, NULL);
    vdma_start_triple_buffering(&handle);
    wait_done_filter();
    gettimeofday(&hls_end, NULL);

#if RESULT_CHECK
    printf("RESULT CHECK FB2:(NOW)\n");
    for (int j = 635 * 2; j < 635 * 2 + 20; j++) {
        printf(" %02x", handle.fb2VirtualAddress[j]);
    }
    printf("\n");
#endif

    memcpy((uchar *)dst.imageData, handle.fb2VirtualAddress, frame_bytes);
    gettimeofday(&tend, NULL);

    // "total" spans both frame-buffer copies; "hls" spans only VDMA start to filter done.
    printf("hard total used time is %f\n", elapsed_seconds(tstart, tend));
    printf("hard hls used time is %f\n", elapsed_seconds(hls_start, hls_end));
    print_vdma_register_status();

    cvtColor(dst_yuv, dst_rgb, CV_YUV2BGR_YUYV);
    imwrite(OUTPUT_IMAGE, dst_rgb);

    stop_filter();
    // Halt VDMA and unmap memory ranges
    vdma_halt(&handle);

    return image_compare(OUTPUT_IMAGE, OUTPUT_IMAGE_GOLDEN);
}
vdma寄存器配置参考pg020_axi_vdma文档
软件编译选项
这里单独列出来,是因为在这种处理器性能不强的硬件平台上实现大计算量的算法时,一定要让软件尽可能以最高效率运行。下面是辛辛苦苦写的 Makefile,用了通配符(@梅神),稍微改改就可以用在新工程上:
# Builds vdma_test from every .c and .cpp file in this directory.
# CC is set to g++, so the %.c rule below also compiles C sources with the
# C++ compiler.
CC=g++
# Optimised build for -mcpu=cortex-a9 with NEON auto-vectorisation enabled.
CFLAGS= -g -O2 -mcpu=cortex-a9 -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad #-mfloat-abi=softfp -ffast-math
CFLAGS+=`pkg-config --cflags opencv`
LDFLAGS+=`pkg-config --libs opencv`
OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
OBJS += $(patsubst %.cpp,%.o,$(wildcard *.cpp))

# all and clean are commands, not files; keep them working even if a file
# with one of those names appears in the directory.
.PHONY: all clean

all: vdma_test

%.o: %.cpp
	$(CC) -c $(CFLAGS) -o $@ $<

%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

vdma_test: $(OBJS)
	$(CC) -o $@ $(OBJS) $(LDFLAGS)

# -f: do not fail when the tree is already clean.
clean:
	rm -f vdma_test $(OBJS)
CFLAGS 的优化配置参考了 xapp1206-boost-sw-performance-zynq7soc-w-neon 文档。
评论
查看更多