opencv dnn模块

最近在研究opencv的dnn模块,简单的做一个记录,以备后面使用。

dnn简介

​ dnn是opencv中的深度神经网络模块,支持运行网络,但是不支持训练,实现图像场景中的图像分类,图像检测和图像分割,并且支持主流的深度学习框架。

dnn尝试

​ 最近一端时间尝试了tensorflow,darknet还有caffe。调通了darknet的yolov3还有caffe的ssd_mobilnet检测网络,但是都是基于cpu的,所以速度可想而知,yolov3速度大约在1~2fps,ssd-mobilnet大约在9fps。

​ googlenet模型是从caffe官方github仓库里面下的,从网上下载了一张雨伞图片,测试还可可以,准确率蛮高的。

资料来源

​ 代码主要是在github上,还有各个博客上找的。其实opencv也提供了相关demo,网上的代码也是基于demo改的,安装完opencv之后,在source/samples/dnn里面提供了相关代码,可以参考。

问题记录

  • 首先是tensorflow下的ssd网络,如果调用opencv里面的权重是可以跑通的,但是自己生成的却不行,但是网上有大佬用自己训练的模型跑通了。
  • 使用opencl加速,代码里有一个参数可以进行加速,但是使用之后就报错,从网上查了一下
    • opencv对显卡支持不好 -- github opencv issue
    • opencv只支持intel显卡-- 这个是前几年的文章看见的,现在支持了。

总结

  • 个人感觉darknet是用c写的,caffe用c++写的,比较容易调通,调用过程基本上没有遇到困难,但是tensorflow和pytorch使用python写的,调用却不太顺利,是不是和语言有关系呢?
  • dnn模块软肋
    • 只支持部分网络
    • gpu加速还是个问题,可能还不太成熟吧,博客上看见的也是用cpu调通的,但是速度这么慢,落地不行呀。
##caffe-ssd mobilnet opencv3.4.1

#include<iostream>
#include<opencv2/opencv.hpp>
#include<opencv2/dnn.hpp>
using namespace std;
using namespace cv;
using namespace cv::dnn;
class Object
{
public:
	Object();
	Object(int index, float confidence, String name, Rect rect);
	~Object();
public:
	int index;
	String name;
	float confidence;
	Rect rect;
private:
};
Object::Object() {
}
Object::Object(int index, float confidence, String name, Rect rect) {
	this->index = index;
	this->confidence = confidence;
	this->name = name;
	this->rect = rect;
}
Object::~Object() {
}
//----------------------------全局常量----------------------------------
//配置好protxt文件,网络结构描述文件
//配置好caffemodel文件,训练好的网络权重
const String PROTOTX_FILE = "no_bn.prototxt";
const String CAFFE_MODEL_FILE = "no_bn.caffemodel";
const String classNames[] = { "background", "jingling", "t16", "xiao_spark", "yu_mavic"};
const float CONF_THRESH = 0.5;
int main() {
	//------------------------实例化网络----------------------------
	Net mobileNetSSD = readNetFromCaffe(PROTOTX_FILE, CAFFE_MODEL_FILE);
	if (mobileNetSSD.empty()) {
		cerr << "加载网络失败!" << endl;
		return -1;
	}
	TickMeter t;
	VideoCapture video;
	video.open("scr.mp4");
	
	Mat srcImg;
	video >> srcImg;
	while (!srcImg.empty())
	{
		clock_t start_time,end_time;
		start_time = clock();
		//----------------------设置网络输入-----------------------
		//将二维图像转换为CNN输入的张量Tensor,作为网络的输入
		mobileNetSSD.setInput(blobFromImage(srcImg, 1.0 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false));
		t.start();
		//--------------------CNN网络前向计算----------------------
		Mat netOut = mobileNetSSD.forward();
		t.stop();
		//----------------------解析计算结果-----------------------
		vector<Object> detectObjects;
		Mat detectionResult(netOut.size[2], netOut.size[3], CV_32F, netOut.ptr<float>());
		for (int i = 0; i < detectionResult.rows; i++) {
			//目标类别的索引
			int objectIndex = detectionResult.at<float>(i, 1);
			//检测结果置信度
			float confidence = detectionResult.at<float>(i, 2);
			//根据置信度阈值过滤掉置信度较小的目标
			if (confidence<CONF_THRESH) {
				continue;
			}
			//反归一化,得到图像坐标
			int xLeftUp = static_cast<int>(detectionResult.at<float>(i, 3)*srcImg.cols);
			int yLeftUp = static_cast<int>(detectionResult.at<float>(i, 4)*srcImg.rows);
			int xRightBottom = static_cast<int>(detectionResult.at<float>(i, 5)*srcImg.cols);
			int yRightBottom = static_cast<int>(detectionResult.at<float>(i, 6)*srcImg.rows);
			//矩形框
			Rect rect(Point{ xLeftUp,yLeftUp }, Point{ xRightBottom,yRightBottom });
			//保存结果
			detectObjects.push_back(Object{ objectIndex,confidence,classNames[objectIndex],rect });
		}
		//------------------------显示结果-----------------------------------
		int count = 0;
		for (auto& i : detectObjects) {
			rectangle(srcImg, i.rect, Scalar(0, 255, 255), 2);
			putText(srcImg, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0), 2);
			cout << "第" << count << "个目标:" << i.name << "\t" << i.rect << "\t" << i.confidence << endl;
			count++;
		}


		end_time = clock();
		imshow("MobileNet-SSD", srcImg);
		waitKey(10);
		cout << "FPS:" << CLOCKS_PER_SEC/(double)(end_time - start_time)  << endl;
		video >> srcImg;
	}


	waitKey(0);
}

### opencv4.1 darknet-yolov3
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>
using namespace std;
using namespace cv;
using namespace cv::dnn;
void image_detection();
void video_detection();
float confidenceThreshold = 0.25;
String yolo_cfg = "yolov3-voc.cfg";
String yolo_model = "yolov3-voc_f.weights";

int main(int argc, char** argv)
{
	//image_detection();
	video_detection();
}

void image_detection() {
	Net net = readNetFromDarknet(yolo_cfg, yolo_model);
	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);
	//net.setPreferableBackend(DNN_BACKEND_OPENCV);       //启动GPU加速
	//net.setPreferableTarget(DNN_TARGET_OPENCL);         //启动GPU加速
	std::vector<String> outNames = net.getUnconnectedOutLayersNames();
	for (int i = 0; i < outNames.size(); i++) {
		printf("output layer name : %s\n", outNames[i].c_str());
	}


	vector<string> classNamesVec;
	//ifstream classNamesFile("object_detection_classes_yolov3.txt");
	ifstream classNamesFile("myData.names");
	if (classNamesFile.is_open())
	{
		string className = "";
		while (std::getline(classNamesFile, className))
			classNamesVec.push_back(className);

	}
	clock_t start_time;
	start_time = clock();
	// 加载图像 
	Mat frame = imread("scr.jpg");
	//Mat frame = imread("cat.jpg");
	Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false);
	net.setInput(inputBlob);

	// 检测
	std::vector<Mat> outs;
	net.forward(outs, outNames);
	vector<double> layersTimings;
	double freq = getTickFrequency() / 1000;
	double time = net.getPerfProfile(layersTimings) / freq;
	ostringstream ss;
	cout << "detection time: " << time << " ms";
	//putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));
	vector<Rect> boxes;
	vector<int> classIds;
	vector<float> confidences;
	for (size_t i = 0; i<outs.size(); ++i)
	{
		// Network produces output blob with a shape NxC where N is a number of
		// detected objects and C is a number of classes + 4 where the first 4
		// numbers are [center_x, center_y, width, height]
		float* data = (float*)outs[i].data;
		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
		{
			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
			Point classIdPoint;
			double confidence;
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > confidenceThreshold)
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}

	vector<int> indices;
	NMSBoxes(boxes, confidences, 0.5, 0.2, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		String className = classNamesVec[classIds[idx]] ;
		putText(frame, className.c_str(), box.tl(), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 0, 0), 2, 8);
		rectangle(frame, box, Scalar(0, 0, 255), 2, 8, 0);
	}
	cout <<"运行时间:" << ((double)(clock() - start_time) / CLOCKS_PER_SEC) * 1000 << "ms" << "\n" << endl;
	imshow("YOLOv3-Detections", frame);
	waitKey(0);
	return;
}

void video_detection() {

	Net net = readNetFromDarknet(yolo_cfg, yolo_model);
	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);
	//net.setPreferableBackend(DNN_BACKEND_OPENCV);       //启动GPU加速
	//net.setPreferableTarget(DNN_TARGET_OPENCL);         //启动GPU加速
	if (net.empty())
	{
		printf("Could not load net...\n");
		return;
	}
	std::vector<String> outNames = net.getUnconnectedOutLayersNames();
	for (int i = 0; i < outNames.size(); i++) {
		printf("output layer name : %s\n", outNames[i].c_str());
	}
	vector<string> classNamesVec;
	ifstream classNamesFile("myData.names");
	if (classNamesFile.is_open())
	{
		string className = "";
		while (std::getline(classNamesFile, className))
			classNamesVec.push_back(className);
	}

	VideoCapture video_read;
	video_read.open("scr2.mp4");
	Mat frame;
	video_read >> frame;
	/*
	VideoCapture capture;
	capture.open("D:/vcprojects/images/fbb.avi");
	if (!capture.isOpened()) {
		printf("could not open the camera...\n");
		return;
	}
	*/

	while (!frame.empty())
	{
		//加载图像
		//if (frame.empty())
		//	if (frame.channels() == 4)
		//		cvtColor(frame, frame, COLOR_BGRA2BGR);
		Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false);
		net.setInput(inputBlob, "data");
		
	


		//Mat detectionMat = net.forward("detection_out");
		//vector<double> layersTimings;
		//double freq = getTickFrequency() / 1000;
		//double time = net.getPerfProfile(layersTimings) / freq;
		//ostringstream ss;
		//ss << "FPS: " << 1000 / time << " ; time: " << time << " ms";
		//putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));


		// 检测
		std::vector<Mat> outs;
		net.forward(outs, outNames);
		vector<double> layersTimings;
		double freq = getTickFrequency() / 1000;
		double time = net.getPerfProfile(layersTimings) / freq;
		ostringstream ss;
		ss << "detection time: " << time << " ms";
		putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));
		vector<Rect> boxes;
		vector<int> classIds;
		vector<float> confidences;

		for (size_t i = 0; i < outs.size(); ++i)
		{
			// Network produces output blob with a shape NxC where N is a number of
			// detected objects and C is a number of classes + 4 where the first 4
			// numbers are [center_x, center_y, width, height]
			float* data = (float*)outs[i].data;
			for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
			{
				Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
				Point classIdPoint;
				double confidence;
				minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
				if (confidence > confidenceThreshold)
				{
					int centerX = (int)(data[0] * frame.cols);
					int centerY = (int)(data[1] * frame.rows);
					int width = (int)(data[2] * frame.cols);
					int height = (int)(data[3] * frame.rows);
					int left = centerX - width / 2;
					int top = centerY - height / 2;

					classIds.push_back(classIdPoint.x);
					confidences.push_back((float)confidence);
					boxes.push_back(Rect(left, top, width, height));
				}
			}
		}

		vector<int> indices;
		NMSBoxes(boxes, confidences, 0.5, 0.2, indices);
		for (size_t i = 0; i < indices.size(); ++i)
		{
			int idx = indices[i];
			Rect box = boxes[idx];
			String className = classNamesVec[classIds[idx]] + std::to_string(confidences[idx]);
			putText(frame, className.c_str(), box.tl(), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 0, 0), 2, 8);
			rectangle(frame, box, Scalar(0, 0, 255), 2, 8, 0);
		}

		/*for (int i = 0; i < detectionMat.rows; i++)
		{
			const int probability_index = 5;
			const int probability_size = detectionMat.cols - probability_index;
			float *prob_array_ptr = &detectionMat.at<float>(i, probability_index);
			size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
			float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index);
			if (confidence > confidenceThreshold)
			{
				float x = detectionMat.at<float>(i, 0);
				float y = detectionMat.at<float>(i, 1);
				float width = detectionMat.at<float>(i, 2);
				float height = detectionMat.at<float>(i, 3);
				int xLeftBottom = static_cast<int>((x - width / 2) * frame.cols);
				int yLeftBottom = static_cast<int>((y - height / 2) * frame.rows);
				int xRightTop = static_cast<int>((x + width / 2) * frame.cols);
				int yRightTop = static_cast<int>((y + height / 2) * frame.rows);
				Rect object(xLeftBottom, yLeftBottom,
					xRightTop - xLeftBottom,
					yRightTop - yLeftBottom);
				rectangle(frame, object, Scalar(0, 255, 0));
				if (objectClass < classNamesVec.size())
				{
					ss.str("");
					ss << confidence;
					String conf(ss.str());
					String label = String(classNamesVec[objectClass]) + ": " + conf;
					int baseLine = 0;
					Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
					rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom),
						Size(labelSize.width, labelSize.height + baseLine)),
						Scalar(255, 255, 255), -1);
					putText(frame, label, Point(xLeftBottom, yLeftBottom + labelSize.height),
						FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
				}
			}
		}*/
		imshow("YOLOv3: Detections", frame);
		video_read >> frame;
		if (waitKey(1) >= 0) break;
	}
}
#opencv3.4.1 caffe googlenet 
#include<opencv2\opencv.hpp>
#include<vector>
#include<string>

using namespace cv;
using namespace dnn;
using namespace std;

String model_file = "bvlc_googlenet.caffemodel";
String model_txtfile = "deploy.prototxt";
String labels_file = "synset_words.txt";

vector<String>readLabels();



int main(int arc, char** argv) {

	Mat src = imread("scr1.jpg");
	namedWindow("input", CV_WINDOW_AUTOSIZE);
	imshow("input", src);

	//¶ÁÈ¡Ä£Ð͵ÄÀà±ð£šÎıŸ£©
	vector<String> labels = readLabels();


	//¶ÁÈ¡google_netµÄÄ£ÐͺÍÃèÊöÎÄŒþ
	Net net = readNetFromCaffe(model_txtfile, model_file);
	if (net.empty()) {

		printf("read caffee model data failure\n");

		return -1;

	}

	//œ«ÍŒÏñתΪgoogle_netÍøÂçÊäÈëµÄ¶ÔÏó£¬ÓÉÃèÊöÎÄŒþ¿ÉÖª£¬ÍŒÏñ³ßŽçͳһΪ224*224
	Mat inputBlob = blobFromImage(src, 1.0, Size(224, 224), Scalar(104, 117, 123));
	//œøÐÐÇ°ÏòŽ«²¥£¬ÓÉÃèÊöÎÄŒþ¿ÉÖª£¬µÚÒ»²ãÓÃÁË10žöŸí»ý²ã£¬ÌáÈ¡ÍŒÏñ10ÖÖ²»Í¬µÄÌØÕ÷
	Mat prob;

	for (int i = 0; i < 10; i++) {
		net.setInput(inputBlob, "data");
		prob = net.forward("prob");//×îºóÒ»²ãµÄÊä³öΪ¡°prob¡±
	}


	//Êä³ö
	//µÃµœµÄžÅÂÊֵΪ1ÐÐ1000ÁеÄ
	Mat promat = prob.reshape(1, 1);
	Point classLoc;
	double classProb;
	minMaxLoc(promat, NULL, &classProb, NULL, &classLoc);
	printf("current image classification: %s,probablity %f\n", labels.at(classLoc.x).c_str(), classProb);

	putText(src, labels.at(classLoc.x), Point(20, 20), FONT_HERSHEY_COMPLEX, 1.0, Scalar(0, 0, 255), 2);

	imshow("output", src);

	waitKey(0);

	return 0;

}



//¶ÁÈ¡Ä£Ð͵ÄÀà±ð£šÎıŸ£©

vector<String>readLabels() {
	vector<String>classNames;
	ifstream fp(labels_file);//Žò¿ªÎÄŒþ
	if (!fp.is_open()) {//ÎÄŒþûŽò¿ª
		printf("could not open the file ");
		exit(-1);
	}

	string name;

	while (!fp.eof()) {//ÎÄŒþû¶Áµœœáβ
		getline(fp, name);//µÃµœÃ¿Ò»ÐУ¬·ÅµœnameÖÐ
		if (name.length()) {//·Ç¿ÕÐÐ
			classNames.push_back(name.substr(name.find(' ') + 1));//
		}
	}

	fp.close();
	return classNames;

}