OpenCV DNN模块教程(三)SSD/Faster-RCNN目标检测实例

Color Space

发布于 2020-10-29 10:09:12

1.4K0

发布于 2020-10-29 10:09:12

文章被收录于专栏：OpenCV与AI深度学习

本文为OpenCV DNN模块官方教程的扩展，介绍如何使用OpenCV加载TensorFlow Object Detection API训练的模型做目标检测，以SSD和Faster-RCNN为例。

TensorFlow Object Detection API的github链接地址如下：

https://github.com/tensorflow/models/tree/master/research/object_detection 现同时支持TensorFlow1.x和TensorFlow2.x版本。

本文以TensorFlow 1.x为例（TF2.x模型待OpenCV稳定支持后再作介绍），使用OpenCV DNN模块调用SSD和Faster-RCNN模型检测目标的步骤如下：

(1) 下载或自己训练生成 .pb 格式的模型文件。本文以Model Zoo中的ssd_mobilenet_v1_coco为例，下载解压后得到frozen_inference_graph.pb

(2) 使用指令用.pb文件生成.pbtxt文件, SSD模型使用tf_text_graph_ssd.py, Faster-RCNN模型使用tf_text_graph_faster_rcnn.py

SSD:

Faster-RCNN:

主要参数三个：

--input 输入.pb模型文件完整路径；

--output 输出.pbtxt文件完整路径；

--config 输入config文件完整路径

完整指令：

python tf_text_graph_ssd.py --input E:\Practice\TensorFlow\model\ssd_mobilenet_v1_coco_2018_01_28\frozen_inference_graph.pb --output E:\Practice\TensorFlow\model\ssd_mobilenet_v1_coco_2018_01_28\frozen_inference_graph.pbtxt --config D:\models\research\object_detection\samples\configs\ssd_mobilenet_v1_coco.config

运行结果：

(3) 配置OpenCV4.4，加载图片测试，代码如下：

#include<opencv2/opencv.hpp>
#include<opencv2/dnn.hpp>
#include <iostream>

using namespace std;
using namespace cv;
using namespace dnn;

// Side length, in pixels, of the square input blob handed to the network.
const size_t blobSize = 300;

// Class names indexed by the class id the detector reports.
// Index 0 is the "background" entry that SSD models need. The remaining
// entries follow the 91-id COCO label map; ids that have no annotated objects
// in the dataset (e.g. "street sign", "hat") are kept so that every later id
// still lines up with its name.
const char* classNames[] = {
  "background", "person", "bicycle", "car", "motorcycle", "airplane", "bus",
  "train", "truck", "boat", "traffic light", "fire hydrant", "street sign",
  "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
  "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "hat", "backpack",
  "umbrella", "shoe", "eye glasses", "handbag", "tie", "suitcase", "frisbee",
  "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
  "skateboard", "surfboard", "tennis racket", "bottle", "plate", "wine glass",
  "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
  "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
  "couch", "potted plant", "bed", "mirror", "dining table", "window", "desk",
  "toilet", "door", "tv", "laptop", "mouse", "remote", "keyboard",
  "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
  "blender", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
  "toothbrush" };

// Frozen TensorFlow graph and the matching text graph generated from it.
String weights = "./model2/frozen_inference_graph.pb";
String prototxt = "./model2/frozen_inference_graph.pbtxt";
// Default image to run the detector on.
string imgPath = "./imgs/test.jpg";

// The network is loaded during static initialization, i.e. before main() runs.
// NOTE(review): a failure to load the model files would therefore surface
// before main() can report it - confirm this is acceptable for the demo.
dnn::Net net = cv::dnn::readNetFromTensorflow(weights, prototxt);

// Runs the file-level network `net` on `frame`, draws a box and a
// "<class>: <confidence>" label for every detection whose confidence exceeds
// the threshold, prints the elapsed time to stdout and returns the frame.
//
// frame:   image to analyse; boxes and labels are drawn onto it in place.
// returns: `frame` itself (the returned Mat refers to the same pixel data).
//
// If the network output does not have the expected 4-D layout with at least
// 7 values per detection, a message is printed and the frame is returned
// without any drawing.
Mat object_detection(Mat &frame)
{
  const double start = static_cast<double>(getTickCount());

  // swapRB=true: imread() yields BGR data, while OpenCV's guidance for
  // TensorFlow Object Detection API models feeds the network RGB.
  // crop=false keeps the whole image (it is only resized).
  const int side = static_cast<int>(blobSize);
  cv::Mat blob = cv::dnn::blobFromImage(frame, 1.0, Size(side, side),
    Scalar(), true, false);
  //cout << "blob size: " << blob.size << endl;
  net.setPreferableBackend(DNN_BACKEND_OPENCV);
  net.setPreferableTarget(DNN_TARGET_CPU);

  net.setInput(blob);
  Mat output = net.forward();
  //cout << "output size: " << output.size << endl;

  // The rows below are read as 7 floats each; refuse anything else rather
  // than reading past the end of the buffer.
  if (output.dims != 4 || output.size[3] < 7)
  {
    cout << "unexpected detection output layout\n";
    return frame;
  }

  // View the last two dimensions as a 2-D table: one row per detection,
  // columns 1..6 = class id, confidence, left, top, right, bottom
  // (coordinates are scaled by the frame size below).
  Mat detectionMat(output.size[2], output.size[3], CV_32F, output.ptr<float>());

  const size_t classCount = sizeof(classNames) / sizeof(classNames[0]);
  const float confidenceThreshold = 0.50f;
  const double fontScale = 0.7;
  const int fontThickness = 2;

  for (int i = 0; i < detectionMat.rows; i++)
  {
    const float confidence = detectionMat.at<float>(i, 2);
    if (confidence <= confidenceThreshold)
      continue;

    const int classId = static_cast<int>(detectionMat.at<float>(i, 1));

    const int left = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
    const int top = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
    const int right = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
    const int bottom = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);

    rectangle(frame, Rect(left, top, right - left, bottom - top),
      Scalar(255, 0, 255), 2);

    // Ids outside the name table get a generic label instead of an
    // out-of-bounds read of classNames.
    const bool knownClass = classId >= 0 && static_cast<size_t>(classId) < classCount;
    const String className = knownClass ? String(classNames[classId])
                                        : cv::format("class %d", classId);
    const String label = className + ": " + cv::format("%0.2f", confidence);

    int baseLine = 0;
    const Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, fontScale,
      fontThickness, &baseLine);

    // Keep the label inside the image when the box touches the top edge.
    const int labelBase = (top < labelSize.height) ? labelSize.height : top;

    rectangle(frame, Rect(Point(left, labelBase - labelSize.height),
      Size(labelSize.width, labelSize.height + baseLine)),
      Scalar(0, 255, 255), -1);
    putText(frame, label, Point(left, labelBase),
      FONT_HERSHEY_SIMPLEX, fontScale, Scalar(255, 0, 0), fontThickness);
  }

  const double end = static_cast<double>(getTickCount());
  cout << "use_time :" << (end - start) * 1000.0 / cv::getTickFrequency() << " ms \n";
  return frame;
}

int main(int argc, char** agrv)
{
  Mat frame = cv::imread(imgPath);
  if (frame.empty())
  {
    cout << "img is empty......" << endl;
    return 1;
  }
  Mat result = object_detection(frame);
  imshow("OpenCV DNN Test", result);
  imwrite("result.jpg", result);
  waitKey(0);
  return 0;
}