#include "objdetectdata.pb.h"
#include <google/protobuf/util/time_util.h>

using google::protobuf::util::TimeUtil;
// Constructor: store the processing controller, default to CPU processing and
// apply the JSON processing settings
CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
: processingController(&processingController), processingDevice("CPU"){
    SetJson(processInfoJson);
}
// Select the dnn backend/target that matches the requested processing device
void CVObjectDetection::setProcessingDevice(){
    if(processingDevice == "GPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else if(processingDevice == "CPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}
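// Note: the CUDA backend/target selected above is only usable when OpenCV was built with
// CUDA support; otherwise the dnn module falls back to the default OpenCV/CPU path, which
// is what the "CPU" branch requests explicitly.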
// Detect objects on every frame of the given clip (or only on the given interval)
void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
{
    start = _start; end = _end;

    // Clear any previous error state
    processingController->SetError(false, "");
    // Load the class names used by the model
    std::ifstream ifs(classesFile.c_str());
    std::string line;
    while (std::getline(ifs, line)) classNames.push_back(line);

    // Abort if any of the model files was not provided
    if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
        return;

    // Load the Darknet model and pick the processing device (CPU or GPU)
    net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
    setProcessingDevice();
    // If no valid interval was given, process the whole clip
    if(!process_interval || end <= 1 || end-start == 0){
        // Convert the clip start/end times (seconds) to frame numbers using the reader FPS
        start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
        end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
    }
    // Iterate over the selected frame range
    size_t frame_number;
    for (frame_number = start; frame_number <= end; frame_number++)
    {
        std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);

        // Grab the frame image as an OpenCV Mat and run the detector on it
        cv::Mat cvimage = f->GetImageCV();
        DetectObjects(cvimage, frame_number);

        // Update the progress reported to the UI
        processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
    }
}
// Run the DNN detector on a single frame
void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
    // Create a 4D blob from the frame (416x416 input, scaled to [0,1], BGR->RGB swap, no crop)
    cv::Mat blob;
    int inpWidth, inpHeight;
    inpWidth = inpHeight = 416;
    cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);

    // Set the network input and run the forward pass over the output layers
    net.setInput(blob);
    std::vector<cv::Mat> outs;
    net.forward(outs, getOutputsNames(net));

    // Remove the bounding boxes with low confidence and track the remaining ones
    postprocess(frame.size(), outs, frameId);
}
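// Pipeline note (summarizing the calls above and below): DetectObjects() turns a frame into a
// network blob and runs the forward pass; postprocess() thresholds and de-duplicates the raw
// detections and hands them to the SORT tracker, which assigns persistent object ids; the
// per-frame results are kept in detectionsData and can later be written to and reloaded from
// a protobuf file by the save/load helpers further down.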
// Threshold the network output, track the surviving boxes and store the detections
void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
{
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> objectIds;
    // Scan every row of every output Mat; each row is one candidate detection with its
    // geometry in columns 0-3 (normalized center x/y, width, height) and class scores from column 5 on
    for (size_t i = 0; i < outs.size(); ++i)
    {
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
        {
            // Keep the best-scoring class of this candidate
            cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            cv::Point classIdPoint;
            double confidence;
            cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                // Convert the normalized center/size values to pixel coordinates
                int centerX = (int)(data[0] * frameDims.width);
                int centerY = (int)(data[1] * frameDims.height);
                int width = (int)(data[2] * frameDims.width);
                int height = (int)(data[3] * frameDims.height);
                int left = centerX - width / 2;
                int top = centerY - height / 2;

                classIds.push_back(classIdPoint.x);
                confidences.push_back((float)confidence);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
    }
    // Non-maximum suppression: drop redundant overlapping boxes with lower confidence
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

    // Feed the boxes to the SORT tracker; the frame diagonal is passed as the distance scale
    std::vector<cv::Rect> sortBoxes;
    for(auto box : boxes)
        sortBoxes.push_back(box);
    sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);
    // Replace the raw detections with the boxes the tracker predicts for this frame,
    // which also gives every box a persistent object id
    boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
    for(auto TBox : sort.frameTrackingResult){
        if(TBox.frame == frameId){
            boxes.push_back(TBox.box);
            confidences.push_back(TBox.confidence);
            classIds.push_back(TBox.classId);
            objectIds.push_back(TBox.id);
        }
    }
    // Merge same-class boxes whose centers are less than 10 pixels apart,
    // keeping only the most confident one
    for(uint i = 0; i < boxes.size(); i++){
        for(uint j = i+1; j < boxes.size(); j++){
            int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
            int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);

            if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        break;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i = 0; break;
                    }
                }
            }
        }
    }
    // Second pass: merge same-class boxes whose IoU is above the threshold
    for(uint i = 0; i < boxes.size(); i++){
        for(uint j = i+1; j < boxes.size(); j++){
            if( iou(boxes[i], boxes[j])){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        break;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i = 0; break;
                    }
                }
            }
        }
    }
    // Normalize the box coordinates to [0,1] so they are independent of the frame size
    std::vector<cv::Rect_<float>> normalized_boxes;
    for(auto box : boxes){
        cv::Rect_<float> normalized_box;
        normalized_box.x = (box.x)/(float)frameDims.width;
        normalized_box.y = (box.y)/(float)frameDims.height;
        normalized_box.width = (box.width)/(float)frameDims.width;
        normalized_box.height = (box.height)/(float)frameDims.height;
        normalized_boxes.push_back(normalized_box);
    }

    // Store the final detections for this frame
    detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
}
// Compute IoU between two boxes; true means they describe the same object
bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
    // Coordinates of the intersection rectangle
    int xA = std::max(pred_box.x, sort_box.x);
    int yA = std::max(pred_box.y, sort_box.y);
    int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
    int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
    // Intersection area (zero when the boxes don't overlap) and the individual box areas
    int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
    int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
    int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
    // IoU = intersection / union
    float iou = interArea / (float)(boxAArea + boxBArea - interArea);
    // Boxes above the threshold are considered the same object
    if(iou > 0.75)
        return true;
    return false;
}
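// Worked example of the IoU test above (hypothetical boxes): for pred_box = (0, 0, 100, 100)
// and sort_box = (50, 50, 100, 100), the intersection is 51 * 51 = 2601 pixels (the +1 keeps
// touching edges from collapsing to zero), each box area is 101 * 101 = 10201, so
// IoU = 2601 / (10201 + 10201 - 2601) ~= 0.15 -- well below the 0.75 threshold used above,
// so the two boxes are treated as different objects.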
// Get the names of the network's output layers
std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
{
    static std::vector<cv::String> names;

    // Indices of the output layers, i.e. the layers with unconnected outputs (1-based ids)
    std::vector<int> outLayers = net.getUnconnectedOutLayers();

    // Names of all the layers in the network
    std::vector<cv::String> layersNames = net.getLayerNames();

    // Map the 1-based layer ids to layer names
    names.resize(outLayers.size());
    for (size_t i = 0; i < outLayers.size(); ++i)
        names[i] = layersNames[outLayers[i] - 1];
    return names;
}
// Save the detected objects on all processed frames to a protobuf file
bool CVObjectDetection::SaveObjDetectedData(){
    pb_objdetect::ObjDetect objMessage;
    // Save the class names in the protobuf message
    for(int i = 0; i < classNames.size(); i++){
        std::string* className = objMessage.add_classnames();
        className->assign(classNames.at(i));
    }
    // Add the per-frame detection data and a last-updated timestamp
    for(auto it = detectionsData.begin(); it != detectionsData.end(); ++it){
        pb_objdetect::Frame* pbFrameData = objMessage.add_frame();
        AddFrameDataToProto(pbFrameData, it->second);
    }
    *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
    // Write the message to disk
    std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
    if (!objMessage.SerializeToOstream(&output)) {
        cerr << "Failed to write protobuf message." << endl;
        return false;
    }
    // Delete all global objects allocated by libprotobuf
    google::protobuf::ShutdownProtobufLibrary();
    return true;
}
// Add the detections of a single frame to the protobuf message
void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData){
    // Save the frame number
    pbFrameData->set_id(dData.frameId);

    for(size_t i = 0; i < dData.boxes.size(); i++){
        pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();

        // Save the bounding box, class, confidence and object id of each detection
        box->set_x(dData.boxes.at(i).x);
        box->set_y(dData.boxes.at(i).y);
        box->set_w(dData.boxes.at(i).width);
        box->set_h(dData.boxes.at(i).height);
        box->set_classid(dData.classIds.at(i));
        box->set_confidence(dData.confidences.at(i));
        box->set_objectid(dData.objectIds.at(i));
    }
}
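// For reference, the setters above imply a message shaped roughly like the sketch below
// (an assumption drawn from this file; see objdetectdata.pb.h for the generated definitions):
//   ObjDetect { repeated string classnames; repeated Frame frame; Timestamp last_updated; }
//   Frame     { id; repeated Box bounding_box; }
//   Box       { float x, y, w, h; int classid; float confidence; int objectid; }
// where x/y/w/h are the normalized coordinates produced in postprocess().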
    // Error handler of the JSON parsing in SetJson()
    catch (const std::exception& e)
    {
        std::cout << "JSON is invalid (missing keys or invalid data types)" << std::endl;
    }
}
// Set the object detector settings from a parsed JSON object
void CVObjectDetection::SetJsonValue(const Json::Value root) {
    // Set data from JSON (if the key is found)
    if (!root["protobuf_data_path"].isNull()){
        protobuf_data_path = (root["protobuf_data_path"].asString());
    }
    if (!root["processing-device"].isNull()){
        processingDevice = (root["processing-device"].asString());
    }
    if (!root["model-config"].isNull()){
        modelConfiguration = (root["model-config"].asString());
        std::ifstream infile(modelConfiguration);
        if (!infile.good()){
            processingController->SetError(true, "Incorrect path to model config file");
        }
    }
    if (!root["model-weights"].isNull()){
        modelWeights = (root["model-weights"].asString());
        std::ifstream infile(modelWeights);
        if (!infile.good()){
            processingController->SetError(true, "Incorrect path to model weight file");
        }
    }
    if (!root["class-names"].isNull()){
        classesFile = (root["class-names"].asString());
        std::ifstream infile(classesFile);
        if (!infile.good()){
            processingController->SetError(true, "Incorrect path to class name file");
        }
    }
}
// Load detection data previously saved to the protobuf file
bool CVObjectDetection::_LoadObjDetectdData(){
    pb_objdetect::ObjDetect objMessage;

    // Read the existing detection data file
    fstream input(protobuf_data_path, ios::in | ios::binary);
    if (!objMessage.ParseFromIstream(&input)) {
        cerr << "Failed to parse protobuf message." << endl;
        return false;
    }
    // Load the class names stored in the message
    for(int i = 0; i < objMessage.classnames_size(); i++){
        classNames.push_back(objMessage.classnames(i));
    }
    // Iterate over all frames of the saved message
    for (size_t i = 0; i < objMessage.frame_size(); i++) {
        // Get the data of a single frame and its frame number
        const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
        size_t id = pbFrameData.id();

        // Get the bounding box list of this frame
        const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box> &pBox = pbFrameData.bounding_box();

        // Vectors holding the detections of the current frame
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<cv::Rect_<float>> boxes;
        std::vector<int> objectIds;

        for(int j = 0; j < pbFrameData.bounding_box_size(); j++){
            // Get the bounding box coordinates
            float x = pBox.Get(j).x(); float y = pBox.Get(j).y();
            float w = pBox.Get(j).w(); float h = pBox.Get(j).h();
            cv::Rect_<float> box(x, y, w, h);

            // Get the class id, prediction confidence and object id
            int classId = pBox.Get(j).classid();
            float confidence = pBox.Get(j).confidence();
            int objectId = pBox.Get(j).objectid();

            boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
            objectIds.push_back(objectId);
        }

        // Rebuild the detection data map for this frame
        detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
    }

    // Delete all global objects allocated by libprotobuf
    google::protobuf::ShutdownProtobufLibrary();
    return true;
}