Multipose Detection for Humans
Leveraging Computer Vision in real-time to detect multiple people in motion.
- Project At A Glance
- Dependencies
- Load the MoveNet.Lightning Model
- VideoCapture
- Vectorized Frame
- Keypoint Matrix and Confidence Scores
- Logistics for Multi-Body Detection
- Looping Through Multiple People
Project At A Glance
Objective
:
- Successfully track people in motion by rendering keypoints, edges and connections on the active video-capture device, i.e. accessible through both the webcam and external video files.
- Further enhance this setup by looping through multiple people at once and pursue Multi-pose Detection.
Setup
: OpenCV, MoveNet.Lightning Pre-Trained Model [Download]
Implementation
:
- Captures video input resized to Dimensions 32m x 32n where (m, n) are scaled to match the original dimensions closely.
- Renders 17 keypoints inter-connected with edges to define Skeletal Blueprints on every person in the frame.
- Assigns a Confidence Score to each keypoint. Points are rendered when the Confidence >= 0.3.
Results
:
- The model has been successfully tweaked to detect multiple people, with tested high performance on resolutions up to 4K.
Deployment
: View this project on GitHub.
!pip install tensorflow==2.8.0 tensorflow-gpu==2.8.0 tensorflow-hub opencv-python matplotlib
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np
# Load the MoveNet MultiPose Lightning model from TensorFlow Hub.
# The model detects up to 6 people per frame, 17 keypoints each
# (matches the (6, 17, 3) reshape applied to its output below).
model= hub.load('https://tfhub.dev/google/movenet/multipose/lightning/1')
# 'serving_default' is the inference signature; calling it on a batched
# int32 image tensor returns the keypoint predictions.
movenet= model.signatures['serving_default']
# Run multi-pose inference frame-by-frame on the video file
# (swap the path for 0 to use the webcam instead).
cap = cv2.VideoCapture('football.mp4')
while cap.isOpened():
    ret, frame = cap.read()
    # Stop cleanly when the stream ends or a frame fails to decode;
    # otherwise frame is None and frame.copy() below would raise.
    if not ret:
        break
    img = frame.copy()
    # Model input should be 32m x 32n (multiples of 32) chosen close to the
    # source resolution; resize_with_pad preserves the aspect ratio.
    img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 544, 1024)
    input_img = tf.cast(img, dtype=tf.int32)
    results = movenet(input_img)
    # Keep the first 51 values per person (17 keypoints * 3) and reshape to
    # (6 people, 17 keypoints, [y, x, score]); the trailing values are
    # presumably bounding-box data — confirm against the model card.
    keypoints_and_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))
    loop_through_people(frame, keypoints_and_scores, EDGES, 0.3)
    cv2.imshow('Movenet Multipose Window', frame)
    # Exit on 'q'.
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
# --- Scratch / inspection cells from the notebook session ---
# Aspect ratio check: source frame height vs. a candidate model width.
1080/2048
# The last frame left over from the capture loop above.
frame
# Display that frame; OpenCV images are BGR, matplotlib expects RGB.
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# (y, x, score) triplets for the first detected person.
keypoints_and_scores[0]
# Third column = confidence score of each of the 17 keypoints.
scores= keypoints_and_scores[0][:, 2]
scores
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draw a filled green circle on *frame* for every confident keypoint.

    *keypoints* holds normalized (y, x, score) triplets; coordinates are
    scaled to the frame's pixel dimensions before drawing. A point is
    rendered only when its score exceeds *confidence_threshold*.
    The frame is modified in place.
    """
    height, width, _channels = frame.shape
    # Scale normalized (y, x) to pixels; the score column is left as-is.
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))
    for point_y, point_x, confidence in scaled:
        if confidence > confidence_threshold:
            cv2.circle(frame, (int(point_x), int(point_y)), 6, (0, 255, 0), -1)
# Skeleton topology: each (i, j) key is an edge between keypoint indices i
# and j of the 17 detected keypoints; the value is a matplotlib-style color
# code ('m'agenta / 'c'yan / 'y'ellow). Note that draw_connections in this
# file currently ignores the codes and draws every edge in red.
# Index meanings follow the MoveNet keypoint order (0 presumably the nose,
# 5/6 the shoulders, 11/12 the hips — confirm against the model card).
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw one person's skeleton edges onto *frame* in place.

    Parameters
    ----------
    frame : BGR image array; lines are drawn on it in place.
    keypoints : (17, 3) array of normalized (y, x, score) triplets.
    edges : dict mapping (index, index) keypoint pairs to a color code.
        The code is currently unused — every edge is drawn in red.
    confidence_threshold : an edge is drawn only when BOTH endpoints
        score above this value.
    """
    y, x, c = frame.shape
    # Scale normalized (y, x) to pixel space; the score column is unchanged.
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for edge, color in edges.items():
        p1, p2 = edge
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]
        # Short-circuiting `and` instead of bitwise `&` — idiomatic for
        # boolean conditions and skips the second compare when unneeded.
        if (c1 > confidence_threshold) and (c2 > confidence_threshold):
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 4)
def loop_through_people(frame, keypoints_and_scores, edges, confidence_threshold):
    """Render every detected person's skeleton onto *frame* in place.

    *keypoints_and_scores* has shape (people, 17, 3); each person gets
    their edges drawn first, then the keypoint circles on top.
    """
    for person_keypoints in keypoints_and_scores:
        draw_connections(frame, person_keypoints, edges, confidence_threshold)
        draw_keypoints(frame, person_keypoints, confidence_threshold)