* Install `mediapipe`: `pip install mediapipe`
* Install `keyboard`: `pip install keyboard`

Import the packages:

{% raw %}
```python
import mediapipe as mp
import cv2
import numpy as np
import keyboard
```
{% endraw %}
Using the `keyboard` package is pretty easy, just like the `mouse` package. As a test, we are going to simulate the keystrokes for `!echo hey`. I am using Jupyter Notebook, hence I have to prefix commands with `!` to run Windows shell commands.

{% raw %}
```python
# simulate typing the command !echo hey, one key at a time
keyboard.press_and_release("!,e,c,h,o,space,h,e,y")
# !ECHO HEY
```
{% endraw %}
Key combinations with modifiers like the control key work too. Let's type `hello` and then select it all with `ctrl+a`:

{% raw %}
```python
# simulate typing hello, then select all with ctrl+a
keyboard.press_and_release("h,e,l,l,o,ctrl+a")
```
{% endraw %}
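Besides `press_and_release`, the package also provides `keyboard.press` and `keyboard.release`, which hold a key down until it is explicitly released. We will rely on this later to keep the dino ducking. A minimal sketch:

{% raw %}
```python
import time
import keyboard

# hold the down-arrow key for half a second, then release it
keyboard.press("down")
time.sleep(0.5)
keyboard.release("down")
```
{% endraw %}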
We are not using the `keyboard` package just to type, but to play a dino game. We will do something like gesture recognition based on the distance between certain landmarks, so let's define a method to find the Euclidean distance:

{% raw %}
```python
def euclidean(pt1, pt2):
    d = np.sqrt((pt1[0]-pt2[0])**2 + (pt1[1]-pt2[1])**2)
    return d

euclidean((4, 3), (0, 0))
# 5.0
```
{% endraw %}
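As a quick sanity check, NumPy's built-in vector norm gives the same result, so either form works here:

{% raw %}
```python
import numpy as np

# same distance via NumPy's vector norm
assert np.isclose(np.linalg.norm(np.subtract((4, 3), (0, 0))), 5.0)
```
{% endraw %}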
* Open the default camera and define the frame size as (height, width).

{% raw %}
```python
cam = cv2.VideoCapture(0)
fsize = (520, 720)  # (height, width)
```
{% endraw %}
* Take `drawing_utils` and `hands` from MediaPipe's solutions. As the names suggest, `drawing_utils` will draw the landmarks and `hands` lets us work with the hand-detection model.

{% raw %}
```python
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
```
{% endraw %}
* Define a counter so that we evaluate the gesture only once every `check_every` frames, and a variable to remember the last event.

{% raw %}
```python
check_every = 10  # evaluate the gesture once per this many frames
check_cnt = 0
last_event = None
```
{% endraw %}
* Prepare the `Hands` object. It takes parameters like `max_num_hands`, `min_detection_confidence` and so on. As the names suggest, `max_num_hands` searches for up to that number of hands, and `min_detection_confidence` is the minimum confidence threshold for a detection, below which detected hands are discarded.

{% raw %}
```python
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.6) as hands:
```
{% endraw %}
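A side note: with `static_image_mode=True`, MediaPipe runs the full palm detector on every frame, which is intended for unrelated still images. For a webcam stream, `static_image_mode=False` lets the solution track the hand between frames and is typically faster. A sketch of that variant (the `min_tracking_confidence` value here is just a guess, not tuned):

{% raw %}
```python
import mediapipe as mp

mp_hands = mp.solutions.hands

# possibly faster for video: detect once, then track across frames
with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.5) as hands:
    ...  # same frame loop as below
```
{% endraw %}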
* Read frames while the camera is open, flip them horizontally so the view behaves like a mirror, and resize. Note that `cv2.resize` expects (width, height), hence the swapped indices.

{% raw %}
```python
    while cam.isOpened():
        ret, frame = cam.read()
        if not ret:
            continue
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, (fsize[1], fsize[0]))
        h, w, _ = frame.shape
```
{% endraw %}
* The `Hands` object expects the image in RGB format, so convert the frame from BGR.

{% raw %}
```python
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
```
{% endraw %}
* Call the `process` method of the `Hands` object to get the result.

{% raw %}
```python
        res = hands.process(rgb)
```
{% endraw %}
* If hands were detected, convert the normalized landmarks of the index fingertip, thumb tip and middle fingertip into pixel coordinates.
* Every `check_every` frames, decide the gesture from distances: index and middle fingertips close together means jump; thumb and index fingertips close together means duck.
* Right after a decision, fire the event: press-and-release space for a jump, hold the down key for a duck, and release it otherwise.

{% raw %}
```python
        if res.multi_hand_landmarks:
            for hand_landmarks in res.multi_hand_landmarks:
                index_tip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y,
                    w, h)
                thumb_tip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y,
                    w, h)
                middle_tip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].y,
                    w, h)

                if index_tip is not None:
                    if check_cnt == check_every:
                        if index_tip is not None and middle_tip is not None:
                            if euclidean(index_tip, middle_tip) < 40:
                                last_event = "jump"
                            else:
                                if last_event == "jump":
                                    last_event = None
                        if thumb_tip is not None and index_tip is not None:
                            if euclidean(thumb_tip, index_tip) < 60:  # threshold should be relative to frame size
                                last_event = "duck"
                            else:
                                if last_event == "duck":
                                    last_event = None
                        check_cnt = 0
                    if check_cnt == 0:
                        if last_event == "jump":
                            keyboard.press_and_release("space")
                        elif last_event == "duck":
                            keyboard.press("down")
                        else:
                            keyboard.release("down")
                        print(last_event)
                    check_cnt += 1
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
```
{% endraw %}
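The comment above hints that the hardcoded pixel thresholds (40 and 60) only suit this particular frame size. One way to make them resolution-independent, as a sketch (the 0.08 fraction is my guess, not a tuned value), is to derive them from the frame dimensions:

{% raw %}
```python
def rel_threshold(frac, h, w):
    # scale a fraction of the smaller frame dimension into pixels
    return frac * min(h, w)

# roughly the 40 px used above on a 520x720 frame
jump_thresh = rel_threshold(0.08, 520, 720)  # ~41.6
```
{% endraw %}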
* Show the frame, and break out of the loop when the ESC key is pressed.

{% raw %}
```python
        cv2.imshow("Controller Window", frame)
        if cv2.waitKey(1) & 0xFF == 27:
            break
```
{% endraw %}

Putting everything together, the complete code is below:
{% raw %}
```python
cam = cv2.VideoCapture(0)
fsize = (520, 720)

last_event = None
check_cnt = 0
check_every = 5

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.6) as hands:
    while cam.isOpened():
        ret, frame = cam.read()
        if not ret:
            continue
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, (fsize[1], fsize[0]))
        h, w, _ = frame.shape

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rgb.flags.writeable = False  # pass by reference for a small speed-up
        res = hands.process(rgb)
        rgb.flags.writeable = True

        if res.multi_hand_landmarks:
            for hand_landmarks in res.multi_hand_landmarks:
                index_dip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_DIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_DIP].y,
                    w, h)
                index_tip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y,
                    w, h)
                index_pip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].y,
                    w, h)
                thumb_tip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y,
                    w, h)
                middle_tip = mp_drawing._normalized_to_pixel_coordinates(
                    hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].x,
                    hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].y,
                    w, h)

                if index_tip is not None:
                    if check_cnt == check_every:
                        if index_tip is not None and middle_tip is not None:
                            if euclidean(index_tip, middle_tip) < 40:  # threshold should be relative to frame size
                                last_event = "jump"
                            else:
                                if last_event == "jump":
                                    last_event = None
                        if thumb_tip is not None and index_tip is not None:
                            if euclidean(thumb_tip, index_tip) < 60:
                                last_event = "duck"
                            else:
                                if last_event == "duck":
                                    last_event = None
                        check_cnt = 0
                    if check_cnt == 0:
                        if last_event == "jump":
                            keyboard.press_and_release("space")
                        elif last_event == "duck":
                            keyboard.press("down")
                        else:
                            keyboard.release("down")
                        print(last_event)
                    check_cnt += 1
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        cv2.imshow("Controller Window", frame)
        if cv2.waitKey(1) & 0xFF == 27:
            break
cam.release()
cv2.destroyAllWindows()
```
{% endraw %}
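One last remark: `_normalized_to_pixel_coordinates` is a private helper (note the leading underscore), so it may change between MediaPipe releases. A sketch of doing the same conversion by hand, assuming we just scale the normalized coordinates and mirror the helper's behavior of returning `None` for out-of-frame landmarks:

{% raw %}
```python
def to_pixel(landmark, w, h):
    # normalized [0, 1] landmark -> (x, y) pixel coordinates;
    # None when the landmark falls outside the frame
    if not (0 <= landmark.x <= 1 and 0 <= landmark.y <= 1):
        return None
    return int(landmark.x * w), int(landmark.y * h)

# usage:
# to_pixel(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP], w, h)
```
{% endraw %}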