import mediapipe as mp
import cv2
import numpy as np
import matplotlib.pyplot as plt
def show(img, fsize=(10, 10)):
    # small helper to preview an image with matplotlib
    figure = plt.figure(figsize=fsize)
    plt.imshow(img)
    plt.show()
show(np.zeros((10, 10)))
def frame_pos2screen_pos(frame_size=(480, 640), screen_size=(768, 1366), frame_pos=None):
    # scale factors: rows (x) and columns (y) from the frame world to the screen/canvas world
    x, y = screen_size[0]/frame_size[0], screen_size[1]/frame_size[1]
    # frame_pos is (column, row); return plain ints so cv2.circle accepts the point directly
    screen_pos = (int(frame_pos[0]*y), int(frame_pos[1]*x))
    return screen_pos
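For a quick sanity check of this mapping (the values here are only illustrative), the center of a 480x640 frame should land near the center of a 768x1366 screen:
print(frame_pos2screen_pos((480, 640), (768, 1366), (320, 240))) # -> (683, 384)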
def euclidean(pt1, pt2):
    # straight-line distance between two (x, y) points, used to detect pinch gestures
    d = np.sqrt((pt1[0]-pt2[0])**2 + (pt1[1]-pt2[1])**2)
    return d
euclidean((4, 3), (0, 0))
5.0
cam = cv2.VideoCapture(0)
Define fsize as the frame size and ssize as the screen (canvas) size.
fsize = (520, 720)
ssize = (520, 720)
bg_color = (100, 100, 100)
pen_size = 8
ink_size = 10
colors = {"red": [0, 0, 255], "blue":[255, 0, 0], "green":[0, 255, 0]}
modes = ["draw", "move", "erase"]
current_mode = "draw"
last_mode = None
current_color = "red"
cnames = list(colors.keys())
ink_color = colors[current_color]
pen_color = np.array(ink_color)-[100, 50, 100] # offset the ink color so the pointer stands out; the uint8 cast below wraps negatives
pen_color = pen_color.astype(np.uint8).tolist()
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
left, top, right, bottom = (400, 100, 700, 300) # ROI inside the frame; fingertip movement in this box is mapped to the whole canvas
canvas = np.zeros((ssize[0], ssize[1], 3))+bg_color
canvas = canvas.astype(np.uint8)
check_every = 5
check_cnt = 0
with mp_hands.Hands(static_image_mode=True,
max_num_hands = 1,
min_detection_confidence=0.2) as hands:
while cam.isOpened():
ret, frame = cam.read()
if not ret:
continue
frame = cv2.GaussianBlur(frame, (5, 5), -1)
temp_canvas is a canvas that we define on every frame to hold that frame's new strokes before they are merged into the main canvas. Similarly, we will use pen_canvas just to show the pointer. Both should be of uint8 type.
temp_canvas = np.zeros((ssize[0], ssize[1], 3)).astype(np.uint8)
pen_canvas = np.zeros((ssize[0], ssize[1], 3)).astype(np.uint8)
frame = cv2.flip(frame, 1)
frame = cv2.resize(frame, (fsize[1], fsize[0]))
h, w, _ = frame.shape
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
res = hands.process(rgb)
if res.multi_hand_landmarks:
for hand_landmarks in res.multi_hand_landmarks:
index_tip = mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y,
w, h)
index_pip = np.array(mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].y,
w, h))
thumb_tip = mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y,
w, h)
middle_tip = mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].y,
w, h)
index_tipm = list(index_tip) # make list
index_tipm[0] = np.clip(index_tipm[0], left, right) # clip column value within left to right
index_tipm[1] = np.clip(index_tipm[1], top, bottom) # clip row value within top to bottom
index_tipm[0] = (index_tipm[0]-left) * fsize[1]/(right-left) # column value from ROI to frame world
index_tipm[1] = (index_tipm[1]-top) * fsize[0]/(bottom-top) # row value from ROI to frame world
# convert coordinate from frame world to canvas or screen world
screen_pos = frame_pos2screen_pos(fsize, ssize, index_tipm)
if check_cnt >= check_every:
if thumb_tip is not None and index_pip is not None:
if euclidean(thumb_tip, index_pip)<60:
if current_mode == "draw" or current_mode == "erase":
current_mode = "move"
last_mode = "draw"
elif current_mode == "move":
current_mode="draw"
last_mode = "move"
if middle_tip is not None and index_tip is not None:
if euclidean(index_tip, middle_tip)<60:
if current_mode != "erase":
current_mode = "erase"
else:
current_mode = "move"
last_mode = "erase"
if thumb_tip is not None and middle_tip is not None:
if euclidean(thumb_tip, middle_tip)<60:
current_color = cnames[(cnames.index(current_color)+1)%len(cnames)]
ink_color = colors[current_color]
pen_color = np.array(ink_color)-[100, 50, 100]
pen_color = pen_color.astype(np.uint8).tolist()
# print(pen_color)
check_cnt = 0
if current_mode=="draw":
cv2.circle(temp_canvas, screen_pos, int(ink_size), ink_color, -1)
if current_mode=="erase":
cv2.circle(temp_canvas, screen_pos, int(ink_size), bg_color, -1)
cv2.circle(pen_canvas, screen_pos, int(pen_size), pen_color, -1)
mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
if current_mode in ["draw", "erase"]:
mask = np.sum(temp_canvas, axis=-1)!=0
canvas[mask]=temp_canvas[mask]
ncanvas = canvas.copy()
mask = np.sum(pen_canvas, axis=-1)!=0
ncanvas[mask]=pen_canvas[mask]
Above the canvas we stack a small strip that shows the current mode, color and pen size as text; the stacked result must be of uint8 type.
d = np.zeros((100, ssize[1], 3), dtype=np.uint8) # text strip; uint8 keeps cv2.putText and the vstack below happy
text = f"Mode: {current_mode} | Color: {current_color} | Pen: {ink_size}"
cv2.putText(d, text, (10, 70), cv2.FONT_HERSHEY_COMPLEX, 1, (200, 100, 200), 2)
ncanvas = np.vstack([d, ncanvas]).astype(np.uint8)
nshape = ncanvas.shape
# draw a ROI rectangle
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 1)
frame = cv2.resize(frame, (nshape[1], nshape[0]))
cv2.imshow("Window", temp_canvas)
cv2.imshow("Canvas", np.hstack([frame, ncanvas]).astype(np.uint8))
check_cnt += 1
if cv2.waitKey(1)&0xFF == 27:
break
cam.release()
#out.release()
cv2.destroyAllWindows()
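Before reusing the same loop below, it may help to see the gesture logic in isolation. This is only a rough sketch (the helper name detect_gesture and its return labels are mine, not part of the script above): it folds the three pinch-distance checks into one function and returns the first match, whereas in the loop the checks run one after another and a later one can override an earlier one.
PINCH_THRESHOLD = 60 # same pixel distance used inside the loop

def detect_gesture(thumb_tip, index_tip, index_pip, middle_tip):
    # thumb touching the index PIP joint toggles between draw and move
    if thumb_tip is not None and index_pip is not None and euclidean(thumb_tip, index_pip) < PINCH_THRESHOLD:
        return "toggle_move"
    # index and middle fingertips touching toggles erase
    if index_tip is not None and middle_tip is not None and euclidean(index_tip, middle_tip) < PINCH_THRESHOLD:
        return "toggle_erase"
    # thumb and middle fingertips touching cycles the ink color
    if thumb_tip is not None and middle_tip is not None and euclidean(thumb_tip, middle_tip) < PINCH_THRESHOLD:
        return "next_color"
    return None

# quick check with made-up landmark positions
print(detect_gesture((100, 100), (110, 105), (120, 110), (300, 300))) # toggle_move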
cam = cv2.VideoCapture(0)
fsize = (520, 720)
ssize = (520, 720)
bg_color = (100, 100, 100)
pen_size = 15
ink_size = 13
colors = {"red": [0, 0, 255], "blue":[255, 0, 0], "green":[0, 255, 0]}
modes = ["draw", "move", "erase"]
current_mode = "draw"
last_mode = None
current_color = "red"
cnames = list(colors.keys())
ink_color = colors[current_color]
pen_color = np.array(ink_color)-[100, 50, 100]
pen_color = pen_color.astype(np.uint8).tolist()
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
left, top, right, bottom = (400, 100, 700, 300)
canvas = np.zeros((ssize[0], ssize[1], 3))+bg_color
canvas = canvas.astype(np.uint8)
check_every = 5
check_cnt = 0
#out = cv2.VideoWriter("out.avi", cv2.VideoWriter_fourcc(*'XVID'), 30, (fsize[1], fsize[0]))
with mp_hands.Hands(static_image_mode=True,
max_num_hands = 1,
min_detection_confidence=0.2) as hands:
while cam.isOpened():
ret, frame = cam.read()
if not ret:
continue
frame = cv2.GaussianBlur(frame, (5, 5), -1)
temp_canvas = np.zeros((ssize[0], ssize[1], 3)).astype(np.uint8)
pen_canvas = np.zeros((ssize[0], ssize[1], 3)).astype(np.uint8)
frame = cv2.flip(frame, 1)
frame = cv2.resize(frame, (fsize[1], fsize[0]))
h, w, _ = frame.shape
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
res = hands.process(rgb)
if res.multi_hand_landmarks:
for hand_landmarks in res.multi_hand_landmarks:
index_tip = mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y,
w, h)
index_pip = np.array(mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].y,
w, h))
thumb_tip = mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y,
w, h)
middle_tip = mp_drawing._normalized_to_pixel_coordinates(
hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].x,
hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].y,
w, h)
#print(index_tip)
index_tipm = list(index_tip)
index_tipm[0] = np.clip(index_tipm[0], left, right)
index_tipm[1] = np.clip(index_tipm[1], top, bottom)
index_tipm[0] = (index_tipm[0]-left) * fsize[1]/(right-left)
index_tipm[1] = (index_tipm[1]-top) * fsize[0]/(bottom-top)
#print(index_tipm)
screen_pos = frame_pos2screen_pos(fsize, ssize, index_tipm)
if check_cnt >= check_every:
if thumb_tip is not None and index_pip is not None:
if euclidean(thumb_tip, index_pip)<60:
if current_mode == "draw" or current_mode == "erase":
current_mode = "move"
last_mode = "draw"
elif current_mode == "move":
current_mode="draw"
last_mode = "move"
if middle_tip is not None and index_tip is not None:
if euclidean(index_tip, middle_tip)<60:
if current_mode != "erase":
current_mode = "erase"
else:
current_mode = "move"
last_mode = "erase"
if thumb_tip is not None and middle_tip is not None:
if euclidean(thumb_tip, middle_tip)<60:
current_color = cnames[(cnames.index(current_color)+1)%len(cnames)]
ink_color = colors[current_color]
pen_color = np.array(ink_color)-[100, 50, 100]
pen_color = pen_color.astype(np.uint8).tolist()
# print(pen_color)
check_cnt = 0
if current_mode=="draw":
cv2.circle(temp_canvas, screen_pos, int(ink_size), ink_color, -1)
if current_mode=="erase":
cv2.circle(temp_canvas, screen_pos, int(ink_size), bg_color, -1)
cv2.circle(pen_canvas, screen_pos, int(pen_size), pen_color, -1)
mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
if current_mode in ["draw", "erase"]:
mask = np.sum(temp_canvas, axis=-1)!=0
canvas[mask]=temp_canvas[mask]
ncanvas = canvas.copy()
mask = np.sum(pen_canvas, axis=-1)!=0
ncanvas[mask]=pen_canvas[mask]
d = np.zeros((100, ssize[1], 3), dtype=np.uint8)
text = f"Mode: {current_mode} | Color: {current_color} | Pen: {ink_size}"
cv2.putText(d, text, (10, 70), cv2.FONT_HERSHEY_COMPLEX, 1, (200, 100, 200), 2)
ncanvas = np.vstack([d, ncanvas]).astype(np.uint8)
nshape = ncanvas.shape
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 1)
frame = cv2.resize(frame, (nshape[1], nshape[0]))
check_cnt += 1
cv2.imshow("Window", temp_canvas)
cv2.imshow("Canvas", np.hstack([frame, ncanvas]).astype(np.uint8))
if cv2.waitKey(1)&0xFF == 27:
break
cam.release()
#out.release()
cv2.destroyAllWindows()
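The commented-out out lines above hint at recording a demo video. Here is a minimal sketch of how that could be wired in, assuming we record the combined frame-plus-canvas view shown in the "Canvas" window (the size passed to VideoWriter must match the image actually written, otherwise the output file stays empty):
# (width, height) of np.hstack([frame, ncanvas]): two 720-px-wide images side by side,
# each ssize[0]+100 pixels tall because of the text strip stacked on top
out_size = (ssize[1]*2, ssize[0]+100)
out = cv2.VideoWriter("out.avi", cv2.VideoWriter_fourcc(*"XVID"), 30, out_size)

# inside the loop, right after building the combined view:
#     combined = np.hstack([frame, ncanvas]).astype(np.uint8)
#     out.write(combined)

# after the loop, alongside cam.release():
#     out.release()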