| | import pickle
|
| | import cv2
|
| | import mediapipe as mp
|
| | import numpy as np
|
| | from PIL import Image
|
| | import requests
|
| | from io import BytesIO
|
| | import gradio as gr
|
| |
|
# Load the trained classifier from disk once, at import time.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only ever load a model file that comes from a trusted source.
with open('stacked_model_new.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)

# Class names, looked up by the integer class index the model predicts.
# The list holds 24 letters; 'J' and 'Z' are not among them.
labels = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
]

# The pickle stores a dict; the estimator itself lives under the 'model' key.
model = model_dict['model']
|
| |
|
| |
|
| |
|
| |
|
def _landmark_features(hand_landmarks):
    """Flatten one hand's landmarks into the feature vector fed to the model.

    Each landmark contributes two values, in order: its x and its y, each
    shifted so that the smallest x (respectively y) over the hand becomes 0.
    Returns an empty list when the hand carries no landmarks.
    """
    points = [(lm.x, lm.y) for lm in hand_landmarks.landmark]
    if not points:
        return []

    # Compute the offsets once instead of rescanning the lists per landmark.
    min_x = min(x for x, _ in points)
    min_y = min(y for _, y in points)

    features = []
    for x, y in points:
        features.append(x - min_x)
        features.append(y - min_y)
    return features


def predict(url):
    """Download the image at *url* and classify the hand sign it shows.

    The image is fetched with ``requests``, decoded in memory, and passed to
    MediaPipe Hands. When exactly one hand is found, its landmarks are turned
    into a 42-value feature vector (x/y offsets per landmark) and classified
    by the module-level ``model``; the predicted index is mapped through
    ``labels``.

    Args:
        url: Address of the image to download.

    Returns:
        A dict with a single ``"prediction"`` key whose value is either the
        predicted label, ``"Too many Hands"`` when more than one hand is
        detected, or ``"No hand detected"`` when no usable hand is found.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled server from blocking this call indefinitely.
    response = requests.get(url, timeout=10)
    print(response)
    response.raise_for_status()

    # Decode straight from memory rather than round-tripping through a shared
    # 'image.jpg' on disk: concurrent calls can no longer overwrite each
    # other's file, and images with an alpha channel or palette no longer
    # fail on JPEG save. MediaPipe expects RGB, which convert() guarantees.
    frame_rgb = np.array(Image.open(BytesIO(response.content)).convert('RGB'))

    mp_hands = mp.solutions.hands
    # The context manager releases the detector's resources after each call.
    # NOTE(review): static_image_mode=False is kept from the original code;
    # for single still images static_image_mode=True may be the better fit.
    # The hand limit is left at the library default so that the
    # "Too many Hands" outcome below stays reachable.
    with mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.3) as hands:
        detected_hands = hands.process(frame_rgb).multi_hand_landmarks

    if not detected_hands:
        return {"prediction": "No hand detected"}
    if len(detected_hands) != 1:
        return {"prediction": "Too many Hands"}

    features = _landmark_features(detected_hands[0])
    # The model is only called with 42 features (two values per landmark).
    if len(features) != 42:
        return {"prediction": "No hand detected"}

    prediction = model.predict([np.asarray(features)])
    # int() lets any integer-like model output be used as a list index.
    predicted_character = labels[int(prediction[0])]
    return {"prediction": predicted_character}
|
| |
|
| |
|
# predict() takes an image URL (a string) and downloads the image itself, so
# the input component has to be a text box; an "image" input would hand it
# decoded image data instead of a URL.
iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="Image to Text Model")

# Start the web UI as soon as the script runs.
iface.launch()