Hi,
I watched the following video on YouTube:
It's a machine learning based hand detection, written in python.
He wrote a wrapper package for the MediaPipe called cvzone and created a UDP Stream for Unity.
The Hand detection looks very promising.
I thought this could work also in VAM.
Therefore I wrote a VAM plugin that consumes that UDP stream; see the code below.
A video file can also be used as input.
Here is the Python code:
You need the following packages (exact version):
But there are several issues:
I watched the following video on YouTube:
He wrote a wrapper package for the MediaPipe called cvzone and created a UDP Stream for Unity.
The Hand detection looks very promising.
I thought this could work also in VAM.
Therefore I wrote a VAM plugin that consumes that UDP stream; see the code below.
A video file can also be used as input:
cap = cv2.VideoCapture(0) for webcam
cap = cv2.VideoCapture("anyvideofile.mp4") for video
Here is the Python code:
Python:
import cv2
from cvzone.HandTrackingModule import HandDetector
import socket
# Parameters
# Requested capture resolution. A webcam may ignore the request and a video
# file always keeps its own size, so the real frame height is read per frame
# below instead of trusting these values.
width, height = 1280, 720
# Alternative resolution:
# width, height = 1920, 1080

# Video or Webcam: pass 0 for the default webcam or a file path for a video.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

# Hand Detector
detector = HandDetector(maxHands=1, detectionCon=0.8)

# Communication: one UDP datagram per frame in which a hand was detected.
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
serverAddressPort = ("127.0.0.1", 5052)

try:
    while True:
        # Get the next frame from the webcam / video file.
        success, img = cap.read()
        if not success:
            # End of the video file or the camera stopped delivering frames.
            break

        # Height of the frame that was actually delivered (rows of the image).
        frame_height = img.shape[0]

        # Hands
        hands, img = detector.findHands(img)

        # Landmark values - (x, y, z) * 21
        if hands:
            # Get the landmark list of the first hand detected.
            lmList = hands[0]['lmList']
            data = []
            for lm in lmList:
                # Flip y so that it grows upwards instead of downwards.
                data.extend([lm[0], frame_height - lm[1], lm[2]])
            # NOTE(review): the original paste lost its indentation; sending
            # only when a hand was found avoids emitting an empty "[]" packet.
            sock.sendto(str.encode(str(data)), serverAddressPort)

        cv2.imshow("Image", img)
        # Press 'q' or ESC in the preview window to stop.
        if cv2.waitKey(1) & 0xFF in (ord('q'), 27):
            break
finally:
    # Always give the camera, the socket and the preview window back.
    cap.release()
    sock.close()
    cv2.destroyAllWindows()
You need the following packages (exact versions):
- Python 3.7.1
- cvzone 1.5.6
- mediapipe 0.9.0.1
C#:
using System;
using System.Net;
using System.Net.Sockets;
using UnityEngine;
namespace VAMDev
{
    /// <summary>
    /// Plugin that listens on a local UDP port for hand-landmark packets and
    /// drives the containing atom's "rHandControl" controller from them.
    /// </summary>
    /// <remarks>
    /// A packet is the UTF-8 text of a bracketed, comma-separated number list
    /// ("[x0, y0, z0, x1, ...]"), three values per landmark. Packets are
    /// received on a background callback and applied in <see cref="Update"/>,
    /// because the controller must only be touched from the main thread.
    /// </remarks>
    public class HandTracking : MVRScript
    {
        // Every landmark occupies three consecutive values: x, y, z.
        const int ValuesPerLandmark = 3;
        // Landmark 0 (values 0..2) is used as the wrist.
        const int WristLandmark = 0;
        // Landmark 12 (values 36..38) is used as the middle finger tip.
        const int MiddleFingerTipLandmark = 12;

        FreeControllerV3 controller;
        protected UIDynamicButton connectToServer;
        protected UIDynamicButton disconnectFromServer;
        UdpClient clientData;
        int portData = 5052;
        public int receiveBufferSize = 120000;
        public bool showDebug = false;
        IPEndPoint ipEndPointData;
        private AsyncCallback AC;
        // Packet currently being parsed; only touched on the main thread.
        byte[] receivedBytes;

        // Guards the hand-over slots shared between the receive callback and Update().
        private readonly object packetLock = new object();
        // Most recent packet not yet applied; older unprocessed packets are dropped.
        private byte[] pendingPacket;
        // Error raised on the receive callback, logged later from Update().
        private string pendingError;
        // Ensures the "controller not found" error is logged only once.
        private bool missingControllerReported;

        /// <summary>Creates the Connect / Disconnect buttons.</summary>
        public override void Init()
        {
            try
            {
                // Assign the fields (the previous locals shadowed them and left them null).
                connectToServer = CreateButton("Connect", false);
                connectToServer.button.onClick.AddListener(ConnectToServerCallback);
                disconnectFromServer = CreateButton("Disconnect", true);
                disconnectFromServer.button.onClick.AddListener(DisconnectFromServerCallback);
            }
            catch (Exception e)
            {
                SuperController.LogError("Exception caught in Init(): " + e);
            }
        }

        /// <summary>Button handler: (re)starts the UDP listener.</summary>
        protected void ConnectToServerCallback()
        {
            try
            {
                InitializeUDPListener();
                SuperController.LogMessage("Connected to server!");
            }
            catch (Exception e)
            {
                SuperController.LogError("Exception caught: " + e);
            }
        }

        /// <summary>Button handler: stops the UDP listener.</summary>
        protected void DisconnectFromServerCallback()
        {
            CloseClient();
            SuperController.LogMessage("Disconnected from server.");
        }

        /// <summary>
        /// Binds a UDP socket to <c>portData</c> on all interfaces and starts
        /// receiving asynchronously. A listener that is already running is
        /// closed first so that repeated calls do not leak sockets.
        /// </summary>
        public void InitializeUDPListener()
        {
            CloseClient();

            ipEndPointData = new IPEndPoint(IPAddress.Any, portData);
            UdpClient client = new UdpClient();
            try
            {
                client.Client.ReceiveBufferSize = receiveBufferSize;
                client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, optionValue: true);
                client.ExclusiveAddressUse = false;
                client.EnableBroadcast = true;
                client.Client.Bind(ipEndPointData);
                client.DontFragment = true;
            }
            catch
            {
                // Do not leave a half-configured socket behind.
                client.Close();
                throw;
            }

            if (showDebug)
            {
                SuperController.LogMessage("BufSize: " + client.Client.ReceiveBufferSize);
            }

            clientData = client;
            AC = new AsyncCallback(ReceivedUDPPacket);
            // The client travels as async state so the callback always ends the
            // receive on the socket that started it, even after a reconnect.
            client.BeginReceive(AC, client);
            SuperController.LogMessage("UDP - Start Receiving..");
        }

        /// <summary>
        /// Receive-completion callback. It only stores the packet for
        /// <see cref="Update"/> and re-arms the receive; it does not log or
        /// touch scene objects itself.
        /// </summary>
        void ReceivedUDPPacket(IAsyncResult result)
        {
            UdpClient client = result.AsyncState as UdpClient;
            if (client == null)
            {
                return;
            }

            try
            {
                IPEndPoint sender = new IPEndPoint(IPAddress.Any, 0);
                byte[] packet = client.EndReceive(result, ref sender);
                lock (packetLock)
                {
                    pendingPacket = packet;
                }
                client.BeginReceive(AC, client);
            }
            catch (ObjectDisposedException)
            {
                // The socket was closed by Disconnect / OnDestroy: stop quietly.
            }
            catch (Exception e)
            {
                // Report from the main thread; receiving stops until the user reconnects.
                lock (packetLock)
                {
                    pendingError = "Exception caught while receiving UDP data: " + e;
                }
            }
        }

        /// <summary>
        /// Per-frame hook: applies the most recent packet handed over by the
        /// receive callback and reports any receive error.
        /// </summary>
        void Update()
        {
            byte[] packet;
            string error;
            lock (packetLock)
            {
                packet = pendingPacket;
                pendingPacket = null;
                error = pendingError;
                pendingError = null;
            }

            if (error != null)
            {
                SuperController.LogError(error);
            }
            if (packet == null)
            {
                return;
            }

            try
            {
                receivedBytes = packet;
                ParsePacket();
            }
            catch (Exception e)
            {
                SuperController.LogError("Exception caught in Update(): " + e);
            }
        }

        /// <summary>
        /// Decodes <c>receivedBytes</c> and moves/rotates the right-hand
        /// controller. Empty, short or non-numeric packets are ignored.
        /// </summary>
        void ParsePacket()
        {
            if (receivedBytes == null || receivedBytes.Length == 0)
            {
                return;
            }

            string data = System.Text.Encoding.UTF8.GetString(receivedBytes);
            // Strip the surrounding list brackets without assuming they are present.
            data = data.Trim().TrimStart('[').TrimEnd(']');
            if (data.Length == 0)
            {
                // "[]": the sender found no hand in this frame.
                return;
            }

            string[] points = data.Split(',');

            //RIGHT HAND WRIST
            Vector3 pointA;
            //RIGHT HAND MIDDLE_FINGER_TIP
            Vector3 pointB;
            if (!TryReadLandmark(points, WristLandmark, out pointA) ||
                !TryReadLandmark(points, MiddleFingerTipLandmark, out pointB))
            {
                if (showDebug)
                {
                    SuperController.LogMessage("Ignoring malformed packet with " + points.Length + " values.");
                }
                return;
            }

            if (controller == null)
            {
                controller = containingAtom.GetStorableByID("rHandControl") as FreeControllerV3;
                if (controller == null)
                {
                    if (!missingControllerReported)
                    {
                        missingControllerReported = true;
                        SuperController.LogError("HandTracking: containing atom has no 'rHandControl' controller.");
                    }
                    return;
                }
            }

            SetControllerState(controller, FreeControllerV3.PositionState.On, FreeControllerV3.RotationState.On);

            // NOTE(review): normalizing discards the wrist's distance from the
            // origin and keeps only its direction; this is preserved from the
            // original and may be related to the reported x/y offset - confirm.
            controller.SetLocalPosition(pointA.normalized);

            Vector3 dir = pointA - pointB;
            Vector3 forward = Vector3.Cross(dir, Vector3.up).normalized;
            // The cross product vanishes when the hand axis is (anti)parallel to
            // "up" or both points coincide; keep the previous rotation then.
            if (forward.sqrMagnitude > 0f)
            {
                controller.SetLocalEulerAngles(Quaternion.LookRotation(forward).eulerAngles);
            }
        }

        /// <summary>
        /// Reads the x, y, z values of one landmark from the split packet.
        /// </summary>
        /// <param name="values">All values of the packet, in order.</param>
        /// <param name="landmark">Zero-based landmark index.</param>
        /// <param name="point">Parsed point, or zero on failure.</param>
        /// <returns>False if the values are missing or not numeric.</returns>
        static bool TryReadLandmark(string[] values, int landmark, out Vector3 point)
        {
            point = Vector3.zero;
            int first = landmark * ValuesPerLandmark;
            if (values.Length < first + ValuesPerLandmark)
            {
                return false;
            }

            // Invariant culture: the sender always writes '.' as decimal separator.
            System.Globalization.NumberStyles style = System.Globalization.NumberStyles.Float;
            System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
            float x;
            float y;
            float z;
            if (!float.TryParse(values[first], style, culture, out x) ||
                !float.TryParse(values[first + 1], style, culture, out y) ||
                !float.TryParse(values[first + 2], style, culture, out z))
            {
                return false;
            }

            point = new Vector3(x, y, z);
            return true;
        }

        /// <summary>Closes the listener when the plugin is destroyed.</summary>
        void OnDestroy()
        {
            CloseClient();
        }

        /// <summary>
        /// Closes the current socket, if any, and drops an unprocessed packet.
        /// Safe to call repeatedly.
        /// </summary>
        void CloseClient()
        {
            UdpClient client = clientData;
            clientData = null;
            if (client != null)
            {
                client.Close();
            }
            lock (packetLock)
            {
                pendingPacket = null;
            }
        }

        /// <summary>
        /// Sets the controller's position and rotation state, writing each one
        /// only when it actually changes.
        /// </summary>
        protected void SetControllerState(FreeControllerV3 controller, FreeControllerV3.PositionState positionState, FreeControllerV3.RotationState rotationState)
        {
            if (controller.currentPositionState != positionState)
            {
                controller.currentPositionState = positionState;
            }
            if (controller.currentRotationState != rotationState)
            {
                controller.currentRotationState = rotationState;
            }
        }
    }
}
But there are several issues:
- x and y offset, the correct value is not that easy to find
- can't get the finger working correctly, it's bending and stretching all over the screen
- breaking VAM UI after several runs
Last edited: