[Mediapipe, Tensorflow] I am getting [[0,0,0]....] as output when I try to detect hand landmarks

Steinkelsson · January 15

# Importing libraries I deemed necessary
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
import mediapipe as mp
import cv2
import numpy as np

# The training data set contains 78,000 images which are 200x200 pixels. There are 26 classes for the letters A-Z.
# The test data set contains a mere 26 images, to encourage the use of real-world test images.

# for dirname, _, filenames in os.walk(r'D:\ASL Recognition using CNN\Input_Images'):
#     print("Data Loading....")
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
#     print("Data Successfully Loaded")


train_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=r'D:\ASL Recognition using CNN\Input_Images\asl_alphabets\asl_alphabet_train',
    labels='inferred',
    label_mode='int',
    image_size=(200, 200),
    color_mode='rgb',
    batch_size=32,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset='training'
)
# for image, label in train_dataset.take(1):
#     print("Before preprocessing", image.dtype, label.dtype)
#     print(image)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=r'D:\ASL Recognition using CNN\Input_Images\asl_alphabets\asl_alphabet_train',
    labels='inferred',
    label_mode='int',
    image_size=(200, 200),
    color_mode='rgb',
    batch_size=32,
    shuffle=False,
    seed=42,
    validation_split=0.2,
    subset='validation'
)

# test_dataset = tf.keras.utils.image_dataset_from_directory(
#     directory=r'D:\ASL Recognition using CNN\Input_Images\asl_alphabets\asl_alphabet_test',
#     labels=None,
#     image_size=(200, 200),
#     color_mode='rgb',
#     batch_size=32,
#     shuffle=False,
# )

# Since the test_data didn't have subdirectories that reflected their class_names,
# I couldn't use tf.keras.utils.image_dataset_from_directory()
# So I had to create a function that extracts labels from file name and creates a dataset


def test_dataset_generator():
    test_image_directory = r'D:\ASL Recognition using CNN\Input_Images\asl_alphabets\asl_alphabet_test'
    batch_size = 26
    img_height, img_width = 200, 200

    # Get the list of test image file paths
    test_filepaths = tf.data.Dataset.list_files(os.path.join(test_image_directory, '*.jpg'))

    # Extract labels from file names
    def extract_label(file_path):
        # Assuming file names are like 'A_test.jpg' ....
        # and filepaths are like "D:\ASL Recognition using CNN\Input_Images\asl_alphabets\asl_alphabet_test\A_test.jpg"
        parts = tf.strings.split(tf.strings.split(file_path, '\\')[-1], '_')
        return parts[0]

    # Map file paths to images and labels
    def process_path(file_path):
        label = extract_label(file_path)
        img = tf.io.read_file(file_path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [img_height, img_width])
        return img, label

    # Create the test dataset
    test_datasets = test_filepaths.map(process_path)
    test_datasets = test_datasets.batch(batch_size)
    return test_datasets


test_dataset = test_dataset_generator()


# Creating Preprocessing Layers
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.Rescaling(1. / 255)
])

# data_rescaling = tf.keras.Sequential([
#      layers.Rescaling(1. / 255)
# ])

# Applying Preprocessing Layers to the dataset
# We only augment the training data!
# Configuring the datasets for performance, using parallel reads and buffered prefetching ....
# ... to yield batches from disk without I/O become blocking.

AUTOTUNE = tf.data.AUTOTUNE


def preprocess(ds, augment=False):
    if augment:
        ds = ds.map(lambda image, label: (data_augmentation(image, training=True), label), num_parallel_calls=AUTOTUNE)
    return ds.prefetch(buffer_size=AUTOTUNE)


train_preprocessed_dataset = preprocess(train_dataset, augment=True)
validation_preprocessed_dataset = preprocess(validation_dataset, augment=True)
test_preprocessed_dataset = preprocess(test_dataset)

# for image, label in train_preprocessed_dataset.take(1):
#     print(image.dtype, label.dtype)
# for image, label in train_dataset.take(1):
#     print(image.dtype, label.dtype)

# Creating Mediapipe Hands Landmark Layers
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.3,
    min_tracking_confidence=0.3,
    model_complexity=1,
    static_image_mode=True
)

# Vectorized landmark extraction function using TensorFlow ops
def feature_extraction(image):
    img = (image.numpy() * 255).astype(np.uint8)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    print(img.dtype, img.shape)
    # Apply MediaPipe Hands processing
    results = hands.process(img)
    # print("The resulting array is:", results.multi_hand_landmarks)

    # Handle cases where no hands are detected
    if results.multi_hand_landmarks is None:
        return tf.zeros((21, 3), dtype=tf.float32)  # Placeholder for no hands

    # Extract landmarks with error handling
    try:
        # landmarks = tf.gather_nd(results.multi_hand_landmarks, [[0, 0, 0], [0, 1, 0], [0, 2, 0], ...])
        landmarks = np.array(
            [landmark.x, landmark.y, landmark.z] for hand_landmarks in results.multi_hand_landmarks for landmark in
            hand_landmarks.landmark)
    except ValueError as e:
        print(f"Error extracting landmarks: {e}")
        return tf.zeros((21, 3), dtype=tf.float32)  # Fallback placeholder

    return landmarks


train_landmark_dataset = train_dataset.map(lambda x, y: (tf.py_function(feature_extraction, [x], tf.float32), y))
validation_landmark_dataset = validation_dataset.map(lambda x, y: (tf.py_function(feature_extraction, [x], tf.float32), y))

# train_combined_dataset = tf.data.Dataset.zip((train_preprocessed_dataset, train_landmark_dataset))
# validation_combined_dataset = tf.data.Dataset.zip((validation_preprocessed_dataset, validation_landmark_dataset))

for landmarks in train_landmark_dataset.take(1):
    print("\n\n\nThe landmarks of a batch are:", landmarks)

Sign In

[Mediapipe, Tensorflow] I am getting [[0,0,0]....] as output when I try to detect hand landmarks

Link to comment

Share on other sites

Link to post

Share on other sites

Create an account or sign in to comment

Create an account

Sign in

Featured Topics

Topics

Latest From Linus Tech Tips:

I Was Never Meant to Have This Prototype CPU

Latest From Tech Quickie:

Why Do Speakers Hiss?

Latest From TechLinked:

Intel: “It Wasn’t Me”

Latest From GameLinked:

Bethesda Knows It’s Broken

Latest From ShortCircuit:

How is this even handheld?! - OneXPlayer X1

Latest From Mac Address:

Why did you buy an Apple Vision Pro?

Latest From Channel Super Fun:

I Swapped the CEO's Assistant For a Day!

My Activity Streams