This is the second post in a series developing a comprehensive software suite for a robot arm that could eventually grow into a realistic application for home or industrial use.
This installment adds three features: context memory, so the robot remembers its three previous actions (for example, after "grab the cup", a follow-up "lift it" reuses the remembered object); a vision module, so the robot can see objects and report their positions; and obstacle avoidance. The code is split across two files, main and robot_arm.
# Import necessary libraries
import speech_recognition as sr
import spacy
import cv2  # python -m pip install opencv-python
import numpy as np
from robot_arm import RobotArm  # Import the RobotArm module

# Initialize NLP model
nlp = spacy.load("en_core_web_sm")

# Initialize the robot arm with link lengths
robot_arm = RobotArm(link1_length=10, link2_length=10)

# Initialize YOLOv3 object detection (the weights, config, and class-name files
# must be downloaded separately)
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based indices; flatten() handles both the
# old Nx1 and the newer flat return shape across OpenCV versions
output_layers = [layer_names[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
class ContextManager:
    def __init__(self):
        self.previous_actions = []
        self.current_object = None
        self.current_position = None
        self.detected_objects = {}

    def update(self, action, objects=None, position=None):
        self.previous_actions.append(action)
        if objects:
            self.current_object = objects[-1]  # Last mentioned object
        if position:
            self.current_position = position

    def get_context(self):
        return {
            "previous_actions": self.previous_actions[-3:],  # Last 3 actions
            "current_object": self.current_object,
            "current_position": self.current_position,
            "detected_objects": self.detected_objects
        }

context_manager = ContextManager()
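If you want to see the three-action window in isolation, here is a quick check you can run in a REPL (not part of main.py; the action names are arbitrary):

cm = ContextManager()
for act in ["grab", "lift", "move", "put"]:
    cm.update(act, objects=["cup"])
print(cm.get_context()["previous_actions"])  # ['lift', 'move', 'put'] - only the last three
print(cm.get_context()["current_object"])    # cup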
def listen_for_command():
    # Initialize the recognizer
    recognizer = sr.Recognizer()
    # Use the microphone as the source for input
    with sr.Microphone() as source:
        print("Listening for command...")
        audio = recognizer.listen(source)
    # Convert audio to text
    try:
        command_text = recognizer.recognize_google(audio)
        print(f"Command received: {command_text}")
        return command_text
    except sr.UnknownValueError:
        print("Could not understand audio. Please try again.")
        return None
    except sr.RequestError as e:
        print(f"Could not request results; {e}. Please check your network connection.")
        return None
def detect_objects():
    # Capture a single frame from the camera
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        print("Could not read a frame from the camera.")
        return {}
    height, width, channels = frame.shape
    # Detecting objects
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    # Information to return
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    # Non-maximum suppression; flatten() handles both OpenCV return shapes
    indexes = np.array(cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)).flatten()
    detected_objects = {}
    for i in indexes:
        label = str(classes[class_ids[i]])
        detected_objects[label] = boxes[i]
    context_manager.detected_objects = detected_objects
    return detected_objects
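For debugging the vision side on its own, a small helper like the following (hypothetical, not part of the pipeline above) draws what was just detected. Note that the overlay frame is a separate capture taken a moment after the one detect_objects analyzed, which is good enough for a sanity check:

def show_detections():
    detections = detect_objects()
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        return
    for label, (x, y, w, h) in detections.items():
        # Draw each detection box with its class label
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, label, (x, max(y - 5, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    cv2.imshow("Detections", frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()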
def process_command(command_text):
    # Perform object detection
    detected_objects = detect_objects()
    print(f"Detected objects: {list(detected_objects.keys())}")
    # Pass the detection boxes to the arm as obstacles. Note: the boxes are in
    # image pixels; a deployed system would first map them into the arm's
    # workspace coordinates.
    robot_arm.set_obstacles(list(detected_objects.values()))
    # Parse the command using NLP
    doc = nlp(command_text)
    # Extract the verb (action), objects, and numeric positions from the command
    action = None
    objects = []
    position = None
    for token in doc:
        if token.pos_ == "VERB":
            action = token.lemma_
        elif token.pos_ in ["NOUN", "PROPN"]:
            objects.append(token.text.lower())
        elif token.like_num:
            try:
                value = float(token.text)
            except ValueError:
                continue  # spelled-out numbers like "five" would need extra handling
            if position is None:
                position = [value]
            else:
                position.append(value)
    # Use context if no objects or position are specified
    context = context_manager.get_context()
    if not objects and context["current_object"]:
        objects = [context["current_object"]]
    if not position and context["current_position"]:
        position = context["current_position"]
    # Define a dictionary of supported actions and their corresponding robot functions
    actions = {
        "lift": robot_arm.lift,
        "bring": robot_arm.bring,
        "move": robot_arm.move,
        "grab": robot_arm.grab,
        "put": robot_arm.put,
        "go": robot_arm.move_to_position
    }
    # Check if the action is supported and execute the corresponding function
    if action in actions:
        if action == "go" and position and len(position) == 2:
            print(f"Moving to position: ({position[0]}, {position[1]}) while avoiding obstacles")
            actions[action](position[0], position[1])
            context_manager.update(action, position=position)
        elif objects:
            print(f"Executing: {action} {', '.join(objects)}")
            actions[action](*objects)
            context_manager.update(action, objects=objects)
        else:
            print(f"Action '{action}' recognized, but no objects or position specified.")
    else:
        print(f"Action '{action}' not recognized. Supported actions are: {', '.join(actions.keys())}")
    # Handle multi-step tasks based on context
    if "lift" in context["previous_actions"] and action == "move":
        print("Detected multi-step task: lift and move")
        # Implement specific behavior for the lift-and-move sequence here
def main():
    while True:
        command_text = listen_for_command()
        if command_text:
            process_command(command_text)
        else:
            print("No valid command received. Listening again...")

if __name__ == "__main__":
    main()
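To run main.py you will need the three YOLOv3 files referenced at the top (yolov3.weights, yolov3.cfg, and coco.names, downloadable from the official YOLO project pages) plus a webcam and a microphone. With the command side covered, the second file, robot_arm.py, supplies the RobotArm class that main.py imports: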
import numpy as np
from scipy.linalg import expm

class RobotArm:
    def __init__(self, link1_length, link2_length):
        self.link1_length = link1_length
        self.link2_length = link2_length
        self.obstacles = []

    def skew(self, v):
        """Convert a 3D vector to a skew-symmetric matrix."""
        return np.array([[0, -v[2], v[1]],
                         [v[2], 0, -v[0]],
                         [-v[1], v[0], 0]])

    def forward_kinematics(self, theta1, theta2):
        """Compute the end-effector position given joint angles:
        x = l1*cos(theta1) + l2*cos(theta1 + theta2)
        y = l1*sin(theta1) + l2*sin(theta1 + theta2)"""
        T1 = self.joint_transform(self.link1_length, theta1)
        T2 = self.joint_transform(self.link2_length, theta2)
        T = np.dot(T1, T2)
        return T[:2, 3]  # Return x, y coordinates

    def joint_transform(self, length, theta):
        """Compute the homogeneous transformation matrix for a joint."""
        c, s = np.cos(theta), np.sin(theta)
        return np.array([[c, -s, 0, length * c],
                         [s, c, 0, length * s],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])

    def compute_jacobian(self, theta1, theta2):
        """Compute the Jacobian J = d(x, y)/d(theta1, theta2) of the forward
        kinematics above."""
        J = np.zeros((2, 2))
        J[:, 0] = [-self.link1_length*np.sin(theta1) - self.link2_length*np.sin(theta1 + theta2),
                   self.link1_length*np.cos(theta1) + self.link2_length*np.cos(theta1 + theta2)]
        J[:, 1] = [-self.link2_length*np.sin(theta1 + theta2),
                   self.link2_length*np.cos(theta1 + theta2)]
        return J

    def inverse_kinematics(self, target_x, target_y, max_iterations=100, tolerance=1e-3):
        """Compute inverse kinematics iteratively, applying the joint updates
        through the Lie group exponential map."""
        theta = np.array([0.0, 0.0])  # Initial guess (float, so updates don't truncate)
        for _ in range(max_iterations):
            current_pos = self.forward_kinematics(theta[0], theta[1])
            error = np.array([target_x, target_y]) - current_pos
            if np.linalg.norm(error) < tolerance:
                return theta
            J = self.compute_jacobian(theta[0], theta[1])
            # Joint-space increments from the pseudoinverse of the Jacobian
            twist = np.linalg.pinv(J) @ error
            # Each revolute joint evolves on SO(2): exponentiate its increment to
            # an incremental rotation, then read the angle back with atan2. For
            # increments in (-pi, pi] this reduces to adding the increment, and
            # larger steps wrap, which acts as a natural step-size clamp.
            new_theta = []
            for i in range(2):
                so2_algebra = np.array([[0.0, -twist[i]],
                                        [twist[i], 0.0]])
                delta_R = expm(so2_algebra)
                new_theta.append(theta[i] + np.arctan2(delta_R[1, 0], delta_R[0, 0]))
            theta = np.array(new_theta)
        print("Inverse kinematics did not converge")
        return None
    def set_obstacles(self, obstacles):
        """Set the list of obstacle boxes (x, y, width, height) in the environment."""
        self.obstacles = obstacles

    def check_collision(self, x, y):
        """Check if the given position falls inside any obstacle box."""
        for obstacle in self.obstacles:
            ox, oy, w, h = obstacle
            if ox < x < ox + w and oy < y < oy + h:
                return True
        return False
    def find_path(self, start_x, start_y, goal_x, goal_y, step_size=0.1, max_steps=10000):
        """Find a path from start to goal with a greedy search that steps
        around obstacles."""
        path = [(start_x, start_y)]
        current_x, current_y = start_x, start_y
        # Remember visited cells so the greedy walk cannot oscillate forever
        # against a flat obstacle edge
        visited = {(round(start_x / step_size), round(start_y / step_size))}
        for _ in range(max_steps):
            if np.hypot(current_x - goal_x, current_y - goal_y) <= step_size:
                path.append((goal_x, goal_y))
                return path
            best = None
            min_dist = float('inf')
            for angle in np.linspace(0, 2*np.pi, 36, endpoint=False):  # 36 directions
                next_x = current_x + step_size * np.cos(angle)
                next_y = current_y + step_size * np.sin(angle)
                cell = (round(next_x / step_size), round(next_y / step_size))
                # Skip steps that collide or revisit an already-tried cell
                if cell in visited or self.check_collision(next_x, next_y):
                    continue
                dist = np.hypot(next_x - goal_x, next_y - goal_y)
                if dist < min_dist:
                    min_dist = dist
                    best = (next_x, next_y, cell)
            if best is None:
                print("No path found, stuck at obstacle")
                return None
            current_x, current_y, cell = best
            visited.add(cell)
            path.append((current_x, current_y))
        print("No path found within the step budget")
        return None
    def move_to_position(self, target_x, target_y):
        # Assume the arm starts at the zero configuration; a real controller
        # would track the current joint state instead
        current_pos = self.forward_kinematics(0, 0)
        path = self.find_path(current_pos[0], current_pos[1], target_x, target_y)
        if path:
            print(f"Path found to position ({target_x}, {target_y}). Following path:")
            for x, y in path:
                angles = self.inverse_kinematics(x, y)
                if angles is not None:
                    print(f"Moving to intermediate position ({x:.2f}, {y:.2f}) with angles: "
                          f"{np.degrees(angles[0]):.2f}°, {np.degrees(angles[1]):.2f}°")
                    # Here you would add the code to physically move the robot arm
                else:
                    print(f"Cannot move to intermediate position ({x:.2f}, {y:.2f})")
            print("Reached target position")
        else:
            print("Cannot find a path to the specified position.")
    def lift(self, *objects):
        for obj in objects:
            if self.is_within_reach(obj) and self.is_weight_supported(obj):
                print(f"Lifting {obj}")
                # Simulate the lifting process
                self.perform_lift(obj)
                print(f"{obj} has been lifted successfully.")
            else:
                print(f"Cannot lift {obj}. It may be out of reach or too heavy.")

    def is_within_reach(self, obj):
        # Placeholder logic to check if the object is within reach
        # In a real scenario, this would involve sensor data or predefined coordinates
        print(f"Checking if {obj} is within reach...")
        return True  # Assume all objects are within reach for this example

    def is_weight_supported(self, obj):
        # Placeholder logic to check if the object's weight is supported
        # In a real scenario, this would involve weight sensors or predefined weight limits
        print(f"Checking if {obj} is within weight limits...")
        return True  # Assume all objects are within weight limits for this example

    def perform_lift(self, obj):
        # Placeholder for the actual lifting mechanism
        # This could involve sending commands to motors or actuators
        print(f"Performing lift operation for {obj}...")

    def bring(self, *objects):
        print(f"Bringing {', '.join(objects)}")

    def move(self, *objects):
        print(f"Moving {', '.join(objects)}")

    def grab(self, *objects):
        print(f"Grabbing {', '.join(objects)}")

    def put(self, *objects):
        print(f"Putting {', '.join(objects)}")
# Example usage
if __name__ == "__main__":
    arm = RobotArm(link1_length=10, link2_length=10)
    arm.move_to_position(15, 5)
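To exercise the obstacle avoidance without a camera or microphone, you can hand the arm a made-up obstacle box and watch the planner route around it. This is a minimal sketch; the box coordinates here are invented for illustration:

arm = RobotArm(link1_length=10, link2_length=10)
arm.set_obstacles([(8, -2, 2, 4)])  # one box obstacle: x, y, width, height
print(arm.check_collision(9, 0))    # True - inside the box
print(arm.check_collision(5, 5))    # False - free space
path = arm.find_path(4, 0, 14, 0)   # a straight line would cut through the box
if path:
    print(f"Found a detour with {len(path)} waypoints")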