This is the second post in a series developing a comprehensive software suite for a robot arm that could eventually grow into a realistic application for home or industrial use.
This installment adds three features: context memory, so the robot remembers its three previous actions (for example, after "grab the cup", a follow-up "lift it" reuses the remembered object); a vision module, so the robot can see objects and report their positions; and obstacle avoidance. The code is split across two files, main and robot_arm.
# Import necessary libraries
import speech_recognition as sr
import spacy
import cv2  # python -m pip install opencv-python
import numpy as np
from robot_arm import RobotArm  # Import the RobotArm module

# Initialize NLP model
nlp = spacy.load("en_core_web_sm")

# Initialize the robot arm with link lengths
robot_arm = RobotArm(link1_length=10, link2_length=10)

# Initialize YOLOv3 object detection (the weights, config, and class-name files
# must be downloaded separately)
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based indices; flatten() handles both the
# old Nx1 and the newer flat return shape across OpenCV versions
output_layers = [layer_names[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
class ContextManager:
    def __init__(self):
        self.previous_actions = []
        self.current_object = None
        self.current_position = None
        self.detected_objects = {}

    def update(self, action, objects=None, position=None):
        self.previous_actions.append(action)
        if objects:
            self.current_object = objects[-1]  # Last mentioned object
        if position:
            self.current_position = position

    def get_context(self):
        return {
            "previous_actions": self.previous_actions[-3:],  # Last 3 actions
            "current_object": self.current_object,
            "current_position": self.current_position,
            "detected_objects": self.detected_objects
        }

context_manager = ContextManager()
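If you want to see the three-action window in isolation, here is a quick check you can run in a REPL (not part of main.py; the action names are arbitrary):

cm = ContextManager()
for act in ["grab", "lift", "move", "put"]:
    cm.update(act, objects=["cup"])
print(cm.get_context()["previous_actions"])  # ['lift', 'move', 'put'] - only the last three
print(cm.get_context()["current_object"])    # cup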
def listen_for_command():
    # Initialize the recognizer
    recognizer = sr.Recognizer()
    # Use the microphone as the source for input
    with sr.Microphone() as source:
        print("Listening for command...")
        audio = recognizer.listen(source)
    # Convert audio to text
    try:
        command_text = recognizer.recognize_google(audio)
        print(f"Command received: {command_text}")
        return command_text
    except sr.UnknownValueError:
        print("Could not understand audio. Please try again.")
        return None
    except sr.RequestError as e:
        print(f"Could not request results; {e}. Please check your network connection.")
        return None
def detect_objects():
    # Capture a single frame from the camera
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        print("Could not read a frame from the camera.")
        return {}
    height, width, channels = frame.shape
    # Detecting objects
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    # Information to return
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    # Non-maximum suppression; flatten() handles both OpenCV return shapes
    indexes = np.array(cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)).flatten()
    detected_objects = {}
    for i in indexes:
        label = str(classes[class_ids[i]])
        detected_objects[label] = boxes[i]
    context_manager.detected_objects = detected_objects
    return detected_objects
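For debugging the vision side on its own, a small helper like the following (hypothetical, not part of the pipeline above) draws what was just detected. Note that the overlay frame is a separate capture taken a moment after the one detect_objects analyzed, which is good enough for a sanity check:

def show_detections():
    detections = detect_objects()
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        return
    for label, (x, y, w, h) in detections.items():
        # Draw each detection box with its class label
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, label, (x, max(y - 5, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    cv2.imshow("Detections", frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()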
def process_command(command_text):
    # Perform object detection
    detected_objects = detect_objects()
    print(f"Detected objects: {list(detected_objects.keys())}")
    # Pass the detection boxes to the arm as obstacles. Note: the boxes are in
    # image pixels; a deployed system would first map them into the arm's
    # workspace coordinates.
    robot_arm.set_obstacles(list(detected_objects.values()))
    # Parse the command using NLP
    doc = nlp(command_text)
    # Extract the verb (action), objects, and numeric positions from the command
    action = None
    objects = []
    position = None
    for token in doc:
        if token.pos_ == "VERB":
            action = token.lemma_
        elif token.pos_ in ["NOUN", "PROPN"]:
            objects.append(token.text.lower())
        elif token.like_num:
            try:
                value = float(token.text)
            except ValueError:
                continue  # spelled-out numbers like "five" would need extra handling
            if position is None:
                position = [value]
            else:
                position.append(value)
    # Use context if no objects or position are specified
    context = context_manager.get_context()
    if not objects and context["current_object"]:
        objects = [context["current_object"]]
    if not position and context["current_position"]:
        position = context["current_position"]
    # Define a dictionary of supported actions and their corresponding robot functions
    actions = {
        "lift": robot_arm.lift,
        "bring": robot_arm.bring,
        "move": robot_arm.move,
        "grab": robot_arm.grab,
        "put": robot_arm.put,
        "go": robot_arm.move_to_position
    }
    # Check if the action is supported and execute the corresponding function
    if action in actions:
        if action == "go" and position and len(position) == 2:
            print(f"Moving to position: ({position[0]}, {position[1]}) while avoiding obstacles")
            actions[action](position[0], position[1])
            context_manager.update(action, position=position)
        elif objects:
            print(f"Executing: {action} {', '.join(objects)}")
            actions[action](*objects)
            context_manager.update(action, objects=objects)
        else:
            print(f"Action '{action}' recognized, but no objects or position specified.")
    else:
        print(f"Action '{action}' not recognized. Supported actions are: {', '.join(actions.keys())}")
    # Handle multi-step tasks based on context
    if "lift" in context["previous_actions"] and action == "move":
        print("Detected multi-step task: lift and move")
        # Implement specific behavior for the lift-and-move sequence here
def main():
    while True:
        command_text = listen_for_command()
        if command_text:
            process_command(command_text)
        else:
            print("No valid command received. Listening again...")

if __name__ == "__main__":
    main()
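To run main.py you will need the three YOLOv3 files referenced at the top (yolov3.weights, yolov3.cfg, and coco.names, downloadable from the official YOLO project pages) plus a webcam and a microphone. With the command side covered, the second file, robot_arm.py, supplies the RobotArm class that main.py imports: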
import numpy as np
from scipy.linalg import expm

class RobotArm:
    def __init__(self, link1_length, link2_length):
        self.link1_length = link1_length
        self.link2_length = link2_length
        self.obstacles = []

    def skew(self, v):
        """Convert a 3D vector to a skew-symmetric matrix."""
        return np.array([[0, -v[2], v[1]],
                         [v[2], 0, -v[0]],
                         [-v[1], v[0], 0]])

    def forward_kinematics(self, theta1, theta2):
        """Compute the end-effector position given joint angles:
        x = l1*cos(theta1) + l2*cos(theta1 + theta2)
        y = l1*sin(theta1) + l2*sin(theta1 + theta2)"""
        T1 = self.joint_transform(self.link1_length, theta1)
        T2 = self.joint_transform(self.link2_length, theta2)
        T = np.dot(T1, T2)
        return T[:2, 3]  # Return x, y coordinates

    def joint_transform(self, length, theta):
        """Compute the homogeneous transformation matrix for a joint."""
        c, s = np.cos(theta), np.sin(theta)
        return np.array([[c, -s, 0, length * c],
                         [s, c, 0, length * s],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])

    def compute_jacobian(self, theta1, theta2):
        """Compute the Jacobian J = d(x, y)/d(theta1, theta2) of the forward
        kinematics above."""
        J = np.zeros((2, 2))
        J[:, 0] = [-self.link1_length*np.sin(theta1) - self.link2_length*np.sin(theta1 + theta2),
                   self.link1_length*np.cos(theta1) + self.link2_length*np.cos(theta1 + theta2)]
        J[:, 1] = [-self.link2_length*np.sin(theta1 + theta2),
                   self.link2_length*np.cos(theta1 + theta2)]
        return J

    def inverse_kinematics(self, target_x, target_y, max_iterations=100, tolerance=1e-3):
        """Compute inverse kinematics iteratively, applying the joint updates
        through the Lie group exponential map."""
        theta = np.array([0.0, 0.0])  # Initial guess (float, so updates don't truncate)
        for _ in range(max_iterations):
            current_pos = self.forward_kinematics(theta[0], theta[1])
            error = np.array([target_x, target_y]) - current_pos
            if np.linalg.norm(error) < tolerance:
                return theta
            J = self.compute_jacobian(theta[0], theta[1])
            # Joint-space increments from the pseudoinverse of the Jacobian
            twist = np.linalg.pinv(J) @ error
            # Each revolute joint evolves on SO(2): exponentiate its increment to
            # an incremental rotation, then read the angle back with atan2. For
            # increments in (-pi, pi] this reduces to adding the increment, and
            # larger steps wrap, which acts as a natural step-size clamp.
            new_theta = []
            for i in range(2):
                so2_algebra = np.array([[0.0, -twist[i]],
                                        [twist[i], 0.0]])
                delta_R = expm(so2_algebra)
                new_theta.append(theta[i] + np.arctan2(delta_R[1, 0], delta_R[0, 0]))
            theta = np.array(new_theta)
        print("Inverse kinematics did not converge")
        return None
    def set_obstacles(self, obstacles):
        """Set the list of obstacle boxes (x, y, width, height) in the environment."""
        self.obstacles = obstacles

    def check_collision(self, x, y):
        """Check if the given position falls inside any obstacle box."""
        for obstacle in self.obstacles:
            ox, oy, w, h = obstacle
            if ox < x < ox + w and oy < y < oy + h:
                return True
        return False
    def find_path(self, start_x, start_y, goal_x, goal_y, step_size=0.1, max_steps=10000):
        """Find a path from start to goal with a greedy search that steps
        around obstacles."""
        path = [(start_x, start_y)]
        current_x, current_y = start_x, start_y
        # Remember visited cells so the greedy walk cannot oscillate forever
        # against a flat obstacle edge
        visited = {(round(start_x / step_size), round(start_y / step_size))}
        for _ in range(max_steps):
            if np.hypot(current_x - goal_x, current_y - goal_y) <= step_size:
                path.append((goal_x, goal_y))
                return path
            best = None
            min_dist = float('inf')
            for angle in np.linspace(0, 2*np.pi, 36, endpoint=False):  # 36 directions
                next_x = current_x + step_size * np.cos(angle)
                next_y = current_y + step_size * np.sin(angle)
                cell = (round(next_x / step_size), round(next_y / step_size))
                # Skip steps that collide or revisit an already-tried cell
                if cell in visited or self.check_collision(next_x, next_y):
                    continue
                dist = np.hypot(next_x - goal_x, next_y - goal_y)
                if dist < min_dist:
                    min_dist = dist
                    best = (next_x, next_y, cell)
            if best is None:
                print("No path found, stuck at obstacle")
                return None
            current_x, current_y, cell = best
            visited.add(cell)
            path.append((current_x, current_y))
        print("No path found within the step budget")
        return None
    def move_to_position(self, target_x, target_y):
        # Assume the arm starts at the zero configuration; a real controller
        # would track the current joint state instead
        current_pos = self.forward_kinematics(0, 0)
        path = self.find_path(current_pos[0], current_pos[1], target_x, target_y)
        if path:
            print(f"Path found to position ({target_x}, {target_y}). Following path:")
            for x, y in path:
                angles = self.inverse_kinematics(x, y)
                if angles is not None:
                    print(f"Moving to intermediate position ({x:.2f}, {y:.2f}) with angles: "
                          f"{np.degrees(angles[0]):.2f}°, {np.degrees(angles[1]):.2f}°")
                    # Here you would add the code to physically move the robot arm
                else:
                    print(f"Cannot move to intermediate position ({x:.2f}, {y:.2f})")
            print("Reached target position")
        else:
            print("Cannot find a path to the specified position.")
    def lift(self, *objects):
        for obj in objects:
            if self.is_within_reach(obj) and self.is_weight_supported(obj):
                print(f"Lifting {obj}")
                # Simulate the lifting process
                self.perform_lift(obj)
                print(f"{obj} has been lifted successfully.")
            else:
                print(f"Cannot lift {obj}. It may be out of reach or too heavy.")

    def is_within_reach(self, obj):
        # Placeholder logic to check if the object is within reach
        # In a real scenario, this would involve sensor data or predefined coordinates
        print(f"Checking if {obj} is within reach...")
        return True  # Assume all objects are within reach for this example

    def is_weight_supported(self, obj):
        # Placeholder logic to check if the object's weight is supported
        # In a real scenario, this would involve weight sensors or predefined weight limits
        print(f"Checking if {obj} is within weight limits...")
        return True  # Assume all objects are within weight limits for this example

    def perform_lift(self, obj):
        # Placeholder for the actual lifting mechanism
        # This could involve sending commands to motors or actuators
        print(f"Performing lift operation for {obj}...")

    def bring(self, *objects):
        print(f"Bringing {', '.join(objects)}")

    def move(self, *objects):
        print(f"Moving {', '.join(objects)}")

    def grab(self, *objects):
        print(f"Grabbing {', '.join(objects)}")

    def put(self, *objects):
        print(f"Putting {', '.join(objects)}")
# Example usage
if __name__ == "__main__":
    arm = RobotArm(link1_length=10, link2_length=10)
    arm.move_to_position(15, 5)
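To exercise the obstacle avoidance without a camera or microphone, you can hand the arm a made-up obstacle box and watch the planner route around it. This is a minimal sketch; the box coordinates here are invented for illustration:

arm = RobotArm(link1_length=10, link2_length=10)
arm.set_obstacles([(8, -2, 2, 4)])  # one box obstacle: x, y, width, height
print(arm.check_collision(9, 0))    # True - inside the box
print(arm.check_collision(5, 5))    # False - free space
path = arm.find_path(4, 0, 14, 0)   # a straight line would cut through the box
if path:
    print(f"Found a detour with {len(path)} waypoints")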