Spaces:

MCP-1st-Birthday
/

AI-RADIO

Sleeping

AI-RADIO / src /voice_input.py

Nikita Makarov

ai radio with sample auth

61f40a7 about 1 month ago

8.46 kB

	"""Voice Input Service for Speech Recognition"""
	import io
	import re
	from typing import Optional, Callable

	import speech_recognition as sr

	class VoiceInputService:
	    """Capture speech from the microphone and parse it into song requests.

	    Attributes set by ``__init__``:
	        recognizer: ``sr.Recognizer`` used for capture and transcription.
	        microphone: ``sr.Microphone`` instance, or ``None`` when it could
	            not be created.
	        available: ``True`` only when the microphone was opened and
	            calibrated for ambient noise without raising.
	    """

	    # Courtesy lead-ins stripped from the start of an utterance so that
	    # "please play X" / "can you play X" do not leak into the song text.
	    _POLITE_PREFIX = re.compile(r"^(?:(?:can you|please)(?!\w)\s*)+")

	    # Request phrases in priority order; at most one of them is removed.
	    _ACTION_PHRASES = (
	        "i want to hear", "i want to", "want to hear",
	        "i'd like to hear", "i would like to hear",
	        "play", "put on", "listen to", "i want",
	        "can you", "please", "i'd like", "i would like",
	    )
	    _PLAY_KEYWORDS = ("play", "put on", "listen to", "want to hear")
	    _SKIP_KEYWORDS = ("skip", "next", "change")

	    # Standalone words dropped from the free-text query.
	    _FILLER_WORDS = frozenset({"i", "a", "an", "the"})

	    _KNOWN_GENRES = (
	        "pop", "rock", "jazz", "classical", "electronic", "hip-hop",
	        "hip hop", "country", "indie", "rap", "blues", "folk",
	    )
	    _MOODS = (
	        "happy", "sad", "energetic", "calm", "relaxed", "focused",
	        "upbeat", "chill",
	    )

	    # A word directly before one of these is treated as a genre name.
	    _GENRE_INDICATORS = frozenset({"music", "song", "track", "tune"})
	    # Words that are never a genre even in a genre-like position.
	    _NON_GENRE_WORDS = frozenset({
	        "i", "want", "to", "hear", "play", "put", "on", "listen", "some",
	        "a", "an", "the", "me", "my", "this", "that", "next", "another",
	    })

	    def __init__(self):
	        """Create the recognizer and try to open and calibrate the microphone.

	        Any failure is reported with ``print`` and leaves ``available`` set
	        to ``False``; the constructor itself never raises for that reason.
	        """
	        self.recognizer = sr.Recognizer()
	        self.microphone = None
	        self.available = False

	        try:
	            # Try to initialize microphone (requires pyaudio)
	            self.microphone = sr.Microphone()
	            # Adjust for ambient noise
	            with self.microphone as source:
	                self.recognizer.adjust_for_ambient_noise(source, duration=0.5)
	            self.available = True
	        except OSError as e:
	            # OSError gets the longer message with PortAudio install hints.
	            print(f"Warning: Could not initialize microphone: {e}")
	            print("Voice input will not be available")
	            print("To enable voice input, install PortAudio:")
	            print(" macOS: brew install portaudio")
	            print(" Linux: sudo apt-get install portaudio19-dev")
	            print(" Then: pip install pyaudio")
	            self.available = False
	        except Exception as e:
	            # Best-effort: any other failure only disables voice input.
	            print(f"Warning: Could not initialize microphone: {e}")
	            print("Voice input will not be available")
	            self.available = False

	    def listen_and_recognize(self, timeout: int = 5, phrase_time_limit: int = 10) -> Optional[str]:
	        """Listen on the microphone and return the recognized text.

	        Args:
	            timeout: Maximum time to wait for speech to start.
	            phrase_time_limit: Maximum time for a phrase.

	        Returns:
	            The recognized text, or ``None`` when voice input is unavailable,
	            nothing was heard within ``timeout``, the audio could not be
	            understood, or the recognition call failed. Failures are
	            reported with ``print`` rather than raised.
	        """
	        if not self.available or not self.microphone:
	            return None

	        try:
	            with self.microphone as source:
	                print("Listening... Speak now!")
	                audio = self.recognizer.listen(
	                    source,
	                    timeout=timeout,
	                    phrase_time_limit=phrase_time_limit,
	                )

	            print("Processing speech...")
	            # Use Google's free speech recognition API
	            text = self.recognizer.recognize_google(audio)
	            print(f"Recognized: {text}")
	            return text

	        except sr.WaitTimeoutError:
	            print("No speech detected within timeout")
	            return None
	        except sr.UnknownValueError:
	            print("Could not understand audio")
	            return None
	        except sr.RequestError as e:
	            print(f"Error with speech recognition service: {e}")
	            return None
	        except Exception as e:
	            print(f"Error during voice recognition: {e}")
	            return None

	    @staticmethod
	    def _find_phrase(phrases, text: str) -> Optional[str]:
	        """Return the first of ``phrases`` that occurs in ``text`` as whole words."""
	        for phrase in phrases:
	            # Lookarounds instead of \b so phrases with "'" or "-" still work
	            # and "rap" no longer matches inside "therapy".
	            if re.search(rf"(?<!\w){re.escape(phrase)}(?!\w)", text):
	                return phrase
	        return None

	    @staticmethod
	    def _remove_phrase(phrase: str, text: str) -> str:
	        """Drop the first whole-word occurrence of ``phrase`` and tidy spaces."""
	        remainder = re.sub(
	            rf"(?<!\w){re.escape(phrase)}(?!\w)", " ", text, count=1
	        )
	        return " ".join(remainder.split())

	    @classmethod
	    def _guess_custom_genre(cls, words) -> Optional[str]:
	        """Guess a genre outside the known list from its position in ``words``.

	        A word qualifies when it directly precedes "music"/"song"/"track"/
	        "tune" (and is longer than two characters), or when it directly
	        follows "some"/"any" (and is longer than three characters). Stop
	        words, mood words and the indicator words themselves never qualify.
	        """
	        for index, word in enumerate(words):
	            if (
	                word in cls._NON_GENRE_WORDS
	                or word in cls._GENRE_INDICATORS
	                or word in cls._MOODS
	            ):
	                continue
	            following = words[index + 1] if index + 1 < len(words) else None
	            if following in cls._GENRE_INDICATORS:
	                if len(word) > 2:
	                    return word
	            elif (
	                len(word) > 3
	                and word[0].isalpha()
	                and index > 0
	                and words[index - 1] in ("some", "any")
	            ):
	                return word
	        return None

	    def process_song_request(self, recognized_text: str) -> dict:
	        """Parse recognized speech into a song request.

	        All keyword matching is done on whole words of the lowercased text.

	        Args:
	            recognized_text: Text recognized from speech. ``None`` or an
	                empty string is accepted and yields an otherwise empty
	                request.

	        Returns:
	            Dictionary with the keys ``original_text`` (the input as given),
	            ``action`` ("play" or "skip"; "play" is the default), ``song``,
	            ``artist``, ``genre`` and ``mood``. Extracted values are
	            lowercase; fields that could not be determined are ``None``.
	        """
	        text = " ".join((recognized_text or "").lower().split())

	        request = {
	            "original_text": recognized_text,
	            "action": None,
	            "song": None,
	            "artist": None,
	            "genre": None,
	            "mood": None,
	        }

	        # Detect action: play keywords win over skip keywords, play is default.
	        wants_play = self._find_phrase(self._PLAY_KEYWORDS, text) is not None
	        wants_skip = self._find_phrase(self._SKIP_KEYWORDS, text) is not None
	        request["action"] = "skip" if wants_skip and not wants_play else "play"

	        # Strip courtesy lead-ins, then remove one action phrase (the
	        # highest-priority one present) to get the actual query.
	        query = self._POLITE_PREFIX.sub("", text)
	        action_phrase = self._find_phrase(self._ACTION_PHRASES, query)
	        if action_phrase is not None:
	            query = self._remove_phrase(action_phrase, query)
	        keywords = [w for w in query.split() if w not in self._FILLER_WORDS]
	        cleaned_text = " ".join(keywords)

	        request["mood"] = self._find_phrase(self._MOODS, text)
	        request["genre"] = (
	            self._find_phrase(self._KNOWN_GENRES, text)
	            or self._guess_custom_genre(keywords)
	        )

	        # "<song> by <artist>": split on the last standalone "by" so titles
	        # such as "stand by me" survive, and use the query with articles
	        # intact so artist names like "the weeknd" keep their "the".
	        song_part, separator, artist_part = query.rpartition(" by ")
	        if separator:
	            request["song"] = song_part
	            request["artist"] = artist_part
	        else:
	            # No "by": the cleaned query is the song/search text. Genre and
	            # mood words stay in it; the earlier stripping of them ran before
	            # those fields were filled in and never had an effect, so the
	            # observable result is preserved.
	            request["song"] = cleaned_text if cleaned_text else recognized_text

	        return request