#!/bin/bash
#
# Transcribe an audio file using Groq's free Whisper API.
#
# Usage: transcribe-audio.sh <audio-file>
#
# Environment:
#   GROQ_API_KEY  API key sent as the Bearer token (required).
#
# Requires: curl, jq, and (for .ogg input only) ffmpeg with libmp3lame.
#
# Output: the transcribed text on stdout. All diagnostics go to stderr and
# the script exits non-zero on any failure (bad arguments, conversion
# failure, transport failure, or an error object in the API response).

set -euo pipefail

AUDIO_FILE="${1:-}"
GROQ_API_KEY="${GROQ_API_KEY:-}"
readonly API_URL="https://api.groq.com/openai/v1/audio/transcriptions"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

[[ -n "$AUDIO_FILE" ]] || die "Usage: $0 <audio-file>"
[[ -f "$AUDIO_FILE" ]] || die "Error: File not found: $AUDIO_FILE"
[[ -n "$GROQ_API_KEY" ]] || die "Error: GROQ_API_KEY not set"

for tool in curl jq; do
  command -v "$tool" >/dev/null || die "Error: required tool not found: $tool"
done

# Scratch directory for the converted file; removed on every exit path
# (including failures under `set -e`) by the EXIT trap.
TEMP_DIR=""
cleanup() {
  if [[ -n "$TEMP_DIR" ]]; then
    rm -rf -- "$TEMP_DIR"
  fi
}
trap cleanup EXIT

# Convert ogg to mp3 if needed (Groq prefers standard formats).
# The extension match is case-insensitive so "voice.OGG" is converted too.
if [[ "$AUDIO_FILE" == *.[oO][gG][gG] ]]; then
  command -v ffmpeg >/dev/null || die "Error: required tool not found: ffmpeg"
  # `mktemp -d` is portable; `mktemp --suffix` exists only in GNU coreutils.
  TEMP_DIR=$(mktemp -d) || die "Error: could not create temporary directory"
  TEMP_FILE="$TEMP_DIR/audio.mp3"
  # -nostdin keeps ffmpeg from swallowing the caller's stdin;
  # -loglevel error silences the banner but still reports real failures.
  ffmpeg -nostdin -loglevel error -y -i "$AUDIO_FILE" \
    -acodec libmp3lame -q:a 2 "$TEMP_FILE" \
    || die "Error: ffmpeg failed to convert $AUDIO_FILE"
  AUDIO_FILE="$TEMP_FILE"
fi

# curl's -F syntax treats ',' and ';' in a bare path as separators, so the
# path is wrapped in double quotes with embedded '\' and '"' escaped.
upload_path=${AUDIO_FILE//\\/\\\\}
upload_path=${upload_path//\"/\\\"}

# Call Groq Whisper API. No explicit Content-Type header: -F makes curl emit
# "multipart/form-data" together with the boundary parameter the server needs.
RESPONSE=$(curl -sS -X POST "$API_URL" \
  -H "Authorization: Bearer $GROQ_API_KEY" \
  -F "file=@\"$upload_path\"" \
  -F "model=whisper-large-v3" \
  -F "response_format=json") \
  || die "Error: request to Groq API failed"

# Surface an API-level error instead of silently printing nothing.
api_error=$(printf '%s' "$RESPONSE" \
  | jq -r '.error? | if type == "object" then (.message // tostring) else (. // empty) end') \
  || die "Error: could not parse API response"
[[ -z "$api_error" ]] || die "Error: Groq API: $api_error"

# Extract text from response.
printf '%s' "$RESPONSE" | jq -r '.text // empty'