48 lines
1.1 KiB
Bash
Executable File
48 lines
1.1 KiB
Bash
Executable File
#!/bin/bash
#
# Transcribe an audio file using Groq's Whisper API.
#
# Usage: transcribe-audio.sh <audio_file>
#
# Environment:
#   GROQ_API_KEY  API key, sent as a Bearer token (required).
#
# Requires: curl and jq; ffmpeg only when the input is a .ogg file.
#
# Output: the "text" field of the JSON response on stdout.
# Exit status: 0 on success; non-zero on bad usage, a missing file/key/tool,
# a failed conversion, a failed request, or a response without a "text" field.

set -euo pipefail

readonly API_URL="https://api.groq.com/openai/v1/audio/transcriptions"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the temporary conversion directory, if one was created.
TEMP_DIR=""
cleanup() {
  if [[ -n "$TEMP_DIR" ]]; then
    rm -rf -- "$TEMP_DIR"
  fi
}
# EXIT fires on every exit path, so the temp data is removed even when
# ffmpeg or curl fails part-way through.
trap cleanup EXIT

# ${1:-} keeps the usage message reachable under `set -u`.
AUDIO_FILE="${1:-}"
GROQ_API_KEY="${GROQ_API_KEY:-}"

if [[ -z "$AUDIO_FILE" ]]; then
  die "Usage: $0 <audio_file>"
fi

if [[ ! -f "$AUDIO_FILE" ]]; then
  die "Error: File not found: $AUDIO_FILE"
fi

if [[ -z "$GROQ_API_KEY" ]]; then
  die "Error: GROQ_API_KEY not set"
fi

for tool in curl jq; do
  command -v "$tool" >/dev/null || die "Error: required tool not found: $tool"
done

# Convert ogg to mp3 before uploading (extension match is case-insensitive).
if [[ "$AUDIO_FILE" == *.[oO][gG][gG] ]]; then
  command -v ffmpeg >/dev/null \
    || die "Error: ffmpeg is required to convert .ogg files"

  # `mktemp -d` plus a fixed file name avoids the GNU-only `--suffix` option.
  TEMP_DIR=$(mktemp -d) || die "Error: could not create temporary directory"
  CONVERTED_FILE="$TEMP_DIR/audio.mp3"

  # -loglevel error silences the banner/progress output but still lets real
  # errors through; -nostdin stops ffmpeg from reading the caller's stdin.
  ffmpeg -nostdin -hide_banner -loglevel error -y \
    -i "$AUDIO_FILE" -acodec libmp3lame -q:a 2 "$CONVERTED_FILE" \
    || die "Error: ffmpeg failed to convert: $AUDIO_FILE"

  AUDIO_FILE="$CONVERTED_FILE"
fi

# curl's -F syntax treats ';' and ',' in a bare path as field separators, so
# pass the path double-quoted, with embedded backslashes and double quotes
# escaped as curl requires.
UPLOAD_PATH=${AUDIO_FILE//\\/\\\\}
UPLOAD_PATH=${UPLOAD_PATH//\"/\\\"}

# Call Groq Whisper API. -F already makes this a multipart/form-data POST and
# generates the Content-Type header (including the boundary), so neither is
# set by hand. -S keeps transport errors visible despite -s.
RESPONSE=$(curl -sS "$API_URL" \
  -H "Authorization: Bearer $GROQ_API_KEY" \
  -F "file=@\"${UPLOAD_PATH}\"" \
  -F "model=whisper-large-v3" \
  -F "response_format=json") \
  || die "Error: request to $API_URL failed"

# Extract text from response. With -e, jq exits non-zero when the filter
# yields no output (no "text" field) or the response is not valid JSON, so an
# API error is reported instead of silently printing nothing.
if ! jq -er '.text // empty' <<<"$RESPONSE"; then
  printf 'Error: no transcript in API response:\n%s\n' "$RESPONSE" >&2
  exit 1
fi