privacy-toolkit/tools/meeting-record/install.sh
2025-12-31 19:44:07 -07:00

485 lines
13 KiB
Bash
Executable file

#!/bin/bash
# Strict mode: stop on a failing command (-e), treat use of an unset variable
# as an error (-u), and make a pipeline fail when any of its stages fails
# (pipefail).
set -euo pipefail
# ==============================================================================
# Meeting Record Installation Script
# ==============================================================================
# Installs meeting-record - Local voice recording with live Whisper transcription
#
# Privacy Model:
# - All audio processing happens locally on YOUR machine
# - Uses whisper.cpp for transcription - no cloud APIs
# - No data leaves your computer, ever
# - Alternative to cloud-based "AI recorders" that harvest your conversations
#
# Features:
# - Continuous audio recording (full backup)
# - Live chunked transcription every 10 seconds
# - Final high-quality transcription on stop
# - Simple CLI interface
#
# Source: PAI (Personal AI Infrastructure)
# ==============================================================================
# Colors
# ANSI colour sequences used to tag log lines. They are stored as literal
# backslash text (single-quoted); the logging helpers expand them at print time.
readonly \
  RED='\033[0;31m' \
  GREEN='\033[0;32m' \
  YELLOW='\033[1;33m' \
  BLUE='\033[0;34m' \
  NC='\033[0m'
# Logging helpers.
#
# Each helper prints its arguments (joined by spaces) behind a coloured tag.
# The colour variables are expanded with %b, the message itself with %s, so
# backslashes inside a message (e.g. in a path) are printed verbatim.
#
#   log   - informational message on stdout
#   step  - start-of-phase marker on stdout
#   warn  - warning on stderr
#   error - error on stderr, then terminates the shell with status 1
#
# warn/error write to stderr so that they stay visible when the calling
# function runs inside a command substitution such as $(detect_distro),
# where stdout is captured instead of shown.
log() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }
error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2
  exit 1
}
step() { printf '%b[STEP]%b %s\n' "$BLUE" "$NC" "$*"; }
# Configuration
# Every setting may be supplied through the environment; a default is assigned
# only when the variable is unset or empty.
: "${WHISPER_CPP_DIR:=$HOME/opt/whisper.cpp}"
: "${WHISPER_MODEL:=base.en}"
: "${BIN_DIR:=$HOME/bin}"
: "${RECORDINGS_DIR:=$HOME/Recordings/meetings}"
# Detect distro
#
# Prints the distribution identifier (the ID field of the os-release file) on
# stdout.
#
# Globals:
#   OS_RELEASE_FILE (read, optional) - when set, this file is consulted instead
#     of the standard locations.
# Outputs:
#   The ID value, or "linux" when the file defines no ID (the default that
#   os-release(5) documents).
# Exits (via error) when no os-release file can be read.
detect_distro() {
  local -a candidates
  local candidate
  local os_release=""

  if [ -n "${OS_RELEASE_FILE:-}" ]; then
    candidates=("$OS_RELEASE_FILE")
  else
    # /usr/lib/os-release is the documented fallback location.
    candidates=(/etc/os-release /usr/lib/os-release)
  fi

  for candidate in "${candidates[@]}"; do
    if [ -r "$candidate" ]; then
      os_release=$candidate
      break
    fi
  done

  if [ -z "$os_release" ]; then
    error "Cannot detect distribution"
  fi

  # Source inside a subshell so the variables the file defines (NAME, VERSION,
  # ...) never overwrite variables of the calling shell.
  (
    unset ID
    # shellcheck source=/dev/null
    . "$os_release"
    echo "${ID:-linux}"
  )
}
# Install system dependencies
#
# Installs ffmpeg, the PulseAudio client tools, a C/C++ toolchain, git and
# cmake with the package manager that matches the ID reported by
# detect_distro. Package installation runs through sudo.
#
# On an unrecognised distribution nothing is installed: a warning lists the
# packages to install by hand and the function returns 0 so the rest of the
# installation can continue.
install_dependencies() {
  step "Installing system dependencies..."

  local distro
  # Report the failure here as well: anything detect_distro prints on stdout
  # is captured by the command substitution and would otherwise be lost.
  distro=$(detect_distro) || error "Cannot detect distribution"

  case "$distro" in
    debian|ubuntu|pop)
      sudo apt-get update
      sudo apt-get install -y \
        ffmpeg \
        pulseaudio-utils \
        build-essential \
        git \
        cmake
      ;;
    fedora)
      sudo dnf install -y \
        ffmpeg \
        pulseaudio-utils \
        gcc-c++ \
        git \
        cmake
      ;;
    arch|manjaro)
      # --needed skips packages that are already installed and up to date.
      sudo pacman -S --needed --noconfirm \
        ffmpeg \
        pulseaudio \
        base-devel \
        git \
        cmake
      ;;
    *)
      warn "Unknown distro: $distro"
      warn "Please manually install: ffmpeg, pulseaudio-utils, build-essential, git, cmake"
      # Nothing was installed, so do not report success.
      return 0
      ;;
  esac

  log "System dependencies installed"
}
# Build whisper.cpp
#
# Clones whisper.cpp into $WHISPER_CPP_DIR (unless that directory already
# exists) and builds it with cmake in $WHISPER_CPP_DIR/build.
#
# Returns immediately when build/bin/whisper-cli is already present.
# Exits (via error) when the clone or the build fails, or when the build does
# not produce whisper-cli.
# The caller's working directory is left unchanged.
install_whisper_cpp() {
  step "Installing whisper.cpp..."

  local whisper_cli="$WHISPER_CPP_DIR/build/bin/whisper-cli"
  if [ -f "$whisper_cli" ]; then
    log "whisper.cpp already installed at $WHISPER_CPP_DIR"
    return 0
  fi

  # Clone if not exists
  if [ ! -d "$WHISPER_CPP_DIR" ]; then
    log "Cloning whisper.cpp..."
    mkdir -p "$(dirname "$WHISPER_CPP_DIR")"
    git clone https://github.com/ggerganov/whisper.cpp.git "$WHISPER_CPP_DIR" ||
      error "Failed to clone whisper.cpp"
  fi

  # Build
  log "Building whisper.cpp..."
  local build_jobs
  # Fall back to a single job when nproc is unavailable.
  build_jobs=$(nproc 2>/dev/null) || build_jobs=1

  # The subshell confines the directory changes to the build itself.
  (
    cd "$WHISPER_CPP_DIR" &&
      mkdir -p build &&
      cd build &&
      cmake .. &&
      cmake --build . --config Release -j"${build_jobs}"
  ) || error "whisper.cpp build failed"

  if [ ! -f "$whisper_cli" ]; then
    error "whisper.cpp build failed - whisper-cli not found"
  fi
  log "whisper.cpp built successfully"
}
# Download Whisper model
#
# Fetches ggml-$WHISPER_MODEL.bin into $WHISPER_CPP_DIR/models by running the
# download-ggml-model.sh helper found in that checkout.
#
# Returns immediately when the model file already exists.
# Exits (via error) when the helper fails or the model file is still missing
# afterwards. The caller's working directory is left unchanged.
download_model() {
  step "Downloading Whisper model ($WHISPER_MODEL)..."

  local model_file="$WHISPER_CPP_DIR/models/ggml-${WHISPER_MODEL}.bin"
  if [ -f "$model_file" ]; then
    log "Model already exists: $model_file"
    return 0
  fi

  # Use the download script
  log "Downloading ggml-${WHISPER_MODEL}.bin..."
  # Run the helper from the repository root, as before, but inside a subshell
  # so the directory change does not affect later installation steps.
  (
    cd "$WHISPER_CPP_DIR" &&
      bash models/download-ggml-model.sh "$WHISPER_MODEL"
  ) || error "Model download failed"

  if [ ! -f "$model_file" ]; then
    error "Model download failed"
  fi
  log "Model downloaded: $model_file"
}
# Install meeting-record scripts
#
# Creates $BIN_DIR and $RECORDINGS_DIR and writes two executables into
# $BIN_DIR:
#   meeting-record       - records audio with parecord, transcribes the most
#                          recent chunk every CHUNK_DURATION seconds, and
#                          transcribes the whole recording when stopped
#   meeting-record-test  - runs meeting-record for 30 seconds, then stops it
#
# Both scripts are emitted from quoted here-documents, so nothing inside them
# is expanded at install time; the defaults written as $HOME/... stay literal
# in the installed files.
install_scripts() {
  step "Installing meeting-record scripts..."
  mkdir -p "$BIN_DIR"
  mkdir -p "$RECORDINGS_DIR"

  # Install meeting-record
  cat > "$BIN_DIR/meeting-record" << 'SCRIPT'
#!/usr/bin/env bash
#
# meeting-record - Live meeting recorder with chunked transcription
#
# Usage:
#   meeting-record [meeting-name]
#   meeting-record "client-call-acme"
#
# Features:
#   - Continuous audio recording (full backup)
#   - Live chunked transcription every 10 seconds
#   - Uses local Whisper models (privacy-respecting)
#   - Separates recording from transcription (robust)
#
# Controls:
#   - Ctrl+C to stop recording
#
set -euo pipefail

# Configuration - adjust paths as needed
WHISPER_BIN="${WHISPER_BIN:-$HOME/opt/whisper.cpp/build/bin/whisper-cli}"
WHISPER_MODEL="${WHISPER_MODEL:-$HOME/opt/whisper.cpp/models/ggml-base.en.bin}"
CHUNK_DURATION=10 # seconds
RECORDINGS_DIR="${RECORDINGS_DIR:-$HOME/Recordings/meetings}"

# Validate dependencies
command -v parecord >/dev/null 2>&1 || { echo "Error: parecord not found. Install pulseaudio-utils" >&2; exit 1; }
command -v ffmpeg >/dev/null 2>&1 || { echo "Error: ffmpeg not found" >&2; exit 1; }
command -v ffprobe >/dev/null 2>&1 || { echo "Error: ffprobe not found" >&2; exit 1; }
[ -f "$WHISPER_BIN" ] || { echo "Error: whisper-cli not found at $WHISPER_BIN" >&2; exit 1; }
[ -f "$WHISPER_MODEL" ] || { echo "Error: Whisper model not found at $WHISPER_MODEL" >&2; exit 1; }

# Meeting name from argument or auto-generate
MEETING_NAME="${1:-meeting-$(date +%Y%m%d-%H%M%S)}"
MEETING_DIR="$RECORDINGS_DIR/$MEETING_NAME"

# Create directory structure
mkdir -p "$MEETING_DIR"/{chunks,transcripts}

# File paths
FULL_AUDIO="$MEETING_DIR/full-audio.wav"
LIVE_TRANSCRIPT="$MEETING_DIR/live-transcript.txt"
FINAL_TRANSCRIPT="$MEETING_DIR/final-transcript.txt"
PID_FILE="$MEETING_DIR/.recording.pid"
CHUNK_INDEX_FILE="$MEETING_DIR/.chunk_index"

# Initialize chunk index
echo "0" > "$CHUNK_INDEX_FILE"

# Defined before the trap is installed so that cleanup can run under `set -u`
# even when a signal arrives before the recorder has been started.
RECORD_PID=""

# Cleanup function
cleanup() {
  # Restore the default handlers: a second Ctrl+C aborts the script instead
  # of starting cleanup (and the final transcription) all over again.
  trap - INT TERM
  echo ""
  echo "Stopping recording..."
  # Kill all child processes
  pkill -P $$ 2>/dev/null || true
  # Kill from PID file
  if [ -f "$PID_FILE" ]; then
    while read -r PID; do
      kill "$PID" 2>/dev/null || true
    done < "$PID_FILE"
    rm -f "$PID_FILE"
  fi
  # Wait until the recorder has exited so the audio file is complete before
  # it is transcribed.
  if [ -n "$RECORD_PID" ]; then
    wait "$RECORD_PID" 2>/dev/null || true
  fi
  echo "Generating final transcript..."
  "$WHISPER_BIN" \
    -m "$WHISPER_MODEL" \
    -f "$FULL_AUDIO" \
    -otxt \
    -of "${FINAL_TRANSCRIPT%.txt}" \
    --language en \
    --threads 4 \
    2>/dev/null || echo "Warning: Final transcription failed"
  echo ""
  echo "Recording complete!"
  echo "Location: $MEETING_DIR"
  echo "Audio: $FULL_AUDIO"
  echo "Live transcript: $LIVE_TRANSCRIPT"
  echo "Final transcript: $FINAL_TRANSCRIPT"
  rm -f "$CHUNK_INDEX_FILE"
  exit 0
}
trap cleanup INT TERM

# Print header
echo "Meeting Recorder - Live Transcription"
echo "======================================"
echo "Meeting: $MEETING_NAME"
echo "Location: $MEETING_DIR"
echo "Model: $(basename "$WHISPER_MODEL")"
echo "Chunk size: ${CHUNK_DURATION}s"
echo ""
echo "Recording started - Press Ctrl+C to stop"
echo "======================================"
echo ""

# Start continuous audio recording
parecord \
  --format=s16le \
  --rate=16000 \
  --channels=1 \
  "$FULL_AUDIO" &
RECORD_PID=$!
echo "$RECORD_PID" > "$PID_FILE"

# Give recording time to start
sleep 1

# Chunk recorder and transcriber loop
while kill -0 "$RECORD_PID" 2>/dev/null; do
  sleep "$CHUNK_DURATION"
  CHUNK_NUM=$(cat "$CHUNK_INDEX_FILE")
  CHUNK_FILE="$MEETING_DIR/chunks/chunk-$(printf '%04d' "$CHUNK_NUM").wav"
  TRANSCRIPT_FILE="$MEETING_DIR/transcripts/chunk-$(printf '%04d' "$CHUNK_NUM").txt"

  # Get duration of current recording
  DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$FULL_AUDIO" 2>/dev/null || echo "0")
  # Handle N/A or invalid duration
  if [[ "$DURATION" == "N/A" ]] || [[ ! "$DURATION" =~ ^[0-9.]+$ ]]; then
    DURATION="0"
  fi
  DURATION_INT=${DURATION%.*}
  # A value such as ".5" leaves nothing before the dot.
  DURATION_INT=${DURATION_INT:-0}

  if [ "$DURATION_INT" -ge "$CHUNK_DURATION" ]; then
    START_TIME=$((DURATION_INT - CHUNK_DURATION))
    # Extract chunk. A failed extraction only skips this chunk: letting
    # `set -e` end the script here would leave the recorder running with
    # nobody to stop it. -nostdin keeps ffmpeg from reading the terminal.
    if ! ffmpeg -nostdin -ss "$START_TIME" -i "$FULL_AUDIO" -t "$CHUNK_DURATION" -c copy "$CHUNK_FILE" -y 2>/dev/null; then
      echo "Warning: could not extract chunk $CHUNK_NUM" >&2
      continue
    fi
    # Transcribe chunk in background
    (
      "$WHISPER_BIN" \
        -m "$WHISPER_MODEL" \
        -f "$CHUNK_FILE" \
        -otxt \
        -of "$MEETING_DIR/transcripts/chunk-$(printf '%04d' "$CHUNK_NUM")" \
        --language en \
        --threads 2 \
        --no-timestamps \
        2>/dev/null
      if [ -f "$TRANSCRIPT_FILE" ]; then
        TIMESTAMP=$(date '+%H:%M:%S')
        echo "[$TIMESTAMP] $(cat "$TRANSCRIPT_FILE")" >> "$LIVE_TRANSCRIPT"
        echo "[$(date '+%H:%M:%S')] Chunk $CHUNK_NUM transcribed"
      fi
    ) &
    echo $((CHUNK_NUM + 1)) > "$CHUNK_INDEX_FILE"
  fi
done

cleanup
SCRIPT
  chmod +x "$BIN_DIR/meeting-record"
  log "Installed: $BIN_DIR/meeting-record"

  # Install meeting-record-test
  cat > "$BIN_DIR/meeting-record-test" << 'SCRIPT'
#!/usr/bin/env bash
#
# meeting-record-test - Test the meeting recorder for 30 seconds
#
echo "Testing meeting recorder for 30 seconds..."
echo "Please speak during this test so we can verify transcription."
echo ""

# Start recording in background
meeting-record "test-$(date +%Y%m%d-%H%M%S)" &
TEST_PID=$!

# Wait 30 seconds
sleep 30

# Stop recording. SIGTERM is used on purpose: a non-interactive shell starts
# background jobs with the interrupt signal ignored, and a signal ignored at
# startup cannot be trapped by the recorder, so an interrupt would never stop it.
kill -TERM "$TEST_PID" 2>/dev/null

# Wait for cleanup
wait "$TEST_PID" 2>/dev/null
echo ""
echo "Test complete! Check the output above for results."
SCRIPT
  chmod +x "$BIN_DIR/meeting-record-test"
  log "Installed: $BIN_DIR/meeting-record-test"
}
# Escape a string for use as the replacement part of a sed `s|...|...|`
# command: backslash, `&` (whole-match reference) and the `|` delimiter are
# prefixed with a backslash.
_sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# Update script to use correct paths
#
# Rewrites the defaults baked into $BIN_DIR/meeting-record so that they match
# the values used for this installation:
#   $HOME/opt/whisper.cpp    -> $WHISPER_CPP_DIR
#   ggml-base.en.bin         -> ggml-$WHISPER_MODEL.bin
#   $HOME/Recordings/meetings -> $RECORDINGS_DIR
# Without the last two, installing with a non-default model or recordings
# directory leaves the recorder looking for a model that was never downloaded
# and saving to a directory other than the one reported to the user.
update_paths() {
  step "Configuring paths..."

  local target="$BIN_DIR/meeting-record"
  local whisper_dir model_name recordings_dir
  whisper_dir=$(_sed_escape_replacement "$WHISPER_CPP_DIR")
  model_name=$(_sed_escape_replacement "$WHISPER_MODEL")
  recordings_dir=$(_sed_escape_replacement "$RECORDINGS_DIR")

  # \$HOME is passed to sed as the literal text "$HOME" found in the script.
  sed -i \
    -e "s|\$HOME/opt/whisper\.cpp|${whisper_dir}|g" \
    -e "s|ggml-base\.en\.bin|ggml-${model_name}.bin|g" \
    -e "s|\$HOME/Recordings/meetings|${recordings_dir}|g" \
    "$target"

  log "Paths configured"
}
# Verify installation
#
# Checks that every piece the recorder needs is in place: the whisper-cli
# binary, the selected model file, ffmpeg, parecord and the installed
# meeting-record script. Each finding is logged.
#
# A $BIN_DIR that is missing from PATH only produces a hint; it is not counted
# as a failure.
#
# Returns 0 when all components are present, 1 otherwise.
verify_installation() {
  step "Verifying installation..."

  # Counted with plain arithmetic assignment: ((errors++)) yields exit status 1
  # while errors is still 0, which aborts the script when `set -e` is active.
  local errors=0

  # Check whisper-cli
  if [ -f "$WHISPER_CPP_DIR/build/bin/whisper-cli" ]; then
    log "whisper-cli: OK"
  else
    warn "whisper-cli: NOT FOUND"
    errors=$((errors + 1))
  fi

  # Check model
  if [ -f "$WHISPER_CPP_DIR/models/ggml-${WHISPER_MODEL}.bin" ]; then
    log "Whisper model: OK"
  else
    warn "Whisper model: NOT FOUND"
    errors=$((errors + 1))
  fi

  # Check ffmpeg
  if command -v ffmpeg >/dev/null 2>&1; then
    log "ffmpeg: OK"
  else
    warn "ffmpeg: NOT FOUND"
    errors=$((errors + 1))
  fi

  # Check parecord
  if command -v parecord >/dev/null 2>&1; then
    log "parecord: OK"
  else
    warn "parecord: NOT FOUND"
    errors=$((errors + 1))
  fi

  # Check scripts
  if [ -x "$BIN_DIR/meeting-record" ]; then
    log "meeting-record script: OK"
  else
    warn "meeting-record script: NOT FOUND"
    errors=$((errors + 1))
  fi

  # Check PATH. Compare whole PATH entries (with or without a trailing slash)
  # so that a look-alike such as "$BIN_DIR-old" is not mistaken for $BIN_DIR.
  local bin_dir="${BIN_DIR%/}"
  case ":$PATH:" in
    *":$bin_dir:"* | *":$bin_dir/:"*)
      log "$BIN_DIR in PATH: OK"
      ;;
    *)
      warn "$BIN_DIR not in PATH - add to your shell config:"
      warn " export PATH=\"$BIN_DIR:\$PATH\""
      ;;
  esac

  if [ "$errors" -eq 0 ]; then
    log "All components verified!"
    return 0
  else
    warn "$errors component(s) need attention"
    return 1
  fi
}
# Print usage
#
# Writes the post-installation summary to stdout: the available commands, the
# files a recording produces under $RECORDINGS_DIR, and the privacy note.
print_usage() {
  local -a summary=(
    ""
    "=== Installation Complete ==="
    ""
    "Usage:"
    " meeting-record # Auto-named recording"
    " meeting-record \"client-call\" # Named recording"
    " meeting-record-test # 30-second test"
    ""
    "Output files are saved to: $RECORDINGS_DIR/<meeting-name>/"
    " - full-audio.wav Complete recording"
    " - live-transcript.txt Real-time transcription"
    " - final-transcript.txt High-quality full transcription"
    ""
    "Privacy: All processing happens locally. No cloud APIs."
    ""
  )
  # One output line per array element.
  printf '%s\n' "${summary[@]}"
}
# Main
#
# Shows what will be installed, asks for confirmation and then runs the
# installation steps in order: dependencies, whisper.cpp, model, scripts,
# path configuration, verification, usage summary. A failed verification does
# not abort; its findings are logged and the usage summary is still printed.
#
# Arguments:
#   -y, --yes   skip the confirmation prompt (for unattended installs)
#   Any other argument is ignored with a warning.
#
# Exits 0 when the user declines. Exits (via error) when a confirmation is
# required but stdin delivers no answer.
main() {
  local assume_yes=0
  local arg
  for arg in "$@"; do
    case "$arg" in
      -y|--yes) assume_yes=1 ;;
      *) warn "Ignoring unknown argument: $arg" ;;
    esac
  done

  echo "=== Meeting Record Installation ==="
  echo ""
  echo "This will install:"
  echo " - whisper.cpp (local speech recognition)"
  echo " - Whisper model: $WHISPER_MODEL"
  echo " - meeting-record scripts"
  echo ""
  echo "Install location: $WHISPER_CPP_DIR"
  echo "Scripts location: $BIN_DIR"
  echo ""

  if [ "$assume_yes" -eq 0 ]; then
    local reply=""
    # read fails at end of input (stdin closed or redirected from an empty
    # source). Say so explicitly instead of letting `set -e` end the script
    # without any message.
    if ! read -p "Continue? [Y/n] " -n 1 -r reply; then
      echo
      error "No confirmation received on stdin; run interactively or pass --yes"
    fi
    echo
    # Empty reply (Enter) counts as yes.
    if [[ ! $reply =~ ^[Yy]?$ ]]; then
      echo "Aborted."
      exit 0
    fi
  fi
  echo ""

  install_dependencies
  install_whisper_cpp
  download_model
  install_scripts
  update_paths
  echo ""
  verify_installation || true
  print_usage
}
# Entry point: run the installer, forwarding all command-line arguments.
main "$@"