#!/usr/bin/env python3
"""
Search documents in the document management system.

The master index is a JSON file at ``INDEX / "master.json"`` holding a
``"documents"`` list (and optionally a ``"stats"`` mapping).  Per-document
text records live under ``RECORDS / <category>/``.
"""

import os
import sys
import json
import argparse
from pathlib import Path
from datetime import datetime
from typing import Optional

DOCUMENTS_ROOT = Path.home() / "documents"
INDEX = DOCUMENTS_ROOT / "index"
RECORDS = DOCUMENTS_ROOT / "records"

# Index fields that are matched against the query before falling back to
# the (more expensive) full-text record search.
_SEARCH_FIELDS = ("filename", "category", "type", "date", "amount")


def _text(value) -> str:
    """Return *value* as a string, mapping ``None`` to the empty string."""
    return "" if value is None else str(value)


def load_index() -> dict:
    """Load the master index.

    Returns:
        The parsed JSON content of ``master.json``, or ``{"documents": []}``
        when the index file does not exist.
    """
    index_path = INDEX / "master.json"
    if index_path.exists():
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(index_path, encoding="utf-8") as f:
            return json.load(f)
    return {"documents": []}


def search_documents(
    query: Optional[str],
    category: Optional[str] = None,
    doc_type: Optional[str] = None,
) -> list:
    """Search documents by query, optionally filtered by category/type.

    Args:
        query: Case-insensitive substring to look for.  When empty or
            ``None`` every document passing the filters is returned.
        category: If given, only documents with exactly this category.
        doc_type: If given, only documents with exactly this type.

    Returns:
        The matching index entries (dicts), in index order.
    """
    data = load_index()
    results = []
    query_lower = query.lower() if query else ""

    for doc in data.get("documents", []):
        # Apply filters
        if category and doc.get("category") != category:
            continue
        if doc_type and doc.get("type") != doc_type:
            continue

        # If no query, return all matching filters
        if not query:
            results.append(doc)
            continue

        # Search in indexed fields.  Null fields contribute nothing, so a
        # query such as "none" does not match every document with a missing
        # value.
        searchable = " ".join(_text(doc.get(key)) for key in _SEARCH_FIELDS).lower()
        if query_lower in searchable:
            results.append(doc)
            continue

        # Search in full text record
        record_path = find_record(_text(doc.get("id")), _text(doc.get("category")))
        if record_path is not None:
            # errors="replace": one badly encoded record must not abort the
            # whole search.
            content = record_path.read_text(encoding="utf-8", errors="replace").lower()
            if query_lower in content:
                results.append(doc)

    return results


def find_record(doc_id: str, category: str) -> Optional[Path]:
    """Find the record file for a document.

    Args:
        doc_id: Document id; a record matches when its file name contains
            this string.
        category: Name of the sub-directory of ``RECORDS`` to look in.

    Returns:
        The first matching regular file (in sorted name order), or ``None``
        when the id/category is empty, the category directory does not
        exist, or no file matches.
    """
    # An empty id would be a substring of every file name.
    if not doc_id or not category:
        return None

    cat_dir = RECORDS / category
    if not cat_dir.is_dir():
        return None

    # Sorted so that the chosen record is deterministic when several names
    # contain the id; directories are skipped because callers read the text.
    for entry in sorted(cat_dir.iterdir()):
        if entry.is_file() and doc_id in entry.name:
            return entry
    return None


def show_document(doc_id: str) -> None:
    """Show full details of a document.

    Prints the index fields of the first document whose id equals *doc_id*
    or whose filename contains it, followed by the content of its record
    file when one is found.  Prints a "not found" message otherwise.
    """
    data = load_index()

    for doc in data.get("documents", []):
        if doc.get("id") == doc_id or doc_id in _text(doc.get("filename")):
            print(f"\n{'='*60}")
            print(f"Document: {doc.get('filename', 'N/A')}")
            print(f"ID: {doc.get('id', 'N/A')}")
            print(f"Category: {doc.get('category', 'unknown')}")
            print(f"Type: {doc.get('type', 'unknown')}")
            print(f"Date: {doc.get('date', 'N/A')}")
            print(f"Amount: {doc.get('amount', 'N/A')}")
            print(f"Processed: {doc.get('processed', 'N/A')}")
            print(f"{'='*60}")

            # Show record content
            record_path = find_record(_text(doc.get("id")), _text(doc.get("category")))
            if record_path:
                print(f"\nRecord: {record_path}")
                print("-"*60)
                print(record_path.read_text(encoding="utf-8", errors="replace"))
            return

    print(f"Document not found: {doc_id}")


def list_stats() -> None:
    """Show document statistics.

    Prints the total document count, the per-type counts stored in the
    index's ``"stats"`` mapping, and per-category counts computed from the
    document list.  A missing ``"stats"`` mapping (e.g. no index file yet)
    is treated as empty and the total falls back to the number of documents.
    """
    data = load_index()
    documents = data.get("documents", [])
    stats = data.get("stats", {})

    # U+1F4CA (bar chart), written as an escape so the source stays ASCII.
    print("\n\U0001F4CA Document Statistics")
    print("="*40)
    print(f"Total documents: {stats.get('total', len(documents))}")

    print("\nBy type:")
    for dtype, count in sorted(stats.get("by_type", {}).items()):
        print(f" {dtype}: {count}")

    print("\nBy category:")
    by_cat = {}
    for doc in documents:
        # Null/missing categories are grouped under "unknown" so the keys
        # stay sortable.
        cat = _text(doc.get("category")) or "unknown"
        by_cat[cat] = by_cat.get(cat, 0) + 1

    for cat, count in sorted(by_cat.items()):
        print(f" {cat}: {count}")


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when
            ``None``.
    """
    parser = argparse.ArgumentParser(description="Search documents")
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("-c", "--category", help="Filter by category")
    parser.add_argument("-t", "--type", help="Filter by document type")
    parser.add_argument("-s", "--show", help="Show full document by ID")
    parser.add_argument("--stats", action="store_true", help="Show statistics")
    parser.add_argument("-l", "--list", action="store_true", help="List all documents")

    args = parser.parse_args(argv)

    if args.stats:
        list_stats()
        return

    if args.show:
        show_document(args.show)
        return

    if args.list or args.query or args.category or args.type:
        results = search_documents(args.query, args.category, args.type)

        if not results:
            print("No documents found")
            return

        print(f"\nFound {len(results)} document(s):\n")
        for doc in results:
            doc_id = _text(doc.get("id"))[:8]
            doc_category = _text(doc.get("category"))
            doc_kind = _text(doc.get("type"))
            date = _text(doc.get("date"))[:10]
            # Keep numeric amounts as numbers (right-aligned by the format
            # spec); only null is replaced, since None rejects a width spec.
            amount = doc.get("amount")
            if amount is None:
                amount = ""
            filename = _text(doc.get("filename"))
            print(f" [{doc_id}] {doc_category:12} {doc_kind:15} {date:12} {amount:10} {filename}")
    else:
        parser.print_help()


if __name__ == "__main__":
    main()