156 lines
4.8 KiB
Python
Executable File
156 lines
4.8 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Search documents in the document management system.
"""
|
|
|
|
import argparse
import json
import os
import sys
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Optional
|
|
|
|
# Root directory of the document store, under the current user's home.
DOCUMENTS_ROOT = Path.home() / "documents"
# Directory that holds the master index file (master.json).
INDEX = DOCUMENTS_ROOT / "index"
# Directory that holds one sub-directory of record files per category.
RECORDS = DOCUMENTS_ROOT / "records"
|
|
|
|
|
|
def load_index() -> dict:
    """Load the master index from ``INDEX / "master.json"``.

    Returns:
        The parsed JSON index. When the index file does not exist, the
        empty index ``{"documents": []}`` is returned instead. If the
        parsed index is a dict, it is guaranteed to have a ``"documents"``
        key so callers can iterate it unconditionally.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    index_path = INDEX / "master.json"
    try:
        # Open directly rather than exists()-then-open(): the file could
        # disappear between the two calls. JSON is read as UTF-8 so the
        # result does not depend on the platform's locale encoding.
        with open(index_path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {"documents": []}

    if isinstance(data, dict):
        # Callers index data["documents"] without checking for the key.
        data.setdefault("documents", [])
    return data
|
|
|
|
|
|
def _indexed_text(doc: dict) -> str:
    """Return the lower-cased text of the index fields that are searched."""
    fields = ("filename", "category", "type", "date", "amount")
    return " ".join(str(doc.get(field, "")) for field in fields).lower()


def _record_text(doc: dict) -> str:
    """Return the lower-cased record content for *doc*, or "" if unavailable."""
    doc_id = doc.get("id")
    doc_category = doc.get("category")
    # Without both values there is no record location to look up.
    if not doc_id or not doc_category:
        return ""

    record_path = find_record(doc_id, doc_category)
    if record_path is None:
        return ""

    try:
        # errors="replace" keeps one badly encoded record from aborting
        # the whole search.
        return record_path.read_text(encoding="utf-8", errors="replace").lower()
    except OSError:
        # A missing or unreadable record simply cannot match the query.
        return ""


def search_documents(
    query: Optional[str],
    category: Optional[str] = None,
    doc_type: Optional[str] = None,
) -> list:
    """Search documents by query, optionally filtered by category/type.

    Matching is a case-insensitive substring test, first against the
    indexed fields (filename, category, type, date, amount) and then
    against the full text of the document's record file.

    Args:
        query: Text to look for. When empty or ``None``, every document
            that passes the filters is returned.
        category: If given, keep only documents whose ``category`` equals it.
        doc_type: If given, keep only documents whose ``type`` equals it.

    Returns:
        The matching index entries (dicts), in index order.
    """
    data = load_index()
    query_lower = query.lower() if query else ""
    results = []

    for doc in data["documents"]:
        # Apply filters
        if category and doc.get("category") != category:
            continue
        if doc_type and doc.get("type") != doc_type:
            continue

        # If no query, return all matching filters
        if not query_lower:
            results.append(doc)
            continue

        # The cheap index-field check runs first; the record file is only
        # read when the indexed fields do not already match.
        if query_lower in _indexed_text(doc) or query_lower in _record_text(doc):
            results.append(doc)

    return results
|
|
|
|
|
|
def find_record(doc_id: str, category: str) -> Optional[Path]:
    """Find the record file for a document.

    Looks in ``RECORDS / category`` for a regular file whose name contains
    *doc_id*.

    Args:
        doc_id: Document ID to look for in the record file names.
        category: Name of the category sub-directory to search.

    Returns:
        The path of the first matching file in sorted order, or ``None``
        when *doc_id* or *category* is empty, the category directory does
        not exist, or no file matches.
    """
    # An empty ID is a substring of every name and would match any file.
    if not doc_id or not category:
        return None

    cat_dir = RECORDS / category
    if not cat_dir.is_dir():
        return None

    # iterdir() yields entries in arbitrary order; sorting makes the result
    # deterministic when more than one name contains the ID.
    # NOTE(review): this is a substring match, so an ID that is contained
    # in another ID could select the wrong file -- confirm the record
    # naming scheme before tightening it.
    for entry in sorted(cat_dir.iterdir()):
        if entry.is_file() and doc_id in entry.name:
            return entry
    return None
|
|
|
|
|
|
def show_document(doc_id: str) -> None:
    """Show full details of a document.

    Prints the index fields of the first document whose ``id`` equals
    *doc_id* or whose ``filename`` contains it, followed by the content of
    its record file when one is found. Prints a "not found" message when
    no document matches.

    Args:
        doc_id: A document ID, or a fragment of a filename.
    """
    data = load_index()

    for doc in data["documents"]:
        # "or ''" guards against a filename that is present but null.
        filename = doc.get("filename") or ""
        if doc.get("id") != doc_id and doc_id not in filename:
            continue

        rule = "=" * 60
        print(f"\n{rule}")
        print(f"Document: {doc.get('filename', 'N/A')}")
        print(f"ID: {doc.get('id', 'N/A')}")
        print(f"Category: {doc.get('category', 'N/A')}")
        print(f"Type: {doc.get('type', 'unknown')}")
        print(f"Date: {doc.get('date', 'N/A')}")
        print(f"Amount: {doc.get('amount', 'N/A')}")
        print(f"Processed: {doc.get('processed', 'N/A')}")
        print(rule)

        # Show record content
        record_id = doc.get("id")
        record_category = doc.get("category")
        record_path = None
        if record_id and record_category:
            record_path = find_record(record_id, record_category)

        if record_path:
            print(f"\nRecord: {record_path}")
            print("-" * 60)
            try:
                print(record_path.read_text(encoding="utf-8", errors="replace"))
            except OSError as exc:
                # The metadata above has already been printed; report the
                # read failure instead of ending with a traceback.
                print(f"(could not read record: {exc})")
        return

    print(f"Document not found: {doc_id}")
|
|
|
|
|
|
def list_stats() -> None:
    """Show document statistics.

    Prints the total number of documents, the per-type counts stored in
    the index's ``stats`` section, and per-category counts computed from
    the document list.
    """
    data = load_index()
    documents = data.get("documents", [])
    # load_index() returns no "stats" section when the index file is
    # missing, so fall back to values derived from the document list.
    stats = data.get("stats") or {}

    print("\n📊 Document Statistics")
    print("=" * 40)
    print(f"Total documents: {stats.get('total', len(documents))}")

    print("\nBy type:")
    for dtype, count in sorted((stats.get("by_type") or {}).items()):
        print(f" {dtype}: {count}")

    print("\nBy category:")
    # str() keeps the keys mutually comparable for sorted().
    by_cat = Counter(str(doc.get("category") or "unknown") for doc in documents)
    for cat, count in sorted(by_cat.items()):
        print(f" {cat}: {count}")
|
|
|
|
|
|
def _format_result_row(doc: dict) -> str:
    """Format one search result as a fixed-width summary line."""

    def text(key: str) -> str:
        # Missing keys and explicit nulls both render as an empty cell;
        # formatting None with a width spec would raise TypeError.
        value = doc.get(key)
        return "" if value is None else str(value)

    amount = doc.get("amount")
    if not isinstance(amount, (int, float)):
        # Numbers keep their native (right-aligned) formatting; anything
        # else is rendered as text.
        amount = text("amount")

    return (
        f" [{text('id')[:8]}] {text('category'):12} {text('type'):15} "
        f"{text('date')[:10]:12} {amount:10} {text('filename')}"
    )


def main() -> None:
    """Parse command-line arguments and run the requested action.

    ``--stats`` takes precedence over ``--show``, which takes precedence
    over searching/listing. With no arguments at all, the help text is
    printed.
    """
    parser = argparse.ArgumentParser(description="Search documents")
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("-c", "--category", help="Filter by category")
    parser.add_argument("-t", "--type", help="Filter by document type")
    parser.add_argument("-s", "--show", help="Show full document by ID")
    parser.add_argument("--stats", action="store_true", help="Show statistics")
    parser.add_argument("-l", "--list", action="store_true", help="List all documents")
    args = parser.parse_args()

    if args.stats:
        list_stats()
        return

    if args.show:
        show_document(args.show)
        return

    # Nothing to search or list: show usage instead.
    if not (args.list or args.query or args.category or args.type):
        parser.print_help()
        return

    results = search_documents(args.query, args.category, args.type)
    if not results:
        print("No documents found")
        return

    print(f"\nFound {len(results)} document(s):\n")
    for doc in results:
        print(_format_result_row(doc))
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|