// inou/tools/import-json/main.go

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strings"
	"time"

	"inou/lib"
)

// labExtraction is one lab order as decoded from the JSON that
// extractLabData asks Gemini to return.
// Same types as portal/import_json.go
//
// OrderName and Date are joined as "OrderName|Date" to form the source key
// used for de-duplication. Date is kept as the raw string from the model;
// buildEntries tries several layouts to turn it into a timestamp. Provider,
// LabName and Specimen are stored as metadata on the parent "lab_order"
// entry, and Tests holds the individual results of the order.
type labExtraction struct {
	OrderName string          `json:"order_name"`
	Date      string          `json:"date"`
	Provider  string          `json:"provider"`
	LabName   string          `json:"lab_name"`
	Specimen  string          `json:"specimen"`
	Tests     []labTestResult `json:"tests"`
}

// labTestResult is a single test row inside a labExtraction.
//
// Name is the full test name and becomes the Type of the child entry built by
// buildEntries; CommonName, when non-empty, is preferred over Name in the
// human-readable summary. Value is the measured value as a string.
// NumericValue is a pointer so that a JSON null (the extraction prompt asks
// for null when the value is not numeric) stays distinct from 0. Unit may be
// empty.
type labTestResult struct {
	Name         string   `json:"name"`
	CommonName   string   `json:"common_name"`
	Value        string   `json:"value"`
	NumericValue *float64 `json:"numeric_value"`
	Unit         string   `json:"unit"`
}

// main is the entry point of the import-json tool.
//
// Usage: import-json <dossier-id> <path>
//
// It gathers the input files, asks Gemini to classify each one, extracts lab
// orders from the files classified as "lab", writes every order that is not
// already present in the dossier (matched on its "OrderName|Date" source key)
// and finally runs lib.Normalize once for the lab category.
//
// The process exits with status 1 on bad arguments, when lib.Init fails, when
// no Gemini key is configured, or when the input path cannot be read.
// Problems with an individual file or order are logged and counted instead of
// aborting the run.
func main() {
	if len(os.Args) < 3 {
		fmt.Println("Usage: import-json <dossier-id> <path>")
		fmt.Println("  path: directory of files or single file (JSON, text, markdown)")
		os.Exit(1)
	}
	dossierID := os.Args[1]
	inputPath := os.Args[2]

	if err := lib.Init(); err != nil {
		fmt.Println("lib.Init failed:", err)
		os.Exit(1)
	}
	lib.ConfigInit()

	if lib.GeminiKey == "" {
		fmt.Println("No Gemini API key configured")
		os.Exit(1)
	}

	// Collect input files
	files, err := collectInputFiles(inputPath)
	if err != nil {
		fmt.Printf("Cannot access %s: %v\n", inputPath, err)
		os.Exit(1)
	}
	fmt.Printf("Found %d files\n", len(files))

	// Load existing lab entries for dedup. A query failure is only a warning:
	// the import continues with an empty (or partial) index.
	existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*")
	if err != nil {
		fmt.Printf("Warning: could not load existing entries: %v\n", err)
	}
	existingByKey := make(map[string]*lib.Entry, len(existing))
	for _, e := range existing {
		if key := sourceKeyOf(e); key != "" {
			existingByKey[key] = e
		}
	}
	fmt.Printf("Loaded %d existing lab entries (%d with source_key)\n", len(existing), len(existingByKey))

	// imported counts files that produced at least one written entry; skipped
	// and failed are incremented both per file and per order.
	var totalEntries, skipped, failed, imported int

	for i, f := range files {
		// Shared "[n/total] name" prefix for every log line about this file.
		tag := fmt.Sprintf("[%d/%d] %s", i+1, len(files), filepath.Base(f))

		content, err := os.ReadFile(f)
		if err != nil {
			log.Printf("%s: read error: %v", tag, err)
			failed++
			continue
		}

		// Step 1: Identify
		category, err := identifyContent(content)
		if err != nil {
			log.Printf("%s: identify error: %v", tag, err)
			failed++
			continue
		}

		if category != "lab" {
			log.Printf("%s: category=%s (skipping, only lab supported)", tag, category)
			skipped++
			continue
		}

		// Step 2: Extract (returns array of orders)
		extractions, err := extractLabData(content)
		if err != nil {
			log.Printf("%s: extract error: %v", tag, err)
			failed++
			continue
		}

		// Step 3: Build entries for each order and write
		fileEntries := 0
		for idx := range extractions {
			extraction := &extractions[idx]
			if len(extraction.Tests) == 0 {
				continue
			}

			sourceKey := extraction.OrderName + "|" + extraction.Date
			if _, ok := existingByKey[sourceKey]; ok {
				log.Printf("%s: already imported (%s), skipping", tag, extraction.OrderName)
				skipped++
				continue
			}

			entries := buildEntries(dossierID, extraction, existingByKey)
			if err := lib.EntryWrite("", entries...); err != nil {
				log.Printf("%s: write error: %v", tag, err)
				failed++
				continue
			}

			// Update dedup index so later files (or later orders in this
			// file) with the same key are recognised as already imported.
			for _, e := range entries {
				if key := sourceKeyOf(e); key != "" {
					existingByKey[key] = e
				}
			}

			fileEntries += len(entries)
			log.Printf("%s: %s — %d tests", tag, extraction.OrderName, len(extraction.Tests))
		}

		if fileEntries > 0 {
			totalEntries += fileEntries
			imported++
		}
	}

	fmt.Printf("\nDone: %d imported, %d skipped, %d failed, %d total entries written\n", imported, skipped, failed, totalEntries)

	// Normalize once at the end
	fmt.Println("Normalizing...")
	if err := lib.Normalize(dossierID, lib.CategoryLab); err != nil {
		fmt.Printf("Normalize warning: %v\n", err)
	}
	fmt.Println("Complete.")
}

// collectInputFiles returns the files to import for inputPath.
//
// When inputPath is a regular file it is returned as the only element. When
// it is a directory, the non-directory entries directly inside it whose names
// end in .json, .txt, .md or .csv are returned, grouped by extension in that
// order and sorted by name within each group.
//
// The directory is listed with os.ReadDir rather than filepath.Glob so that
// glob metacharacters in inputPath are taken literally and a listing failure
// is reported to the caller instead of looking like an empty directory.
func collectInputFiles(inputPath string) ([]string, error) {
	info, err := os.Stat(inputPath)
	if err != nil {
		return nil, err
	}
	if !info.IsDir() {
		return []string{inputPath}, nil
	}

	dirEntries, err := os.ReadDir(inputPath)
	if err != nil {
		return nil, err
	}

	var files []string
	for _, ext := range []string{".json", ".txt", ".md", ".csv"} {
		for _, de := range dirEntries {
			if de.IsDir() || filepath.Ext(de.Name()) != ext {
				continue
			}
			files = append(files, filepath.Join(inputPath, de.Name()))
		}
	}
	return files, nil
}

// sourceKeyOf returns the "source_key" value stored in the entry's JSON Data,
// or "" when Data is not valid JSON or carries no such key.
func sourceKeyOf(e *lib.Entry) string {
	var data struct {
		SourceKey string `json:"source_key"`
	}
	if err := json.Unmarshal([]byte(e.Data), &data); err != nil {
		return ""
	}
	return data.SourceKey
}

// identifyContent asks Gemini which category of health data the given file
// content represents and returns that category name.
//
// The prompt is the one noted as shared with portal/import_json.go. An error
// is returned when the Gemini call fails, when the reply is not a JSON object
// that can be decoded, when the category is empty or "unknown", or when it is
// not a key of lib.CategoryFromString.
func identifyContent(content []byte) (string, error) {
	// Fixed instructions; the raw file content is appended after "Content:".
	const instructions = `You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text.

Identify what category of health data this is.

Return a JSON object with a single key "category" set to one of these values:
- "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.)
- "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.)
- "medication" — medication list or prescriptions
- "supplement" — supplements or vitamins
- "diagnosis" — medical diagnoses
- "document" — clinical notes, medical reports, or other text documents
- "history" — medical history
- "family_history" — family medical history
- "consultation" — doctor visit or consultation notes
- "unknown" — cannot determine

Content:
`

	reply, err := lib.CallGemini(instructions + string(content))
	if err != nil {
		return "", err
	}

	var parsed struct {
		Category string `json:"category"`
	}
	if err := json.Unmarshal([]byte(reply), &parsed); err != nil {
		return "", fmt.Errorf("parse: %w", err)
	}

	switch category := parsed.Category; category {
	case "", "unknown":
		return "", fmt.Errorf("could not identify")
	default:
		// Reject anything the model invented that lib does not know about.
		if _, known := lib.CategoryFromString[category]; !known {
			return "", fmt.Errorf("unknown category: %s", category)
		}
		return category, nil
	}
}

// extractLabData sends the file content to Gemini with the lab-extraction
// prompt (noted as shared with portal/import_json.go) and decodes the reply
// into a slice of lab orders.
//
// The call uses temperature 0 and a limit of 8192 output tokens. The reply is
// expected to be a JSON array of orders; a single JSON object is also
// accepted and wrapped in a one-element slice. When neither form decodes, the
// returned error wraps the array decoding error and quotes the first 300
// characters of the reply.
func extractLabData(content []byte) ([]labExtraction, error) {
	// Fixed instructions; the raw file content is appended after "Content:".
	const instructions = `Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text.

Return a JSON ARRAY of orders. Each distinct date or panel is a separate order.
If there is only one order, return a single-element array.

[
  {
    "order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)",
    "date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)",
    "provider": "Ordering provider name or lab company",
    "lab_name": "Laboratory name",
    "specimen": "Specimen type (e.g. Blood, Urine, Stool)",
    "tests": [
      {
        "name": "Full test name (e.g. Hemoglobin)",
        "common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.",
        "value": "The measured value as a string",
        "numeric_value": 14.2,
        "unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)"
      }
    ]
  }
]

CRITICAL RULES:
- Extract ONLY the measured/observed values
- NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags
- NEVER include "Trend", "Notes", or commentary columns
- numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen")
- Include ALL test components, even those with non-numeric values
- A dash "-" means "Not Detected" — include the test with value "Not Detected"
- If a table has multiple date columns, each date column is a separate order with its own tests
- Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows
- Give each order a descriptive name based on the panel or test type

Content:
`

	var (
		temperature = 0.0
		tokenLimit  = 8192
	)
	cfg := &lib.GeminiConfig{
		Temperature:     &temperature,
		MaxOutputTokens: &tokenLimit,
	}

	parts := []lib.GeminiPart{{Text: instructions + string(content)}}
	raw, err := lib.CallGeminiMultimodal(parts, cfg)
	if err != nil {
		return nil, err
	}
	raw = strings.TrimSpace(raw)
	payload := []byte(raw)

	// Preferred shape: a JSON array of orders.
	var orders []labExtraction
	arrayErr := json.Unmarshal(payload, &orders)
	if arrayErr == nil {
		return orders, nil
	}

	// Fallback shape: one bare order object. If that fails too, report the
	// array error, since the array is what the prompt asked for.
	var lone labExtraction
	if json.Unmarshal(payload, &lone) != nil {
		return nil, fmt.Errorf("parse: %w (first 300 chars: %.300s)", arrayErr, raw)
	}
	return []labExtraction{lone}, nil
}

// buildEntries converts one extracted lab order into the entries to store:
// a parent "lab_order" entry followed by one child entry per usable test.
// The creation logic is noted as shared with portal/import_json.go.
//
// The order's Date is tried against a fixed list of layouts. Dates without a
// clock component are pinned to 12:00 in a fixed UTC-5 zone so the calendar
// day does not roll back; if no layout matches (or the result is Unix time
// 0) the current time is used. Every returned entry carries that timestamp.
//
// Tests with an empty Name or Value are dropped. Each entry's Data holds a
// "source_key": "OrderName|Date" for the parent and "OrderName|Date|Name" for
// a child. existingByKey is part of the signature but is not read here.
func buildEntries(dossierID string, extraction *labExtraction, existingByKey map[string]*lib.Entry) []*lib.Entry {
	orderKey := extraction.OrderName + "|" + extraction.Date

	dateLayouts := []string{
		time.RFC3339,
		"2006-01-02T15:04:05",
		"2006-01-02",
		"2006-01",
		"Jan 2, 2006",
		"Jan 02, 2006",
		"January 2, 2006",
		"01/02/2006",
	}

	var stamp int64
	for _, layout := range dateLayouts {
		parsed, err := time.Parse(layout, extraction.Date)
		if err != nil {
			continue
		}
		// For date-only formats, interpret as noon EST to avoid timezone rollback
		if !strings.ContainsAny(extraction.Date, "T:") {
			noonZone := time.FixedZone("EST", -5*60*60)
			parsed = time.Date(parsed.Year(), parsed.Month(), parsed.Day(), 12, 0, 0, 0, noonZone)
		}
		stamp = parsed.Unix()
		break
	}
	if stamp == 0 {
		stamp = time.Now().Unix()
	}

	orderID := lib.NewID()
	orderMeta, _ := json.Marshal(map[string]interface{}{
		"source":     "json_import",
		"source_key": orderKey,
		"local_time": extraction.Date,
		"provider":   extraction.Provider,
		"lab_name":   extraction.LabName,
		"specimen":   extraction.Specimen,
	})

	result := make([]*lib.Entry, 0, 1+len(extraction.Tests))
	result = append(result, &lib.Entry{
		EntryID:   orderID,
		DossierID: dossierID,
		Category:  lib.CategoryLab,
		Type:      "lab_order",
		Value:     extraction.OrderName,
		Timestamp: stamp,
		Data:      string(orderMeta),
	})

	for _, row := range extraction.Tests {
		if row.Name == "" || row.Value == "" {
			continue
		}

		rowID := lib.NewID()

		// Optional fields are only stored when the model supplied them.
		meta := map[string]interface{}{
			"source_key": orderKey + "|" + row.Name,
		}
		if row.CommonName != "" {
			meta["common_name"] = row.CommonName
		}
		if row.NumericValue != nil {
			meta["numeric_value"] = *row.NumericValue
		}
		if row.Unit != "" {
			meta["unit"] = row.Unit
		}
		metaJSON, _ := json.Marshal(meta)

		// Summary reads "<abbreviation or full name>: <value>[ <unit>]".
		label := row.Name
		if row.CommonName != "" {
			label = row.CommonName
		}
		line := label + ": " + row.Value
		if row.Unit != "" {
			line += " " + row.Unit
		}

		result = append(result, &lib.Entry{
			EntryID:   rowID,
			DossierID: dossierID,
			ParentID:  orderID,
			Category:  lib.CategoryLab,
			Type:      row.Name,
			Value:     row.Value,
			Summary:   line,
			Timestamp: stamp,
			Data:      string(metaJSON),
		})
	}

	return result
}