// JSON/text health-data import pipeline: identify, extract, and normalize
// uploaded lab data for a dossier.
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
"time"
|
|
|
|
"inou/lib"
|
|
)
|
|
|
|
// processJSONBatch processes all pending JSON/text uploads for a dossier.
|
|
// Identifies and extracts each file via Gemini, then normalizes once at the end.
|
|
// Returns the number of successfully imported files.
|
|
func processJSONBatch(dossierID string, uploads []*lib.Entry) int {
|
|
// Load existing lab entries for dedup (once for entire batch)
|
|
existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*")
|
|
if err != nil {
|
|
log.Printf("import_json: warning: could not load existing entries: %v", err)
|
|
}
|
|
existingByKey := make(map[string]*lib.Entry, len(existing))
|
|
for _, e := range existing {
|
|
var data struct {
|
|
SourceKey string `json:"source_key"`
|
|
}
|
|
if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
|
|
existingByKey[data.SourceKey] = e
|
|
}
|
|
}
|
|
|
|
importID := lib.NextImportID()
|
|
var imported int
|
|
for i, upload := range uploads {
|
|
processProgress.Store(dossierID, map[string]interface{}{
|
|
"stage": "importing_json", "processed": i + 1, "total": len(uploads),
|
|
})
|
|
|
|
var d UploadData
|
|
json.Unmarshal([]byte(upload.Data), &d)
|
|
|
|
// Read and decrypt
|
|
content, err := lib.DecryptFile(d.Path)
|
|
if err != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Decrypt: "+err.Error())
|
|
continue
|
|
}
|
|
|
|
// Verify JSON for json uploads; text uploads skip this check
|
|
if upload.Type == "json" {
|
|
var jsonCheck json.RawMessage
|
|
if json.Unmarshal(content, &jsonCheck) != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Not valid JSON")
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Step 1: Identify (skip if user already selected a category)
|
|
categoryMap := map[string]string{"labs": "lab", "vitals": "vital", "documents": "document"}
|
|
category := categoryMap[upload.Type]
|
|
if category == "" {
|
|
category, err = identifyContent(content)
|
|
if err != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Identify: "+err.Error())
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Step 2+3: Extract and create entries
|
|
var parentIDs []string
|
|
switch category {
|
|
case "lab":
|
|
parentIDs, err = importLabFromContent(dossierID, content, existingByKey, importID)
|
|
default:
|
|
updateUploadStatus(upload.EntryID, "failed", "Unsupported: "+category)
|
|
continue
|
|
}
|
|
|
|
if err != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Import: "+err.Error())
|
|
continue
|
|
}
|
|
|
|
// Store created entry IDs in upload data for undo
|
|
var uploadData map[string]interface{}
|
|
json.Unmarshal([]byte(upload.Data), &uploadData)
|
|
uploadData["status"] = "processed"
|
|
uploadData["created_entries"] = parentIDs
|
|
b, _ := json.Marshal(uploadData)
|
|
upload.Data = string(b)
|
|
lib.EntryWrite("", upload)
|
|
|
|
lib.AuditLog("", "json_import_file", dossierID, fmt.Sprintf("category=%s orders=%d upload=%s", category, len(parentIDs), upload.Value))
|
|
imported++
|
|
}
|
|
|
|
// Normalize once for the whole batch
|
|
if imported > 0 {
|
|
log.Printf("import_json: normalizing after %d imports", imported)
|
|
if err := lib.Normalize(dossierID, lib.CategoryLab, func(processed, total int) {
|
|
processProgress.Store(dossierID, map[string]interface{}{
|
|
"stage": "normalizing", "processed": processed, "total": total,
|
|
})
|
|
}); err != nil {
|
|
log.Printf("import_json: normalize warning: %v", err)
|
|
}
|
|
}
|
|
|
|
return imported
|
|
}
|
|
|
|
// identifyContent sends content to Gemini and asks what category of health data it is.
|
|
// Handles JSON, markdown, and plain text.
|
|
func identifyContent(content []byte) (string, error) {
|
|
prompt := fmt.Sprintf(`You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text.
|
|
|
|
Identify what category of health data this is.
|
|
|
|
Return a JSON object with a single key "category" set to one of these values:
|
|
- "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.)
|
|
- "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.)
|
|
- "medication" — medication list or prescriptions
|
|
- "supplement" — supplements or vitamins
|
|
- "diagnosis" — medical diagnoses
|
|
- "document" — clinical notes, medical reports, or other text documents
|
|
- "history" — medical history
|
|
- "family_history" — family medical history
|
|
- "consultation" — doctor visit or consultation notes
|
|
- "unknown" — cannot determine
|
|
|
|
Content:
|
|
%s`, string(content))
|
|
|
|
resp, err := lib.CallGemini(prompt)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
var result struct {
|
|
Category string `json:"category"`
|
|
}
|
|
if err := json.Unmarshal([]byte(resp), &result); err != nil {
|
|
return "", fmt.Errorf("parse identify response: %w", err)
|
|
}
|
|
|
|
if result.Category == "" || result.Category == "unknown" {
|
|
return "", fmt.Errorf("could not identify content")
|
|
}
|
|
|
|
if _, ok := lib.CategoryFromString[result.Category]; !ok {
|
|
return "", fmt.Errorf("unknown category: %s", result.Category)
|
|
}
|
|
|
|
return result.Category, nil
|
|
}
|
|
|
|
// labExtraction is the structured data Gemini returns for lab results.
// One labExtraction corresponds to a single lab order/panel on a single
// date; multi-date tables yield one labExtraction per date column (see the
// extraction prompt in extractLabData).
type labExtraction struct {
	OrderName string          `json:"order_name"` // panel/order name, e.g. "CBC WITH DIFFERENTIAL"
	Date      string          `json:"date"`       // collection/result date, ideally ISO 8601
	Provider  string          `json:"provider"`   // ordering provider name or lab company
	LabName   string          `json:"lab_name"`   // laboratory name
	Specimen  string          `json:"specimen"`   // specimen type, e.g. Blood, Urine, Stool
	Tests     []labTestResult `json:"tests"`      // individual measured test components
}
|
|
|
|
// labTestResult is a single measured test component within a lab order.
type labTestResult struct {
	Name         string   `json:"name"`          // full test name, e.g. "Hemoglobin"
	CommonName   string   `json:"common_name"`   // standard abbreviation (HGB, WBC, ...); empty when none exists
	Value        string   `json:"value"`         // measured value as a string (may be non-numeric, e.g. "Negative")
	NumericValue *float64 `json:"numeric_value"` // numeric form of Value; nil when the value is not numeric
	Unit         string   `json:"unit"`          // unit of measurement, e.g. g/dL, mmol/L
}
|
|
|
|
// importLabFromContent extracts lab data and creates entries.
|
|
// Uses direct parsing for known formats (MyChart), falls back to Gemini.
|
|
// Shared existingByKey map allows dedup across the batch (updated in place).
|
|
func importLabFromContent(dossierID string, content []byte, existingByKey map[string]*lib.Entry, importID int64) ([]string, error) {
|
|
var extractions []labExtraction
|
|
var err error
|
|
if bytes.Contains(content, []byte("resultComponents")) {
|
|
extractions, err = parseMyChart(content)
|
|
} else {
|
|
extractions, err = extractLabData(content)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("extract: %w", err)
|
|
}
|
|
|
|
if len(extractions) == 0 {
|
|
return nil, fmt.Errorf("no orders found")
|
|
}
|
|
|
|
var parentIDs []string
|
|
for _, extraction := range extractions {
|
|
if len(extraction.Tests) == 0 {
|
|
continue
|
|
}
|
|
|
|
sourceKey := extraction.OrderName + "|" + extraction.Date
|
|
|
|
// Skip if this order was already imported
|
|
if _, ok := existingByKey[sourceKey]; ok {
|
|
log.Printf("import_json: skipping duplicate %s", extraction.OrderName)
|
|
continue
|
|
}
|
|
|
|
var ts int64
|
|
for _, layout := range []string{
|
|
time.RFC3339, "2006-01-02T15:04:05", "2006-01-02", "2006-01",
|
|
"Jan 2, 2006", "Jan 02, 2006", "January 2, 2006", "01/02/2006",
|
|
} {
|
|
if t, err := time.Parse(layout, extraction.Date); err == nil {
|
|
// For date-only formats, interpret as noon EST to avoid timezone rollback
|
|
if !strings.Contains(extraction.Date, "T") && !strings.Contains(extraction.Date, ":") {
|
|
est := time.FixedZone("EST", -5*60*60)
|
|
t = time.Date(t.Year(), t.Month(), t.Day(), 12, 0, 0, 0, est)
|
|
}
|
|
ts = t.Unix()
|
|
break
|
|
}
|
|
}
|
|
if ts == 0 {
|
|
ts = time.Now().Unix()
|
|
}
|
|
|
|
parentID := lib.NewID()
|
|
|
|
parentData, _ := json.Marshal(map[string]interface{}{
|
|
"source_key": sourceKey,
|
|
"source": "json_import",
|
|
"provider": extraction.Provider,
|
|
"lab_name": extraction.LabName,
|
|
"specimen": extraction.Specimen,
|
|
"local_time": extraction.Date,
|
|
})
|
|
|
|
var entries []*lib.Entry
|
|
entries = append(entries, &lib.Entry{
|
|
EntryID: parentID,
|
|
DossierID: dossierID,
|
|
Category: lib.CategoryLab,
|
|
Type: "lab_order",
|
|
Value: extraction.OrderName,
|
|
Timestamp: ts,
|
|
Data: string(parentData),
|
|
})
|
|
|
|
for _, test := range extraction.Tests {
|
|
if test.Name == "" || test.Value == "" {
|
|
continue
|
|
}
|
|
|
|
testKey := sourceKey + "|" + test.Name
|
|
childID := lib.NewID()
|
|
|
|
displayName := test.CommonName
|
|
if displayName == "" {
|
|
displayName = test.Name
|
|
}
|
|
summary := displayName + ": " + test.Value
|
|
if test.Unit != "" {
|
|
summary += " " + test.Unit
|
|
}
|
|
|
|
childData := map[string]interface{}{
|
|
"source_key": testKey,
|
|
}
|
|
if test.CommonName != "" {
|
|
childData["common_name"] = test.CommonName
|
|
}
|
|
if test.NumericValue != nil {
|
|
childData["numeric_value"] = *test.NumericValue
|
|
}
|
|
if test.Unit != "" {
|
|
childData["unit"] = test.Unit
|
|
}
|
|
dataJSON, _ := json.Marshal(childData)
|
|
|
|
entries = append(entries, &lib.Entry{
|
|
EntryID: childID,
|
|
DossierID: dossierID,
|
|
ParentID: parentID,
|
|
Category: lib.CategoryLab,
|
|
Type: test.Name,
|
|
Value: test.Value,
|
|
Summary: summary,
|
|
Timestamp: ts,
|
|
Data: string(dataJSON),
|
|
})
|
|
}
|
|
|
|
for _, e := range entries {
|
|
e.Import = importID
|
|
}
|
|
if err := lib.EntryWrite("", entries...); err != nil {
|
|
return parentIDs, fmt.Errorf("write: %w", err)
|
|
}
|
|
|
|
// Update dedup index for subsequent files in the batch
|
|
for _, e := range entries {
|
|
var data struct {
|
|
SourceKey string `json:"source_key"`
|
|
}
|
|
if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
|
|
existingByKey[data.SourceKey] = e
|
|
}
|
|
}
|
|
|
|
parentIDs = append(parentIDs, parentID)
|
|
log.Printf("import_json: %s — %d results", extraction.OrderName, len(extraction.Tests))
|
|
}
|
|
|
|
if len(parentIDs) == 0 {
|
|
return nil, fmt.Errorf("no new test results found (all duplicates or empty)")
|
|
}
|
|
|
|
return parentIDs, nil
|
|
}
|
|
|
|
// extractLabData sends content to Gemini with a targeted extraction prompt.
|
|
// Returns an array of orders (supports multi-date tables).
|
|
func extractLabData(content []byte) ([]labExtraction, error) {
|
|
prompt := fmt.Sprintf(`Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text.
|
|
|
|
Return a JSON ARRAY of orders. Each distinct date or panel is a separate order.
|
|
If there is only one order, return a single-element array.
|
|
|
|
[
|
|
{
|
|
"order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)",
|
|
"date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)",
|
|
"provider": "Ordering provider name or lab company",
|
|
"lab_name": "Laboratory name",
|
|
"specimen": "Specimen type (e.g. Blood, Urine, Stool)",
|
|
"tests": [
|
|
{
|
|
"name": "Full test name (e.g. Hemoglobin)",
|
|
"common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.",
|
|
"value": "The measured value as a string",
|
|
"numeric_value": 14.2,
|
|
"unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
|
|
CRITICAL RULES:
|
|
- Extract ONLY the measured/observed values
|
|
- NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags
|
|
- NEVER include "Trend", "Notes", or commentary columns
|
|
- numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen")
|
|
- Include ALL test components, even those with non-numeric values
|
|
- A dash "-" means "Not Detected" — include the test with value "Not Detected"
|
|
- If a table has multiple date columns, each date column is a separate order with its own tests
|
|
- Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows
|
|
- Give each order a descriptive name based on the panel or test type
|
|
|
|
Content:
|
|
%s`, string(content))
|
|
|
|
maxTokens := 8192
|
|
temp := 0.0
|
|
config := &lib.GeminiConfig{
|
|
Temperature: &temp,
|
|
MaxOutputTokens: &maxTokens,
|
|
}
|
|
|
|
resp, err := lib.CallGeminiMultimodal([]lib.GeminiPart{{Text: prompt}}, config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp = strings.TrimSpace(resp)
|
|
|
|
// Try array first, fall back to single object
|
|
var extractions []labExtraction
|
|
if err := json.Unmarshal([]byte(resp), &extractions); err != nil {
|
|
var single labExtraction
|
|
if err2 := json.Unmarshal([]byte(resp), &single); err2 != nil {
|
|
return nil, fmt.Errorf("parse extraction: %w (first 300 chars: %.300s)", err, resp)
|
|
}
|
|
extractions = []labExtraction{single}
|
|
}
|
|
return extractions, nil
|
|
}
|
|
|
|
// parseMyChart parses MyChart JSON directly without Gemini.
|
|
// Detects via presence of "resultComponents" in the JSON.
|
|
func parseMyChart(content []byte) ([]labExtraction, error) {
|
|
var mc struct {
|
|
OrderName string `json:"orderName"`
|
|
Results []struct {
|
|
OrderMetadata struct {
|
|
PrioritizedInstantISO string `json:"prioritizedInstantISO"`
|
|
OrderProviderName string `json:"orderProviderName"`
|
|
AuthorizingProvider string `json:"authorizingProviderName"`
|
|
SpecimensDisplay string `json:"specimensDisplay"`
|
|
ResultingLab struct {
|
|
Name string `json:"name"`
|
|
} `json:"resultingLab"`
|
|
} `json:"orderMetadata"`
|
|
ResultComponents []struct {
|
|
ComponentInfo struct {
|
|
Name string `json:"name"`
|
|
CommonName string `json:"commonName"`
|
|
Units string `json:"units"`
|
|
} `json:"componentInfo"`
|
|
ComponentResultInfo struct {
|
|
Value string `json:"value"`
|
|
NumericValue *float64 `json:"numericValue"`
|
|
} `json:"componentResultInfo"`
|
|
} `json:"resultComponents"`
|
|
} `json:"results"`
|
|
}
|
|
if err := json.Unmarshal(content, &mc); err != nil {
|
|
return nil, fmt.Errorf("parse mychart: %w", err)
|
|
}
|
|
|
|
var extractions []labExtraction
|
|
for _, r := range mc.Results {
|
|
if len(r.ResultComponents) == 0 {
|
|
continue
|
|
}
|
|
provider := r.OrderMetadata.OrderProviderName
|
|
if provider == "" {
|
|
provider = r.OrderMetadata.AuthorizingProvider
|
|
}
|
|
ext := labExtraction{
|
|
OrderName: mc.OrderName,
|
|
Date: r.OrderMetadata.PrioritizedInstantISO,
|
|
Provider: provider,
|
|
LabName: r.OrderMetadata.ResultingLab.Name,
|
|
Specimen: r.OrderMetadata.SpecimensDisplay,
|
|
}
|
|
for _, c := range r.ResultComponents {
|
|
if c.ComponentInfo.Name == "" || c.ComponentResultInfo.Value == "" {
|
|
continue
|
|
}
|
|
ext.Tests = append(ext.Tests, labTestResult{
|
|
Name: c.ComponentInfo.Name,
|
|
CommonName: c.ComponentInfo.CommonName,
|
|
Value: c.ComponentResultInfo.Value,
|
|
NumericValue: c.ComponentResultInfo.NumericValue,
|
|
Unit: c.ComponentInfo.Units,
|
|
})
|
|
}
|
|
if len(ext.Tests) > 0 {
|
|
extractions = append(extractions, ext)
|
|
}
|
|
}
|
|
return extractions, nil
|
|
}
|
|
|