package main import ( "bytes" "encoding/json" "fmt" "log" "strings" "time" "inou/lib" ) // processJSONBatch processes all pending JSON/text uploads for a dossier. // Identifies and extracts each file via Gemini, then normalizes once at the end. // Returns the number of successfully imported files. func processJSONBatch(dossierID string, uploads []*lib.Entry) int { // Load existing lab entries for dedup (once for entire batch) existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*") if err != nil { log.Printf("import_json: warning: could not load existing entries: %v", err) } existingByKey := make(map[string]*lib.Entry, len(existing)) for _, e := range existing { var data struct { SourceKey string `json:"source_key"` } if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" { existingByKey[data.SourceKey] = e } } importID := lib.NextImportID() var imported int for i, upload := range uploads { processProgress.Store(dossierID, map[string]interface{}{ "stage": "importing_json", "processed": i + 1, "total": len(uploads), }) var d UploadData json.Unmarshal([]byte(upload.Data), &d) // Read and decrypt content, err := lib.DecryptFile(d.Path) if err != nil { updateUploadStatus(upload.EntryID, "failed", "Decrypt: "+err.Error()) continue } // Verify JSON for json uploads; text uploads skip this check if upload.Type == "json" { var jsonCheck json.RawMessage if json.Unmarshal(content, &jsonCheck) != nil { updateUploadStatus(upload.EntryID, "failed", "Not valid JSON") continue } } // Step 1: Identify (skip if user already selected a category) categoryMap := map[string]string{"labs": "lab", "vitals": "vital", "documents": "document"} category := categoryMap[upload.Type] if category == "" { category, err = identifyContent(content) if err != nil { updateUploadStatus(upload.EntryID, "failed", "Identify: "+err.Error()) continue } } // Step 2+3: Extract and create entries var parentIDs []string switch category { case "lab": parentIDs, err = importLabFromContent(dossierID, content, existingByKey, importID) default: updateUploadStatus(upload.EntryID, "failed", "Unsupported: "+category) continue } if err != nil { updateUploadStatus(upload.EntryID, "failed", "Import: "+err.Error()) continue } // Store created entry IDs in upload data for undo var uploadData map[string]interface{} json.Unmarshal([]byte(upload.Data), &uploadData) uploadData["status"] = "processed" uploadData["created_entries"] = parentIDs b, _ := json.Marshal(uploadData) upload.Data = string(b) lib.EntryWrite("", upload) lib.AuditLog("", "json_import_file", dossierID, fmt.Sprintf("category=%s orders=%d upload=%s", category, len(parentIDs), upload.Value)) imported++ } // Normalize once for the whole batch if imported > 0 { log.Printf("import_json: normalizing after %d imports", imported) if err := lib.Normalize(dossierID, lib.CategoryLab, func(processed, total int) { processProgress.Store(dossierID, map[string]interface{}{ "stage": "normalizing", "processed": processed, "total": total, }) }); err != nil { log.Printf("import_json: normalize warning: %v", err) } } return imported } // identifyContent sends content to Gemini and asks what category of health data it is. // Handles JSON, markdown, and plain text. func identifyContent(content []byte) (string, error) { prompt := fmt.Sprintf(`You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text. Identify what category of health data this is. Return a JSON object with a single key "category" set to one of these values: - "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.) - "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.) - "medication" — medication list or prescriptions - "supplement" — supplements or vitamins - "diagnosis" — medical diagnoses - "document" — clinical notes, medical reports, or other text documents - "history" — medical history - "family_history" — family medical history - "consultation" — doctor visit or consultation notes - "unknown" — cannot determine Content: %s`, string(content)) resp, err := lib.CallGemini(prompt) if err != nil { return "", err } var result struct { Category string `json:"category"` } if err := json.Unmarshal([]byte(resp), &result); err != nil { return "", fmt.Errorf("parse identify response: %w", err) } if result.Category == "" || result.Category == "unknown" { return "", fmt.Errorf("could not identify content") } if _, ok := lib.CategoryFromString[result.Category]; !ok { return "", fmt.Errorf("unknown category: %s", result.Category) } return result.Category, nil } // labExtraction is the structured data Gemini returns for lab results. type labExtraction struct { OrderName string `json:"order_name"` Date string `json:"date"` Provider string `json:"provider"` LabName string `json:"lab_name"` Specimen string `json:"specimen"` Tests []labTestResult `json:"tests"` } type labTestResult struct { Name string `json:"name"` CommonName string `json:"common_name"` Value string `json:"value"` NumericValue *float64 `json:"numeric_value"` Unit string `json:"unit"` } // importLabFromContent extracts lab data and creates entries. // Uses direct parsing for known formats (MyChart), falls back to Gemini. // Shared existingByKey map allows dedup across the batch (updated in place). func importLabFromContent(dossierID string, content []byte, existingByKey map[string]*lib.Entry, importID int64) ([]string, error) { var extractions []labExtraction var err error if bytes.Contains(content, []byte("resultComponents")) { extractions, err = parseMyChart(content) } else { extractions, err = extractLabData(content) } if err != nil { return nil, fmt.Errorf("extract: %w", err) } if len(extractions) == 0 { return nil, fmt.Errorf("no orders found") } var parentIDs []string for _, extraction := range extractions { if len(extraction.Tests) == 0 { continue } sourceKey := extraction.OrderName + "|" + extraction.Date // Skip if this order was already imported if _, ok := existingByKey[sourceKey]; ok { log.Printf("import_json: skipping duplicate %s", extraction.OrderName) continue } var ts int64 for _, layout := range []string{ time.RFC3339, "2006-01-02T15:04:05", "2006-01-02", "2006-01", "Jan 2, 2006", "Jan 02, 2006", "January 2, 2006", "01/02/2006", } { if t, err := time.Parse(layout, extraction.Date); err == nil { // For date-only formats, interpret as noon EST to avoid timezone rollback if !strings.Contains(extraction.Date, "T") && !strings.Contains(extraction.Date, ":") { est := time.FixedZone("EST", -5*60*60) t = time.Date(t.Year(), t.Month(), t.Day(), 12, 0, 0, 0, est) } ts = t.Unix() break } } if ts == 0 { ts = time.Now().Unix() } parentID := lib.NewID() parentData, _ := json.Marshal(map[string]interface{}{ "source_key": sourceKey, "source": "json_import", "provider": extraction.Provider, "lab_name": extraction.LabName, "specimen": extraction.Specimen, "local_time": extraction.Date, }) var entries []*lib.Entry entries = append(entries, &lib.Entry{ EntryID: parentID, DossierID: dossierID, Category: lib.CategoryLab, Type: "lab_order", Value: extraction.OrderName, Timestamp: ts, Data: string(parentData), }) for _, test := range extraction.Tests { if test.Name == "" || test.Value == "" { continue } testKey := sourceKey + "|" + test.Name childID := lib.NewID() displayName := test.CommonName if displayName == "" { displayName = test.Name } summary := displayName + ": " + test.Value if test.Unit != "" { summary += " " + test.Unit } childData := map[string]interface{}{ "source_key": testKey, } if test.CommonName != "" { childData["common_name"] = test.CommonName } if test.NumericValue != nil { childData["numeric_value"] = *test.NumericValue } if test.Unit != "" { childData["unit"] = test.Unit } dataJSON, _ := json.Marshal(childData) entries = append(entries, &lib.Entry{ EntryID: childID, DossierID: dossierID, ParentID: parentID, Category: lib.CategoryLab, Type: test.Name, Value: test.Value, Summary: summary, Timestamp: ts, Data: string(dataJSON), }) } for _, e := range entries { e.Import = importID } if err := lib.EntryWrite("", entries...); err != nil { return parentIDs, fmt.Errorf("write: %w", err) } // Update dedup index for subsequent files in the batch for _, e := range entries { var data struct { SourceKey string `json:"source_key"` } if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" { existingByKey[data.SourceKey] = e } } parentIDs = append(parentIDs, parentID) log.Printf("import_json: %s — %d results", extraction.OrderName, len(extraction.Tests)) } if len(parentIDs) == 0 { return nil, fmt.Errorf("no new test results found (all duplicates or empty)") } return parentIDs, nil } // extractLabData sends content to Gemini with a targeted extraction prompt. // Returns an array of orders (supports multi-date tables). func extractLabData(content []byte) ([]labExtraction, error) { prompt := fmt.Sprintf(`Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text. Return a JSON ARRAY of orders. Each distinct date or panel is a separate order. If there is only one order, return a single-element array. [ { "order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)", "date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)", "provider": "Ordering provider name or lab company", "lab_name": "Laboratory name", "specimen": "Specimen type (e.g. Blood, Urine, Stool)", "tests": [ { "name": "Full test name (e.g. Hemoglobin)", "common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.", "value": "The measured value as a string", "numeric_value": 14.2, "unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)" } ] } ] CRITICAL RULES: - Extract ONLY the measured/observed values - NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags - NEVER include "Trend", "Notes", or commentary columns - numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen") - Include ALL test components, even those with non-numeric values - A dash "-" means "Not Detected" — include the test with value "Not Detected" - If a table has multiple date columns, each date column is a separate order with its own tests - Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows - Give each order a descriptive name based on the panel or test type Content: %s`, string(content)) maxTokens := 8192 temp := 0.0 config := &lib.GeminiConfig{ Temperature: &temp, MaxOutputTokens: &maxTokens, } resp, err := lib.CallGeminiMultimodal([]lib.GeminiPart{{Text: prompt}}, config) if err != nil { return nil, err } resp = strings.TrimSpace(resp) // Try array first, fall back to single object var extractions []labExtraction if err := json.Unmarshal([]byte(resp), &extractions); err != nil { var single labExtraction if err2 := json.Unmarshal([]byte(resp), &single); err2 != nil { return nil, fmt.Errorf("parse extraction: %w (first 300 chars: %.300s)", err, resp) } extractions = []labExtraction{single} } return extractions, nil } // parseMyChart parses MyChart JSON directly without Gemini. // Detects via presence of "resultComponents" in the JSON. func parseMyChart(content []byte) ([]labExtraction, error) { var mc struct { OrderName string `json:"orderName"` Results []struct { OrderMetadata struct { PrioritizedInstantISO string `json:"prioritizedInstantISO"` OrderProviderName string `json:"orderProviderName"` AuthorizingProvider string `json:"authorizingProviderName"` SpecimensDisplay string `json:"specimensDisplay"` ResultingLab struct { Name string `json:"name"` } `json:"resultingLab"` } `json:"orderMetadata"` ResultComponents []struct { ComponentInfo struct { Name string `json:"name"` CommonName string `json:"commonName"` Units string `json:"units"` } `json:"componentInfo"` ComponentResultInfo struct { Value string `json:"value"` NumericValue *float64 `json:"numericValue"` } `json:"componentResultInfo"` } `json:"resultComponents"` } `json:"results"` } if err := json.Unmarshal(content, &mc); err != nil { return nil, fmt.Errorf("parse mychart: %w", err) } var extractions []labExtraction for _, r := range mc.Results { if len(r.ResultComponents) == 0 { continue } provider := r.OrderMetadata.OrderProviderName if provider == "" { provider = r.OrderMetadata.AuthorizingProvider } ext := labExtraction{ OrderName: mc.OrderName, Date: r.OrderMetadata.PrioritizedInstantISO, Provider: provider, LabName: r.OrderMetadata.ResultingLab.Name, Specimen: r.OrderMetadata.SpecimensDisplay, } for _, c := range r.ResultComponents { if c.ComponentInfo.Name == "" || c.ComponentResultInfo.Value == "" { continue } ext.Tests = append(ext.Tests, labTestResult{ Name: c.ComponentInfo.Name, CommonName: c.ComponentInfo.CommonName, Value: c.ComponentResultInfo.Value, NumericValue: c.ComponentResultInfo.NumericValue, Unit: c.ComponentInfo.Units, }) } if len(ext.Tests) > 0 { extractions = append(extractions, ext) } } return extractions, nil }