// Command import-json imports health-data files into a dossier.
//
// It accepts a dossier ID and a path (a single file or a directory of
// JSON/text/markdown/CSV files). Each file is sent to Gemini twice:
// once to identify its category, and — for lab results, the only
// category currently supported — once to extract structured lab
// orders. Orders are deduplicated against existing entries via a
// "source_key" stored in each entry's Data JSON, then written as one
// parent "lab_order" entry plus one child entry per test.
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strings"
	"time"

	"inou/lib"
)

// Same types as portal/import_json.go.

// labExtraction is one lab order (panel) as extracted by Gemini.
type labExtraction struct {
	OrderName string          `json:"order_name"`
	Date      string          `json:"date"`
	Provider  string          `json:"provider"`
	LabName   string          `json:"lab_name"`
	Specimen  string          `json:"specimen"`
	Tests     []labTestResult `json:"tests"`
}

// labTestResult is a single measured test within an order.
// NumericValue is nil when the value is non-numeric (e.g. "Negative").
type labTestResult struct {
	Name         string   `json:"name"`
	CommonName   string   `json:"common_name"`
	Value        string   `json:"value"`
	NumericValue *float64 `json:"numeric_value"`
	Unit         string   `json:"unit"`
}

func main() {
	if len(os.Args) < 3 {
		// Fixed: the usage line previously printed "Usage: import-json "
		// with no argument placeholders, even though the help text below
		// documents a path argument.
		fmt.Println("Usage: import-json <dossier-id> <path>")
		fmt.Println("  path: directory of files or single file (JSON, text, markdown)")
		os.Exit(1)
	}
	dossierID := os.Args[1]
	inputPath := os.Args[2]

	if err := lib.Init(); err != nil {
		fmt.Println("lib.Init failed:", err)
		os.Exit(1)
	}
	lib.ConfigInit()
	if lib.GeminiKey == "" {
		fmt.Println("No Gemini API key configured")
		os.Exit(1)
	}

	// Collect input files.
	var files []string
	info, err := os.Stat(inputPath)
	if err != nil {
		fmt.Printf("Cannot access %s: %v\n", inputPath, err)
		os.Exit(1)
	}
	if info.IsDir() {
		for _, ext := range []string{"*.json", "*.txt", "*.md", "*.csv"} {
			// Glob only fails on a malformed pattern; these are constant,
			// so the error is safe to ignore.
			matches, _ := filepath.Glob(filepath.Join(inputPath, ext))
			files = append(files, matches...)
		}
	} else {
		files = []string{inputPath}
	}
	fmt.Printf("Found %d files\n", len(files))

	// Load existing lab entries and index them by source_key for dedup.
	existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*")
	if err != nil {
		fmt.Printf("Warning: could not load existing entries: %v\n", err)
	}
	existingByKey := make(map[string]*lib.Entry, len(existing))
	for _, e := range existing {
		var data struct {
			SourceKey string `json:"source_key"`
		}
		if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
			existingByKey[data.SourceKey] = e
		}
	}
	fmt.Printf("Loaded %d existing lab entries (%d with source_key)\n", len(existing), len(existingByKey))

	var totalEntries int
	var skipped, failed, imported int
	for i, f := range files {
		content, err := os.ReadFile(f)
		if err != nil {
			log.Printf("[%d/%d] %s: read error: %v", i+1, len(files), filepath.Base(f), err)
			failed++
			continue
		}

		// Step 1: Identify
		category, err := identifyContent(content)
		if err != nil {
			log.Printf("[%d/%d] %s: identify error: %v", i+1, len(files), filepath.Base(f), err)
			failed++
			continue
		}
		if category != "lab" {
			log.Printf("[%d/%d] %s: category=%s (skipping, only lab supported)", i+1, len(files), filepath.Base(f), category)
			skipped++
			continue
		}

		// Step 2: Extract (returns array of orders)
		extractions, err := extractLabData(content)
		if err != nil {
			log.Printf("[%d/%d] %s: extract error: %v", i+1, len(files), filepath.Base(f), err)
			failed++
			continue
		}

		// Step 3: Build entries for each order and write
		fileEntries := 0
		for _, extraction := range extractions {
			if len(extraction.Tests) == 0 {
				continue
			}
			// Dedup on the parent key (order name + date); buildEntries
			// derives per-test child keys from the same prefix.
			sourceKey := extraction.OrderName + "|" + extraction.Date
			if _, ok := existingByKey[sourceKey]; ok {
				log.Printf("[%d/%d] %s: already imported (%s), skipping", i+1, len(files), filepath.Base(f), extraction.OrderName)
				skipped++
				continue
			}
			entries := buildEntries(dossierID, &extraction, existingByKey)
			if err := lib.EntryWrite("", entries...); err != nil {
				log.Printf("[%d/%d] %s: write error: %v", i+1, len(files), filepath.Base(f), err)
				failed++
				continue
			}
			// Update dedup index so later files in this run can't
			// re-import the same order.
			for _, e := range entries {
				var data struct {
					SourceKey string `json:"source_key"`
				}
				if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
					existingByKey[data.SourceKey] = e
				}
			}
			fileEntries += len(entries)
			log.Printf("[%d/%d] %s: %s — %d tests", i+1, len(files), filepath.Base(f), extraction.OrderName, len(extraction.Tests))
		}
		if fileEntries > 0 {
			totalEntries += fileEntries
			imported++
		}
	}

	fmt.Printf("\nDone: %d imported, %d skipped, %d failed, %d total entries written\n", imported, skipped, failed, totalEntries)

	// Normalize once at the end
	fmt.Println("Normalizing...")
	if err := lib.Normalize(dossierID, lib.CategoryLab); err != nil {
		fmt.Printf("Normalize warning: %v\n", err)
	}
	fmt.Println("Complete.")
}

// identifyContent — same Gemini prompt as portal/import_json.go.
// It asks Gemini to classify the file, validates the returned category
// against lib.CategoryFromString, and returns the category string.
// Returns an error for empty/"unknown" classifications.
func identifyContent(content []byte) (string, error) {
	prompt := fmt.Sprintf(`You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text.

Identify what category of health data this is. Return a JSON object with a single key "category" set to one of these values:
- "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.)
- "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.)
- "medication" — medication list or prescriptions
- "supplement" — supplements or vitamins
- "diagnosis" — medical diagnoses
- "document" — clinical notes, medical reports, or other text documents
- "history" — medical history
- "family_history" — family medical history
- "consultation" — doctor visit or consultation notes
- "unknown" — cannot determine

Content:
%s`, string(content))

	resp, err := lib.CallGemini(prompt)
	if err != nil {
		return "", err
	}
	var result struct {
		Category string `json:"category"`
	}
	if err := json.Unmarshal([]byte(resp), &result); err != nil {
		return "", fmt.Errorf("parse: %w", err)
	}
	if result.Category == "" || result.Category == "unknown" {
		return "", fmt.Errorf("could not identify")
	}
	// Guard against Gemini inventing a category we don't know about.
	if _, ok := lib.CategoryFromString[result.Category]; !ok {
		return "", fmt.Errorf("unknown category: %s", result.Category)
	}
	return result.Category, nil
}

// extractLabData — same Gemini prompt as portal/import_json.go.
// It asks Gemini for a JSON array of lab orders; if the model returns a
// single object instead of an array, it is wrapped into a one-element
// slice.
func extractLabData(content []byte) ([]labExtraction, error) {
	prompt := fmt.Sprintf(`Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text.

Return a JSON ARRAY of orders. Each distinct date or panel is a separate order. If there is only one order, return a single-element array.

[
  {
    "order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)",
    "date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)",
    "provider": "Ordering provider name or lab company",
    "lab_name": "Laboratory name",
    "specimen": "Specimen type (e.g. Blood, Urine, Stool)",
    "tests": [
      {
        "name": "Full test name (e.g. Hemoglobin)",
        "common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.",
        "value": "The measured value as a string",
        "numeric_value": 14.2,
        "unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)"
      }
    ]
  }
]

CRITICAL RULES:
- Extract ONLY the measured/observed values
- NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags
- NEVER include "Trend", "Notes", or commentary columns
- numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen")
- Include ALL test components, even those with non-numeric values
- A dash "-" means "Not Detected" — include the test with value "Not Detected"
- If a table has multiple date columns, each date column is a separate order with its own tests
- Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows
- Give each order a descriptive name based on the panel or test type

Content:
%s`, string(content))

	maxTokens := 8192
	temp := 0.0
	config := &lib.GeminiConfig{
		Temperature:     &temp,
		MaxOutputTokens: &maxTokens,
	}
	resp, err := lib.CallGeminiMultimodal([]lib.GeminiPart{{Text: prompt}}, config)
	if err != nil {
		return nil, err
	}
	resp = strings.TrimSpace(resp)

	// Try array first, fall back to single object.
	var extractions []labExtraction
	if err := json.Unmarshal([]byte(resp), &extractions); err != nil {
		var single labExtraction
		if err2 := json.Unmarshal([]byte(resp), &single); err2 != nil {
			return nil, fmt.Errorf("parse: %w (first 300 chars: %.300s)", err, resp)
		}
		extractions = []labExtraction{single}
	}
	return extractions, nil
}

// buildEntries — same entry creation logic as portal/import_json.go.
// It converts one extracted order into a parent "lab_order" entry plus
// one child entry per test, all sharing a timestamp parsed from the
// order date (falling back to time.Now() when no layout matches).
//
// NOTE(review): existingByKey is accepted but never read here — dedup
// happens in the caller. Presumably kept for signature parity with
// portal/import_json.go; confirm before removing.
func buildEntries(dossierID string, extraction *labExtraction, existingByKey map[string]*lib.Entry) []*lib.Entry {
	sourceKey := extraction.OrderName + "|" + extraction.Date

	// Try a series of layouts from most to least specific.
	var ts int64
	for _, layout := range []string{
		time.RFC3339,
		"2006-01-02T15:04:05",
		"2006-01-02",
		"2006-01",
		"Jan 2, 2006",
		"Jan 02, 2006",
		"January 2, 2006",
		"01/02/2006",
	} {
		if t, err := time.Parse(layout, extraction.Date); err == nil {
			// For date-only formats, interpret as noon EST to avoid timezone rollback
			if !strings.Contains(extraction.Date, "T") && !strings.Contains(extraction.Date, ":") {
				est := time.FixedZone("EST", -5*60*60)
				t = time.Date(t.Year(), t.Month(), t.Day(), 12, 0, 0, 0, est)
			}
			ts = t.Unix()
			break
		}
	}
	if ts == 0 {
		ts = time.Now().Unix()
	}

	// Parent entry: the lab order itself, carrying provenance metadata.
	parentID := lib.NewID()
	parentData, _ := json.Marshal(map[string]interface{}{
		"source_key": sourceKey,
		"source":     "json_import",
		"provider":   extraction.Provider,
		"lab_name":   extraction.LabName,
		"specimen":   extraction.Specimen,
		"local_time": extraction.Date,
	})
	var entries []*lib.Entry
	entries = append(entries, &lib.Entry{
		EntryID:   parentID,
		DossierID: dossierID,
		Category:  lib.CategoryLab,
		Type:      "lab_order",
		Value:     extraction.OrderName,
		Timestamp: ts,
		Data:      string(parentData),
	})

	// Child entries: one per test, linked to the parent via ParentID.
	for _, test := range extraction.Tests {
		if test.Name == "" || test.Value == "" {
			continue
		}
		testKey := sourceKey + "|" + test.Name
		childID := lib.NewID()
		displayName := test.CommonName
		if displayName == "" {
			displayName = test.Name
		}
		summary := displayName + ": " + test.Value
		if test.Unit != "" {
			summary += " " + test.Unit
		}
		childData := map[string]interface{}{
			"source_key": testKey,
		}
		if test.CommonName != "" {
			childData["common_name"] = test.CommonName
		}
		if test.NumericValue != nil {
			childData["numeric_value"] = *test.NumericValue
		}
		if test.Unit != "" {
			childData["unit"] = test.Unit
		}
		dataJSON, _ := json.Marshal(childData)
		entries = append(entries, &lib.Entry{
			EntryID:   childID,
			DossierID: dossierID,
			ParentID:  parentID,
			Category:  lib.CategoryLab,
			Type:      test.Name,
			Value:     test.Value,
			Summary:   summary,
			Timestamp: ts,
			Data:      string(dataJSON),
		})
	}
	return entries
}