inou/tools/import-json/main.go

369 lines
11 KiB
Go

package main
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
"inou/lib"
)
// Same types as portal/import_json.go

// labExtraction is one lab order/panel as parsed from the Gemini extraction
// response: order-level metadata plus the individual test results.
// JSON tags must match the schema requested in extractLabData's prompt.
type labExtraction struct {
	OrderName string          `json:"order_name"` // panel/order name, e.g. "CBC WITH DIFFERENTIAL"
	Date      string          `json:"date"`       // collection/result date, ideally ISO 8601
	Provider  string          `json:"provider"`   // ordering provider or lab company
	LabName   string          `json:"lab_name"`
	Specimen  string          `json:"specimen"` // e.g. Blood, Urine, Stool
	Tests     []labTestResult `json:"tests"`
}

// labTestResult is a single measured test inside a lab order.
type labTestResult struct {
	Name       string   `json:"name"`          // full test name, e.g. "Hemoglobin"
	CommonName string   `json:"common_name"`   // standard abbreviation (HGB, WBC, ...), may be empty
	Value      string   `json:"value"`         // measured value as a string (may be non-numeric, e.g. "Negative")
	NumericValue *float64 `json:"numeric_value"` // nil when the value is not numeric
	Unit       string   `json:"unit"`
}
// main drives the import pipeline for one dossier:
//  1. collect input files (a directory of *.json/*.txt/*.md/*.csv, or one file),
//  2. per file: identify the content category via Gemini, extract lab orders,
//     dedup against already-imported entries by "source_key", write new entries,
//  3. normalize the lab category once at the end.
//
// Usage: import-json <dossier-id> <path>
func main() {
	if len(os.Args) < 3 {
		fmt.Println("Usage: import-json <dossier-id> <path>")
		fmt.Println(" path: directory of files or single file (JSON, text, markdown)")
		os.Exit(1)
	}
	dossierID := os.Args[1]
	inputPath := os.Args[2]
	// Library + config init; a Gemini key is mandatory since both the
	// identify and extract steps call the API.
	if err := lib.Init(); err != nil {
		fmt.Println("lib.Init failed:", err)
		os.Exit(1)
	}
	lib.ConfigInit()
	if lib.GeminiKey == "" {
		fmt.Println("No Gemini API key configured")
		os.Exit(1)
	}
	// Collect input files
	var files []string
	info, err := os.Stat(inputPath)
	if err != nil {
		fmt.Printf("Cannot access %s: %v\n", inputPath, err)
		os.Exit(1)
	}
	if info.IsDir() {
		for _, ext := range []string{"*.json", "*.txt", "*.md", "*.csv"} {
			// Glob only errors on a malformed pattern; these are constants,
			// so the error is safe to ignore.
			matches, _ := filepath.Glob(filepath.Join(inputPath, ext))
			files = append(files, matches...)
		}
	} else {
		files = []string{inputPath}
	}
	fmt.Printf("Found %d files\n", len(files))
	// Load existing lab entries for dedup: index them by the "source_key"
	// field stored in each entry's Data JSON so re-runs skip prior imports.
	// A query failure degrades to "no dedup" (warn and continue).
	existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*")
	if err != nil {
		fmt.Printf("Warning: could not load existing entries: %v\n", err)
	}
	existingByKey := make(map[string]*lib.Entry, len(existing))
	for _, e := range existing {
		var data struct {
			SourceKey string `json:"source_key"`
		}
		// Entries without a parsable source_key simply aren't dedup-able.
		if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
			existingByKey[data.SourceKey] = e
		}
	}
	fmt.Printf("Loaded %d existing lab entries (%d with source_key)\n", len(existing), len(existingByKey))
	var totalEntries int
	// NOTE(review): skipped/failed are incremented both per file (identify/
	// read errors) and per order (dedup/write errors), so the final summary
	// mixes units; imported counts files that contributed >= 1 entry.
	var skipped, failed, imported int
	for i, f := range files {
		content, err := os.ReadFile(f)
		if err != nil {
			log.Printf("[%d/%d] %s: read error: %v", i+1, len(files), filepath.Base(f), err)
			failed++
			continue
		}
		// Step 1: Identify
		category, err := identifyContent(content)
		if err != nil {
			log.Printf("[%d/%d] %s: identify error: %v", i+1, len(files), filepath.Base(f), err)
			failed++
			continue
		}
		// Only the lab pipeline is implemented in this tool; other
		// categories are recognized but skipped.
		if category != "lab" {
			log.Printf("[%d/%d] %s: category=%s (skipping, only lab supported)", i+1, len(files), filepath.Base(f), category)
			skipped++
			continue
		}
		// Step 2: Extract (returns array of orders)
		extractions, err := extractLabData(content)
		if err != nil {
			log.Printf("[%d/%d] %s: extract error: %v", i+1, len(files), filepath.Base(f), err)
			failed++
			continue
		}
		// Step 3: Build entries for each order and write
		fileEntries := 0
		for _, extraction := range extractions {
			if len(extraction.Tests) == 0 {
				continue
			}
			// Order-level dedup key: must match the key format written
			// into parentData by buildEntries.
			sourceKey := extraction.OrderName + "|" + extraction.Date
			if _, ok := existingByKey[sourceKey]; ok {
				log.Printf("[%d/%d] %s: already imported (%s), skipping", i+1, len(files), filepath.Base(f), extraction.OrderName)
				skipped++
				continue
			}
			entries := buildEntries(dossierID, &extraction, existingByKey)
			if err := lib.EntryWrite("", entries...); err != nil {
				log.Printf("[%d/%d] %s: write error: %v", i+1, len(files), filepath.Base(f), err)
				failed++
				continue
			}
			// Update dedup index with the keys just written (parent order
			// key and per-test keys) so later files in this run dedup too.
			for _, e := range entries {
				var data struct {
					SourceKey string `json:"source_key"`
				}
				if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
					existingByKey[data.SourceKey] = e
				}
			}
			fileEntries += len(entries)
			log.Printf("[%d/%d] %s: %s — %d tests", i+1, len(files), filepath.Base(f), extraction.OrderName, len(extraction.Tests))
		}
		if fileEntries > 0 {
			totalEntries += fileEntries
			imported++
		}
	}
	fmt.Printf("\nDone: %d imported, %d skipped, %d failed, %d total entries written\n", imported, skipped, failed, totalEntries)
	// Normalize once at the end (cheaper than per-file); failure here is
	// non-fatal since the raw entries are already persisted.
	fmt.Println("Normalizing...")
	if err := lib.Normalize(dossierID, lib.CategoryLab); err != nil {
		fmt.Printf("Normalize warning: %v\n", err)
	}
	fmt.Println("Complete.")
}
// identifyContent — same Gemini prompt as portal/import_json.go.
// It asks Gemini to classify the raw file content and returns the category
// string. Empty/"unknown" answers and categories not registered in
// lib.CategoryFromString are rejected with an error.
func identifyContent(content []byte) (string, error) {
	prompt := fmt.Sprintf(`You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text.
Identify what category of health data this is.
Return a JSON object with a single key "category" set to one of these values:
- "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.)
- "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.)
- "medication" — medication list or prescriptions
- "supplement" — supplements or vitamins
- "diagnosis" — medical diagnoses
- "document" — clinical notes, medical reports, or other text documents
- "history" — medical history
- "family_history" — family medical history
- "consultation" — doctor visit or consultation notes
- "unknown" — cannot determine
Content:
%s`, string(content))

	raw, err := lib.CallGemini(prompt)
	if err != nil {
		return "", err
	}

	// The model is instructed to answer with {"category": "..."}.
	var parsed struct {
		Category string `json:"category"`
	}
	if err := json.Unmarshal([]byte(raw), &parsed); err != nil {
		return "", fmt.Errorf("parse: %w", err)
	}

	category := parsed.Category
	if category == "" || category == "unknown" {
		return "", fmt.Errorf("could not identify")
	}
	// Guard against hallucinated category names.
	if _, known := lib.CategoryFromString[category]; !known {
		return "", fmt.Errorf("unknown category: %s", category)
	}
	return category, nil
}
// extractLabData — same Gemini prompt as portal/import_json.go.
// It sends the raw content through a deterministic (temperature 0),
// long-output Gemini call and decodes the response as a slice of lab
// orders, tolerating a bare single-order object by wrapping it.
func extractLabData(content []byte) ([]labExtraction, error) {
	prompt := fmt.Sprintf(`Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text.
Return a JSON ARRAY of orders. Each distinct date or panel is a separate order.
If there is only one order, return a single-element array.
[
{
"order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)",
"date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)",
"provider": "Ordering provider name or lab company",
"lab_name": "Laboratory name",
"specimen": "Specimen type (e.g. Blood, Urine, Stool)",
"tests": [
{
"name": "Full test name (e.g. Hemoglobin)",
"common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.",
"value": "The measured value as a string",
"numeric_value": 14.2,
"unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)"
}
]
}
]
CRITICAL RULES:
- Extract ONLY the measured/observed values
- NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags
- NEVER include "Trend", "Notes", or commentary columns
- numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen")
- Include ALL test components, even those with non-numeric values
- A dash "-" means "Not Detected" — include the test with value "Not Detected"
- If a table has multiple date columns, each date column is a separate order with its own tests
- Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows
- Give each order a descriptive name based on the panel or test type
Content:
%s`, string(content))

	// Deterministic, generously-sized output for structured extraction.
	temperature := 0.0
	tokenLimit := 8192
	cfg := &lib.GeminiConfig{
		Temperature:     &temperature,
		MaxOutputTokens: &tokenLimit,
	}
	raw, err := lib.CallGeminiMultimodal([]lib.GeminiPart{{Text: prompt}}, cfg)
	if err != nil {
		return nil, err
	}
	raw = strings.TrimSpace(raw)

	// Preferred shape is a JSON array of orders; fall back to a single
	// object wrapped in a one-element slice. Report the array error (the
	// shape we asked for) when neither decode succeeds.
	var orders []labExtraction
	if arrErr := json.Unmarshal([]byte(raw), &orders); arrErr != nil {
		var one labExtraction
		if objErr := json.Unmarshal([]byte(raw), &one); objErr != nil {
			return nil, fmt.Errorf("parse: %w (first 300 chars: %.300s)", arrErr, raw)
		}
		orders = []labExtraction{one}
	}
	return orders, nil
}
// buildEntries — same entry creation logic as portal/import_json.go.
//
// It converts one extracted lab order into a parent "lab_order" entry plus
// one child entry per test, all sharing the same timestamp. Each entry's
// Data JSON carries a "source_key" used for dedup across runs.
//
// Fix: existingByKey (source_key -> entry) was accepted but never consulted,
// so a test already imported under the same order/date could be written a
// second time. Child entries whose per-test key is already in the index are
// now skipped; a nil map is safe (lookups on nil maps return the zero value).
func buildEntries(dossierID string, extraction *labExtraction, existingByKey map[string]*lib.Entry) []*lib.Entry {
	sourceKey := extraction.OrderName + "|" + extraction.Date

	// Parse the extraction date, trying the formats Gemini is likely to emit.
	var ts int64
	for _, layout := range []string{
		time.RFC3339, "2006-01-02T15:04:05", "2006-01-02", "2006-01",
		"Jan 2, 2006", "Jan 02, 2006", "January 2, 2006", "01/02/2006",
	} {
		t, err := time.Parse(layout, extraction.Date)
		if err != nil {
			continue
		}
		// For date-only formats, interpret as noon EST so the date doesn't
		// roll back a day when rendered in US timezones.
		if !strings.Contains(extraction.Date, "T") && !strings.Contains(extraction.Date, ":") {
			est := time.FixedZone("EST", -5*60*60)
			t = time.Date(t.Year(), t.Month(), t.Day(), 12, 0, 0, 0, est)
		}
		ts = t.Unix()
		break
	}
	if ts == 0 {
		// Unparseable (or missing) date: fall back to import time.
		ts = time.Now().Unix()
	}

	// Parent order entry carries the order-level metadata and dedup key.
	parentID := lib.NewID()
	parentData, _ := json.Marshal(map[string]interface{}{
		"source_key": sourceKey,
		"source":     "json_import",
		"provider":   extraction.Provider,
		"lab_name":   extraction.LabName,
		"specimen":   extraction.Specimen,
		"local_time": extraction.Date,
	})
	entries := make([]*lib.Entry, 0, len(extraction.Tests)+1)
	entries = append(entries, &lib.Entry{
		EntryID:   parentID,
		DossierID: dossierID,
		Category:  lib.CategoryLab,
		Type:      "lab_order",
		Value:     extraction.OrderName,
		Timestamp: ts,
		Data:      string(parentData),
	})
	for _, test := range extraction.Tests {
		// Skip rows the model returned without a usable name/value.
		if test.Name == "" || test.Value == "" {
			continue
		}
		testKey := sourceKey + "|" + test.Name
		// Child-level dedup: don't re-create a test already imported
		// under this order/date (previously this index was unused).
		if _, ok := existingByKey[testKey]; ok {
			continue
		}
		childID := lib.NewID()
		// Prefer the standard abbreviation for display when available.
		displayName := test.CommonName
		if displayName == "" {
			displayName = test.Name
		}
		summary := displayName + ": " + test.Value
		if test.Unit != "" {
			summary += " " + test.Unit
		}
		// Only include optional fields that are actually present.
		childData := map[string]interface{}{
			"source_key": testKey,
		}
		if test.CommonName != "" {
			childData["common_name"] = test.CommonName
		}
		if test.NumericValue != nil {
			childData["numeric_value"] = *test.NumericValue
		}
		if test.Unit != "" {
			childData["unit"] = test.Unit
		}
		dataJSON, _ := json.Marshal(childData)
		entries = append(entries, &lib.Entry{
			EntryID:   childID,
			DossierID: dossierID,
			ParentID:  parentID,
			Category:  lib.CategoryLab,
			Type:      test.Name,
			Value:     test.Value,
			Summary:   summary,
			Timestamp: ts,
			Data:      string(dataJSON),
		})
	}
	return entries
}