369 lines
11 KiB
Go
369 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"inou/lib"
|
|
)
|
|
|
|
// Same types as portal/import_json.go
|
|
type labExtraction struct {
|
|
OrderName string `json:"order_name"`
|
|
Date string `json:"date"`
|
|
Provider string `json:"provider"`
|
|
LabName string `json:"lab_name"`
|
|
Specimen string `json:"specimen"`
|
|
Tests []labTestResult `json:"tests"`
|
|
}
|
|
|
|
// labTestResult is a single test row inside a lab order.
type labTestResult struct {
	// Name is the full test name; CommonName is an optional abbreviation.
	Name       string `json:"name"`
	CommonName string `json:"common_name"`

	// Value is the measured value as text. NumericValue stays nil when the
	// JSON field is null or missing (i.e. the value is not numeric).
	Value        string   `json:"value"`
	NumericValue *float64 `json:"numeric_value"`

	// Unit is the unit of measurement, possibly empty.
	Unit string `json:"unit"`
}
|
|
|
|
func main() {
|
|
if len(os.Args) < 3 {
|
|
fmt.Println("Usage: import-json <dossier-id> <path>")
|
|
fmt.Println(" path: directory of files or single file (JSON, text, markdown)")
|
|
os.Exit(1)
|
|
}
|
|
dossierID := os.Args[1]
|
|
inputPath := os.Args[2]
|
|
|
|
if err := lib.Init(); err != nil {
|
|
fmt.Println("lib.Init failed:", err)
|
|
os.Exit(1)
|
|
}
|
|
lib.ConfigInit()
|
|
|
|
if lib.GeminiKey == "" {
|
|
fmt.Println("No Gemini API key configured")
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Collect input files
|
|
var files []string
|
|
info, err := os.Stat(inputPath)
|
|
if err != nil {
|
|
fmt.Printf("Cannot access %s: %v\n", inputPath, err)
|
|
os.Exit(1)
|
|
}
|
|
if info.IsDir() {
|
|
for _, ext := range []string{"*.json", "*.txt", "*.md", "*.csv"} {
|
|
matches, _ := filepath.Glob(filepath.Join(inputPath, ext))
|
|
files = append(files, matches...)
|
|
}
|
|
} else {
|
|
files = []string{inputPath}
|
|
}
|
|
fmt.Printf("Found %d files\n", len(files))
|
|
|
|
// Load existing lab entries for dedup
|
|
existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*")
|
|
if err != nil {
|
|
fmt.Printf("Warning: could not load existing entries: %v\n", err)
|
|
}
|
|
existingByKey := make(map[string]*lib.Entry, len(existing))
|
|
for _, e := range existing {
|
|
var data struct {
|
|
SourceKey string `json:"source_key"`
|
|
}
|
|
if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
|
|
existingByKey[data.SourceKey] = e
|
|
}
|
|
}
|
|
fmt.Printf("Loaded %d existing lab entries (%d with source_key)\n", len(existing), len(existingByKey))
|
|
|
|
var totalEntries int
|
|
var skipped, failed, imported int
|
|
|
|
for i, f := range files {
|
|
content, err := os.ReadFile(f)
|
|
if err != nil {
|
|
log.Printf("[%d/%d] %s: read error: %v", i+1, len(files), filepath.Base(f), err)
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
// Step 1: Identify
|
|
category, err := identifyContent(content)
|
|
if err != nil {
|
|
log.Printf("[%d/%d] %s: identify error: %v", i+1, len(files), filepath.Base(f), err)
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
if category != "lab" {
|
|
log.Printf("[%d/%d] %s: category=%s (skipping, only lab supported)", i+1, len(files), filepath.Base(f), category)
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
// Step 2: Extract (returns array of orders)
|
|
extractions, err := extractLabData(content)
|
|
if err != nil {
|
|
log.Printf("[%d/%d] %s: extract error: %v", i+1, len(files), filepath.Base(f), err)
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
// Step 3: Build entries for each order and write
|
|
fileEntries := 0
|
|
for _, extraction := range extractions {
|
|
if len(extraction.Tests) == 0 {
|
|
continue
|
|
}
|
|
|
|
sourceKey := extraction.OrderName + "|" + extraction.Date
|
|
if _, ok := existingByKey[sourceKey]; ok {
|
|
log.Printf("[%d/%d] %s: already imported (%s), skipping", i+1, len(files), filepath.Base(f), extraction.OrderName)
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
entries := buildEntries(dossierID, &extraction, existingByKey)
|
|
if err := lib.EntryWrite("", entries...); err != nil {
|
|
log.Printf("[%d/%d] %s: write error: %v", i+1, len(files), filepath.Base(f), err)
|
|
failed++
|
|
continue
|
|
}
|
|
|
|
// Update dedup index
|
|
for _, e := range entries {
|
|
var data struct {
|
|
SourceKey string `json:"source_key"`
|
|
}
|
|
if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
|
|
existingByKey[data.SourceKey] = e
|
|
}
|
|
}
|
|
|
|
fileEntries += len(entries)
|
|
log.Printf("[%d/%d] %s: %s — %d tests", i+1, len(files), filepath.Base(f), extraction.OrderName, len(extraction.Tests))
|
|
}
|
|
|
|
if fileEntries > 0 {
|
|
totalEntries += fileEntries
|
|
imported++
|
|
}
|
|
}
|
|
|
|
fmt.Printf("\nDone: %d imported, %d skipped, %d failed, %d total entries written\n", imported, skipped, failed, totalEntries)
|
|
|
|
// Normalize once at the end
|
|
fmt.Println("Normalizing...")
|
|
if err := lib.Normalize(dossierID, lib.CategoryLab); err != nil {
|
|
fmt.Printf("Normalize warning: %v\n", err)
|
|
}
|
|
fmt.Println("Complete.")
|
|
}
|
|
|
|
// identifyContent — same Gemini prompt as portal/import_json.go
|
|
func identifyContent(content []byte) (string, error) {
|
|
prompt := fmt.Sprintf(`You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text.
|
|
|
|
Identify what category of health data this is.
|
|
|
|
Return a JSON object with a single key "category" set to one of these values:
|
|
- "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.)
|
|
- "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.)
|
|
- "medication" — medication list or prescriptions
|
|
- "supplement" — supplements or vitamins
|
|
- "diagnosis" — medical diagnoses
|
|
- "document" — clinical notes, medical reports, or other text documents
|
|
- "history" — medical history
|
|
- "family_history" — family medical history
|
|
- "consultation" — doctor visit or consultation notes
|
|
- "unknown" — cannot determine
|
|
|
|
Content:
|
|
%s`, string(content))
|
|
|
|
resp, err := lib.CallGemini(prompt)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
var result struct {
|
|
Category string `json:"category"`
|
|
}
|
|
if err := json.Unmarshal([]byte(resp), &result); err != nil {
|
|
return "", fmt.Errorf("parse: %w", err)
|
|
}
|
|
|
|
if result.Category == "" || result.Category == "unknown" {
|
|
return "", fmt.Errorf("could not identify")
|
|
}
|
|
if _, ok := lib.CategoryFromString[result.Category]; !ok {
|
|
return "", fmt.Errorf("unknown category: %s", result.Category)
|
|
}
|
|
return result.Category, nil
|
|
}
|
|
|
|
// extractLabData — same Gemini prompt as portal/import_json.go
|
|
func extractLabData(content []byte) ([]labExtraction, error) {
|
|
prompt := fmt.Sprintf(`Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text.
|
|
|
|
Return a JSON ARRAY of orders. Each distinct date or panel is a separate order.
|
|
If there is only one order, return a single-element array.
|
|
|
|
[
|
|
{
|
|
"order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)",
|
|
"date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)",
|
|
"provider": "Ordering provider name or lab company",
|
|
"lab_name": "Laboratory name",
|
|
"specimen": "Specimen type (e.g. Blood, Urine, Stool)",
|
|
"tests": [
|
|
{
|
|
"name": "Full test name (e.g. Hemoglobin)",
|
|
"common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.",
|
|
"value": "The measured value as a string",
|
|
"numeric_value": 14.2,
|
|
"unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
|
|
CRITICAL RULES:
|
|
- Extract ONLY the measured/observed values
|
|
- NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags
|
|
- NEVER include "Trend", "Notes", or commentary columns
|
|
- numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen")
|
|
- Include ALL test components, even those with non-numeric values
|
|
- A dash "-" means "Not Detected" — include the test with value "Not Detected"
|
|
- If a table has multiple date columns, each date column is a separate order with its own tests
|
|
- Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows
|
|
- Give each order a descriptive name based on the panel or test type
|
|
|
|
Content:
|
|
%s`, string(content))
|
|
|
|
maxTokens := 8192
|
|
temp := 0.0
|
|
config := &lib.GeminiConfig{
|
|
Temperature: &temp,
|
|
MaxOutputTokens: &maxTokens,
|
|
}
|
|
|
|
resp, err := lib.CallGeminiMultimodal([]lib.GeminiPart{{Text: prompt}}, config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp = strings.TrimSpace(resp)
|
|
|
|
// Try array first, fall back to single object
|
|
var extractions []labExtraction
|
|
if err := json.Unmarshal([]byte(resp), &extractions); err != nil {
|
|
var single labExtraction
|
|
if err2 := json.Unmarshal([]byte(resp), &single); err2 != nil {
|
|
return nil, fmt.Errorf("parse: %w (first 300 chars: %.300s)", err, resp)
|
|
}
|
|
extractions = []labExtraction{single}
|
|
}
|
|
return extractions, nil
|
|
}
|
|
|
|
// buildEntries — same entry creation logic as portal/import_json.go
|
|
func buildEntries(dossierID string, extraction *labExtraction, existingByKey map[string]*lib.Entry) []*lib.Entry {
|
|
sourceKey := extraction.OrderName + "|" + extraction.Date
|
|
|
|
var ts int64
|
|
for _, layout := range []string{
|
|
time.RFC3339, "2006-01-02T15:04:05", "2006-01-02", "2006-01",
|
|
"Jan 2, 2006", "Jan 02, 2006", "January 2, 2006", "01/02/2006",
|
|
} {
|
|
if t, err := time.Parse(layout, extraction.Date); err == nil {
|
|
// For date-only formats, interpret as noon EST to avoid timezone rollback
|
|
if !strings.Contains(extraction.Date, "T") && !strings.Contains(extraction.Date, ":") {
|
|
est := time.FixedZone("EST", -5*60*60)
|
|
t = time.Date(t.Year(), t.Month(), t.Day(), 12, 0, 0, 0, est)
|
|
}
|
|
ts = t.Unix()
|
|
break
|
|
}
|
|
}
|
|
if ts == 0 {
|
|
ts = time.Now().Unix()
|
|
}
|
|
|
|
parentID := lib.NewID()
|
|
|
|
parentData, _ := json.Marshal(map[string]interface{}{
|
|
"source_key": sourceKey,
|
|
"source": "json_import",
|
|
"provider": extraction.Provider,
|
|
"lab_name": extraction.LabName,
|
|
"specimen": extraction.Specimen,
|
|
"local_time": extraction.Date,
|
|
})
|
|
|
|
var entries []*lib.Entry
|
|
entries = append(entries, &lib.Entry{
|
|
EntryID: parentID,
|
|
DossierID: dossierID,
|
|
Category: lib.CategoryLab,
|
|
Type: "lab_order",
|
|
Value: extraction.OrderName,
|
|
Timestamp: ts,
|
|
Data: string(parentData),
|
|
})
|
|
|
|
for _, test := range extraction.Tests {
|
|
if test.Name == "" || test.Value == "" {
|
|
continue
|
|
}
|
|
|
|
testKey := sourceKey + "|" + test.Name
|
|
childID := lib.NewID()
|
|
|
|
displayName := test.CommonName
|
|
if displayName == "" {
|
|
displayName = test.Name
|
|
}
|
|
summary := displayName + ": " + test.Value
|
|
if test.Unit != "" {
|
|
summary += " " + test.Unit
|
|
}
|
|
|
|
childData := map[string]interface{}{
|
|
"source_key": testKey,
|
|
}
|
|
if test.CommonName != "" {
|
|
childData["common_name"] = test.CommonName
|
|
}
|
|
if test.NumericValue != nil {
|
|
childData["numeric_value"] = *test.NumericValue
|
|
}
|
|
if test.Unit != "" {
|
|
childData["unit"] = test.Unit
|
|
}
|
|
dataJSON, _ := json.Marshal(childData)
|
|
|
|
entries = append(entries, &lib.Entry{
|
|
EntryID: childID,
|
|
DossierID: dossierID,
|
|
ParentID: parentID,
|
|
Category: lib.CategoryLab,
|
|
Type: test.Name,
|
|
Value: test.Value,
|
|
Summary: summary,
|
|
Timestamp: ts,
|
|
Data: string(dataJSON),
|
|
})
|
|
}
|
|
|
|
return entries
|
|
}
|