// JSON/text health-data import pipeline: identify, extract, and normalize
// uploaded lab data for a dossier.
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
"time"
|
|
|
|
"inou/lib"
|
|
)
|
|
|
|
// processJSONBatch processes all pending JSON/text uploads for a dossier.
|
|
// Identifies and extracts each file via Gemini, then normalizes once at the end.
|
|
// Returns the number of successfully imported files.
|
|
func processJSONBatch(dossierID string, uploads []*lib.Entry) int {
|
|
// Load existing lab entries for dedup (once for entire batch)
|
|
existing, err := lib.EntryQuery(nil, dossierID, lib.CategoryLab, "", "*")
|
|
if err != nil {
|
|
log.Printf("import_json: warning: could not load existing entries: %v", err)
|
|
}
|
|
existingByKey := make(map[string]*lib.Entry, len(existing))
|
|
for _, e := range existing {
|
|
var data struct {
|
|
SourceKey string `json:"source_key"`
|
|
}
|
|
if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
|
|
existingByKey[data.SourceKey] = e
|
|
}
|
|
}
|
|
|
|
importID := lib.NextImportID()
|
|
var imported int
|
|
for i, upload := range uploads {
|
|
processProgress.Store(dossierID, map[string]interface{}{
|
|
"stage": "importing_json", "processed": i + 1, "total": len(uploads),
|
|
})
|
|
|
|
var d UploadData
|
|
json.Unmarshal([]byte(upload.Data), &d)
|
|
|
|
// Read and decrypt
|
|
content, err := lib.DecryptFile(d.Path)
|
|
if err != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Decrypt: "+err.Error())
|
|
continue
|
|
}
|
|
|
|
// Verify JSON for json uploads; text uploads skip this check
|
|
if upload.Type == "json" {
|
|
var jsonCheck json.RawMessage
|
|
if json.Unmarshal(content, &jsonCheck) != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Not valid JSON")
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Step 1: Identify (skip if user already selected a category)
|
|
categoryMap := map[string]string{"labs": "lab", "vitals": "vital", "documents": "document"}
|
|
category := categoryMap[upload.Type]
|
|
if category == "" {
|
|
category, err = identifyContent(content)
|
|
if err != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Identify: "+err.Error())
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Step 2+3: Extract and create entries
|
|
var parentIDs []string
|
|
switch category {
|
|
case "lab":
|
|
parentIDs, err = importLabFromContent(dossierID, content, existingByKey, importID)
|
|
default:
|
|
updateUploadStatus(upload.EntryID, "failed", "Unsupported: "+category)
|
|
continue
|
|
}
|
|
|
|
if err != nil {
|
|
updateUploadStatus(upload.EntryID, "failed", "Import: "+err.Error())
|
|
continue
|
|
}
|
|
|
|
// Store created entry IDs in upload data for undo
|
|
var uploadData map[string]interface{}
|
|
json.Unmarshal([]byte(upload.Data), &uploadData)
|
|
uploadData["status"] = "processed"
|
|
uploadData["created_entries"] = parentIDs
|
|
b, _ := json.Marshal(uploadData)
|
|
upload.Data = string(b)
|
|
lib.EntryWrite("", upload)
|
|
|
|
lib.AuditLog("", "json_import_file", dossierID, fmt.Sprintf("category=%s orders=%d upload=%s", category, len(parentIDs), upload.Value))
|
|
imported++
|
|
}
|
|
|
|
// Normalize once for the whole batch
|
|
if imported > 0 {
|
|
log.Printf("import_json: normalizing after %d imports", imported)
|
|
if err := lib.Normalize(dossierID, lib.CategoryLab, func(processed, total int) {
|
|
processProgress.Store(dossierID, map[string]interface{}{
|
|
"stage": "normalizing", "processed": processed, "total": total,
|
|
})
|
|
}); err != nil {
|
|
log.Printf("import_json: normalize warning: %v", err)
|
|
}
|
|
}
|
|
|
|
return imported
|
|
}
|
|
|
|
// identifyContent sends content to Gemini and asks what category of health data it is.
|
|
// Handles JSON, markdown, and plain text.
|
|
func identifyContent(content []byte) (string, error) {
|
|
prompt := fmt.Sprintf(`You are given a file containing health-related data. It may be JSON, markdown, CSV, or plain text.
|
|
|
|
Identify what category of health data this is.
|
|
|
|
Return a JSON object with a single key "category" set to one of these values:
|
|
- "lab" — laboratory test results (blood work, urinalysis, metabolic panels, stool analysis, microbiome, etc.)
|
|
- "vital" — vital signs (weight, blood pressure, heart rate, temperature, etc.)
|
|
- "medication" — medication list or prescriptions
|
|
- "supplement" — supplements or vitamins
|
|
- "diagnosis" — medical diagnoses
|
|
- "document" — clinical notes, medical reports, or other text documents
|
|
- "history" — medical history
|
|
- "family_history" — family medical history
|
|
- "consultation" — doctor visit or consultation notes
|
|
- "unknown" — cannot determine
|
|
|
|
Content:
|
|
%s`, string(content))
|
|
|
|
resp, err := lib.CallGemini(prompt)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
var result struct {
|
|
Category string `json:"category"`
|
|
}
|
|
if err := json.Unmarshal([]byte(resp), &result); err != nil {
|
|
return "", fmt.Errorf("parse identify response: %w", err)
|
|
}
|
|
|
|
if result.Category == "" || result.Category == "unknown" {
|
|
return "", fmt.Errorf("could not identify content")
|
|
}
|
|
|
|
if _, ok := lib.CategoryFromString[result.Category]; !ok {
|
|
return "", fmt.Errorf("unknown category: %s", result.Category)
|
|
}
|
|
|
|
return result.Category, nil
|
|
}
|
|
|
|
// labExtraction is the structured data Gemini returns for lab results.
// One labExtraction corresponds to a single lab order/panel on a single
// date; multi-date tables yield one labExtraction per date column (see the
// extraction prompt in extractLabData).
type labExtraction struct {
	OrderName string          `json:"order_name"` // panel/order name, e.g. "CBC WITH DIFFERENTIAL"
	Date      string          `json:"date"`       // collection/result date, ideally ISO 8601
	Provider  string          `json:"provider"`   // ordering provider name or lab company
	LabName   string          `json:"lab_name"`   // laboratory name
	Specimen  string          `json:"specimen"`   // specimen type, e.g. Blood, Urine, Stool
	Tests     []labTestResult `json:"tests"`      // individual measured test components
}
|
|
|
|
// labTestResult is a single measured test component within a lab order.
type labTestResult struct {
	Name         string   `json:"name"`          // full test name, e.g. "Hemoglobin"
	CommonName   string   `json:"common_name"`   // standard abbreviation (HGB, WBC, ...); empty when none exists
	Value        string   `json:"value"`         // measured value as a string (may be non-numeric, e.g. "Negative")
	NumericValue *float64 `json:"numeric_value"` // numeric form of Value; nil when the value is not numeric
	Unit         string   `json:"unit"`          // unit of measurement, e.g. g/dL, mmol/L
}
|
|
|
|
// importLabFromContent extracts lab data and creates entries.
|
|
// Uses direct parsing for known formats (MyChart), falls back to Gemini.
|
|
// Shared existingByKey map allows dedup across the batch (updated in place).
|
|
func importLabFromContent(dossierID string, content []byte, existingByKey map[string]*lib.Entry, importID int64) ([]string, error) {
|
|
var extractions []labExtraction
|
|
var err error
|
|
if bytes.Contains(content, []byte("resultComponents")) {
|
|
extractions, err = parseMyChart(content)
|
|
} else {
|
|
extractions, err = extractLabData(content)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("extract: %w", err)
|
|
}
|
|
|
|
if len(extractions) == 0 {
|
|
return nil, fmt.Errorf("no orders found")
|
|
}
|
|
|
|
var parentIDs []string
|
|
for _, extraction := range extractions {
|
|
if len(extraction.Tests) == 0 {
|
|
continue
|
|
}
|
|
|
|
sourceKey := extraction.OrderName + "|" + extraction.Date
|
|
|
|
// Skip if this order was already imported
|
|
if _, ok := existingByKey[sourceKey]; ok {
|
|
log.Printf("import_json: skipping duplicate %s", extraction.OrderName)
|
|
continue
|
|
}
|
|
|
|
var ts int64
|
|
for _, layout := range []string{
|
|
time.RFC3339, "2006-01-02T15:04:05", "2006-01-02", "2006-01",
|
|
"Jan 2, 2006", "Jan 02, 2006", "January 2, 2006", "01/02/2006",
|
|
} {
|
|
if t, err := time.Parse(layout, extraction.Date); err == nil {
|
|
// For date-only formats, interpret as noon EST to avoid timezone rollback
|
|
if !strings.Contains(extraction.Date, "T") && !strings.Contains(extraction.Date, ":") {
|
|
est := time.FixedZone("EST", -5*60*60)
|
|
t = time.Date(t.Year(), t.Month(), t.Day(), 12, 0, 0, 0, est)
|
|
}
|
|
ts = t.Unix()
|
|
break
|
|
}
|
|
}
|
|
if ts == 0 {
|
|
ts = time.Now().Unix()
|
|
}
|
|
|
|
parentID := lib.NewID()
|
|
|
|
parentData, _ := json.Marshal(map[string]interface{}{
|
|
"source_key": sourceKey,
|
|
"source": "json_import",
|
|
"provider": extraction.Provider,
|
|
"lab_name": extraction.LabName,
|
|
"specimen": extraction.Specimen,
|
|
"local_time": extraction.Date,
|
|
})
|
|
|
|
var entries []*lib.Entry
|
|
entries = append(entries, &lib.Entry{
|
|
EntryID: parentID,
|
|
DossierID: dossierID,
|
|
Category: lib.CategoryLab,
|
|
Type: "lab_order",
|
|
Value: extraction.OrderName,
|
|
Timestamp: ts,
|
|
Data: string(parentData),
|
|
})
|
|
|
|
for _, test := range extraction.Tests {
|
|
if test.Name == "" || test.Value == "" {
|
|
continue
|
|
}
|
|
|
|
testKey := sourceKey + "|" + test.Name
|
|
childID := lib.NewID()
|
|
|
|
displayName := test.CommonName
|
|
if displayName == "" {
|
|
displayName = test.Name
|
|
}
|
|
summary := displayName + ": " + test.Value
|
|
if test.Unit != "" {
|
|
summary += " " + test.Unit
|
|
}
|
|
|
|
childData := map[string]interface{}{
|
|
"source_key": testKey,
|
|
}
|
|
if test.CommonName != "" {
|
|
childData["common_name"] = test.CommonName
|
|
}
|
|
if test.NumericValue != nil {
|
|
childData["numeric_value"] = *test.NumericValue
|
|
}
|
|
if test.Unit != "" {
|
|
childData["unit"] = test.Unit
|
|
}
|
|
dataJSON, _ := json.Marshal(childData)
|
|
|
|
entries = append(entries, &lib.Entry{
|
|
EntryID: childID,
|
|
DossierID: dossierID,
|
|
ParentID: parentID,
|
|
Category: lib.CategoryLab,
|
|
Type: test.Name,
|
|
Value: test.Value,
|
|
Summary: summary,
|
|
Timestamp: ts,
|
|
Data: string(dataJSON),
|
|
})
|
|
}
|
|
|
|
for _, e := range entries {
|
|
e.Import = importID
|
|
}
|
|
if err := lib.EntryWrite("", entries...); err != nil {
|
|
return parentIDs, fmt.Errorf("write: %w", err)
|
|
}
|
|
|
|
// Update dedup index for subsequent files in the batch
|
|
for _, e := range entries {
|
|
var data struct {
|
|
SourceKey string `json:"source_key"`
|
|
}
|
|
if json.Unmarshal([]byte(e.Data), &data) == nil && data.SourceKey != "" {
|
|
existingByKey[data.SourceKey] = e
|
|
}
|
|
}
|
|
|
|
parentIDs = append(parentIDs, parentID)
|
|
log.Printf("import_json: %s — %d results", extraction.OrderName, len(extraction.Tests))
|
|
}
|
|
|
|
if len(parentIDs) == 0 {
|
|
return nil, fmt.Errorf("no new test results found (all duplicates or empty)")
|
|
}
|
|
|
|
return parentIDs, nil
|
|
}
|
|
|
|
// extractLabData sends content to Gemini with a targeted extraction prompt.
|
|
// Returns an array of orders (supports multi-date tables).
|
|
func extractLabData(content []byte) ([]labExtraction, error) {
|
|
prompt := fmt.Sprintf(`Extract lab test results from this health data. It may be JSON, markdown tables, CSV, or plain text.
|
|
|
|
Return a JSON ARRAY of orders. Each distinct date or panel is a separate order.
|
|
If there is only one order, return a single-element array.
|
|
|
|
[
|
|
{
|
|
"order_name": "Name of the lab order/panel (e.g. CBC WITH DIFFERENTIAL, COMPREHENSIVE METABOLIC PANEL, Stool Analysis)",
|
|
"date": "Collection/result date in ISO 8601 format (e.g. 2022-07-18 or 2022-07-18T10:01:00-04:00)",
|
|
"provider": "Ordering provider name or lab company",
|
|
"lab_name": "Laboratory name",
|
|
"specimen": "Specimen type (e.g. Blood, Urine, Stool)",
|
|
"tests": [
|
|
{
|
|
"name": "Full test name (e.g. Hemoglobin)",
|
|
"common_name": "Standard abbreviation if one exists (e.g. HGB, WBC, RBC, PLT, Na, K, Cl). Leave empty if no standard abbreviation.",
|
|
"value": "The measured value as a string",
|
|
"numeric_value": 14.2,
|
|
"unit": "Unit of measurement (e.g. g/dL, mg/dL, mmol/L)"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
|
|
CRITICAL RULES:
|
|
- Extract ONLY the measured/observed values
|
|
- NEVER include reference ranges, normal ranges, low/high bounds, or abnormal flags
|
|
- NEVER include "Trend", "Notes", or commentary columns
|
|
- numeric_value should be null if the value is not numeric (e.g. "Negative", "Yellow", "None seen")
|
|
- Include ALL test components, even those with non-numeric values
|
|
- A dash "-" means "Not Detected" — include the test with value "Not Detected"
|
|
- If a table has multiple date columns, each date column is a separate order with its own tests
|
|
- Skip section headers (e.g. "Bacterial Pathogens", "Viral Pathogens") — only extract actual test rows
|
|
- Give each order a descriptive name based on the panel or test type
|
|
|
|
Content:
|
|
%s`, string(content))
|
|
|
|
maxTokens := 8192
|
|
temp := 0.0
|
|
config := &lib.GeminiConfig{
|
|
Temperature: &temp,
|
|
MaxOutputTokens: &maxTokens,
|
|
}
|
|
|
|
resp, err := lib.CallGeminiMultimodal([]lib.GeminiPart{{Text: prompt}}, config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp = strings.TrimSpace(resp)
|
|
|
|
// Try array first, fall back to single object
|
|
var extractions []labExtraction
|
|
if err := json.Unmarshal([]byte(resp), &extractions); err != nil {
|
|
var single labExtraction
|
|
if err2 := json.Unmarshal([]byte(resp), &single); err2 != nil {
|
|
return nil, fmt.Errorf("parse extraction: %w (first 300 chars: %.300s)", err, resp)
|
|
}
|
|
extractions = []labExtraction{single}
|
|
}
|
|
return extractions, nil
|
|
}
|
|
|
|
// parseMyChart parses MyChart JSON directly without Gemini.
|
|
// Detects via presence of "resultComponents" in the JSON.
|
|
func parseMyChart(content []byte) ([]labExtraction, error) {
|
|
var mc struct {
|
|
OrderName string `json:"orderName"`
|
|
Results []struct {
|
|
OrderMetadata struct {
|
|
PrioritizedInstantISO string `json:"prioritizedInstantISO"`
|
|
OrderProviderName string `json:"orderProviderName"`
|
|
AuthorizingProvider string `json:"authorizingProviderName"`
|
|
SpecimensDisplay string `json:"specimensDisplay"`
|
|
ResultingLab struct {
|
|
Name string `json:"name"`
|
|
} `json:"resultingLab"`
|
|
} `json:"orderMetadata"`
|
|
ResultComponents []struct {
|
|
ComponentInfo struct {
|
|
Name string `json:"name"`
|
|
CommonName string `json:"commonName"`
|
|
Units string `json:"units"`
|
|
} `json:"componentInfo"`
|
|
ComponentResultInfo struct {
|
|
Value string `json:"value"`
|
|
NumericValue *float64 `json:"numericValue"`
|
|
} `json:"componentResultInfo"`
|
|
} `json:"resultComponents"`
|
|
} `json:"results"`
|
|
}
|
|
if err := json.Unmarshal(content, &mc); err != nil {
|
|
return nil, fmt.Errorf("parse mychart: %w", err)
|
|
}
|
|
|
|
var extractions []labExtraction
|
|
for _, r := range mc.Results {
|
|
if len(r.ResultComponents) == 0 {
|
|
continue
|
|
}
|
|
provider := r.OrderMetadata.OrderProviderName
|
|
if provider == "" {
|
|
provider = r.OrderMetadata.AuthorizingProvider
|
|
}
|
|
ext := labExtraction{
|
|
OrderName: mc.OrderName,
|
|
Date: r.OrderMetadata.PrioritizedInstantISO,
|
|
Provider: provider,
|
|
LabName: r.OrderMetadata.ResultingLab.Name,
|
|
Specimen: r.OrderMetadata.SpecimensDisplay,
|
|
}
|
|
for _, c := range r.ResultComponents {
|
|
if c.ComponentInfo.Name == "" || c.ComponentResultInfo.Value == "" {
|
|
continue
|
|
}
|
|
ext.Tests = append(ext.Tests, labTestResult{
|
|
Name: c.ComponentInfo.Name,
|
|
CommonName: c.ComponentInfo.CommonName,
|
|
Value: c.ComponentResultInfo.Value,
|
|
NumericValue: c.ComponentResultInfo.NumericValue,
|
|
Unit: c.ComponentInfo.Units,
|
|
})
|
|
}
|
|
if len(ext.Tests) > 0 {
|
|
extractions = append(extractions, ext)
|
|
}
|
|
}
|
|
return extractions, nil
|
|
}
|
|
|