package lib

import (
	"archive/tar"
	"archive/zip"
	"bytes"
	"compress/bzip2"
	"compress/gzip"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"image"
	"image/color"
	"image/png"
	"io"
	"math"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"golang.org/x/text/cases"
	"golang.org/x/text/language"
)

// ============================================================================
// PUBLIC TYPES
// ============================================================================

// ImportResult holds statistics from a completed DICOM import run.
type ImportResult struct {
	Studies int      // number of study entries created by this run
	Series  int      // number of series entries created by this run
	Slices  int      // number of slice entries created by this run
	Errors  []string // non-fatal per-file / per-archive problems, in occurrence order
}

// ============================================================================
// INTERNAL STATE
// ============================================================================

// importState holds per-run state: caches, counters, logger.
// Not safe for concurrent use — one importState per import goroutine.
type importState struct {
	dossierID   string
	importID    int64             // stamped onto every Entry via writeEntry
	studyCache  map[string]string // studyUID -> entryID
	seriesCache map[string]string // seriesUID -> entryID
	result      *ImportResult
	logFn       func(string, ...interface{})
}

// writeEntry stamps the entry with this run's import ID and persists it.
func (s *importState) writeEntry(e *Entry) error {
	e.Import = s.importID
	return EntryWrite("", e)
}

// newImportState builds a fresh importState for dossierID.
// A nil logFn is replaced with a no-op so callers never nil-check before logging.
// Note: importID is left at its zero value; callers set it separately.
func newImportState(dossierID string, logFn func(string, ...interface{})) *importState {
	if logFn == nil {
		logFn = func(string, ...interface{}) {}
	}
	return &importState{
		dossierID:   dossierID,
		studyCache:  make(map[string]string),
		seriesCache: make(map[string]string),
		result:      &ImportResult{},
		logFn:       logFn,
	}
}

// log forwards a printf-style message to the run's logger.
func (s *importState) log(format string, args ...interface{}) {
	s.logFn(format, args...)
}

// preloadCaches populates study/series caches from existing DB entries
// so a restarted import skips already-processed data.
func (s *importState) preloadCaches() { studies, _ := EntryRead("", s.dossierID, &Filter{Category: CategoryImaging, Type: "study"}) for _, e := range studies { var d struct { StudyUID string `json:"study_uid"` } if json.Unmarshal([]byte(e.Data), &d) == nil && d.StudyUID != "" { s.studyCache[d.StudyUID] = e.EntryID } } series, _ := EntryRead("", s.dossierID, &Filter{Category: CategoryImaging, Type: "series"}) for _, e := range series { var d struct { SeriesUID string `json:"series_instance_uid"` SeriesDesc string `json:"series_desc"` } if json.Unmarshal([]byte(e.Data), &d) == nil && d.SeriesUID != "" { s.seriesCache[d.SeriesUID+"|"+d.SeriesDesc] = e.EntryID } } } // ============================================================================ // PUBLIC ENTRY POINTS // ============================================================================ // ImportDICOMFromPath imports DICOM files from a directory on disk. // Used by the import-dicom CLI tool for direct archive imports. func ImportDICOMFromPath(dossierID, inputPath, seriesFilter string, logFn func(string, ...interface{})) (*ImportResult, error) { s := newImportState(dossierID, logFn) s.importID = NextImportID() s.preloadCaches() if err := s.importFromDir(inputPath, strings.ToUpper(seriesFilter)); err != nil { return s.result, err } return s.result, nil } // ImportDICOMFromEntries imports DICOM files from upload entries in the object store. // Used by the portal's background import goroutine. Archives are extracted in-memory. 
func ImportDICOMFromEntries(dossierID string, entries []*Entry, seriesFilter string, logFn func(string, ...interface{})) (*ImportResult, error) { s := newImportState(dossierID, logFn) s.preloadCaches() tmpDir, err := os.MkdirTemp("", "dicom_import_*") if err != nil { return s.result, fmt.Errorf("creating temp dir: %w", err) } defer os.RemoveAll(tmpDir) for _, e := range entries { data, err := ObjectRead(nil, dossierID, e.EntryID) if err != nil { s.result.Errors = append(s.result.Errors, fmt.Sprintf("read %s: %v", e.Value, err)) continue } filename := e.Value if isArchiveFilename(filename) { destDir := filepath.Join(tmpDir, e.EntryID) if err := os.MkdirAll(destDir, 0700); err == nil { if err := extractArchiveBytes(data, filename, destDir); err != nil { s.result.Errors = append(s.result.Errors, fmt.Sprintf("extract %s: %v", filename, err)) } } } else { // Write individual file using entryID as name to avoid collisions ext := filepath.Ext(filename) dest := filepath.Join(tmpDir, e.EntryID+ext) if err := os.WriteFile(dest, data, 0600); err != nil { s.result.Errors = append(s.result.Errors, fmt.Sprintf("write %s: %v", filename, err)) } } } if err := s.importFromDir(tmpDir, strings.ToUpper(seriesFilter)); err != nil { return s.result, err } return s.result, nil } // ============================================================================ // ARCHIVE SUPPORT // ============================================================================ // SupportedArchiveExts lists the compressed formats accepted for upload. var SupportedArchiveExts = []string{".zip", ".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz2"} // UnsupportedArchiveExts lists known compressed formats that are NOT accepted. 
var UnsupportedArchiveExts = []string{".7z", ".rar", ".xz", ".tar.xz", ".gz", ".bz2", ".zst", ".lz4", ".cab", ".iso"} func isArchiveFilename(filename string) bool { low := strings.ToLower(filename) for _, ext := range SupportedArchiveExts { if strings.HasSuffix(low, ext) { return true } } return false } // IsUnsupportedArchive returns true if the filename looks like an archive // format we don't support (so the UI can warn the user). func IsUnsupportedArchive(filename string) bool { low := strings.ToLower(filename) for _, ext := range UnsupportedArchiveExts { if strings.HasSuffix(low, ext) { return true } } return false } // extractArchiveBytes extracts a supported archive (provided as bytes) into destDir. func extractArchiveBytes(data []byte, filename, destDir string) error { low := strings.ToLower(filename) switch { case strings.HasSuffix(low, ".zip"): return extractZip(data, destDir) case strings.HasSuffix(low, ".tar.gz"), strings.HasSuffix(low, ".tgz"): gr, err := gzip.NewReader(bytes.NewReader(data)) if err != nil { return err } defer gr.Close() return extractTar(tar.NewReader(gr), destDir) case strings.HasSuffix(low, ".tar.bz2"), strings.HasSuffix(low, ".tbz2"): return extractTar(tar.NewReader(bzip2.NewReader(bytes.NewReader(data))), destDir) case strings.HasSuffix(low, ".tar"): return extractTar(tar.NewReader(bytes.NewReader(data)), destDir) default: return fmt.Errorf("unsupported archive format: %s", filename) } } func extractZip(data []byte, destDir string) error { r, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) if err != nil { return err } for _, f := range r.File { dest, err := safeExtractPath(destDir, f.Name) if err != nil { continue // skip unsafe paths silently } if f.FileInfo().IsDir() { os.MkdirAll(dest, 0700) continue } if err := os.MkdirAll(filepath.Dir(dest), 0700); err != nil { return err } rc, err := f.Open() if err != nil { return err } err = writeExtractedFile(dest, rc) rc.Close() if err != nil { return err } } return nil } 
func extractTar(tr *tar.Reader, destDir string) error { for { hdr, err := tr.Next() if err == io.EOF { break } if err != nil { return err } dest, err := safeExtractPath(destDir, hdr.Name) if err != nil { continue } if hdr.Typeflag == tar.TypeDir { os.MkdirAll(dest, 0700) continue } if err := os.MkdirAll(filepath.Dir(dest), 0700); err != nil { return err } if err := writeExtractedFile(dest, tr); err != nil { return err } } return nil } func writeExtractedFile(dest string, r io.Reader) error { f, err := os.OpenFile(dest, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) if err != nil { return err } defer f.Close() _, err = io.Copy(f, r) return err } // safeExtractPath prevents zip-slip: ensures dest is inside destDir. func safeExtractPath(destDir, name string) (string, error) { dest := filepath.Join(destDir, filepath.Clean("/"+name)) if !strings.HasPrefix(dest, filepath.Clean(destDir)+string(os.PathSeparator)) { return "", fmt.Errorf("zip slip: %s", name) } return dest, nil } // ============================================================================ // DICOM TAG READING // ============================================================================ // walkToTag walks the DICOM element stream from startPos, respecting VR/length // fields so it never matches tag bytes inside binary payloads (e.g. Siemens CSA // OB blobs). Returns the byte offset of the matching element, or -1. func walkToTag(data []byte, startPos int, group, elem uint16) int { pos := startPos n := len(data) for pos+4 <= n { if pos+4 > n { break } g := binary.LittleEndian.Uint16(data[pos : pos+2]) e := binary.LittleEndian.Uint16(data[pos+2 : pos+4]) if g == group && e == elem { return pos } // Determine value length to skip to next element. // If we can't parse a sensible length, fall back to byte scan. 
if pos+6 > n { break } vr := string(data[pos+4 : pos+6]) var valLen uint32 var headerLen int if isValidVR(data, pos+4) { // Explicit VR switch vr { case "OB", "OW", "SQ", "UN", "OD", "UC", "UR", "UT": // 4-byte reserved + 4-byte length if pos+12 > n { break } valLen = binary.LittleEndian.Uint32(data[pos+8 : pos+12]) headerLen = 12 default: // 2-byte length if pos+8 > n { break } valLen = uint32(binary.LittleEndian.Uint16(data[pos+6 : pos+8])) headerLen = 8 } } else { // Implicit VR: tag(4) + length(4) if pos+8 > n { break } valLen = binary.LittleEndian.Uint32(data[pos+4 : pos+8]) headerLen = 8 } // 0xFFFFFFFF = undefined length (SQ/item) — step past header only and // let the inner loop find the sequence delimiter naturally. if valLen == 0xFFFFFFFF { pos += headerLen } else { next := pos + headerLen + int(valLen) if next <= pos || next > n { // Corrupt/truncated length — fall back to byte-scan from here return findTagBytes(data, pos+1, group, elem) } pos = next } } return -1 } // findTagBytes is the original byte-scan fallback used only when the stream // walker cannot continue (corrupt length field). func findTagBytes(data []byte, startPos int, group, elem uint16) int { target := make([]byte, 4) binary.LittleEndian.PutUint16(target[0:2], group) binary.LittleEndian.PutUint16(target[2:4], elem) for i := startPos; i < len(data)-4; i++ { if data[i] == target[0] && data[i+1] == target[1] && data[i+2] == target[2] && data[i+3] == target[3] { return i } } return -1 } // findTag finds the first occurrence of a DICOM tag, using the stream walker // starting from the DICOM preamble offset (128-byte preamble + 4-byte DICM). func findTag(data []byte, group, elem uint16) int { // DICOM files start with 128-byte preamble + "DICM" magic. // Meta header (group 0x0002) always lives there; for the main dataset // we start the walk right after the preamble when present. 
startPos := 0 if len(data) >= 132 && string(data[128:132]) == "DICM" { startPos = 132 // For meta-header tags (group 0x0002), walk from 132. // For dataset tags, also walk from 132 — the walker handles both. } result := walkToTag(data, startPos, group, elem) if result < 0 && startPos > 0 { // Retry from byte 0 for edge cases (no preamble, raw DICOM) result = walkToTag(data, 0, group, elem) } return result } func findLastTag(data []byte, group, elem uint16) int { // Walk the full stream and keep the last match position. startPos := 0 if len(data) >= 132 && string(data[128:132]) == "DICM" { startPos = 132 } lastPos := -1 pos := startPos for { found := walkToTag(data, pos, group, elem) if found < 0 { break } lastPos = found pos = found + 1 // advance past this match to find a later one } return lastPos } // isValidVR checks if the 2 bytes at offset look like a valid DICOM VR func isValidVR(data []byte, offset int) bool { if offset+2 > len(data) { return false } vr := string(data[offset : offset+2]) validVRs := map[string]bool{ "AE": true, "AS": true, "AT": true, "CS": true, "DA": true, "DS": true, "DT": true, "FL": true, "FD": true, "IS": true, "LO": true, "LT": true, "OB": true, "OD": true, "OF": true, "OW": true, "PN": true, "SH": true, "SL": true, "SQ": true, "SS": true, "ST": true, "TM": true, "UC": true, "UI": true, "UL": true, "UN": true, "UR": true, "US": true, "UT": true, } return validVRs[vr] } // isImplicitVR returns true if transfer syntax uses implicit VR (no VR field in data elements) func isImplicitVR(data []byte) bool { // Check transfer syntax UID from file meta info (group 0x0002) ts := readStringTagExplicit(data, 0x0002, 0x0010) if ts == "" { // No transfer syntax specified - default to Explicit VR Little Endian return false } // Implicit VR Little Endian: 1.2.840.10008.1.2 // Also check for Siemens private implicit VR variants return ts == "1.2.840.10008.1.2" || strings.Contains(ts, "1.2.276.0.7230010") } // readStringTagExplicit reads with explicit 
VR assumption (for meta-header) func readStringTagExplicit(data []byte, group, elem uint16) string { pos := findTag(data, group, elem) if pos < 0 { return "" } vr := string(data[pos+4 : pos+6]) var length uint16 var valPos int if vr == "OB" || vr == "OW" || vr == "SQ" || vr == "UN" || vr == "OD" || vr == "UC" || vr == "UT" { length = uint16(binary.LittleEndian.Uint32(data[pos+8 : pos+12])) valPos = pos + 12 } else { length = binary.LittleEndian.Uint16(data[pos+6 : pos+8]) valPos = pos + 8 } if valPos+int(length) > len(data) { return "" } raw := data[valPos : valPos+int(length)] return strings.TrimRight(string(raw), " \x00") } func readStringTag(data []byte, group, elem uint16) string { pos := findTag(data, group, elem) if pos < 0 { return "" } // Check for implicit VR by validating the VR field implicitVR := !isValidVR(data, pos+4) if implicitVR { // Implicit VR: tag (4) + length (4) + value length := binary.LittleEndian.Uint32(data[pos+4 : pos+8]) valPos := pos + 8 if valPos+int(length) > len(data) { return "" } raw := data[valPos : valPos+int(length)] var s string if utf8.Valid(raw) { s = string(raw) } else { runes := make([]rune, len(raw)) for i, b := range raw { runes[i] = rune(b) } s = string(runes) } return strings.TrimRight(s, " \x00") } // Explicit VR path vr := string(data[pos+4 : pos+6]) var length uint16 var valPos int if vr == "OB" || vr == "OW" || vr == "SQ" || vr == "UN" || vr == "OD" || vr == "UC" || vr == "UT" { length = uint16(binary.LittleEndian.Uint32(data[pos+8 : pos+12])) valPos = pos + 12 } else { length = binary.LittleEndian.Uint16(data[pos+6 : pos+8]) valPos = pos + 8 } if valPos+int(length) > len(data) { return "" } raw := data[valPos : valPos+int(length)] var s string if utf8.Valid(raw) { s = string(raw) } else { // Latin-1 (ISO_IR 100) — each byte maps to its Unicode code point runes := make([]rune, len(raw)) for i, b := range raw { runes[i] = rune(b) } s = string(runes) } for len(s) > 0 && (s[len(s)-1] == ' ' || s[len(s)-1] == 0) { s = 
s[:len(s)-1] } return s } func readIntTag(data []byte, group, elem uint16) int { s := strings.TrimSpace(readStringTag(data, group, elem)) n, _ := strconv.Atoi(s) return n } func readIntTagSmart(data []byte, group, elem uint16) int { pos := findTag(data, group, elem) if pos < 0 { return 0 } // Check for implicit VR before reading VR field if isValidVR(data, pos+4) { vr := string(data[pos+4 : pos+6]) if vr == "US" || vr == "SS" { valPos := pos + 8 if valPos+2 <= len(data) { return int(binary.LittleEndian.Uint16(data[valPos : valPos+2])) } } } // For implicit VR or non-integer VRs, fall back to string parsing s := strings.TrimSpace(readStringTag(data, group, elem)) n, _ := strconv.Atoi(s) return n } func readFloatTag(data []byte, group, elem uint16) float64 { s := strings.TrimSpace(readStringTag(data, group, elem)) f, _ := strconv.ParseFloat(s, 64) return f } func readFloatTag2(data []byte, group, elem uint16) (float64, float64) { s := strings.TrimSpace(readStringTag(data, group, elem)) parts := strings.Split(s, "\\") var v1, v2 float64 if len(parts) >= 1 { v1, _ = strconv.ParseFloat(strings.TrimSpace(parts[0]), 64) } if len(parts) >= 2 { v2, _ = strconv.ParseFloat(strings.TrimSpace(parts[1]), 64) } return v1, v2 } func formatStudyProgressLabel(bodyPart, modality, studyDate string) string { modalityLabel := modality switch strings.ToUpper(modality) { case "MR": modalityLabel = "MRI" case "CT": modalityLabel = "CT" case "DX", "CR": modalityLabel = "X-ray" case "PT": modalityLabel = "PET" case "NM": modalityLabel = "Nuclear Med" case "US": modalityLabel = "Ultrasound" } label := bodyPart if modalityLabel != "" { if label != "" { label += " " + modalityLabel } else { label = modalityLabel } } if studyDate != "" && len(studyDate) == 8 { // YYYYMMDD → MM/DD/YYYY label += " " + studyDate[4:6] + "/" + studyDate[6:8] + "/" + studyDate[0:4] } if label == "" { label = "DICOM" } return label } // ============================================================================ // 
PIXEL DATA // ============================================================================ func findPixelData(data []byte, expectedBytes int) int { target := []byte{0xE0, 0x7F, 0x10, 0x00} pos := 0 for { idx := -1 for i := pos; i < len(data)-12; i++ { if data[i] == target[0] && data[i+1] == target[1] && data[i+2] == target[2] && data[i+3] == target[3] { idx = i break } } if idx < 0 { return -1 } vr := string(data[idx+4 : idx+6]) var length int if vr == "OW" || vr == "OB" { length = int(binary.LittleEndian.Uint32(data[idx+8 : idx+12])) } else { length = int(binary.LittleEndian.Uint16(data[idx+6 : idx+8])) } if length > 0 && (length == expectedBytes || (length > expectedBytes*9/10 && length < expectedBytes*11/10)) { return idx } pos = idx + 1 } } // ============================================================================ // IMAGE ENCODING // ============================================================================ type gray16Image struct { Pixels []uint16 Width, Height int } func (g *gray16Image) ColorModel() color.Model { return color.Gray16Model } func (g *gray16Image) Bounds() image.Rectangle { return image.Rect(0, 0, g.Width, g.Height) } func (g *gray16Image) At(x, y int) color.Color { if x < 0 || x >= g.Width || y < 0 || y >= g.Height { return color.Gray16{Y: 0} } return color.Gray16{Y: g.Pixels[y*g.Width+x]} } func encode16BitPNG(pixels []uint16, width, height int) ([]byte, error) { img := &gray16Image{Pixels: pixels, Width: width, Height: height} var buf bytes.Buffer if err := png.Encode(&buf, img); err != nil { return nil, err } return buf.Bytes(), nil } func encodeRGBPNG(pixels []byte, width, height int) ([]byte, error) { img := image.NewRGBA(image.Rect(0, 0, width, height)) for y := 0; y < height; y++ { for x := 0; x < width; x++ { idx := (y*width + x) * 3 img.Set(x, y, color.RGBA{R: pixels[idx], G: pixels[idx+1], B: pixels[idx+2], A: 255}) } } var buf bytes.Buffer if err := png.Encode(&buf, img); err != nil { return nil, err } return buf.Bytes(), 
nil } func boundingBox(pixels []uint16, width, height int, minVal, maxVal uint16) (x1, y1, x2, y2 int, valid bool) { threshold := minVal + uint16(float64(maxVal-minVal)*0.05) if threshold < 50 { threshold = 50 } x1, y1 = width, height x2, y2 = -1, -1 for y := 0; y < height; y++ { for x := 0; x < width; x++ { if pixels[y*width+x] > threshold { if x < x1 { x1 = x } if y < y1 { y1 = y } if x > x2 { x2 = x } if y > y2 { y2 = y } } } } if x2 < 0 { return 0, 0, 0, 0, false } return x1, y1, x2, y2, true } func unionBBox(a, b [4]int) [4]int { return [4]int{ min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3]), } } // ============================================================================ // ORIENTATION & LOCALIZER DETECTION // ============================================================================ func getOrientationType(data []byte) string { orientStr := readStringTag(data, 0x0020, 0x0037) if orientStr == "" { return "" } parts := strings.Split(orientStr, "\\") if len(parts) < 6 { return "" } vals := make([]float64, 6) for i := 0; i < 6; i++ { vals[i], _ = strconv.ParseFloat(strings.TrimSpace(parts[i]), 64) } rowX, rowY, rowZ := vals[0], vals[1], vals[2] colX, colY, colZ := vals[3], vals[4], vals[5] normX := rowY*colZ - rowZ*colY normY := rowZ*colX - rowX*colZ normZ := rowX*colY - rowY*colX absX := math.Abs(normX) absY := math.Abs(normY) absZ := math.Abs(normZ) if absZ >= absX && absZ >= absY { return "AX" } else if absX >= absY && absX >= absZ { return "SAG" } return "COR" } func isLocalizer(data []byte) bool { imageType := strings.ToUpper(readStringTag(data, 0x0008, 0x0008)) return strings.Contains(imageType, "LOCALIZER") || strings.Contains(imageType, "SCOUT") } // ============================================================================ // MULTI-FRAME SUPPORT // ============================================================================ // FramePosition holds per-frame position data from enhanced multi-frame DICOM. 
type FramePosition struct { X, Y, Z float64 } func parsePerFramePositions(data []byte, numFrames int) []FramePosition { positions := make([]FramePosition, numFrames) perFrameTag := []byte{0x00, 0x52, 0x30, 0x92} perFramePos := -1 for i := 0; i < len(data)-4; i++ { if data[i] == perFrameTag[0] && data[i+1] == perFrameTag[1] && data[i+2] == perFrameTag[2] && data[i+3] == perFrameTag[3] { perFramePos = i break } } if perFramePos < 0 { return positions } planeTag := []byte{0x20, 0x00, 0x13, 0x91} imgPosTag := []byte{0x20, 0x00, 0x32, 0x00} frameIdx := 0 searchStart := perFramePos for frameIdx < numFrames { planePos := -1 for i := searchStart; i < len(data)-4; i++ { if data[i] == planeTag[0] && data[i+1] == planeTag[1] && data[i+2] == planeTag[2] && data[i+3] == planeTag[3] { planePos = i break } } if planePos < 0 { break } for i := planePos; i < planePos+100 && i < len(data)-8; i++ { if data[i] == imgPosTag[0] && data[i+1] == imgPosTag[1] && data[i+2] == imgPosTag[2] && data[i+3] == imgPosTag[3] { if data[i+4] == 'D' && data[i+5] == 'S' { length := int(binary.LittleEndian.Uint16(data[i+6 : i+8])) if i+8+length <= len(data) { value := strings.TrimSpace(string(data[i+8 : i+8+length])) parts := strings.Split(value, "\\") if len(parts) >= 3 { positions[frameIdx].X, _ = strconv.ParseFloat(strings.TrimSpace(parts[0]), 64) positions[frameIdx].Y, _ = strconv.ParseFloat(strings.TrimSpace(parts[1]), 64) positions[frameIdx].Z, _ = strconv.ParseFloat(strings.TrimSpace(parts[2]), 64) } } } break } } searchStart = planePos + 1 frameIdx++ } return positions } // ============================================================================ // DECOMPRESSION // ============================================================================ func getTransferSyntax(data []byte) string { return readStringTag(data, 0x0002, 0x0010) } func isCompressedTransferSyntax(ts string) bool { // JPEG family: 1.2.840.10008.1.2.4.x (baseline, extended, lossless, JPEG 2000, etc.) 
// JPEG-LS: 1.2.840.10008.1.2.4.80 / .81 // JPEG 2000 Lossless: 1.2.840.10008.1.2.4.90 // JPEG 2000 Lossy: 1.2.840.10008.1.2.4.91 // JPEG 2000 Multi: 1.2.840.10008.1.2.4.92 / .93 // RLE Lossless: 1.2.840.10008.1.2.5 // Deflated: 1.2.840.10008.1.2.1.99 return strings.HasPrefix(ts, "1.2.840.10008.1.2.4") || ts == "1.2.840.10008.1.2.5" || ts == "1.2.840.10008.1.2.1.99" } // decompressDICOM uses gdcmconv to decompress any compressed DICOM transfer // syntax (JPEG, JPEG 2000 Lossless/Lossy, JPEG-LS, RLE, Deflated). // Requires gdcmconv from libgdcm-tools (apt install libgdcm-tools). func decompressDICOM(dicomPath string) ([]byte, error) { tmpFile := fmt.Sprintf("/tmp/dcm_%d_%d.dcm", os.Getpid(), time.Now().UnixNano()) defer os.Remove(tmpFile) cmd := exec.Command("gdcmconv", "-w", "-i", dicomPath, "-o", tmpFile) output, err := cmd.CombinedOutput() if err != nil { return nil, fmt.Errorf("gdcmconv failed: %v - %s", err, string(output)) } return os.ReadFile(tmpFile) } // ============================================================================ // METADATA HELPERS // ============================================================================ // parseDICOMDate parses a DICOM date string (YYYYMMDD) to Unix timestamp. func parseDICOMDate(s string) int64 { s = strings.TrimSpace(s) if len(s) >= 8 { if t, err := time.Parse("20060102", s[:8]); err == nil { return t.Unix() } } return time.Now().Unix() } // formatDICOMDate formats a DICOM date string (YYYYMMDD) as "Jan 2006". 
// formatDICOMDate returns the YYYYMMDD prefix of s rendered as "Jan 2006";
// unparsable input is returned unchanged (trimmed).
func formatDICOMDate(s string) string {
	s = strings.TrimSpace(s)
	if len(s) >= 8 {
		if t, err := time.Parse("20060102", s[:8]); err == nil {
			return t.Format("Jan 2006")
		}
	}
	return s
}

// formatPatientName converts a DICOM PN value ("FAMILY^GIVEN^MIDDLE") to
// title-cased "Given Middle Family". Input without "^" is returned as-is.
// NOTE(review): an empty given-name component would yield a leading space
// ("" + " " + family) — presumably acceptable for display; confirm.
func formatPatientName(name string) string {
	titleCase := cases.Title(language.English)
	parts := strings.Split(name, "^")
	for i, p := range parts {
		parts[i] = titleCase.String(strings.ToLower(strings.TrimSpace(p)))
	}
	if len(parts) >= 3 && parts[2] != "" {
		// Given Middle Family
		return parts[1] + " " + parts[2] + " " + parts[0]
	} else if len(parts) >= 2 {
		// Given Family
		return parts[1] + " " + parts[0]
	}
	return name
}

// ============================================================================
// FILE SCANNING
// ============================================================================

// dicomFileRef pairs a DICOM file path with its InstanceNumber for sorting.
type dicomFileRef struct {
	Path        string
	InstanceNum int
}

// findDICOMFiles walks root and returns every file carrying the "DICM" magic
// at offset 128. Obvious non-DICOM names/extensions are rejected without
// reading. Skipped paths are appended (best-effort) to
// /tmp/dicom_import_other.log for debugging.
func findDICOMFiles(root string) ([]string, error) {
	var files []string
	var nonDicom []string
	scanned := 0 // NOTE(review): counted but not otherwise used in this function
	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Unreadable entries are skipped, not fatal.
			return nil
		}
		if info.IsDir() {
			return nil
		}
		scanned++
		name := info.Name()
		// Fast-path rejects: known index/lock files and non-DICOM extensions.
		if name == "DICOMDIR" || name == "LOCKFILE" || name == "VERSION" ||
			strings.HasSuffix(name, ".jpg") || strings.HasSuffix(name, ".png") ||
			strings.HasSuffix(name, ".exe") || strings.HasSuffix(name, ".dll") {
			nonDicom = append(nonDicom, path)
			return nil
		}
		data, err := os.ReadFile(path)
		if err != nil || len(data) < 132 {
			// Too short to hold the 128-byte preamble + "DICM" magic.
			nonDicom = append(nonDicom, path)
			return nil
		}
		if string(data[128:132]) == "DICM" {
			files = append(files, path)
		} else {
			nonDicom = append(nonDicom, path)
		}
		return nil
	})
	if len(nonDicom) > 0 {
		// Best-effort debug log of everything that was skipped.
		logPath := "/tmp/dicom_import_other.log"
		if f, err := os.Create(logPath); err == nil {
			for _, path := range nonDicom {
				fmt.Fprintln(f, path)
			}
			f.Close()
		}
	}
	return files, err
}

// ============================================================================
// DB OPERATIONS (importState methods)
// ============================================================================

// getOrCreateRoot returns the entry ID of the category's root entry for this
// dossier, creating one (parented to the dossier itself) if none exists.
func (s *importState) getOrCreateRoot(category int) (string, error) {
	roots, err := EntryRead("", s.dossierID, &Filter{Category: category, Type: "root"})
	if err == nil {
		for _, r := range roots {
			// First existing root wins.
			return r.EntryID, nil
		}
	}
	e := &Entry{
		EntryID:   NewID(),
		DossierID: s.dossierID,
		ParentID:  s.dossierID,
		Category:  category,
		Type:      "root",
		Timestamp: time.Now().Unix(),
	}
	if err := s.writeEntry(e); err != nil {
		return "", err
	}
	return e.EntryID, nil
}

// getOrCreateStudy returns the entry ID for the study the given DICOM buffer
// belongs to, consulting the in-memory cache, then the DB, and finally
// creating a new study entry (with patient/study metadata as JSON) under
// rootID. Increments result.Studies only when a new entry is written.
func (s *importState) getOrCreateStudy(data []byte, rootID string) (string, error) {
	studyUID := readStringTag(data, 0x0020, 0x000D)
	if id, ok := s.studyCache[studyUID]; ok {
		return id, nil
	}
	// Cache miss: scan existing study entries in the DB before creating.
	studies, err := EntryRead("", s.dossierID, &Filter{Category: CategoryImaging, Type: "study"})
	if err == nil {
		for _, e := range studies {
			var d struct {
				StudyUID string `json:"study_uid"`
			}
			if json.Unmarshal([]byte(e.Data), &d) == nil && d.StudyUID == studyUID {
				s.studyCache[studyUID] = e.EntryID
				return e.EntryID, nil
			}
		}
	}
	patientName := formatPatientName(readStringTag(data, 0x0010, 0x0010))
	studyDesc := readStringTag(data, 0x0008, 0x1030)
	studyDate := readStringTag(data, 0x0008, 0x0020)
	bodyPart := readStringTag(data, 0x0018, 0x0015)
	modality := readStringTag(data, 0x0008, 0x0060)
	studyData := map[string]interface{}{
		"study_uid":           studyUID,
		"modality":            modality,
		"patient_name":        patientName,
		"patient_dob":         readStringTag(data, 0x0010, 0x0030),
		"patient_sex":         readStringTag(data, 0x0010, 0x0040),
		"patient_age":         readStringTag(data, 0x0010, 0x1010),
		"study_date":          studyDate,
		"study_time":          readStringTag(data, 0x0008, 0x0030),
		"study_desc":          studyDesc,
		"institution":         readStringTag(data, 0x0008, 0x0080),
		"accession_number":    readStringTag(data, 0x0008, 0x0050),
		"referring_physician": readStringTag(data, 0x0008, 0x0090),
		"body_part":           bodyPart,
	}
	dataJSON, _ := json.Marshal(studyData)
	// Summary: "<BODYPART> <desc> (<Mon YYYY>)", skipping redundant body part.
	summary := studyDesc
	if bodyPart != "" && !strings.Contains(strings.ToUpper(studyDesc), strings.ToUpper(bodyPart)) {
		summary = bodyPart + " " + studyDesc
	}
	if studyDate != "" {
		summary = summary + " (" + formatDICOMDate(studyDate) + ")"
	}
	if summary == "" {
		summary = "Imaging Study"
	}
	e := &Entry{
		EntryID:   NewID(),
		DossierID: s.dossierID,
		ParentID:  rootID,
		Category:  CategoryImaging,
		Type:      "study",
		Summary:   summary,
		Timestamp: parseDICOMDate(studyDate),
		Data:      string(dataJSON),
	}
	if err := s.writeEntry(e); err != nil {
		return "", err
	}
	s.studyCache[studyUID] = e.EntryID
	s.result.Studies++
	return e.EntryID, nil
}

// getOrCreateSeries returns the entry ID for the series the DICOM buffer
// belongs to (cache → DB → create). Series identity is UID + description,
// matching preloadCaches. Increments result.Series only on creation.
func (s *importState) getOrCreateSeries(data []byte, studyID string) (string, error) {
	seriesUID := readStringTag(data, 0x0020, 0x000E)
	seriesDesc := readStringTag(data, 0x0008, 0x103E)
	cacheKey := seriesUID + "|" + seriesDesc
	if id, ok := s.seriesCache[cacheKey]; ok {
		return id, nil
	}
	// Cache miss: check children of the study already in the DB.
	children, err := EntryRead("", s.dossierID, &Filter{Category: CategoryImaging, ParentID: studyID, Type: "series"})
	if err == nil {
		for _, c := range children {
			var d struct {
				SeriesUID  string `json:"series_instance_uid"`
				SeriesDesc string `json:"series_desc"`
			}
			if json.Unmarshal([]byte(c.Data), &d) == nil && d.SeriesUID == seriesUID && d.SeriesDesc == seriesDesc {
				s.seriesCache[cacheKey] = c.EntryID
				return c.EntryID, nil
			}
		}
	}
	modality := readStringTag(data, 0x0008, 0x0060)
	if modality == "" {
		modality = "UN" // unknown modality placeholder
	}
	studyDate := readStringTag(data, 0x0008, 0x0020)
	bodyPart := readStringTag(data, 0x0018, 0x0015)
	seriesData := map[string]interface{}{
		"series_instance_uid":     seriesUID,
		"series_desc":             seriesDesc,
		"body_part":               bodyPart,
		"modality":                modality,
		"protocol_name":           readStringTag(data, 0x0018, 0x1030),
		"manufacturer":            readStringTag(data, 0x0008, 0x0070),
		"model":                   readStringTag(data, 0x0008, 0x1090),
		"magnetic_field_strength": readStringTag(data, 0x0018, 0x0087),
	}
	dataJSON, _ := json.Marshal(seriesData)
	summary := modality
	if seriesDesc != "" {
		summary = modality + " " + seriesDesc
	}
	e := &Entry{
		EntryID:   NewID(),
		DossierID: s.dossierID,
		ParentID:  studyID,
		Category:  CategoryImaging,
		Type:      "series",
		Summary:   summary,
		Ordinal:   readIntTag(data, 0x0020, 0x0011), // SeriesNumber
		Timestamp: parseDICOMDate(studyDate),
		Data:      string(dataJSON),
	}
	if err := s.writeEntry(e); err != nil {
		return "", err
	}
	s.seriesCache[cacheKey] = e.EntryID
	s.result.Series++
	return e.EntryID, nil
}

// insertSlice writes one slice entry under seriesID. For multi-frame files
// (framePos != nil) the SOP UID is suffixed with the frame number to stay
// unique, and position/slice-location come from the per-frame data; otherwise
// they come from the slice's own tags. Increments result.Slices on success.
func (s *importState) insertSlice(data []byte, sliceID, seriesID string, pixelMin, pixelMax int, orientationType string, instanceNumber int, framePos *FramePosition) error {
	// Object-store path: <dossier>/<first char of id>/<id> (fan-out by prefix).
	objPath := filepath.Join(s.dossierID, sliceID[0:1], sliceID)
	sopUID := readStringTag(data, 0x0008, 0x0018)
	if framePos != nil {
		// Multi-frame: disambiguate the shared SOP UID per frame.
		sopUID = fmt.Sprintf("%s.%d", sopUID, instanceNumber)
	}
	psRow, psCol := readFloatTag2(data, 0x0028, 0x0030)
	var posX, posY, posZ float64
	if framePos != nil && (framePos.X != 0 || framePos.Y != 0 || framePos.Z != 0) {
		posX, posY, posZ = framePos.X, framePos.Y, framePos.Z
	} else {
		// Fall back to ImagePositionPatient (0020,0032) "x\y\z".
		posStr := readStringTag(data, 0x0020, 0x0032)
		if posStr != "" {
			parts := strings.Split(posStr, "\\")
			if len(parts) >= 3 {
				posX, _ = strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
				posY, _ = strconv.ParseFloat(strings.TrimSpace(parts[1]), 64)
				posZ, _ = strconv.ParseFloat(strings.TrimSpace(parts[2]), 64)
			}
		}
	}
	var sliceLocation float64
	if framePos != nil {
		// Derive location from the position component along the view axis.
		switch orientationType {
		case "SAG":
			sliceLocation = posX
		case "COR":
			sliceLocation = posY
		default:
			sliceLocation = posZ
		}
	} else {
		sliceLocation = readFloatTag(data, 0x0020, 0x1041) // SliceLocation tag
	}
	wc1, wc2 := readFloatTag2(data, 0x0028, 0x1050) // window center(s)
	ww1, ww2 := readFloatTag2(data, 0x0028, 0x1051) // window width(s)
	rows := readIntTagSmart(data, 0x0028, 0x0010)
	cols := readIntTagSmart(data, 0x0028, 0x0011)
	sliceData := map[string]interface{}{
		"sop_instance_uid":           sopUID,
		"slice_location":             sliceLocation,
		"slice_thickness":            readFloatTag(data, 0x0018, 0x0050),
		"rows":                       rows,
		"cols":                       cols,
		"pixel_spacing_row":          psRow,
		"pixel_spacing_col":          psCol,
		"bits_allocated":             readIntTagSmart(data, 0x0028, 0x0100),
		"bits_stored":                readIntTagSmart(data, 0x0028, 0x0101),
		"high_bit":                   readIntTagSmart(data, 0x0028, 0x0102),
		"pixel_representation":       readIntTagSmart(data, 0x0028, 0x0103),
		"window_center":              wc1,
		"window_width":               ww1,
		"window_center_2":            wc2,
		"window_width_2":             ww2,
		"rescale_intercept":          readFloatTag(data, 0x0028, 0x1052),
		"rescale_slope":              readFloatTag(data, 0x0028, 0x1053),
		"image_position_x":           posX,
		"image_position_y":           posY,
		"image_position_z":           posZ,
		"image_orientation":          readStringTag(data, 0x0020, 0x0037),
		"tr":                         readFloatTag(data, 0x0018, 0x0080),
		"te":                         readFloatTag(data, 0x0018, 0x0081),
		"ti":                         readFloatTag(data, 0x0018, 0x0082),
		"flip_angle":                 readFloatTag(data, 0x0018, 0x1314),
		"echo_number":                readIntTag(data, 0x0018, 0x0086),
		"spacing_between_slices":     readFloatTag(data, 0x0018, 0x0088),
		"acquisition_time":           readStringTag(data, 0x0008, 0x0032),
		"content_time":               readStringTag(data, 0x0008, 0x0033),
		"photometric_interpretation": readStringTag(data, 0x0028, 0x0004),
		"samples_per_pixel":          readIntTagSmart(data, 0x0028, 0x0002),
		"pixel_min":                  pixelMin,
		"pixel_max":                  pixelMax,
		"path":                       objPath,
	}
	dataJSON, _ := json.Marshal(sliceData)
	summary := fmt.Sprintf("%s #%d z=%.1fmm %dx%d", orientationType, instanceNumber, sliceLocation, cols, rows)
	e := &Entry{
		EntryID:   sliceID,
		DossierID: s.dossierID,
		ParentID:  seriesID,
		Category:  CategoryImaging,
		Type:      "slice",
		Summary:   summary,
		Ordinal:   instanceNumber,
		Timestamp: parseDICOMDate(readStringTag(data, 0x0008, 0x0020)),
		Data:      string(dataJSON),
	}
	if err := s.writeEntry(e); err != nil {
		return err
	}
	s.result.Slices++
	return nil
}

// ============================================================================
// CORE PROCESSING
// ============================================================================

// importFromDir scans inputDir for DICOM files, groups them into series,
// applies the (upper-cased) series description filter, and creates study /
// series entries for each group. (Continues beyond this excerpt.)
func (s *importState) importFromDir(inputDir, seriesFilter string) error {
	s.log("Scanning files...\n")
	rootID, err := s.getOrCreateRoot(CategoryImaging)
	if err != nil {
		return fmt.Errorf("creating imaging root: %w", err)
	}
	files, err := findDICOMFiles(inputDir)
	if err != nil {
		return err
	}
	if len(files) == 0 {
		return fmt.Errorf("no DICOM files found in %s", inputDir)
	}
	// Group files by series
	type seriesGroup struct {
		slices   []dicomFileRef
		firstBuf []byte // first file's bytes, used for series/study metadata
	}
	seriesMap := make(map[string]*seriesGroup)
	for _, path := range files {
		data, err := os.ReadFile(path)
		if err != nil {
			continue
		}
		seriesUID := readStringTag(data, 0x0020, 0x000E)
		seriesDesc := readStringTag(data, 0x0008, 0x103E)
		instanceNum := readIntTagSmart(data, 0x0020, 0x0013)
		// Same composite key as the series cache: UID + description.
		key := seriesUID + "|" + seriesDesc
		if _, ok := seriesMap[key]; !ok {
			seriesMap[key] = &seriesGroup{firstBuf: data}
		}
		seriesMap[key].slices = append(seriesMap[key].slices, dicomFileRef{Path: path, InstanceNum: instanceNum})
	}
	totalFileCount := 0
	for _, sg := range seriesMap {
		totalFileCount += len(sg.slices)
	}
	s.log("Found %d series, %d files\n", len(seriesMap), totalFileCount)
	fileCounter := 0 // NOTE(review): presumably advanced later in this function (beyond this excerpt)
	for _, sg := range seriesMap {
		// Process slices in acquisition order.
		sort.Slice(sg.slices, func(i, j int) bool {
			return sg.slices[i].InstanceNum < sg.slices[j].InstanceNum
		})
		data := sg.firstBuf
		seriesDesc := readStringTag(data, 0x0008, 0x103E)
		if seriesDesc == "" {
			seriesDesc = "UNKNOWN"
		}
		bodyPart := strings.ToUpper(strings.TrimSpace(readStringTag(data, 0x0018, 0x0015)))
		modality := strings.TrimSpace(readStringTag(data, 0x0008, 0x0060))
		studyDate := strings.TrimSpace(readStringTag(data, 0x0008, 0x0020))
		studyLabel := formatStudyProgressLabel(bodyPart, modality, studyDate)
		if seriesFilter != "" && !strings.Contains(strings.ToUpper(seriesDesc), seriesFilter) {
			s.log("Skipping series (filter): %s\n", seriesDesc)
			continue
		}
		studyID, err := s.getOrCreateStudy(data, rootID)
		if err != nil {
			s.log("Error creating study: %v\n", err)
			s.result.Errors = append(s.result.Errors, fmt.Sprintf("study: %v", err))
			continue
		}
		seriesID, err := s.getOrCreateSeries(data, studyID)
		if err != nil {
			s.log("Error creating series: %v\n", err)
			s.result.Errors = append(s.result.Errors, fmt.Sprintf("series: %v", err))
			continue
		}
		// Pre-count frames (multi-frame files contribute NumberOfFrames each).
		totalFrames := 0
		for _, sl := range sg.slices {
			slData, err := os.ReadFile(sl.Path)
			if err != nil {
				continue
			}
			nf := readIntTagSmart(slData, 0x0028, 0x0008)
			if nf == 0 {
				nf = 1
			}
			totalFrames += nf
		}
sliceThickness := readFloatTag(data, 0x0018, 0x0050) if sliceThickness == 0 { sliceThickness = readFloatTag(data, 0x0018, 0x0088) } s.log("\nProcessing series: %s (%d frames, ST=%.1fmm)\n", seriesDesc, totalFrames, sliceThickness) seriesBBox := [4]int{-1, -1, -1, -1} seriesRows, seriesCols := 0, 0 frameCounter := 0 for _, sl := range sg.slices { fileCounter++ data, err := os.ReadFile(sl.Path) if err != nil { continue } s.log("file %d/%d\n", fileCounter, totalFileCount) transferSyntax := getTransferSyntax(data) isCompressed := isCompressedTransferSyntax(transferSyntax) rows := readIntTagSmart(data, 0x0028, 0x0010) cols := readIntTagSmart(data, 0x0028, 0x0011) if rows == 0 || cols == 0 { continue } numFrames := readIntTagSmart(data, 0x0028, 0x0008) if numFrames == 0 { numFrames = 1 } samplesPerPixel := readIntTagSmart(data, 0x0028, 0x0002) if samplesPerPixel == 0 { samplesPerPixel = 1 } isRGB := samplesPerPixel == 3 framePositions := parsePerFramePositions(data, numFrames) orientationType := getOrientationType(data) bytesPerPixel := 2 if isRGB { bytesPerPixel = 3 } frameSize := rows * cols * bytesPerPixel expectedBytes := frameSize * numFrames pixelPos := findPixelData(data, expectedBytes) if pixelPos < 0 { pixelPos = findLastTag(data, 0x7FE0, 0x0010) } if pixelPos < 0 { continue } vr := string(data[pixelPos+4 : pixelPos+6]) var pixelDataStart int if vr == "OW" || vr == "OB" { pixelDataStart = pixelPos + 12 } else { pixelDataStart = pixelPos + 8 } if isCompressed { decompData, err := decompressDICOM(sl.Path) if err != nil { s.log(" Error decompressing: %v\n", err) continue } decompRows := readIntTagSmart(decompData, 0x0028, 0x0010) decompCols := readIntTagSmart(decompData, 0x0028, 0x0011) if decompRows == 0 || decompCols == 0 { decompRows, decompCols = rows, cols } decompExpected := decompRows * decompCols * bytesPerPixel * numFrames decompPixelPos := findPixelData(decompData, decompExpected) if decompPixelPos < 0 { decompPixelPos = findLastTag(decompData, 0x7FE0, 
0x0010) } if decompPixelPos < 0 { s.log(" Error: no pixel data in decompressed DICOM\n") continue } decompVR := string(decompData[decompPixelPos+4 : decompPixelPos+6]) var decompPixelStart int if decompVR == "OW" || decompVR == "OB" { decompPixelStart = decompPixelPos + 12 } else { decompPixelStart = decompPixelPos + 8 } decompFrameSize := decompRows * decompCols * bytesPerPixel for frameIdx := 0; frameIdx < numFrames; frameIdx++ { frameCounter++ frameOffset := decompPixelStart + frameIdx*decompFrameSize var fp *FramePosition if frameIdx < len(framePositions) { fp = &framePositions[frameIdx] } sliceID := NewID() if isRGB { numPixels := decompRows * decompCols pixels := make([]byte, numPixels*3) for i := 0; i < numPixels*3; i++ { if off := frameOffset + i; off < len(decompData) { pixels[i] = decompData[off] } } pngData, err := encodeRGBPNG(pixels, decompCols, decompRows) if err != nil { s.log(" Error encoding RGB: %v\n", err) continue } if err := ObjectWrite(nil, s.dossierID, sliceID, pngData); err != nil { s.log(" Error saving RGB: %v\n", err) continue } if err := s.insertSlice(data, sliceID, seriesID, 0, 0, orientationType, frameCounter, fp); err != nil { s.log(" Error inserting slice: %v\n", err) } } else { numPixels := decompRows * decompCols pixels := make([]uint16, numPixels) var minVal, maxVal uint16 = 65535, 0 for i := 0; i < numPixels; i++ { off := frameOffset + i*2 if off+2 > len(decompData) { break } v := binary.LittleEndian.Uint16(decompData[off : off+2]) pixels[i] = v if v < minVal { minVal = v } if v > maxVal { maxVal = v } } x1, y1, x2, y2, valid := boundingBox(pixels, decompCols, decompRows, minVal, maxVal) if valid { sliceBBox := [4]int{x1, y1, x2, y2} if seriesBBox[0] < 0 { seriesBBox = sliceBBox seriesRows, seriesCols = decompRows, decompCols } else { seriesBBox = unionBBox(seriesBBox, sliceBBox) } } pngData, err := encode16BitPNG(pixels, decompCols, decompRows) if err != nil { s.log(" Error encoding 16-bit: %v\n", err) continue } if err := 
ObjectWrite(nil, s.dossierID, sliceID, pngData); err != nil { s.log(" Error saving 16-bit: %v\n", err) continue } if err := s.insertSlice(data, sliceID, seriesID, int(minVal), int(maxVal), orientationType, frameCounter, fp); err != nil { s.log(" Error inserting slice: %v\n", err) } } s.log("%s - %s - slice %d/%d\n", studyLabel, seriesDesc, frameCounter, totalFrames) } continue } // Uncompressed for frameIdx := 0; frameIdx < numFrames; frameIdx++ { frameCounter++ frameOffset := pixelDataStart + frameIdx*frameSize var fp *FramePosition if frameIdx < len(framePositions) { fp = &framePositions[frameIdx] } sliceID := NewID() if isRGB { numPixels := rows * cols pixels := make([]byte, numPixels*3) for i := 0; i < numPixels*3; i++ { if off := frameOffset + i; off < len(data) { pixels[i] = data[off] } } pngData, err := encodeRGBPNG(pixels, cols, rows) if err != nil { s.log(" Error encoding RGB: %v\n", err) continue } if err := ObjectWrite(nil, s.dossierID, sliceID, pngData); err != nil { s.log(" Error saving RGB: %v\n", err) continue } if err := s.insertSlice(data, sliceID, seriesID, 0, 0, orientationType, frameCounter, fp); err != nil { s.log(" Error inserting slice: %v\n", err) } } else { numPixels := rows * cols pixels := make([]uint16, numPixels) var minVal, maxVal uint16 = 65535, 0 for i := 0; i < numPixels; i++ { off := frameOffset + i*2 if off+2 > len(data) { break } v := binary.LittleEndian.Uint16(data[off : off+2]) pixels[i] = v if v < minVal { minVal = v } if v > maxVal { maxVal = v } } x1, y1, x2, y2, valid := boundingBox(pixels, cols, rows, minVal, maxVal) if valid { sliceBBox := [4]int{x1, y1, x2, y2} if seriesBBox[0] < 0 { seriesBBox = sliceBBox seriesRows, seriesCols = rows, cols } else { seriesBBox = unionBBox(seriesBBox, sliceBBox) } } pngData, err := encode16BitPNG(pixels, cols, rows) if err != nil { s.log(" Error encoding 16-bit: %v\n", err) continue } if err := ObjectWrite(nil, s.dossierID, sliceID, pngData); err != nil { s.log(" Error saving 16-bit: 
%v\n", err) continue } if err := s.insertSlice(data, sliceID, seriesID, int(minVal), int(maxVal), orientationType, frameCounter, fp); err != nil { s.log(" Error inserting slice: %v\n", err) } } s.log("%s - %s - slice %d/%d\n", studyLabel, seriesDesc, frameCounter, totalFrames) } } // Compute viewport: aspect-ratio-preserving content region as 0.0-1.0 ratios if seriesBBox[0] >= 0 && seriesRows > 0 && seriesCols > 0 { contentW := float64(seriesBBox[2] - seriesBBox[0] + 1) contentH := float64(seriesBBox[3] - seriesBBox[1] + 1) savedPct := 100.0 * (1.0 - (contentW*contentH)/float64(seriesCols*seriesRows)) if savedPct > 5.0 { // Expand content rect to match original aspect ratio aspect := float64(seriesCols) / float64(seriesRows) centerX := float64(seriesBBox[0]+seriesBBox[2]) / 2.0 centerY := float64(seriesBBox[1]+seriesBBox[3]) / 2.0 var expandedW, expandedH float64 if contentW/contentH > aspect { expandedW = contentW expandedH = contentW / aspect } else { expandedH = contentH expandedW = contentH * aspect } // Convert to 0.0-1.0 ratios, clamped to image bounds vpLeft := math.Max(0, (centerX-expandedW/2)/float64(seriesCols)) vpTop := math.Max(0, (centerY-expandedH/2)/float64(seriesRows)) vpRight := math.Min(1, (centerX+expandedW/2)/float64(seriesCols)) vpBottom := math.Min(1, (centerY+expandedH/2)/float64(seriesRows)) if series, err := entryGetByID("", seriesID); err == nil && series != nil { var seriesDataMap map[string]interface{} json.Unmarshal([]byte(series.Data), &seriesDataMap) if seriesDataMap == nil { seriesDataMap = make(map[string]interface{}) } seriesDataMap["viewport_left"] = math.Round(vpLeft*1000) / 1000 seriesDataMap["viewport_top"] = math.Round(vpTop*1000) / 1000 seriesDataMap["viewport_right"] = math.Round(vpRight*1000) / 1000 seriesDataMap["viewport_bottom"] = math.Round(vpBottom*1000) / 1000 updatedData, _ := json.Marshal(seriesDataMap) series.Data = string(updatedData) s.writeEntry(series) s.log(" Viewport: %.3f,%.3f → %.3f,%.3f (saves %.0f%%)\n", 
vpLeft, vpTop, vpRight, vpBottom, savedPct) } } else { s.log(" No significant viewport (would only save %.0f%%)\n", savedPct) } } } return nil }