fix: replace naive byte-scan findTag with DICOM stream walker
The old findTag scanned raw bytes for the 4-byte tag pattern, which caused false matches inside Siemens CSA private OB blobs (e.g. the large 0029,1020 Series Header). This corrupted body_part and other fields on Siemens MAGNETOM Sola MRIs because findTag(0x0018, 0x0015) hit a matching byte sequence inside the binary payload before reaching the real BodyPartExamined element. Fix: walkToTag() walks the DICOM element stream sequentially, reading VR and length fields to skip element values entirely. Falls back to byte-scan only on corrupt/truncated length fields. findLastTag updated to use the same walker.
This commit is contained in:
parent
989969375d
commit
bf57e28e71
199
lib/dicom.go
199
lib/dicom.go
|
|
@ -292,11 +292,78 @@ func safeExtractPath(destDir, name string) (string, error) {
|
||||||
// DICOM TAG READING
|
// DICOM TAG READING
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
func findTag(data []byte, group, elem uint16) int {
|
// walkToTag walks the DICOM element stream from startPos, respecting VR/length
|
||||||
|
// fields so it never matches tag bytes inside binary payloads (e.g. Siemens CSA
|
||||||
|
// OB blobs). Returns the byte offset of the matching element, or -1.
|
||||||
|
func walkToTag(data []byte, startPos int, group, elem uint16) int {
|
||||||
|
pos := startPos
|
||||||
|
n := len(data)
|
||||||
|
for pos+4 <= n {
|
||||||
|
if pos+4 > n {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
g := binary.LittleEndian.Uint16(data[pos : pos+2])
|
||||||
|
e := binary.LittleEndian.Uint16(data[pos+2 : pos+4])
|
||||||
|
if g == group && e == elem {
|
||||||
|
return pos
|
||||||
|
}
|
||||||
|
// Determine value length to skip to next element.
|
||||||
|
// If we can't parse a sensible length, fall back to byte scan.
|
||||||
|
if pos+6 > n {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
vr := string(data[pos+4 : pos+6])
|
||||||
|
var valLen uint32
|
||||||
|
var headerLen int
|
||||||
|
if isValidVR(data, pos+4) {
|
||||||
|
// Explicit VR
|
||||||
|
switch vr {
|
||||||
|
case "OB", "OW", "SQ", "UN", "OD", "UC", "UR", "UT":
|
||||||
|
// 4-byte reserved + 4-byte length
|
||||||
|
if pos+12 > n {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
valLen = binary.LittleEndian.Uint32(data[pos+8 : pos+12])
|
||||||
|
headerLen = 12
|
||||||
|
default:
|
||||||
|
// 2-byte length
|
||||||
|
if pos+8 > n {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
valLen = uint32(binary.LittleEndian.Uint16(data[pos+6 : pos+8]))
|
||||||
|
headerLen = 8
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Implicit VR: tag(4) + length(4)
|
||||||
|
if pos+8 > n {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
valLen = binary.LittleEndian.Uint32(data[pos+4 : pos+8])
|
||||||
|
headerLen = 8
|
||||||
|
}
|
||||||
|
// 0xFFFFFFFF = undefined length (SQ/item) — step past header only and
|
||||||
|
// let the inner loop find the sequence delimiter naturally.
|
||||||
|
if valLen == 0xFFFFFFFF {
|
||||||
|
pos += headerLen
|
||||||
|
} else {
|
||||||
|
next := pos + headerLen + int(valLen)
|
||||||
|
if next <= pos || next > n {
|
||||||
|
// Corrupt/truncated length — fall back to byte-scan from here
|
||||||
|
return findTagBytes(data, pos+1, group, elem)
|
||||||
|
}
|
||||||
|
pos = next
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// findTagBytes is the original byte-scan fallback used only when the stream
|
||||||
|
// walker cannot continue (corrupt length field).
|
||||||
|
func findTagBytes(data []byte, startPos int, group, elem uint16) int {
|
||||||
target := make([]byte, 4)
|
target := make([]byte, 4)
|
||||||
binary.LittleEndian.PutUint16(target[0:2], group)
|
binary.LittleEndian.PutUint16(target[0:2], group)
|
||||||
binary.LittleEndian.PutUint16(target[2:4], elem)
|
binary.LittleEndian.PutUint16(target[2:4], elem)
|
||||||
for i := 0; i < len(data)-4; i++ {
|
for i := startPos; i < len(data)-4; i++ {
|
||||||
if data[i] == target[0] && data[i+1] == target[1] &&
|
if data[i] == target[0] && data[i+1] == target[1] &&
|
||||||
data[i+2] == target[2] && data[i+3] == target[3] {
|
data[i+2] == target[2] && data[i+3] == target[3] {
|
||||||
return i
|
return i
|
||||||
|
|
@ -305,21 +372,76 @@ func findTag(data []byte, group, elem uint16) int {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
func findLastTag(data []byte, group, elem uint16) int {
|
// findTag finds the first occurrence of a DICOM tag, using the stream walker
|
||||||
target := make([]byte, 4)
|
// starting from the DICOM preamble offset (128-byte preamble + 4-byte DICM).
|
||||||
binary.LittleEndian.PutUint16(target[0:2], group)
|
func findTag(data []byte, group, elem uint16) int {
|
||||||
binary.LittleEndian.PutUint16(target[2:4], elem)
|
// DICOM files start with 128-byte preamble + "DICM" magic.
|
||||||
lastPos := -1
|
// Meta header (group 0x0002) always lives there; for the main dataset
|
||||||
for i := 0; i < len(data)-4; i++ {
|
// we start the walk right after the preamble when present.
|
||||||
if data[i] == target[0] && data[i+1] == target[1] &&
|
startPos := 0
|
||||||
data[i+2] == target[2] && data[i+3] == target[3] {
|
if len(data) >= 132 && string(data[128:132]) == "DICM" {
|
||||||
lastPos = i
|
startPos = 132
|
||||||
|
// For meta-header tags (group 0x0002), walk from 132.
|
||||||
|
// For dataset tags, also walk from 132 — the walker handles both.
|
||||||
}
|
}
|
||||||
|
result := walkToTag(data, startPos, group, elem)
|
||||||
|
if result < 0 && startPos > 0 {
|
||||||
|
// Retry from byte 0 for edge cases (no preamble, raw DICOM)
|
||||||
|
result = walkToTag(data, 0, group, elem)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func findLastTag(data []byte, group, elem uint16) int {
|
||||||
|
// Walk the full stream and keep the last match position.
|
||||||
|
startPos := 0
|
||||||
|
if len(data) >= 132 && string(data[128:132]) == "DICM" {
|
||||||
|
startPos = 132
|
||||||
|
}
|
||||||
|
lastPos := -1
|
||||||
|
pos := startPos
|
||||||
|
for {
|
||||||
|
found := walkToTag(data, pos, group, elem)
|
||||||
|
if found < 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
lastPos = found
|
||||||
|
pos = found + 1 // advance past this match to find a later one
|
||||||
}
|
}
|
||||||
return lastPos
|
return lastPos
|
||||||
}
|
}
|
||||||
|
|
||||||
func readStringTag(data []byte, group, elem uint16) string {
|
// isValidVR checks if the 2 bytes at offset look like a valid DICOM VR
|
||||||
|
func isValidVR(data []byte, offset int) bool {
|
||||||
|
if offset+2 > len(data) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
vr := string(data[offset : offset+2])
|
||||||
|
validVRs := map[string]bool{
|
||||||
|
"AE": true, "AS": true, "AT": true, "CS": true, "DA": true, "DS": true, "DT": true,
|
||||||
|
"FL": true, "FD": true, "IS": true, "LO": true, "LT": true, "OB": true, "OD": true,
|
||||||
|
"OF": true, "OW": true, "PN": true, "SH": true, "SL": true, "SQ": true, "SS": true,
|
||||||
|
"ST": true, "TM": true, "UC": true, "UI": true, "UL": true, "UN": true, "UR": true,
|
||||||
|
"US": true, "UT": true,
|
||||||
|
}
|
||||||
|
return validVRs[vr]
|
||||||
|
}
|
||||||
|
|
||||||
|
// isImplicitVR returns true if transfer syntax uses implicit VR (no VR field in data elements)
|
||||||
|
func isImplicitVR(data []byte) bool {
|
||||||
|
// Check transfer syntax UID from file meta info (group 0x0002)
|
||||||
|
ts := readStringTagExplicit(data, 0x0002, 0x0010)
|
||||||
|
if ts == "" {
|
||||||
|
// No transfer syntax specified - default to Explicit VR Little Endian
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Implicit VR Little Endian: 1.2.840.10008.1.2
|
||||||
|
// Also check for Siemens private implicit VR variants
|
||||||
|
return ts == "1.2.840.10008.1.2" || strings.Contains(ts, "1.2.276.0.7230010")
|
||||||
|
}
|
||||||
|
|
||||||
|
// readStringTagExplicit reads with explicit VR assumption (for meta-header)
|
||||||
|
func readStringTagExplicit(data []byte, group, elem uint16) string {
|
||||||
pos := findTag(data, group, elem)
|
pos := findTag(data, group, elem)
|
||||||
if pos < 0 {
|
if pos < 0 {
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -327,7 +449,54 @@ func readStringTag(data []byte, group, elem uint16) string {
|
||||||
vr := string(data[pos+4 : pos+6])
|
vr := string(data[pos+4 : pos+6])
|
||||||
var length uint16
|
var length uint16
|
||||||
var valPos int
|
var valPos int
|
||||||
if vr == "OB" || vr == "OW" || vr == "SQ" || vr == "UN" {
|
if vr == "OB" || vr == "OW" || vr == "SQ" || vr == "UN" || vr == "OD" || vr == "UC" || vr == "UT" {
|
||||||
|
length = uint16(binary.LittleEndian.Uint32(data[pos+8 : pos+12]))
|
||||||
|
valPos = pos + 12
|
||||||
|
} else {
|
||||||
|
length = binary.LittleEndian.Uint16(data[pos+6 : pos+8])
|
||||||
|
valPos = pos + 8
|
||||||
|
}
|
||||||
|
if valPos+int(length) > len(data) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
raw := data[valPos : valPos+int(length)]
|
||||||
|
return strings.TrimRight(string(raw), " \x00")
|
||||||
|
}
|
||||||
|
|
||||||
|
func readStringTag(data []byte, group, elem uint16) string {
|
||||||
|
pos := findTag(data, group, elem)
|
||||||
|
if pos < 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for implicit VR by validating the VR field
|
||||||
|
implicitVR := !isValidVR(data, pos+4)
|
||||||
|
if implicitVR {
|
||||||
|
// Implicit VR: tag (4) + length (4) + value
|
||||||
|
length := binary.LittleEndian.Uint32(data[pos+4 : pos+8])
|
||||||
|
valPos := pos + 8
|
||||||
|
if valPos+int(length) > len(data) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
raw := data[valPos : valPos+int(length)]
|
||||||
|
var s string
|
||||||
|
if utf8.Valid(raw) {
|
||||||
|
s = string(raw)
|
||||||
|
} else {
|
||||||
|
runes := make([]rune, len(raw))
|
||||||
|
for i, b := range raw {
|
||||||
|
runes[i] = rune(b)
|
||||||
|
}
|
||||||
|
s = string(runes)
|
||||||
|
}
|
||||||
|
return strings.TrimRight(s, " \x00")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit VR path
|
||||||
|
vr := string(data[pos+4 : pos+6])
|
||||||
|
var length uint16
|
||||||
|
var valPos int
|
||||||
|
if vr == "OB" || vr == "OW" || vr == "SQ" || vr == "UN" || vr == "OD" || vr == "UC" || vr == "UT" {
|
||||||
length = uint16(binary.LittleEndian.Uint32(data[pos+8 : pos+12]))
|
length = uint16(binary.LittleEndian.Uint32(data[pos+8 : pos+12]))
|
||||||
valPos = pos + 12
|
valPos = pos + 12
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -366,6 +535,8 @@ func readIntTagSmart(data []byte, group, elem uint16) int {
|
||||||
if pos < 0 {
|
if pos < 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
// Check for implicit VR before reading VR field
|
||||||
|
if isValidVR(data, pos+4) {
|
||||||
vr := string(data[pos+4 : pos+6])
|
vr := string(data[pos+4 : pos+6])
|
||||||
if vr == "US" || vr == "SS" {
|
if vr == "US" || vr == "SS" {
|
||||||
valPos := pos + 8
|
valPos := pos + 8
|
||||||
|
|
@ -373,6 +544,8 @@ func readIntTagSmart(data []byte, group, elem uint16) int {
|
||||||
return int(binary.LittleEndian.Uint16(data[valPos : valPos+2]))
|
return int(binary.LittleEndian.Uint16(data[valPos : valPos+2]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// For implicit VR or non-integer VRs, fall back to string parsing
|
||||||
s := strings.TrimSpace(readStringTag(data, group, elem))
|
s := strings.TrimSpace(readStringTag(data, group, elem))
|
||||||
n, _ := strconv.Atoi(s)
|
n, _ := strconv.Atoi(s)
|
||||||
return n
|
return n
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue