2024-08-29 16:59:25 +08:00
|
|
|
|
import path from "path";
|
|
|
|
|
import * as dicomParser from "dicom-parser";
|
|
|
|
|
import fs from "fs";
|
2024-09-02 14:18:06 +08:00
|
|
|
|
import crypto from "crypto";
|
2024-08-29 16:59:25 +08:00
|
|
|
|
|
|
|
|
|
/**
 * Per-file metadata records bucketed under a string key.
 *
 * As the index parameter name indicates, the key is a SeriesInstanceUID and
 * the value is the list of records filed under that series.
 */
export interface StructuredData {
  [seriesInstanceUID: string]: ExtractMetadata[];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Metadata extracted from a single DICOM file.
 *
 * `filePath`, `PatientSex` and `PatientAge` are always present; every other
 * field is optional and may be `undefined`.
 */
export interface ExtractMetadata {
  /** Path of the file this record was read from. */
  filePath: string;
  /** Patient sex value as a string. */
  PatientSex: string;
  /** Patient age value as a string. */
  PatientAge: string;
  /** Study Instance UID, when available. */
  StudyInstanceUID?: string;
  /** Series Instance UID, when available. */
  SeriesInstanceUID?: string;
  /** Patient name, when available. */
  PatientName?: string;
  /** Pixel samples viewed as unsigned 16-bit values, when provided. */
  pixelData?: Uint16Array;
}
|
|
|
|
|
|
2024-08-30 16:58:44 +08:00
|
|
|
|
/**
 * Checks whether a file is a DICOM file by looking for the "DICM" magic
 * marker stored at byte offsets 128-131 (right after the 128-byte preamble).
 *
 * @param filePath Path of the file to inspect.
 * @returns `true` when the marker is present; `false` when it is absent, when
 * the file is shorter than 132 bytes, or when the file cannot be opened or
 * read (in which case the error is logged).
 */
const isDICOMFile = async (filePath: string): Promise<boolean> => {
  const preambleLength = 128;
  const magic = "DICM";
  const headerLength = preambleLength + magic.length; // 132 bytes

  try {
    const fileHandle = await fs.promises.open(filePath, "r");
    try {
      // Read only the first 132 bytes; the rest of the file is irrelevant here.
      const buffer = Buffer.alloc(headerLength);
      const { bytesRead } = await fileHandle.read(buffer, 0, headerLength, 0);
      if (bytesRead < headerLength) {
        // Too short to contain the preamble plus the marker.
        return false;
      }
      return buffer.toString("utf-8", preambleLength, headerLength) === magic;
    } finally {
      // Always release the descriptor, even when the read fails.
      await fileHandle.close();
    }
  } catch (error) {
    console.error(`Error reading file ${filePath}:`, error);
    return false;
  }
};
|
|
|
|
|
|
2024-08-29 16:59:25 +08:00
|
|
|
|
/**
 * Recursively walks a directory tree and collects the paths of all files
 * that `isDICOMFile` recognises as DICOM.
 *
 * All entries of a directory are examined concurrently, so the order of the
 * collected paths is not guaranteed.
 *
 * @param dir Directory to scan.
 * @param fileList Accumulator the matches are appended to; it is mutated in
 * place and defaults to a fresh empty array.
 * @returns The same `fileList` array, filled with the matching file paths.
 */
export const findDcmFiles = async (
  dir: string,
  fileList: string[] = []
): Promise<string[]> => {
  const entries = await fs.promises.readdir(dir, { withFileTypes: true });

  // Handles one directory entry: descend into folders, test plain files.
  const visit = async (entry: (typeof entries)[number]): Promise<void> => {
    const entryPath = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      await findDcmFiles(entryPath, fileList);
      return;
    }

    const matches = await isDICOMFile(entryPath);
    if (matches) {
      fileList.push(entryPath);
    }
  };

  await Promise.all(entries.map((entry) => visit(entry)));

  return fileList;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a single DICOM file and extracts its identifying metadata.
 *
 * Tags read from the parsed data set:
 * - `x0020000d` Study Instance UID
 * - `x0020000e` Series Instance UID
 * - `x00100010` Patient's Name
 * - `x00100040` Patient's Sex (defaults to `""` when absent)
 * - `x00101010` Patient's Age (defaults to `""` when absent)
 *
 * Pixel data is intentionally not touched: the result never carried it, and
 * building a 16-bit view over it made metadata extraction fail for files that
 * have no pixel data element or whose pixel data starts at an odd offset.
 *
 * @param filePath Path of the DICOM file to parse.
 * @returns The extracted metadata, or `undefined` when the file cannot be
 * read or parsed (the error is logged).
 */
export const parseDICOMFile = async (
  filePath: string
): Promise<ExtractMetadata | undefined> => {
  try {
    const fileBuffer = await fs.promises.readFile(filePath);
    const byteArray = new Uint8Array(fileBuffer);

    // Transfer syntax option handed to dicom-parser
    // (1.2.840.10008.1.2 is Implicit VR Little Endian).
    const options = { TransferSyntaxUID: "1.2.840.10008.1.2" };
    const dataSet = dicomParser.parseDicom(byteArray, options);

    return {
      filePath,
      StudyInstanceUID: dataSet.string("x0020000d"),
      SeriesInstanceUID: dataSet.string("x0020000e"),
      // (0010,0010) is Patient's Name; (0010,0030) is the birth date.
      PatientName: dataSet.string("x00100010"),
      PatientSex: dataSet.string("x00100040") ?? "",
      PatientAge: dataSet.string("x00101010") ?? "",
    };
  } catch (error) {
    console.error(`Error parsing file ${filePath}:`, error);
    return undefined;
  }
};
|
|
|
|
|
|
|
|
|
|
/**
 * Parses files in consecutive batches: the files of one batch are parsed
 * concurrently, and the next batch starts only after the current one settles.
 *
 * Files that fail to parse (rejected, or resolved to `undefined`) are left
 * out of the result.
 *
 * @param filePaths Paths of the files to parse.
 * @param batchSize Maximum number of files parsed concurrently. Values below
 * 1 (and `NaN`) are treated as 1 so the loop always advances; fractional
 * values are rounded down.
 * @returns Metadata of the successfully parsed files, in input order.
 */
export const processFilesInBatches = async (
  filePaths: string[],
  batchSize: number
): Promise<ExtractMetadata[]> => {
  // A step of 0 or less would never advance the loop below.
  const step = batchSize >= 1 ? Math.floor(batchSize) : 1;
  const results: ExtractMetadata[] = [];

  for (let start = 0; start < filePaths.length; start += step) {
    const batch = filePaths.slice(start, start + step);
    const settled = await Promise.allSettled(
      batch.map((filePath) => parseDICOMFile(filePath))
    );

    for (const outcome of settled) {
      // Keep only real metadata: skip rejections and `undefined` parse results.
      if (outcome.status === "fulfilled" && outcome.value !== undefined) {
        results.push(outcome.value);
      }
    }
  }

  return results;
};
|
|
|
|
|
|
2024-08-30 16:58:44 +08:00
|
|
|
|
/**
 * Metadata shared by a group of files, together with the files that belong
 * to the group.
 */
export interface StructuredMetadata {
  /** Paths of the files collected in this group. */
  filePaths: string[];
  /** Content hashes of the files collected in this group. */
  fileHash: string[];
  /** Patient sex value as a string. */
  PatientSex: string;
  /** Patient age value as a string. */
  PatientAge: string;
  /** Study Instance UID, when available. */
  StudyInstanceUID?: string;
  /** Series Instance UID, when available. */
  SeriesInstanceUID?: string;
  /** Patient name, when available. */
  PatientName?: string;
}
|
2024-08-29 16:59:25 +08:00
|
|
|
|
|
2024-08-31 02:18:35 +08:00
|
|
|
|
/**
 * Progress information handed to a scan progress callback.
 */
export interface ScanProgress {
  /** Share of the work completed so far, expressed as a percentage. */
  percentage: number;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Computes the SHA-256 hash of a file's contents.
 *
 * The file is consumed as a stream, chunk by chunk, so it is never held in
 * memory as a whole.
 *
 * @param filePath Path of the file to hash.
 * @returns The digest as a hexadecimal string.
 * @throws Rejects with the stream error when the file cannot be read.
 */
async function calculateFileHash(filePath: string): Promise<string> {
  const hasher = crypto.createHash("sha256");

  for await (const chunk of fs.createReadStream(filePath)) {
    hasher.update(chunk);
  }

  return hasher.digest("hex");
}
|
|
|
|
|
|
2024-09-02 14:18:06 +08:00
|
|
|
|
/**
 * Name of the metadata property used as the grouping key.
 * Grouping is done at the series level.
 */
export const keyProp = "SeriesInstanceUID" as const;
|
|
|
|
|
|
2024-08-31 02:18:35 +08:00
|
|
|
|
/**
 * Groups per-file metadata records by their `keyProp` value and hashes every
 * file so identical content is listed only once per group.
 *
 * Records are processed sequentially in input order. The first record seen
 * for a key creates the group and supplies its descriptive fields; later
 * records with the same key only contribute their file path and hash, and
 * only when that hash is not already in the group. Records without a key
 * value all share one group.
 *
 * @param data Metadata records to group. Missing (`null`/`undefined`)
 * entries are skipped.
 * @param progressCallback Optional callback invoked after each record with
 * the percentage of records processed so far.
 * @returns The groups, ordered by first appearance of their key.
 * @throws Rejects when hashing one of the files fails.
 */
export const structureMetadata = async (
  data: ExtractMetadata[],
  progressCallback?: (progress: ScanProgress) => void
): Promise<StructuredMetadata[]> => {
  const result: StructuredMetadata[] = [];

  // Key -> group index, so lookups do not rescan `result` for every record.
  const groupsByKey = new Map<
    string | undefined,
    { entry: StructuredMetadata; hashes: Set<string> }
  >();

  for (const [index, item] of data.entries()) {
    // Defensive: tolerate holes left by files that could not be parsed.
    if (item != null) {
      const hash = await calculateFileHash(item.filePath);
      const key = item[keyProp];
      const group = groupsByKey.get(key);

      if (group) {
        // Known group: add the file unless identical content is already listed.
        if (!group.hashes.has(hash)) {
          group.hashes.add(hash);
          group.entry.filePaths.push(item.filePath);
          group.entry.fileHash.push(hash);
        }
      } else {
        // First record for this key: create the group from its fields.
        const entry: StructuredMetadata = {
          filePaths: [item.filePath],
          fileHash: [hash],
          StudyInstanceUID: item.StudyInstanceUID,
          SeriesInstanceUID: item.SeriesInstanceUID,
          PatientName: item.PatientName,
          PatientAge: item.PatientAge,
          PatientSex: item.PatientSex,
        };
        result.push(entry);
        groupsByKey.set(key, { entry, hashes: new Set([hash]) });
      }
    }

    progressCallback?.({ percentage: ((index + 1) / data.length) * 100 });
  }

  return result;
};
|