124 lines
3.6 KiB
TypeScript
124 lines
3.6 KiB
TypeScript
import path from "path";
|
||
import * as dicomParser from "dicom-parser";
|
||
import fs from "fs";
|
||
|
||
/**
 * File metadata grouped two levels deep.
 *
 * The outer key is a StudyInstanceUID, the inner key is a SeriesInstanceUID,
 * and each leaf is the list of files that reported that study/series pair.
 */
export interface StructuredData {
  [StudyInstanceUID: string]: {
    [SeriesInstanceUID: string]: ExtractMetadata[];
  };
}
|
||
|
||
/**
 * Metadata extracted from a single DICOM file.
 */
export interface ExtractMetadata {
  /** Path of the file the metadata was read from. */
  filePath: string;
  /** Value of tag (0020,000D); absent when the file does not carry it. */
  StudyInstanceUID?: string;
  /** Value of tag (0020,000E); absent when the file does not carry it. */
  SeriesInstanceUID?: string;
  /** Pixel samples viewed as 16-bit words; optional and may be left unset. */
  pixelData?: Uint16Array;
}
|
||
|
||
/**
 * Recursively collects the paths of every `.dcm` file under a directory.
 *
 * Sibling entries are scanned concurrently, but matches are appended in
 * `readdir` order, so the result does not depend on which subdirectory scan
 * happens to finish first. The extension check is case-insensitive, so a file
 * named `IMG0001.DCM` is collected as well.
 *
 * @param dir Directory to scan.
 * @param fileList Accumulator that receives the matches; defaults to a new array.
 * @returns The same `fileList` instance with every discovered path appended.
 * @throws Rejects with the underlying `fs` error when a directory cannot be read.
 */
export const findDcmFiles = async (
  dir: string,
  fileList: string[] = []
): Promise<string[]> => {
  const entries = await fs.promises.readdir(dir, { withFileTypes: true });

  const matchesPerEntry = await Promise.all(
    entries.map(async (entry): Promise<string[]> => {
      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        // Each subtree fills its own array so concurrent scans never interleave.
        return findDcmFiles(entryPath);
      }
      return entry.name.toLowerCase().endsWith(".dcm") ? [entryPath] : [];
    })
  );

  // Append one by one: spreading a very large array into push() can exceed
  // the engine's argument limit.
  for (const matches of matchesPerEntry) {
    for (const match of matches) {
      fileList.push(match);
    }
  }
  return fileList;
};
|
||
|
||
/**
 * Reads one DICOM file and extracts its study and series identifiers.
 *
 * Only the two UID tags are read. The pixel data element is deliberately not
 * touched: it is not part of the returned metadata, and building a view over
 * it would throw for files that carry no pixel data, discarding metadata that
 * parsed correctly.
 *
 * @param filePath Path of the DICOM file to read.
 * @returns The extracted metadata, or `undefined` when the file cannot be read
 *   or parsed (the error is logged to the console).
 */
export const parseDICOMFile = async (
  filePath: string
): Promise<ExtractMetadata | undefined> => {
  try {
    const fileContents = await fs.promises.readFile(filePath);
    const byteArray = new Uint8Array(fileContents);
    // 1.2.840.10008.1.2 is the Implicit VR Little Endian transfer syntax.
    // NOTE(review): presumably used by dicom-parser as the fallback for files
    // without a Part 10 header — confirm against the dicom-parser docs.
    const options = { TransferSyntaxUID: "1.2.840.10008.1.2" };
    const dataSet = dicomParser.parseDicom(byteArray, options);

    return {
      filePath,
      StudyInstanceUID: dataSet.string("x0020000d"),
      SeriesInstanceUID: dataSet.string("x0020000e"),
    };
  } catch (error) {
    console.error(`Error parsing file ${filePath}:`, error);
    return undefined;
  }
};
|
||
|
||
/**
 * Parses DICOM files in consecutive batches; files within a batch are parsed
 * concurrently and the next batch starts only after the current one settles.
 *
 * Files that fail to parse are left out of the result. `parseDICOMFile`
 * signals failure by resolving to `undefined` rather than rejecting, so both
 * rejected promises and `undefined` values are dropped here.
 *
 * @param filePaths Paths of the files to parse.
 * @param batchSize Maximum number of files parsed concurrently; must be greater than zero.
 * @returns Metadata for every file that parsed successfully, in input order.
 * @throws {RangeError} When `batchSize` is zero, negative or `NaN`; such a
 *   step would otherwise never advance through `filePaths`.
 */
export const processFilesInBatches = async (
  filePaths: string[],
  batchSize: number
): Promise<ExtractMetadata[]> => {
  if (!(batchSize > 0)) {
    throw new RangeError(
      `batchSize must be a positive number, got ${batchSize}`
    );
  }

  const results: ExtractMetadata[] = [];
  for (let start = 0; start < filePaths.length; start += batchSize) {
    const batch = filePaths.slice(start, start + batchSize);
    const settled = await Promise.allSettled(
      batch.map((filePath) => parseDICOMFile(filePath))
    );
    for (const outcome of settled) {
      // Keep only real metadata: skip rejections and "failed" undefined values.
      if (outcome.status === "fulfilled" && outcome.value !== undefined) {
        results.push(outcome.value);
      }
    }
  }
  return results;
};
|
||
|
||
/**
 * Groups file metadata by StudyInstanceUID and then by SeriesInstanceUID.
 *
 * Entries that are missing (null/undefined) or lack either UID are skipped.
 * Within each series the files keep their input order.
 *
 * @param data Metadata records to group.
 * @returns A plain object keyed by study UID, then series UID, holding the
 *   matching records.
 */
export const structureMetadata = (data: ExtractMetadata[]): StructuredData => {
  // UIDs come from file content. Grouping in Maps keeps a value such as
  // "constructor" or "__proto__" from resolving to an inherited Object member.
  const studies = new Map<string, Map<string, ExtractMetadata[]>>();

  for (const item of data) {
    // Failed parses can reach this function as undefined at runtime.
    if (item == null) continue;

    const studyUid = item.StudyInstanceUID;
    const seriesUid = item.SeriesInstanceUID;
    if (!studyUid || !seriesUid) continue;

    let seriesByUid = studies.get(studyUid);
    if (!seriesByUid) {
      seriesByUid = new Map<string, ExtractMetadata[]>();
      studies.set(studyUid, seriesByUid);
    }

    let files = seriesByUid.get(seriesUid);
    if (!files) {
      files = [];
      seriesByUid.set(seriesUid, files);
    }

    files.push(item);
  }

  // Object.fromEntries defines own data properties, so every key — even
  // "__proto__" — ends up as a regular entry of the returned object.
  return Object.fromEntries(
    Array.from(
      studies,
      ([studyUid, seriesByUid]): [string, StructuredData[string]] => [
        studyUid,
        Object.fromEntries(seriesByUid),
      ]
    )
  );
};
|