import path from "path";
import * as dicomParser from "dicom-parser";
import fs from "fs";

/** Metadata records indexed by a string key (named SeriesInstanceUID in the signature). */
export interface StructuredData {
  [SeriesInstanceUID: string]: ExtractMetadata[];
}

/** Metadata extracted from a single DICOM file. */
export interface ExtractMetadata {
  filePath: string;
  StudyInstanceUID?: string;
  SeriesInstanceUID?: string;
  PatientName?: string;
  pixelData?: Uint16Array;
}

/**
 * Checks whether a file is a DICOM file by looking for the "DICM" magic
 * number stored at byte offsets 128-131.
 *
 * @param filePath Path of the file to inspect.
 * @returns `true` when the magic number is present; `false` otherwise,
 *   including when the file cannot be opened or read (the error is logged).
 */
const isDICOMFile = async (filePath: string): Promise<boolean> => {
  const magicOffset = 128;
  const headerLength = 132;
  try {
    const fileHandle = await fs.promises.open(filePath, "r");
    try {
      // Zero-filled buffer: a file shorter than 132 bytes can never match.
      const header = Buffer.alloc(headerLength);
      await fileHandle.read(header, 0, headerLength, 0);
      return header.toString("utf-8", magicOffset, headerLength) === "DICM";
    } finally {
      // Always release the descriptor, even when the read fails.
      await fileHandle.close();
    }
  } catch (error) {
    console.error(`Error reading file ${filePath}:`, error);
    return false;
  }
};

/**
 * Recursively collects the paths of all DICOM files below a directory.
 *
 * Files are recognised by their magic number, not by their extension.
 * Paths are appended in directory-listing order, with the contents of a
 * sub-directory placed where that sub-directory appears in its parent's listing.
 *
 * @param dir Directory to scan.
 * @param fileList Optional list that the found paths are appended to.
 * @returns `fileList` (the same array instance) with the found paths appended.
 * @throws Rejects when a directory cannot be listed.
 */
export const findDcmFiles = async (
  dir: string,
  fileList: string[] = []
): Promise<string[]> => {
  const entries = await fs.promises.readdir(dir, { withFileTypes: true });

  // Each entry resolves to its own list so the final order does not depend
  // on which file check or sub-directory scan happens to finish first.
  const foundPerEntry = await Promise.all(
    entries.map(async (entry): Promise<string[]> => {
      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        return findDcmFiles(entryPath);
      }
      return (await isDICOMFile(entryPath)) ? [entryPath] : [];
    })
  );

  for (const found of foundPerEntry) {
    for (const foundPath of found) {
      fileList.push(foundPath);
    }
  }
  return fileList;
};

/**
 * Reads one DICOM file and extracts its identifying metadata.
 *
 * Pixel data is intentionally not decoded here: it is not part of the
 * returned record, and building a view over it made files without a
 * Pixel Data element fail even though their metadata was readable.
 *
 * @param filePath Path of the DICOM file.
 * @returns The extracted metadata, or `undefined` when the file cannot be
 *   read or parsed (the error is logged).
 */
export const parseDICOMFile = async (
  filePath: string
): Promise<ExtractMetadata | undefined> => {
  try {
    const fileContents = await fs.promises.readFile(filePath);
    const byteArray = new Uint8Array(fileContents);
    // 1.2.840.10008.1.2 is Implicit VR Little Endian.
    // NOTE(review): presumably dicom-parser uses this only as a fallback when
    // the file does not declare a transfer syntax - confirm against its docs.
    const options = { TransferSyntaxUID: "1.2.840.10008.1.2" };
    const dataSet = dicomParser.parseDicom(byteArray, options);

    return {
      filePath,
      StudyInstanceUID: dataSet.string("x0020000d"),
      SeriesInstanceUID: dataSet.string("x0020000e"),
      // (0010,0010) is Patient's Name; (0010,0030) is Patient's Birth Date.
      PatientName: dataSet.string("x00100010"),
    };
  } catch (error) {
    console.error(`Error parsing file ${filePath}:`, error);
    return undefined;
  }
};

/**
 * Parses files in consecutive batches; the files within one batch are
 * parsed concurrently.
 *
 * @param filePaths Paths of the files to parse.
 * @param batchSize Maximum number of files parsed concurrently; must be
 *   greater than zero.
 * @returns Metadata of every file that parsed successfully, in input order.
 *   Files that failed to parse are omitted.
 * @throws RangeError when `batchSize` is not a positive number (a value of
 *   zero or below would otherwise never advance the loop).
 */
export const processFilesInBatches = async (
  filePaths: string[],
  batchSize: number
): Promise<ExtractMetadata[]> => {
  if (!(batchSize > 0)) {
    throw new RangeError(
      `batchSize must be a positive number, received ${batchSize}`
    );
  }

  const results: ExtractMetadata[] = [];
  for (let start = 0; start < filePaths.length; start += batchSize) {
    const batch = filePaths.slice(start, start + batchSize);
    const settled = await Promise.allSettled(
      batch.map((filePath) => parseDICOMFile(filePath))
    );

    // Keep only fulfilled results that actually carry metadata;
    // parseDICOMFile reports a failed parse as `undefined`.
    for (const outcome of settled) {
      if (outcome.status === "fulfilled" && outcome.value !== undefined) {
        results.push(outcome.value);
      }
    }
  }
  return results;
};

/** Files that share the same study, series and patient name. */
export interface StructuredMetadata {
  filePaths: string[];
  StudyInstanceUID?: string;
  SeriesInstanceUID?: string;
  PatientName?: string;
}

/**
 * Groups per-file metadata by (StudyInstanceUID, SeriesInstanceUID,
 * PatientName).
 *
 * @param data Per-file metadata records.
 * @returns One entry per distinct identifier triple, ordered by first
 *   occurrence, each listing the file paths that share it in input order.
 */
export const structureMetadata = (
  data: ExtractMetadata[]
): StructuredMetadata[] => {
  const groups = new Map<string, StructuredMetadata>();

  for (const item of data) {
    // JSON encoding keeps the three fields unambiguous whatever characters
    // they contain; a missing field is encoded as null.
    const groupKey = JSON.stringify([
      item.StudyInstanceUID ?? null,
      item.SeriesInstanceUID ?? null,
      item.PatientName ?? null,
    ]);

    const existingEntry = groups.get(groupKey);
    if (existingEntry) {
      existingEntry.filePaths.push(item.filePath);
    } else {
      groups.set(groupKey, {
        filePaths: [item.filePath],
        StudyInstanceUID: item.StudyInstanceUID,
        SeriesInstanceUID: item.SeriesInstanceUID,
        PatientName: item.PatientName,
      });
    }
  }

  return Array.from(groups.values());
};