// cvpilot-tool/apps/desktop/electron/core/dicom.ts

import path from "path";
import * as dicomParser from "dicom-parser";
import fs from "fs";
import crypto from "crypto";

/**
 * Dictionary of metadata records grouped under a string key.
 * The key name in the index signature indicates the intended key is a
 * SeriesInstanceUID; TypeScript itself accepts any string key.
 */
export interface StructuredData {
  [SeriesInstanceUID: string]: ExtractMetadata[];
}

/**
 * Metadata extracted from a single DICOM file.
 */
export interface ExtractMetadata {
  // Path of the file this record was read from.
  filePath: string;
  // Study identifier; undefined when the file does not provide one.
  StudyInstanceUID?: string;
  // Series identifier; undefined when the file does not provide one.
  SeriesInstanceUID?: string;
  // Patient name, when available.
  PatientName?: string;
  // Optional pixel data. NOTE: the parser in this file currently has its
  // pixel-data extraction commented out, so it does not populate this field.
  pixelData?: Uint16Array;
  // The parser in this file stores an empty string for the three fields
  // below when the corresponding value is missing.
  PatientSex: string;
  PatientAge: string;
  AcquisitionDate: string;
}

/**
 * Checks whether a file is a DICOM file by looking for the "DICM" magic
 * marker stored at byte offsets 128-131 (right after the 128-byte preamble).
 *
 * @param filePath Path of the file to probe.
 * @returns `true` when the marker is present; `false` when it is absent, when
 *   the file is shorter than 132 bytes, or when the file cannot be opened or
 *   read (the error is logged, never thrown).
 */
const isDICOMFile = async (filePath: string): Promise<boolean> => {
  try {
    const fileHandle = await fs.promises.open(filePath, "r");
    try {
      // Only the first 132 bytes are needed: 128-byte preamble + 4-byte marker.
      const buffer = Buffer.alloc(132);
      const { bytesRead } = await fileHandle.read(buffer, 0, 132, 0);
      if (bytesRead < 132) {
        return false; // Too short to contain the marker.
      }
      return buffer.toString("utf-8", 128, 132) === "DICM";
    } finally {
      // Always release the descriptor, even when the read fails; otherwise a
      // failing read would leak the handle.
      await fileHandle.close();
    }
  } catch (error) {
    console.error(`Error reading file ${filePath}:`, error);
    return false;
  }
};

/**
 * Recursively collects the paths of all DICOM files under a directory.
 *
 * Files are probed in bounded chunks rather than all at once: starting an
 * open() for every entry of a large tree simultaneously can exhaust the
 * process's file descriptors, and because the probe reports any I/O error as
 * "not a DICOM file", such failures would silently drop real DICOM files.
 *
 * @param dir Directory to scan.
 * @param fileList Accumulator that matches are appended to (used by the
 *   recursion; callers normally omit it).
 * @returns The accumulator, listing each directory's own matches in
 *   directory-listing order before the matches of its subdirectories.
 * @throws Rejects if a directory cannot be listed.
 */
export const findDcmFiles = async (
  dir: string,
  fileList: string[] = []
): Promise<string[]> => {
  // Upper bound on the number of files probed (and therefore open) at once.
  const maxConcurrentProbes = 64;

  const entries = await fs.promises.readdir(dir, { withFileTypes: true });
  const subDirs: string[] = [];
  const candidates: string[] = [];
  for (const entry of entries) {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      subDirs.push(entryPath);
    } else {
      candidates.push(entryPath);
    }
  }

  for (let start = 0; start < candidates.length; start += maxConcurrentProbes) {
    const chunk = candidates.slice(start, start + maxConcurrentProbes);
    const isDicom = await Promise.all(
      chunk.map((candidate) => isDICOMFile(candidate))
    );
    // Append in listing order so the result does not depend on I/O timing.
    chunk.forEach((candidate, idx) => {
      if (isDicom[idx]) {
        fileList.push(candidate);
      }
    });
  }

  // Subdirectories are walked one at a time so the concurrency bound above
  // holds for the whole traversal, not just per directory.
  for (const subDir of subDirs) {
    await findDcmFiles(subDir, fileList);
  }
  return fileList;
};

/**
 * Reads one DICOM file and extracts the metadata fields used by the scanner.
 *
 * @param filePath Path of the DICOM file to parse.
 * @returns The extracted metadata, or `undefined` when the file cannot be
 *   read or parsed (the error is logged, never thrown).
 */
export const parseDICOMFile = async (
  filePath: string
): Promise<ExtractMetadata | undefined> => {
  try {
    const arrayBuffer = await fs.promises.readFile(filePath);
    const byteArray = new Uint8Array(arrayBuffer);
    // 1.2.840.10008.1.2 is Implicit VR Little Endian. Passed as a parser
    // option; presumably used as the fallback transfer syntax when the file
    // does not declare one — confirm against the dicom-parser documentation.
    const options = { TransferSyntaxUID: "1.2.840.10008.1.2" };
    const dataSet = dicomParser.parseDicom(byteArray, options);
    const StudyInstanceUID = dataSet.string("x0020000d"); // (0020,000D)
    const SeriesInstanceUID = dataSet.string("x0020000e"); // (0020,000E)
    // (0010,0010) is Patient's Name. The tag (0010,0030) is Patient's Birth
    // Date and must not be used here.
    const PatientName = dataSet.string("x00100010");
    const PatientSex = dataSet.string("x00100040") ?? ""; // (0010,0040)
    const PatientAge = dataSet.string("x00101010") ?? ""; // (0010,1010)
    const AcquisitionDate = dataSet.string("x00080022") ?? ""; // (0008,0022)
    // Pixel data extraction is intentionally disabled:
    // const pixelDataElement = dataSet.elements.x7fe00010;
    // const pixelData = new Uint16Array(
    //   dataSet.byteArray.buffer,
    //   pixelDataElement.dataOffset,
    //   pixelDataElement.length / 2
    // );

    return {
      filePath,
      StudyInstanceUID,
      SeriesInstanceUID,
      PatientName,
      PatientSex,
      PatientAge,
      AcquisitionDate,
      // pixelData,
    };
  } catch (error) {
    console.error(`Error parsing file ${filePath}:`, error);
    return undefined;
  }
};

/**
 * Parses the given files in consecutive batches; files within a batch are
 * parsed concurrently, and the next batch starts only after the current one
 * has settled.
 *
 * @param filePaths Paths of the files to parse.
 * @param batchSize Maximum number of files parsed concurrently. Values below
 *   1 (or NaN) are treated as 1 so the loop always makes progress.
 * @returns Metadata of every file that parsed successfully, in input order.
 *   Files that fail to parse are omitted.
 */
export const processFilesInBatches = async (
  filePaths: string[],
  batchSize: number
): Promise<ExtractMetadata[]> => {
  // A step of 0, a negative step, or NaN would keep the loop from advancing.
  const step = batchSize >= 1 ? Math.floor(batchSize) : 1;

  const results: ExtractMetadata[] = [];
  for (let start = 0; start < filePaths.length; start += step) {
    const batch = filePaths.slice(start, start + step);
    const settled = await Promise.allSettled(
      batch.map((filePath) => parseDICOMFile(filePath))
    );
    for (const outcome of settled) {
      // The parser resolves to `undefined` on failure, so a "fulfilled"
      // status alone does not mean metadata was produced.
      if (outcome.status === "fulfilled" && outcome.value !== undefined) {
        results.push(outcome.value);
      }
    }
  }
  return results;
};

/**
 * Aggregated record for one group of files: the per-file metadata fields
 * (all optional here) plus the paths and content hashes of the files in the
 * group. `structureMetadata` appends to `filePaths` and `fileHash` together,
 * so entries at the same index describe the same file.
 */
export type StructuredMetadata = Partial<ExtractMetadata> & {
  filePaths: string[];
  fileHash: string[];
};

/**
 * Progress report passed to the progress callback of `structureMetadata`.
 */
export interface ScanProgress {
  // Share of items processed so far, expressed on a 0-100 scale.
  percentage: number;
}

/**
 * Computes the SHA-256 digest of a file's contents by streaming it, so the
 * whole file is never held in memory at once.
 *
 * @param filePath Path of the file to hash.
 * @returns The digest as a lowercase hex string.
 * @throws Rejects with the stream error when the file cannot be read.
 */
async function calculateFileHash(filePath: string): Promise<string> {
  const hasher = crypto.createHash("sha256");
  // Iterating the read stream surfaces read errors as a rejection.
  for await (const chunk of fs.createReadStream(filePath)) {
    hasher.update(chunk);
  }
  return hasher.digest("hex");
}

// Series level: `structureMetadata` groups records that share the same value
// of this property.
export const keyProp = "SeriesInstanceUID";

/**
 * Groups per-file metadata into one record per `keyProp` value (series),
 * collecting the file paths of each group and skipping files whose content
 * hash already exists in the same group.
 *
 * Files are hashed one at a time, in input order. Group-level fields
 * (study/series UID, patient data, acquisition date) are taken from the first
 * file seen for the group.
 *
 * @param data Per-file metadata records. Missing (`null`/`undefined`) entries
 *   are skipped rather than causing a crash; they still count toward progress.
 * @param progressCallback Optional callback invoked after every input entry
 *   with the percentage of entries handled so far.
 * @returns One aggregated record per group, in order of first appearance.
 * @throws Rejects if hashing a file fails.
 */
export const structureMetadata = async (
  data: ExtractMetadata[],
  progressCallback?: (progress: ScanProgress) => void
): Promise<StructuredMetadata[]> => {
  const result: StructuredMetadata[] = [];
  // Index by group key, with a hash set per group, so lookups stay constant
  // time instead of rescanning `result` and `fileHash` for every file.
  const groups = new Map<
    string | undefined,
    { entry: StructuredMetadata; hashes: Set<string> }
  >();

  for (let index = 0; index < data.length; index++) {
    const item = data[index];
    // Guard against holes left by upstream parse failures.
    if (item) {
      const hash = await calculateFileHash(item.filePath);
      const groupKey = item[keyProp];
      const group = groups.get(groupKey);

      if (group) {
        // Same group: add the file only if its content is new to the group.
        if (!group.hashes.has(hash)) {
          group.hashes.add(hash);
          group.entry.filePaths.push(item.filePath);
          group.entry.fileHash.push(hash);
        }
      } else {
        // First file of this group: create its aggregated record.
        const entry: StructuredMetadata = {
          filePaths: [item.filePath],
          fileHash: [hash],
          StudyInstanceUID: item.StudyInstanceUID,
          SeriesInstanceUID: item.SeriesInstanceUID,
          PatientName: item.PatientName,
          PatientAge: item.PatientAge,
          AcquisitionDate: item.AcquisitionDate,
          PatientSex: item.PatientSex,
        };
        groups.set(groupKey, { entry, hashes: new Set([hash]) });
        result.push(entry);
      }
    }

    progressCallback?.({ percentage: ((index + 1) / data.length) * 100 });
  }

  return result;
};