import { Storage, API } from "aws-amplify";
import { PDFDocument } from "pdf-lib";

// GraphQL subscription document for `onBackgroundFileTextractPublished`.
// It takes a required `executionArn` string variable, forwards it as the
// subscription argument, and selects `executionArn`, `fileId`,
// `extractedFileContent` and `nextIndex` from every published message.
// Used as the `query` passed to `API.graphql` in `handleTextractSubscription`.
const ON_TEXTRACT_PUBLISHED = `
  subscription onBackgroundFileTextractPublished($executionArn: String!) {
    onBackgroundFileTextractPublished(executionArn: $executionArn) {
      executionArn
      fileId
      extractedFileContent
      nextIndex
    }
  }
`;

/**
 * Splits a PDF that is longer than 40 pages into parts of at most 40 pages and
 * uploads every part through `Storage.put`. PDFs with 40 pages or fewer are
 * only measured; nothing is uploaded for them.
 *
 * Each part is stored under
 * `temp/<bucketName>/part<N>-<currDate><sanitised name>` where `N` starts at 1.
 *
 * @param params - Options bag.
 * @param params.pdfBytes - PDF contents, handed unchanged to `PDFDocument.load`.
 * @param params.bucketName - Path segment placed after `temp/` in each key.
 * @param params.name - File name; whitespace, characters other than letters,
 *   digits and dots, and every dot except the last are stripped before it is
 *   appended to the key.
 * @param params.currDate - Value embedded in the key right before the name.
 * @param params.type - Sent as the `contentType` option of every upload.
 * @returns The page count of the PDF, or `null` when anything inside the
 *   load / split / upload sequence throws.
 */
export const chunkLargePdfsForTextract = async (params) => {
  console.log("chunkLargePdfsForTextract params", params);
  const { pdfBytes, bucketName, name, currDate, type } = params;
  // Maximum number of pages placed in each uploaded part.
  const PAGES_PER_PART = 40;
  let pageCount;
  try {
    const pdfDoc = await PDFDocument.load(pdfBytes);
    pageCount = pdfDoc.getPageCount();

    console.log(`PDF is ${pageCount} pages long`);

    if (pageCount > PAGES_PER_PART) {
      console.log(`Splitting PDF by ${PAGES_PER_PART} pages`);

      // The index list does not change while splitting, so compute it once.
      const pageIndices = pdfDoc.getPageIndices();
      const pdfParts = [];
      for (let start = 0; start < pageCount; start += PAGES_PER_PART) {
        const partDoc = await PDFDocument.create();
        const copiedPages = await partDoc.copyPages(
          pdfDoc,
          pageIndices.slice(start, start + PAGES_PER_PART)
        );
        copiedPages.forEach((page) => partDoc.addPage(page));
        pdfParts.push(await partDoc.save());
      }

      // Same for every part, so sanitise once.
      // NOTE(review): when `name` is null/undefined the optional chain yields
      // undefined and the key ends in the literal text "undefined"; kept as-is
      // because whatever reads these keys may rely on the exact key format.
      const sanitizedName = name
        ?.replaceAll(/\s/g, "")
        .replaceAll(/[^a-zA-Z.0-9]+|\.(?=.*\.)/g, "");

      // Upload all parts in parallel.
      await Promise.all(
        pdfParts.map(async (pdfPart, partIndex) => {
          const partNumber = partIndex + 1;
          const partKey = `temp/${bucketName}/part${partNumber}-${currDate}${sanitizedName}`;
          const partData = new Blob([pdfPart], { type: "application/pdf" });

          // Log the key and size rather than an object URL: an object URL that
          // is never revoked keeps the whole blob alive in memory.
          console.log(
            `PDF PART ${partNumber}: ${partKey} (${partData.size} bytes)`
          );
          await Storage.put(partKey, partData, {
            contentType: type,
            errorCallback: (err) => {
              console.error("204: Unexpected error while uploading", err);
            },
          });
        })
      );
    }
  } catch (err) {
    // Any failure (load, split or upload) downgrades the result to null so the
    // caller can carry on without chunking.
    pageCount = null;
    console.warn(
      "Unable to chunk pdf for faster text extraction due to file encryption/corruption",
      err
    );
  }

  return pageCount;
};

/**
 * Subscribes to `onBackgroundFileTextractPublished` for one execution and
 * collects the published messages until a terminal message arrives.
 *
 * A message whose `nextIndex` is -1 ends the run:
 *  - if it is the very first message and its `fileId` is
 *    "[ERROR IN TEXTRACTION]", the promise rejects;
 *  - otherwise all collected messages are merged with
 *    `combineTextractSubscriptionResponse` and the promise resolves with the
 *    merged result.
 *
 * The subscription is closed whenever the promise settles (success, sentinel
 * error, or subscription error).
 *
 * @param executionArn - Passed as the `executionArn` subscription variable.
 * @returns Promise resolving to the combined extraction results.
 * @throws Rejects with `Error("Error in text extraction")` on the error
 *   sentinel, or with whatever the subscription's `error` callback receives.
 */
export const handleTextractSubscription = async (executionArn) => {
  const resultArray = [];

  // A plain (non-async) executor: anything thrown while subscribing rejects
  // the promise instead of turning into an unhandled rejection.
  return new Promise((resolve, reject) => {
    let subscription;
    let settled = false;

    // Settle at most once and always release the subscription.
    const finish = (settle, payload) => {
      if (settled) {
        return;
      }
      settled = true;
      // May still be undefined if a message is delivered during subscribe().
      subscription?.unsubscribe();
      settle(payload);
    };

    subscription = API.graphql({
      query: ON_TEXTRACT_PUBLISHED,
      variables: { executionArn },
    }).subscribe({
      next: ({ value }) => {
        const result = value?.data?.onBackgroundFileTextractPublished;
        console.log("Received text extraction response", result);

        if (!result) {
          // Nothing to collect from a payload without data; keep waiting.
          console.warn("Ignoring text extraction message without data");
          return;
        }

        const isTerminal = result.nextIndex === -1;
        const isErrorSentinel =
          resultArray.length === 0 &&
          isTerminal &&
          result.fileId === "[ERROR IN TEXTRACTION]";

        if (isErrorSentinel) {
          console.log("Error in text extraction");
          finish(reject, new Error("Error in text extraction"));
          return;
        }

        resultArray.push(result);
        if (!isTerminal) {
          return;
        }

        console.log("Text extraction complete", resultArray);
        try {
          const combinedArray =
            combineTextractSubscriptionResponse(resultArray);
          console.log("Text extraction combined results", combinedArray);
          finish(resolve, combinedArray);
        } catch (err) {
          finish(reject, err);
        }
      },
      error: (error) => {
        console.warn(error);
        finish(reject, error);
      },
    });

    // Covers the case where the promise settled before `subscription` was
    // assigned, so `finish` could not close it.
    if (settled) {
      subscription.unsubscribe();
    }
  });
};

/**
 * Merges subscription messages into one entry per `fileId`.
 *
 * For each file, chunks are ordered by ascending `nextIndex`, and the chunk
 * whose `nextIndex` is -1 (the terminal chunk) is placed last. The chunks'
 * `extractedFileContent` values are then concatenated. A chunk whose content
 * is null or undefined contributes nothing, wherever it sits in the order.
 *
 * @param arr - Messages carrying `fileId`, `extractedFileContent`, `nextIndex`.
 * @returns Array of `{ fileId, extractedFileContent }`, one per distinct
 *   `fileId`, in order of first appearance in `arr`.
 */
function combineTextractSubscriptionResponse(arr) {
  // Group chunks by file; a Map keeps first-seen order of the file ids.
  const chunksByFile = new Map();
  for (const { fileId, extractedFileContent, nextIndex } of arr) {
    const chunk = { extractedFileContent, nextIndex };
    const chunks = chunksByFile.get(fileId);
    if (chunks) {
      chunks.push(chunk);
    } else {
      chunksByFile.set(fileId, [chunk]);
    }
  }

  return Array.from(chunksByFile, ([fileId, chunks]) => {
    // Only the first terminal chunk is used if several are present.
    const terminal = chunks.find((chunk) => chunk.nextIndex === -1);
    const ordered = chunks
      .filter((chunk) => chunk.nextIndex !== -1)
      .sort((a, b) => a.nextIndex - b.nextIndex);
    if (terminal) {
      ordered.push(terminal);
    }

    // Join every chunk, terminal included, through the same path so a
    // null/undefined content never becomes the text "null" / "undefined".
    const extractedFileContent = ordered
      .map((chunk) => chunk.extractedFileContent)
      .join("");

    return { fileId, extractedFileContent };
  });
}
