import OpenAI from 'openai';
import { SUPPORTED_FILE_TYPES, MAX_CHUNK_SIZE } from '../constants';

/**
 * A piece of extracted text together with metadata about the file it came from.
 *
 * `processDocuments` uses this shape both for whole-file records and for the
 * individual chunk records it produces from the same file.
 */
interface ProcessedDocument {
  /** Extracted text: either the full file contents or a single chunk of it. */
  text: string;
  /** Name of the source file (copied from `File.name`). */
  fileName: string;
  /** Type string of the source file (copied from `File.type`). */
  fileType: string;
  /** Size of the source file (copied from `File.size`). */
  fileSize: number;
}

/**
 * Extracts the text of each file and splits it into chunks.
 *
 * Files are handled one at a time, in the order given. For every file the
 * result gains one `documents` entry holding the full text and one
 * `embeddings` entry per chunk; all entries carry the file's name, type and
 * size. Note that `embeddings` holds chunk records (text plus metadata), not
 * numeric vectors.
 *
 * @param files - Files to process.
 * @returns The chunk-level records (`embeddings`) and the whole-file records
 *   (`documents`).
 * @throws Error when a file's type is not listed in `SUPPORTED_FILE_TYPES`.
 *   Any failure is logged with the file name and rethrown, so processing
 *   stops at the first failing file and no partial result is returned.
 */
export async function processDocuments(files: File[]): Promise<{
  embeddings: ProcessedDocument[];
  documents: ProcessedDocument[];
}> {
  const documents: ProcessedDocument[] = [];
  const embeddings: ProcessedDocument[] = [];

  for (const file of files) {
    try {
      const isSupported = SUPPORTED_FILE_TYPES.includes(file.type);
      if (!isSupported) {
        throw new Error(`Unsupported file type: ${file.type}`);
      }

      const fullText = await extractText(file);
      const pieces = splitIntoChunks(fullText);

      // Metadata shared by the whole-file record and every chunk record.
      const meta = {
        fileName: file.name,
        fileType: file.type,
        fileSize: file.size
      };

      pieces.forEach((piece) => {
        embeddings.push({ text: piece, ...meta });
      });
      documents.push({ text: fullText, ...meta });
    } catch (err) {
      // Log with the file name for context, then let the caller decide.
      console.error(`Error processing file ${file.name}:`, err);
      throw err;
    }
  }

  return { embeddings, documents };
}

/**
 * Reads the entire contents of a file as a string.
 *
 * This simply delegates to the File API's `text()` method; no
 * format-specific parsing is performed here.
 *
 * @param file - The file to read.
 * @returns A promise resolving to the file's contents as text.
 */
async function extractText(file: File): Promise<string> {
  return await file.text();
}

/**
 * Splits text into chunks of at most `maxChunkSize` UTF-16 code units,
 * breaking on whitespace wherever possible.
 *
 * Words are packed greedily: a word joins the current chunk (separated by a
 * single space) when the result still fits, otherwise a new chunk is started.
 * Runs of whitespace, including leading and trailing whitespace, are
 * collapsed and never appear at the edges of a chunk.
 *
 * A single word longer than the limit is hard-split into limit-sized pieces
 * so that no chunk ever exceeds the limit. The split avoids cutting a
 * surrogate pair in half unless the limit is 1.
 *
 * @param text - The text to split.
 * @param maxChunkSize - Maximum chunk length; defaults to `MAX_CHUNK_SIZE`.
 *   Fractional values are rounded down.
 * @returns The chunks in order; empty when `text` has no non-whitespace
 *   content.
 * @throws RangeError when `maxChunkSize` is less than 1 or not a number.
 */
function splitIntoChunks(text: string, maxChunkSize: number = MAX_CHUNK_SIZE): string[] {
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(maxChunkSize >= 1)) {
    throw new RangeError(`maxChunkSize must be at least 1, got ${maxChunkSize}`);
  }
  const limit = Math.floor(maxChunkSize);

  const chunks: string[] = [];
  let currentChunk = '';

  // split() yields empty strings for leading/trailing whitespace; drop them
  // so they cannot add a stray space to a chunk.
  const words = text.split(/\s+/).filter((word) => word.length > 0);

  for (const word of words) {
    const joinedLength = currentChunk
      ? currentChunk.length + 1 + word.length
      : word.length;

    if (joinedLength <= limit) {
      currentChunk = currentChunk ? `${currentChunk} ${word}` : word;
      continue;
    }

    if (currentChunk) {
      chunks.push(currentChunk);
    }

    if (word.length <= limit) {
      currentChunk = word;
      continue;
    }

    // The word alone exceeds the limit: emit full-size pieces and keep the
    // remainder as the start of the next chunk.
    let start = 0;
    while (word.length - start > limit) {
      let end = start + limit;
      const lastUnit = word.charCodeAt(end - 1);
      const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
      if (limit > 1 && endsOnHighSurrogate) {
        // Keep the surrogate pair together by ending this piece one unit early.
        end -= 1;
      }
      chunks.push(word.slice(start, end));
      start = end;
    }
    currentChunk = word.slice(start);
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  return chunks;
}