package org.springframework.ai.reader.pdf;

import java.awt.Rectangle;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.pdf.config.ParagraphManager;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.pdf.layout.PDFLayoutTextStripperByArea;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/* loaded from: input_file:org/springframework/ai/reader/pdf/ParagraphPdfDocumentReader.class */
/**
 * Reads a PDF and emits one {@link Document} per pair of consecutive paragraphs
 * reported by {@link ParagraphManager#flatten()}.
 *
 * <p>Each document carries the text extracted between the start positions of two
 * consecutive paragraphs, plus metadata: title, level, start page, end page and the
 * source file name.
 *
 * <p>NOTE(review): the parsed {@link PDDocument} is held for the lifetime of this
 * reader and is never closed by this class; confirm whether callers are expected to
 * release it.
 */
public class ParagraphPdfDocumentReader implements DocumentReader {
    private static final String METADATA_START_PAGE = "page_number";
    private static final String METADATA_END_PAGE = "end_page_number";
    private static final String METADATA_TITLE = "title";
    private static final String METADATA_LEVEL = "level";
    private static final String METADATA_FILE_NAME = "file_name";

    /** Name of the single region that is registered, extracted and removed per page. */
    private static final String PAGE_REGION = "pdfPageRegion";

    protected final PDDocument document;
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final ParagraphManager paragraphTextExtractor;
    protected String resourceFileName;
    private final PdfDocumentReaderConfig config;

    /**
     * Creates a reader for the resource at the given location using the default configuration.
     *
     * @param str resource location, resolved with a {@link DefaultResourceLoader}
     */
    public ParagraphPdfDocumentReader(String str) {
        this(new DefaultResourceLoader().getResource(str));
    }

    /**
     * Creates a reader for the given resource using the default configuration.
     *
     * @param resource the PDF resource to read
     */
    public ParagraphPdfDocumentReader(Resource resource) {
        this(resource, PdfDocumentReaderConfig.defaultConfig());
    }

    /**
     * Creates a reader for the resource at the given location.
     *
     * @param str resource location, resolved with a {@link DefaultResourceLoader}
     * @param pdfDocumentReaderConfig margins, position orientation and text formatter to apply
     */
    public ParagraphPdfDocumentReader(String str, PdfDocumentReaderConfig pdfDocumentReaderConfig) {
        this(new DefaultResourceLoader().getResource(str), pdfDocumentReaderConfig);
    }

    /**
     * Parses the PDF and prepares the paragraph manager for it.
     *
     * @param resource the PDF resource to read
     * @param pdfDocumentReaderConfig margins, position orientation and text formatter to apply
     * @throws IllegalArgumentException rethrown unchanged when raised while parsing the PDF
     *         or constructing the {@link ParagraphManager}
     * @throws RuntimeException wrapping any other failure while opening or parsing the PDF
     */
    public ParagraphPdfDocumentReader(Resource resource, PdfDocumentReaderConfig pdfDocumentReaderConfig) {
        // The buffer copies the stream into memory, so the stream can (and should) be
        // closed as soon as parsing is done instead of being left open.
        try (InputStream inputStream = resource.getInputStream()) {
            this.document = new PDFParser(new RandomAccessReadBuffer(inputStream)).parse();
            this.config = pdfDocumentReaderConfig;
            this.paragraphTextExtractor = new ParagraphManager(this.document);
            this.resourceFileName = resource.getFilename();
        } catch (IllegalArgumentException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts one document per pair of consecutive paragraphs.
     *
     * <p>When only a single paragraph exists it is paired with itself. Pairs whose
     * extracted text is blank are skipped, so the returned list never contains
     * {@code null}.
     *
     * @return the extracted documents, possibly empty, never {@code null}
     */
    public List<Document> get() {
        List<ParagraphManager.Paragraph> paragraphs = this.paragraphTextExtractor.flatten();
        List<Document> documents = new ArrayList<>();
        if (!CollectionUtils.isEmpty(paragraphs)) {
            this.logger.info("Start processing paragraphs from PDF");
            Iterator<ParagraphManager.Paragraph> iterator = paragraphs.iterator();
            ParagraphManager.Paragraph current = iterator.next();
            if (!iterator.hasNext()) {
                addIfHasText(documents, toDocument(current, current));
            } else {
                while (iterator.hasNext()) {
                    ParagraphManager.Paragraph following = iterator.next();
                    // Extract once and reuse the result; text extraction is the expensive step.
                    addIfHasText(documents, toDocument(current, following));
                    current = following;
                }
            }
        }
        this.logger.info("End processing paragraphs from PDF");
        return documents;
    }

    /**
     * Alias of {@link #get()} kept for callers compiled against the decompiler-renamed method.
     *
     * @return the same result as {@link #get()}
     */
    public List<Document> m1get() {
        return get();
    }

    /** Appends {@code candidate} to {@code target} unless it is {@code null} or has no text. */
    private static void addIfHasText(List<Document> target, Document candidate) {
        if (candidate != null && StringUtils.hasText(candidate.getText())) {
            target.add(candidate);
        }
    }

    /**
     * Builds a document from the text found between two paragraphs.
     *
     * @param paragraph the paragraph at which extraction starts
     * @param paragraph2 the paragraph at which extraction ends
     * @return the document with metadata attached, or {@code null} when the extracted text is blank
     */
    protected Document toDocument(ParagraphManager.Paragraph paragraph, ParagraphManager.Paragraph paragraph2) {
        String text = getTextBetweenParagraphs(paragraph, paragraph2);
        if (!StringUtils.hasText(text)) {
            return null;
        }
        Document result = new Document(text);
        addMetadata(paragraph, paragraph2, result);
        return result;
    }

    /**
     * Stores title, level, page range and file name in the document's metadata.
     *
     * @param paragraph source of the title, level and start page
     * @param paragraph2 its start page is recorded as the end page
     * @param document the document whose metadata map is updated
     */
    protected void addMetadata(ParagraphManager.Paragraph paragraph, ParagraphManager.Paragraph paragraph2, Document document) {
        document.getMetadata().put(METADATA_TITLE, paragraph.title());
        document.getMetadata().put(METADATA_START_PAGE, paragraph.startPageNumber());
        document.getMetadata().put(METADATA_END_PAGE, paragraph2.startPageNumber());
        document.getMetadata().put(METADATA_LEVEL, paragraph.level());
        document.getMetadata().put(METADATA_FILE_NAME, this.resourceFileName);
    }

    /**
     * Extracts the text lying between the start positions of two paragraphs.
     *
     * <p>Every page from the first paragraph's start page to the second paragraph's start
     * page is visited. On the first page the region begins at the first paragraph's
     * position; on the last page it ends at the second paragraph's position; pages in
     * between are taken in full. The configured top and bottom margins are trimmed from
     * regions that touch the respective page edge.
     *
     * @param paragraph the paragraph at which extraction starts
     * @param paragraph2 the paragraph at which extraction ends
     * @return the concatenated text, passed through the configured formatter when not
     *         blank; otherwise the unformatted (blank) text
     * @throws RuntimeException wrapping any failure raised during extraction
     */
    public String getTextBetweenParagraphs(ParagraphManager.Paragraph paragraph, ParagraphManager.Paragraph paragraph2) {
        // Paragraph page numbers are 1-based; PDDocument#getPage takes a 0-based index.
        int firstPageIndex = paragraph.startPageNumber() - 1;
        int lastPageIndex = paragraph2.startPageNumber() - 1;
        try {
            StringBuilder collected = new StringBuilder();
            PDFLayoutTextStripperByArea stripper = new PDFLayoutTextStripperByArea();
            stripper.setSortByPosition(true);
            for (int pageIndex = firstPageIndex; pageIndex <= lastPageIndex; pageIndex++) {
                PDPage page = this.document.getPage(pageIndex);
                float exactPageHeight = page.getMediaBox().getHeight();
                int pageHeight = (int) exactPageHeight;

                int startPosition = paragraph.position();
                int endPosition = paragraph2.position();
                if (this.config.reversedParagraphPosition) {
                    // Flip both positions so they are measured from the opposite page edge.
                    startPosition = (int) (exactPageHeight - startPosition);
                    endPosition = (int) (exactPageHeight - endPosition);
                }

                int regionX = (int) page.getMediaBox().getLowerLeftX();
                int regionWidth = (int) page.getMediaBox().getWidth();
                int regionY = (int) page.getMediaBox().getLowerLeftY();
                int regionHeight = pageHeight;

                if (pageIndex == firstPageIndex) {
                    regionY = startPosition;
                    regionHeight = pageHeight - regionY;
                }
                if (pageIndex == lastPageIndex) {
                    regionHeight = endPosition - regionY;
                }
                // Region reaches the end of the page: cut off the bottom margin.
                if (regionY + regionHeight == pageHeight) {
                    regionHeight -= this.config.pageBottomMargin;
                }
                // Region starts at the very beginning of the page: skip the top margin.
                if (regionY == 0) {
                    regionY += this.config.pageTopMargin;
                    regionHeight -= this.config.pageTopMargin;
                }

                stripper.addRegion(PAGE_REGION, new Rectangle(regionX, regionY, regionWidth, regionHeight));
                stripper.extractRegions(page);
                String pageText = stripper.getTextForRegion(PAGE_REGION);
                if (StringUtils.hasText(pageText)) {
                    collected.append(pageText);
                }
                // The stripper is reused across pages, so the region must be dropped each time.
                stripper.removeRegion(PAGE_REGION);
            }
            String text = collected.toString();
            if (StringUtils.hasText(text)) {
                text = this.config.pageExtractedTextFormatter.format(text, firstPageIndex);
            }
            return text;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
