package org.springframework.ai.reader.pdf;

import java.awt.Rectangle;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.pdf.layout.PDFLayoutTextStripperByArea;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/* loaded from: input_file:org/springframework/ai/reader/pdf/PagePdfDocumentReader.class */
/**
 * Reads a PDF and turns the text of its pages into {@link Document}s, grouping
 * {@code config.pagesPerDocument} consecutive pages into each document (or all pages
 * into a single document when that setting is {@code 0}).
 *
 * <p>Each produced document carries the source file name and the one-based number of
 * its first page in its metadata; the last page number is added only when the document
 * spans more than one page.
 *
 * <p>NOTE(review): nothing in this class closes {@link #document}; whoever owns the
 * reader is presumably expected to do so - confirm against callers.
 */
public class PagePdfDocumentReader implements DocumentReader {
    /** Metadata key for the one-based number of the first page contained in a document. */
    public static final String METADATA_START_PAGE_NUMBER = "page_number";
    /** Metadata key for the one-based number of the last page; only set for multi-page documents. */
    public static final String METADATA_END_PAGE_NUMBER = "end_page_number";
    /** Metadata key for the file name of the resource the PDF was loaded from. */
    public static final String METADATA_FILE_NAME = "file_name";
    /** Name of the single extraction region that is registered for, and removed after, every page. */
    private static final String PDF_PAGE_REGION = "pdfPageRegion";
    /** The parsed PDF; loaded once in the constructor. */
    protected final PDDocument document;
    /** Logger named after the runtime class, so subclasses log under their own name. */
    private final Logger logger = LoggerFactory.getLogger(getClass());
    /** File name reported by the source resource; may be {@code null} if the resource has none. */
    protected String resourceFileName;
    /** Margins, grouping size and text formatter used while reading. */
    private final PdfDocumentReaderConfig config;

    /**
     * Creates a reader for the resource at the given location using the default configuration.
     *
     * @param str resource location, resolved with a {@link DefaultResourceLoader}
     */
    public PagePdfDocumentReader(String str) {
        this(new DefaultResourceLoader().getResource(str));
    }

    /**
     * Creates a reader for the given resource using the default configuration.
     *
     * @param resource the PDF resource to parse
     */
    public PagePdfDocumentReader(Resource resource) {
        this(resource, PdfDocumentReaderConfig.defaultConfig());
    }

    /**
     * Creates a reader for the resource at the given location.
     *
     * @param str resource location, resolved with a {@link DefaultResourceLoader}
     * @param pdfDocumentReaderConfig reader configuration
     */
    public PagePdfDocumentReader(String str, PdfDocumentReaderConfig pdfDocumentReaderConfig) {
        this(new DefaultResourceLoader().getResource(str), pdfDocumentReaderConfig);
    }

    /**
     * Parses the PDF held by {@code resource} and remembers its file name and the configuration.
     *
     * @param resource the PDF resource to parse
     * @param pdfDocumentReaderConfig reader configuration
     * @throws RuntimeException wrapping any failure to open, read or parse the resource
     */
    public PagePdfDocumentReader(Resource resource, PdfDocumentReaderConfig pdfDocumentReaderConfig) {
        // RandomAccessReadBuffer copies the whole stream into memory in its constructor and
        // does not close it, so the stream is closed here once parsing has returned.
        try (InputStream inputStream = resource.getInputStream()) {
            this.document = new PDFParser(new RandomAccessReadBuffer(inputStream)).parse();
            this.resourceFileName = resource.getFilename();
            this.config = pdfDocumentReaderConfig;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts the text of every page and groups it into documents.
     *
     * <p>This is the method the decompiler had renamed to {@code m0get()} ("renamed from: get,
     * merged with bridge method"); it is restored under its real name so that the class
     * provides the {@code get()} method again.
     *
     * <p>Pages without text contribute nothing; a group whose combined text is blank produces
     * no document. A PDF without pages yields an empty list.
     *
     * @return the documents in page order, never {@code null}
     * @throws RuntimeException wrapping any {@link IOException} raised during text extraction
     */
    public List<Document> get() {
        List<Document> documents = new ArrayList<>();
        try {
            PDFLayoutTextStripperByArea textStripper = new PDFLayoutTextStripperByArea();
            List<String> groupTexts = new ArrayList<>();

            int totalPages = this.document.getDocumentCatalog().getPages().getCount();
            // Log roughly ten progress lines for large files, every page for small ones.
            int logFrequency = totalPages > 10 ? totalPages / 10 : 1;

            int pageIndex = 0; // zero-based index of the page being processed
            int pagesInGroup = 0; // pages collected for the document currently being built
            int startPageNumber = 1; // one-based number of the first page of the current group
            PDPage lastPage = null;

            for (PDPage page : this.document.getDocumentCatalog().getPages()) {
                lastPage = page;
                if (pageIndex % logFrequency == 0 && pageIndex / logFrequency < 10) {
                    this.logger.info("Processing PDF page: {}", Integer.valueOf(pageIndex + 1));
                }

                // Extract first, then decide whether the group is complete, so that the
                // current page lands in the group it belongs to.
                String pageText = extractPageText(textStripper, page);
                if (StringUtils.hasText(pageText)) {
                    // The formatter keeps receiving the zero-based page index.
                    groupTexts.add(this.config.pageExtractedTextFormatter.format(pageText, pageIndex));
                }
                pageIndex++; // from here on equals the one-based number of the page just read
                pagesInGroup++;

                if (this.config.pagesPerDocument != 0 && pagesInGroup >= this.config.pagesPerDocument) {
                    String groupText = String.join("", groupTexts);
                    if (StringUtils.hasText(groupText)) {
                        documents.add(toDocument(page, groupText, startPageNumber, pageIndex));
                    }
                    groupTexts.clear();
                    pagesInGroup = 0;
                    startPageNumber = pageIndex + 1;
                }
            }

            // Trailing partial group, or the single group when all pages go into one document.
            // A non-empty list implies at least one page was visited, so lastPage is set.
            if (!CollectionUtils.isEmpty(groupTexts)) {
                documents.add(toDocument(lastPage, String.join("", groupTexts), startPageNumber, pageIndex));
            }
            this.logger.info("Processing {} pages", Integer.valueOf(totalPages));
            return documents;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Alias kept for code written against the decompiler-generated method name.
     *
     * @return the result of {@link #get()}
     * @deprecated use {@link #get()}
     */
    @Deprecated
    public List<Document> m0get() {
        return get();
    }

    /**
     * Returns the text found on {@code page} inside its media box reduced by the configured
     * top and bottom margins.
     */
    private String extractPageText(PDFLayoutTextStripperByArea textStripper, PDPage page) throws IOException {
        int x = (int) page.getMediaBox().getLowerLeftX();
        int y = (int) page.getMediaBox().getLowerLeftY() + this.config.pageTopMargin;
        int width = (int) page.getMediaBox().getWidth();
        int height = (int) page.getMediaBox().getHeight()
                - (this.config.pageTopMargin + this.config.pageBottomMargin);

        textStripper.addRegion(PDF_PAGE_REGION, new Rectangle(x, y, width, height));
        textStripper.extractRegions(page);
        String text = textStripper.getTextForRegion(PDF_PAGE_REGION);
        // The region is page specific; drop it so the next page registers its own.
        textStripper.removeRegion(PDF_PAGE_REGION);
        return text;
    }

    /**
     * Builds a document from the given text and attaches page-range and file-name metadata.
     *
     * @param pDPage the last page that contributed to the text; not used by this implementation
     * @param str the document text
     * @param i number of the first page, stored under {@link #METADATA_START_PAGE_NUMBER}
     * @param i2 number of the last page, stored under {@link #METADATA_END_PAGE_NUMBER} only when
     *     it differs from {@code i}
     * @return the populated document
     */
    protected Document toDocument(PDPage pDPage, String str, int i, int i2) {
        Document document = new Document(str);
        document.getMetadata().put(METADATA_START_PAGE_NUMBER, Integer.valueOf(i));
        if (i != i2) {
            document.getMetadata().put(METADATA_END_PAGE_NUMBER, Integer.valueOf(i2));
        }
        document.getMetadata().put(METADATA_FILE_NAME, this.resourceFileName);
        return document;
    }
}
