package com.alibaba.cloud.ai.parser.tika;

import com.alibaba.cloud.ai.document.DocumentParser;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.function.Supplier;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.ExtractedTextFormatter;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:com/alibaba/cloud/ai/parser/tika/TikaDocumentParser.class */
/**
 * {@link DocumentParser} implementation that delegates text extraction to Apache Tika.
 *
 * <p>The Tika collaborators used for each parse call (parser, content handler, metadata and
 * parse context) are obtained from {@link Supplier}s, so a fresh instance of each is requested
 * on every {@link #parse(InputStream)} invocation. Any supplier passed as {@code null} is
 * replaced by the matching {@code DEFAULT_*_SUPPLIER} constant, and a {@code null}
 * {@link ExtractedTextFormatter} is replaced by {@link ExtractedTextFormatter#defaults()}.
 */
public class TikaDocumentParser implements DocumentParser {

    /** Write limit handed to {@link BodyContentHandler}; {@code -1} disables Tika's character limit. */
    private static final int NO_WRITE_LIMIT = -1;

    /** Default parser factory: a new {@link AutoDetectParser} per call. */
    public static final Supplier<Parser> DEFAULT_PARSER_SUPPLIER = AutoDetectParser::new;

    /** Default metadata factory: a new, empty {@link Metadata} per call. */
    public static final Supplier<Metadata> DEFAULT_METADATA_SUPPLIER = Metadata::new;

    /** Default parse-context factory: a new, empty {@link ParseContext} per call. */
    public static final Supplier<ParseContext> DEFAULT_PARSE_CONTEXT_SUPPLIER = ParseContext::new;

    /** Default content-handler factory: a new {@link BodyContentHandler} without a write limit. */
    public static final Supplier<ContentHandler> DEFAULT_CONTENT_HANDLER_SUPPLIER =
            () -> new BodyContentHandler(NO_WRITE_LIMIT);

    private final Supplier<Parser> parserSupplier;
    private final Supplier<ContentHandler> contentHandlerSupplier;
    private final Supplier<Metadata> metadataSupplier;
    private final Supplier<ParseContext> parseContextSupplier;
    private final ExtractedTextFormatter textFormatter;

    /** Creates a parser that uses the default suppliers and the default text formatter. */
    public TikaDocumentParser() {
        this(null, null, null, null, ExtractedTextFormatter.defaults());
    }

    /**
     * Creates a parser that uses the default suppliers and the given text formatter.
     *
     * @param extractedTextFormatter formatter applied to the extracted text; {@code null} selects
     *     {@link ExtractedTextFormatter#defaults()}
     */
    public TikaDocumentParser(ExtractedTextFormatter extractedTextFormatter) {
        this(null, null, null, null, extractedTextFormatter);
    }

    /**
     * Creates a parser with a custom content handler and text formatter; the remaining
     * collaborators use their defaults.
     *
     * @param contentHandlerSupplier factory for the SAX content handler; {@code null} selects
     *     {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param extractedTextFormatter formatter applied to the extracted text; {@code null} selects
     *     {@link ExtractedTextFormatter#defaults()}
     */
    public TikaDocumentParser(Supplier<ContentHandler> contentHandlerSupplier,
            ExtractedTextFormatter extractedTextFormatter) {
        this(null, contentHandlerSupplier, null, null, extractedTextFormatter);
    }

    /**
     * Creates a parser with custom Tika collaborators and the default text formatter.
     *
     * @param parserSupplier factory for the Tika parser; {@code null} selects
     *     {@link #DEFAULT_PARSER_SUPPLIER}
     * @param contentHandlerSupplier factory for the SAX content handler; {@code null} selects
     *     {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param metadataSupplier factory for the Tika metadata; {@code null} selects
     *     {@link #DEFAULT_METADATA_SUPPLIER}
     * @param parseContextSupplier factory for the Tika parse context; {@code null} selects
     *     {@link #DEFAULT_PARSE_CONTEXT_SUPPLIER}
     */
    public TikaDocumentParser(Supplier<Parser> parserSupplier,
            Supplier<ContentHandler> contentHandlerSupplier,
            Supplier<Metadata> metadataSupplier,
            Supplier<ParseContext> parseContextSupplier) {
        this(parserSupplier, contentHandlerSupplier, metadataSupplier, parseContextSupplier,
                ExtractedTextFormatter.defaults());
    }

    /**
     * Creates a fully customised parser. Every argument is optional: passing {@code null}
     * selects the corresponding default.
     *
     * @param parserSupplier factory for the Tika parser; {@code null} selects
     *     {@link #DEFAULT_PARSER_SUPPLIER}
     * @param contentHandlerSupplier factory for the SAX content handler; {@code null} selects
     *     {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param metadataSupplier factory for the Tika metadata; {@code null} selects
     *     {@link #DEFAULT_METADATA_SUPPLIER}
     * @param parseContextSupplier factory for the Tika parse context; {@code null} selects
     *     {@link #DEFAULT_PARSE_CONTEXT_SUPPLIER}
     * @param extractedTextFormatter formatter applied to the extracted text; {@code null} selects
     *     {@link ExtractedTextFormatter#defaults()}
     */
    public TikaDocumentParser(Supplier<Parser> parserSupplier,
            Supplier<ContentHandler> contentHandlerSupplier,
            Supplier<Metadata> metadataSupplier,
            Supplier<ParseContext> parseContextSupplier,
            ExtractedTextFormatter extractedTextFormatter) {
        this.parserSupplier = getOrDefault(parserSupplier, () -> DEFAULT_PARSER_SUPPLIER);
        this.contentHandlerSupplier =
                getOrDefault(contentHandlerSupplier, () -> DEFAULT_CONTENT_HANDLER_SUPPLIER);
        this.metadataSupplier = getOrDefault(metadataSupplier, () -> DEFAULT_METADATA_SUPPLIER);
        this.parseContextSupplier =
                getOrDefault(parseContextSupplier, () -> DEFAULT_PARSE_CONTEXT_SUPPLIER);
        // A null formatter used to be stored as-is and only failed later, inside parse(), as a
        // NullPointerException wrapped in a RuntimeException. Default it like the suppliers.
        this.textFormatter = getOrDefault(extractedTextFormatter, ExtractedTextFormatter::defaults);
    }

    /**
     * Extracts the text of the given stream with Tika and returns it as a single document.
     *
     * <p>This method does not close {@code inputStream}.
     *
     * @param inputStream the content to parse
     * @return a one-element list holding the formatted, extracted text
     * @throws RuntimeException wrapping any exception raised while obtaining the collaborators,
     *     parsing, or formatting, including the {@link ZeroByteFileException} raised when the
     *     content handler yields a {@code null} string
     */
    public List<Document> parse(InputStream inputStream) {
        try {
            Parser parser = this.parserSupplier.get();
            ContentHandler contentHandler = this.contentHandlerSupplier.get();
            Metadata metadata = this.metadataSupplier.get();
            ParseContext parseContext = this.parseContextSupplier.get();

            parser.parse(inputStream, contentHandler, metadata, parseContext);

            // The handler accumulates the extracted body text; toString() exposes it.
            String text = contentHandler.toString();
            if (text == null) {
                throw new ZeroByteFileException("The content is blank!");
            }
            return Collections.singletonList(toDocument(text));
        } catch (Exception e) {
            // Single boundary: callers only ever see an unchecked exception with the cause kept.
            throw new RuntimeException(e);
        }
    }

    /**
     * Formats the extracted text and wraps it in a {@link Document}.
     *
     * @param text the extracted text; {@code null} is treated as the empty string
     * @return a document containing the formatted text
     */
    private Document toDocument(String text) {
        String safeText = Objects.requireNonNullElse(text, "");
        return new Document(this.textFormatter.format(safeText));
    }

    /**
     * Returns {@code value} when it is non-null, otherwise the value produced by {@code fallback}.
     *
     * @param value the preferred value, possibly {@code null}
     * @param fallback producer of the replacement; only invoked when {@code value} is {@code null}
     * @param <T> the value type
     * @return {@code value}, or the fallback result when {@code value} is {@code null}
     */
    private static <T> T getOrDefault(T value, Supplier<T> fallback) {
        return value != null ? value : fallback.get();
    }
}
