/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pdfbox.tools;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.Map;
import java.util.concurrent.Callable;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.tools.AngleCollector;
import org.apache.pdfbox.tools.FilteredTextStripper;
import org.apache.pdfbox.tools.NullWriter;
import org.apache.pdfbox.tools.PDFText2HTML;
import org.apache.pdfbox.tools.Version;
import org.apache.pdfbox.util.Matrix;
import picocli.CommandLine;

/**
 * Command line tool ({@code extracttext}) that extracts the text of a PDF document, and of any
 * PDF files embedded in it, as plain text or HTML.
 *
 * <p>The text is written either to the console or to a file. When no output file is given, the
 * output path is the input path with its extension replaced by {@code .txt} or {@code .html}.
 *
 * <p>Exit codes returned by {@link #call()}: {@code 0} on success, {@code 1} when the document's
 * access permissions forbid content extraction, {@code 4} when an {@link IOException} occurred.
 */
@CommandLine.Command(name="extracttext", header={"Extracts the text from a PDF document"}, versionProvider=Version.class, mixinStandardHelpOptions=true)
public final class ExtractText
implements Callable<Integer> {
    private static final Log LOG = LogFactory.getLog(ExtractText.class);
    /** Default output encoding; also the only encoding used for HTML output. */
    private static final String STD_ENCODING = "UTF-8";
    // Captured once in the constructor so that all messages of this instance go to the same streams.
    private final PrintStream SYSOUT;
    private final PrintStream SYSERR;
    @CommandLine.Option(names={"-alwaysNext"}, description={"Process next page (if applicable) despite IOException (ignored when -html)"})
    private boolean alwaysNext = false;
    @CommandLine.Option(names={"-console"}, description={"Send text to console instead of file"})
    private boolean toConsole = false;
    @CommandLine.Option(names={"-debug"}, description={"Enables debug output about the time consumption of every stage"})
    private boolean debug = false;
    @CommandLine.Option(names={"-encoding"}, description={"UTF-8 or ISO-8859-1, UTF-16BE, UTF-16LE, etc. (default: ${DEFAULT-VALUE})"})
    private String encoding = "UTF-8";
    @CommandLine.Option(names={"-endPage"}, description={"The last page to extract (1 based, inclusive)"})
    private int endPage = Integer.MAX_VALUE;
    @CommandLine.Option(names={"-html"}, description={"Output in HTML format instead of raw text"})
    private boolean toHTML = false;
    @CommandLine.Option(names={"-ignoreBeads"}, description={"Disables the separation by beads"})
    private boolean ignoreBeads = false;
    @CommandLine.Option(names={"-password"}, description={"the password for the PDF or certificate in keystore."}, arity="0..1", interactive=true)
    private String password = "";
    @CommandLine.Option(names={"-rotationMagic"}, description={"Analyze each page for rotated/skewed text, rotate to 0\u00b0 and extract separately (slower, and ignored when -html)"})
    private boolean rotationMagic = false;
    @CommandLine.Option(names={"-sort"}, description={"Sort the text before writing of every stage"})
    private boolean sort = false;
    @CommandLine.Option(names={"-startPage"}, description={"The first page to start extraction (1 based)"})
    private int startPage = 1;
    @CommandLine.Option(names={"-i", "--input"}, description={"the PDF file"}, required=true)
    private File infile;
    @CommandLine.Option(names={"-o", "--output"}, description={"the exported text file"})
    private File outfile;
    @CommandLine.Option(names={"-addFileName"}, description={"Print PDF file name to the output text"})
    private boolean addFileName = false;
    @CommandLine.Option(names={"-append"}, description={"Use append mode for output file"})
    private boolean append = false;

    /**
     * Creates the command, binding its message streams to the current {@code System.out} and
     * {@code System.err}.
     */
    public ExtractText() {
        this.SYSOUT = System.out;
        this.SYSERR = System.err;
    }

    /**
     * Command line entry point: parses {@code args} with picocli, runs the extraction and
     * terminates the JVM with the resulting exit code.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        System.setProperty("apple.awt.UIElement", "true");
        int exitCode = new CommandLine(new ExtractText()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Runs the extraction with the options currently set on this instance.
     *
     * @return {@code 0} on success, {@code 1} if the document does not permit content extraction,
     *         {@code 4} if an {@link IOException} occurred while loading, extracting or writing
     */
    @Override
    public Integer call() {
        String ext = this.toHTML ? ".html" : ".txt";
        if (this.outfile == null) {
            String outPath = FilenameUtils.removeExtension(this.infile.getAbsolutePath()) + ext;
            this.outfile = new File(outPath);
        }
        if (this.toHTML && !STD_ENCODING.equals(this.encoding)) {
            this.encoding = STD_ENCODING;
            this.SYSOUT.println("The encoding parameter is ignored when writing html output.");
        }
        // Only warn when a non-default encoding is in effect: the field is never null, so a
        // plain null check would print this notice on every console run.
        if (this.toConsole && !STD_ENCODING.equals(this.encoding)) {
            this.SYSOUT.println("The encoding parameter is ignored when writing to the console.");
        }
        try (PDDocument document = Loader.loadPDF(this.infile, this.password);
             Writer output = this.createOutputWriter()) {
            long startTime = this.startProcessing("Loading PDF " + this.infile);
            AccessPermission ap = document.getCurrentAccessPermission();
            if (!ap.canExtractContent()) {
                this.SYSERR.println("You do not have permission to extract text");
                return 1;
            }
            this.stopProcessing("Time for loading: ", startTime);
            startTime = this.startProcessing("Starting text extraction");
            if (this.addFileName) {
                output.write("PDF file: " + this.infile);
                output.write(System.lineSeparator());
            }
            if (this.debug) {
                this.SYSERR.println("Writing to " + this.outfile.getAbsolutePath());
            }
            PDFTextStripper stripper;
            if (this.toHTML) {
                // HTML output is produced in a single pass over the requested page range.
                stripper = new PDFText2HTML();
                stripper.setSortByPosition(this.sort);
                stripper.setShouldSeparateByBeads(!this.ignoreBeads);
                stripper.setStartPage(this.startPage);
                stripper.setEndPage(this.endPage);
                stripper.writeText(document, output);
            } else {
                // Plain text is extracted page by page so that -alwaysNext and -rotationMagic
                // can be applied to individual pages.
                stripper = this.rotationMagic ? new FilteredTextStripper() : new PDFTextStripper();
                stripper.setSortByPosition(this.sort);
                stripper.setShouldSeparateByBeads(!this.ignoreBeads);
                this.extractPages(this.startPage, Math.min(this.endPage, document.getNumberOfPages()), stripper, document, output, this.rotationMagic, this.alwaysNext);
            }
            this.extractEmbeddedPdfs(document, stripper, output);
            output.flush();
            this.stopProcessing("Time for extraction: ", startTime);
            return 0;
        }
        catch (IOException ioe) {
            this.SYSERR.println("Error extracting text for document [" + ioe.getClass().getSimpleName() + "]: " + ioe.getMessage());
            return 4;
        }
    }

    /**
     * Extracts the text of every embedded file of {@code document} whose subtype is
     * {@code application/pdf}, reusing the stripper configured for the main document.
     *
     * <p>Each embedded document is closed after processing. A failure is propagated to the
     * caller rather than being silently dropped.
     *
     * @param document the main document whose embedded files are inspected
     * @param stripper the stripper already configured for the main document
     * @param output the writer receiving the extracted text
     * @throws IOException if an embedded PDF cannot be loaded, extracted or closed
     */
    private void extractEmbeddedPdfs(PDDocument document, PDFTextStripper stripper, Writer output) throws IOException {
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        if (names == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        Map<String, PDComplexFileSpecification> embeddedFileNames = embeddedFiles.getNames();
        if (embeddedFileNames == null) {
            return;
        }
        for (Map.Entry<String, PDComplexFileSpecification> ent : embeddedFileNames.entrySet()) {
            if (this.debug) {
                this.SYSERR.println("Processing embedded file " + ent.getKey() + ":");
            }
            PDEmbeddedFile file = ent.getValue().getEmbeddedFile();
            if (file == null || !"application/pdf".equals(file.getSubtype())) {
                continue;
            }
            if (this.debug) {
                this.SYSERR.println("  is PDF (size=" + file.getSize() + ")");
            }
            // try-with-resources closes the sub-document and lets any exception propagate.
            // NOTE(review): assumes createBufferFromStream consumes and closes the stream it is
            // given - confirm against the PDFBox version in use.
            try (PDDocument subDoc = Loader.loadPDF(RandomAccessReadBuffer.createBufferFromStream(file.createInputStream()))) {
                if (this.toHTML) {
                    stripper.writeText(subDoc, output);
                } else {
                    this.extractPages(1, subDoc.getNumberOfPages(), stripper, subDoc, output, this.rotationMagic, this.alwaysNext);
                }
            }
        }
    }

    /**
     * Creates the writer that receives the extracted text.
     *
     * <p>In console mode the writer wraps the standard output stream and its {@code close()} is
     * a no-op, so the console stream stays open. Otherwise the writer targets the output file,
     * honouring the append flag and the configured encoding.
     *
     * @return the writer to send extracted text to
     * @throws IOException if the output file cannot be opened or the encoding is not supported
     */
    private Writer createOutputWriter() throws IOException {
        if (this.toConsole) {
            return new PrintWriter(this.SYSOUT){

                @Override
                public void close() {
                    // Intentionally empty: the console stream must not be closed.
                }
            };
        }
        FileOutputStream fileStream = new FileOutputStream(this.outfile, this.append);
        try {
            return new OutputStreamWriter((OutputStream)fileStream, this.encoding);
        }
        catch (IOException ex) {
            // The encoding was rejected: release the file handle that is already open.
            try {
                fileStream.close();
            }
            catch (IOException closeEx) {
                ex.addSuppressed(closeEx);
            }
            throw ex;
        }
    }

    /**
     * Extracts the text of the pages {@code startPage..endPage} (1 based, inclusive), one page
     * at a time.
     *
     * <p>With {@code rotationMagic}, the page rotation is temporarily set to 0 and the page is
     * scanned by an {@link AngleCollector}. For every collected angle a counter-rotating content
     * stream is prepended, the text is extracted, and the stream is removed again. The rotation
     * and the page contents are restored even when extraction fails.
     *
     * @param startPage first page to extract (1 based)
     * @param endPage last page to extract (1 based, inclusive)
     * @param stripper the stripper used for extraction; its start/end page are overwritten
     * @param document the document to extract from
     * @param output the writer receiving the extracted text
     * @param rotationMagic whether to analyse and compensate rotated text per page
     * @param alwaysNext whether to log an {@link IOException} and continue with the next page
     *        instead of aborting
     * @throws IOException if a page fails and {@code alwaysNext} is {@code false}
     */
    private void extractPages(int startPage, int endPage, PDFTextStripper stripper, PDDocument document, Writer output, boolean rotationMagic, boolean alwaysNext) throws IOException {
        for (int p = startPage; p <= endPage; ++p) {
            stripper.setStartPage(p);
            stripper.setEndPage(p);
            try {
                if (rotationMagic) {
                    this.extractRotatedPage(p, stripper, document, output);
                } else {
                    stripper.writeText(document, output);
                }
            }
            catch (IOException ex) {
                if (!alwaysNext) {
                    throw ex;
                }
                LOG.error("Failed to process page " + p, ex);
            }
        }
    }

    /**
     * Extracts one page once per text angle found on it, temporarily modifying the page.
     *
     * @param p the page number (1 based); the stripper must already be limited to this page
     * @param stripper the stripper used for extraction
     * @param document the document containing the page
     * @param output the writer receiving the extracted text
     * @throws IOException if analysing or extracting the page fails
     */
    private void extractRotatedPage(int p, PDFTextStripper stripper, PDDocument document, Writer output) throws IOException {
        PDPage page = document.getPage(p - 1);
        int rotation = page.getRotation();
        page.setRotation(0);
        try {
            AngleCollector angleCollector = new AngleCollector();
            angleCollector.setStartPage(p);
            angleCollector.setEndPage(p);
            angleCollector.writeText(document, new NullWriter());
            for (int angle : angleCollector.getAngles()) {
                try (PDPageContentStream cs = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.PREPEND, false)) {
                    cs.transform(Matrix.getRotateInstance(-Math.toRadians(angle), 0.0f, 0.0f));
                }
                try {
                    stripper.writeText(document, output);
                }
                finally {
                    // Drop the prepended rotation stream again, even if extraction failed.
                    // NOTE(review): relies on the page contents being an array after PREPEND,
                    // as the original code did - confirm for pages without prior contents.
                    ((COSArray)page.getCOSObject().getItem(COSName.CONTENTS)).remove(0);
                }
            }
        }
        finally {
            // Restore the original rotation so a failed page does not stay modified.
            page.setRotation(rotation);
        }
    }

    /**
     * Prints {@code message} to the error stream when debug output is enabled and returns the
     * current time for a later call to {@link #stopProcessing(String, long)}.
     *
     * @param message the stage description to print in debug mode
     * @return the current time in milliseconds
     */
    private long startProcessing(String message) {
        if (this.debug) {
            this.SYSERR.println(message);
        }
        return System.currentTimeMillis();
    }

    /**
     * When debug output is enabled, prints {@code message} followed by the seconds elapsed since
     * {@code startTime}.
     *
     * @param message the prefix of the printed line
     * @param startTime the start time in milliseconds, as returned by
     *        {@link #startProcessing(String)}
     */
    private void stopProcessing(String message, long startTime) {
        if (this.debug) {
            long stopTime = System.currentTimeMillis();
            float elapsedTime = (float)(stopTime - startTime) / 1000.0f;
            this.SYSERR.println(message + elapsedTime + " seconds");
        }
    }

    /**
     * Computes the angle of a text position in whole degrees.
     *
     * <p>The angle is derived from a copy of the text matrix concatenated with the font matrix,
     * as {@code atan2(shearY, scaleY)} rounded to the nearest integer.
     *
     * @param text the text position to inspect
     * @return the rounded angle in degrees
     */
    static int getAngle(TextPosition text) {
        // Work on a clone so the matrix owned by the TextPosition is not modified.
        Matrix m4 = text.getTextMatrix().clone();
        m4.concatenate(text.getFont().getFontMatrix());
        return (int)Math.round(Math.toDegrees(Math.atan2(m4.getShearY(), m4.getScaleY())));
    }
}

