/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.ruta.engine;

import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.ruta.engine.HtmlConverterPSpan;
import org.apache.uima.ruta.engine.HtmlConverterPSpanReplacement;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.visitors.TextExtractingVisitor;

public class HtmlConverterVisitor
extends TextExtractingVisitor {
    private boolean inBody = false;
    private boolean inScript = false;
    private boolean skipWhitespace = true;
    private SortedSet<HtmlConverterPSpan> textSpans = new TreeSet<HtmlConverterPSpan>();
    private SortedSet<HtmlConverterPSpan> linebreaksFromHtmlTags = new TreeSet<HtmlConverterPSpan>();
    private SortedSet<HtmlConverterPSpan> gapsFromHtmlTags = new TreeSet<HtmlConverterPSpan>();
    private Collection<String> newlineInducingTags;
    private boolean processAll = true;
    private List<String> gapInducingTags;
    private String gapText;
    private Pattern newlineInducingTagPattern;

    public HtmlConverterVisitor(String[] newlineInducingTags, String newlineInducingTagRegExp, String[] gapInducingTags, String gapText, boolean skipWhitespace, boolean processAll) {
        if (newlineInducingTags != null) {
            this.newlineInducingTags = Arrays.asList(newlineInducingTags);
        }
        if (gapInducingTags != null) {
            this.gapInducingTags = Arrays.asList(gapInducingTags);
        }
        this.gapText = gapText;
        this.skipWhitespace = skipWhitespace;
        this.processAll = processAll;
        if (newlineInducingTagRegExp != null) {
            this.newlineInducingTagPattern = Pattern.compile(newlineInducingTagRegExp);
        }
    }

    public void visitStringNode(Text node) {
        super.visitStringNode(node);
        if (!(!this.processAll && !this.inBody || this.inScript || this.skipWhitespace && StringUtils.isBlank((CharSequence)node.getText()))) {
            int from = node.getStartPosition();
            int to = node.getEndPosition();
            this.textSpans.add(new HtmlConverterPSpan(from, to, node.getText()));
        }
    }

    public void visitTag(Tag tag) {
        Matcher matcher;
        super.visitTag(tag);
        String trimmedTagnameLowercase = tag.getTagName().toLowerCase().trim();
        if (trimmedTagnameLowercase.equals("body")) {
            this.inBody = true;
        } else if (trimmedTagnameLowercase.equals("script")) {
            this.inScript = true;
        }
        boolean matchedByPattern = false;
        if (this.newlineInducingTagPattern != null && (matcher = this.newlineInducingTagPattern.matcher(trimmedTagnameLowercase)).matches()) {
            matchedByPattern = true;
        }
        if (matchedByPattern || this.newlineInducingTags != null && this.newlineInducingTags.contains(trimmedTagnameLowercase)) {
            int begin = tag.getStartPosition();
            this.linebreaksFromHtmlTags.add(new HtmlConverterPSpanReplacement(begin, begin + 1, "\n"));
        }
        if (this.gapInducingTags != null && this.gapInducingTags.contains(trimmedTagnameLowercase)) {
            int begin = tag.getStartPosition();
            this.gapsFromHtmlTags.add(new HtmlConverterPSpanReplacement(begin, begin + this.gapText.length(), this.gapText));
        }
    }

    public void visitEndTag(Tag tag) {
        String tagname = tag.getTagName().toLowerCase().trim();
        if (tagname.equals("body")) {
            this.inBody = false;
        } else if (tagname.equals("script") || tag instanceof ScriptTag) {
            this.inScript = false;
        }
    }

    public SortedSet<HtmlConverterPSpan> getTextSpans() {
        return this.textSpans;
    }

    public SortedSet<HtmlConverterPSpan> getLinebreaksFromHtmlTags() {
        return this.linebreaksFromHtmlTags;
    }

    public SortedSet<HtmlConverterPSpan> getGapsFromHtmlTags() {
        return this.gapsFromHtmlTags;
    }
}

