/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.paragraphs;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Annotator that assigns a paragraph index to every sentence of a document.
 *
 * <p>Paragraph breaks are located in the document text
 * ({@code CoreAnnotations.TextAnnotation}) as runs of newline characters, and
 * each sentence in {@code CoreAnnotations.SentencesAnnotation} receives a
 * zero-based {@code CoreAnnotations.ParagraphIndexAnnotation} according to
 * where its begin character offset falls relative to those breaks.
 *
 * <p>The break style is selected by {@link #PARAGRAPH_BREAK}:
 * <ul>
 *   <li>{@code "two"}: a break is two or more consecutive newlines;</li>
 *   <li>{@code "one"}: a break is one or more consecutive newlines.</li>
 * </ul>
 */
public class ParagraphAnnotator
implements Annotator {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ParagraphAnnotator.class);

    /** Break pattern used when {@link #PARAGRAPH_BREAK} is {@code "two"}. */
    private static final Pattern TWO_NEWLINE_BREAK = Pattern.compile("\\n\\n+");

    /** Break pattern used when {@link #PARAGRAPH_BREAK} is {@code "one"}. */
    private static final Pattern ONE_NEWLINE_BREAK = Pattern.compile("\\n+");

    /** When true, progress messages are written to {@code System.err}. */
    private final boolean VERBOSE;
    private final boolean DEBUG = true;

    /** Paragraph break style: {@code "two"} (default) or {@code "one"}. */
    public String PARAGRAPH_BREAK = "two";

    /**
     * Creates the annotator.
     *
     * @param props   configuration; the {@code paragraphBreak} property selects the
     *                break style and defaults to {@code "two"} when absent
     * @param verbose whether to print progress messages to {@code System.err}
     */
    public ParagraphAnnotator(Properties props, boolean verbose) {
        this.PARAGRAPH_BREAK = props.getProperty("paragraphBreak", "two");
        this.VERBOSE = verbose;
    }

    /**
     * Sets {@code CoreAnnotations.ParagraphIndexAnnotation} on every sentence of
     * the given annotation.
     *
     * <p>Sentences are visited in list order. The first sentence always gets
     * index 0; the index is incremented each time a sentence begins at or after
     * the start of a paragraph break that follows the previous paragraph's first
     * sentence.
     *
     * @param annotation document providing the text and the sentence list
     * @throws IllegalArgumentException if {@link #PARAGRAPH_BREAK} is neither
     *         {@code "one"} nor {@code "two"}
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.VERBOSE) {
            System.err.print("Adding paragraph index annotation (" + this.PARAGRAPH_BREAK + ") ...");
        }
        Pattern paragraphSplit = this.resolveParagraphSplit();
        String fullText = annotation.get(CoreAnnotations.TextAnnotation.class);

        // Record the start offset of every paragraph break, in text order.
        List<Integer> paragraphBreaks = new ArrayList<Integer>();
        Matcher m = paragraphSplit.matcher(fullText);
        while (m.find()) {
            paragraphBreaks.add(m.start());
        }

        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        int currParagraph = -1;
        int nextBreak = 0;
        // -1 guarantees the first sentence opens paragraph 0.
        int nextParagraphStartIndex = -1;
        for (CoreMap sent : sentences) {
            int sentBegin = sent.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            if (sentBegin >= nextParagraphStartIndex) {
                // Consume every break at or before this sentence, not just one.
                // Otherwise a leading break, or two breaks with no sentence between
                // them, leaves a stale break behind and the next sentence of this
                // same paragraph would wrongly be given a new index.
                while (nextBreak < paragraphBreaks.size() && paragraphBreaks.get(nextBreak) <= sentBegin) {
                    ++nextBreak;
                }
                nextParagraphStartIndex = nextBreak < paragraphBreaks.size()
                        ? paragraphBreaks.get(nextBreak)
                        : fullText.length();
                ++currParagraph;
            }
            sent.set(CoreAnnotations.ParagraphIndexAnnotation.class, currParagraph);
        }
        if (this.VERBOSE) {
            System.err.println("done");
        }
    }

    /**
     * Maps the current {@link #PARAGRAPH_BREAK} value to its break pattern.
     * The field is public and mutable, so it is validated on every call.
     */
    private Pattern resolveParagraphSplit() {
        if ("two".equals(this.PARAGRAPH_BREAK)) {
            return TWO_NEWLINE_BREAK;
        }
        if ("one".equals(this.PARAGRAPH_BREAK)) {
            return ONE_NEWLINE_BREAK;
        }
        throw new IllegalArgumentException("Unsupported paragraphBreak value \"" + this.PARAGRAPH_BREAK
                + "\"; expected \"one\" or \"two\"");
    }

    /**
     * @return a singleton set containing {@code CoreAnnotations.ParagraphIndexAnnotation}
     */
    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.singleton(CoreAnnotations.ParagraphIndexAnnotation.class);
    }

    /**
     * @return a new mutable set of the annotation keys this annotator declares as prerequisites
     */
    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        return new HashSet<Class<? extends CoreAnnotation>>(Arrays.asList(
                CoreAnnotations.TextAnnotation.class,
                CoreAnnotations.TokensAnnotation.class,
                CoreAnnotations.SentencesAnnotation.class,
                CoreAnnotations.CharacterOffsetBeginAnnotation.class,
                CoreAnnotations.CharacterOffsetEndAnnotation.class,
                CoreAnnotations.BeforeAnnotation.class,
                CoreAnnotations.AfterAnnotation.class,
                CoreAnnotations.TokenBeginAnnotation.class,
                CoreAnnotations.TokenEndAnnotation.class,
                CoreAnnotations.IndexAnnotation.class,
                CoreAnnotations.OriginalTextAnnotation.class));
    }
}

