/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.pos;

import com.google.common.collect.ListMultimap;
import eus.ixa.ixa.pipe.lemma.StatisticalLemmatizer;
import eus.ixa.ixa.pipe.lemma.dict.MorfologikLemmatizer;
import eus.ixa.ixa.pipe.pos.Morpheme;
import eus.ixa.ixa.pipe.pos.MorphoFactory;
import eus.ixa.ixa.pipe.pos.Resources;
import eus.ixa.ixa.pipe.pos.StatisticalTagger;
import eus.ixa.ixa.pipe.pos.StringUtils;
import eus.ixa.ixa.pipe.pos.dict.DictionaryTagger;
import eus.ixa.ixa.pipe.pos.dict.MorfologikTagger;
import eus.ixa.ixa.pipe.pos.dict.MultiWordMatcher;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import ixa.kaflib.WF;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

public class Annotate {
    private final StatisticalTagger posTagger;
    private final StatisticalLemmatizer lemmatizer;
    private final String lang;
    private final MorphoFactory morphoFactory;
    private MorfologikLemmatizer dictLemmatizer;
    private final Boolean multiwords;
    private MultiWordMatcher multiWordMatcher;
    private final Boolean dictag;
    private DictionaryTagger dictMorphoTagger;

    public Annotate(Properties properties) throws IOException {
        this.lang = properties.getProperty("language");
        this.multiwords = Boolean.valueOf(properties.getProperty("multiwords"));
        this.dictag = Boolean.valueOf(properties.getProperty("dictag"));
        if (this.multiwords.booleanValue()) {
            this.multiWordMatcher = new MultiWordMatcher(properties);
        }
        if (this.dictag.booleanValue()) {
            this.loadMorphoTaggerDicts(properties);
        }
        this.loadLemmatizerDicts(properties);
        this.morphoFactory = new MorphoFactory();
        this.posTagger = new StatisticalTagger(properties, this.morphoFactory);
        this.lemmatizer = new StatisticalLemmatizer(properties, this.morphoFactory);
    }

    private void loadLemmatizerDicts(Properties props) {
        Resources resources = new Resources();
        URL binLemmatizerURL = resources.getBinaryDict(this.lang);
        if (binLemmatizerURL == null) {
            System.err.println("WARNING: No lemmatizer dictionary available for language " + this.lang + " in src/main/resources!");
        } else {
            try {
                this.dictLemmatizer = new MorfologikLemmatizer(binLemmatizerURL);
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    private void loadMorphoTaggerDicts(Properties props) {
        Resources resources = new Resources();
        URL binDictMorphoTaggerURL = resources.getBinaryTaggerDict(this.lang);
        if (binDictMorphoTaggerURL == null) {
            System.err.println("ERROR: No binary POS tagger dictionary available for language " + this.lang + " in src/main/resources!!");
            System.exit(1);
        }
        try {
            this.dictMorphoTagger = new MorfologikTagger(binDictMorphoTaggerURL, this.lang);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    public final void annotatePOSToKAF(KAFDocument kaf) {
        List<List<WF>> sentences = kaf.getSentences();
        for (List<WF> wfs : sentences) {
            int i;
            ArrayList<Span<WF>> tokenSpans = new ArrayList<Span<WF>>();
            List<Morpheme> morphemes = null;
            String[] tokens = new String[wfs.size()];
            for (i = 0; i < wfs.size(); ++i) {
                tokens[i] = wfs.get(i).getForm();
                ArrayList<WF> wfTarget = new ArrayList<WF>();
                wfTarget.add(wfs.get(i));
                tokenSpans.add(KAFDocument.newWFSpan(wfTarget));
            }
            if (this.multiwords.booleanValue()) {
                String[] multiWordTokens = this.multiWordMatcher.getTokensWithMultiWords(tokens);
                morphemes = this.posTagger.getMorphemes(multiWordTokens);
                this.getMultiWordSpans(tokens, wfs, tokenSpans);
            } else {
                List<String> posTags = this.posTagger.posAnnotate(tokens);
                String[] posTagsArray = new String[posTags.size()];
                posTagsArray = posTags.toArray(posTagsArray);
                morphemes = this.lemmatizer.getMorphemes(tokens, posTagsArray);
            }
            for (i = 0; i < morphemes.size(); ++i) {
                String lemma;
                Term term = kaf.newTerm((Span)tokenSpans.get(i));
                if (this.dictag.booleanValue()) {
                    String dictPosTag = this.dictMorphoTagger.tag(morphemes.get(i).getWord(), morphemes.get(i).getTag());
                    morphemes.get(i).setTag(dictPosTag);
                }
                String posId = Resources.getKafTagSet(morphemes.get(i).getTag(), this.lang);
                String type = Resources.setTermType(posId);
                if (this.dictLemmatizer != null && !(lemma = this.dictLemmatizer.apply(morphemes.get(i).getWord(), morphemes.get(i).getTag())).equalsIgnoreCase("O")) {
                    morphemes.get(i).setLemma(lemma);
                }
                term.setType(type);
                term.setLemma(morphemes.get(i).getLemma());
                term.setPos(posId);
                term.setMorphofeat(morphemes.get(i).getTag());
            }
        }
    }

    private void getMultiWordSpans(String[] tokens, List<WF> wfs, List<Span<WF>> tokenSpans) {
        opennlp.tools.util.Span[] multiWordSpans = this.multiWordMatcher.multiWordsToSpans(tokens);
        int counter = 0;
        for (opennlp.tools.util.Span mwSpan : multiWordSpans) {
            Integer fromIndex = mwSpan.getStart() - counter;
            Integer toIndex = mwSpan.getEnd() - counter;
            counter = counter + tokenSpans.subList(fromIndex, toIndex).size() - 1;
            List<WF> wfTargets = wfs.subList(mwSpan.getStart(), mwSpan.getEnd());
            Span<WF> multiWordSpan = KAFDocument.newWFSpan(wfTargets);
            tokenSpans.subList(fromIndex, toIndex).clear();
            tokenSpans.add(fromIndex, multiWordSpan);
        }
    }

    public final String annotatePOSToCoNLL(KAFDocument kaf) throws IOException {
        StringBuilder sb = new StringBuilder();
        List<List<WF>> sentences = kaf.getSentences();
        for (List<WF> wfs : sentences) {
            int i;
            ArrayList<Span<WF>> tokenSpans = new ArrayList<Span<WF>>();
            List<Morpheme> morphemes = null;
            String[] tokens = new String[wfs.size()];
            for (i = 0; i < wfs.size(); ++i) {
                tokens[i] = wfs.get(i).getForm();
                ArrayList<WF> wfTarget = new ArrayList<WF>();
                wfTarget.add(wfs.get(i));
                tokenSpans.add(KAFDocument.newWFSpan(wfTarget));
            }
            if (this.multiwords.booleanValue()) {
                String[] multiWordTokens = this.multiWordMatcher.getTokensWithMultiWords(tokens);
                morphemes = this.posTagger.getMorphemes(multiWordTokens);
                this.getMultiWordSpans(tokens, wfs, tokenSpans);
            } else {
                List<String> posTags = this.posTagger.posAnnotate(tokens);
                String[] posTagsArray = new String[posTags.size()];
                posTagsArray = posTags.toArray(posTagsArray);
                morphemes = this.lemmatizer.getMorphemes(tokens, posTagsArray);
            }
            for (i = 0; i < morphemes.size(); ++i) {
                String lemma;
                String posTag = morphemes.get(i).getTag();
                String word = morphemes.get(i).getWord();
                if (this.dictag.booleanValue()) {
                    String dictPosTag = this.dictMorphoTagger.tag(word, posTag);
                    morphemes.get(i).setTag(dictPosTag);
                }
                if (this.dictLemmatizer != null && !(lemma = this.dictLemmatizer.apply(word, morphemes.get(i).getTag())).equalsIgnoreCase("O")) {
                    morphemes.get(i).setLemma(lemma);
                }
                sb.append(word).append("\t").append(morphemes.get(i).getLemma()).append("\t").append(morphemes.get(i).getTag()).append("\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    public final void getAllTagsLemmasToNAF(KAFDocument kaf) {
        List<List<WF>> sentences = kaf.getSentences();
        for (List<WF> wfs : sentences) {
            ArrayList<Span<WF>> tokenSpans = new ArrayList<Span<WF>>();
            String[] tokens = new String[wfs.size()];
            for (int i = 0; i < wfs.size(); ++i) {
                tokens[i] = wfs.get(i).getForm();
                ArrayList<WF> wfTarget = new ArrayList<WF>();
                wfTarget.add(wfs.get(i));
                tokenSpans.add(KAFDocument.newWFSpan(wfTarget));
            }
            String[][] allPosTags = this.posTagger.getAllPosTags(tokens);
            ListMultimap<String, String> morphMap = this.lemmatizer.getMultipleLemmas(tokens, allPosTags);
            for (int i = 0; i < tokens.length; ++i) {
                Term term = kaf.newTerm((Span)tokenSpans.get(i));
                List<String> posLemmaValues = morphMap.get(tokens[i]);
                if (this.dictLemmatizer != null) {
                    this.dictLemmatizer.getAllPosLemmas(tokens[i], posLemmaValues);
                }
                String allPosLemmasSet = StringUtils.getSetStringFromList(posLemmaValues);
                String posId = Resources.getKafTagSet(allPosTags[0][i], this.lang);
                String type = Resources.setTermType(posId);
                term.setType(type);
                term.setLemma(posLemmaValues.get(0).split("#")[1]);
                term.setPos(posId);
                term.setMorphofeat(allPosLemmasSet);
            }
        }
    }

    public final String getAllTagsLemmasToCoNLL(KAFDocument kaf) {
        StringBuilder sb = new StringBuilder();
        List<List<WF>> sentences = kaf.getSentences();
        for (List<WF> wfs : sentences) {
            ArrayList<Span<WF>> tokenSpans = new ArrayList<Span<WF>>();
            String[] tokens = new String[wfs.size()];
            for (int i = 0; i < wfs.size(); ++i) {
                tokens[i] = wfs.get(i).getForm();
                ArrayList<WF> wfTarget = new ArrayList<WF>();
                wfTarget.add(wfs.get(i));
                tokenSpans.add(KAFDocument.newWFSpan(wfTarget));
            }
            String[][] allPosTags = this.posTagger.getAllPosTags(tokens);
            ListMultimap<String, String> morphMap = this.lemmatizer.getMultipleLemmas(tokens, allPosTags);
            for (int i = 0; i < tokens.length; ++i) {
                List<String> posLemmaValues = morphMap.get(tokens[i]);
                if (this.dictLemmatizer != null) {
                    this.dictLemmatizer.getAllPosLemmas(tokens[i], posLemmaValues);
                }
                String allPosLemmasSet = StringUtils.getSetStringFromList(posLemmaValues);
                sb.append(tokens[i]).append("\t").append(allPosLemmasSet).append("\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }
}

