import spacy
from spacy.tokens import Doc
from spacy.lang.en import English
import pytest
from textdescriptives.components import POSStatistics


@pytest.fixture(scope="function")
def nlp():
    """Return the small English pipeline with ``pos_stats`` appended.

    The model is loaded with ``ner`` and ``textcat`` passed as disabled
    components, and ``pos_stats`` is added at the end of the pipeline.
    """
    pipeline = spacy.load("en_core_web_sm", disable=("ner", "textcat"))
    pipeline.add_pipe("pos_stats")
    return pipeline


@pytest.fixture(scope="function")
def doc(nlp):
    """Return a three-sentence ``Doc`` with hand-assigned coarse POS tags.

    The ``Doc`` is constructed directly from the pipeline's vocab, so the
    tags are exactly the ones listed here rather than tagger predictions.
    """
    # NOTE(review): presumably this depends on `nlp` so that the `pos_stats`
    # component has been created before the tests read `pos_proportions`
    # -- confirm against the component implementation.
    # Each entry pairs a token with its POS tag to keep the two aligned.
    tagged_tokens = [
        ("Here", "ADV"),
        ("is", "AUX"),
        ("the", "DET"),
        ("first", "ADJ"),
        ("sentence", "NOUN"),
        (".", "PUNCT"),
        ("It", "PRON"),
        ("was", "AUX"),
        ("pretty", "ADV"),
        ("short", "ADJ"),
        (".", "PUNCT"),
        ("Let", "VERB"),
        ("'s", "PRON"),
        ("make", "VERB"),
        ("another", "DET"),
        ("one", "NOUN"),
        ("that", "PRON"),
        ("'s", "AUX"),
        ("slightly", "ADV"),
        ("longer", "ADJ"),
        ("and", "CCONJ"),
        ("more", "ADV"),
        ("complex", "ADJ"),
        (".", "PUNCT"),
    ]
    token_texts = [text for text, _ in tagged_tokens]
    token_tags = [tag for _, tag in tagged_tokens]
    return Doc(nlp.vocab, words=token_texts, pos=token_tags)


def test_pos_integrations(nlp):
    """The ``pos_stats`` component is the last entry in the pipeline."""
    last_component = nlp.pipe_names[-1]
    assert last_component == "pos_stats"


def test_pos_proportions_doc(doc):
    """Doc-level POS proportions match the hand-counted tag shares."""
    # Shares of each tag among the 24 tokens of the fixture document.
    expected = {
        "pos_prop_ADV": 0.1666,
        "pos_prop_AUX": 0.125,
        "pos_prop_DET": 0.083,
        "pos_prop_ADJ": 0.1666,
        "pos_prop_NOUN": 0.0833,
        "pos_prop_PUNCT": 0.125,
        "pos_prop_PRON": 0.125,
        "pos_prop_VERB": 0.083,
        "pos_prop_CCONJ": 0.0416,
    }
    observed = doc._.pos_proportions

    assert observed == pytest.approx(expected, rel=0.05)


def test_pos_proportions_span(doc):
    """Span-level POS proportions over the whole document match the counts."""
    # Shares of each tag among the 24 tokens of the fixture document.
    expected = {
        "pos_prop_ADV": 0.1666,
        "pos_prop_AUX": 0.125,
        "pos_prop_DET": 0.083,
        "pos_prop_ADJ": 0.1666,
        "pos_prop_NOUN": 0.0833,
        "pos_prop_PUNCT": 0.125,
        "pos_prop_PRON": 0.125,
        "pos_prop_VERB": 0.083,
        "pos_prop_CCONJ": 0.0416,
    }
    # Slice covering every token, so the span sees the same tags as the doc.
    full_span = doc[:]
    observed = full_span._.pos_proportions

    assert observed == pytest.approx(expected, rel=0.01)
