""" Text search to retrieve a statement """
import logging
import sys
import os

import yaml
import pandas as pd
# from rapidfuzz import process
from qary.config import DATA_DIR
from qary.init import maybe_download

# Module logger, registered under the name 'qary'
log = logging.getLogger("qary")


# Maximum number of statement-reply pairs that a Skill keeps after loading its database
LIMIT = 1_000_000


def get_data(name):
    """ Fetch the CSV dataset called `name` and return it as a DataFrame

    Stands in for nlpia.loaders.get_data so that nlpia (and its conflicting
    dependency versions) does not have to be installed.

    `name` is converted to a string and given a '.csv' suffix; the resulting filename
    is handed to `maybe_download()` and the path it returns is read with pandas.

    >>> get_data('movie_dialog').shape
    (64350, 2)
    """
    csv_name = f'{name!s}.csv'
    return pd.read_csv(maybe_download(filename=csv_name))


def normalize(text):
    """ Return a lowercased copy of `text`

    >>> normalize('Hi There!')
    'hi there!'
    """
    lowercased = text.lower()
    return lowercased


def scale_probability(p):
    """ Square `p` so that low similarity scores are penalized more than high ones

    A Levenshtein similarity is only meaningful when it is high, so squaring keeps
    scores near 1 almost unchanged while pushing small scores toward 0.

    >>> scale_probability(0.5)
    0.25
    """
    squared = pow(p, 2)
    return squared


def load_faq(faq_path=os.path.join(DATA_DIR, 'dsfaq_plus_faq_data_science_and_machine_learning.yml')):
    """ Load a YAML sequence of question-answer records into a DataFrame

    Every record is expected to be a mapping. String keys are lowercased so that
    records with differently-cased keys end up in the same DataFrame column.
    Records that are not mappings are logged and replaced by an empty dict.
    Rows containing any missing value are dropped before returning.

    Args:
        faq_path (str): path of the YAML file to read

    Returns:
        pandas.DataFrame: one row per complete record (empty when the file holds no records)

    Raises:
        yaml.YAMLError: when the file cannot be parsed (the error is logged, then re-raised)
    """
    with open(faq_path, 'r', encoding='utf-8') as instream:
        try:
            faq = yaml.safe_load(instream)
        except yaml.YAMLError:
            log.exception(f'Unable to parse FAQ file {faq_path}')
            raise
    if faq is None:
        # safe_load() returns None for an empty document; treat that as "no records"
        faq = []
    for i, qa in enumerate(faq):
        if not isinstance(qa, dict):
            faq[i] = {}
            log.warning(f'qa #{i} was not a dict')
            continue
        # Build a new dict rather than popping/inserting keys while iterating over `qa`,
        # which raises "RuntimeError: dictionary keys changed during iteration".
        # Non-string keys (e.g. YAML integers) have no .lower() and are kept unchanged.
        faq[i] = {(k.lower() if isinstance(k, str) else k): v for k, v in qa.items()}
    # NOTE: records are not required to contain particular keys such as 'q' or 'a';
    # a record lacking a column that other records have gets NaN there and is
    # discarded by dropna() below.
    faq = pd.DataFrame(faq)
    faq = faq.dropna()
    return faq


class Skill:
    """ Skill that answers a statement from a dictionary of statement-reply pairs

    The pairs are loaded with `load_dialog()` from the dataset called `name` when the
    skill is constructed. The fuzzy lookup in `reply()` is currently stubbed out because
    the rapidfuzz import is broken, so every reply carries a confidence of 0.0.
    """
    # statement -> reply mapping; shadowed by an instance attribute set in __init__()
    db = None

    def __init__(self, name='movie_dialog'):
        """ Remember the dataset `name` and eagerly load its statement-reply pairs """
        self.limit = LIMIT
        self.name = name
        # TODO: make this lazy, do it inside reply()
        self.db = self.load_dialog(name=name)

    def load_dialog(self, name='movie_dialog'):
        """ Load the statement-reply database called `name` as a dict

        'dsfaq' is read with `load_faq()`; any other name is fetched with `get_data()`.
        At most `self.limit` rows are kept. The first column supplies the keys (statements)
        and the second column the values (replies); when a statement occurs more than
        once the last row wins.

        >>> bot = Skill()
        >>> db = bot.load_dialog()
        >>> len(db)
        58007
        """
        log.warning(f'Loading {name} statement-reply pairs...')
        if name == 'dsfaq':
            db = load_faq()
        else:
            db = get_data(name)
        # Report the dataset actually being loaded, which may differ from self.name
        log.info(f'Loaded {len(db)} {name} statement-reply pairs.')
        if self.limit < len(db):
            log.info(f'Limiting {name} database to {self.limit} statement-reply pairs.')
            db = db.iloc[:self.limit]
        db = dict(zip(db[db.columns[0]], db[db.columns[1]]))
        return db

    def reply(self, statement, context=None, db=None):
        """ Return a list with one (confidence, reply) tuple for `statement`

        The rapidfuzz lookup of the closest key in the dictionary is disabled (see FIXME
        below), so a placeholder reply with a confidence of 0.0 is returned and an error
        is logged on every call.

        `db` is only used when this skill has no database yet; if neither is available
        the dataset named `self.name` is loaded. `context` is not used.

        >>> bot = Skill()
        >>> reply = bot.reply

        >> reply('hey', {'hello': 'world', 'goodbye': 'fantasy'})
        (0.3, 'fantasy')
        >> reply("Hi!")
        (1.0, 'hey there. tired of breathing?')
        """
        if self.db is None:
            self.db = db
        if self.db is None:
            # Load the dataset this skill was configured with rather than the default one
            self.db = self.load_dialog(name=self.name)
        assert len(self.db) > 0, "db of movie quotes is empty"

        # FIXME: rapidfuzz broken on linux:
        # from rapidfuzz import fuzz, utils
        # E   ImportError: ../python3.9/site-packages/rapidfuzz/fuzz.cpython-39-x86_64-linux-gnu.so:
        #                  undefined symbol: _ZSt28__throw_bad_array_new_lengthv

        # movie_statement, percent_match, movie_reply = process.extractOne(
        #     normalize(statement), choices=self.db)
        movie_statement = "rapidfuzz borked and DEPRECATED"
        percent_match = 0.
        movie_reply = "undefined symbol: _ZSt28__throw_bad_array_new_lengthv"
        log.info(f'Closest movie_statement = {movie_statement}')
        log.error("import rapidfuzz.process borked and DEPRECATED undefined symbol: _ZSt28__throw_bad_array_new_lengthv")
        # percent_match is on a 0-100 scale; callers receive a 0-1 confidence
        return [((percent_match / 100.), movie_reply)]


# class FAQBot(Skill):
#     def __init__(self, name='dsfaq'):
#         self.limit = LIMIT
#         # TODO: make this lazy, do it inside reply()
#         self.db = self.load_dialog(name=name)
# BOTS = (Skill, FAQBot)

if __name__ == '__main__':
    # Reply to the statement given on the command line (all arguments joined by spaces).
    # Nothing is loaded or printed when no arguments are supplied.
    if len(sys.argv) > 1:
        bot = Skill()
        statement = ' '.join(sys.argv[1:])
        print(bot.reply(statement))
