
import re
import datetime
import os
import math

import numpy as np
import pandas as pd

from ...util.mail_retriever import MailAttachmentRetriever, IMAP_SPType, UID_FILE_NAME
from ...util.wechat_bot import WechatBot
from ..wrapper.mysql import BasicDatabaseConnector
from ..view.basic_models import HedgeFundNAV
from ..api.basic import BasicDataApi


class HedgeFundNAVReader:
    """Read hedge-fund NAV spreadsheets from mail attachments and append new rows to the DB.

    The reader fetches Excel attachments through ``MailAttachmentRetriever``,
    parses each one with the reader matching a keyword in the attachment name,
    writes rows that are new or changed to the ``HedgeFundNAV`` table, and
    reports failures and results through ``WechatBot``.
    """

    # Maps the (Chinese) column headers found in the source spreadsheets
    # to the field names used for the parsed DataFrame.
    _COLUMNS_DICT = {
        '基金代码': 'fund_id',  # fund code
        '产品代码': 'fund_id',  # product code
        '基金名称': 'fund_name',  # fund name
        '产品名称': 'fund_name',  # product name
        '基金份额净值': 'net_asset_value',  # NAV per fund share
        '单位净值': 'net_asset_value',  # unit NAV
        '计提前单位净值': 'net_asset_value',  # unit NAV before accrual
        '基金份额累计净值': 'acc_unit_value',  # accumulated NAV per fund share
        '累计单位净值': 'acc_unit_value',  # accumulated unit NAV
        '虚拟后净值': 'v_net_value',  # NAV after virtual deduction
        '虚拟净值': 'v_net_value',  # virtual NAV
        '计提后单位净值': 'v_net_value',  # unit NAV after accrual
        '日期': 'datetime',  # date
        '净值日期': 'datetime',  # NAV date
        '业务日期': 'datetime',  # business date
        '计算日期': 'calc_date',  # calculation date
    }

    def __init__(self, read_dir: str, user_name: str, password: str):
        """Store the working directory and mailbox credentials.

        Args:
            read_dir: Existing directory passed to ``MailAttachmentRetriever``;
                it also holds the uid bookmark file.
            user_name: Mailbox account name.
            password: Mailbox password.

        Raises:
            AssertionError: If ``read_dir`` is not an existing directory.
        """
        self._read_dir = read_dir
        assert os.path.isdir(self._read_dir), f'arg read_dir should be a directory (now){self._read_dir}'

        self._user_name = user_name
        self._password = password
        self._wechat_bot = WechatBot()

    @staticmethod
    def _select_and_rename(df: pd.DataFrame, columns) -> pd.DataFrame:
        """Keep only ``columns`` (in that order) and rename them via ``_COLUMNS_DICT``."""
        return df.loc[:, columns].rename(columns=HedgeFundNAVReader._COLUMNS_DICT)

    @staticmethod
    def _read_for_HuangChengZhiYuanNo3_from_cms(file_path: str) -> pd.DataFrame:
        """Parse the label/value style sheet matched by the 'SGM473' keyword."""
        df = pd.read_excel(file_path, header=None)
        # The first column holds labels; anything after a full-width colon is dropped.
        df.iloc[:, 0] = df.iloc[:, 0].map(lambda x: x.split('：')[0])
        df = df.set_index(df.columns[0])
        # The date is rebuilt from the digit groups found in the third label.
        date = re.findall(r'\d+', df.index.array[2])
        # Transpose so that the labels become column headers.
        df = HedgeFundNAVReader._select_and_rename(df.T, ['基金代码', '基金名称', '基金份额净值', '基金份额累计净值'])
        return df.assign(datetime=pd.to_datetime('-'.join(date), infer_datetime_format=True).date())

    @staticmethod
    def _read_for_HuangChengZhiYuanNo3(file_path: str) -> pd.DataFrame:
        """Parse the sheet matched by the '华澄致远三号' keyword; dates are normalised to ``date``."""
        df = pd.read_excel(file_path)
        df = HedgeFundNAVReader._select_and_rename(df, ['产品代码', '产品名称', '业务日期', '单位净值', '累计单位净值', '虚拟后净值'])
        df['datetime'] = pd.to_datetime(df.datetime.astype(str), infer_datetime_format=True).dt.date
        return df

    @staticmethod
    def _read_for_TianYanGuangQuan_from_tg(file_path: str) -> pd.DataFrame:
        """Parse the sheet matched by the 'SLC213_天演广全' keyword (last row is skipped)."""
        df = pd.read_excel(file_path, skipfooter=1)
        return HedgeFundNAVReader._select_and_rename(df, ['产品名称', '产品代码', '净值日期', '单位净值', '累计单位净值'])

    @staticmethod
    def _read_for_TianYanGuangQuan(file_path: str) -> pd.DataFrame:
        """Parse the sheet matched by the '天演广全' keyword (last row is skipped)."""
        df = pd.read_excel(file_path, skipfooter=1)
        return HedgeFundNAVReader._select_and_rename(
            df, ['基金名称', '基金代码', '净值日期', '计算日期', '单位净值', '累计单位净值', '虚拟净值'])

    @staticmethod
    def _read_for_AnXianHuaMuChangSheng(file_path: str) -> pd.DataFrame:
        """Parse the sheet matched by the '安贤花木长盛' keyword."""
        df = pd.read_excel(file_path)
        return HedgeFundNAVReader._select_and_rename(df, ['产品名称', '产品代码', '日期', '单位净值', '累计单位净值'])

    @staticmethod
    def _read_for_WuZhiCTA1(file_path: str) -> pd.DataFrame:
        """Parse the sheet matched by the '吾执CTA一号' keyword."""
        df = pd.read_excel(file_path)
        return HedgeFundNAVReader._select_and_rename(
            df, ['产品名称', '产品代码', '净值日期', '计提前单位净值', '累计单位净值', '计提后单位净值'])

    @staticmethod
    def _parse_attachment(name: str, file_path: str) -> pd.DataFrame:
        """Pick the reader whose keyword occurs in ``name`` and parse ``file_path`` with it.

        Raises:
            NotImplementedError: If no keyword matches the attachment name.
        """
        # Order matters: the first matching keyword wins, so the more specific
        # 'SLC213_天演广全' must be tested before the generic '天演广全'.
        readers = (
            ('SGM473', HedgeFundNAVReader._read_for_HuangChengZhiYuanNo3_from_cms),
            ('华澄致远三号', HedgeFundNAVReader._read_for_HuangChengZhiYuanNo3),
            ('SLC213_天演广全', HedgeFundNAVReader._read_for_TianYanGuangQuan_from_tg),
            ('天演广全', HedgeFundNAVReader._read_for_TianYanGuangQuan),
            ('安贤花木长盛', HedgeFundNAVReader._read_for_AnXianHuaMuChangSheng),
            ('吾执CTA一号', HedgeFundNAVReader._read_for_WuZhiCTA1),
        )
        for keyword, reader in readers:
            if keyword in name:
                return reader(file_path)
        raise NotImplementedError('unknown hedge fund nav file from attachment')

    def _notify_error_event(self, err_msg: str):
        """Print ``err_msg`` and forward it to the WeChat bot as an update failure."""
        print(f'[read_navs_and_dump_to_db] {err_msg}')
        self._wechat_bot.send_hedge_fund_nav_update_failed(err_msg)

    def read_navs_and_dump_to_db(self):
        """Fetch new attachments, parse them, store new NAV rows and record progress.

        Steps:
            1. Read the last processed uid from the bookmark file (``None`` when
               the file is missing, unreadable or empty).
            2. Retrieve Excel attachments from the mailbox.
            3. Parse every attachment and write rows having a ``v_net_value``
               column to the DB; failures are reported and the file is skipped.
            4. Send the combined new rows through the WeChat bot.
            5. Persist the uid of the last attachment that was processed
               without an exception.
        """
        uid_path = os.path.join(self._read_dir, UID_FILE_NAME)
        try:
            with open(uid_path, 'rb') as f:
                # An empty bookmark file is treated like a missing one.
                uid_last = f.read() or None
        except Exception as e:
            self._notify_error_event(f'read uid file failed (e){e}, use None instead(read all emails)')
            uid_last = None

        try:
            mar = MailAttachmentRetriever(self._read_dir)
            data = mar.get_excels(IMAP_SPType.IMAP_QQ, self._user_name, self._password, uid_last)
        except Exception as e:
            self._notify_error_event(f'FATAL ERROR!! get new data of hedge fund nav failed (e){e}')
            return

        uid_last_succeed = None  # uid (bytes) of the last attachment handled without an exception
        df_list = []  # DataFrames that were actually written to the DB
        for name, comp_date in data.items():
            uid, file_path = comp_date
            # '.xls' is a substring of '.xlsx', so a single check covers both.
            if '.xls' not in name:
                self._notify_error_event(f'not a valid file, do not process it (name){name} (file_path){file_path}')
                continue

            try:
                df = HedgeFundNAVReader._parse_attachment(name, file_path)
            except Exception as e:
                self._notify_error_event(f'{e} (parse) (name){name} (file_path){file_path}')
                continue

            try:
                # FIXME: for now, files without a virtual NAV do not need to be handled
                if 'v_net_value' in df.columns:
                    df = df.drop(columns=['fund_name'])
                    df['insert_time'] = datetime.datetime.now()
                    df = self._dump_to_db(df)
                    if df is not None:
                        df_list.append(df)
                    else:
                        print(f'[read_navs_and_dump_to_db] duplicated data, do not process it (name){name}')
                else:
                    print(f'[read_navs_and_dump_to_db] no virtual net value, do not process it (name){name}')
                # Reaching this point means the attachment is considered fully processed.
                uid_last_succeed = uid
            except Exception as e:
                self._notify_error_event(f'{e} (dump) (name){name} (file_path){file_path}')
                continue

        if df_list:
            try:
                whole_df = pd.concat(df_list).set_index('fund_id')
                print(whole_df)
            except Exception as e:
                self._notify_error_event(f'{e} (concat)')
                return
            else:
                self._wechat_bot.send_hedge_fund_nav_update(whole_df)
                print(f'[read_navs_and_dump_to_db] done (uid_last){uid_last_succeed} (df){whole_df}')
        else:
            print(f'[read_navs_and_dump_to_db] no new data this time, done (uid_last){uid_last_succeed}')
        # Record the last successfully processed uid.
        if uid_last_succeed is not None:
            with open(uid_path, 'wb') as f:
                f.write(uid_last_succeed)

    @staticmethod
    def _drop_unchanged_rows(df: pd.DataFrame, now_df: pd.DataFrame) -> pd.DataFrame:
        """Return the rows of ``df`` whose NAV values differ from the stored ones.

        Args:
            df: Candidate rows with ``fund_id``, ``datetime``, ``net_asset_value``,
                ``acc_unit_value`` and ``v_net_value`` columns.
            now_df: Stored rows indexed by ``(fund_id, datetime)``.

        A row is dropped only when a stored row with the same key exists and all
        three NAV values are equal within ``math.isclose`` tolerance. Filtering
        with a boolean mask (rather than returning empty rows from ``apply``)
        removes the rows for real and keeps the column dtypes intact.
        """
        def _is_changed(row: pd.Series) -> bool:
            try:
                stored = now_df.loc[(row.fund_id, row.datetime)]
            except KeyError:
                # Nothing stored for this fund/date yet.
                return True
            return not (
                math.isclose(stored.net_asset_value, row.net_asset_value)
                and math.isclose(stored.acc_unit_value, row.acc_unit_value)
                and math.isclose(stored.v_net_value, row.v_net_value)
            )

        if df.empty:
            return df
        keep = df.apply(_is_changed, axis=1).astype(bool)
        # Copy so that later column assignments do not target a view of ``df``.
        return df[keep].copy()

    def _dump_to_db(self, df: pd.DataFrame):
        """Append the rows of ``df`` that are not already stored to the NAV table.

        Args:
            df: Parsed NAV rows for a single date.

        Returns:
            The DataFrame that was written, or ``None`` when every row already
            exists unchanged in the DB.

        Raises:
            AssertionError: If ``df`` contains more than one distinct ``datetime``.
        """
        assert df.datetime.nunique() == 1, 'should have single datetime'
        now_df = BasicDataApi().get_hedge_fund_nav(fund_id_list=df.fund_id.to_list())
        if now_df is not None and not now_df.empty:
            # If the NAV for the same product and date already exists and has
            # not changed, do not write it to the DB again.
            now_df = now_df.drop(columns=['_update_time']).sort_values(by=['fund_id', 'datetime'])
            now_df = now_df.drop_duplicates(subset=['fund_id', 'datetime'], keep='last')
            df = df.reindex(columns=now_df.columns).astype(now_df.dtypes.to_dict())
            # First pass: exact match on key and values.
            df = df.merge(
                now_df,
                how='left',
                on=['fund_id', 'datetime', 'net_asset_value', 'acc_unit_value', 'v_net_value'],
                indicator=True,
                validate='one_to_one',
            )
            df = df[df._merge == 'left_only'].drop(columns=['_merge', 'insert_time_y', 'calc_date_y'])
            df = df.rename(columns={'insert_time_x': 'insert_time', 'calc_date_x': 'calc_date'})
            if df.empty:
                return None
            # FIXME: no better approach found yet; second pass checks each
            # remaining row with a float tolerance.
            df = HedgeFundNAVReader._drop_unchanged_rows(df, now_df.set_index(['fund_id', 'datetime']))
            if df.empty:
                return None
            df['insert_time'] = df.insert_time.map(lambda x: x.to_pydatetime())
        df.to_sql(HedgeFundNAV.__table__.name, BasicDatabaseConnector().get_engine(), index=False, if_exists='append')
        return df


if __name__ == '__main__':
    # The working directory and mailbox credentials are taken from the
    # environment; the script exits with a message when any of them is missing.
    required_env = ('EMAIL_DATA_DIR', 'EMAIL_USER_NAME', 'EMAIL_PASSWORD')
    try:
        email_data_dir, user_name, password = [os.environ[key] for key in required_env]
    except KeyError as e:
        import sys
        sys.exit(f'can not found enough params in env (e){e}')

    hf_nav_r = HedgeFundNAVReader(email_data_dir, user_name, password)
    hf_nav_r.read_navs_and_dump_to_db()
