from io import BytesIO
import os
import polars as pl
import gzip
from cameo_claw.functional import it_mp_f
from cameo_claw.net import requests_get
import warnings
# Silence every warning for the whole process; this takes effect as soon as
# the module is imported.
warnings.filterwarnings("ignore")


def a_distinct(url, target_directory, lst_distinct_column):
    """Deduplicate the CSV obtained for ``url`` and store it gzip-compressed.

    The output file is ``<target_directory><stem>.csv.gz`` where ``<stem>`` is
    the last path component of ``url`` up to (not including) its first dot.
    ``target_directory`` is concatenated as-is, so it must already end with a
    path separator.

    Args:
        url: Location of the source CSV; also used to derive the file name.
        target_directory: Path prefix the output file name is appended to.
        lst_distinct_column: Columns that define row uniqueness.

    Returns:
        Whatever ``requests_get`` returns for the ``write`` callback.
    """
    def write(bytes1):
        # NOTE(review): bytes1 is presumably the raw response body handed
        # over by requests_get -- confirm against that helper.
        df = pl.read_csv(bytes1)
        df = df.distinct(subset=lst_distinct_column)
        bytesio = BytesIO()
        df.to_csv(bytesio)
        # partition() keeps the whole name when it contains no dot; slicing
        # with str.find() would cut off the last character in that case
        # because find() returns -1.
        filename = os.path.basename(url).partition('.')[0]
        path = f'{target_directory}{filename}.csv.gz'
        with gzip.open(path, 'wb') as f:
            f.write(bytesio.getvalue())
        return url

    return requests_get(write, url, target_directory)


def it_distinct(lst_url, target_directory, lst_distinct_column):
    """Dispatch ``a_distinct`` over every URL in ``lst_url`` through ``it_mp_f``.

    One ``(url, target_directory, lst_distinct_column)`` argument tuple is
    built per URL; the result of ``it_mp_f`` is returned unchanged.
    """
    jobs = [(url, target_directory, lst_distinct_column) for url in lst_url]
    return it_mp_f(a_distinct, jobs)


def a_download(url, target_directory):
    """Store the payload obtained for ``url`` unmodified under ``target_directory``.

    The file name is the last path component of ``url``; it is appended
    directly to ``target_directory`` with no separator inserted. Returns
    whatever ``requests_get`` returns for the ``write`` callback.
    """
    def write(bytes1):
        # Callback name and parameter are kept as-is because requests_get,
        # which is not visible here, is the one invoking it.
        destination = f'{target_directory}{os.path.basename(url)}'
        with open(destination, 'wb') as out:
            out.write(bytes1)
        return url

    return requests_get(write, url, target_directory)


def it_download(lst_url, target_directory):
    """Dispatch ``a_download`` over every URL in ``lst_url`` through ``it_mp_f``.

    One ``(url, target_directory)`` argument tuple is built per URL; the
    result of ``it_mp_f`` is returned unchanged.
    """
    jobs = [(url, target_directory) for url in lst_url]
    return it_mp_f(a_download, jobs)


def a_group(url, target_directory, lst_distinct_column, lst_group_by_column):
    """Deduplicate the CSV obtained for ``url`` and write one gzip file per group.

    Rows are first made unique on ``lst_distinct_column`` and then grouped by
    ``lst_group_by_column``. Each group is written to
    ``<target_directory><stem>_group_<v1>-<v2>-....csv.gz`` where ``<stem>`` is
    the last path component of ``url`` up to its first dot and ``<vN>`` are the
    group's values for the group-by columns (taken from the group's first row,
    with ``_`` replaced by ``-``). ``target_directory`` is concatenated as-is,
    so it must already end with a path separator.

    Args:
        url: Location of the source CSV; also used to derive the file names.
        target_directory: Path prefix the output file names are appended to.
        lst_distinct_column: Columns that define row uniqueness.
        lst_group_by_column: Columns whose value combinations define groups.

    Returns:
        Whatever ``requests_get`` returns for the ``write`` callback.
    """
    def write(bytes1):
        # NOTE(review): bytes1 is presumably the raw response body handed
        # over by requests_get -- confirm against that helper.
        df = pl.read_csv(bytes1)
        df = df.distinct(subset=lst_distinct_column)
        # The stem does not depend on the group, so compute it once.
        # partition() keeps the whole name when it contains no dot; slicing
        # with str.find() would cut off the last character in that case
        # because find() returns -1.
        filename = os.path.basename(url).partition('.')[0]
        for df_group in df.groupby(lst_group_by_column):
            # Every row of a group shares the group-by values, so the first
            # row is enough to label the file.
            first_row = df_group.row(0)
            filename_tail = '_group_' + '-'.join(
                str(first_row[df_group.find_idx_by_name(column)]).replace('_', '-')
                for column in lst_group_by_column)
            bytesio = BytesIO()
            df_group.to_csv(bytesio)
            path = f'{target_directory}{filename}{filename_tail}.csv.gz'
            with gzip.open(path, 'wb') as f:
                f.write(bytesio.getvalue())
        return url

    return requests_get(write, url, target_directory)


def it_group(lst_url, target_directory, lst_distinct_column, lst_group_by_column):
    """Dispatch ``a_group`` over every URL in ``lst_url`` through ``it_mp_f``.

    One ``(url, target_directory, lst_distinct_column, lst_group_by_column)``
    argument tuple is built per URL; the result of ``it_mp_f`` is returned
    unchanged.
    """
    jobs = [
        (url, target_directory, lst_distinct_column, lst_group_by_column)
        for url in lst_url
    ]
    return it_mp_f(a_group, jobs)
