# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks/CLI_DataBlob.ipynb (unless otherwise specified).

__all__ = ["logger"]

# Cell

from typing import *

# Internal Cell

import os
import json

import typer
from typer import echo
from tabulate import tabulate
import datetime as dt
import pandas as pd

from ..client import Client
from . import helper
from ..logger import get_logger, set_level
from ..constant import CLIENT_DB_USERNAME, CLIENT_DB_PASSWORD

# Internal Cell

# Typer application that groups every datablob command defined in this module.
app = typer.Typer(
    help=(
        "A set of commands for importing and processing the data from sources "
        "like CSV files, databases, or AWS S3 bucket."
    )
)

# Cell

# Module-level logger named after this module; it is the only name listed in ``__all__``.
logger = get_logger(__name__)

# Internal Cell


# NOTE: typer uses the function docstring as the help text of `from-s3`, so
# implementation notes are kept in comments and the docstring stays short.
@app.command("from-s3")
@helper.requires_auth_token
def from_s3(
    uri: str = typer.Argument(..., help="The AWS S3 bucket uri."),
    access_key: Optional[str] = typer.Option(
        None,
        help="Access key for the S3 bucket. If **None** (default value), then the value from **AWS_ACCESS_KEY_ID** environment variable is used.",
    ),
    secret_key: Optional[str] = typer.Option(
        None,
        help="Secret key for the S3 bucket. If **None** (default value), then the value from **AWS_SECRET_ACCESS_KEY** environment variable is used.",
    ),
    tag: Optional[str] = typer.Option(
        None,
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from an AWS S3 bucket."""

    # `debug` is not read in this body; presumably it is consumed by
    # `helper.requires_auth_token` -- TODO confirm against the helper module.
    from ..client import DataBlob

    datablob = DataBlob.from_s3(
        uri=uri,
        access_key=access_key,
        secret_key=secret_key,
        tag=tag,
    )

    if not quiet:
        # Verbose mode: print the id first, then show the progress bar.
        typer.echo(f"Pulling datablob id: {datablob.id}")
        datablob.progress_bar()
        return

    # Quiet mode: call wait() and print nothing but the id.
    datablob.wait()
    typer.echo(f"{datablob.id}")


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `from-mysql`,
# so the docstring is user-facing output and extra notes live in comments.
@app.command("from-mysql")
@helper.requires_auth_token
def from_mysql(
    host: str = typer.Option(..., help="Remote database host name."),
    database: str = typer.Option(..., help="Database name."),
    table: str = typer.Option(..., help="Table name."),
    port: int = typer.Option(
        3306,
        help="Host port number. If not passed, then the default value **3306** will be used.",
    ),
    username: Optional[str] = typer.Option(
        None,
        "--username",
        "-u",
        help="Database username. If not passed, then the value set in the environment variable"
        f" **{CLIENT_DB_USERNAME}** will be used else the default value **root** will be used.",
    ),
    password: Optional[str] = typer.Option(
        None,
        "--password",
        "-p",
        help="Database password. If not passed, then the value set in the environment variable"
        f' **{CLIENT_DB_PASSWORD}** will be used else the default value "" will be used.',
    ),
    tag: Optional[str] = typer.Option(
        None,
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from a mysql database.

    If the database requires authentication, pass the username/password as commandline arguments or store it in
    the **AIRT_CLIENT_DB_USERNAME** and **AIRT_CLIENT_DB_PASSWORD** environment variables.
    """

    from ..client import DataBlob

    # Every CLI option except `quiet`/`debug` is forwarded unchanged.
    source_params = dict(
        host=host,
        database=database,
        port=port,
        table=table,
        username=username,
        password=password,
        tag=tag,
    )
    datablob = DataBlob.from_mysql(**source_params)

    if not quiet:
        # Verbose mode: print the id first, then show the progress bar.
        typer.echo(f"Pulling datablob id: {datablob.id}")
        datablob.progress_bar()
        return

    # Quiet mode: call wait() and print nothing but the id.
    datablob.wait()
    typer.echo(f"{datablob.id}")


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `from-csv`, so
# implementation notes are kept in comments and the docstring stays short.
@app.command("from-csv")
@helper.requires_auth_token
def from_csv(
    id: int = typer.Option(
        ...,
        help="Datablob id in the server.",
    ),
    index_column: str = typer.Option(
        ...,
        help="The column to use as index (row labels).",
    ),
    sort_by: str = typer.Option(
        ...,
        help="The column(s) to sort the data. Can either be a string or a JSON encoded sequence of strings.",
    ),
    deduplicate_data: bool = typer.Option(
        False,
        help="If set to **True** (default value **False**), then duplicate rows are removed while uploading.",
    ),
    blocksize: str = typer.Option(
        "256MB",
        help="Data split size in bytes. If None, then the split size is set to **256MB**.",
    ),
    kwargs_json: Optional[str] = typer.Option(
        None,
        help="Any additional parameters to be used while processing the data in a JSON string format.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datasource id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Process the CSV data and return a datasource object."""

    from ..client import DataBlob

    # The extra parameters are expanded with ** below, so they must decode to
    # a JSON object. Report bad input as a CLI usage error, not a traceback.
    kwargs: Dict[str, Any] = {}
    if kwargs_json:
        try:
            kwargs = json.loads(kwargs_json)
        except json.JSONDecodeError as e:
            raise typer.BadParameter(
                f"not a valid JSON string: {e}", param_hint="--kwargs-json"
            ) from e
        if not isinstance(kwargs, dict):
            raise typer.BadParameter(
                "must be a JSON object.", param_hint="--kwargs-json"
            )

    # --sort-by is either a plain column name or a JSON encoded sequence of
    # names. Only a decoded string or list replaces the raw text; a bare name
    # that happens to be valid JSON (e.g. 2021, true, null) stays the literal
    # column name instead of silently becoming an int, bool or None.
    sort_columns: Union[str, List[str]] = sort_by
    try:
        decoded = json.loads(sort_by)
    except json.JSONDecodeError:
        # Not JSON at all: treat the text as a single column name.
        pass
    else:
        if isinstance(decoded, (str, list)):
            sort_columns = decoded

    db = DataBlob(id=id)
    ds = db.from_csv(
        index_column=index_column,
        sort_by=sort_columns,
        deduplicate_data=deduplicate_data,
        blocksize=blocksize,
        **kwargs,
    )

    if quiet:
        # Quiet mode: call wait() and print nothing but the datasource id.
        ds.wait()
        typer.echo(f"{ds.id}")
    else:
        typer.echo(f"Processing and pulling the datasource id: {ds.id}")

        ds.progress_bar()


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `from-parquet`,
# so implementation notes are kept in comments and the docstring stays short.
@app.command("from-parquet")
@helper.requires_auth_token
def from_parquet(
    id: int = typer.Option(
        ...,
        help="Datablob id in the server.",
    ),
    index_column: str = typer.Option(
        ...,
        help="The column to use as index (row labels).",
    ),
    sort_by: str = typer.Option(
        ...,
        help="The column(s) to sort the data. Can either be a string or a JSON encoded sequence of strings.",
    ),
    deduplicate_data: bool = typer.Option(
        False,
        help="If set to **True** (default value **False**), then duplicate rows are removed while uploading.",
    ),
    blocksize: str = typer.Option(
        "256MB",
        help="Data split size in bytes. If None, then the split size is set to **256MB**.",
    ),
    kwargs_json: Optional[str] = typer.Option(
        None,
        help="Any additional parameters to be used while processing the data in a JSON string format.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datasource id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Process the parquet data and return a datasource object."""

    from ..client import DataBlob

    # The extra parameters are expanded with ** below, so they must decode to
    # a JSON object. Report bad input as a CLI usage error, not a traceback.
    kwargs: Dict[str, Any] = {}
    if kwargs_json:
        try:
            kwargs = json.loads(kwargs_json)
        except json.JSONDecodeError as e:
            raise typer.BadParameter(
                f"not a valid JSON string: {e}", param_hint="--kwargs-json"
            ) from e
        if not isinstance(kwargs, dict):
            raise typer.BadParameter(
                "must be a JSON object.", param_hint="--kwargs-json"
            )

    # --sort-by is either a plain column name or a JSON encoded sequence of
    # names. Only a decoded string or list replaces the raw text; a bare name
    # that happens to be valid JSON (e.g. 2021, true, null) stays the literal
    # column name instead of silently becoming an int, bool or None.
    sort_columns: Union[str, List[str]] = sort_by
    try:
        decoded = json.loads(sort_by)
    except json.JSONDecodeError:
        # Not JSON at all: treat the text as a single column name.
        pass
    else:
        if isinstance(decoded, (str, list)):
            sort_columns = decoded

    db = DataBlob(id=id)

    ds = db.from_parquet(
        index_column=index_column,
        sort_by=sort_columns,
        deduplicate_data=deduplicate_data,
        blocksize=blocksize,
        **kwargs,
    )

    if quiet:
        # Quiet mode: call wait() and print nothing but the datasource id.
        ds.wait()
        typer.echo(f"{ds.id}")
    else:
        typer.echo(f"Processing and pulling the datasource id: {ds.id}")

        ds.progress_bar()


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `ls`, so
# implementation notes are kept in comments and the docstring stays short.
@app.command()
@helper.requires_auth_token
def ls(
    offset: int = typer.Option(
        0,
        "--offset",
        "-o",
        help="The number of datablobs to offset at the beginning. If **None**, then the default value **0** will be used.",
    ),
    limit: int = typer.Option(
        100,
        "--limit",
        "-l",
        help="The maximum number of datablobs to return from the server. If **None**, then the default value **100** will be used.",
    ),
    disabled: bool = typer.Option(
        False,
        "--disabled",
        # Adjacent literals are concatenated verbatim, so each fragment must
        # carry its own separating space.
        help="If set to **True**, then only the deleted datablobs will be returned. "
        "Else, the default value **False** will be used to return only the list "
        "of active datablobs.",
    ),
    completed: bool = typer.Option(
        False,
        "--completed",
        help="If set to **True**, then only the datablobs that are successfully downloaded "
        "to the server will be returned. Else, the default value **False** will be used to "
        "return all the datablobs.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        # The ids are joined with "\n" below, so describe them as one per line.
        help="Output only the datablob ids, one per line.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Return the list of datablobs."""

    from ..client import DataBlob

    dbx = DataBlob.ls(
        offset=offset, limit=limit, disabled=disabled, completed=completed
    )

    df = DataBlob.as_df(dbx)

    if quiet:
        # Only the id column is printed, so the display-only humanizing of
        # the date/size columns is skipped here.
        ids = df["datablob_id"].astype(str).to_list()
        typer.echo("\n".join(ids))
        return

    # Convert the date/size columns through the helper formatters for display.
    df["pulled_on"] = helper.humanize_date(df["pulled_on"])
    df["folder_size"] = helper.humanize_size(df["folder_size"])

    typer.echo(tabulate(df, headers="keys", tablefmt="plain", showindex=False))


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `details`.
@app.command()
@helper.requires_auth_token
def details(
    id: int = typer.Argument(
        ...,
        help="Datablob id in the server.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Return details of a datablob."""

    from ..client import DataBlob

    datablob_id = int(id)
    details_df = DataBlob(id=datablob_id).details()

    # Pass the date/size columns through the helper formatters before printing.
    pulled_on = helper.humanize_date(details_df["pulled_on"])
    details_df["pulled_on"] = pulled_on
    folder_size = helper.humanize_size(details_df["folder_size"])
    details_df["folder_size"] = folder_size

    table = tabulate(details_df, headers="keys", tablefmt="plain", showindex=False)
    typer.echo(table)


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `rm`.
@app.command()
@helper.requires_auth_token
def rm(
    id: int = typer.Argument(
        ...,
        help="Datablob id in the server.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output the deleted datablob id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Delete a datablob from the server."""

    from ..client import DataBlob

    datablob = DataBlob(id=int(id))
    deleted_df = datablob.delete()

    # Pass the date/size columns through the helper formatters.
    deleted_df["pulled_on"] = helper.humanize_date(deleted_df["pulled_on"])
    deleted_df["folder_size"] = helper.humanize_size(deleted_df["folder_size"])

    if not quiet:
        # Verbose mode: print the whole returned frame as a plain table.
        typer.echo(
            tabulate(deleted_df, headers="keys", tablefmt="plain", showindex=False)
        )
        return

    # Quiet mode: print only the id taken from the first row.
    first_row = deleted_df.iloc[0]
    typer.echo(first_row["datablob_id"])


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `tag`.
@app.command()
@helper.requires_auth_token
def tag(
    id: int = typer.Option(
        ...,
        "--datablob_id",
        "-id",
        help="Datablob id in the server.",
    ),
    name: str = typer.Option(
        ...,
        "--name",
        "-n",
        help="A string to tag the datablob.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Tag an existing datablob in the server."""

    from ..client import DataBlob

    datablob = DataBlob(id=int(id))
    tagged_df = datablob.tag(name=name)

    # Pass the date/size columns through the helper formatters before printing.
    pulled_on = helper.humanize_date(tagged_df["pulled_on"])
    tagged_df["pulled_on"] = pulled_on
    folder_size = helper.humanize_size(tagged_df["folder_size"])
    tagged_df["folder_size"] = folder_size

    table = tabulate(tagged_df, headers="keys", tablefmt="plain", showindex=False)
    typer.echo(table)


# Internal Cell


# NOTE: typer uses the function docstring as the help text of `from-local`.
@app.command("from-local")
@helper.requires_auth_token
def from_local(
    path: str = typer.Option(
        ...,
        "--path",
        "-p",
        help="The relative or absolute path to a local CSV file or to a directory containing the CSV files.",
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output data id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Create and return a datablob from local csv file."""

    from ..client import DataBlob

    upload_kwargs: Dict[str, Any] = {"path": path, "tag": tag}
    if quiet:
        # Quiet mode explicitly disables progress output; otherwise the
        # argument is omitted so the callee's own default applies.
        upload_kwargs["show_progress"] = False

    datablob = DataBlob.from_local(**upload_kwargs)

    if quiet:
        message = f"{datablob.id}"
    else:
        message = f"Successfully pulled the datablob id: {datablob.id}."
    typer.echo(message)


# Internal Cell


# NOTE: typer uses the function docstring as the help text of
# `from-clickhouse`, so implementation notes are kept in comments.
@app.command("from-clickhouse")
@helper.requires_auth_token
def from_clickhouse(
    host: str = typer.Option(..., help="Remote database host name."),
    database: str = typer.Option(..., help="Database name."),
    table: str = typer.Option(..., help="Table name."),
    protocol: str = typer.Option(..., help="Protocol to use (native/http)."),
    index_column: str = typer.Option(
        ..., help="The column to use as index (row labels)."
    ),
    timestamp_column: str = typer.Option(..., help="Timestamp column name."),
    port: int = typer.Option(
        0,
        help="Host port number. If not passed, then the default value **0** will be used.",
    ),
    username: Optional[str] = typer.Option(
        None,
        "--username",
        "-u",
        help="Database username. If not passed, then the value set in the environment variable"
        " **CLICKHOUSE_USERNAME** will be used else the default value **root** will be used.",
    ),
    password: Optional[str] = typer.Option(
        None,
        "--password",
        "-p",
        help="Database password. If not passed, then the value set in the environment variable"
        ' **CLICKHOUSE_PASSWORD** will be used else the default value "" will be used.',
    ),
    filters_json: Optional[str] = typer.Option(
        None,
        "--filters-json",
        "-f",
        help="Any extra filters to apply while importing the data as a JSON string.",
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob id only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from a ClickHouse database.

    If the database requires authentication, pass the username/password as commandline arguments or store it in
    the **CLICKHOUSE_USERNAME** and **CLICKHOUSE_PASSWORD** environment variables.
    """

    from ..client import DataBlob

    # Decode the optional filters; malformed JSON is reported as a CLI usage
    # error that names the offending option, not a raw traceback.
    filters = None
    if filters_json:
        try:
            filters = json.loads(filters_json)
        except json.JSONDecodeError as e:
            raise typer.BadParameter(
                f"not a valid JSON string: {e}", param_hint="--filters-json"
            ) from e

    db = DataBlob.from_clickhouse(
        host=host,
        database=database,
        table=table,
        protocol=protocol,
        index_column=index_column,
        timestamp_column=timestamp_column,
        port=port,
        username=username,
        password=password,
        filters=filters,
        tag=tag,
    )

    if quiet:
        # Quiet mode: call wait() and print nothing but the id.
        db.wait()
        typer.echo(f"{db.id}")
    else:
        typer.echo(f"Pulling datablob id: {db.id}")
        db.progress_bar()
