#!/usr/bin/env python

"""
Scale the given input data with a user-specified method, then save both
the scaled data and the scalers that were used to produce it.

examples

./pugna_scale_data --input-data /Volumes/ancient/pugna/test_data/results/ts/amp/train/X.npy --verbose  --scale-method MinMaxScaler --output-dir amp/train
./pugna_scale_data --input-data /Volumes/ancient/pugna/test_data/results/ts/amp/train/y.npy --verbose  --scale-method MinMaxScaler --output-dir amp/train

./pugna_scale_data --input-data /Volumes/ancient/pugna/test_data/results/ts/amp/val/X.npy --verbose  --scale-method MinMaxScaler --output-dir amp/val
./pugna_scale_data --input-data /Volumes/ancient/pugna/test_data/results/ts/amp/val/y.npy --verbose  --scale-method MinMaxScaler --output-dir amp/val

"""

import os
from pathlib import Path
import argparse
import numpy as np

import pugna.logger
import pugna.data

if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line interface
    # ------------------------------------------------------------------
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    cli.add_argument("--output-dir", default='output', type=str,
                     help="directory to save data")

    cli.add_argument("--input-data", required=True, type=str,
                     help="""
                        domain data, column formatted.
                        shape=(N,M)
                        N: number of samples
                        M: number of features
                        """)

    cli.add_argument("-v", "--verbose", action='count', default=0,
                     help="""
                        increase output verbosity
                        no -v: WARNING
                        -v: INFO
                        -vv: DEBUG
                        """)

    # scaling options
    cli.add_argument("--scale-method",
                     choices=['MinMaxScaler', 'StandardScaler'],
                     required=True,
                     type=str,
                     help="method to scale data")

    opts = cli.parse_args()

    # ------------------------------------------------------------------
    # Logging: the -v count is clamped to 2, the highest level offered
    # https://stackoverflow.com/questions/14097061/easier-way-to-enable-verbose-logging
    # ------------------------------------------------------------------
    verbosity = opts.verbose if opts.verbose < 2 else 2
    log = pugna.logger.init_logger(level=verbosity)
    log.info(f"current file: {__file__}")
    log.info(f"verbosity turned on at level: {verbosity}")

    # ------------------------------------------------------------------
    # Output location: <output-dir>/<scale-method>
    # ------------------------------------------------------------------
    target_dir = os.path.join(opts.output_dir, opts.scale_method)

    log.info(f"making dir: {target_dir}")
    os.makedirs(target_dir, exist_ok=True)

    # Split the input path once; the stem names the output files and the
    # suffix selects the loader.
    source = Path(opts.input_data)
    stem = source.stem
    suffix = source.suffix
    log.info(f"name: {stem}")
    log.info(f"ext: {suffix}")

    # ------------------------------------------------------------------
    # Load: anything that is not a .npy file is read with np.genfromtxt
    # ------------------------------------------------------------------
    if suffix != '.npy':
        log.info("using np.genfromtxt")
        raw = np.genfromtxt(opts.input_data)
    else:
        log.info("using np.load")
        raw = np.load(opts.input_data)

    log.info(f"data.shape: {raw.shape}")

    # ------------------------------------------------------------------
    # Build the scalers from the data, then apply them to that same data
    # ------------------------------------------------------------------
    log.info(f"scale method: {opts.scale_method}")
    fitted = pugna.data.make_scalers(raw, method=opts.scale_method)

    log.info("appling scalers to data")
    scaled = pugna.data.apply_scaler(raw, fitted)

    # ------------------------------------------------------------------
    # Persist the scalers first, then the scaled array
    # ------------------------------------------------------------------
    log.info("saving scalers")
    scalers_path = os.path.join(target_dir, f"{stem}_scalers.npy")
    log.info(f"path: {scalers_path}")
    pugna.data.save_scalers(fitted, scalers_path)

    log.info("saving scaled data")
    scaled_path = os.path.join(target_dir, f"{stem}_scaled.npy")
    log.info(f"path: {scaled_path}")
    np.save(scaled_path, scaled)

    log.info("done!")
