"""
read.py
language: Python3
author: C. Lockhart <chrisblockhart@gmail.com>
"""

from namdtools.core import Log


# Read output from NAMD run
# Convert to object? Store raw output?
def read_log(fname, glob=None):
    """
    Load one or more NAMD output files into a single `Log`.

    Parameters
    ----------
    fname : str
        Name of the NAMD output file, or the pattern to expand when `glob` is truthy.
    glob : bool or dict
        If truthy, `fname` is expanded with `vglob`. A dictionary is forwarded to `vglob` as keyword arguments and
        the metadata of every matched path is added to its table as extra columns. Any other truthy value (e.g.
        True) expands `fname` without extra arguments and does not attach metadata. (Default: None)

    Returns
    -------
    Log
        The energy records of every file that was read, wrapped in a `Log`.

    Raises
    ------
    FileNotFoundError
        If globbing was requested and no file matched. `vglob` is called with ``errors='raise'`` and may raise on
        its own as well.
    """

    # Deferred import, so that importing the package itself stays cheap
    import pandas as pd

    if glob:
        from molecular.io.utilities import Path, vglob  # noqa

        # Anything that is not a dictionary (e.g. True) means "glob without extra arguments"
        vglob_kwargs = glob if isinstance(glob, dict) else {}

        # Expand the pattern; an empty result is an error
        fnames = vglob(fname, errors='raise', **vglob_kwargs)
        if not fnames:
            raise FileNotFoundError(fname)
    else:
        vglob_kwargs = None
        fnames = [fname]

    # One table per file, in the order the file names were produced
    tables = [_read_log(path) for path in fnames]

    # Metadata columns are only attached when a non-empty dictionary was supplied
    if vglob_kwargs:
        tables = [table.assign(**Path(path).metadata) for path, table in zip(fnames, tables)]  # noqa

    # Merge the tables; a single table is passed through untouched
    # NOTE(review): ignore_index=True relabels the rows 0..n-1, so the per-file 'ts' values are not carried over
    # when several files are read -- confirm this is intended.
    if len(tables) == 1:
        combined = tables[0]
    else:
        combined = pd.concat(tables, ignore_index=True)
    combined.index.name = 'ts'  # the index keeps the name 'ts' in both cases

    return Log(combined)


def _read_log(fname):
    """
    Read the ENERGY records of a NAMD output file.

    Column names are taken from the first ETITLE record (lower-cased); if the file has no ETITLE record, the
    standard 20-column NAMD layout is assumed. The first column (normally `ts`) becomes the index.

    Parameters
    ----------
    fname : str
        Name of NAMD output file.

    Returns
    -------
    pandas.DataFrame
        One row per ENERGY record, indexed by the first ETITLE column.
    """

    # Import relevant packages
    import numpy as np
    import pandas as pd
    import re

    # Read in entire log file
    with open(fname) as stream:
        records = stream.read()

    # Find ETITLE, we only need the first record. Otherwise, guess that ETITLE follows standard format
    etitle_start = records.find('ETITLE')
    if etitle_start >= 0:
        # Everything between the first ETITLE and the next ENERGY is treated as the title record
        etitle_end = records.find('ENERGY', etitle_start)
        etitle = records[etitle_start:etitle_end].lower().split()[1:]  # first column is ETITLE
    else:
        etitle = ['ts', 'bond', 'angle', 'dihed', 'imprp', 'elect', 'vdw', 'boundary', 'misc', 'kinetic', 'total',
                  'temp', 'potential', 'total3', 'tempavg', 'pressure', 'gpressure', 'volume', 'pressavg', 'gpressavg']

    # Blank out every line that does not start with ENERGY (genfromtxt skips empty lines), then parse the rest.
    # We skip the first column which is ENERGY
    energy_records = re.sub(r'^(?!ENERGY).*$', '', records, flags=re.MULTILINE).split('\n')
    energy = np.genfromtxt(energy_records, autostrip=True, usecols=range(1, len(etitle) + 1))

    # genfromtxt squeezes its result, so a log with a single ENERGY record comes back 1-dimensional and would not
    # line up with the column names. Restore the (records, columns) layout explicitly.
    energy = np.reshape(energy, (-1, len(etitle)))

    # Return as DataFrame
    return pd.DataFrame(energy, columns=etitle).set_index(etitle[0])


def _read_log_old(fname):
    """
    Read the ENERGY records of a NAMD output file line by line (older implementation).

    Column names come from the first ETITLE record (lower-cased). If the file contains no ETITLE record, the
    standard 20-column NAMD layout is assumed.

    Parameters
    ----------
    fname : str
        Name of NAMD output file.

    Returns
    -------
    pandas.DataFrame
        One row per ENERGY record with float columns, indexed by the first ETITLE column. The index labels are
        left as the strings read from the file, because the float conversion is applied after the index is set.
    """

    # Import pandas if not already loaded (to speed up namdtools in general)
    import pandas as pd

    # Initialize DataFrame information
    columns = None
    records = []

    # Read through log file and extract energy records
    # TODO read in with regex
    with open(fname, 'r') as stream:
        for line in stream:
            # Read first ETITLE
            if columns is None and line.startswith('ETITLE'):
                columns = line.lower().split()[1:]

            # Save each energy record
            if line.startswith('ENERGY'):
                records.append(line.split()[1:])

    # Only fall back to the standard layout when the file has no ETITLE; the parsed title must not be overwritten
    if columns is None:
        columns = ['ts', 'bond', 'angle', 'dihed', 'imprp', 'elect', 'vdw', 'boundary', 'misc', 'kinetic', 'total',
                   'temp', 'potential', 'total3', 'tempavg', 'pressure', 'gpressure', 'volume', 'pressavg',
                   'gpressavg']

    # Return DataFrame
    return pd.DataFrame(records, columns=columns).set_index(columns[0]).astype(float)
