"""
Module for stitching NetCDF-SCM netCDF files together

'Stitching' here means combining results from multiple experiments e.g.
combining historical and scenario experiments. This relies on the 'parent'
conventions within CMIP experiments which define the experiment from which
a given set of output started (in CMIP language, the experiment from which
a given experiment 'branched').
"""
import copy
import datetime as dt
import glob
import logging
import os.path
import re

import click
import numpy as np
import pandas as pd
import scipy.interpolate as scipyinterp
from scmdata import ScmDataFrame, df_append

from .cli_parallel import _apply_func
from .cli_utils import (
    _check_timesteps_are_monthly,
    _convert_units,
    _find_dirs_meeting_func,
    _get_id_in_path,
    _get_openscmdf_header,
    _get_outfile_dir_flat_dir,
    _get_path_bits,
    _get_timestamp_str,
    _write_ascii_file,
)
from .io import load_scmdataframe

# netCDF4 is used below (in ``get_branch_time``) to open parent files and to
# convert numeric branch times into dates.
# NOTE(review): the fallback helper is named for iris rather than netCDF4 --
# presumably the two are installed together; confirm the emitted message is
# appropriate when only netCDF4 is missing.
try:
    import netCDF4
except ModuleNotFoundError:  # pragma: no cover # emergency valve
    from .errors import raise_no_iris_warning

    raise_no_iris_warning()


logger_stitching = logging.getLogger(__name__)


def _stitch_netdf_scm_ncs(  # pylint:disable=too-many-arguments
    logger,
    src,
    dst,
    stitch_contact,
    regexp,
    prefix,
    out_format,
    drs,
    force,
    number_workers,
    target_units_specs,
    normalise,
):
    """
    Stitch every set of files found under ``src`` and write the results to ``dst``

    Directories are selected with :func:`_find_dirs_meeting_func`, using the
    ``match`` method of the compiled ``regexp`` as the selection function. Each
    selected ``(directory, filenames)`` pair is then handed to
    :func:`_stitch_magicc_files` via :func:`_apply_func`, using
    ``number_workers`` workers in "processes" style.

    If ``target_units_specs`` is not ``None`` it is read with
    :func:`pandas.read_csv` before being passed on.

    Raises
    ------
    click.ClickException
        Directory finding or the stitching of any file reported a failure
    """
    path_matcher = re.compile(regexp).match

    units_specs = target_units_specs
    if units_specs is not None:
        units_specs = pd.read_csv(units_specs)

    dirs_to_stitch, dir_finding_failed = _find_dirs_meeting_func(src, path_matcher)

    # one set of keyword arguments per directory that was found ...
    per_dir_kwargs = []
    for found_dpath, found_fnames in dirs_to_stitch:
        per_dir_kwargs.append({"fnames": found_fnames, "dpath": found_dpath})

    # ... plus the keyword arguments which are common to every call
    shared_kwargs = {
        "logger": logger,
        "dst": dst,
        "force": force,
        "out_format": out_format,
        "target_units_specs": units_specs,
        "stitch_contact": stitch_contact,
        "drs": drs,
        "prefix": prefix,
        "normalise": normalise,
    }

    stitching_failed = _apply_func(
        logger,
        _stitch_magicc_files,
        per_dir_kwargs,
        common_kwarglist=shared_kwargs,
        n_workers=number_workers,
        style="processes",
    )

    if not (dir_finding_failed or stitching_failed):
        return

    raise click.ClickException(
        "Some files failed to process. See the logs for more details"
    )


def _stitch_magicc_files(  # pylint:disable=too-many-arguments
    logger,
    fnames,
    dpath,
    dst,
    force,
    out_format,
    target_units_specs,
    stitch_contact,
    drs,
    prefix,
    normalise,
):
    """
    Stitch the file(s) ``fnames`` in ``dpath`` and write the result out

    The stitched data, its metadata and the output header come from
    :func:`_get_stitched_openscmdf_metadata_header`; the output directories
    come from :func:`_get_outfile_dir_flat_dir`; the writing itself is
    delegated to :func:`_write_ascii_file`.
    """
    logger.info("Attempting to process: %s", fnames)

    stitched, stitched_metadata, file_header = _get_stitched_openscmdf_metadata_header(
        fnames, dpath, target_units_specs, stitch_contact, drs, normalise
    )
    out_dir, flat_dir = _get_outfile_dir_flat_dir(dpath, drs, dst)

    # positional arguments in the order expected by the writer
    writer_args = (
        stitched,
        stitched_metadata,
        file_header,
        out_dir,
        flat_dir,
        fnames,
        force,
        out_format,
        drs,
    )
    _write_ascii_file(*writer_args, prefix=prefix)


def _get_stitched_openscmdf_metadata_header(  # pylint:disable=too-many-arguments
    fnames, dpath, target_units_specs, stitch_contact, drs, normalise
):
    """
    Get stitched (and optionally normalised/unit-converted) data, metadata and header

    Parameters
    ----------
    fnames : list of str
        Names of the files in ``dpath``. At most one file is supported.

    dpath : str
        Directory in which ``fnames`` live

    target_units_specs : :obj:`pd.DataFrame` or None
        If not ``None``, passed to :func:`_convert_units` to convert the
        units of the stitched data

    stitch_contact : str
        Passed to :func:`_get_openscmdf_header` when creating the header

    drs : str
        Data reference syntax which applies to the file

    normalise : str or None
        If not ``None``, the normalisation method to use when normalising
        against piControl (see :func:`normalise_against_picontrol`)

    Returns
    -------
    tuple
        The stitched data, its metadata and the header for the output file

    Raises
    ------
    AssertionError
        More than one file is supplied, or the expected ``(child)`` metadata
        is missing even though normalisation or stitching should have
        created it
    """
    if len(fnames) > 1:
        raise AssertionError(
            "more than one file to wrangle?"
        )  # pragma: no cover # emergency valve

    fullpath = os.path.join(dpath, fnames[0])
    do_normalisation = normalise is not None

    (
        openscmdf,
        picontrol_branching_time,
        picontrol_file,
    ) = get_continuous_timeseries_with_meta(
        fullpath, drs, return_picontrol_info=do_normalisation
    )

    if do_normalisation:
        openscmdf = _normalise_timeseries_against_picontrol(
            openscmdf, picontrol_branching_time, picontrol_file, drs, normalise
        )

    if target_units_specs is not None:
        openscmdf = _convert_units(openscmdf, target_units_specs)

    metadata = openscmdf.metadata

    # Only the metadata lookup is guarded: a ``KeyError`` raised while
    # building the header itself must not be mistaken for a missing key.
    try:
        crunch_version = metadata["(child) crunch_netcdf_scm_version"]
    except KeyError:  # pragma: no cover # for future
        # the "(child)" prefix is added by both normalisation and stitching,
        # so getting here means neither was done
        if do_normalisation:  # pragma: no cover
            raise AssertionError("Normalisation metadata should be included...")

        if "piControl" not in metadata["parent_experiment_id"]:  # pragma: no cover
            raise AssertionError("Stitching should have occured no?")

        # TODO: make this a warning
        logger_stitching.info(
            "No normalisation is being done and the parent of %s is %s for infile: %s",
            metadata["experiment_id"],
            metadata["parent_experiment_id"],
            fullpath,
        )

        crunch_version = metadata["crunch_netcdf_scm_version"]

    header = _get_openscmdf_header(stitch_contact, crunch_version)

    return openscmdf, metadata, header


def get_continuous_timeseries_with_meta(infile, drs, return_picontrol_info=True):
    """
    Load a continuous timeseries with metadata

    Continuous here means including all parent experiments up to (but not
    including) piControl

    Parameters
    ----------
    infile : str
        NetCDF-SCM crunched file to load

    drs : str
        Data reference syntax which applies to this file

    return_picontrol_info : bool
        If ``True``, piControl information will be returned in the second
        and third outputs if available (rather than ``None``). A caveat is that
        if the experiment itself is a piControl experiment, ``None`` will be
        returned in the second and third outputs.

    Returns
    -------
    :obj:`scmdata.ScmDataFrame`
        Loaded timeseries, including metadata

    :obj:`dt.datetime`
        Branch time from piControl. If ``infile`` points to a piControl or
        piControl-spinup experiment then this will be ``None``.

    str
        Path from which the piControl data was loaded. If ``infile`` points to a
        piControl or piControl-spinup experiment then this will be ``None``.
    """
    loaded = _load_scmdataframe_and_add_source_file(infile, drs)

    parent_replacements = get_parent_replacements(loaded)
    if not parent_replacements:  # pragma: no cover # emergency valve
        # every caller (including the recursive call below) unpacks three
        # values, so the shape of the return value must not change here
        return loaded, None, None

    pexp_id = parent_replacements["parent_experiment_id"]
    if pexp_id.endswith("piControl"):
        # don't need to look any further
        if not return_picontrol_info:
            return loaded, None, None

        picontrol_file = get_parent_file_path(infile, parent_replacements, drs)
        picontrol_branching_time = get_branch_time(
            loaded, parent=True, parent_path=picontrol_file
        )
        return loaded, picontrol_branching_time, picontrol_file

    if pexp_id.endswith("piControl-spinup") or (pexp_id == "N/A"):
        # hard-code return at piControl-spinup for now, we don't care about spinup
        return loaded, None, None

    parent_file_path = get_parent_file_path(infile, parent_replacements, drs)

    # walk further up the family tree: the parent is itself stitched to its
    # own parents before being joined to ``loaded``
    (
        parent,
        picontrol_branching_time,
        picontrol_file,
    ) = get_continuous_timeseries_with_meta(
        parent_file_path, drs, return_picontrol_info
    )

    return _do_stitching(loaded, parent), picontrol_branching_time, picontrol_file


def _load_scmdataframe_and_add_source_file(infile, drs):
    """
    Load ``infile`` and record where it came from in its metadata

    The path is stored under the ``"netcdf-scm crunched file"`` metadata key,
    with every occurrence of ``"<root_dir>/"`` removed, where ``<root_dir>``
    is whatever :func:`_get_id_in_path` returns for ``"root_dir"``.
    """
    scmdf = load_scmdataframe(infile)

    root_dir = _get_id_in_path("root_dir", infile, drs)
    root_prefix = os.path.join("{}/".format(root_dir))
    scmdf.metadata["netcdf-scm crunched file"] = infile.replace(root_prefix, "")

    return scmdf


def get_parent_replacements(scmdf):
    """
    Get changes in metadata required to identify a dataset's parent file

    Parameters
    ----------
    scmdf : :obj:`scmdata.ScmDataFrame`
        Dataset of which to identify the parent file

    Returns
    -------
    dict of str : str
        Replacements which must be made to the dataset's metadata in order to
        identify its parent file. The key holding the parent's variant label
        is renamed to ``parent_ensemble_member`` for CMIP5 data and to
        ``parent_member_id`` otherwise.

    Raises
    ------
    KeyError
        The variant label (e.g. r1i1p1f1) of the parent dataset is missing
    """
    is_cmip5 = scmdf.get_unique_meta("mip_era", no_duplicates=True) == "CMIP5"

    # The two conventions differ only in which metadata keys describe the
    # parent and in how the key holding the variant label is (re)named.
    if is_cmip5:
        wanted_keys = (
            "parent_experiment",
            "parent_experiment_id",
            "parent_experiment_rip",
        )
        label_key = "parent_experiment_rip"
        renamed_label_key = "parent_ensemble_member"
        missing_label_msg = "No `parent_experiment_rip` in metadata"
    else:
        wanted_keys = (
            "parent_activity_id",
            "parent_experiment_id",
            "parent_mip_era",
            "parent_source_id",
            "parent_variant_label",
        )
        # change in language since netcdf-scm was written: what CMIP6
        # metadata calls the variant label is the member id elsewhere
        label_key = "parent_variant_label"
        renamed_label_key = "parent_member_id"
        missing_label_msg = "No `parent_variant_label` in metadata"

    replacements = {}
    for meta_key, meta_value in scmdf.metadata.items():
        if meta_key in wanted_keys:
            replacements[meta_key] = meta_value

    try:
        replacements[renamed_label_key] = replacements.pop(label_key)
    except KeyError:
        raise KeyError(missing_label_msg)

    return replacements


def get_parent_file_path(infile, parent_replacements, drs):
    """
    Get parent file path for a given file

    Parameters
    ----------
    infile : str
        File path of which to get the parent

    parent_replacements : dict of str : str
        Replacements to insert in ``infile`` to determine the parent filepath

    drs : str
        Data reference syntax which is applicable to these filepaths

    Returns
    -------
    str
        Path of the parent file

    Raises
    ------
    IOError
        No file matching the parent's path pattern could be found

    AssertionError
        More than one file matches the parent's path pattern
    """
    # TODO: write a more user friendly function that just takes infile as an
    # argument and has its paths and failures tested more completely

    parent_file_path_base = _get_parent_path_base(infile, parent_replacements, drs)
    parent_file_path = glob.glob(parent_file_path_base)

    # ``glob.glob`` returns a plain list, so an emptiness check is all we need
    if not parent_file_path:
        raise IOError(
            "No parent data ({}) available for {}, we looked in {}".format(
                parent_replacements["parent_experiment_id"],
                infile,
                parent_file_path_base,
            )
        )

    if len(parent_file_path) > 1:
        raise AssertionError(  # pragma: no cover # emergency valve
            "More than one parent file?"
        )

    return parent_file_path[0]


def _get_parent_path_base(child_path, replacements_in, drs):
    """
    Build the glob pattern which should match the parent file of ``child_path``

    Each ``parent_<id>`` replacement is applied by swapping the value of
    ``<id>`` in the child's path (as given by :func:`_get_id_in_path`) for the
    parent's value. Everything from the child's timestamp string onwards is
    then replaced by ``*.nc`` and, if the path bits include a version, the
    version is replaced by ``*`` too.
    """
    replacements = copy.deepcopy(replacements_in)
    if drs == "MarbleCMIP5":
        # for this drs the experiment is looked up in the path as
        # "experiment", not "experiment_id"
        replacements["parent_experiment"] = replacements.pop("parent_experiment_id")

    parent_path = child_path
    for parent_key, parent_value in replacements.items():
        child_value = _get_id_in_path(
            parent_key.replace("parent_", ""), child_path, drs
        )
        parent_path = parent_path.replace(child_value, parent_value)

    stem_before_timestamp = parent_path.split(_get_timestamp_str(child_path, drs))[0]
    glob_pattern = "{}*.nc".format(stem_before_timestamp)

    path_bits = _get_path_bits(child_path, drs)
    if "version" not in path_bits:
        return glob_pattern

    return glob_pattern.replace(path_bits["version"], "*")


def _do_stitching(loaded, parent):
    """
    Join ``loaded`` to its ``parent``, preferring ``loaded`` where years overlap

    For non-CMIP5 data, the branch time in the child and parent time axes must
    agree, otherwise ``NotImplementedError`` is raised. The metadata of the
    result combines the child's metadata (keys prefixed with ``(child)``) with
    the parent's metadata (keys prefixed with ``(parent)`` or, if the parent is
    itself the result of stitching, stepped one level up the family tree).
    """
    is_cmip5 = loaded.get_unique_meta("mip_era", no_duplicates=True) == "CMIP5"
    if is_cmip5:
        logger_stitching.debug(
            "CMIP5 data does not have enough information to check if any adjustment to "
            "time axes is required before stitching the timeseries (hence we make no "
            "such check)"
        )
    else:
        # we have the information to check that things line up as expected so let's do it
        branch_time_in_parent = get_branch_time(loaded, parent=True)
        branch_time = get_branch_time(loaded, parent=False)

        if branch_time != branch_time_in_parent:
            raise NotImplementedError(
                "Time axes should be adjusted before stitching timeseries"
            )

    # drop the years we already have from the parent so that the child's data
    # takes priority wherever the two overlap
    child_years = loaded["year"].unique().tolist()
    parent_without_overlap = parent.filter(year=child_years, keep=False)
    stitched = df_append([loaded, parent_without_overlap])

    child_scenario = loaded.get_unique_meta("scenario", no_duplicates=True)
    stitched = _make_metadata_uniform(stitched, child_scenario)

    child_metadata = {
        "(child) {}".format(key): value for key, value in loaded.metadata.items()
    }

    parent_already_stitched = any("(child)" in key for key in parent.metadata)
    if parent_already_stitched:
        # the parent carries its own family tree, move every level up by one
        ancestor_metadata = {
            step_up_family_tree(key): value for key, value in parent.metadata.items()
        }
    else:
        ancestor_metadata = {
            "(parent) {}".format(key): value for key, value in parent.metadata.items()
        }

    stitched.metadata = {**child_metadata, **ancestor_metadata}

    return stitched


def _make_metadata_uniform(inscmdf, base_scen):
    """
    Make metadata uniform for ease of plotting etc.

    Every meta column other than region, variable and unit is overwritten, for
    every scenario in ``inscmdf``, with the (unique) value it has in the
    ``base_scen`` scenario.
    """
    reference = inscmdf.filter(scenario=base_scen)
    untouched_cols = ["region", "variable", "unit"]
    uniform_cols = [col for col in reference.meta.columns if col not in untouched_cols]

    def _with_reference_meta(scenario):
        scen_scmdf = inscmdf.filter(scenario=scenario)
        for col in uniform_cols:
            scen_scmdf.set_meta(
                reference.get_unique_meta(col, no_duplicates=True), col
            )

        return scen_scmdf.timeseries()

    pieces = [_with_reference_meta(scen) for scen in inscmdf["scenario"].unique()]

    return ScmDataFrame(pd.concat(pieces, sort=True, axis=1))


def _normalise_timeseries_against_picontrol(
    openscmdf, picontrol_branching_time, picontrol_file, drs, normalise
):
    """Load the piControl data in ``picontrol_file`` and normalise ``openscmdf`` against it"""
    picontrol = _load_scmdataframe_and_add_source_file(picontrol_file, drs)
    normalised = normalise_against_picontrol(
        openscmdf, picontrol, picontrol_branching_time, normalise
    )

    return normalised


def get_branch_time(openscmdf, parent=True, parent_path=None):
    """
    Get branch time of an experiment

    Parameters
    ----------
    openscmdf : :obj:`scmdata.ScmDataFrame`
        Data of which to get the branch time

    parent : bool
        Should I get the branch time in the parent experiment's time co-ordinates?
        If ``False``, return the branch time in the child (i.e. ``openscmdf``'s)
        time co-ordinates.

    parent_path : str
        Path to the data file containing the parent data of ``openscmdf``. This
        is only required if the data is from CMIP5 because CMIP5 data does not
        store information about the parent experiment's time calendar and units.

    Returns
    -------
    :obj:`datetime.datetime`
        The branch time, rounded to the nearest year, month and day. NetCDF-SCM
        is not designed for very precise calculations, if you need to keep finer
        information, please raise an issue on our issue tracker to discuss.

    Raises
    ------
    ValueError
        ``parent is not True`` and the data is CMIP5 data. It is impossible to
        determine the branch time in the child time co-ordinates from CMIP5 data
        because of a lack of information.

    ValueError
        ``parent_path is None`` and the data is CMIP5 data. You must supply the
        parent path if the data is CMIP5 data because the parent file is the only
        place the parent experiment's time units and calendar information is
        available.
    """
    cmip5 = openscmdf.get_unique_meta("mip_era", no_duplicates=True) == "CMIP5"

    if cmip5:
        if not parent:
            raise ValueError(
                "CMIP5 data does not contain information about the branch time "
                "in the child's time axis"
            )

        if parent_path is None:
            raise ValueError("If using cmip5 data, you must provide `parent_path`")

        # have to use file, info not in metadata. The context manager makes
        # sure the file handle is released again once the units and calendar
        # have been read.
        with netCDF4.Dataset(parent_path) as nc:
            parent_time = nc.variables["time"]
            branch_time = netCDF4.num2date(  # pylint:disable=no-member
                openscmdf.metadata["branch_time"],
                parent_time.units,
                parent_time.calendar,
            )

    else:
        bt_key = "branch_time_in_parent" if parent else "branch_time_in_child"

        if "BCC" in openscmdf.metadata["source_id"] and not np.equal(
            openscmdf.metadata[bt_key], 0
        ):
            # think the metadata here is wrong as historical has a branch_time_in_parent
            # of 2015 so assuming this means the year of the branch not the actual time
            # in days (like it's meant to)
            warn_str = (
                "Assuming BCC metadata is wrong and branch time units are actually years, "
                "not days"
            )
            logger_stitching.warning(warn_str)
            branch_time = dt.datetime(int(openscmdf.metadata[bt_key]), 1, 1)

        else:
            # NOTE(review): ``parent_time_units`` is used even when
            # ``parent=False`` (i.e. for ``branch_time_in_child``) -- confirm
            # that this is intended rather than the child's own time units.
            branch_time = netCDF4.num2date(  # pylint:disable=no-member
                openscmdf.metadata[bt_key],
                openscmdf.metadata["parent_time_units"],
                openscmdf.metadata["calendar"],
            )

    # drop anything finer than the day (and any calendar-specific date type)
    branch_time = dt.datetime(branch_time.year, branch_time.month, branch_time.day)

    return branch_time


def step_up_family_tree(in_level):
    """
    Step name up the family tree

    Parameters
    ----------
    in_level : str
        Level from which to step up

    Returns
    -------
    str
        Level one up from ``in_level``. Strings which contain no family
        level are returned unchanged.

    Examples
    --------
    >>> step_up_family_tree("(child)")
    '(parent)'

    >>> step_up_family_tree("(parent)")
    '(grandparent)'

    >>> step_up_family_tree("(grandparent)")
    '(greatgrandparent)'

    >>> step_up_family_tree("(greatgreatgrandparent)")
    '(greatgreatgreatgrandparent)'
    """
    # the two youngest generations have their own names ...
    named_steps = (("(child)", "(parent)"), ("(parent)", "(grandparent)"))
    for current, one_up in named_steps:
        if current in in_level:
            return in_level.replace(current, one_up)

    # ... everything older simply gains another "great"
    return in_level.replace("grandparent)", "greatgrandparent)")


def normalise_against_picontrol(indata, picontrol, picontrol_branching_time, method):
    """
    Normalise data against picontrol

    Parameters
    ----------
    indata : :obj:`scmdata.ScmDataFrame`
        Data to normalise

    picontrol : :obj:`scmdata.ScmDataFrame`
        Pre-industrial control run data

    picontrol_branching_time : :obj:`datetime.datetime`
        The branching time in the pre-industrial experiment. It is assumed
        that the first timepoint in ``indata`` follows immediately from this
        branching time.

    method : {"31-yr-mean-after-branch-time", "21-yr-running-mean"}
        How to derive the reference values. See docstring of
        :func:`get_reference_values`.

    Returns
    -------
    :obj:`scmdata.ScmDataFrame`
        Normalised data including metadata about the file which was used for
        normalisation (keys prefixed with ``(normalisation)``) and the
        normalisation method (key ``normalisation method``)

    Raises
    ------
    NotImplementedError
        Normalisation is being done against a timeseries other than piControl

    ValueError
        The branching time data is not in ``picontrol`` data

    NotImplementedError
        The normalisation method is not recognised
    """
    picontrol_experiment = picontrol.metadata["experiment_id"]
    if not picontrol_experiment.endswith("piControl"):  # pragma: no cover
        # emergency valve, can't think of how this path should work
        raise NotImplementedError(
            "If you would like to normalise against an experiment other than "
            "piControl, please raise an issue at "
            "https://gitlab.com/znicholls/netcdf-scm/-/issues"
        )

    reference_values = get_reference_values(
        indata, picontrol, picontrol_branching_time, method
    )
    normalised = _take_anomaly_from(indata, reference_values)

    # data which has already been stitched carries "(child)" labels, anything
    # else gets them here
    child_metadata = copy.deepcopy(indata.metadata)
    already_labelled = any("(child)" in key for key in child_metadata)
    if not already_labelled:
        child_metadata = {
            "(child) {}".format(key): value for key, value in child_metadata.items()
        }

    normalisation_metadata = {
        "(normalisation) {}".format(key): value
        for key, value in picontrol.metadata.items()
    }

    normalised.metadata = {
        **child_metadata,
        **normalisation_metadata,
        "normalisation method": method,
    }

    return normalised


def get_reference_values(indata, picontrol, picontrol_branching_time, method):
    """
    Get reference values for an experiment from its equivalent piControl experiment

    Parameters
    ----------
    indata : :obj:`scmdata.ScmDataFrame`
        Experiment to calculate reference values for

    picontrol : :obj:`scmdata.ScmDataFrame`
        Pre-industrial control run data

    picontrol_branching_time : :obj:`datetime.datetime`
        The branching time in the pre-industrial experiment. It is assumed
        that the first timepoint in ``indata`` follows immediately from this
        branching time.

    method : {"31-yr-mean-after-branch-time", "21-yr-running-mean"}
        How to derive the reference values. "31-yr-mean-after-branch-time"
        results in the reference values simply being the average of the 31
        years following the branch time. "21-yr-running-mean" results in
        the reference values being a 21 year running mean centred on each
        time point.

    Returns
    -------
    :obj:`pd.DataFrame`
        Reference values with the same index and columns as ``indata``

    Raises
    ------
    ValueError
        The branching time data is not in ``picontrol`` data

    NotImplementedError
        The normalisation method is not recognised
    """
    branch_year = picontrol_branching_time.year
    available_years = picontrol["year"].unique().tolist()
    if branch_year not in available_years:
        _raise_branching_time_unavailable_error(picontrol_branching_time, picontrol)

    if method == "31-yr-mean-after-branch-time":
        calculate = _get_reference_values_using_31_yr_mean_after_branch_time
    elif method == "21-yr-running-mean":
        calculate = _get_reference_values_using_21_yr_running_mean
    else:
        raise NotImplementedError(  # pragma: no cover # emergency valve
            "Unrecognised normalisation method: {}".format(method)
        )

    reference = calculate(indata, picontrol, picontrol_branching_time)

    # The reference values carry piControl's metadata. Overwrite every index
    # level, except the ones listed below, with the values from ``indata``.
    index_levels = indata.meta.columns
    keep_as_is = ["climate_model", "region", "variable", "unit"]
    overwrite_levels = [level for level in index_levels if level not in keep_as_is]

    reference = reference.reset_index(overwrite_levels)
    for level in overwrite_levels:
        reference[level] = indata.get_unique_meta(level, no_duplicates=True)

    reference = reference.set_index(overwrite_levels, append=True)

    return reference.reorder_levels(index_levels)


def _raise_branching_time_unavailable_error(branch_time, parent):
    """Raise a ``ValueError`` saying that ``branch_time`` is not available in ``parent``"""
    template = "Branching time `{:04d}{:02d}` not available in {} data in {}"
    parent_metadata = parent.metadata

    raise ValueError(
        template.format(
            branch_time.year,
            branch_time.month,
            parent_metadata["experiment_id"],
            parent_metadata["netcdf-scm crunched file"],
        )
    )


def _get_reference_values_using_31_yr_mean_after_branch_time(
    indata, picontrol, picontrol_branching_time
):
    """
    Get reference values as the mean of the 31 years of piControl from the branch year

    The same mean is used for every time point in ``indata``.

    Raises
    ------
    ValueError
        The piControl data from the branch year onwards does not span 31 years
    """
    first_year = picontrol_branching_time.year
    window = picontrol.filter(year=range(first_year, first_year + 31))

    window_start = window["time"].min()
    window_end = window["time"].max()
    if (window_end.year - window_start.year) != 30:
        raise ValueError(
            "Only `{:04d}{:02d}` to `{:04d}{:02d}` is available after the "
            "branching time `{:04d}{:02d}` in {} data in {}".format(
                window_start.year,
                window_start.month,
                window_end.year,
                window_end.month,
                picontrol_branching_time.year,
                picontrol_branching_time.month,
                picontrol.metadata["experiment_id"],
                picontrol.metadata["netcdf-scm crunched file"],
            )
        )

    window_mean = window.timeseries().mean(axis=1)

    # normalisation is uniform for all timepoints, so repeat the mean once per
    # time point in ``indata``
    target_times = indata["time"]
    reference = pd.concat(
        [window_mean for _ in range(target_times.shape[0])], axis=1
    )
    reference.columns = target_times

    return reference


def _get_reference_values_using_21_yr_running_mean(
    indata, picontrol, picontrol_branching_time
):
    """
    Get reference values as a centred 21-year running mean of the piControl data

    The running mean is calculated over ``picontrol``'s timeseries (assumed to
    be monthly, see the sanity check below) and then restricted to as many
    time points as ``indata`` has, starting from the first piControl time
    point in the branch year and month. Gaps in the running mean (where a
    full window was not available) are filled by linear interpolation and
    extrapolation. Finally, the columns are relabelled with ``indata``'s time
    points.

    Parameters
    ----------
    indata : :obj:`scmdata.ScmDataFrame`
        Experiment to calculate reference values for

    picontrol : :obj:`scmdata.ScmDataFrame`
        Pre-industrial control run data

    picontrol_branching_time : :obj:`datetime.datetime`
        The branching time in the pre-industrial experiment

    Returns
    -------
    :obj:`pd.DataFrame`
        Running mean values with ``indata["time"]`` as columns

    Raises
    ------
    ValueError
        The required time points are not available in ``picontrol``, gaps
        remain after interpolation, or the number of reference time points
        does not match the number of time points in ``indata``
    """
    running_mean_n_years = 21
    months_per_year = 12
    # window width in time steps (months): 21 * 12 = 252
    window_width = running_mean_n_years * months_per_year
    window_roll_back = window_width // 2
    # the window extends one step fewer forward because the centre time point
    # is also included
    window_roll_forward = window_roll_back - 1

    branch_year = picontrol_branching_time.year
    branch_month = picontrol_branching_time.month
    # first piControl time point which falls in the branch year and month
    picontrol_branch_col = picontrol.filter(year=branch_year, month=branch_month)[
        "time"
    ][0]
    picontrol_time = picontrol["time"]
    # ``idxmax`` on a boolean series gives the index label of the first match
    picontrol_branch_idx = (picontrol_time == picontrol_branch_col).idxmax()

    # relevant times go from the branch point minus the number of steps each
    # rolling mean window extends backward up to the branch point plus the
    # length of ``indata`` plus the number of steps the rolling mean window
    # extends forward (the start is clamped to the first available time point)
    relevant_time_points = picontrol_time[
        max(picontrol_branch_idx - window_roll_back, 0) : picontrol_branch_idx
        + indata["time"].shape[0]
        + window_roll_forward
    ].tolist()

    # NOTE(review): presumably raises if the time steps are not monthly,
    # which the window width above relies on -- confirm against cli_utils
    _check_timesteps_are_monthly(picontrol)  # quick sanity check

    # super slow option below, would be good to have direct functionality in scmdata
    # normalise_ts = picontrol.filter(time=relevant_time_points.tolist()).timeseries()

    # boolean mask selecting the relevant time points, applied positionally to
    # the columns of the timeseries
    picontrol_times = (picontrol["time"] >= relevant_time_points[0]) & (
        picontrol["time"] <= relevant_time_points[-1]
    )
    normalise_ts = picontrol.timeseries().iloc[:, picontrol_times.values]

    normalise_df = normalise_ts.rolling(
        window=window_width, center=True, axis="columns",
    ).mean()

    # time points for which reference values are needed: one per time point
    # in ``indata``, starting at the branch point
    keep_time_points = picontrol_time[
        picontrol_branch_idx : picontrol_branch_idx + indata["time"].shape[0]
    ].tolist()

    # errors are collected in this flag and raised once, at the end, so that
    # all failure modes share a single informative error message
    raise_error = False
    try:
        normalise_df = normalise_df[keep_time_points]
    except KeyError:
        # NOTE(review): ``normalise_df`` is left unrestricted in this case,
        # yet the gap filling below still runs on it before the error is
        # raised -- confirm that it cannot fail with a less helpful error
        raise_error = True

    if normalise_df.isnull().any().any():
        logger_stitching.info(
            "Filling gaps in running mean (where not enough values were available to create a full "
            "window) with linear interpolations and extrapolations."
        )
        # # TODO: fix, this should work but is broken...
        # ScmDataFrame(normalise_df).interpolate(target_times=normalise_df.columns, extrapolation_type="linear").timeseries()

        # columns without any gaps form the basis of the interpolation
        interp_base = normalise_df.dropna(axis=1)

        def _convert_to_s(x):
            # seconds since 1970-01-01, used as a numeric time axis
            return (x - dt.datetime(1970, 1, 1)).total_seconds()

        time_axis_base = interp_base.columns.map(_convert_to_s)
        interpolater = scipyinterp.interp1d(
            time_axis_base, interp_base.values, fill_value="extrapolate"
        )

        # columns which were dropped above, i.e. those with gaps, in time order
        nan_cols = sorted(list(set(normalise_df.columns) - set(interp_base.columns)))
        time_nan_cols = [_convert_to_s(x) for x in nan_cols]

        # overwrite every column which contains a gap with interpolated values
        normalise_df.loc[:, normalise_df.isnull().any()] = interpolater(time_nan_cols)

    if normalise_df.isnull().any().any():
        # if there are any remaining gaps, raise
        raise_error = True

    try:
        # fails if the number of columns differs from the number of time
        # points in ``indata``
        normalise_df.columns = indata["time"]
    except ValueError:
        raise_error = True

    if raise_error:
        error_msg = (
            "Only `{:04d}{:02d}` to `{:04d}{:02d}` is available in the "
            "{} data. Given the branching time, `{:04d}{:02d}`, we need "
            "data from ~`{:04d}` to `{:04d}`. {} data in {}".format(
                picontrol["time"].min().year,
                picontrol["time"].min().month,
                picontrol["time"].max().year,
                picontrol["time"].max().month,
                picontrol.metadata["experiment_id"],
                picontrol_branching_time.year,
                picontrol_branching_time.month,
                picontrol_branching_time.year - 11,
                picontrol_branching_time.year
                + indata["year"].max()
                - indata["year"].min()
                + 11,
                picontrol.metadata["experiment_id"],
                picontrol.metadata["netcdf-scm crunched file"],
            )
        )
        raise ValueError(error_msg)

    return normalise_df


def _take_anomaly_from(inscmdf, ref_df):
    """
    Subtract ``ref_df`` from the timeseries of ``inscmdf``

    Raises
    ------
    ValueError
        The difference contains nulls, i.e. the timeseries of ``inscmdf`` and
        ``ref_df`` do not line up
    """
    difference = inscmdf.timeseries() - ref_df

    has_gaps = difference.isnull().any().any()
    if has_gaps:  # pragma: no cover
        raise ValueError("`inscmdf` and `ref_df` don't have the same index")

    return ScmDataFrame(difference)
