# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/04_data.selection.ipynb (unless otherwise specified).

__all__ = ['select_y', 'select_tax_order', 'select_X', 'get_y_by_order']

# Cell
#nbdev_comment from __future__ import annotations
from pathlib import Path
import pickle
import numpy as np
from fastcore.test import *

# Cell
def select_y(data:tuple, # (X, y, X_id, depth_order)
             low:float=0.12, # Lowest limit
             high:float=999 # Highest limit
            ):
    """Keep only the samples whose target `y` lies in the closed range [`low`, `high`]"""
    X, y, X_id, depth_order = data
    # Both bounds are inclusive; the same row mask is applied to every array.
    keep = (y >= low) & (y <= high)
    return X[keep, :], y[keep], X_id[keep], depth_order[keep, :]

# Cell
def select_tax_order(data:tuple, # (X, y, X_id, depth_order)
                     tax_order:int=None # Value between 0 and 12
                    ):
    """Select data based on Soil Taxonomy order.

    Keeps the rows whose second column of `depth_order` equals `tax_order`.
    When `tax_order` is `None` (the default), `data` is returned unchanged.
    """
    X, y, X_id, depth_order = data
    # Compare against None explicitly: 0 is a valid order value and must
    # not be mistaken for "no filter requested".
    if tax_order is None:
        return data
    idx = depth_order[:, 1] == tax_order
    return X[idx, :], y[idx], X_id[idx], depth_order[idx, :]

# Cell
def select_X(data:tuple, # (X, y, X_id, depth_order)
             low:int=0 # Lowest absorbance value
            ):
    """Keep only the samples whose smallest feature value is strictly above `low`"""
    X, y, X_id, depth_order = data
    # A row is kept only if every one of its features exceeds `low`,
    # i.e. its row-wise minimum does.
    keep = X.min(axis=1) > low
    return X[keep, :], y[keep], X_id[keep], depth_order[keep, :]

# Cell
def get_y_by_order(y, tax_order, tax_lookup):
    """Group `y` by the distinct values of `tax_order`, least frequent value first.

    Returns a 3-tuple:
      - list of `y` sub-arrays, one per distinct `tax_order` value, ordered
        by increasing number of occurrences;
      - the occurrence counts in that same (ascending) order;
      - the permutation that sorts the counts, indexing into the sorted
        unique values of `tax_order`.

    `tax_lookup` is accepted but not used by this function.
    """
    unique_orders, counts = np.unique(tax_order, return_counts=True)
    ranking = counts.argsort()  # positions of the unique values, rarest first

    grouped = [y[tax_order == order] for order in unique_orders[ranking]]

    return (grouped, counts[ranking], ranking)