from __future__ import annotations

import copy
import dataclasses
import functools
import itertools
import re
import warnings
from datetime import timedelta
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Generic,
    List,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    TypeVar,
    Union,
    cast,
)

import pyarrow as pa

from chalk._validation.feature_validation import FeatureValidation
from chalk._validation.validation import Validation
from chalk.features._encoding.converter import FeatureConverter, TDecoder, TEncoder
from chalk.features._encoding.primitive import TPrimitive
from chalk.features.tag import Tags
from chalk.serialization.parsed_annotation import ParsedAnnotation
from chalk.streams import Windowed
from chalk.utils.collections import ensure_tuple
from chalk.utils.duration import Duration, parse_chalk_duration
from chalk.utils.string import to_snake_case

if TYPE_CHECKING:
    from chalk.features.feature_set import Features
    from chalk.features.filter import Filter

# Type variable for a feature's "rich" value type (the type handed to
# ``FeatureConverter`` as ``rich_type`` / ``rich_default``).
_TRich = TypeVar("_TRich")
# Type variable for the primitive counterpart of a rich value; bound to ``TPrimitive``.
_TPrim = TypeVar("_TPrim", bound=TPrimitive)

# Names exported by a star-import of this module.
__all__ = ["Feature", "feature", "has_one", "has_many"]


@dataclasses.dataclass
class HasOnePathObj:
    """One hop in the chain of relationships leading to a nested feature.

    ``Feature.copy_with_path`` appends one of these per traversal step, and
    ``Feature.root_fqn`` joins the ``parent`` names of the hops to build the
    dotted name of the nested feature.
    """

    # Feature that was traversed to reach ``child``.
    parent: Feature
    # Feature reached through ``parent``.
    child: Feature
    # Attribute name of the child; ``copy_with_path`` passes ``child.attribute_name``.
    parent_to_child_attribute_name: str


@dataclasses.dataclass
class _VersionInfo:
    """Version bookkeeping attached to a versioned ``Feature``."""

    # Version this particular feature object refers to.
    version: int
    # Highest version declared for the feature.
    maximum: int
    # Version used when no explicit version is requested.
    default: int
    # Feature object registered for each version number.
    reference: Dict[int, Feature]
    # Unversioned name that the ``@<version>`` suffix is appended to.
    base_name: str = ""

    def name_for_version(self, version: int) -> str:
        """Return the feature name for ``version``; version 1 carries no ``@`` suffix."""
        if version == 1:
            return self.base_name
        return f"{self.base_name}@{version}"


class FeatureNotFoundException(ValueError):
    """Raised when a fully-qualified feature name cannot be resolved in the registry."""

    def __init__(self, fqn: str) -> None:
        message = f"Feature '{fqn}' was not found in the registry"
        super().__init__(message)


class Feature(Generic[_TPrim, _TRich]):
    def __init__(
        self,
        name: Optional[str] = None,
        attribute_name: Optional[str] = None,
        namespace: Optional[str] = None,
        features_cls: Optional[Type[Features]] = None,
        typ: Optional[Union[ParsedAnnotation, Type[_TRich]]] = None,
        version: Optional[int] = None,
        default_version: int = 1,
        description: Optional[str] = None,
        owner: Optional[str] = None,
        tags: Optional[List[str]] = None,
        primary: Optional[bool] = None,
        default: Union[_TRich, ellipsis] = ...,
        max_staleness: Union[Duration, None, ellipsis] = ...,
        etl_offline_to_online: Optional[bool] = None,
        encoder: Optional[TEncoder[_TPrim, _TRich]] = None,
        decoder: Optional[TDecoder[_TPrim, _TRich]] = None,
        pyarrow_dtype: Optional[pa.DataType] = None,
        join: Optional[Union[Callable[[], Filter], Filter]] = None,
        path: Tuple[HasOnePathObj, ...] = (),
        is_feature_time: Optional[bool] = None,
        is_autogenerated: bool = False,
        validations: Optional[FeatureValidation] = None,
        all_validations: Optional[List[FeatureValidation]] = None,
        # Window durations should be set on the Windowed() parent feature,
        # and contain all the durations of the child features
        window_durations: Sequence[int] = (),
        # Windowed duration should be set on the underlying pseudofeature that represents a particular windowed bucket
        window_duration: Optional[int] = None,
        no_display: bool = False,
        offline_ttl: Union[Duration, None, ellipsis] = ...,
    ):
        self._typ = typ if typ is None or isinstance(typ, ParsedAnnotation) else ParsedAnnotation(underlying=typ)
        self.features_cls = features_cls
        self._name = name
        # the attribute name for the feature in the @features class (in case if the name is specified differently)
        self._attribute_name = attribute_name
        self._namespace = namespace
        self.path = path
        self.version: Optional[_VersionInfo] = (
            _VersionInfo(
                version=default_version,
                maximum=version,
                default=default_version,
                reference={},
            )
            if version is not None
            else None
        )

        self.description = description
        self.owner = owner
        self._encoder = encoder
        self._decoder = decoder
        self._pyarrow_dtype = pyarrow_dtype
        self.tags = tags
        self._primary = primary
        self._default = default
        if max_staleness is None:
            max_staleness = timedelta(0)
        if isinstance(max_staleness, str):
            max_staleness = parse_chalk_duration(max_staleness)
        self._max_staleness = max_staleness
        if offline_ttl is None:
            offline_ttl = timedelta(0)
        if offline_ttl is ...:
            # Should we allow the offline_ttl to be set via the class decorator?
            offline_ttl = "infinity"
        if isinstance(offline_ttl, str):
            offline_ttl = parse_chalk_duration(offline_ttl)
        self._offline_ttl = offline_ttl
        self._etl_offline_to_online = etl_offline_to_online
        self._is_feature_time = is_feature_time
        self.is_autogenerated = is_autogenerated
        self.no_display = no_display
        self._join = join
        self._validations = validations
        self._all_validations = all_validations or []
        if self._validations is not None:
            self._all_validations.append(self._validations)
        self.window_durations = window_durations
        self._window_duration = window_duration

    def __str__(self) -> str:
        """Return the root fully-qualified name (see ``root_fqn``)."""
        return self.root_fqn

    @property
    def offline_ttl(self) -> timedelta:
        if self._offline_ttl is ...:
            raise RuntimeError("The offline TTL was never finalized. Is this feature in a features class?")
        return self._offline_ttl

    @property
    def max_staleness(self) -> timedelta:
        if self._max_staleness is ...:
            raise RuntimeError("The max staleness was never finalized. Is this feature in a features class?")
        return self._max_staleness

    @property
    def etl_offline_to_online(self) -> bool:
        """Whether the feature is copied from offline to online.

        Raises
        ------
        RuntimeError
            If the setting is still ``None``, i.e. it was never given a concrete value.
        """
        if self._etl_offline_to_online is None:
            # The message names this setting; it previously repeated the max-staleness text.
            raise RuntimeError(
                "The etl_offline_to_online setting was never finalized. Is this feature in a features class?"
            )
        return self._etl_offline_to_online

    @property
    def typ(self) -> ParsedAnnotation:
        if self._typ is None:
            raise RuntimeError("Feature.typ has not yet been set")
        return self._typ

    @typ.setter
    def typ(self, typ: ParsedAnnotation) -> None:
        """Assign the parsed type annotation of this feature."""
        self._typ = typ

    def is_typ_set(self) -> bool:
        """Return whether a type has been assigned, i.e. whether reading ``typ`` will not raise."""
        return self._typ is not None

    @property
    def primary(self) -> bool:
        """Whether this feature is a primary key.

        Pseudofeatures are never primary. An explicit ``primary`` flag given at
        construction wins; otherwise the feature is primary when it is the
        ``__chalk_primary__`` of its features class, or when its type says so.
        """
        from chalk.features.pseudofeatures import PSEUDOFEATURES

        own_fqn = self.root_fqn
        if any(own_fqn == pseudo.root_fqn for pseudo in PSEUDOFEATURES):
            # Pseudofeatures have no features class, so they are checked by name.
            return False
        if self._primary is not None:
            return self._primary
        owner_cls = self.features_cls
        if owner_cls is not None:
            declared_primary = owner_cls.__chalk_primary__
            if declared_primary is not None and declared_primary.name == self.name:
                return True
        return self.typ.is_primary

    @property
    def is_feature_time(self) -> bool:
        """Whether this feature is the feature-time (timestamp) feature.

        Pseudofeatures never are. An explicit ``is_feature_time`` flag given at
        construction wins; otherwise the feature qualifies when it is the
        ``__chalk_ts__`` of its features class, or when its type says so.
        """
        from chalk.features.pseudofeatures import PSEUDOFEATURES

        own_fqn = self.root_fqn
        if any(own_fqn == pseudo.root_fqn for pseudo in PSEUDOFEATURES):
            # Pseudofeatures have no features class, so they are checked by name.
            return False
        if self._is_feature_time is not None:
            return self._is_feature_time
        owner_cls = self.features_cls
        if owner_cls is not None:
            ts_feature = owner_cls.__chalk_ts__
            if ts_feature is not None and ts_feature.name == self.name:
                return True
        return self.typ.is_feature_time

    @functools.cached_property
    def converter(self):
        """Build the ``FeatureConverter`` for this feature; the result is cached on the instance.

        Raises
        ------
        RuntimeError
            If the feature is a has-one or has-many relationship rather than a scalar.
        """
        if self.is_has_one or self.is_has_many:
            message = (
                f"Feature '{self}' is not a scalar feature, so its values cannot be converted into a serialized type. "
                "Please access the converters on the underlying scalar feature."
            )
            raise RuntimeError(message)
        rich_type = self.typ.parsed_annotation
        if isinstance(rich_type, Windowed):
            # TODO -- handle Optional[Windowed]?
            # For windowed features the converter works on the wrapped kind.
            rich_type = rich_type.kind
        return FeatureConverter(
            name=self.fqn,
            rich_type=cast(Type[_TRich], rich_type),
            is_nullable=self.typ.is_nullable,
            rich_default=self._default,
            pyarrow_dtype=self._pyarrow_dtype,
            encoder=self._encoder,
            decoder=self._decoder,
        )

    @property
    def attribute_name(self):
        """The attribute name of the feature on its class; raises ``RuntimeError`` while unset."""
        attr = self._attribute_name
        if attr is not None:
            return attr
        raise RuntimeError(
            "Feature.attribute_name is not yet defined. Is the feature being constructed outside of a Features class?"
        )

    @attribute_name.setter
    def attribute_name(self, attribute_name: str):
        """Set the attribute name, also using it as the feature name when no name was given."""
        if self._name is None:
            # No explicit name: the feature is named after its attribute.
            self._name = attribute_name
        self._attribute_name = attribute_name

    @property
    def name(self):
        """The feature's name; raises ``RuntimeError`` while unset."""
        current = self._name
        if current is not None:
            return current
        raise RuntimeError(
            "Feature.name is not yet defined. Is the feature being constructed outside of a Features class?"
        )

    @name.setter
    def name(self, name: str) -> None:
        """Assign the feature's name."""
        self._name = name

    @property
    def namespace(self):
        """The namespace the feature belongs to; raises ``RuntimeError`` while unset."""
        current = self._namespace
        if current is not None:
            return current
        raise RuntimeError(
            "Feature.namespace is not yet defined. Is the feature being constructed outside of a Features class?"
        )

    @namespace.setter
    def namespace(self, namespace: str) -> None:
        """Assign the feature's namespace."""
        self._namespace = namespace

    @classmethod
    @functools.lru_cache(None)
    def from_root_fqn(cls, root_fqn: str) -> Feature:
        """Convert a Root FQN into a feature.

        Parameters
        ----------
        root_fqn
            The root fqn of the feature

        Returns
        -------
        Feature
            The feature for that root_fqn.
        """
        from chalk.features.feature_set import FeatureSetBase
        from chalk.features.pseudofeatures import PSEUDOFEATURES

        for x in PSEUDOFEATURES:
            if root_fqn == x.root_fqn or root_fqn == x.name:
                return x

        root_fqn = to_snake_case(root_fqn)
        split_fqn = root_fqn.split(".")
        root_ns = split_fqn[0]
        split_fqn = split_fqn[1:]

        if root_ns not in FeatureSetBase.registry:
            raise FeatureNotFoundException(root_fqn)
        features_cls = FeatureSetBase.registry[root_ns]

        # FQNs are by name, so must lookup the feature in features_cls.features instead of using getattr
        feat: Optional[Feature] = None

        while len(split_fqn) > 0:
            feature_name = split_fqn[0]
            split_fqn = split_fqn[1:]

            found_feature = False

            for x in features_cls.features:
                assert isinstance(x, Feature)
                if x.name == feature_name:
                    assert x.attribute_name is not None
                    found_feature = True
                    feat = x if feat is None else feat.copy_with_path(x)
                    if len(split_fqn) > 0:
                        # Going to recurse, so validate that the feature is something that we can recurse on.
                        if not x.is_has_one:
                            raise FeatureNotFoundException(root_fqn)
                        assert x.joined_class is not None
                        features_cls = x.joined_class
                    break
            if not found_feature:
                raise FeatureNotFoundException(root_fqn)
        if feat is None:
            raise FeatureNotFoundException(root_fqn)

        return feat

    @property
    def root_namespace(self) -> str:
        """Namespace at the root of the path: the first hop's parent namespace, else this feature's own."""
        if not self.path:
            assert self.namespace is not None, "namespace is None"
            return self.namespace
        root_parent = self.path[0].parent
        assert root_parent.namespace is not None, "parent namespace is None"
        return root_parent.namespace

    @property
    def root_fqn(self):
        """Dotted name starting at the root namespace.

        Without a path this is ``<namespace>.<name>``. With a path it is the
        root namespace, the name of every parent along the path, and finally
        this feature's name.
        """
        assert self.name is not None, "Missing name on feature"
        if not self.path:
            return f"{self.namespace}.{self.name}"
        segments = [self.root_namespace]
        own_name = self.name
        segments.extend(hop.parent.name for hop in self.path)
        segments.append(own_name)
        return ".".join(segments)

    def __hash__(self) -> int:
        """Hash by ``root_fqn``, the same key ``__eq__`` compares on."""
        return hash(self.root_fqn)

    def __eq__(self, other: object):
        """Compare by root FQN against another ``Feature`` or a string.

        Returns ``NotImplemented`` for has-many features and for any other
        operand type.
        """
        if self.is_has_many:
            # For equality checks on a has-many, we would also need to compare the columns and types
            # For now, ignoring.
            return NotImplemented
        if isinstance(other, Feature):
            other = other.root_fqn
        if not isinstance(other, str):
            return NotImplemented
        return self.root_fqn == other

    def __repr__(self) -> str:
        """Debug representation; reads ``namespace``, ``name`` and ``typ``, so it raises if any is unset."""
        return f"Feature(fqn={self.namespace}.{self.name}, typ={self.typ})"

    @property
    def fqn(self) -> str:
        """``<namespace>.<name>`` of this feature alone; unlike ``root_fqn`` it ignores ``path``."""
        return f"{self.namespace}.{self.name}"

    @property
    def is_has_one(self):
        """Whether this feature is a has-one relationship: not a DataFrame type, and it has a join."""
        # The DataFrame check comes first so that ``join`` is never evaluated
        # for DataFrame-typed features.
        if self.typ.is_dataframe:
            return False
        return self.join is not None

    @property
    def is_has_many(self):
        """Whether this feature is a has-many relationship: a DataFrame type that has a join."""
        return self.typ.is_dataframe and self.join is not None

    @property
    def is_scalar(self):
        """Whether this feature is neither a relationship (has-many / has-one) nor the feature time."""
        return not (self.is_has_many or self.is_has_one or self.is_feature_time)

    @property
    def is_windowed(self):
        """Whether the feature is a "fake" feature that has underlying windowed pseudofeatures.
        This feature fqn is not associated with any data in the online or offline stores, because
        it represents multiple windowed features."""
        if not self.is_scalar:
            return False
        return len(self.window_durations) > 0

    @property
    def is_windowed_pseudofeature(self) -> bool:
        """Whether the feature is an underlying windowed pseudofeature, representing a particular windowed bucket.
        This feature is like any other scalar feature, and has data associated in the offline and online stores.

        True exactly when a ``window_duration`` was given at construction."""
        return self._window_duration is not None

    @property
    def window_duration(self):
        """The window duration of this windowed pseudofeature.

        Raises
        ------
        ValueError
            If no window duration was set, i.e. the feature is not a windowed pseudofeature.
        """
        if self._window_duration is None:
            # The message previously read "is now a ...", which stated the opposite of the error.
            raise ValueError(f"Feature {self} is not a windowed pseudofeature")
        return self._window_duration

    @property
    def window_buckets(self) -> Optional[Set[int]]:
        if self.is_windowed:
            return cast(Windowed, self.typ.underlying).buckets_seconds
        else:
            return None

    @property
    def default(self):
        """The rich representation of the default value.

        Deprecated: emits a warning on every access; use
        ``Feature.converter.rich_default`` instead.
        """
        # stacklevel=2 attributes the warning to the code reading ``.default``
        # rather than to this line, so callers can find the usage to migrate.
        warnings.warn(
            "``Feature.default`` is deprecated. Instead, use ``Feature.converter.rich_default``",
            stacklevel=2,
        )
        return self._default

    @property
    def has_resolved_join(self) -> bool:
        """Whether a join is stored on this feature, either given at construction or cached by ``join``."""
        return self._join is not None

    @property
    def join(self) -> Optional[Filter]:
        from chalk.features.pseudofeatures import PSEUDOFEATURES

        if self._join is not None:
            # Join was explicitly specified
            return self._join() if callable(self._join) else self._join
        # Attempt to extract the join condition from the foreign feature
        if not self.typ.is_features_cls:
            return None

        if self.root_fqn in (x.root_fqn for x in PSEUDOFEATURES):
            # Need to manually check the pseudofeatures because they
            # do not have a features class
            return None

        assert self.features_cls is not None
        joins: List[Tuple[str, Filter]] = []  # Tuple of (name, Join)
        for f in self.typ.as_features_cls.features:
            assert isinstance(f, Feature)
            if f.typ.is_features_cls and f.typ.as_features_cls is self.features_cls and f.has_resolved_join:
                assert f.join is not None
                assert f.name is not None
                join = f.join() if callable(f.join) else f.join
                joins.append((f.name, join))
        if len(joins) == 0:
            # It's a nested feature
            return None
        # TODO(Ravi): Enable this check. But let's see if we can be smarter about which join to automatically use, if there are multiple
        # if len(joins) > 1:
        #     assert self.features_cls is not None
        #     raise ValueError(
        #         f"Multiple join conditions exist for {self.features_cls.__name__} and {foreign_features.__name__} on keys: "
        #         + f", ".join(f'{foreign_features.__name__}.{name}' for (name, _) in joins)
        #     )
        join = joins[0][1]
        if callable(join):
            join = join()
        self._join = join
        return join

    @property
    def joined_class(self) -> Optional[Type[Features]]:
        j = self.join
        if j is None:
            return None
        if j.lhs is not None and j.rhs is not None and isinstance(j.lhs, Feature) and isinstance(j.rhs, Feature):
            if j.lhs.namespace != self.namespace:
                return j.lhs.features_cls
            return j.rhs.features_cls
        return None

    def copy_with_path(self, child: Feature) -> Feature:
        child_copy = copy.copy(child)
        assert child.attribute_name is not None
        child_copy.path = tuple(
            (
                *self.path,
                HasOnePathObj(
                    parent=self,
                    child=child,
                    parent_to_child_attribute_name=child.attribute_name,
                ),
            )
        )
        return child_copy

    def for_version(self, version: int) -> Feature:
        if self.version is None:
            assert self.features_cls is not None
            raise ValueError(
                (
                    f"Cannot request version {version} of feature '{self.root_fqn}', because this feature "
                    "doesn't have a version set at definition. To set a version, write \n"
                    f"""    @features
    class {self.features_cls.__name__}:
        {self.attribute_name}: ... = feature(version={version})
        ...
"""
                )
            )

        if version not in self.version.reference:
            assert self.features_cls is not None
            raise ValueError(
                (
                    f"Cannot request version {version} of feature '{re.sub('@.*', '', self.root_fqn)}', because this feature "
                    f"has a maximum version of {self.version.maximum} < {version}. "
                    f"To add versions, write \n"
                    f"""    @features
    class {self.features_cls.__name__}:
   -    {self.attribute_name}: ... = feature(version={self.version.maximum})
   +    {self.attribute_name}: ... = feature(version={version})
        ...
"""
                )
            )

        versioned_feature = self.version.reference[version]
        if len(self.path) == 0:
            return versioned_feature

        # We have a path
        copied_versioned_feature = copy.copy(versioned_feature)
        assert versioned_feature.version is not None
        copied_versioned_feature.version = _VersionInfo(
            maximum=versioned_feature.version.maximum,
            default=versioned_feature.version.default,
            reference=versioned_feature.version.reference,
            version=version,
        )
        copied_versioned_feature.path = tuple(
            (
                *self.path[:-1],
                HasOnePathObj(
                    parent=self.path[-1].parent,
                    child=copied_versioned_feature,
                    parent_to_child_attribute_name=copied_versioned_feature.attribute_name,
                ),
            )
        )
        return copied_versioned_feature


def feature(
    description: Optional[str] = None,
    owner: Optional[str] = None,
    tags: Optional[Tags] = None,
    name: Optional[str] = None,
    version: Optional[int] = None,
    default_version: int = 1,
    primary: Optional[bool] = None,
    max_staleness: Optional[Union[ellipsis, Duration]] = ...,
    etl_offline_to_online: Optional[bool] = None,
    encoder: Optional[TEncoder[_TPrim, _TRich]] = None,
    decoder: Optional[TDecoder[_TPrim, _TRich]] = None,
    min: Optional[_TRich] = None,
    max: Optional[_TRich] = None,
    min_length: Optional[int] = None,
    max_length: Optional[int] = None,
    strict: bool = False,
    validations: Optional[List[Validation]] = None,
    dtype: Optional[pa.DataType] = None,
    default: _TRich = ...,
    offline_ttl: Optional[Union[ellipsis, Duration]] = ...,
) -> _TRich:
    """Add metadata and configuration to a feature.

    The return value is a `Feature` object, cast to `_TRich` so that it can be
    assigned to an annotated class attribute.

    Parameters
    ----------
    owner
        You may also specify which person or group is responsible for a feature.
        The owner tag will be available in Chalk's web portal.
        Alerts that do not otherwise have an owner will be assigned
        to the owner of the monitored feature.
        Read more at https://docs.chalk.ai/docs/feature-discovery#owner
    tags
        Add metadata to a feature for use in filtering, aggregations,
        and visualizations. For example, you can use tags to assign
        features to a team and find all features for a given team.
        Read more at https://docs.chalk.ai/docs/feature-discovery#tags
    version
        The maximum version for a feature. Versioned features can be
        referred to with the `@` operator:

        >>> @features
        ... class User:
        ...     id: str
        ...     score: int = feature(version=2)
        >>> str(User.score @ 2)
        "user.score@2"

        See more at https://docs.chalk.ai/docs/feature-versions
    default_version
        The default version for a feature. When you reference a
        versioned feature without the `@` operator, you reference
        the `default_version`. Set to `1` by default.

        >>> @features
        ... class User:
        ...     id: str
        ...     score: int = feature(version=2, default_version=2)
        >>> str(User.score)
        "user.score"

        See more at https://docs.chalk.ai/docs/feature-versions#default-versions
    max_staleness
        When a feature is expensive or slow to compute, you may wish to cache its value.
        Chalk uses the terminology "maximum staleness" to describe how recently a feature
        value needs to have been computed to be returned without re-running a resolver.
        Read more at https://docs.chalk.ai/docs/feature-caching
    etl_offline_to_online
        When `True`, Chalk copies this feature into the online environment
        when it is computed in offline resolvers.
        Read more at https://docs.chalk.ai/docs/reverse-etl
    min
        If specified, when this feature is computed, Chalk will check that `x >= min`.
    max
        If specified, when this feature is computed, Chalk will check that `x <= max`.
    min_length
        If specified, when this feature is computed, Chalk will check that `len(x) >= min_length`.
    max_length
        If specified, when this feature is computed, Chalk will check that `len(x) <= max_length`.
    strict
        If `True`, if this feature does not meet the validation criteria, Chalk will not persist
        the feature value and will treat it as failed.
    validations
        Additional `Validation` objects. The `min`, `max`, `min_length`,
        `max_length` and `strict` of each one are carried over, and the results
        are stored on the feature together with the validation built from the
        arguments of the same names above.

    Other Parameters
    ----------------
    name
        The name for the feature. By default, the name of a feature is
        the name of the attribute on the class, prefixed with
        the snake-cased name of the class. Note that if you provide an
        explicit name, the namespace, determined by the feature class,
        will still be prepended. See `features` for more details.
    default
        The default value of the feature if it otherwise can't be computed.
        If you don't need to specify other metadata, you can also assign a default
        in the same way you would assign a default to a `dataclass`:

        >>> from chalk.features import features
        >>> @features
        ... class User:
        ...     num_purchases: int = 0
    primary
        If `True`, this feature is considered a primary key for the
        feature class. Note that a feature class cannot have more than
        one primary key.

        Typically, you will not need to use this parameter. Features named
        `id` are declared primary keys by default.

        If you have primary key feature with a name other than `id`, you can
        use this parameter, or the class `Primary` to indicate the primary key.
        For example:

        >>> @features
        ... class User:
        ...     uid: Primary[int]
    description
        Descriptions are typically provided as comments preceding
        the feature definition. For example, you can document a
        `fraud_score` feature with information about the values
        as follows:

        >>> @features
        ... class User:
        ...     # 0 to 100 score indicating an identity match.
        ...     fraud_score: float

        You can also specify the description directly with this parameter.
        Read more at https://docs.chalk.ai/docs/feature-discovery#description
    encoder
        Custom encoder, handed to the feature's converter.
    decoder
        Custom decoder, handed to the feature's converter.
    dtype
        The `pyarrow` data type for the feature, handed to the feature's converter.
    offline_ttl
        A duration for the offline TTL. `None` is treated as a zero duration;
        when left unspecified, the offline TTL is `"infinity"`.

    Returns
    -------
    _TRich
        The type of the input feature, given by `_TRich`.

    Examples
    --------
    >>> from chalk.features import Primary
    >>> @features
    ... class User:
    ...     uid: Primary[int]
    ...     # Description of the name feature.
    ...     # :owner: fraud@company.com
    ...     # :tags: fraud, credit
    ...     name: str = feature(
    ...         max_staleness="10m",
    ...         etl_offline_to_online=True
    ...     )
    ...     score: int = feature(
    ...         version=2, default_version=2
    ...     )
    """
    tag_list = None if tags is None else list(ensure_tuple(tags))

    # Validation described directly by this call's keyword arguments.
    inline_validation = FeatureValidation(
        min=min,
        max=max,
        min_length=min_length,
        max_length=max_length,
        contains=None,
        strict=strict,
    )

    # Validations passed as ``Validation`` objects, converted one by one.
    if validations is None:
        extra_validations = None
    else:
        extra_validations = [
            FeatureValidation(
                min=v.min,
                max=v.max,
                min_length=v.min_length,
                max_length=v.max_length,
                contains=None,
                strict=v.strict,
            )
            for v in validations
        ]

    configured = Feature(
        name=name,
        version=version,
        default_version=default_version,
        owner=owner,
        tags=tag_list,
        description=description,
        primary=primary,
        max_staleness=max_staleness,
        etl_offline_to_online=etl_offline_to_online,
        encoder=encoder,
        decoder=decoder,
        pyarrow_dtype=dtype,
        validations=inline_validation,
        all_validations=extra_validations,
        default=default,
        offline_ttl=offline_ttl,
    )
    return cast(_TRich, configured)


def has_one(f: Callable[[], Any]) -> Any:
    """Specify a feature that represents a one-to-one relationship.

    Read more at https://docs.chalk.ai/docs/has-one

    Parameters
    ----------
    f
        The join condition between `@features` classes.
        This argument is callable to allow for forward
        references to members of this class and the joined
        class.

    Returns
    -------
    Any
        A `Feature` carrying `f` as its join condition.

    Examples
    --------
    >>> from chalk.features import features
    >>> @features
    ... class Card:
    ...     id: str
    ...     user_id: str
    ...     balance: float
    >>> @features
    ... class User:
    ...     id: str
    ...     card: Card = has_one(
    ...         lambda: User.id == Card.user_id
    ...     )
    """
    return Feature(join=f)


def has_many(f: Callable[[], Any]) -> Any:
    """Specify a feature that represents a one-to-many relationship.

    Parameters
    ----------
    f
        The join condition between `@features` classes.
        This argument is callable to allow for forward
        references to members of this class and the joined
        class.

    Returns
    -------
    Any
        A `Feature` carrying `f` as its join condition.

    Examples
    --------
    >>> from chalk.features import DataFrame, features
    >>> @features
    ... class Card:
    ...     id: str
    ...     user_id: str
    ...     balance: float
    >>> @features
    ... class User:
    ...     id: str
    ...     cards: DataFrame[Card] = has_many(
    ...         lambda: User.id == Card.user_id
    ...     )
    """
    return Feature(join=f)
