import json
import os
import re
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Set, List


def get_bib_keys(txt: str) -> Set[str]:
    """Collect the keys of all entries found in BibTeX source text.

    An entry is recognised as ``@<letters>{<key>,``; the key is everything
    between the opening brace and the first comma that follows it.

    Args:
        txt: BibTeX source text.

    Returns:
        The set of entry keys; empty when no entry header is found.
    """
    entry_header = re.compile(r"@[a-zA-Z]+\{([^,]+),")
    return {match.group(1) for match in entry_header.finditer(txt)}


def get_aux_bibdata(aux: Path) -> List[Path]:
    """List the ``.bib`` files referenced by a LaTeX ``.aux`` file.

    Only the first ``\\bibdata{...}`` command is read. Its comma-separated
    names are de-duplicated (first occurrence wins) and resolved as
    ``<name>.bib`` next to the aux file.

    Args:
        aux: Path of the ``.aux`` file to read.

    Returns:
        Paths of the referenced bibliography files in order of appearance;
        empty when the file has no ``\\bibdata`` command.
    """
    with open(aux) as handle:
        content = handle.read()
    folder = aux.parent

    # having more than one bibdata is an error, so only the first one counts
    found = re.search(r"\\bibdata{([^}]+)}", content)
    if found is None:
        return []

    # split comma-separated entries, keeping only the first occurrence of each
    unique_names = []
    for name in found.group(1).split(","):
        if name not in unique_names:
            unique_names.append(name)

    return [folder / f"{name}.bib" for name in unique_names]


def get_aux_keys(aux: Path) -> Set[str]:
    """Collect every citation key referenced in a LaTeX ``.aux`` file.

    Args:
        aux: Path of the ``.aux`` file to read.

    Returns:
        The set of keys found in ``\\citation{...}`` commands; empty when the
        file contains none.
    """
    with open(aux) as handle:
        content = handle.read()
    citations = re.findall(r"\\citation{([^}]+)}", content)
    # the `cite` package in LaTeX writes multi-citations as one
    # comma-separated argument, so each argument is split into its keys
    return {key for group in citations for key in group.split(",")}


def get_entry_online(key: str) -> str:
    """Fetch the BibTeX entry for a citation key from ADS or Inspire.

    Keys that start with a digit are treated as ADS keys and looked up through
    the ADS export API, which needs an API token in the ``ADS_TOKEN``
    environment variable. All other keys are looked up on Inspire.

    Args:
        key: Citation key to look up.

    Returns:
        The BibTeX text returned by the service. An empty string is returned
        when the service answers with an HTTP error, or when an ADS key is
        requested while ``ADS_TOKEN`` is not set (a hint is printed then).

    Raises:
        AssertionError: If the returned text contains more than one entry.
    """
    # Inspire and ADS keys are supported. ADS keys start with digits.
    if re.match("[0-9]+", key):
        # https://github.com/adsabs/adsabs-dev-api
        token = os.environ.get("ADS_TOKEN", "")
        if not token:
            print(
                "Key looks like ADS format, but the ADS_TOKEN environment variable "
                "is not set. You need to follow these steps:\n"
                "\n"
                "1) Follow the instructions at "
                "https://github.com/adsabs/adsabs-dev-api#access to get an API token.\n"
                "2) Export the token in your shell as ADS_TOKEN with\n"
                "      export ADS_TOKEN=<insert token here>"
            )
            # An unauthenticated request is expected to be rejected anyway, so
            # skip the network round-trip and report "no entry" right away.
            return ""
        req = urllib.request.Request(
            "https://api.adsabs.harvard.edu/v1/export/bibtex",
            # json.dumps escapes quotes, backslashes and non-ASCII characters
            # in the key, so the request body is always valid ASCII JSON.
            json.dumps({"bibcode": [key]}).encode("ascii"),
            {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
        )
        try:
            with urllib.request.urlopen(req) as r:
                # The last character of the export is dropped; a response
                # without an "export" field therefore yields "".
                bibdata = json.loads(r.read()).get("export", "\n")[:-1]
        except urllib.error.HTTPError:
            # we accept failure silently
            bibdata = ""
    else:
        # https://github.com/inspirehep/rest-api-doc
        data = urllib.parse.urlencode({"q": key, "format": "bibtex"})
        req = urllib.request.Request(f"https://inspirehep.net/api/literature?{data}")
        try:
            with urllib.request.urlopen(req) as r:
                bibdata = r.read().decode() if r.code == 200 else ""
        except urllib.error.HTTPError:
            # urlopen raises for 4xx/5xx responses; accept that failure
            # silently, like the ADS branch does
            bibdata = ""
    # sanity check: a single key must not resolve to several entries
    assert len(get_bib_keys(bibdata)) <= 1
    return bibdata


def find_in_path(name: str) -> List[Path]:
    """Find files matching a name or glob pattern in the executable search path.

    The directories ``/bin``, ``/usr/bin`` and ``/usr/local/bin`` are searched
    first, followed by the entries of the ``PATH`` environment variable with
    environment variables expanded. Each directory is searched only once,
    even when it is listed several times.

    Args:
        name: File name or glob pattern, matched with ``Path.glob`` inside
            each search directory.

    Returns:
        The matching paths in search order; empty when nothing matches.
    """
    search_dirs = ["/bin", "/usr/bin", "/usr/local/bin"]

    # PATH may be unset (e.g. in a stripped-down environment); then only the
    # default directories are searched instead of raising KeyError.
    env_path = os.environ.get("PATH")
    if env_path is not None:
        search_dirs.extend(
            os.path.expandvars(entry) for entry in env_path.split(os.pathsep)
        )

    results = []
    # The default directories are usually part of PATH as well; de-duplicate
    # (keeping order) so that no match is reported twice.
    for directory in dict.fromkeys(Path(d) for d in search_dirs):
        results.extend(directory.glob(name))

    return results
