#!/usr/bin/env python
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import operator
import xlsxwriter
from datetime import datetime
from pyzotero import zotero
from tinyscript import *
from tinyscript.helpers.text import _indent
from tinyscript.report import *


__author__    = "Alexandre D'Hondt"
__email__     = "alexandre.dhondt@gmail.com"
__version__   = "1.3.0"
__copyright__ = ("A. D'Hondt", 2020)
__license__   = "gpl-3.0"
__doc__       = """
This tool aims to inspect, filter, sort and export Zotero references.

It works by downloading the whole list of items to perform computations locally. It also uses a modified page rank to
sort references to help selecting the most relevant ones.
"""
__examples__  = [
    "count -f \"collections:biblio\" -f \"rank:>1.0\"",
    "export year title itemType numAuthors numPages zscc references what results comments -f \"collections:biblio\" " \
        "-s date -l \">rank:50\"",
    "list attachments",
    "list collections",
    "list citations --desc -f \"citations:>10\"",
    "show title DOI",
    "show title date zscc -s date -l '>zscc:10'",
]

# names of the JSON cache files, one per kind of cached data ; they are joined to CACHE_PATH by ZoteroCLI.__init__
CACHE_FILES = {
    'attachments': "attachments.json",
    'collections': "collections.json",
    'items':       "items.json",
    'marks':       "marks.json",
    'notes':       "notes.json",
}
# folder holding the cache files (created at import time through create=True)
CACHE_PATH = ts.Path("~/.cache/zotero-cli", create=True, expand=True)
# credentials holder ; this file uses its id/secret attributes and its load(), ask() and save() methods
CREDS_FILE = ts.CredentialsPath(str(CACHE_PATH))

# comparison symbols accepted in numerical/date filters, mapped to their implementation
OPERATORS = {
    '==': operator.eq,
    '<':  operator.lt,
    '<=': operator.le,
    '>':  operator.gt,
    '>=': operator.ge,
}

# column titles to be used instead of the raw field names (see ZoteroCLI.header)
FIELD_ALIASES = {
    'itemType': "Type",
    'what': "What ?",
    'zscc': "#Cited",
}
# fields sorted as numbers (see ZoteroCLI.sort) ; integer fields, except "rank", also accept comparison filters and are
#  all registered as valid field names
FLOAT_FIELDS = []
INTEGER_FIELDS = ["callNumber", "citations", "numAttachments", "numAuthors", "numCreators", "numEditors", "numNotes",
                  "numPages", "rank", "references", "year", "zscc"]
# fields whose value is read from child notes formatted as "[field]: [content]" (see ZoteroCLI._filter)
NOTE_FIELDS = ["comments", "results", "what"]

# datetime formats successively tried by ZoteroCLI.date
TIME_FORMATS = ["%Y", "%Y-%m", "%b %Y", "%B %Y", "%Y-%m-%dT%H:%M:%SZ", "%b %d %Y at %I:%M%p", "%B %d, %Y, %H:%M:%S"]

# slugs of the available charts (see ZoteroCLI.plot)
CHARTS  = ["software-in-time"]
# markers as (name, negated name, description) triples (see ZoteroCLI._marks)
MARKERS = [("read", "unread", "this will display the entry as normal instead of bold"),
           ("irrelevant", "relevant", "this will exclude the entry from the shown results"),
           ("ignore", "unignore", "this will totally ignore the entry while getting the filtered items")]
# predefined queries, selectable by name with the -q/--query option
QUERIES = {
    'no-attachment':         {'filter': ["numAttachments:0"], 'fields': ["title"]},
    'no-url':                {'filter': ["url:<empty>"], 'sort': "year", 'fields': ["year", "title"]},
    'top-10-most-relevants': {'limit': ">rank:10", 'sort': "year", 'fields': ["year", "title", "numPages", "itemType"]},
    'top-50-most-relevants': {'limit': ">rank:50", 'sort': "year", 'fields': ["year", "title", "numPages", "itemType"]},
}
# words whose case is preserved by _lower_title
STATIC_WORDS = ["Android", "Bochs", "Linux", "Markov", "NsPack", "Windows"]
# emoji codes per formatted item type, used by ZoteroCLI.export for the "{emoji}" placeholder
TYPE_EMOJIS = {
    'blog post':         ":pushpin:",
    'book':              ":book:",
    'computer program':  ":floppy_disk:",
    'conference paper':  ":notebook:",
    'default':           ":question:",
    'document':          ":page_facing_up:",
    'journal article':   ":page_with_curl:",
    'manuscript':        ":scroll:",
    'newspaper article': ":newspaper:",
    'presentation':      ":bar_chart:",
    'thesis':            ":mortar_board:",
    'webpage':           ":earth_americas:",
}


class ZoteroCLI(object):
    def __init__(self, api_id=None, api_id_type="user", api_key=None):
        """ Load the Zotero library, from the local cache when available or from zotero.org otherwise, and index it.

        :param api_id:      API identifier ; only saved to CREDS_FILE when api_key is given too
        :param api_id_type: library type handed to pyzotero's Zotero class (default: "user")
        :param api_key:     API key ; only saved to CREDS_FILE when api_id is given too
        :raise ValueError:  when a downloaded child item is neither an attachment nor a note
        """
        if api_id is not None and api_key is not None:
            CREDS_FILE.id = api_id
            CREDS_FILE.secret = api_key
            CREDS_FILE.save()
        self.__zot = None
        # attachments and notes come from the same requests (items' children) ; they are downloaded at most once and
        #  kept here so that handling "notes" after "attachments" does not append every child a second time
        children = None
        for k in ["collections", "items", "attachments", "notes"]:  # not CACHE_FILES.keys() ; order matters here
            cached_file = CACHE_PATH.joinpath(CACHE_FILES[k])
            try:
                with cached_file.open() as f:
                    logger.debug("Getting %s from cache '%s'..." % (k, cached_file))
                    setattr(self, k, json.load(f))
                continue
            except OSError:
                # no readable cache file for this kind of data ; download it hereafter
                pass
            self._creds()
            self.__zot = self.__zot or zotero.Zotero(CREDS_FILE.id, api_id_type, CREDS_FILE.secret)
            logger.debug("Getting %s from zotero.org..." % k)
            if k == "collections":
                self.collections = list(self.__zot.collections())
            elif k == "items":
                self.items = list(self.__zot.everything(self.__zot.top()))
            else:  # "attachments" or "notes"
                if children is None:
                    children = {'attachments': [], 'notes': []}
                    for i in self.items:
                        if i['meta']['numChildren'] > 0:
                            for c in self.__zot.children(i['key']):
                                child_type = c['data']['itemType']
                                if child_type == "attachment":
                                    children['attachments'].append(c)
                                elif child_type == "note":
                                    children['notes'].append(c)
                                else:
                                    raise ValueError("Unknown item type '%s'" % child_type)
                # only set the kind of data that was not found in cache ; the other one is left untouched
                setattr(self, k, children[k])
            with cached_file.open('w') as f:
                logger.debug("Saving %s to cache '%s'..." % (k, cached_file))
                json.dump(getattr(self, k), f)
        # marks are local-only data ; start with an empty set of marks if none was saved yet
        self._marks_file = CACHE_PATH.joinpath(CACHE_FILES['marks'])
        if not self._marks_file.exists():
            self._marks_file.write_text("{}")
        with self._marks_file.open() as f:
            logger.debug("Opening marks from cache '%s'..." % self._marks_file)
            self.marks = json.load(f)
        # index every cached object (collection, item, attachment, note) by its key ; marks are a dictionary, hence
        #  skipped by the list check
        self.__objects = {}
        for a in CACHE_FILES.keys():
            d = getattr(self, a)
            if isinstance(d, list):
                for x in d:
                    try:
                        self.__objects[x['key']] = x
                    except (KeyError, TypeError):
                        logger.error("Cached entry from '%s' has no key: %r" % (a, x))
                        raise
        self._valid_fields = []
        self._valid_tags = []
        # parse items, collecting valid fields and tags
        for i in self.items:
            tags = i['data'].get('tags')
            if tags:
                if ts.is_str(tags):
                    tags = tags.split(";")
                elif ts.is_list(tags):
                    tags = [t['tag'] for t in tags]
                for t in tags:
                    if t not in self._valid_tags:
                        self._valid_tags.append(t)
            for f in i['data'].keys():
                if f not in self._valid_fields:
                    self._valid_fields.append(f)
        # also append computed fields to the list of valid fields
        for f in ["abstractShortNote", "attachments", "authors", "editors", "selected"] + INTEGER_FIELDS + NOTE_FIELDS:
            if f not in self._valid_fields:
                self._valid_fields.append(f)
    
    def _creds(self):
        """ Ensure that the API credentials are set, reading them from the credentials file or prompting for them. """
        def _unset():
            return CREDS_FILE.id == "" and CREDS_FILE.secret == ""
        # first chance: the credentials file
        if _unset():
            CREDS_FILE.load()
        # last chance: the user, whose answer is then saved
        if _unset():
            CREDS_FILE.ask("API ID: ", "API key: ")
            CREDS_FILE.save()
    
    def _filter(self, fields=None, filters=None, force=False):
        """ Apply one or more filters to the items.
        
        This is a generator ; hence, validation errors are only raised once the iteration starts.
        
        :param fields:  list of names of the fields to be kept or computed in the yielded items
        :param filters: list of filters formatted as "[field]:[regex]" ; the field name can be prefixed with "~" to
                         negate the filter and, depending on the field, the regex can also be a comparison (e.g. ">10"),
                         a tag name or "-"/"<empty>" for matching empty values
        :param force:   if True, also yield the items marked as 'ignore'
        :return:        copies of the items with their 'data' reduced to the selected, filtered and computed fields ;
                         an item is yielded only if it satisfies every filter
        :raise ValueError: on a bad filter or an unknown field name
        """
        # validate and make filters
        _filters = {}
        for f in filters or []:
            try:
                field, regex = list(map(lambda x: x.strip(), f.split(":", 1)))
                if regex == "":
                    logger.error("Regex for filter on field '%s' is empty" % field)
                    raise ValueError
                not_ = field[0] == "~"
                if not_:
                    field = field[1:]
                # filter format: (negate, comparison operator, first comparison value's lambda, second comparison value)
                if field in set(INTEGER_FIELDS) - set(["rank"]) and re.match(r"^(|<|>|<=|>=|==)\d+$", regex):
                    m = re.match(r"^(|<|>|<=|>=|==)(\d+)$", regex)
                    op, v = m.group(1), m.group(2).strip()
                    # no operator means strict equality
                    if op == "":
                        op = "=="
                    filt = (not_, OPERATORS[op], lambda i, f: i['data'][f], int(v))
                elif field.startswith("date"):
                    # an empty date string is parsed by ZoteroCLI.date thanks to its empty format
                    if regex in ["-", "<empty>"]:
                        op, v = "==", ""
                    else:
                        m = re.match(r"^(<|>|<=|>=|==)([^=]*)$", regex)
                        op, v = m.group(1), m.group(2).strip()
                    filt = (not_, OPERATORS[op], lambda i, f: ZoteroCLI.date(i['data'][f], i), ZoteroCLI.date(v))
                elif field == "rank":
                    m = re.match(r"^(<|>|<=|>=|==)(\d+|\d*\.\d+)$", regex)
                    op, v = m.group(1), m.group(2).strip()
                    # self.ranks is set by _items, which only passes rank filters once the ranks are computed
                    filt = (not_, OPERATORS[op], lambda i, f: self.ranks.get(i['key'], 0), float(v))
                # filter format: (negate, lambda, lambda's second arg)
                elif field == "tags":
                    if regex not in self._valid_tags and regex not in ["-", "<empty>"]:
                        logger.debug("Should be one of:\n- " + \
                                     "\n- ".join(sorted(self._valid_tags, key=ZoteroCLI.sort)))
                        logger.error("Tag '%s' does not exist" % regex)
                        raise ValueError
                    filt = (not_, lambda i, r: r in ["-", "<empty>"] and i['data']['tags'] in ["", []] or \
                                            r in (i['data']['tags'].split(";") if ts.is_str(i['data']['tags']) else \
                                                  [x['tag'] for x in i['data']['tags']]), regex)
                # filter format: (negate, lambda) ; lambda's second arg is the field name
                elif regex in ["-", "<empty>"]:
                    # 1900 is the year obtained from an empty date (see the "year" computed field below)
                    filt = (not_, lambda i, f: i['data'].get(f) == 1900) if field == "year" else \
                           (not_, lambda i, f: i['data'].get(f) == "")
                # filter format: (negate, compiled regex, value's lambda)
                else:
                    filt = (not_, re.compile(regex, re.I), lambda i, f: i['data'].get(f) or "")
                _filters.setdefault(field, [])
                _filters[field].append(filt)
            # NOTE(review): this bare except also catches the ValueError raised on purpose above as well as the
            #  AttributeError coming from a failed re.match ; every failure thus ends up as a "Bad filter" error
            except:
                logger.error("Bad filter '%s' ; format: [field]:[regex]" % f)
                raise ValueError
        # validate fields
        afields = (fields or []) + list(_filters.keys())
        for f in afields:
            if f not in self._valid_fields:
                logger.debug("Should be one of:\n- " + "\n- ".join(sorted(self._valid_fields, key=ZoteroCLI.sort)))
                logger.error("Bad field name '%s'" % f)
                raise ValueError
        # now yield items, applying the filters and only selecting the given fields
        for i in self.items:
            # create a temporary item with computed fields (e.g. citations)
            tmp_i = {k: v for k, v in i.items() if k != 'data'}
            # native fields are converted to strings here ; computed fields set below keep their own type
            tmp_i['data'] = d = {k: self._format_value(v, k) for k, v in i['data'].items() if k in afields}
            # -1 stands for an unknown number of citations ; it is overwritten if "Extra" holds a "zscc: N" line
            d['zscc'] = -1
            # set custom fields defined in the special field named "Extra"
            for l in i['data'].get('extra', "").splitlines():
                try:
                    field, value = list(map(lambda x: x.strip(), l.split(": ", 1)))
                except:
                    # not a "[field]: [value]" line
                    continue
                field = field.lower()
                # native fields cannot be overridden from "Extra"
                if field not in i['data'].keys():
                    if field == "zscc":
                        try:
                            d[field] = int(value)
                        except:
                            pass
                    else:
                        d[field] = self._format_value(value, field)
            # compute non-existing fields if required
            if "abstractShortNote" in afields:
                # first sentence of the abstract
                d['abstractShortNote'] = re.split(r"\.(\s|$)", i['data']['abstractNote'])[0].strip() + "."
            if "attachments" in afields:
                d['attachments'] = [x['data']['title'] for x in self.attachments if x['data']['parentItem'] == i['key']]
            if "authors" in afields:
                d['authors'] = [c for c in i['data']['creators'] if c['creatorType'] == "author"]
            if "citations" in afields or "references" in afields:
                # related items dated after this item count as citations, the other ones as references
                c, r = 0, 0
                try:
                    links = i['data']['relations']['dc:relation']
                    if not ts.is_list(links):
                        links = [links]
                    for link in links:
                        k = self.__objects[link.split("/")[-1]]
                        # when filtering on collections, only count the related items matching these filters too
                        # NOTE(review): the unpacking below assumes 3-item (regex-based) collection filters ; a
                        #  "collections:<empty>" filter has 2 items only - to be confirmed
                        if "collections" in _filters.keys():
                            gb = True
                            for n, regex, _ in _filters['collections']:
                                b = regex.search(", ".join(self.__objects[x]['data']['name'] for x in \
                                                           k['data']['collections']))
                                gb = gb and [b, not b][n]
                            if not gb:
                                continue
                        if ZoteroCLI.date(k['data']['date'], k) > ZoteroCLI.date(i['data']['date'], i):
                            c += 1
                        if ZoteroCLI.date(k['data']['date'], k) <= ZoteroCLI.date(i['data']['date'], i):
                            r += 1
                # NOTE(review): a KeyError (e.g. no relation or unknown related item) stops the whole loop ; the
                #  links following a faulty one are therefore not counted
                except KeyError:
                    pass
                d['citations'] = c
                d['references'] = r
            if "collections" in afields:
                d['collections'] = [self.__objects[k]['data']['name'] for k in i['data']['collections']]
            if "editors" in afields:
                d['editors'] = [c for c in i['data']['creators'] if c['creatorType'] == "editor"]
            if "numAttachments" in afields:
                d['numAttachments'] = len([x for x in self.attachments if x['data']['parentItem'] == i['key']])
            if "numAuthors" in afields:
                d['numAuthors'] = len([x for x in i['data']['creators'] if x['creatorType'] == "author"])
            if "numCreators" in afields:
                d['numCreators'] = len([x for x in i['data']['creators']])
            if "numEditors" in afields:
                d['numEditors'] = len([x for x in i['data']['creators'] if x['creatorType'] == "editor"])
            if "numNotes" in afields:
                d['numNotes'] = len([x for x in self.notes if x['data']['parentItem'] == i['key']])
            if "numPages" in afields:
                # the value is either a number or a range of pages ; -1 stands for an unknown number of pages
                p = i['data'].get('numPages', i['data'].get('pages')) or "0"
                m = re.match(r"(\d+)(?:\s*[\-–]+\s*(\d+))?$", p)
                if m:
                    s, e = m.groups()
                    d['numPages'] = abs(int(s) - int(e or 0)) or -1
                else:
                    logger.warning("Bad pages value '%s'" % p)
                    d['numPages'] = -1
            if any(x in NOTE_FIELDS for x in afields):
                for f in NOTE_FIELDS:
                    d[f] = ""
                # a note formatted as "[field]: [content]" sets the related field, if it is one of NOTE_FIELDS
                for n in self.notes:
                    if n['data']['parentItem'] == i['key']:
                        t = bs4.BeautifulSoup(n['data']['note']).text
                        try:
                            f, c = t.split(":", 1)
                        except:
                            continue
                        f = f.lower()
                        if f in NOTE_FIELDS:
                            d[f] = c.strip()
            if "year" in afields:
                d['year'] = ZoteroCLI.date(i['data']['date'], i).year
            # now apply filters
            # NB: pass_item set to True means that the item is skipped
            pass_item = False
            for field, tfilters in _filters.items():
                for tfilter in tfilters:
                    # the way a filter is applied depends on its format (see the filter formats above)
                    if isinstance(tfilter[1], re.Pattern):
                        b = tfilter[1].search(self._format_value(tfilter[2](tmp_i, field), field))
                    elif ts.is_lambda(tfilter[1]) and len(tfilter) == 2:
                        b = tfilter[1](tmp_i, field)
                    elif ts.is_lambda(tfilter[1]) and len(tfilter) == 3:
                        b = tfilter[1](tmp_i, tfilter[2])
                    elif len(tfilter) == 4:
                        b = tfilter[1](tfilter[2](tmp_i, field), tfilter[3])
                    else:
                        raise ValueError("Unsupported filter")
                    # tfilter[0] is the negation flag ; skip on no match, or on a match when the filter is negated
                    if [not b, b][tfilter[0]]:
                        pass_item = True
                        break
                if pass_item:
                    break
            if not pass_item and (not tmp_i['key'] in self.marks.get('ignore', []) or force):
                yield tmp_i
    
    def _format_value(self, value, field=""):
        """ Convert a value to its string representation, according to the field it belongs to. """
        # tags: ";"-separated names
        if field == "tags":
            if ts.is_str(value):
                return value
            return ";".join(t['tag'] for t in value)
        # item type: camel case to lowercase words (e.g. "journalArticle" => "journal article")
        if field == "itemType":
            spaced = re.sub(r'(.)([A-Z][a-z]+)', r'\1 \2', value)
            return re.sub(r'([a-z0-9])([A-Z])', r'\1 \2', spaced).lower()
        # rank: 3 decimals, with an empty value counting as 0
        if field == "rank":
            return "%.3f" % float(value or "0")
        # year: 1900 means that no valid date was found
        if field == "year":
            text = str(value)
            return "-" if value == 1900 else text
        # other integers: negative means undefined
        if ts.is_int(value):
            text = str(value)
            return "-" if value < 0 else text
        # lists: format each element, attachments being ";"-separated
        if ts.is_list(value):
            separator = ";" if field == "attachments" else ", "
            return separator.join(self._format_value(x, field) for x in value)
        # persons: single name if any, otherwise "[last name] [first name]"
        if ts.is_dict(value) and field in ["authors", "creators", "editors"]:
            value = value.get('name') or "{} {}".format(value.get('lastName', ""), value.get('firstName', "")).strip()
        return str(value)
    
    def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, force=False):
        """ Get items, computing special fields and applying filters.
        
        :param fields:  list of names of the fields to output (required) ; a "rank*" (no age-based damping) or
                         "rank^N" (damping of order N, from 1 to 9) entry is replaced in place with "rank"
        :param filters: list of filters formatted as "[field]:[regex]" (see _filter)
        :param sort:    name of the field to sort the rows on (default: the first field)
        :param desc:    if True, output the rows in descending order
        :param limit:   maximum number of rows, as "[number]" or "[<>][field]:[number]" ; "<" and ">" respectively
                         select the first items in ascending and descending order of the given field
        :param force:   if True, also include the items marked as 'ignore' or 'irrelevant'
        :return:        (headers, rows) with rows as lists of strings ; ([], []) when no item is selected
        :raise ValueError: on a bad limit (and from _filter, on a bad filter or field name)
        """
        filters = filters or []
        sort = sort or fields[0]
        data = []
        age, order = True, 3
        # define the order of the damping factor function to be applied to the rank field
        m = None  # stays None when there is no field at all
        for f in fields:
            m = re.match(r"rank(\*|\^[1-9])$", f)
            if m:
                break
        if m:
            f = m.group()
            if m.group(1) == "*":
                age = False
            else:
                order = int(m.group(1).lstrip("^"))
            fields[fields.index(f)] = "rank"
            # the default sort field may have been taken from this special field ; it is not a valid field name
            if sort == f:
                sort = "rank"
        # extract the limit field
        lfield, lfdesc = sort, desc
        if limit is not None:
            try:
                lfield, limit = str(limit).split(":")
                lfield = lfield.strip()
                # handle [<>] as sort orders
                if lfield[:1] in ("<", ">"):
                    lfdesc = lfield[0] == ">"
                    lfield = lfield[1:]
                # handle "rank*" as using the strict rank, that is, with no damping factor related to the item's age
                if lfield == "rank*":
                    age = False
                    lfield = "rank"
            except ValueError:
                # no field in the limit (or too many ":") ; the number is validated hereafter
                pass
            if not str(limit).strip().isdigit() or int(limit) <= 0:
                logger.error("Bad limit number ; should be a positive integer")
                raise ValueError
            limit = int(limit)
        # select relevant items, including all the fields required for further computations
        ffields = fields[:]
        if sort not in ffields:
            ffields.append(sort)
        if "rank" in fields or lfield == "rank" or sort == "rank" or \
           "rank" in [f.split(":")[0].lstrip("~") for f in filters]:
            for f in ["rank", "citations", "references", "year", "zscc"]:
                if f not in ffields:
                    ffields.append(f)
        if lfield not in ffields:
            ffields.append(lfield)
        # rank filters cannot be applied yet as ranks are only computed hereafter
        items = {i['key']: i for i in \
                 self._filter(ffields, [f for f in filters if not re.match(r"\~?rank\:", f)], force)}
        if len(items) == 0:
            logger.info("No data")
            return [], []
        # compute ranks similarly to the Page Rank algorithm, if relevant
        if "rank" in ffields:
            logger.debug("Computing ranks...")
            # principle: items with a valid date get a weight, others (with year==1900) do not
            self.ranks = {k: 1./len(items) if i['data']['year'] > 1900 else 0. for k, i in items.items()}
            years = set(i['data']['year'] for i in items.values()) - {1900}
            # with no dated item at all, there is nothing to iterate on ; every rank simply remains null
            if years:
                # min/max years are computed to take item's age into account
                y_min, y_max = min(years), max(years)
                # in order not to get a null damping factor for items with minimum year, we shift y_min by 10% to the
                #  left
                y_min -= max(1, (y_max - y_min) // 10)
                dy = float(y_max - y_min)
                # set the damping factor formula relying on the previously defined order (default is order 3)
                df_func = lambda y: (float(y-y_min)/dy)**order
                # now we can iterate
                prev = tuple(self.ranks.values())
                for n in range(len(self.ranks)):
                    for k1 in self.ranks.keys():
                        k1_d = self.__objects[k1]['data']
                        links = k1_d['relations'].get('dc:relation', [])
                        if not ts.is_list(links):
                            links = [links]
                        if items[k1]['data']['year'] == 1900:
                            continue
                        # now compute the iterated rank
                        self.ranks[k1] = df_func(items[k1]['data']['year'])
                        for link in links:
                            k2 = link.split("/")[-1]
                            if k2 not in items.keys():
                                continue
                            k2_d = self.__objects[k2]['data']
                            # only the related items that are not older than the current one contribute to its rank
                            if ZoteroCLI.date(k1_d['date'], k1_d) <= ZoteroCLI.date(k2_d['date'], k2_d):
                                k2_d = items.get(k2, {}).get('data')
                                if k2_d:
                                    r = k2_d['references']
                                    if r > 0:
                                        # consider a damping factor on a per-item basis, taking age into account
                                        df = df_func(k2_d['year']) if age else 1.
                                        self.ranks[k1] += df * self.ranks.get(k2, 0.) / r
                    # check for convergence
                    if tuple(self.ranks.values()) == prev:
                        logger.debug("Ranking algorithm converged after %d iterations" % n)
                        break
                    prev = tuple(self.ranks.values())
                # finally, we normalize ranks
                max_rank = max(self.ranks.values())
                if max_rank > 0:
                    self.ranks = {k: v / max_rank for k, v in self.ranks.items()}
            # ranks are now available ; filter again, this time including the filters on the rank
            items = {i['key']: i for i in self._filter(ffields, filters, force)}
            if len(items) == 0:
                logger.info("No data")
                return [], []  # same shape as above, so that callers can always unpack the result
            for k, i in items.items():
                i['data']['rank'] = self.ranks.get(k)
        # exclude irrelevant items from the list of items
        if not force:
            irrelevant = self.marks.get('irrelevant', [])
            items = {k: i for k, i in items.items() if k not in irrelevant}
        # apply the limit on the selected items
        if limit is not None:
            if lfield is not None:
                select_items = sorted(items.values(), key=lambda i: ZoteroCLI.sort(i['data'][lfield], lfield))
            else:
                select_items = list(items.values())
            if lfdesc:
                select_items = select_items[::-1]
            logger.debug("Limiting to %d items (sorted based on %s in %s order)..." % \
                         (limit, lfield or sort, ["ascending", "descending"][lfdesc]))
            items = {i['key']: i for i in select_items[:limit]}
        # ensure that the rank field is set for every item
        if "rank" in ffields:
            for i in items.values():
                i['data']['rank'] = self.ranks.get(i['key'], .0)
        # format the selected items as table data
        logger.debug("Sorting items based on %s..." % sort)
        for i in sorted(items.values(), key=lambda i: ZoteroCLI.sort(i['data'].get(sort, "-"), sort)):
            # NB: falsy values (e.g. 0 or an empty string) are displayed as "-"
            row = [self._format_value(i['data'].get(f), f) if i['data'].get(f) else "-" for f in fields]
            if len(row) > 1 and all(x in ".-" for x in row[1:]):  # row[0] is the item's key ; shall never be "." or "-"
                continue
            data.append(row)
        if desc:
            data = data[::-1]
        return [ZoteroCLI.header(f) for f in fields], data
    
    def _marks(self, marker, filters=None, sort=None, desc=False, limit=None):
        """ Mark the selected items with a specified marker and save the marks to the cache.
        
        NB: by convention, a marker can be submitted as its negation with the "un" prefix ; e.g. read <> unread
        
        :param marker:  marker's name or its negation, as defined in MARKERS
        :param filters: filters selecting the items to be (un)marked (see _items)
        :param sort:    sort field (see _items)
        :param desc:    sort order (see _items)
        :param limit:   limit on the selected items (see _items)
        :raise ValueError: when the marker is not defined in MARKERS
        """
        negate = False
        for m1, m2, _ in MARKERS:
            if marker in (m1, m2):
                negate = marker == m2
                marker = m1
                break
        # the else clause only runs when the loop was not broken, that is, when no marker matched
        else:
            raise ValueError("Bad marker (should be one of: {})".format("|".join(x for p in MARKERS for x in p[:2])))
        self.marks.setdefault(marker, [])
        # force=True: already ignored or irrelevant items must remain selectable, e.g. for unmarking them
        _, data = self._items(["key"], filters, sort, desc, limit, True)
        for row in data:
            k = row[0]
            if negate:
                try:
                    self.marks[marker].remove(k)
                    logger.debug("Unmarked %s from %s" % (k, marker))
                except ValueError:
                    # this item was not marked
                    pass
            elif k not in self.marks[marker]:
                logger.debug("Marked %s as %s" % (k, marker))
                self.marks[marker].append(k)
        # do not keep markers with no item
        self.marks = {k: l for k, l in self.marks.items() if len(l) > 0}
        with self._marks_file.open('w') as f:
            logger.debug("Saving marks to cache '%s'..." % self._marks_file)
            json.dump(self.marks, f)
    
    def count(self, filters=None):
        """ Print the number of items satisfying the given filters. """
        rows = self._items(["title"], filters or [])[1]
        print(len(rows))
    
    def export(self, fields=None, filters=None, sort=None, desc=False, limit=None, line_format=None,
               output_format="xslx"):
        """ Export the selected fields of items while applying filters to a file named "export.[extension]".
        
        :param fields:        list of names of the fields to export
        :param filters:       filters to be applied (see _items)
        :param sort:          sort field (see _items)
        :param desc:          sort order (see _items)
        :param limit:         limit on the selected items (see _items)
        :param line_format:   format string producing one line per item ; when set, and unless the output format is
                               Excel, CSV, JSON or XML, a list of lines is exported instead of a table
                               placeholders: lowercased column titles, {emoji}, {stars} and, if "title" and "url" are
                               selected, {lower_title}, {link}, {link_lower} and {link_with_abstract}
        :param output_format: "xslx" (historical spelling) or "xlsx" for an Excel file, otherwise the name of the
                               Report method to be used for the export
        """
        wants_stars = line_format is not None and "{stars}" in line_format
        if wants_stars and "rank" not in fields:
            fields = list(fields) + ["rank"]  # do not alter the caller's list
        result = self._items(fields, filters, sort, desc, limit)
        # no item selected ; _items already reported it
        if not result or not result[0]:
            return
        headers, data = result
        if output_format in ("xslx", "xlsx"):
            logger.debug("Creating Excel file...")
            wb = xlsxwriter.Workbook("export.xlsx")
            ws = wb.add_worksheet()
            # NB: cells are addressed with (row, column) indices so that the number of columns is not limited to 26
            #     also, a table requires at least one data row
            ws.add_table(0, 0, max(len(data), 1), len(headers) - 1, {
                'autofilter': 1,
                'columns': [{'header': h} for h in headers],
                'data': data,
            })
            cc = wb.add_format()
            cc.set_align("center")
            tw = wb.add_format()
            tw.set_text_wrap()
            for col, h in enumerate(headers):
                # center header cells
                ws.write(0, col, h, cc)
                # fix widths, capped to 80 characters
                longest = max(len(str(row[col])) for row in [headers] + data)
                ws.set_column(col, col, min(longest, 80))
                # wrap text in the columns that were capped
                if longest > 80:
                    for r, row in enumerate(data, 1):
                        ws.write(r, col, row[col], tw)
            wb.close()
            return
        elif output_format in ["csv", "json", "xml"] or line_format is None:
            r = Report(Table([headers] + data))
        else:
            lines = []
            for row in data:
                d = {k.lower(): v for k, v in zip(headers, row)}
                if "Title" in headers and "Url" in headers:
                    d['lower_title'] = t = _lower_title(d['title'])
                    d['link'] = d['title'] if d['url'] in ["", "-"] else "[%s](%s)" % (d['title'], d['url'])
                    d['link_lower'] = t if d['url'] in ["", "-"] else "[%s](%s)" % (t, d['url'])
                if "link" in d.keys() and "link_with_abstract" in line_format:
                    if "AbstractNote" in headers:
                        d['link_with_abstract'] = d['link'] if d['abstractnote'] == "-" else \
                                                  "%s\n\n%s\n\n" % (d['link'], _indent(d['abstractnote'], 2))
                    elif "AbstractShortNote" in headers:
                        d['link_with_abstract'] = d['link'] if d['abstractshortnote'] == "." else \
                                                  "%s - %s" % (d['link'], d['abstractshortnote'])
                if "{emoji}" in line_format:
                    # the item type is only available when "itemType" is among the fields
                    d['emoji'] = TYPE_EMOJIS.get(d.get('type'), TYPE_EMOJIS['default'])
                if wants_stars:
                    # items with no rank are displayed as "-" ; count them as rank 0, that is, with no star
                    try:
                        score = float(d['rank'])
                    except ValueError:
                        score = .0
                    d['stars'] = (round(score * 4) - 1) * " :star:"
                lines.append(line_format.format(**d))
            r = Report(List(*lines))
        r.filename = "export"
        logger.debug("Creating %s file..." % output_format.upper())
        getattr(r, output_format)(False)
    
    def list(self, field, filters=None, desc=False, limit=None):
        """ List field's values while applying filters.
        
        :param field:   name of the field whose distinct values are printed ; "collections" and "fields" are special
                         values for respectively listing the names of the collections and the valid field names
        :param filters: filters to be applied (see _items) ; not applicable to "collections" and "fields"
        :param desc:    if True, list the values in descending order
        :param limit:   maximum number of values, as an integer or a string formatted as "[number]" (anything before
                         a ":" is ignored as a single field is listed)
        :raise ValueError: when the limit is a string that does not hold a positive integer
        """
        # the limit comes as a string from the command line ; it shall be an integer for slicing the results
        if limit is not None and not isinstance(limit, int):
            number = str(limit).split(":")[-1].strip()
            if not number.isdigit() or int(number) <= 0:
                logger.error("Bad limit number ; should be a positive integer")
                raise ValueError
            limit = int(number)
        if field == "collections":
            l = [c['data']['name'] for c in self.collections]
            logger.warning("Filters are not applicable to field: collections")
        elif field == "fields":
            l = self._valid_fields
            logger.warning("Filters are not applicable to field: fields")
        else:
            result = self._items([field], filters)
            l = [row[0] for row in result[1]] if result else []
        if len(l) == 0:
            return
        # fields holding multiple values are split according to the separator set by _format_value
        if field in ["attachments", "tags"]:
            tmp, l = l[:], []
            for x in tmp:
                l.extend(x.split(";"))
        elif field in ["authors", "creators", "editors"]:
            tmp, l = l[:], []
            for x in tmp:
                l.extend(x.split(", "))
        # deduplicate, sort and discard empty values
        data = [[x] for x in sorted(set(l), key=lambda x: ZoteroCLI.sort(x, field)) if x != "-"]
        if desc:
            data = data[::-1]
        if limit is not None:
            data = data[:limit]
        print(ts.BorderlessTable([[ZoteroCLI.header(field)]] + data))
    
    def mark(self, marker, filters=None, sort=None, desc=False, limit=None):
        """ Apply a marker (or its negation) to the selected items ; public entry point for _marks. """
        self._marks(marker, filters=filters, sort=sort, desc=desc, limit=limit)
    
    def plot(self, name, filters=None):
        """ Plot a chart given its slug.
        
        :param name:    slug of the chart (see CHARTS) ; "software-in-time" prints the titles of the computer
                         programs grouped per year
        :param filters: additional filters to be applied (see _filter)
        :raise ValueError: when the chart does not exist
        """
        if name == "software-in-time":
            data = {}
            # NB: filters apply to formatted values ; the item type then reads "computer program", which the former
            #     "computerProgram" regex could not match, hence the optional space
            for i in self._filter(["title", "date"], ["itemType:computer ?program"] + list(filters or [])):
                y = ZoteroCLI.date(i['data']['date'], i).year
                data.setdefault(y, []).append(i['data']['title'])
            for y, t in sorted(data.items(), key=lambda x: x[0]):
                # 1900 is the year obtained from an empty date
                print(["%d:" % y, "####:"][y == 1900], ", ".join(t))
        else:
            logger.debug("Should be one of:\n- " + "\n- ".join(sorted(CHARTS)))
            logger.error("Bad chart")
            raise ValueError
    
    def show(self, fields=None, filters=None, sort=None, desc=False, limit=None):
        """ Show the selected fields of items while applying filters.
        
        Titles are displayed in italic and the rows of the items not marked as 'read' in bold.
        
        :param fields:  list of names of the fields to display
        :param filters: filters to be applied (see _items)
        :param sort:    sort field (see _items)
        :param desc:    sort order (see _items)
        :param limit:   limit on the selected items (see _items)
        """
        fields = list(fields or [])
        # ensure the 'key' field is included for filtering the items ; then do not keep it if not selected
        output_key = "key" in fields
        if not output_key:
            fields = ["key"] + fields
        result = self._items(fields, filters, sort, desc, limit)
        # no item selected ; _items already reported it
        if not result or not result[0]:
            return
        headers, data = result
        k_idx = fields.index("key")
        keys = [row[k_idx] for row in data]
        if not output_key:
            headers, data = headers[1:], [row[1:] for row in data]
        if len(headers) > 0:
            table = ts.BorderlessTable([headers] + data)
            # the title is optional ; when selected, it can be at any position, including the first one
            t_idx = headers.index("Title") if "Title" in headers else None
            read = self.marks.get('read', [])
            for key, row in zip(keys, table.table_data[2:]):
                if t_idx is not None:
                    row[t_idx] = "\n".join(ts.txt2italic(l) if len(l) > 0 else "" for l in row[t_idx].split("\n"))
                if key not in read:
                    for i, v in enumerate(row):
                        row[i] = "\n".join(ts.txt2bold(l) if len(l) > 0 else "" for l in v.split("\n"))
            print(table)
    
    def view(self, name, value, fields=None):
        """ View a single item given a field and its value.
        
        :param name:   name of the field used for selecting the item
        :param value:  value (handled as a filter's regex) to be matched by this field
        :param fields: list of names of the fields to display ; only the first matching item is displayed
        """
        result = self._items(fields, ["%s:%s" % (name, value)])
        if not result or not result[1]:
            logger.warning("No item found with %s matching '%s'" % (name, value))
            return
        headers, data = result
        for h, d in zip(headers, data[0]):
            hb = ts.txt2bold(h)
            if h == "Title":
                d = ts.txt2italic(d)
            # values are strings ; try to get structured values (e.g. dictionaries) back for a nicer display
            try:
                d = ast.literal_eval(d)
            except Exception:
                # not a Python literal ; keep the string as is
                pass
            if not isinstance(d, dict):
                print("{: <24}: {}".format(hb, d))
            else:
                if len(d) == 0:
                    print("{: <24}: -".format(hb))
                elif h == "Relations":
                    print("{: <24}:".format(hb))
                    rel = d.get('dc:relation', [])
                    if isinstance(rel, str):
                        rel = [rel]
                    for k in rel:
                        # display the title of the related item or, if it is not in the library, the raw link
                        related = self.__objects.get(k.split("/")[-1])
                        title = related['data'].get('title', k) if related else k
                        print("- %s" % ts.txt2italic(title))
                else:
                    print("{: <24}:\n".format(hb))
                    for i in d:
                        print("- %s" % i)
    
    @staticmethod
    def date(date_str, data=None):
        """ Parse a date string, trying the empty format then each format of TIME_FORMATS.
        
        :param date_str: date string ; an empty string is valid and gives the default datetime (year 1900)
        :param data:     optional context for the error message ; either an item (or its 'data' dictionary), whose
                          title is then displayed, or any other object, displayed as a string
        :return:         datetime instance
        :raise ValueError: when the date string matches none of the supported formats
        """
        for f in [""] + TIME_FORMATS:
            try:
                return datetime.strptime(date_str, f)
            except (TypeError, ValueError):
                # not this format (ValueError) or not even a string (TypeError)
                pass
        msg = "Bad datetime format: %s" % date_str
        if data:
            if isinstance(data, dict):
                inner = data.get('data', data)
                context = inner.get('title', "undefined") if isinstance(inner, dict) else "undefined"
            else:
                # e.g. a plain description of what was being done
                context = str(data)
            msg += " (%s)" % context
        logger.error(msg)
        raise ValueError(msg)
    
    @staticmethod
    def header(field):
        """ Get the column title for a field: its alias if any, otherwise its name with a leading "num" turned into
             "#" ; in both cases, the first letter is uppercased. """
        fallback = re.sub(r"^num([A-Z].*)$", r"#\1", field)
        title = FIELD_ALIASES.get(field, fallback)
        first, rest = title[0], title[1:]
        return first.upper() + rest
    
    @staticmethod
    def sort(value, field=None):
        """ Compute the sort key of a value according to its field.
        
        :param value: value to be sorted
        :param field: name of the field the value belongs to
        :return:      timestamp for date fields, float for numerical fields (-1 if undefined or invalid), lowercased
                       string otherwise (without its leading article for titles)
        :raise ValueError: when the value of a date field cannot be parsed
        """
        field = field or ""
        if field.startswith("date") or field.endswith("Date"):
            # "-" stands for an undefined value ; stripped, it gives the empty date
            # NB: the error context is passed as a mapping as ZoteroCLI.date gets the 'title' key from it
            context = {'title': "sort per %s" % field}
            return ZoteroCLI.date(str(value or "").lstrip("-"), context).timestamp()
        elif field in FLOAT_FIELDS or field in INTEGER_FIELDS:
            try:
                return float(-1 if value in ["", "-", None] else value)
            except (TypeError, ValueError):
                logger.warning("Bad value '%s' for field %s" % (value, field))
                return -1
        elif field == "title":
            s = str(value).lower()
            # ignore the leading article, if any ; a blank title has no word at all
            words = s.split(maxsplit=1)
            if len(words) == 2 and words[0] in ["a", "an", "the"]:
                s = words[1]
            return s
        else:
            return str(value).lower()


def _lower_title(title):
    """ Lowercase the capitalized words of a title while keeping a capital after punctuation among -:!? """
    def _soften(match):
        # matched chunk = one character (not among -:!?), whitespace, then a capitalized word ; the chunk is kept
        #  as is when the word (chunk without its first character, stripped) is listed in STATIC_WORDS
        chunk = match.group(1)
        if chunk[1:].strip() in STATIC_WORDS:
            return chunk
        return chunk[0] + chunk[1:].lower()

    def _capitalize(match):
        # group 1 = punctuation and whitespace, group 2 = lowercase word whose first letter gets uppercased
        separator, word = match.group(1), match.group(2)
        return separator + word[0].upper() + word[1:]

    def _soften_first(match):
        # keep the leading capital of the very first word and lowercase the rest of it
        word = match.group(1)
        return word[0] + word[1:].lower()

    # this regex allows to preserve cased subtitles such as: "First Part: Second Part" => "First part: Second part"
    cased_word = r"([^-:!?]\s+(?:[A-Z][a-z]+(?:[-_]?[A-Z]?[a-z]+)*|[A-Z]{2}[a-z]{3,}))"
    # two passes are required as a single one only catches 1 instance out of 2 ;
    #  "An Example Title: With a Subtitle"
    #    ^^^^^^^^^      ^^^^^^ ^^^^^^^^^^
    #                ^
    #        this one is not matched the first time as the last "e" of "Example" was already consumed !
    for _ in range(2):
        title = re.sub(cased_word, _soften, title)
    # fix the case of lowercase substrings following a punctuation among -:!?
    title = re.sub(r"([-:!?]\s+)([a-z]+(?:[-_][A-Z]?[a-z]+)*)", _capitalize, title)
    # finally, when the title starts with a capitalized word, lowercase everything but its first letter
    return re.sub(r"^([A-Z][a-z]+(?:[-_][A-Z]?[a-z]+)*)", _soften_first, title)


def _set_arg(subparser, arg, msg=None):
    """ Register one of the options shared by several subparsers: "filter", "limit", "query" or "sort".

    :param subparser: parser on which the option is added
    :param arg:       name of the option to be added ; any other name is silently ignored
    :param msg:       alternative help message (only used for "filter")
    """
    if arg == "filter":
        help_msg = msg or "filter to be applied on field's value"
        subparser.add_argument("-f", "--filter", action="extend", nargs="*", default=[], help=help_msg,
                               note="format: [field]:[regex]")
        return
    if arg == "limit":
        limit_note = "format: either a number or [field]:[number]\n    '<' and '>' respectively indicates ascending " \
                     "or descending order (default: ascending)"
        subparser.add_argument("-l", "--limit", help="limit the number of displayed records", note=limit_note)
        return
    if arg == "query":
        subparser.add_argument("-q", "--query", choices=list(QUERIES.keys()), help="use a predefined query",
                               note="this can be combined with additional filters")
        return
    if arg == "sort":
        sort_note = "if not defined, the first input field is selected\n    '<' and '>' respectively indicates " \
                    "ascending or descending order (default: ascending)"
        subparser.add_argument("-s", "--sort", help="field to be sorted on", note=sort_note)


def _set_args(sp, *args):
    """ Register multiple shared options at once ; gives None, or an empty list when no option name is given. """
    outcomes = [_set_arg(sp, name) for name in args]
    return outcomes and None


if __name__ == '__main__':
    # main parser
    parser.add_argument("-g", "--group", action="store_true", help="API identifier is a group",
                        note="the default API identifier type is 'user' ; use this option if you modified your library "
                             "to be shared with a group of users, this can be set permanently by touching %s" %
                             CACHE_PATH.joinpath("group"))
    parser.add_argument("-i", "--id", help="API identifier",
                        note="if 'id' and 'key' not specified, credentials are obtained from file '%s'" % CREDS_FILE)
    parser.add_argument("-k", "--key", help="API key",
                        note="if not specified while 'id' is, 'key' is asked as a password")
    parser.add_argument("-r", "--reset", action="store_true", help="remove cached collections and items")
    # commands: count | export | list | mark | plot | reset | show | view
    sparsers = parser.add_subparsers(dest="command", help="command to be executed")
    ccount = sparsers.add_parser("count", help="count items")
    _set_arg(ccount, "filter", "filter to be applied while counting")
    _set_arg(ccount, "query")
    cexpt = sparsers.add_parser("export", help="export items to a file")
    cexpt.add_argument("field", nargs="+", help="field to be shown")
    cexpt.add_argument("-l", "--line-format", help="line's format string for outputting as a list")
    cexpt.add_argument("-o", "--output-format", choices=["csv", "html", "json", "md", "pdf", "rst", "xml", "xslx"],
                       default="xslx", help="output format")
    _set_args(cexpt, "filter", "limit", "query", "sort")
    clist = sparsers.add_parser("list", help="list distinct values for the given field")
    clist.add_argument("field", help="field whose distinct values are to be listed")
    _set_args(clist, "filter", "query")
    clist.add_argument("-l", "--limit", type=ts.pos_int, help="limit the number of displayed records")
    clist.add_argument("--desc", action="store_true", help="sort results in descending order")
    cmark = sparsers.add_parser("mark", help="mark items with a marker")
    cmark.add_argument("marker", choices=[x for p in MARKERS for x in p[:2]], help="marker to be set",
                       note="possible values:\n - {}".format("\n - ".join("%s: %s" % (p[0], p[2]) for p in MARKERS)))
    _set_args(cmark, "filter", "limit", "query", "sort")
    cplot = sparsers.add_parser("plot", help="plot various information using Matplotlib")
    cplot.add_argument("chart", choices=CHARTS, help="chart to be plotted")
    _set_args(cplot, "filter", "query")
    creset = sparsers.add_parser("reset", help="reset cached collections and items")
    creset.add_argument("-r", "--reset-items", action="store_true", help="reset items only")
    cshow = sparsers.add_parser("show", help="show a list of items")
    cshow.add_argument("field", nargs="+", help="field to be shown")
    _set_args(cshow, "filter", "limit", "query", "sort")
    cview = sparsers.add_parser("view", help="view a single item")
    cview.add_argument("name", help="field name for selection")
    cview.add_argument("value", help="field value to be selected")
    cview.add_argument("field", nargs="+", help="field to be shown")
    initialize()
    # merge the settings of the predefined query (if any) with the options given on the command line
    if getattr(args, "query", None):
        query = QUERIES[args.query]
        if hasattr(args, "field") and args.field == ["-"]:
            args.field = query.get('fields', ["title"])
        args.filter.extend(query.get('filter', []))
        # some commands accepting --query define no --limit and/or --sort option (count, list, plot) ; the query's
        #  value is therefore only applied to the options the selected command has and that were left unset
        for opt in ("limit", "sort"):
            if hasattr(args, opt) and getattr(args, opt) is None:
                setattr(args, opt, query.get(opt))
    # split the sort option into its order prefix ('<' or '>') and the field name
    if hasattr(args, "sort"):
        args.desc = False
        if args.sort is not None:
            args.desc = args.sort[0] == ">"
            if args.sort[0] in "<>":
                args.sort = args.sort[1:]
    # remove cached files ; marks are always kept and --reset-items restricts the removal to items-related files
    if args.command == "reset" or args.reset:
        for k, fn in CACHE_FILES.items():
            if k == "marks" or (getattr(args, "reset_items", False) and k not in ["items", "attachments", "notes"]):
                continue
            try:
                os.remove(str(CACHE_PATH.joinpath(fn)))
            except OSError:
                pass
    # the identifier type is 'group' when requested through the option or when the 'group' file exists in the cache
    id_type = "group" if args.group or CACHE_PATH.joinpath("group").exists() else "user"
    z = ZoteroCLI(args.id, id_type, args.key)
    if args.command == "count":
        z.count(args.filter)
    elif args.command == "export":
        z.export(args.field, args.filter, args.sort, args.desc, args.limit, args.line_format, args.output_format)
    elif args.command == "list":
        z.list(args.field, args.filter, args.desc, args.limit)
    elif args.command == "mark":
        args.filter.append("numPages:>0")
        z.mark(args.marker, args.filter, args.sort, args.desc, args.limit)
    elif args.command == "plot":
        z.plot(args.chart)
    elif args.command == "show":
        z.show(args.field, args.filter, args.sort, args.desc, args.limit)
    elif args.command == "view":
        z.view(args.name, args.value, args.field)

