# -*- coding: utf-8 -*-
from setuptools import setup

# Packages shipped in the distribution (passed to setup() as `packages`).
packages = ['colibrie']

# Data-file patterns passed to setup() as `package_data`: the '' key carries
# the catch-all '*' glob.
package_data = {'': ['*']}

# Runtime requirements passed to setup() as `install_requires`; every entry
# has an inclusive lower bound and an exclusive upper bound.
install_requires = [
    'Cython>=0.29.32,<0.30.0',
    'Pillow>=9.2.0,<10.0.0',
    'PyMuPDF>=1.20.2,<2.0.0',
    'ailist>=1.0.4,<2.0.0',
    'numpy>=1.23.3,<2.0.0',
    'pandas>=1.5.0,<2.0.0',
]

# Keyword arguments handed to setuptools.setup() at the bottom of this file.
#
# Notes:
# - `long_description` is the project README (Markdown with embedded HTML).
#   It is written as adjacent string literals inside parentheses so that no
#   single-quoted literal spans more than one physical line; the pieces are
#   concatenated by the parser into one string.
# - `long_description_content_type` declares the README as Markdown; without
#   it, package indexes fall back to their default (reStructuredText) renderer.
# - `maintainer` / `maintainer_email` are intentionally omitted: no separate
#   maintainer is declared, and a placeholder string such as 'None' would be
#   published verbatim in the package metadata.
setup_kwargs = {
    'name': 'colibrie',
    'version': '1.1.3.1',
    'description': 'Colibrie is a blazing fast tool to extract tables from PDFs',
    'long_description': (
        '# Colibrie\n [![image](https://img.shields.io/pypi/v/colibrie.svg)](https://pypi.org/project/colibrie/) [![image](https://img.shields.io/pypi/l/colibrie.svg)](https://pypi.org/project/colibrie/)\n\nColibrie is a blazing fast tool to extract tables from PDFs \n\n## Why Colibrie?\n\n- **Efficient**: Colibrie is faster by multiple order of magnitude than any actual existing solution\n- **Fidel visual**: Colibrie can provide 1:1 HTML representation of any tables it\'ll find\n- **Reliable**: Colibri will find every valid tables without exception if the PDF is compatible with the core principle of Colibrie\n- **Output**: Each table can be export into to multiple formats, which include : \n  - Pandas Dataframe.\n  - HTML.\n\n### Benchmark :\nSome number to compare [Camelot](https://github.com/camelot-dev/camelot) (a popular library to extract tables from PDF) and Colibrie\n<table>\n  <thead>\n    <tr>\n        <th colspan="2"></th>\n        <th colspan="4">Tables extracted</th>\n        <th colspan="2"></th>\n    </tr>\n    <tr>\n        <th colspan="2">Times in second</th>\n        <th colspan="2">camelot</th>\n        <th colspan="2">colibrie</th>\n        <th colspan="2"></th>\n    </tr>\n    <tr style="text-align: right;">\n      <th>camelot</th>\n      <th>colibrie</th>\n      <th>valid</th>\n      <th>false positive</th>\n      <th>valid</th>\n      <th>false positive</th>\n      <th>pages count</th>\n      <th>pdf file</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0.53</td>\n      <td>0.00545</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td><a href="https://github.com/abitoun-42/colibrie/files/9620468/boc_20220014_0001_p000_extract_2.pdf">small pdf</a></td>\n    </tr>\n    <tr>\n      <td>5.95</td>\n      <td>0.02100</td>\n      <td>4</td>\n      <td>0</td>\n      <td>4</td>\n      <td>0</td>\n      <td>11</td>\n      <td><a '
        'href="https://github.com/abitoun-42/colibrie/files/9620506/boc_20210034_0000_0003.pdf">medium pdf</a></td>\n    </tr>\n    <tr>\n      <td>105.00</td>\n      <td>0.21900</td>\n      <td>62</td>\n      <td>1</td>\n      <td>62</td>\n      <td>0</td>\n      <td>167</td>\n      <td><a href="https://github.com/abitoun-42/colibrie/files/9620511/boc_20220014_0001_p000.pdf">big pdf</a></td>\n    </tr>\n    <tr>\n      <td>182.00</td>\n      <td>0.69000</td>\n      <td>175</td>\n      <td>1</td>\n      <td>177</td>\n      <td>0</td>\n      <td>269</td>\n      <td><a href="https://github.com/abitoun-42/colibrie/files/9620515/boc_20220025_0001_p000.pdf">giant pdf</a></td>\n    </tr>\n  </tbody>\n</table>\n\n## Current limitation\n\n- Colibrie only works with text-based PDFs and not scanned documents. (As Tabula [explains](https://github.com/tabulapdf/tabula#why-tabula), "If you can click and drag to select text in your table in a PDF viewer, then your PDF is text-based".)\n- For the moment Colibrie doesn\'t work on PDF with tables that has no structural lines (like [this one](https://github.com/abitoun-42/colibrie/files/9627754/budget_2014-15.pdf) or [this one](https://github.com/abitoun-42/colibrie/files/9627800/m27.pdf)\n) but it can handle a few missing lines (like [this one](https://github.com/abitoun-42/colibrie/files/9627853/spreadsheet_no_bounding_frame.pdf) or [this one](https://github.com/abitoun-42/colibrie/files/9627858/boc_20210034_0000_0003_extract.pdf))\n\n## Installation\n\n### using source\n\n```\npip install poetry\n\ngit clone https://github.com/abitoun-42/colibrie.git\n\ncd colibrie\n\npoetry install\n```\n### using pip\n```\npip install colibrie\n```\n\n## Usage\n\nPDF used in example : [example.pdf](https://github.com/abitoun-42/colibrie/files/9620593/boc_20210034_0000_0003_extract_2.pdf)\n\n\n```\nfrom colibrie.extract_tables import extract_table\n\ntables = extract_table(\'example.pdf\')\n\nfor table in tables:\n   print(table.to_html())\n   df = '
        'table.to_df()\n```\n\n### Output :\n<table><tr><td style="text-align:center;" rowspan=1 colspan=4>Classiﬁ cation des associations agréées de surveillance <br>\nde la qualité de l’air<br>\n</td><td style="text-align:center;" rowspan=1 colspan=4>Classiﬁ cation des bureaux d’études techniques, <br>\ndes cabinets d’ingénieurs-conseils <br>\net des sociétés de conseils<br>\n</td></tr><tr><td style="text-align:center;" rowspan=1 colspan=1>Catégorie<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>Échelon<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>Coefﬁ cient<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>Salaire <br>\nminimal <br>\nhiérarchique<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1></td><td style="text-align:center;" rowspan=1 colspan=1>Position<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>Coefﬁ cient<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>Salaire <br>\nminimal <br>\nhiérarchique<br>\n</td></tr><tr><td style="text-align:center;" rowspan=3 colspan=1>7<br>\n</td><td style="text-align:center;" rowspan=3 colspan=1>1<br>\n2<br>\n3<br>\n4<br>\n5<br>\n6<br>\n7<br>\n8<br>\n9<br>\n10<br>\n11<br>\n12<br>\n</td><td style="text-align:center;" rowspan=3 colspan=1>255<br>\n268<br>\n282<br>\n296<br>\n311<br>\n327<br>\n344<br>\n362<br>\n381<br>\n401<br>\n422<br>\n444<br>\n</td><td style="text-align:center;" rowspan=3 colspan=1>1 307,13 €<br>\n1 373,77 €<br>\n1 445,53 €<br>\n1 517,30 €<br>\n1 594,19 €<br>\n1 676,20 €<br>\n1 763,34 €<br>\n1 855,61 €<br>\n1 953,01 €<br>\n2 055,53 €<br>\n2 163,17 €<br>\n2 275,94 €<br>\n</td><td style="text-align:center;" rowspan=6 colspan=1>ETAM<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1.1.<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>230<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1 558,80 €<br>\n</td></tr><tr><td style="text-align:center;" rowspan=1 colspan=1>1.2.<br>\n</td><td '
        'style="text-align:center;" rowspan=1 colspan=1>240<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1 587,50 €<br>\n</td></tr><tr><td style="text-align:center;" rowspan=1 colspan=1>1.3.<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>250<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1 618,50 €<br>\n</td></tr><tr><td style="text-align:center;" rowspan=3 colspan=1>6<br>\n</td><td style="text-align:center;" rowspan=3 colspan=1>1<br>\n2<br>\n3<br>\n4<br>\n5<br>\n6<br>\n7<br>\n8<br>\n9<br>\n10<br>\n11<br>\n12<br>\n</td><td style="text-align:center;" rowspan=3 colspan=1>310<br>\n326<br>\n344<br>\n363<br>\n384<br>\n406<br>\n430<br>\n457<br>\n485<br>\n515<br>\n549<br>\n585<br>\n</td><td style="text-align:center;" rowspan=3 colspan=1>1 589,06 €<br>\n1 671,08 €<br>\n1 763,34 €<br>\n1 860,74 €<br>\n1 968,38 €<br>\n2 081,16 €<br>\n2 204,18 €<br>\n2 342,58 €<br>\n2 486,11 €<br>\n2 639,89 €<br>\n2 814,17 €<br>\n2 998,71 €<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>2.1.<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>275<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1 683,75 €<br>\n</td></tr><tr><td style="text-align:center;" rowspan=1 colspan=1>2.2.<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>310<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1 786,70 €<br>\n</td></tr><tr><td style="text-align:center;" rowspan=1 colspan=1>2.3.<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>355<br>\n</td><td style="text-align:center;" rowspan=1 colspan=1>1 922,60 €<br>\n</td></tr></table>\n'
    ),
    # The README above is Markdown, so say so explicitly for package indexes.
    'long_description_content_type': 'text/markdown',
    'author': 'Alexandre Bitoun',
    'author_email': 'alexandre.bitoun@outlook.fr',
    'url': 'https://github.com/abitoun-42/colibrie',
    'packages': packages,
    'package_data': package_data,
    'install_requires': install_requires,
    'python_requires': '>=3.8',
}


# Hand the assembled metadata to setuptools. This call sits at module top
# level, so it runs every time this file is executed.
setup(**setup_kwargs)
