CITATION.cff
CONTRIBUTING.md
HISTORY.md
LICENSE
MANIFEST.in
README.rst
pytest.ini
setup.py
docs/Makefile
docs/background.rst
docs/compendium.rst
docs/conf.py
docs/corefunctions.rst
docs/corpus-data.rst
docs/crawls.rst
docs/downloads.rst
docs/dwds-count-exportieren.jpg
docs/dwds-exportieren.jpg
docs/dwds-treffer-exportieren.jpg
docs/evaluation.rst
docs/gui-screenshot.png
docs/index.rst
docs/installation-gui.rst
docs/installation.rst
docs/make.bat
docs/quickstart.rst
docs/requirements.txt
docs/software-ecosystem.png
docs/sources.rst
docs/trafilatura-demo.gif
docs/trafilatura-logo.png
docs/tutorial-dwds.rst
docs/tutorial0.rst
docs/tutorial1.rst
docs/tutorial2.rst
docs/tutorials.rst
docs/url-management.rst
docs/usage-cli.rst
docs/usage-gui.rst
docs/usage-python.rst
docs/usage-r.rst
docs/usage.rst
docs/used-by.rst
docs/_build/_images/dwds-count-exportieren.jpg
docs/_build/_images/dwds-exportieren.jpg
docs/_build/_images/dwds-treffer-exportieren.jpg
docs/_build/_images/gui-screenshot.png
docs/_build/_images/software-ecosystem.png
docs/_build/_images/trafilatura-demo.gif
docs/_build/_static/file.png
docs/_build/_static/minus.png
docs/_build/_static/plus.png
docs/_build/_static/trafilatura-logo.png
tests/__init__.py
tests/cli_tests.py
tests/downloads_tests.py
tests/feeds_tests.py
tests/json_metadata_tests.py
tests/metadata_tests.py
tests/sitemaps_tests.py
tests/spider_tests.py
tests/unit_tests.py
tests/resources/apache.html
tests/resources/exotic_tags.html
tests/resources/exotic_tags_tei.html
tests/resources/feed.json
tests/resources/feed1.atom
tests/resources/feed2.rss
tests/resources/http_sample.html
tests/resources/httpbin_sample.html
tests/resources/list-discard.txt
tests/resources/list-process.txt
tests/resources/newsettings.cfg
tests/resources/redundant-urls.txt
tests/resources/scam.html
tests/resources/sitemap-hreflang.xml
tests/resources/sitemap.xml
tests/resources/sitemap.xml.gz
tests/resources/sitemap2.xml
tests/resources/webpage.html.gz
trafilatura/__init__.py
trafilatura/cli.py
trafilatura/cli_utils.py
trafilatura/core.py
trafilatura/downloads.py
trafilatura/external.py
trafilatura/feeds.py
trafilatura/filters.py
trafilatura/gui.py
trafilatura/htmlprocessing.py
trafilatura/json_metadata.py
trafilatura/lru.py
trafilatura/metadata.py
trafilatura/metaxpaths.py
trafilatura/readability_lxml.py
trafilatura/settings.cfg
trafilatura/settings.py
trafilatura/sitemaps.py
trafilatura/spider.py
trafilatura/utils.py
trafilatura/xml.py
trafilatura/xpaths.py
trafilatura.egg-info/PKG-INFO
trafilatura.egg-info/SOURCES.txt
trafilatura.egg-info/dependency_links.txt
trafilatura.egg-info/entry_points.txt
trafilatura.egg-info/not-zip-safe
trafilatura.egg-info/requires.txt
trafilatura.egg-info/top_level.txt
trafilatura/data/jt-stopwords-pickle.lzma
trafilatura/data/tei-schema-pickle.lzma