#!/usr/bin/env python3

import sys
from internetarchivepdf.recode import recode
from internetarchivepdf.mrc import KDU_COMPRESS, KDU_EXPAND, OPJ_COMPRESS, \
        OPJ_DECOMPRESS, GRK_COMPRESS, GRK_DECOMPRESS
from internetarchivepdf.const import (VERSION, PRODUCER,
        IMAGE_MODE_PASSTHROUGH, IMAGE_MODE_PIXMAP, IMAGE_MODE_MRC,
        JPEG2000_IMPL_KAKADU,
        JPEG2000_IMPL_OPENJPEG,
        JPEG2000_IMPL_GROK,
        COMPRESSOR_JPEG2000,
        COMPRESSOR_JPEG)
from shutil import which


if __name__ == '__main__':
    import argparse

    # Summary printed at the top of the --help output.
    description = ('PDF recoder version %s.' % VERSION
                   + ' Compresses PDFs with images and inserts text layers '
                   + ' based on hOCR input files.')
    parser = argparse.ArgumentParser(description=description)

    # For Python 3.6, 3.7
    class ExtendAction(argparse.Action):
        """Backport of argparse's ``extend`` action (built in since 3.8).

        Every occurrence of the option adds all of its values to a single
        list stored on the namespace under ``self.dest``.
        """

        def __call__(self, parser, namespace, values, option_string=None):
            # Copy before extending: argparse stores the ``default=`` object
            # on the namespace as-is, so extending it in place would modify
            # the caller's default list.
            items = list(getattr(namespace, self.dest, None) or [])
            items.extend(values)
            setattr(namespace, self.dest, items)
    parser.register('action', 'extend', ExtendAction)
    # End for Python 3.6, 3.7

    # --- Input / output selection ---
    parser.add_argument('-P', '--from-pdf', type=str, default=None,
                        help='Input PDF (containing images) to recode')
    parser.add_argument('-I', '--from-imagestack', type=str, default=None,
                        help='Glob pattern for image stack')
    parser.add_argument('-D', '--dpi', type=int, default=None,
                        help='DPI of input images, supply this to get '
                             'proportional page sizes in resulting PDF')
    parser.add_argument('-T', '--hocr-file', type=str, default=None,
                        help='hOCR file containing page information '
                             '(currently not optional)')
    parser.add_argument('-S', '--scandata-file', type=str, default=None,
                        help='archive.org specific. '
                             'Scandata XML file containing information on '
                             'which pages to skip (optional). This is helpful '
                             'if the input PDF is a PDF where certain '
                             'pages have already been skipped, but the hOCR '
                             'still has the pages in its file structure, '
                             'and is also used for page labels (numbering)')
    parser.add_argument('-o', '--out-pdf', type=str, default=None,
                        help='Output file to write recoded PDF to.')
    parser.add_argument('-O', '--out-dir', type=str, default=None,
                        help='Output directory to (also) write images to.')
    parser.add_argument('-R', '--reporter', type=str, default=None,
                        help='Program to launch when reporting progress.')

    # --- Image conversion and compression mode ---
    parser.add_argument('--grayscale-pdf', action='store_true',
                        default=False,
                        help='Whether to convert all images to grayscale in '
                             'the resulting PDF')
    parser.add_argument('--bw-pdf', action='store_true',
                        default=False,
                        help='Whether to convert all images to 1 bit images '
                             'in the resulting PDF')
    parser.add_argument('-m', '--image-mode', default=IMAGE_MODE_MRC,
                        help='Compression mode. 0 is pass-through, 1 is '
                             'pixmap, 2 is MRC (default is 2). 3 is skip '
                             'images',
                        type=int)
    # NOTE: default=True combined with store_true means args.jbig2 is always
    # True, whether or not the flag is passed. Kept as-is so the command line
    # interface does not change.
    parser.add_argument('--jbig2', default=True, help='Encode using jbig2',
                        action='store_true')
    parser.add_argument('-v', '--verbose', default=False, action='store_true',
                        help='Verbose output')
    parser.add_argument('--tmp-dir', default=None, type=str,
                        help='Directory to store temporary intermediate images')
    parser.add_argument('--report-every', default=None, type=int,
                        help='Report on status every N pages '
                             '(default is no reporting)')
    parser.add_argument('-t', '--stop-after', default=None, type=int,
                        help='Stop after N pages (default is no stop)')
    # TODO: Support JPEG2000_IMPL_PILLOW
    parser.add_argument('-J', '--jpeg2000-implementation', type=str,
                        default=JPEG2000_IMPL_KAKADU,
                        choices=[JPEG2000_IMPL_KAKADU, JPEG2000_IMPL_OPENJPEG,
                                 JPEG2000_IMPL_GROK],
                        help='Selects JPEG2000 implementation.')

    # --- Compression flags ---
    # These default to None here; the defaults quoted in the help texts are
    # filled in after parsing, depending on the selected compressor.
    parser.add_argument('--bg-compression-flags', default=None, type=str,
                        help='Background compression flags for JPEG2000 '
                             'compression. '
                             'Default for kakadu is \'-slope 44250\', '
                             'default for grok/openjpeg is \'-q 40\'. '
                             'Pass quoted and with a space at the start: '
                             '\' --flag value\'')
    parser.add_argument('--fg-compression-flags', default=None, type=str,
                        help='Foreground compression flags for JPEG2000 '
                             'compression. '
                             'Default for kakadu is \'-slope 44500\', '
                             'default for grok/openjpeg is \'-q 30\'. '
                             'Pass quoted and with a space at the start: '
                             '\' --flag value\'')
    parser.add_argument('--mrc-image-format', default=COMPRESSOR_JPEG2000,
                        type=str,
                        choices=[COMPRESSOR_JPEG2000, COMPRESSOR_JPEG],
                        help='Image formats to produce in MRC encoding. '
                             'JPEG2000 yields better compression and quality '
                             'at the expense of computation')
    parser.add_argument('--downsample', default=None, type=int,
                        help='Downsample entire image by factor before '
                             'processing. Default is no downscaling.')
    parser.add_argument('--bg-downsample', default=None, type=int,
                        help='Downsample background by factor.'
                             ' Default is no scaling')
    parser.add_argument('--denoise-mask', default=None,
                        help='Denoise mask when MRC algorithm thinks it needs'
                             ' to denoise - which is not always.'
                             ' Default is off', action='store_true')

    # --- High quality pages ---
    parser.add_argument('--hq-pages', type=str, default=None,
                        help='Pages to render in higher quality, provided '
                             'as comma separated values, negative indexing is '
                             'allowed, e.g.: --hq-pages \'1,2,3,4,-4,-3,-2,-1\''
                             ' will make the first four and last four pages '
                             'of a higher quality. Pages marked as higher '
                             'quality will not get downsampled and might use '
                             'different slope values (see '
                             '--hq-bg-compression-flags and '
                             '--hq-fg-compression-flags)')
    parser.add_argument('--hq-bg-compression-flags', default=None, type=str,
                        help='High quality background compression flags for '
                             'JPEG2000 compression. '
                             'Default for kakadu is \'-slope 43500\', '
                             'default for grok/openjpeg is \'-q 40\'. '
                             'Pass quoted and with a space at the start: '
                             '\' --flag value\'')
    parser.add_argument('--hq-fg-compression-flags', default=None, type=str,
                        help='High quality foreground compression flags for '
                             'JPEG2000 compression. '
                             'Default for kakadu is \'-slope 44500\', '
                             'default for grok/openjpeg is \'-q 30\'. '
                             'Pass quoted and with a space at the start: '
                             '\' --flag value\'')
    parser.add_argument('--render-text-lines', action='store_true',
                        default=False,
                        help='Whether to render the text line visible instead '
                             'of invisible')

    # --- Document metadata ---
    parser.add_argument('--metadata-url', type=str, default=None,
                        help='URL describing document, if any')
    parser.add_argument('--metadata-title', type=str, default=None,
                        help='Title of PDF document')
    parser.add_argument('--metadata-author', type=str, default=None,
                        help='Author of document')
    parser.add_argument('--metadata-creator', type=str, default=None,
                        help='Creator of PDF document')
    parser.add_argument('--metadata-language', type=str, default=None,
                        nargs='+', action='extend',
                        help='Language of PDF document, see RFC 3066. '
                             'If multiple languages are specified, only the '
                             'first is added to the PDF catalog, but all of '
                             'them will end up in the XMP metadata')
    parser.add_argument('--metadata-subject', type=str, default=None,
                        help='Subjects')
    parser.add_argument('--metadata-creatortool', type=str, default=None,
                        help='Creator tool')

    args = parser.parse_args()

    # Exactly one input source (PDF or image stack) and an output path are
    # required. Report the specific problem, print the usage and exit with
    # status 1.
    usage_error = None
    if args.from_pdf is None and args.from_imagestack is None:
        usage_error = 'one of --from-pdf or --from-imagestack is required'
    elif args.out_pdf is None:
        usage_error = '--out-pdf missing'
    elif args.from_imagestack is not None and args.from_pdf is not None:
        usage_error = ('--from-pdf and --from-imagestack '
                       'are mutually exclusive')

    if usage_error is not None:
        sys.stderr.write('***** Error: %s\n\n' % usage_error)
        parser.print_help()
        sys.exit(1)

    if args.image_mode == IMAGE_MODE_MRC:
        # Names (on ``args``) of the four compression flag options, in the
        # order used by the default tuples below.
        flag_options = ('bg_compression_flags', 'fg_compression_flags',
                        'hq_bg_compression_flags', 'hq_fg_compression_flags')

        # A backend is described by a triple:
        #   (default flags for bg, fg, hq-bg, hq-fg,
        #    executables that must be found in $PATH,
        #    error text printed when one of them is missing)
        if args.mrc_image_format == COMPRESSOR_JPEG2000:
            jpeg2000_backends = {
                JPEG2000_IMPL_KAKADU: (
                    ('-slope 44250', '-slope 44500',
                     '-slope 43500', '-slope 44500'),
                    (KDU_EXPAND, KDU_COMPRESS),
                    'kakadu is requested (this is the default, pass '
                    '--jpeg2000-implementation to select an alternative), '
                    'but %s and %s are not found in $PATH'
                    % (KDU_EXPAND, KDU_COMPRESS)),
                JPEG2000_IMPL_OPENJPEG: (
                    ('-q 40', '-q 30', '-q 40', '-q 30'),
                    (OPJ_COMPRESS, OPJ_DECOMPRESS),
                    'OpenJPEG is requested but %s and %s are not found '
                    'in $PATH' % (OPJ_COMPRESS, OPJ_DECOMPRESS)),
                JPEG2000_IMPL_GROK: (
                    ('-q 40', '-q 30', '-q 40', '-q 30'),
                    (GRK_COMPRESS, GRK_DECOMPRESS),
                    'Grok is requested but %s and %s are not found '
                    'in $PATH' % (GRK_COMPRESS, GRK_DECOMPRESS)),
            }
            # argparse ``choices`` limits the value to the keys above; an
            # unknown value is left untouched.
            backend = jpeg2000_backends.get(args.jpeg2000_implementation)

        elif args.mrc_image_format == COMPRESSOR_JPEG:
            # TODO: Check jpegoptim args
            backend = (
                ('-S30', '-S20', '-S40', '-S30'),
                ('jpegoptim',),
                'JPEG is requested but jpegoptim is not found in $PATH')

        else:
            raise Exception('Invalid mrc image format')

        if backend is not None:
            default_flags, required_tools, missing_tools_error = backend

            # Only fill in flags the user did not supply on the command line.
            for option, default in zip(flag_options, default_flags):
                if getattr(args, option) is None:
                    setattr(args, option, default)

            # Fail early instead of part-way through the recode.
            if not all(which(tool) for tool in required_tools):
                sys.stderr.write('***** Error: %s\n' % missing_tools_error)
                sys.exit(1)


    def _split_flags(flags):
        """Turn a compression flag string into a list of arguments.

        Returns an empty list when the option is unset (None), which is the
        case for every image mode other than MRC unless the user passed the
        flags explicitly. Splitting on any whitespace drops the empty token
        that the leading space recommended in the help text (' --flag value')
        would otherwise produce.
        """
        if flags is None:
            return []
        return flags.split()

    # Arguments are passed positionally, in the order recode() expects.
    res = recode(args.from_pdf, args.from_imagestack, args.dpi, args.hocr_file,
                 args.scandata_file, args.out_pdf, args.out_dir,
                 args.reporter,
                 args.grayscale_pdf,
                 args.bw_pdf,
                 args.image_mode, args.jbig2, args.verbose, args.tmp_dir,
                 args.report_every, args.stop_after,
                 args.jpeg2000_implementation,
                 _split_flags(args.bg_compression_flags),
                 _split_flags(args.fg_compression_flags),
                 args.mrc_image_format,
                 args.downsample,
                 args.bg_downsample,
                 args.denoise_mask,
                 args.hq_pages,
                 _split_flags(args.hq_bg_compression_flags),
                 _split_flags(args.hq_fg_compression_flags),
                 args.render_text_lines,
                 args.metadata_url, args.metadata_title, args.metadata_author,
                 args.metadata_creator, args.metadata_language,
                 args.metadata_subject, args.metadata_creatortool)

    # Report the errors collected by recode(). NOTE: the exit status is not
    # changed here, so the script still exits with 0 after printing them.
    for error in res['errors']:
        print('Encountered runtime error:', error)
