from nlu.components.assertions.assertion_dl.assertion_dl import AssertionDL
from nlu.components.assertions.assertion_log_reg.assertion_log_reg import AssertionLogReg
from nlu.components.chunkers.contextual_parser.contextual_parser import ContextualParser
from nlu.components.chunkers.default_chunker.default_chunker import DefaultChunker
from nlu.components.chunkers.ngram.ngram import NGram
from nlu.components.classifiers.classifier_dl.classifier_dl import ClassifierDl
from nlu.components.classifiers.generic_classifier.generic_classifier import GenericClassifier
from nlu.components.classifiers.language_detector.language_detector import LanguageDetector
from nlu.components.classifiers.multi_classifier.multi_classifier import MultiClassifier
from nlu.components.classifiers.named_entity_recognizer_crf.ner_crf import NERDLCRF
from nlu.components.classifiers.ner.ner_dl import NERDL
from nlu.components.classifiers.ner_healthcare.ner_dl_healthcare import NERDLHealthcare
from nlu.components.classifiers.pos.part_of_speech_jsl import PartOfSpeechJsl
from nlu.components.classifiers.sentiment_detector.sentiment_detector import Sentiment
from nlu.components.classifiers.sentiment_dl.sentiment_dl import SentimentDl
from nlu.components.classifiers.seq_albert.seq_albert import SeqAlbertClassifier
from nlu.components.classifiers.seq_bert.seq_bert_classifier import SeqBertClassifier
from nlu.components.classifiers.seq_bert_medical.seq_bert_medical_classifier import SeqBertMedicalClassifier
from nlu.components.classifiers.seq_deberta.seq_deberta_classifier import SeqDebertaClassifier
from nlu.components.classifiers.seq_distilbert.seq_distilbert_classifier import SeqDilstilBertClassifier
from nlu.components.classifiers.seq_distilbert_medical.seq_distilbert_medical_classifier import \
    SeqDilstilBertMedicalClassifier
from nlu.components.classifiers.seq_longformer.seq_longformer import SeqLongformerClassifier
from nlu.components.classifiers.seq_roberta.seq_roberta import SeqRobertaClassifier
from nlu.components.classifiers.seq_xlm_roberta.seq_xlm_roberta import SeqXlmRobertaClassifier
from nlu.components.classifiers.seq_xlnet.seq_xlnet import SeqXlnetClassifier
from nlu.components.classifiers.token_albert.token_albert import TokenAlbert
from nlu.components.classifiers.token_bert.token_bert import TokenBert
from nlu.components.classifiers.token_bert_healthcare.token_bert_healthcare import TokenBertHealthcare
from nlu.components.classifiers.token_distilbert.token_distilbert import TokenDistilBert
from nlu.components.classifiers.token_longformer.token_longformer import TokenLongFormer
from nlu.components.classifiers.token_roberta.token_roberta import TokenRoBerta
from nlu.components.classifiers.token_xlm_roberta.token_xlmroberta import TokenXlmRoBerta
from nlu.components.classifiers.token_xlnet.token_xlnet import TokenXlnet
from nlu.components.classifiers.vivekn_sentiment.vivekn_sentiment_detector import ViveknSentiment
from nlu.components.classifiers.yake.yake import Yake
from nlu.components.deidentifiers.deidentifier.deidentifier import Deidentifier
from nlu.components.dependency_typeds.labeled_dependency_parser.labeled_dependency_parser import \
    LabeledDependencyParser
from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import \
    UnlabeledDependencyParser
from nlu.components.embeddings.albert.spark_nlp_albert import SparkNLPAlbert
from nlu.components.embeddings.bert.spark_nlp_bert import SparkNLPBert
from nlu.components.embeddings.bert_sentence_chunk.bert_sentence_chunk import BertSentenceChunkEmbeds
from nlu.components.embeddings.deberta.deberta import Deberta
from nlu.components.embeddings.distil_bert.distilbert import DistilBert
from nlu.components.embeddings.doc2vec.doc2vec import Doc2Vec
from nlu.components.embeddings.elmo.spark_nlp_elmo import SparkNLPElmo
from nlu.components.embeddings.glove.glove import Glove
from nlu.components.embeddings.longformer.longformer import Longformer
from nlu.components.embeddings.roberta.roberta import Roberta
from nlu.components.embeddings.sentence_bert.BertSentenceEmbedding import BertSentence
from nlu.components.embeddings.sentence_xlm.sentence_xlm import Sentence_XLM
from nlu.components.embeddings.use.spark_nlp_use import SparkNLPUse
from nlu.components.embeddings.word2vec.word2vec import Word2Vec
from nlu.components.embeddings.xlm.xlm import XLM
from nlu.components.embeddings.xlnet.spark_nlp_xlnet import SparkNLPXlnet
from nlu.components.embeddings_chunks.chunk_embedder.chunk_embedder import ChunkEmbedder
from nlu.components.lemmatizers.lemmatizer.spark_nlp_lemmatizer import SparkNLPLemmatizer
from nlu.components.matchers.regex_matcher.regex_matcher import RegexMatcher
from nlu.components.normalizers.document_normalizer.spark_nlp_document_normalizer import SparkNLPDocumentNormalizer
from nlu.components.normalizers.drug_normalizer.drug_normalizer import DrugNorm
from nlu.components.normalizers.normalizer.spark_nlp_normalizer import SparkNLPNormalizer
from nlu.components.relation_extractors.relation_extractor.relation_extractor import RelationExtraction
from nlu.components.relation_extractors.relation_extractor_dl.relation_extractor_dl import RelationExtractionDL
from nlu.components.relation_extractors.zero_shot_relation_extractor.zero_shot_relation_extractor import \
    ZeroShotRelationExtractor
from nlu.components.resolutions.sentence_entity_resolver.sentence_resolver import SentenceResolver
from nlu.components.sentence_detectors.deep_sentence_detector.deep_sentence_detector import SentenceDetectorDeep
from nlu.components.sentence_detectors.pragmatic_sentence_detector.sentence_detector import PragmaticSentenceDetector
from nlu.components.seq2seqs.gpt2.gpt2 import GPT2
from nlu.components.seq2seqs.marian.marian import Marian
from nlu.components.seq2seqs.t5.t5 import T5
from nlu.components.spell_checkers.context_spell.context_spell_checker import ContextSpellChecker
from nlu.components.spell_checkers.norvig_spell.norvig_spell_checker import NorvigSpellChecker
from nlu.components.spell_checkers.symmetric_spell.symmetric_spell_checker import SymmetricSpellChecker
from nlu.components.stemmers.stemmer.spark_nlp_stemmer import SparkNLPStemmer
from nlu.components.stopwordscleaners.stopwordcleaner.nlustopwordcleaner import NLUStopWordcleaner
from nlu.components.tokenizers.default_tokenizer.default_tokenizer import DefaultTokenizer
from nlu.components.tokenizers.regex_tokenizer.regex_tokenizer import RegexTokenizer
from nlu.components.tokenizers.word_segmenter.word_segmenter import WordSegmenter
from nlu.components.utils.chunk_2_doc.doc_2_chunk import Chunk_2_Doc
from nlu.components.utils.doc2chunk.doc_2_chunk import Doc_2_Chunk
from nlu.components.utils.document_assembler.spark_nlp_document_assembler import SparkNlpDocumentAssembler
from nlu.components.utils.ner_to_chunk_converter.ner_to_chunk_converter import NerToChunkConverter
from nlu.components.utils.ner_to_chunk_converter_licensed.ner_to_chunk_converter_licensed import \
    NerToChunkConverterLicensed
from nlu.components.utils.sdf_finisher.sdf_finisher import SdfFinisher
from nlu.components.utils.sentence_embeddings.spark_nlp_sentence_embedding import SparkNLPSentenceEmbeddings
from nlu.ocr_components.table_extractors.doc_table_extractor.doc2table import Doc2TextTable
from nlu.ocr_components.table_extractors.pdf_table_extractor.pdf2table import PDF2TextTable
from nlu.ocr_components.table_extractors.ppt_table_extractor.ppt2table import PPT2TextTable
from nlu.ocr_components.text_recognizers.doc2text.doc2text import Doc2Text
from nlu.ocr_components.text_recognizers.img2text.img2text import Img2Text
from nlu.ocr_components.text_recognizers.pdf2text.pdf2text import Pdf2Text
from nlu.ocr_components.utils.binary2image.binary2image import Binary2Image
from nlu.pipe.col_substitution.col_substitution_HC import *
from nlu.pipe.col_substitution.col_substitution_OCR import substitute_recognized_text_cols
from nlu.pipe.col_substitution.col_substitution_OS import *
from nlu.pipe.extractors.extractor_configs_HC import *
from nlu.pipe.extractors.extractor_configs_OCR import default_text_recognizer_config, default_binary_to_image_config
from nlu.pipe.extractors.extractor_configs_OS import *
from nlu.pipe.nlu_component import NluComponent
from nlu.universe.annotator_class_universe import AnnoClassRef
from nlu.universe.atoms import JslAnnoId, LicenseType, JslAnnoPyClass
from nlu.universe.feature_node_ids import NLP_NODE_IDS, NLP_HC_NODE_IDS
from nlu.universe.feature_node_ids import OCR_NODE_IDS
from nlu.universe.feature_node_universes import NLP_FEATURE_NODES
from nlu.universe.feature_node_universes import NLP_HC_FEATURE_NODES, OCR_FEATURE_NODES
from nlu.universe.feature_universes import NLP_FEATURES
from nlu.universe.logic_universes import NLP_LEVELS, AnnoTypes
from nlu.universe.universes import ComponentBackends
from nlu.universe.universes import Licenses, ComputeContexts


def anno_class_to_empty_component(anno_class) -> NluComponent:
    """
    Build the NLU component that wraps the given annotator class, without any model_anno_obj loaded onto it.

    The annotator class is first translated to its JSL-Anno-ID via ``anno_class_to_jsl_id``. The constructor
    registered under that ID in ``ComponentUniverse.components`` is then invoked without arguments.

    :param anno_class: annotator class to find a compatible nlu-component for
    :return: NluComponent which can load anno_class models
    :raises ValueError: if the annotator class is unknown, if nothing usable is registered for its
        JSL-Anno-ID, or if the registered constructor fails. The underlying error is attached as ``__cause__``.
    """
    jsl_anno_id = anno_class_to_jsl_id(anno_class)
    try:
        if jsl_anno_id not in ComponentUniverse.components:
            raise ValueError(f'Invalid JSL-Anno-ID={jsl_anno_id}')
        constructor = ComponentUniverse.components[jsl_anno_id]
        if not callable(constructor):
            # The registry contains placeholder strings for annotators that are not integrated yet.
            # Report that explicitly instead of surfacing "'str' object is not callable".
            raise ValueError(f'No component constructor registered, found placeholder={constructor!r}')
        component = constructor()
    except Exception as err:
        # Every failure is reported as ValueError (as before); chain explicitly so the root cause
        # shows up as the direct cause in the traceback.
        raise ValueError(f'Failed to create annotator for JSL-Anno-ID={jsl_anno_id}, error={err}') from err
    return component


def jsl_id_to_empty_component(jsl_id) -> NluComponent:
    """
    Build the NLU component registered for a JSL-ID, with no model_anno_obj loaded onto it.

    The JSL-ID is first resolved to its annotator class, which is then mapped to its empty component.
    :param jsl_id: identifier of component/annotator type
    :return: NluComponent for jsl_id
    """
    anno_class = jsl_id_to_anno_class(jsl_id)
    return anno_class_to_empty_component(anno_class)


def jsl_id_to_anno_class(jsl_id) -> JslAnnoPyClass:
    """Look up the annotator class registered for a JSL-ID.

    The three ``AnnoClassRef`` reference tables are consulted in a fixed order
    (``JSL_anno2_py_class``, then the HC table, then the OCR table); the first table containing the ID wins.
    :param jsl_id: id of anno
    :return JslAnnoPyClass : class of annotator
    :raises ValueError: if none of the tables contains jsl_id
    """
    # Each table is only accessed once the previous one has missed.
    if jsl_id in AnnoClassRef.JSL_anno2_py_class:
        return AnnoClassRef.JSL_anno2_py_class[jsl_id]
    if jsl_id in AnnoClassRef.JSL_anno_HC_ref_2_py_class:
        return AnnoClassRef.JSL_anno_HC_ref_2_py_class[jsl_id]
    if jsl_id in AnnoClassRef.JSL_anno_OCR_ref_2_py_class:
        return AnnoClassRef.JSL_anno_OCR_ref_2_py_class[jsl_id]
    raise ValueError(f'Cannot find anno_class for jsl-id={jsl_id}')


def anno_class_to_jsl_id(anno_class) -> JslAnnoId:
    """Returns the JSL-Anno-ID for the given anno_class name.

    The open source, healthcare and OCR class-to-ID dicts provided by ``AnnoClassRef`` are consulted
    in that order; the first dict containing the class wins. Only the ID is returned, no license type.
    :param anno_class: class name of the annotator
    :return: JslAnnoID of anno class
    :raises ValueError: if anno_class is in none of the three dicts
    """
    # Bind each getter's result once so it is not invoked a second time for the lookup.
    # Later getters are only called when the earlier dicts miss.
    os_ids = AnnoClassRef.get_os_pyclass_2_anno_id_dict()
    if anno_class in os_ids:
        return os_ids[anno_class]

    hc_ids = AnnoClassRef.get_hc_pyclass_2_anno_id_dict()
    if anno_class in hc_ids:
        return hc_ids[anno_class]

    ocr_ids = AnnoClassRef.get_ocr_pyclass_2_anno_id_dict()
    if anno_class in ocr_ids:
        return ocr_ids[anno_class]

    raise ValueError(f'Cannot get class metadata for invalid anno_class={anno_class}')


def get_anno_class_metadata(anno_class) -> Tuple[JslAnnoId, LicenseType]:
    """Look up the JSL-Anno-ID and the default license type of an anno_class name.

    An anno which maps to a component with default OS_license may still load a HC model_anno_obj;
    the nlu component must be updated to HC license in that case.
    :param anno_class: class name of the annotator
    :return: Tuple, first entry JslAnnoID, second entry Default LicenseType
    :raises ValueError: if anno_class is in none of the OS, HC or OCR class-to-ID tables
    """
    # NOTE(review): every branch reports Licenses.open_source, including the HC and OCR tables.
    # This looks like a copy-paste leftover - confirm whether HC/OCR classes should report their own license.
    if anno_class in AnnoClassRef.JSL_OS_py_class_2_anno_id:
        return AnnoClassRef.JSL_OS_py_class_2_anno_id[anno_class], Licenses.open_source
    if anno_class in AnnoClassRef.JSL_HC_py_class_2_anno_id:
        return AnnoClassRef.JSL_HC_py_class_2_anno_id[anno_class], Licenses.open_source
    if anno_class in AnnoClassRef.JSL_OCR_py_class_2_anno_id:
        return AnnoClassRef.JSL_OCR_py_class_2_anno_id[anno_class], Licenses.open_source
    raise ValueError(f'Cannot get class metadata for invalid anno_class={anno_class}')


class ComponentUniverse:
    # Encapsulates all Open Source component constructors by mapping each individual Annotator class to a specific constructor
    A = NLP_NODE_IDS
    H_A = NLP_HC_NODE_IDS
    O_A = OCR_NODE_IDS
    T = AnnoTypes
    F = NLP_FEATURES
    L = NLP_LEVELS
    ACR = AnnoClassRef
    # os_components = {}
    # hc_components = {}
    # ocr_components = {}
    components = {
        A.CHUNK2DOC: partial(NluComponent,
                             name=A.CHUNK2DOC,
                             type=T.HELPER_ANNO,
                             get_default_model=Chunk_2_Doc.get_default_model,
                             pdf_extractor_methods={'default_full': default_full_config, },
                             # 'default': '',   TODO no extractor
                             pdf_col_name_substitutor=substitute_doc2chunk_cols,
                             output_level=L.DOCUMENT,
                             node=NLP_FEATURE_NODES.nodes[A.CHUNK2DOC],
                             description='TODO',
                             provider=ComponentBackends.open_source,
                             license=Licenses.open_source,
                             computation_context=ComputeContexts.spark,
                             output_context=ComputeContexts.spark,
                             jsl_anno_class_id=A.CHUNK2DOC,
                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CHUNK2DOC],

                             ),
        A.CHUNK_EMBEDDINGS_CONVERTER: partial(NluComponent,
                                              name=A.CHUNK_EMBEDDINGS_CONVERTER,
                                              type=T.HELPER_ANNO,
                                              get_default_model=ChunkEmbedder.get_default_model,
                                              pdf_extractor_methods={'default': default_chunk_embedding_config,
                                                                     'default_full': default_full_config, },
                                              # TODO no extractor
                                              pdf_col_name_substitutor=substitute_chunk_embed_cols,
                                              output_level=L.CHUNK,
                                              node=NLP_FEATURE_NODES.nodes[A.CHUNK_EMBEDDINGS_CONVERTER],
                                              description='Convert Chunks to Doc type col',
                                              provider=ComponentBackends.open_source,
                                              license=Licenses.open_source,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              jsl_anno_class_id=A.CHUNK_EMBEDDINGS_CONVERTER,
                                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CHUNK_EMBEDDINGS_CONVERTER],
                                              is_storage_ref_producer=True,
                                              has_storage_ref=True,
                                              ),

        A.BERT_SENTENCE_CHUNK_EMBEDDINGS: partial(NluComponent,
                                                  name=A.BERT_SENTENCE_CHUNK_EMBEDDINGS,
                                                  type=T.CHUNK_EMBEDDING,
                                                  get_default_model=BertSentenceChunkEmbeds.get_default_model,
                                                  get_pretrained_model=BertSentenceChunkEmbeds.get_pretrained_model,
                                                  pdf_extractor_methods={'default': default_chunk_embedding_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_chunk_embed_cols,
                                                  output_level=L.TOKEN,
                                                  node=NLP_FEATURE_NODES.nodes[A.BERT_SENTENCE_CHUNK_EMBEDDINGS],
                                                  description='Converts NER chunks into Chunk Embeddings generated from sentence embedder',
                                                  provider=ComponentBackends.open_source,
                                                  license=Licenses.open_source,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=A.BERT_SENTENCE_CHUNK_EMBEDDINGS,
                                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                      A.BERT_SENTENCE_CHUNK_EMBEDDINGS],
                                                  is_storage_ref_producer=True,
                                                  has_storage_ref=True,
                                                  ),

        # TODO just placeholder
        A.TRAINABLE_TOKENIZER: partial(NluComponent,
                                       name=A.POS,
                                       type=T.TOKEN_CLASSIFIER,
                                       get_default_model=RegexTokenizer.get_default_model,
                                       pdf_extractor_methods={'default': default_tokenizer_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_tokenizer_cols,
                                       output_level=L.TOKEN,
                                       node=NLP_FEATURE_NODES.nodes[A.POS],
                                       description='todo',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=A.REGEX_TOKENIZER,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.REGEX_TOKENIZER],
                                       ),
        A.CHUNK_TOKENIZER: 'TODO NOT INTEGRATED',
        A.CHUNKER: partial(NluComponent,
                           name=A.CHUNKER,
                           type=T.CHUNK_CLASSIFIER,
                           get_default_model=DefaultChunker.get_default_model,
                           pdf_extractor_methods={'default': default_chunk_config,
                                                  'default_full': default_full_config, },
                           pdf_col_name_substitutor=substitute_chunk_cols,
                           output_level=L.CHUNK,
                           node=NLP_FEATURE_NODES.nodes[A.CHUNKER],
                           description='Regex matcher that matches patters defined by part-of-speech (POS) tags',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.CHUNKER,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CHUNKER],

                           ),
        A.CLASSIFIER_DL: partial(NluComponent,
                                 name=A.CLASSIFIER_DL,
                                 type=T.DOCUMENT_CLASSIFIER,
                                 get_default_model=ClassifierDl.get_default_model,
                                 get_pretrained_model=ClassifierDl.get_pretrained_model,
                                 get_trainable_model=ClassifierDl.get_trainable_model,
                                 pdf_extractor_methods={'default': default_classifier_dl_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_classifier_dl_cols,
                                 output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                 node=NLP_FEATURE_NODES.nodes[A.CLASSIFIER_DL],
                                 description='Deep Learning based general classifier for many problems',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.CLASSIFIER_DL,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CLASSIFIER_DL],
                                 has_storage_ref=True,
                                 is_storage_ref_consumer=True,
                                 trainable_mirror_anno=A.TRAINABLE_CLASSIFIER_DL,
                                 ),
        A.TRAINABLE_CLASSIFIER_DL: partial(NluComponent,
                                           name=A.TRAINABLE_CLASSIFIER_DL,
                                           type=T.DOCUMENT_CLASSIFIER,
                                           get_default_model=ClassifierDl.get_default_model,
                                           get_pretrained_model=ClassifierDl.get_pretrained_model,
                                           get_trainable_model=ClassifierDl.get_trainable_model,
                                           pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_classifier_dl_cols,
                                           output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                           node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_CLASSIFIER_DL],
                                           description='Deep Learning based general classifier for many problems',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.TRAINABLE_CLASSIFIER_DL,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_CLASSIFIER_DL],
                                           has_storage_ref=True,
                                           is_storage_ref_consumer=True,
                                           trainable=True,
                                           trained_mirror_anno=A.CLASSIFIER_DL,
                                           ),
        A.CONTEXT_SPELL_CHECKER: partial(NluComponent,
                                         name=A.CONTEXT_SPELL_CHECKER,
                                         type=T.SPELL_CHECKER,
                                         get_default_model=ContextSpellChecker.get_default_model,
                                         get_pretrained_model=ContextSpellChecker.get_pretrained_model,
                                         get_trainable_model=ContextSpellChecker.get_default_trainable_model,
                                         pdf_extractor_methods={'default': default_spell_context_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=substitute_spell_context_cols,
                                         output_level=L.TOKEN,
                                         node=NLP_FEATURE_NODES.nodes[A.CONTEXT_SPELL_CHECKER],
                                         description='Deep Learning based spell checker that uses context to predict correct corrections.',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=A.CONTEXT_SPELL_CHECKER,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CONTEXT_SPELL_CHECKER],
                                         trainable_mirror_anno=A.TRAINABLE_CONTEXT_SPELL_CHECKER,
                                         ),
        A.DATE_MATCHER: 'TODO no Extractor Implemented',
        A.UNTYPED_DEPENDENCY_PARSER: partial(NluComponent,
                                             name=A.UNTYPED_DEPENDENCY_PARSER,
                                             type=T.TOKEN_CLASSIFIER,
                                             get_default_model=LabeledDependencyParser.get_default_model,
                                             get_pretrained_model=LabeledDependencyParser.get_pretrained_model,
                                             get_trainable_model=LabeledDependencyParser.get_default_trainable_model,
                                             pdf_extractor_methods={'default': default_dep_typed_config,
                                                                    'default_full': default_full_config, },
                                             pdf_col_name_substitutor=substitute_labled_dependency_cols,
                                             output_level=L.TOKEN,
                                             node=NLP_FEATURE_NODES.nodes[A.UNTYPED_DEPENDENCY_PARSER],
                                             description='todo',
                                             provider=ComponentBackends.open_source,
                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             jsl_anno_class_id=A.UNTYPED_DEPENDENCY_PARSER,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.UNTYPED_DEPENDENCY_PARSER],
                                             trainable_mirror_anno=A.TRAINABLE_DEP_PARSE_UN_TYPED,
                                             ),
        A.TYPED_DEPENDENCY_PARSER: partial(NluComponent,
                                           name=A.TYPED_DEPENDENCY_PARSER,
                                           type=T.TOKEN_CLASSIFIER,
                                           get_default_model=UnlabeledDependencyParser.get_default_model,
                                           get_pretrained_model=UnlabeledDependencyParser.get_pretrained_model,
                                           get_trainable_model=UnlabeledDependencyParser.get_default_trainable_model,
                                           pdf_extractor_methods={'default': default_dep_untyped_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_un_labled_dependency_cols,
                                           output_level=L.TOKEN,
                                           node=NLP_FEATURE_NODES.nodes[A.TYPED_DEPENDENCY_PARSER],
                                           description='todo',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.TYPED_DEPENDENCY_PARSER,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TYPED_DEPENDENCY_PARSER],
                                           trainable_mirror_anno=A.TRAINABLE_DEP_PARSE_TYPED,
                                           ),
        A.DOC2CHUNK: partial(NluComponent,
                             name=A.DOC2CHUNK,
                             type=T.HELPER_ANNO,
                             get_default_model=Doc_2_Chunk.get_default_model,
                             pdf_extractor_methods={'default': default_doc2chunk_config,
                                                    'default_full': default_full_config, },
                             pdf_col_name_substitutor=substitute_doc2chunk_cols,
                             output_level=L.CHUNK,
                             node=NLP_FEATURE_NODES.nodes[A.DOC2CHUNK],
                             description='Converts Document type col to Chunk type col',
                             provider=ComponentBackends.open_source,
                             license=Licenses.open_source,
                             computation_context=ComputeContexts.spark,
                             output_context=ComputeContexts.spark,
                             jsl_anno_class_id=A.DOC2CHUNK,
                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOC2CHUNK],
                             ),
        A.DOCUMENT_ASSEMBLER: partial(NluComponent,
                                      name=A.DOCUMENT_ASSEMBLER,
                                      type=T.HELPER_ANNO,
                                      get_default_model=SparkNlpDocumentAssembler.get_default_model,
                                      pdf_extractor_methods={'default': default_document_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_doc_assembler_cols,
                                      output_level=L.DOCUMENT,
                                      node=NLP_FEATURE_NODES.nodes[A.DOCUMENT_ASSEMBLER],
                                      description='todo',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.DOCUMENT_ASSEMBLER,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOCUMENT_ASSEMBLER],
                                      ),
        A.DOCUMENT_NORMALIZER: partial(NluComponent,
                                       name=A.DOCUMENT_NORMALIZER,
                                       type=T.TEXT_NORMALIZER,
                                       get_default_model=SparkNLPDocumentNormalizer.get_default_model,
                                       pdf_extractor_methods={'default': default_norm_document_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_doc_norm_cols,
                                       output_level=L.DOCUMENT,
                                       node=NLP_FEATURE_NODES.nodes[A.DOCUMENT_NORMALIZER],
                                       description='todo',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=A.DOCUMENT_NORMALIZER,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOCUMENT_NORMALIZER],
                                       ),
        # Not integrated yet: these keys map to a placeholder string instead of a partial(NluComponent, ...) factory.
        A.EMBEDDINGS_FINISHER: 'TODO NOT INTEGRATED',
        A.ENTITY_RULER: 'TODO NOT INTEGRATED',
        A.FINISHER: partial(NluComponent,  # TODO WIP
                            name=A.FINISHER,
                            type=T.HELPER_ANNO,
                            get_default_model=SdfFinisher.get_default_model,
                            # TODO EXTRACTOR
                            pdf_extractor_methods={'default': default_full_config,
                                                   'default_full': default_full_config, },
                            # TODO SUBSTITOR
                            pdf_col_name_substitutor=None,  # TODO no sub defined
                            output_level=L.DOCUMENT,
                            # TODO sub-token actually(?)
                            node=NLP_FEATURE_NODES.nodes[A.FINISHER],
                            description='Get lemmatized base version of tokens',
                            provider=ComponentBackends.open_source,
                            license=Licenses.open_source,
                            computation_context=ComputeContexts.spark,
                            output_context=ComputeContexts.spark,
                            jsl_anno_class_id=A.FINISHER,
                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.FINISHER],
                            ),
        # Not integrated yet: these keys map to a placeholder string instead of a partial(NluComponent, ...) factory.
        A.GRAPH_EXTRACTION: 'TODO NOT INTEGRATED',
        A.GRAPH_FINISHER: 'TODO NOT INTEGRATED',
        A.LANGUAGE_DETECTOR_DL: partial(NluComponent,
                                        name=A.LANGUAGE_DETECTOR_DL,
                                        type=T.DOCUMENT_CLASSIFIER,
                                        get_default_model=LanguageDetector.get_default_model,
                                        get_pretrained_model=LanguageDetector.get_pretrained_model,
                                        pdf_extractor_methods={'default': default_lang_classifier_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=None,  # TODO no sub defined
                                        output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                        # TODO sub-token actually(?)
                                        node=NLP_FEATURE_NODES.nodes[A.LANGUAGE_DETECTOR_DL],
                                        description='Get lemmatized base version of tokens',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.LANGUAGE_DETECTOR_DL,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.LANGUAGE_DETECTOR_DL],
                                        ),
        A.LEMMATIZER: partial(NluComponent,
                              name=A.LEMMATIZER,
                              type=T.TOKEN_NORMALIZER,
                              output_context=ComputeContexts.spark,
                              get_default_model=SparkNLPLemmatizer.get_default_model,
                              get_pretrained_model=SparkNLPLemmatizer.get_pretrained_model,
                              get_trainable_model=SparkNLPLemmatizer.get_default_trainable_model,
                              pdf_extractor_methods={'default': default_lemma_config,
                                                     'default_full': default_full_config, },
                              pdf_col_name_substitutor=substitute_lem_cols,
                              output_level=L.TOKEN,  # TODO sub-token actually(?)
                              node=NLP_FEATURE_NODES.nodes[A.LEMMATIZER],
                              description='Get lemmatized base version of tokens',
                              provider=ComponentBackends.open_source,
                              license=Licenses.open_source,
                              computation_context=ComputeContexts.spark,
                              jsl_anno_class_id=A.LEMMATIZER,
                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.LEMMATIZER],
                              trainable_mirror_anno=A.TRAINABLE_LEMMATIZER
                              ),
        A.MULTI_CLASSIFIER_DL: partial(NluComponent,
                                       name=A.MULTI_CLASSIFIER_DL,
                                       type=T.DOCUMENT_CLASSIFIER,
                                       output_level=L.MULTI_TOKEN_CLASSIFIER,
                                       get_default_model=MultiClassifier.get_default_model,
                                       get_pretrained_model=MultiClassifier.get_pretrained_model,
                                       get_trainable_model=MultiClassifier.get_default_trainable_model,
                                       pdf_extractor_methods={'default': default_multi_classifier_dl_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_multi_classifier_dl_cols,
                                       node=NLP_FEATURE_NODES.nodes[A.MULTI_CLASSIFIER_DL],
                                       description='Deep Learning based general classifier for multi-label classification problem. I.e. problems, where one document may be labled with multiple labels at the same time.',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=A.MULTI_CLASSIFIER_DL,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.MULTI_CLASSIFIER_DL],
                                       has_storage_ref=True,
                                       is_storage_ref_consumer=True,
                                       trainable_mirror_anno=A.TRAINABLE_MULTI_CLASSIFIER_DL,
                                       ),
        A.TRAINABLE_MULTI_CLASSIFIER_DL: partial(NluComponent,
                                                 name=A.TRAINABLE_MULTI_CLASSIFIER_DL,
                                                 type=T.DOCUMENT_CLASSIFIER,
                                                 output_level=L.MULTI_TOKEN_CLASSIFIER,
                                                 get_default_model=MultiClassifier.get_default_model,
                                                 get_pretrained_model=MultiClassifier.get_pretrained_model,
                                                 get_trainable_model=MultiClassifier.get_default_trainable_model,
                                                 pdf_extractor_methods={'default': default_multi_classifier_dl_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_multi_classifier_dl_cols,
                                                 node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_MULTI_CLASSIFIER_DL],
                                                 description='Trainable Deep Learning based general classifier for multi-label classification problem. I.e. problems, where one document may be labled with multiple labels at the same time.',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.TRAINABLE_MULTI_CLASSIFIER_DL,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.TRAINABLE_MULTI_CLASSIFIER_DL],
                                                 has_storage_ref=True,
                                                 is_storage_ref_consumer=True,
                                                 trainable=True,
                                                 trained_mirror_anno=A.CLASSIFIER_DL,
                                                 # Should be A.MULTI_CLASSIFIER_DL, but fitted class is actually classifier DL, special edge case
                                                 ),

        # Not integrated yet: this key maps to a placeholder string instead of a partial(NluComponent, ...) factory.
        A.MULTI_DATE_MATCHER: 'TODO NOT INTEGRATED',
        A.N_GRAMM_GENERATOR: partial(NluComponent,
                                     name=A.N_GRAMM_GENERATOR,
                                     type=T.CHUNK_CLASSIFIER,  # Classify each n-gram wether they match Pattern or not
                                     get_default_model=NGram.get_default_model,
                                     pdf_extractor_methods={'default': default_ngram_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_ngram_cols,
                                     output_level=L.CHUNK,
                                     node=NLP_FEATURE_NODES.nodes[A.N_GRAMM_GENERATOR],
                                     description='Extract N-Gram chunks from texts',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.N_GRAMM_GENERATOR,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.N_GRAMM_GENERATOR],
                                     ),
        A.NER_CONVERTER: partial(NluComponent,
                                 name=A.NER_CONVERTER,
                                 type=T.HELPER_ANNO,
                                 get_default_model=NerToChunkConverter.get_default_model,
                                 pdf_extractor_methods={'default': default_ner_converter_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_ner_converter_cols,
                                 output_level=L.CHUNK,
                                 node=NLP_FEATURE_NODES.nodes[A.NER_CONVERTER],
                                 description='Convert NER-IOB tokens into concatenated strings (aka chunks)',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.NER_CONVERTER,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NER_CONVERTER],
                                 ),
        A.NER_CRF: partial(NluComponent,
                           name=A.NER_CRF,
                           type=T.TOKEN_CLASSIFIER,
                           output_level=L.TOKEN,
                           get_default_model=NERDLCRF.get_default_model,
                           get_pretrained_model=NERDLCRF.get_pretrained_model,
                           get_trainable_model=NERDLCRF.get_default_trainable_model,
                           pdf_extractor_methods={'default': '', 'default_full': default_full_config, },
                           pdf_col_name_substitutor=None,  # TODO
                           node=NLP_FEATURE_NODES.nodes[A.NER_CRF],
                           description='Classical NER model_anno_obj based on conditional random fields (CRF). Predicts IOB tags ',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.NER_CRF,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NER_CRF],
                           trainable_mirror_anno=A.TRAINABLE_NER_CRF,
                           ),
        A.NER_DL: partial(NluComponent,
                          name=A.NER_DL,
                          type=T.TOKEN_CLASSIFIER,
                          output_level=L.TOKEN,
                          get_default_model=NERDL.get_default_model,
                          get_pretrained_model=NERDL.get_pretrained_model,
                          get_trainable_model=NERDL.get_default_trainable_model,
                          pdf_extractor_methods={'default': default_NER_config, 'meta': meta_NER_config,
                                                 'default_full': default_full_config, },
                          pdf_col_name_substitutor=substitute_ner_dl_cols,
                          node=NLP_FEATURE_NODES.nodes[A.NER_DL],
                          description='Deep Learning based NER model_anno_obj that predicts IOB tags. ',
                          provider=ComponentBackends.open_source,
                          license=Licenses.open_source,
                          computation_context=ComputeContexts.spark,
                          output_context=ComputeContexts.spark,
                          jsl_anno_class_id=A.NER_DL,
                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NER_DL],
                          trainable_mirror_anno=A.TRAINABLE_NER_DL,
                          has_storage_ref=True,
                          is_storage_ref_consumer=True
                          ),
        A.TRAINABLE_NER_DL: partial(NluComponent,
                                    name=A.TRAINABLE_NER_DL,
                                    type=T.TOKEN_CLASSIFIER,
                                    get_default_model=NERDL.get_default_model,
                                    get_pretrained_model=NERDL.get_pretrained_model,
                                    get_trainable_model=NERDL.get_default_trainable_model,
                                    pdf_extractor_methods={'default': default_NER_config, 'meta': meta_NER_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_ner_dl_cols,
                                    output_level=L.TOKEN,
                                    node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_NER_DL],
                                    description='Deep Learning based NER model_anno_obj that predicts IOB tags. ',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.TRAINABLE_NER_DL,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_NER_DL],
                                    trained_mirror_anno=A.NER_DL,
                                    trainable=True,
                                    has_storage_ref=True,
                                    is_storage_ref_consumer=True
                                    ),
        # Not integrated yet: this key maps to a placeholder string instead of a partial(NluComponent, ...) factory.
        A.NER_OVERWRITER: 'TODO NOT INTEGRATED',
        A.NORMALIZER: partial(NluComponent,
                              name=A.NORMALIZER,
                              type=T.TOKEN_NORMALIZER,
                              get_default_model=SparkNLPNormalizer.get_default_model,
                              get_pretrained_model=SparkNLPNormalizer.get_pretrained_model,
                              # get_trainable_model=SparkNLPLemmatizer.get_default_trainable_model,
                              pdf_extractor_methods={'default': default_norm_config,
                                                     'default_full': default_full_config, },
                              pdf_col_name_substitutor=substitute_norm_cols,
                              output_level=L.TOKEN,  # TODO sub-token actually(?)
                              node=NLP_FEATURE_NODES.nodes[A.NORMALIZER],
                              description='Get lemmatized base version of tokens',
                              provider=ComponentBackends.open_source,
                              license=Licenses.open_source,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=A.NORMALIZER,
                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NORMALIZER],
                              trainable_mirror_anno=A.TRAINABLE_NORMALIZER
                              ),
        A.NORVIG_SPELL_CHECKER: partial(NluComponent,
                                        name=A.NORVIG_SPELL_CHECKER,
                                        type=T.SPELL_CHECKER,
                                        get_default_model=NorvigSpellChecker.get_default_model,
                                        get_pretrained_model=NorvigSpellChecker.get_pretrained_model,
                                        get_trainable_model=NorvigSpellChecker.get_default_trainable_model,
                                        pdf_extractor_methods={'default': default_spell_norvig_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_spell_norvig_cols,
                                        output_level=L.TOKEN,  # TODO sub-token actually
                                        node=NLP_FEATURE_NODES.nodes[A.NORVIG_SPELL_CHECKER],
                                        description='Norvig algorithm based Spell Checker',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.NORVIG_SPELL_CHECKER,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NORVIG_SPELL_CHECKER],
                                        trainable_mirror_anno=A.TRAINABLE_NORVIG_SPELL_CHECKER
                                        ),
        A.POS: partial(NluComponent,
                       name=A.POS,
                       type=T.TOKEN_CLASSIFIER,
                       get_default_model=PartOfSpeechJsl.get_default_model,
                       get_pretrained_model=PartOfSpeechJsl.get_pretrained_model,
                       get_trainable_model=PartOfSpeechJsl.get_default_trainable_model,
                       pdf_extractor_methods={'default': default_POS_config, 'default_full': default_full_config, },
                       pdf_col_name_substitutor=substitute_pos_cols,
                       output_level=L.TOKEN,
                       node=NLP_FEATURE_NODES.nodes[A.POS],
                       description='todo',
                       provider=ComponentBackends.open_source,
                       license=Licenses.open_source,
                       computation_context=ComputeContexts.spark,
                       output_context=ComputeContexts.spark,
                       jsl_anno_class_id=A.POS,
                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.POS],
                       trainable_mirror_anno=A.TRAINABLE_POS,
                       ),
        A.TRAINABLE_POS: partial(NluComponent,
                                 name=A.TRAINABLE_POS,
                                 type=T.TOKEN_CLASSIFIER,
                                 get_default_model=PartOfSpeechJsl.get_default_model,
                                 get_pretrained_model=PartOfSpeechJsl.get_pretrained_model,
                                 get_trainable_model=PartOfSpeechJsl.get_default_trainable_model,
                                 pdf_extractor_methods={'default': default_POS_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_pos_cols,
                                 output_level=L.TOKEN,
                                 node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_POS],
                                 description='todo',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.TRAINABLE_POS,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_POS],
                                 trained_mirror_anno=A.POS,
                                 trainable=True
                                 ),

        # Not integrated yet: this key maps to a placeholder string instead of a partial(NluComponent, ...) factory.
        A.RECURISVE_TOKENIZER: 'TODO NOT INTEGRATED',
        A.REGEX_MATCHER: partial(NluComponent,  # TODO , type as ner_converted ok ?
                                 name=A.REGEX_MATCHER,
                                 type=T.HELPER_ANNO,
                                 get_default_model=RegexMatcher.get_default_model,
                                 # TODO extractor??
                                 pdf_extractor_methods={'default': default_ner_converter_config,
                                                        'default_full': default_full_config, },
                                 # TODO substitor??
                                 pdf_col_name_substitutor=substitute_ner_converter_cols,
                                 output_level=L.CHUNK,
                                 node=NLP_FEATURE_NODES.nodes[A.REGEX_MATCHER],
                                 description='Matches chunks in text based on regex rules',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.REGEX_MATCHER,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.REGEX_MATCHER],
                                 ),
        # Not integrated yet: this key maps to a placeholder string instead of a partial(NluComponent, ...) factory.
        A.TRAINABLE_REGEX_MATCHER: 'TODO no Extractor Implemented',
        A.REGEX_TOKENIZER: partial(NluComponent,
                                   name=A.POS,
                                   type=T.TOKEN_CLASSIFIER,
                                   get_default_model=RegexTokenizer.get_default_model,
                                   pdf_extractor_methods={'default': default_tokenizer_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_tokenizer_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.POS],
                                   description='todo',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.REGEX_TOKENIZER,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.REGEX_TOKENIZER],
                                   ),
        A.SENTENCE_DETECTOR: partial(NluComponent,
                                     name=A.SENTENCE_DETECTOR,
                                     type=T.SENTENCE_DETECTOR,
                                     get_default_model=PragmaticSentenceDetector.get_default_model,
                                     pdf_extractor_methods={'default': default_sentence_detector_DL_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_sentence_detector_dl_cols,
                                     output_level=L.SENTENCE,
                                     node=NLP_FEATURE_NODES.nodes[A.SENTENCE_DETECTOR],
                                     description='Classical rule based Sentence Detector',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.SENTENCE_DETECTOR,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTENCE_DETECTOR],
                                     ),
        A.SENTENCE_DETECTOR_DL: partial(NluComponent,
                                        name=A.SENTENCE_DETECTOR_DL,
                                        type=T.SENTENCE_DETECTOR,
                                        get_default_model=SentenceDetectorDeep.get_default_model,
                                        get_pretrained_model=SentenceDetectorDeep.get_pretrained_model,
                                        # get_trainable_model=SentenceDetectorDeep.get_trainable_model,
                                        pdf_extractor_methods={'default': default_sentence_detector_DL_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_sentence_detector_dl_cols,
                                        output_level=L.SENTENCE,
                                        node=NLP_FEATURE_NODES.nodes[A.SENTENCE_DETECTOR_DL],
                                        description='Deep Learning based sentence Detector',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.SENTENCE_DETECTOR_DL,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTENCE_DETECTOR_DL],
                                        trainable_mirror_anno=A.TRAINABLE_SENTENCE_DETECTOR_DL
                                        ),
        A.SENTENCE_EMBEDDINGS_CONVERTER: partial(NluComponent,
                                                 name=A.SENTENCE_EMBEDDINGS_CONVERTER,
                                                 type=T.DOCUMENT_EMBEDDING,
                                                 get_default_model=SparkNLPSentenceEmbeddings.get_default_model,
                                                 pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_sent_embed_cols,
                                                 output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                                 node=NLP_FEATURE_NODES.nodes[A.SENTENCE_EMBEDDINGS_CONVERTER],
                                                 description='Converts Word Embeddings to Sentence/Document Embeddings',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.SENTENCE_EMBEDDINGS_CONVERTER,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.SENTENCE_EMBEDDINGS_CONVERTER],
                                                 is_storage_ref_producer=True,
                                                 has_storage_ref=True
                                                 ),
        A.STEMMER: partial(NluComponent,
                           name=A.STEMMER,
                           type=T.TOKEN_NORMALIZER,
                           get_default_model=SparkNLPStemmer.get_default_model,
                           pdf_extractor_methods={'default': default_stemm_config,
                                                  'default_full': default_full_config, },
                           pdf_col_name_substitutor=substitute_stem_cols,
                           output_level=L.TOKEN,  # TODO sub-token actually(?)
                           node=NLP_FEATURE_NODES.nodes[A.STEMMER],
                           description='Get stemmed base version of tokens',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.STEMMER,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.STEMMER],
                           ),
        A.STOP_WORDS_CLEANER: partial(NluComponent,
                                      name=A.STOP_WORDS_CLEANER,
                                      type=T.TEXT_NORMALIZER,
                                      get_default_model=NLUStopWordcleaner.get_default_model,
                                      get_pretrained_model=NLUStopWordcleaner.get_pretrained_model,
                                      pdf_extractor_methods={'default': default_stopwords_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_stopwords_cols,
                                      output_level=L.TOKEN,  # TODO sub-token actually
                                      node=NLP_FEATURE_NODES.nodes[A.STOP_WORDS_CLEANER],
                                      description='Removes stopwords from text based on internal list of stop words.',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.STOP_WORDS_CLEANER,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.STOP_WORDS_CLEANER],
                                      ),
        A.SYMMETRIC_DELETE_SPELLCHECKER: partial(NluComponent,
                                                 name=A.SYMMETRIC_DELETE_SPELLCHECKER,
                                                 type=T.SPELL_CHECKER,
                                                 get_default_model=SymmetricSpellChecker.get_default_model,
                                                 get_pretrained_model=SymmetricSpellChecker.get_pretrained_model,
                                                 get_trainable_model=SymmetricSpellChecker.get_default_trainable_model,
                                                 pdf_extractor_methods={'default': default_spell_symmetric_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_spell_symm_cols,
                                                 output_level=L.TOKEN,  # TODO sub-token actually
                                                 node=NLP_FEATURE_NODES.nodes[A.SYMMETRIC_DELETE_SPELLCHECKER],
                                                 description='Symmetric Spell Checker',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.SYMMETRIC_DELETE_SPELLCHECKER,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.SYMMETRIC_DELETE_SPELLCHECKER],
                                                 trainable_mirror_anno=A.TRAINABLE_SYMMETRIC_DELETE_SPELLCHECKER
                                                 ),
        # Placeholder entries: these annotators are not wired up yet. Each maps to a plain
        # string naming what is still missing rather than to a partial(NluComponent, ...) factory.
        A.TEXT_MATCHER: 'TODO EXTRACTOR METHOD MISSING',  # TODO
        A.TRAINABLE_TEXT_MATCHER: 'TODO EXTRACTOR METHOD MISSING',  # TODO
        A.TOKEN2CHUNK: 'TODO NOT INTEGRATED',  # TODO
        A.TOKEN_ASSEMBLER: 'TODO EXTRACTORS MISSING',  # TODO
        A.TOKENIZER: partial(NluComponent,
                             name=A.TOKENIZER,
                             type=T.TOKENIZER,
                             get_default_model=DefaultTokenizer.get_default_model,
                             pdf_extractor_methods={'default': default_tokenizer_config,
                                                    'default_full': default_full_config, },
                             pdf_col_name_substitutor=substitute_tokenizer_cols,
                             output_level=L.TOKEN,
                             node=NLP_FEATURE_NODES.nodes[A.TOKENIZER],
                             description='Default tokenizer',
                             provider=ComponentBackends.open_source,
                             license=Licenses.open_source,
                             computation_context=ComputeContexts.spark,
                             output_context=ComputeContexts.spark,
                             jsl_anno_class_id=A.TOKENIZER,
                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TOKENIZER],
                             ),
        A.SENTIMENT_DL: partial(NluComponent,
                                name=A.SENTIMENT_DL,
                                type=T.DOCUMENT_CLASSIFIER,
                                get_default_model=SentimentDl.get_default_model,
                                get_pretrained_model=SentimentDl.get_pretrained_model,
                                get_trainable_model=SentimentDl.get_default_trainable_model,
                                pdf_extractor_methods={'default': default_sentiment_dl_config,
                                                       'default_full': default_full_config, },
                                pdf_col_name_substitutor=substitute_sentiment_dl_cols,
                                output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                node=NLP_FEATURE_NODES.nodes[A.SENTIMENT_DL],
                                description='Deep Learning based Sentiment Detector',
                                provider=ComponentBackends.open_source,
                                license=Licenses.open_source,
                                computation_context=ComputeContexts.spark,
                                output_context=ComputeContexts.spark,
                                jsl_anno_class_id=A.SENTIMENT_DL,
                                jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTIMENT_DL],
                                trainable_mirror_anno=A.TRAINABLE_SENTIMENT_DL,
                                is_storage_ref_consumer=True,
                                has_storage_ref=True
                                ),
        A.TRAINABLE_SENTIMENT_DL: partial(NluComponent,
                                          name=A.TRAINABLE_SENTIMENT_DL,
                                          type=T.DOCUMENT_CLASSIFIER,
                                          get_default_model=SentimentDl.get_default_model,
                                          get_pretrained_model=SentimentDl.get_pretrained_model,
                                          get_trainable_model=SentimentDl.get_default_trainable_model,
                                          pdf_extractor_methods={'default': default_sentiment_dl_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_sentiment_dl_cols,
                                          output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                          node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_SENTIMENT_DL],
                                          description='Deep Learning based Sentiment Detector',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.TRAINABLE_SENTIMENT_DL,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_SENTIMENT_DL],
                                          trained_mirror_anno=A.SENTIMENT_DL,
                                          is_storage_ref_consumer=True,
                                          has_storage_ref=True,
                                          trainable=True
                                          ),
        A.SENTIMENT_DETECTOR: partial(NluComponent,
                                      name=A.SENTIMENT_DETECTOR,
                                      type=T.DOCUMENT_CLASSIFIER,
                                      get_default_model=Sentiment.get_default_model,
                                      # get_pretrained_model = Sentiment.get_pretrained_model,
                                      get_trainable_model=Sentiment.get_default_trainable_model,
                                      pdf_extractor_methods={'default': default_sentiment_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_sentiment_dl_cols,
                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                      node=NLP_FEATURE_NODES.nodes[A.SENTIMENT_DETECTOR],
                                      description='Rule based sentiment detector, which calculates a score based on predefined keywords',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.SENTIMENT_DETECTOR,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTIMENT_DETECTOR],
                                      trainable_mirror_anno=A.TRAINABLE_SENTIMENT,
                                      ),
        A.VIVEKN_SENTIMENT: partial(NluComponent,
                                    name=A.VIVEKN_SENTIMENT,
                                    type=T.DOCUMENT_CLASSIFIER,
                                    get_default_model=ViveknSentiment.get_default_model,
                                    get_pretrained_model=ViveknSentiment.get_pretrained_model,
                                    get_trainable_model=ViveknSentiment.get_default_trainable_model,
                                    pdf_extractor_methods={'default': default_sentiment_vivk_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_sentiment_vivk_cols,
                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                    node=NLP_FEATURE_NODES.nodes[A.VIVEKN_SENTIMENT],
                                    description='Sentiment detector based on the vivekn algorithm',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.VIVEKN_SENTIMENT,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.VIVEKN_SENTIMENT],
                                    trainable_mirror_anno=A.TRAINABLE_VIVEKN_SENTIMENT
                                    ),
        A.WORD_EMBEDDINGS: partial(NluComponent,
                                   name=A.WORD_EMBEDDINGS,
                                   type=T.TOKEN_EMBEDDING,
                                   get_default_model=Glove.get_default_model,
                                   get_pretrained_model=Glove.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_word_embedding_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_word_embed_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.WORD_EMBEDDINGS],
                                   description='Static Word Embeddings generator, i.e. Glove, etc..',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.WORD_EMBEDDINGS,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WORD_EMBEDDINGS],
                                   is_storage_ref_producer=True,
                                   has_storage_ref=True,
                                   ),
        A.WORD_SEGMENTER: partial(NluComponent,
                                  name=A.WORD_SEGMENTER,
                                  type=T.TOKENIZER,
                                  get_default_model=WordSegmenter.get_default_model,
                                  get_pretrained_model=WordSegmenter.get_pretrained_model,
                                  get_trainable_model=WordSegmenter.get_default_model_for_lang,
                                  pdf_extractor_methods={'default': default_word_segmenter_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_word_seg_cols,
                                  output_level=L.TOKEN,
                                  node=NLP_FEATURE_NODES.nodes[A.WORD_SEGMENTER],
                                  description='Segments non white space seperated text into tokens, like Chinese or Japanese. ',
                                  provider=ComponentBackends.open_source,
                                  license=Licenses.open_source,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=A.WORD_SEGMENTER,
                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WORD_SEGMENTER],
                                  trainable_mirror_anno=A.TRAINABLE_WORD_SEGMENTER
                                  ),
        A.YAKE_KEYWORD_EXTRACTION: partial(NluComponent,
                                           name=A.YAKE_KEYWORD_EXTRACTION,
                                           type=T.CHUNK_CLASSIFIER,
                                           # TODO??? Classifies each chunks/ngram likelyhood of beeing a Ketyword
                                           get_default_model=Yake.get_default_model,
                                           pdf_extractor_methods={'default': default_yake_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_YAKE_cols,
                                           output_level=L.CHUNK,  # Actual sub-ngram/ngram filter
                                           node=NLP_FEATURE_NODES.nodes[A.YAKE_KEYWORD_EXTRACTION],
                                           description='Calculates probability of each n-gram beeing a keyword. Yields a selection of these n-grams with specific filters,i.e. length, probability, etc..',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.YAKE_KEYWORD_EXTRACTION,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.YAKE_KEYWORD_EXTRACTION],
                                           has_storage_ref=False,
                                           is_storage_ref_consumer=False,
                                           is_storage_ref_producer=False,
                                           ),

        A.DOC2VEC: partial(NluComponent,
                           name=A.DOC2VEC,
                           type=T.TOKEN_EMBEDDING,
                           get_default_model=Doc2Vec.get_default_model,
                           get_trainable_model=Doc2Vec.get_trainable_model,
                           get_pretrained_model=Doc2Vec.get_pretrained_model,
                           pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                  'default_full': default_full_config, },
                           pdf_col_name_substitutor=substitute_sent_embed_cols,
                           output_level=L.TOKEN,
                           node=NLP_FEATURE_NODES.nodes[A.DOC2VEC],
                           description='Trains a Word2Vec model_anno_obj that creates vector representations of words in a text corpus. The algorithm first constructs a vocabulary from the corpus and then learns vector representation of words in the vocabulary. The vector representation can be used as features in natural language processing and machine learning algorithms.',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.DOC2VEC,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOC2VEC],
                           has_storage_ref=True,
                           is_storage_ref_producer=True,
                           trainable_mirror_anno=A.TRAINABLE_DOC2VEC
                           ),

        A.TRAINABLE_DOC2VEC: partial(NluComponent,
                                     name=A.TRAINABLE_DOC2VEC,
                                     type=T.TOKEN_EMBEDDING,
                                     get_default_model=Doc2Vec.get_default_model,
                                     get_trainable_model=Doc2Vec.get_trainable_model,
                                     get_pretrained_model=Doc2Vec.get_pretrained_model,
                                     pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_sent_embed_cols,
                                     output_level=L.TOKEN,
                                     node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_DOC2VEC],
                                     description='Trains a Word2Vec model_anno_obj that creates vector representations of words in a text corpus. The algorithm first constructs a vocabulary from the corpus and then learns vector representation of words in the vocabulary. The vector representation can be used as features in natural language processing and machine learning algorithms.',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.TRAINABLE_DOC2VEC,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_DOC2VEC],
                                     has_storage_ref=True,
                                     is_storage_ref_producer=True,
                                     trained_mirror_anno=A.DOC2VEC,
                                     trainable=True
                                     ),

        ### ________ TRANSFORMERS BELOW _________
        A.ALBERT_EMBEDDINGS: partial(NluComponent,
                                     name=A.ALBERT_EMBEDDINGS,
                                     type=T.TOKEN_EMBEDDING,
                                     get_default_model=SparkNLPAlbert.get_default_model,
                                     get_pretrained_model=SparkNLPAlbert.get_pretrained_model,
                                     pdf_extractor_methods={'default': default_word_embedding_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_word_embed_cols,
                                     output_level=L.TOKEN,
                                     node=NLP_FEATURE_NODES.nodes[A.ALBERT_EMBEDDINGS],
                                     description='ALBERT: A LITE BERT FOR SELF-SUPERVISED LEARNING OF LANGUAGE REPRESENTATIONS - Google Research, Toyota Technological Institute at Chicago',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.ALBERT_EMBEDDINGS,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ALBERT_EMBEDDINGS],
                                     has_storage_ref=True,
                                     is_storage_ref_producer=True,
                                     ),

        A.ALBERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                   name=A.ALBERT_FOR_TOKEN_CLASSIFICATION,
                                                   type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                   get_default_model=TokenAlbert.get_default_model,
                                                   get_pretrained_model=TokenAlbert.get_pretrained_model,
                                                   pdf_extractor_methods={'default': default_token_classifier_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                   output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                   node=NLP_FEATURE_NODES.nodes[A.ALBERT_FOR_TOKEN_CLASSIFICATION],
                                                   description='AlbertForTokenClassification can load ALBERT Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=A.ALBERT_FOR_TOKEN_CLASSIFICATION,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.ALBERT_FOR_TOKEN_CLASSIFICATION],
                                                   ),
        A.BERT_EMBEDDINGS: partial(NluComponent,
                                   name=A.BERT_EMBEDDINGS,
                                   type=T.TOKEN_EMBEDDING,
                                   get_default_model=SparkNLPBert.get_default_model,
                                   get_pretrained_model=SparkNLPBert.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_word_embedding_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_word_embed_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.BERT_EMBEDDINGS],
                                   description='Token-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.BERT_EMBEDDINGS,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BERT_EMBEDDINGS],
                                   has_storage_ref=True,
                                   is_storage_ref_producer=True,
                                   ),
        A.BERT_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                            name=A.BERT_SENTENCE_EMBEDDINGS,
                                            type=T.DOCUMENT_EMBEDDING,
                                            get_default_model=BertSentence.get_default_model,
                                            get_pretrained_model=BertSentence.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_sent_embed_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                            node=NLP_FEATURE_NODES.nodes[A.BERT_SENTENCE_EMBEDDINGS],
                                            description='Sentence-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.BERT_SENTENCE_EMBEDDINGS,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BERT_SENTENCE_EMBEDDINGS],
                                            has_storage_ref=True,
                                            is_storage_ref_producer=True,
                                            ),
        A.BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                 name=A.BERT_FOR_TOKEN_CLASSIFICATION,
                                                 type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                 get_default_model=TokenBert.get_default_model,
                                                 get_pretrained_model=TokenBert.get_pretrained_model,
                                                 pdf_extractor_methods={'default': default_token_classifier_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                 output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                 node=NLP_FEATURE_NODES.nodes[A.BERT_FOR_TOKEN_CLASSIFICATION],
                                                 description='BertForTokenClassification can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.BERT_FOR_TOKEN_CLASSIFICATION,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.BERT_FOR_TOKEN_CLASSIFICATION],
                                                 ),

        A.BERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                    name=A.BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                    type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                    get_default_model=SeqBertClassifier.get_default_model,
                                                    get_pretrained_model=SeqBertClassifier.get_pretrained_model,
                                                    pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                    node=NLP_FEATURE_NODES.nodes[A.BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                    description='BertForSequenceClassification can load Bert Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    jsl_anno_class_id=A.BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                    ),
        A.DISTIL_BERT_EMBEDDINGS: partial(NluComponent,
                                          name=A.DISTIL_BERT_EMBEDDINGS,
                                          type=T.TOKEN_EMBEDDING,
                                          get_default_model=DistilBert.get_default_model,
                                          get_pretrained_model=DistilBert.get_pretrained_model,
                                          pdf_extractor_methods={'default': default_word_embedding_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_word_embed_cols,
                                          output_level=L.TOKEN,
                                          node=NLP_FEATURE_NODES.nodes[A.DISTIL_BERT_EMBEDDINGS],
                                          description='DistilBERT is a small, fast, cheap and light Transformer model_anno_obj trained by distilling BERT base. It has 40% less parameters than bert-base-uncased, runs 60% faster while preserving over 95% of BERT’s performances as measured on the GLUE language understanding benchmark.',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.DISTIL_BERT_EMBEDDINGS,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DISTIL_BERT_EMBEDDINGS],
                                          has_storage_ref=True,
                                          is_storage_ref_producer=True,
                                          ),
        A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                           name=A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                           type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                           get_default_model=SeqDilstilBertClassifier.get_default_model,
                                                           get_pretrained_model=SeqDilstilBertClassifier.get_pretrained_model,
                                                           pdf_extractor_methods={
                                                               'default': default_classifier_dl_config,
                                                               'default_full': default_full_config, },
                                                           pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                           output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                           node=NLP_FEATURE_NODES.nodes[
                                                               A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                           description='DistilBertForSequenceClassification can load DistilBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                           provider=ComponentBackends.open_source,
                                                           license=Licenses.open_source,
                                                           computation_context=ComputeContexts.spark,
                                                           output_context=ComputeContexts.spark,
                                                           jsl_anno_class_id=A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                               A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                           ),
        A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                        name=A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                        type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                        get_default_model=TokenDistilBert.get_default_model,
                                                        get_pretrained_model=TokenDistilBert.get_pretrained_model,
                                                        pdf_extractor_methods={
                                                            'default': default_token_classifier_config,
                                                            'default_full': default_full_config, },
                                                        pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                        output_level=L.TOKEN,
                                                        node=NLP_FEATURE_NODES.nodes[
                                                            A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION],
                                                        description='DistilBertForTokenClassification can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                        provider=ComponentBackends.open_source,
                                                        license=Licenses.open_source,
                                                        computation_context=ComputeContexts.spark,
                                                        output_context=ComputeContexts.spark,
                                                        jsl_anno_class_id=A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                            A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION],
                                                        ),
        A.ELMO_EMBEDDINGS: partial(NluComponent,
                                   name=A.ELMO_EMBEDDINGS,
                                   type=T.TOKEN_EMBEDDING,
                                   get_default_model=SparkNLPElmo.get_default_model,
                                   get_pretrained_model=SparkNLPElmo.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_word_embedding_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_word_embed_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.ELMO_EMBEDDINGS],
                                   description='Word embeddings from ELMo (Embeddings from Language Models), a language model_anno_obj trained on the 1 Billion Word Benchmark.',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.ELMO_EMBEDDINGS,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ELMO_EMBEDDINGS],
                                   has_storage_ref=True,
                                   is_storage_ref_producer=True,
                                   ),
        A.LONGFORMER_EMBEDDINGS: partial(NluComponent,
                                         name=A.LONGFORMER_EMBEDDINGS,
                                         type=T.TOKEN_EMBEDDING,
                                         get_default_model=Longformer.get_default_model,
                                         get_pretrained_model=Longformer.get_pretrained_model,
                                         pdf_extractor_methods={'default': default_word_embedding_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=substitute_word_embed_cols,
                                         output_level=L.TOKEN,
                                         node=NLP_FEATURE_NODES.nodes[A.LONGFORMER_EMBEDDINGS],
                                         description='Longformer is a transformer model_anno_obj for long documents. The Longformer model_anno_obj was presented in Longformer: The Long-Document Transformer by Iz Beltagy, Matthew E. Peters, Arman Cohan. longformer-base-4096 is a BERT-like model_anno_obj started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096.',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=A.LONGFORMER_EMBEDDINGS,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.LONGFORMER_EMBEDDINGS],
                                         has_storage_ref=True,
                                         is_storage_ref_producer=True,
                                         ),

        A.LONGFORMER_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                       name=A.LONGFORMER_FOR_TOKEN_CLASSIFICATION,
                                                       type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                       get_default_model=TokenLongFormer.get_default_model,
                                                       get_pretrained_model=TokenLongFormer.get_pretrained_model,
                                                       pdf_extractor_methods={
                                                           'default': default_token_classifier_config,
                                                           'default_full': default_full_config, },
                                                       pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                       output_level=L.TOKEN,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.LONGFORMER_FOR_TOKEN_CLASSIFICATION],
                                                       description='LongformerForTokenClassification can load Longformer Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                       provider=ComponentBackends.open_source,
                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.LONGFORMER_FOR_TOKEN_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.LONGFORMER_FOR_TOKEN_CLASSIFICATION],
                                                       ),
        A.MARIAN_TRANSFORMER: partial(NluComponent,
                                      name=A.MARIAN_TRANSFORMER,
                                      type=T.DOCUMENT_CLASSIFIER,
                                      get_default_model=Marian.get_default_model,
                                      get_pretrained_model=Marian.get_pretrained_model,
                                      pdf_extractor_methods={'default': default_marian_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_marian_cols,
                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                      node=NLP_FEATURE_NODES.nodes[A.MARIAN_TRANSFORMER],
                                      description='Marian is an efficient, free Neural Machine Translation framework written in pure C++ with minimal dependencies. It is mainly being developed by the Microsoft Translator team. Many academic (most notably the University of Edinburgh and in the past the Adam Mickiewicz University in Poznań) and commercial contributors help with its development. MarianTransformer uses the models trained by MarianNMT.',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.MARIAN_TRANSFORMER,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.MARIAN_TRANSFORMER],
                                      ),
        A.ROBERTA_EMBEDDINGS: partial(NluComponent,
                                      name=A.ROBERTA_EMBEDDINGS,
                                      type=T.TOKEN_EMBEDDING,
                                      get_default_model=Roberta.get_default_model,
                                      get_pretrained_model=Roberta.get_pretrained_model,
                                      pdf_extractor_methods={'default': default_word_embedding_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_word_embed_cols,
                                      output_level=L.TOKEN,
                                      node=NLP_FEATURE_NODES.nodes[A.ROBERTA_EMBEDDINGS],
                                      description='The RoBERTa model_anno_obj was proposed in RoBERTa: A Robustly Optimized BERT Pretraining Approach by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov. It is based on Google’s BERT model_anno_obj released in 2018.',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.ROBERTA_EMBEDDINGS,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ROBERTA_EMBEDDINGS],
                                      has_storage_ref=True,
                                      is_storage_ref_producer=True,
                                      ),

        A.ROBERTA_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                    name=A.ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                    type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                    get_default_model=TokenRoBerta.get_default_model,
                                                    get_pretrained_model=TokenRoBerta.get_pretrained_model,
                                                    pdf_extractor_methods={'default': default_token_classifier_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                    output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                    node=NLP_FEATURE_NODES.nodes[A.ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                    description='RoBertaForTokenClassification can load RoBERTa Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    jsl_anno_class_id=A.ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                    ),
        # TODO: not integrated yet. The loaders and description below are still the BertSentence/BERT placeholders.
        # A.ROBERTA_SENTENCE_EMBEDDINGS: partial(NluComponent,
        #     name=A.ROBERTA_SENTENCE_EMBEDDINGS,
        #     type=T.DOCUMENT_EMBEDDING,
        #     get_default_model=BertSentence.get_default_model,
        #     get_pretrained_model=BertSentence.get_pretrained_model,
        #     pdf_extractor_methods={'default': default_sentence_embedding_config, 'default_full': default_full_config, },
        #     pdf_col_name_substitutor=substitute_sent_embed_cols,
        #     output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
        #     node=NLP_FEATURE_NODES.nodes[A.ROBERTA_SENTENCE_EMBEDDINGS],
        #     description='Sentence-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
        #     provider=ComponentBackends.open_source,
        #     license=Licenses.open_source,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id=A.ROBERTA_SENTENCE_EMBEDDINGS,
        #     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ROBERTA_SENTENCE_EMBEDDINGS],
        #
        #     has_storage_ref=True,
        #     is_storage_ref_producer=True,
        # ),
        A.T5_TRANSFORMER: partial(NluComponent,
                                  # TODO  task based construction, i.e. get_preconfigured_model
                                  name=A.T5_TRANSFORMER,
                                  type=T.DOCUMENT_CLASSIFIER,
                                  get_default_model=T5.get_default_model,
                                  get_pretrained_model=T5.get_pretrained_model,
                                  pdf_extractor_methods={'default': default_T5_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_T5_cols,
                                  output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                  node=NLP_FEATURE_NODES.nodes[A.T5_TRANSFORMER],
                                  description='T5 reconsiders all NLP tasks into a unified text-to-text-format where the input and output are always text strings, in contrast to BERT-style models that can only output either a class label or a span of the input. The text-to-text framework is able to use the same model_anno_obj, loss function, and hyper-parameters on any NLP task, including machine translation, document summarization, question answering, and classification tasks (e.g., sentiment analysis). T5 can even apply to regression tasks by training it to predict the string representation of a number instead of the number itself.',
                                  provider=ComponentBackends.open_source,
                                  license=Licenses.open_source,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=A.T5_TRANSFORMER,
                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[A.T5_TRANSFORMER],
                                  ),
        A.UNIVERSAL_SENTENCE_ENCODER: partial(NluComponent,
                                              name=A.UNIVERSAL_SENTENCE_ENCODER,
                                              type=T.DOCUMENT_EMBEDDING,
                                              get_default_model=SparkNLPUse.get_default_model,
                                              get_pretrained_model=SparkNLPUse.get_pretrained_model,
                                              pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                     'default_full': default_full_config, },
                                              pdf_col_name_substitutor=substitute_sent_embed_cols,
                                              output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                              node=NLP_FEATURE_NODES.nodes[A.UNIVERSAL_SENTENCE_ENCODER],
                                              description='The Universal Sentence Encoder encodes text into high dimensional vectors that can be used for text classification, semantic similarity, clustering and other natural language tasks.',
                                              provider=ComponentBackends.open_source,
                                              license=Licenses.open_source,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              jsl_anno_class_id=A.UNIVERSAL_SENTENCE_ENCODER,
                                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.UNIVERSAL_SENTENCE_ENCODER],
                                              has_storage_ref=True,
                                              is_storage_ref_producer=True,
                                              ),

        A.XLM_ROBERTA_EMBEDDINGS: partial(NluComponent,
                                          name=A.XLM_ROBERTA_EMBEDDINGS,
                                          type=T.TOKEN_EMBEDDING,
                                          get_default_model=XLM.get_default_model,
                                          get_pretrained_model=XLM.get_pretrained_model,
                                          pdf_extractor_methods={'default': default_word_embedding_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_word_embed_cols,
                                          output_level=L.TOKEN,
                                          node=NLP_FEATURE_NODES.nodes[A.XLM_ROBERTA_EMBEDDINGS],
                                          description='The XLM-RoBERTa model_anno_obj was proposed in Unsupervised Cross-lingual Representation Learning at Scale by Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco GuzmÃ¡n, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. It is based on Facebook’s RoBERTa model_anno_obj released in 2019. It is a large multi-lingual language model_anno_obj, trained on 2.5TB of filtered CommonCrawl data.',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.XLM_ROBERTA_EMBEDDINGS,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.XLM_ROBERTA_EMBEDDINGS],
                                          has_storage_ref=True,
                                          is_storage_ref_producer=True,
                                          ),

        A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                        name=A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                        type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                        get_default_model=TokenXlmRoBerta.get_default_model,
                                                        get_pretrained_model=TokenXlmRoBerta.get_pretrained_model,
                                                        pdf_extractor_methods={
                                                            'default': default_token_classifier_config,
                                                            'default_full': default_full_config, },
                                                        pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                        output_level=L.TOKEN,
                                                        node=NLP_FEATURE_NODES.nodes[
                                                            A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                        description='XlmRoBertaForTokenClassification can load XLM-RoBERTa Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                        provider=ComponentBackends.open_source,
                                                        license=Licenses.open_source,
                                                        computation_context=ComputeContexts.spark,
                                                        output_context=ComputeContexts.spark,
                                                        jsl_anno_class_id=A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                            A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                        ),
        A.XLM_ROBERTA_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                                   name=A.XLM_ROBERTA_SENTENCE_EMBEDDINGS,
                                                   type=T.DOCUMENT_EMBEDDING,
                                                   get_default_model=Sentence_XLM.get_default_model,
                                                   get_pretrained_model=Sentence_XLM.get_pretrained_model,
                                                   pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_sent_embed_cols,
                                                   output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                                   node=NLP_FEATURE_NODES.nodes[A.XLM_ROBERTA_SENTENCE_EMBEDDINGS],
                                                   description='Sentence-level embeddings using XLM-RoBERTa. The XLM-RoBERTa model_anno_obj was proposed in Unsupervised Cross-lingual Representation Learning at Scale by Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco GuzmÃ¡n, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. It is based on Facebook’s RoBERTa model_anno_obj released in 2019. It is a large multi-lingual language model_anno_obj, trained on 2.5TB of filtered CommonCrawl data.',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=A.XLM_ROBERTA_SENTENCE_EMBEDDINGS,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.XLM_ROBERTA_SENTENCE_EMBEDDINGS],
                                                   has_storage_ref=True,
                                                   is_storage_ref_producer=True,
                                                   ),
        A.XLNET_EMBEDDINGS: partial(NluComponent,
                                    name=A.XLNET_EMBEDDINGS,
                                    type=T.TOKEN_EMBEDDING,
                                    get_default_model=SparkNLPXlnet.get_default_model,
                                    get_pretrained_model=SparkNLPXlnet.get_pretrained_model,
                                    pdf_extractor_methods={'default': default_word_embedding_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_word_embed_cols,
                                    output_level=L.TOKEN,
                                    node=NLP_FEATURE_NODES.nodes[A.XLNET_EMBEDDINGS],
                                    description='XLNet is a new unsupervised language representation learning method based on a novel generalized permutation language modeling objective. Additionally, XLNet employs Transformer-XL as the backbone model_anno_obj, exhibiting excellent performance for language tasks involving long context. Overall, XLNet achieves state-of-the-art (SOTA) results on various downstream language tasks including question answering, natural language inference, sentiment analysis, and document ranking.',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.XLNET_EMBEDDINGS,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.XLNET_EMBEDDINGS],
                                    has_storage_ref=True,
                                    is_storage_ref_producer=True,
                                    ),
        A.XLNET_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                  name=A.XLNET_FOR_TOKEN_CLASSIFICATION,
                                                  type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                  get_default_model=TokenXlnet.get_default_model,
                                                  get_pretrained_model=TokenXlnet.get_pretrained_model,
                                                  pdf_extractor_methods={'default': default_token_classifier_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                  output_level=L.TOKEN,
                                                  node=NLP_FEATURE_NODES.nodes[A.XLNET_FOR_TOKEN_CLASSIFICATION],
                                                  description='XlnetForTokenClassification can load XLNet Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                  provider=ComponentBackends.open_source,
                                                  license=Licenses.open_source,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=A.XLNET_FOR_TOKEN_CLASSIFICATION,
                                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                      A.XLNET_FOR_TOKEN_CLASSIFICATION],
                                                  ),

        A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                           name=A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                           type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                           get_default_model=SeqXlmRobertaClassifier.get_default_model,
                                                           get_pretrained_model=SeqXlmRobertaClassifier.get_pretrained_model,
                                                           pdf_extractor_methods={
                                                               'default': default_classifier_dl_config,
                                                               'default_full': default_full_config, },
                                                           pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                           output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                           node=NLP_FEATURE_NODES.nodes[
                                                               A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                           description='XlmRoBertaForSequenceClassification can load XLM-RoBERTa Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification task',
                                                           provider=ComponentBackends.open_source,
                                                           license=Licenses.open_source,
                                                           computation_context=ComputeContexts.spark,
                                                           output_context=ComputeContexts.spark,
                                                           jsl_anno_class_id=A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                               A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                           ),
        A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                       name=A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                       get_default_model=SeqRobertaClassifier.get_default_model,
                                                       get_pretrained_model=SeqRobertaClassifier.get_pretrained_model,
                                                       pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                              'default_full': default_full_config, },
                                                       pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                       output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       description='RoBertaForSequenceClassification can load RoBERTa Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks',
                                                       provider=ComponentBackends.open_source,
                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       ),

        A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                          name=A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION,
                                                          type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                          get_default_model=SeqLongformerClassifier.get_default_model,
                                                          get_pretrained_model=SeqLongformerClassifier.get_pretrained_model,
                                                          pdf_extractor_methods={
                                                              'default': default_classifier_dl_config,
                                                              'default_full': default_full_config, },
                                                          pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                          output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                          node=NLP_FEATURE_NODES.nodes[
                                                              A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION],
                                                          description='RoBertaForSequenceClassification can load RoBERTa Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks',
                                                          provider=ComponentBackends.open_source,
                                                          license=Licenses.open_source,
                                                          computation_context=ComputeContexts.spark,
                                                          output_context=ComputeContexts.spark,
                                                          jsl_anno_class_id=A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION,
                                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                              A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION],
                                                          ),
        A.ALBERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                      name=A.ALBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                      type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                      get_default_model=SeqAlbertClassifier.get_default_model,
                                                      get_pretrained_model=SeqAlbertClassifier.get_pretrained_model,
                                                      pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                             'default_full': default_full_config, },
                                                      pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                      node=NLP_FEATURE_NODES.nodes[
                                                          A.ALBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                      description='AlbertForSequenceClassification can load ALBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                      provider=ComponentBackends.open_source,
                                                      license=Licenses.open_source,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      jsl_anno_class_id=A.ALBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                          A.ALBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                      ),

        A.XLNET_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                     name=A.XLNET_FOR_SEQUENCE_CLASSIFICATION,
                                                     type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                     get_default_model=SeqXlnetClassifier.get_default_model,
                                                     get_pretrained_model=SeqXlnetClassifier.get_pretrained_model,
                                                     pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                            'default_full': default_full_config, },
                                                     pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                     output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                     node=NLP_FEATURE_NODES.nodes[A.XLNET_FOR_SEQUENCE_CLASSIFICATION],
                                                     description='AlbertForSequenceClassification can load ALBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                     provider=ComponentBackends.open_source,
                                                     license=Licenses.open_source,
                                                     computation_context=ComputeContexts.spark,
                                                     output_context=ComputeContexts.spark,
                                                     jsl_anno_class_id=A.XLNET_FOR_SEQUENCE_CLASSIFICATION,
                                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                         A.XLNET_FOR_SEQUENCE_CLASSIFICATION],
                                                     ),

        A.GPT2: partial(NluComponent,
                        name=A.GPT2,
                        type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                        get_default_model=GPT2.get_default_model,
                        get_pretrained_model=GPT2.get_pretrained_model,
                        pdf_extractor_methods={'default': default_gpt2_config, 'default_full': default_full_config, },
                        pdf_col_name_substitutor=substitute_gpt2_cols,  # TIODO TESt
                        output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                        node=NLP_FEATURE_NODES.nodes[A.GPT2],
                        description='AlbertForSequenceClassification can load ALBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                        provider=ComponentBackends.open_source,
                        license=Licenses.open_source,
                        computation_context=ComputeContexts.spark,
                        output_context=ComputeContexts.spark,
                        jsl_anno_class_id=A.GPT2,
                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.GPT2],
                        ),

        A.WORD_2_VEC: partial(NluComponent,  # TOOD
                              name=A.WORD_2_VEC,
                              type=T.TOKEN_EMBEDDING,
                              get_default_model=Word2Vec.get_default_model,
                              get_pretrained_model=Word2Vec.get_pretrained_model,
                              get_trainable_model=Word2Vec.get_trainable_model,
                              pdf_extractor_methods={'default': default_word_embedding_config,
                                                     'default_full': default_full_config, },
                              pdf_col_name_substitutor=substitute_word_embed_cols,  # TODO?
                              output_level=L.TOKEN,
                              node=NLP_FEATURE_NODES.nodes[A.WORD_2_VEC],
                              description='We use Word2Vec implemented in Spark ML. It uses skip-gram model_anno_obj in our implementation and a hierarchical softmax method to train the model_anno_obj. The variable names in the implementation match the original C implementation.',
                              provider=ComponentBackends.open_source,
                              license=Licenses.open_source,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=A.WORD_2_VEC,
                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WORD_2_VEC],
                              has_storage_ref=True,
                              is_storage_ref_producer=True,
                              ),

        A.DEBERTA_WORD_EMBEDDINGS: partial(NluComponent,
                                           name=A.DEBERTA_WORD_EMBEDDINGS,
                                           type=T.TOKEN_EMBEDDING,
                                           get_default_model=Deberta.get_default_model,
                                           get_pretrained_model=Deberta.get_pretrained_model,
                                           pdf_extractor_methods={'default': default_word_embedding_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_word_embed_cols,
                                           output_level=L.TOKEN,
                                           node=NLP_FEATURE_NODES.nodes[A.DEBERTA_WORD_EMBEDDINGS],
                                           description='Token-level embeddings using DeBERTa. The DeBERTa model_anno_obj was proposed in DeBERTa: Decoding-enhanced BERT with Disentangled Attention by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. It is based on Google’s BERT model_anno_obj released in 2018 and Facebook’s RoBERTa model_anno_obj released in 2019.',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.DEBERTA_WORD_EMBEDDINGS,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DEBERTA_WORD_EMBEDDINGS],
                                           has_storage_ref=True,
                                           is_storage_ref_producer=True,
                                           ),

        A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                       name=A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                       get_default_model=SeqDebertaClassifier.get_default_model,
                                                       get_pretrained_model=SeqDebertaClassifier.get_pretrained_model,
                                                       pdf_extractor_methods={
                                                           'default': default_classifier_dl_config,
                                                           'default_full': default_full_config, },
                                                       pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                       output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       description='The DeBERTa model_anno_obj was proposed in DeBERTa: Decoding-enhanced BERT with Disentangled Attention by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. It is based on Google’s BERT model_anno_obj released in 2018 and Facebook’s RoBERTa model_anno_obj released in 2019. This classifier uses DeBERTa embeddingss with a linear classification head ontop.',
                                                       provider=ComponentBackends.open_source,

                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       ),

        ######### HEALTHCARE ##############

        H_A.ASSERTION_DL: partial(NluComponent,
                                  name=H_A.ASSERTION_DL,
                                  type=T.CHUNK_CLASSIFIER,
                                  get_default_model=AssertionDL.get_default_model,
                                  get_pretrained_model=AssertionDL.get_pretrained_model,
                                  get_trainable_model=AssertionDL.get_default_trainable_model,
                                  pdf_extractor_methods={'default': default_assertion_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_assertion_cols,
                                  output_level=L.CHUNK,
                                  node=NLP_HC_FEATURE_NODES.nodes[H_A.ASSERTION_DL],
                                  description='Deep Learning based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                  provider=ComponentBackends.hc,
                                  license=Licenses.hc,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=H_A.ASSERTION_DL,
                                  jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ASSERTION_DL],
                                  has_storage_ref=True,
                                  is_storage_ref_consumer=True,
                                  trainable_mirror_anno=H_A.TRAINABLE_ASSERTION_DL
                                  ),
        H_A.TRAINABLE_ASSERTION_DL: partial(NluComponent,
                                            name=H_A.TRAINABLE_ASSERTION_DL,
                                            type=T.CHUNK_CLASSIFIER,
                                            get_default_model=AssertionDL.get_default_model,
                                            get_pretrained_model=AssertionDL.get_pretrained_model,
                                            get_trainable_model=AssertionDL.get_default_trainable_model,
                                            pdf_extractor_methods={'default': default_assertion_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_assertion_cols,
                                            output_level=L.CHUNK,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_ASSERTION_DL],
                                            description='Trainable Deep Learning based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                            provider=ComponentBackends.hc,
                                            license=Licenses.hc,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.TRAINABLE_ASSERTION_DL,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.TRAINABLE_ASSERTION_DL],
                                            has_storage_ref=True,
                                            is_storage_ref_consumer=True,
                                            trainable=True,
                                            trained_mirror_anno=H_A.ASSERTION_DL),
        # H_A.ASSERTION_FILTERER: partial(NluComponent, # TODO not integrated
        #     name=H_A.ASSERTION_FILTERER,
        #     type=T.CHUNK_FILTERER,
        #     get_default_model=AssertionDL.get_default_model,
        #     get_pretrained_model=AssertionDL.get_pretrained_model,
        #     get_trainable_model=AssertionDL.get_default_trainable_model,
        #     pdf_extractor_methods={'default': default_assertion_config, 'default_full': default_full_config, },
        #     pdf_col_name_substitutor=substitute_assertion_cols,
        #     pipe_prediction_output_level=L.CHUNK,
        #     node=NLP_HC_FEATURE_NODES.ASSERTION_DL,
        #     description='Trainable Deep Learning based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
        #     provider=ComponentBackends.hc,
        #     license=Licenses.hc,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id_id=H_A.ASSERTION_FILTERER,
        #     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ASSERTION_FILTERER],
        #
        #     has_storage_ref=True,
        #     is_is_storage_ref_consumer=True,
        #     trainable=True,
        #     trained_mirror_anno=H_A.ASSERTION_FILTERER), AssertionLogReg
        H_A.ASSERTION_LOG_REG: partial(NluComponent,
                                       name=H_A.ASSERTION_LOG_REG,
                                       type=T.CHUNK_CLASSIFIER,
                                       get_default_model=AssertionLogReg.get_default_model,
                                       get_pretrained_model=AssertionLogReg.get_pretrained_model,
                                       get_trainable_model=AssertionLogReg.get_default_trainable_model,
                                       pdf_extractor_methods={'default': default_assertion_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_assertion_cols,
                                       output_level=L.CHUNK,
                                       node=NLP_HC_FEATURE_NODES.nodes[H_A.ASSERTION_LOG_REG],
                                       description='Classical ML based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                       provider=ComponentBackends.hc,
                                       license=Licenses.hc,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=H_A.ASSERTION_LOG_REG,
                                       jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ASSERTION_LOG_REG],
                                       trained_mirror_anno=H_A.TRAINABLE_ASSERTION_LOG_REG),
        H_A.TRAINABLE_ASSERTION_LOG_REG: partial(NluComponent,
                                                 name=H_A.TRAINABLE_ASSERTION_LOG_REG,
                                                 type=T.CHUNK_CLASSIFIER,
                                                 get_default_model=AssertionLogReg.get_default_model,
                                                 get_pretrained_model=AssertionLogReg.get_pretrained_model,
                                                 get_trainable_model=AssertionLogReg.get_default_trainable_model,
                                                 pdf_extractor_methods={'default': default_assertion_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_assertion_cols,
                                                 output_level=L.CHUNK,
                                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_ASSERTION_LOG_REG],
                                                 description='Classical ML based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                                 provider=ComponentBackends.hc,
                                                 license=Licenses.hc,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=H_A.TRAINABLE_ASSERTION_LOG_REG,
                                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                     H_A.TRAINABLE_ASSERTION_LOG_REG],
                                                 trained_mirror_anno=H_A.ASSERTION_LOG_REG),
        # The following annotator ids map to plain status strings instead of a
        # `partial(NluComponent, ...)` spec: either not integrated yet or deprecated.
        H_A.CHUNK2TOKEN: 'TODO not integrated',
        H_A.CHUNK_ENTITY_RESOLVER: 'Deprecated',
        H_A.TRAINABLE_CHUNK_ENTITY_RESOLVER: 'Deprecated',
        H_A.CHUNK_FILTERER: 'TODO not integrated',
        H_A.CHUNK_KEY_PHRASE_EXTRACTION: 'TODO not integrated',
        H_A.CHUNK_MERGE: partial(NluComponent,
                                 name=H_A.CONTEXTUAL_PARSER,
                                 type=T.CHUNK_CLASSIFIER,

                                 get_default_model=ContextualParser.get_default_model,
                                 get_trainable_model=ContextualParser.get_trainable_model,
                                 # TODO method extractr method
                                 pdf_extractor_methods={'default': default_chunk_config,
                                                        'default_full': default_full_config, },
                                 # TODO  substitor
                                 pdf_col_name_substitutor=substitute_chunk_cols,
                                 output_level=L.CHUNK,
                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.CHUNK_MERGE],
                                 description='Rule based entity extractor.',
                                 provider=ComponentBackends.hc,
                                 license=Licenses.hc,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=H_A.CHUNK_MERGE,
                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.CHUNK_MERGE], ),
        H_A.CONTEXTUAL_PARSER: partial(NluComponent,
                                       name=H_A.CONTEXTUAL_PARSER,
                                       type=T.CHUNK_CLASSIFIER,
                                       get_default_model=ContextualParser.get_default_model,
                                       get_trainable_model=ContextualParser.get_trainable_model,
                                       # TODO extractr method
                                       pdf_extractor_methods={'default': default_full_config,
                                                              'default_full': default_full_config, },
                                       # TODO  substitor
                                       pdf_col_name_substitutor=substitute_context_parser_cols,
                                       output_level=L.CHUNK,
                                       node=NLP_HC_FEATURE_NODES.nodes[H_A.CONTEXTUAL_PARSER],
                                       description='Rule based entity extractor.',
                                       provider=ComponentBackends.hc,
                                       license=Licenses.hc,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=H_A.CONTEXTUAL_PARSER,
                                       jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.CONTEXTUAL_PARSER], ),
        H_A.DE_IDENTIFICATION: partial(NluComponent,
                                       name=H_A.DE_IDENTIFICATION,
                                       type=T.CHUNK_CLASSIFIER,
                                       get_default_model=Deidentifier.get_default_model,
                                       get_pretrained_model=Deidentifier.get_pretrained_model,
                                       pdf_extractor_methods={'default': default_de_identification_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_de_identification_cols,
                                       output_level=L.DOCUMENT,
                                       node=NLP_HC_FEATURE_NODES.nodes[H_A.DE_IDENTIFICATION],
                                       description='De-Identify named entity according to various Healthcare Data Protection standards',
                                       provider=ComponentBackends.hc,
                                       license=Licenses.hc,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=H_A.DE_IDENTIFICATION,
                                       jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.DE_IDENTIFICATION],
                                       trainable_mirror_anno=H_A.TRAINABLE_DE_IDENTIFICATION

                                       ),
        H_A.TRAINABLE_DE_IDENTIFICATION: partial(NluComponent, # TODO WIP 
                                                 name=H_A.TRAINABLE_DE_IDENTIFICATION,
                                                 type=T.CHUNK_CLASSIFIER,
                                                 get_default_model=Deidentifier.get_default_model,
                                                 get_pretrained_model=Deidentifier.get_pretrained_model,
                                                 get_trainable_model=Deidentifier.get_trainable_model,
                                                 pdf_extractor_methods={'default': default_de_identification_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_de_identification_cols,
                                                 output_level=L.DOCUMENT,
                                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_DE_IDENTIFICATION],
                                                 description='De-Identify named entity according to various Healthcare Data Protection standards',
                                                 provider=ComponentBackends.hc,
                                                 license=Licenses.hc,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=H_A.TRAINABLE_DE_IDENTIFICATION,
                                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                     H_A.TRAINABLE_DE_IDENTIFICATION],
                                                 trainable=True,
                                                 trained_mirror_anno=H_A.DE_IDENTIFICATION

                                                 ),

        # Placeholders: these two annotator ids map to a marker string instead of
        # an NluComponent factory, i.e. they are not integrated yet.
        H_A.DOCUMENT_LOG_REG_CLASSIFIER: 'TODO not integrated',
        H_A.TRAINABLE_DOCUMENT_LOG_REG_CLASSIFIER: 'TODO not integrated',
        H_A.DRUG_NORMALIZER: partial(NluComponent,
                                     name=H_A.DRUG_NORMALIZER,
                                     type=T.CHUNK_CLASSIFIER,
                                     get_default_model=DrugNorm.get_default_model,
                                     pdf_extractor_methods={'default': default_only_result_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_drug_normalizer_cols,
                                     output_level=L.DOCUMENT,
                                     node=NLP_HC_FEATURE_NODES.nodes[H_A.DRUG_NORMALIZER],
                                     description='Normalizes raw clinical and crawled text which contains drug names into cleaned and standardized representation',
                                     provider=ComponentBackends.hc,
                                     license=Licenses.hc,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=H_A.DRUG_NORMALIZER,
                                     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.DRUG_NORMALIZER], ),
        # H_A.FEATURES_ASSEMBLER: partial(NluComponent, # TODO partially integrated; feature node missing
        #     name=H_A.FEATURES_ASSEMBLER,
        #     type=T.HELPER_ANNO,
        #     get_default_model=SparkNLPFeatureAssembler.get_default_model,
        #     pdf_extractor_methods={'default': default_feature_assembler_config, 'default_full': default_full_config, },
        #     # pdf_col_name_substitutor=substitute_drug_normalizer_cols, # TODO no substition
        #     pipe_prediction_output_level=L.DOCUMENT, # TODO double check output level?
        #     node=NLP_HC_FEATURE_NODES.FEATURES_ASSEMBLER,
        #     description='Aggregated features from various annotators into one column for training generic classifiers',
        #     provider=ComponentBackends.hc,
        #     license=Licenses.hc,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id_id=H_A.FEATURES_ASSEMBLER,
        #     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.FEATURES_ASSEMBLER],
        #
        H_A.GENERIC_CLASSIFIER: partial(NluComponent,
                                        name=H_A.GENERIC_CLASSIFIER,
                                        type=T.DOCUMENT_CLASSIFIER,
                                        get_default_model=GenericClassifier.get_default_model,
                                        get_trainable_model=GenericClassifier.get_default_model,
                                        get_pretrained_model=GenericClassifier.get_default_model,
                                        pdf_extractor_methods={'default': default_generic_classifier_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_generic_classifier_parser_cols,
                                        output_level=L.DOCUMENT,
                                        node=NLP_HC_FEATURE_NODES.nodes[H_A.GENERIC_CLASSIFIER],
                                        description='Generic Deep Learning based tensorflow classifier',
                                        provider=ComponentBackends.hc,
                                        license=Licenses.hc,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=H_A.GENERIC_CLASSIFIER,
                                        jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.GENERIC_CLASSIFIER],
                                        trainable_mirror_anno=H_A.TRAINABLE_GENERIC_CLASSIFIER
                                        ),
        H_A.TRAINABLE_GENERIC_CLASSIFIER: partial(NluComponent,
                                                  name=H_A.TRAINABLE_GENERIC_CLASSIFIER,
                                                  type=T.DOCUMENT_CLASSIFIER,
                                                  get_default_model=GenericClassifier.get_default_model,
                                                  get_trainable_model=GenericClassifier.get_default_model,
                                                  get_pretrained_model=GenericClassifier.get_default_model,
                                                  pdf_extractor_methods={'default': default_generic_classifier_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_generic_classifier_parser_cols,
                                                  output_level=L.DOCUMENT,
                                                  node=NLP_HC_FEATURE_NODES.nodes[H_A.GENERIC_CLASSIFIER],
                                                  description='Generic Deep Learning based tensorflow classifier',
                                                  provider=ComponentBackends.hc,
                                                  license=Licenses.hc,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=H_A.TRAINABLE_GENERIC_CLASSIFIER,
                                                  jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                      H_A.TRAINABLE_GENERIC_CLASSIFIER],

                                                  trained_mirror_anno=H_A.GENERIC_CLASSIFIER
                                                  ),
        # Placeholder: mapped to a marker string instead of an NluComponent factory.
        H_A.IOB_TAGGER: 'TODO not integrated',
        H_A.MEDICAL_NER: partial(NluComponent,
                                 name=H_A.MEDICAL_NER,
                                 type=T.CHUNK_CLASSIFIER,
                                 get_default_model=NERDLHealthcare.get_default_model,
                                 get_trainable_model=NERDLHealthcare.get_default_trainable_model,
                                 get_pretrained_model=NERDLHealthcare.get_pretrained_model,
                                 pdf_extractor_methods={'default': default_ner_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_ner_dl_cols,
                                 output_level=L.TOKEN,
                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.MEDICAL_NER],
                                 description='Deep Learning based Medical Named Entity Recognizer (NER)',
                                 provider=ComponentBackends.hc,
                                 license=Licenses.hc,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=H_A.MEDICAL_NER,
                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.MEDICAL_NER],
                                 trainable_mirror_anno=H_A.TRAINABLE_MEDICAL_NER,
                                 has_storage_ref=True,
                                 is_storage_ref_consumer=True
                                 ),
        H_A.TRAINABLE_MEDICAL_NER: partial(NluComponent,
                                           name=H_A.TRAINABLE_MEDICAL_NER,
                                           type=T.CHUNK_CLASSIFIER,
                                           get_default_model=NERDLHealthcare.get_default_model,
                                           get_trainable_model=NERDLHealthcare.get_default_model,
                                           get_pretrained_model=NERDLHealthcare.get_default_model,
                                           pdf_extractor_methods={'default': default_ner_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_ner_dl_cols,
                                           output_level=L.TOKEN,
                                           node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_MEDICAL_NER],
                                           description='Trainable Deep Learning based Medical Named Entity Recognizer (NER)',
                                           provider=ComponentBackends.hc,
                                           license=Licenses.hc,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=H_A.TRAINABLE_MEDICAL_NER,
                                           jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.TRAINABLE_MEDICAL_NER],
                                           trained_mirror_anno=H_A.TRAINABLE_MEDICAL_NER,
                                           has_storage_ref=True,
                                           is_storage_ref_consumer=True
                                           ),
        # Placeholder: mapped to a marker string instead of an NluComponent factory.
        H_A.NER_CHUNKER: 'TODO not integrated',
        H_A.NER_CONVERTER_INTERNAL: partial(NluComponent,
                                            name=H_A.NER_CONVERTER_INTERNAL,
                                            type=T.HELPER_ANNO,
                                            get_default_model=NerToChunkConverterLicensed.get_default_model,
                                            pdf_extractor_methods={'default': default_NER_converter_licensed_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_ner_internal_converter_cols,
                                            output_level=L.CHUNK,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.NER_CONVERTER_INTERNAL],
                                            description='Convert NER-IOB tokens into concatenated strings (aka chunks)',
                                            provider=ComponentBackends.hc,
                                            license=Licenses.hc,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.NER_CONVERTER_INTERNAL,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.NER_CONVERTER_INTERNAL],

                                            ),
        # Placeholders: these annotator ids map to a marker string instead of an
        # NluComponent factory, i.e. they are not integrated yet.
        H_A.NER_DISAMBIGUATOR: 'TODO not integrated',
        H_A.RELATION_NER_CHUNKS_FILTERER: 'TODO not integrated',
        H_A.RE_IDENTIFICATION: 'TODO not integrated',
        H_A.RELATION_EXTRACTION: partial(NluComponent,
                                         name=H_A.RELATION_EXTRACTION,
                                         type=T.RELATION_CLASSIFIER,
                                         get_default_model=RelationExtraction.get_default_model,
                                         get_pretrained_model=RelationExtraction.get_pretrained_model,
                                         get_trainable_model=RelationExtraction.get_default_trainable_model,
                                         pdf_extractor_methods={'default': default_relation_extraction_config,
                                                                'positional': default_relation_extraction_positional_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=substitute_relation_cols,
                                         output_level=L.RELATION,
                                         node=NLP_HC_FEATURE_NODES.nodes[H_A.RELATION_EXTRACTION],
                                         description='Classical ML model_anno_obj for predicting relation ship between entity pairs',
                                         provider=ComponentBackends.hc,
                                         license=Licenses.hc,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=H_A.RELATION_EXTRACTION,
                                         jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.RELATION_EXTRACTION],
                                         trainable_mirror_anno=H_A.TRAINABLE_RELATION_EXTRACTION,
                                         has_storage_ref=True,
                                         is_storage_ref_consumer=True

                                         ),
        H_A.TRAINABLE_RELATION_EXTRACTION: partial(NluComponent,
                                                   name=H_A.TRAINABLE_RELATION_EXTRACTION,
                                                   type=T.RELATION_CLASSIFIER,
                                                   get_default_model=RelationExtraction.get_default_model,
                                                   get_pretrained_model=RelationExtraction.get_pretrained_model,
                                                   get_trainable_model=RelationExtraction.get_default_trainable_model,
                                                   pdf_extractor_methods={'default': default_relation_extraction_config,
                                                                          'positional': default_relation_extraction_positional_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_relation_cols,
                                                   output_level=L.RELATION,
                                                   node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_RELATION_EXTRACTION],
                                                   description='Trainable Classical ML model_anno_obj for predicting relation ship between entity pairs',
                                                   provider=ComponentBackends.hc,
                                                   license=Licenses.hc,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=H_A.TRAINABLE_RELATION_EXTRACTION,
                                                   jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                       H_A.TRAINABLE_RELATION_EXTRACTION],
                                                   trained_mirror_anno=H_A.RELATION_EXTRACTION,
                                                   trainable=True,
                                                   has_storage_ref=True,
                                                   is_storage_ref_consumer=True
                                                   ),

        H_A.ZERO_SHOT_RELATION_EXTRACTION: partial(NluComponent,
                                                   name=H_A.ZERO_SHOT_RELATION_EXTRACTION,
                                                   type=T.RELATION_CLASSIFIER,
                                                   get_default_model=ZeroShotRelationExtractor.get_default_model,
                                                   get_pretrained_model=ZeroShotRelationExtractor.get_pretrained_model,
                                                   pdf_extractor_methods={'default': default_relation_extraction_config,
                                                                          'positional': default_relation_extraction_positional_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_relation_cols,
                                                   output_level=L.RELATION,
                                                   node=NLP_HC_FEATURE_NODES.nodes[H_A.ZERO_SHOT_RELATION_EXTRACTION],
                                                   description='Zero-shot relation extraction model_anno_obj that leverages BertForSequenceClassificaiton to return, based on a predefined set of relation',
                                                   provider=ComponentBackends.hc,
                                                   license=Licenses.hc,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=H_A.ZERO_SHOT_RELATION_EXTRACTION,
                                                   jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                       H_A.ZERO_SHOT_RELATION_EXTRACTION],
                                                   trained_mirror_anno=H_A.RELATION_EXTRACTION,
                                                   ),

        H_A.RELATION_EXTRACTION_DL: partial(NluComponent,
                                            name=H_A.RELATION_EXTRACTION_DL,
                                            type=T.RELATION_CLASSIFIER,
                                            get_default_model=RelationExtractionDL.get_default_model,
                                            get_pretrained_model=RelationExtractionDL.get_pretrained_model,
                                            # get_trainable_model=RelationExtractionDL.get_default_trainable_model,
                                            pdf_extractor_methods={'default': default_relation_extraction_config,
                                                                   'positional': default_relation_extraction_positional_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_relation_cols,
                                            output_level=L.RELATION,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.RELATION_EXTRACTION_DL],
                                            description='Deep Learning based model_anno_obj for predicting relation ship between entity pairs',
                                            provider=ComponentBackends.hc,
                                            license=Licenses.hc,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.RELATION_EXTRACTION_DL,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.RELATION_EXTRACTION_DL],

                                            # trainable_mirror_anno=H_A.TRAINABLE_RELATION_EXTRACTION_DL
                                            ),
        # H_A.TRAINABLE_RELATION_EXTRACTION_DL: partial(NluComponent, # DOES NOT EXIST!
        #     name=H_A.TRAINABLE_RELATION_EXTRACTION_DL,
        #     type=T.RELATION_CLASSIFIER,
        #     get_default_model=RelationExtractionDL.get_default_model,
        #     get_pretrained_model=RelationExtractionDL.get_pretrained_model,
        #     pdf_extractor_methods={ 'default': default_relation_extraction_config, 'positional': default_relation_extraction_positional_config, 'default_full'  : default_full_config, },
        #     pdf_col_name_substitutor=substitute_relation_cols,
        #     pipe_prediction_output_level=L.RELATION,
        #     node=NLP_HC_FEATURE_NODES.TRAINABLE_RELATION_EXTRACTION_DL,
        #     description='Trainable Deep Learning based model_anno_obj for predicting relation ship between entity pairs',
        #     provider=ComponentBackends.hc,
        #     license=Licenses.hc,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id_id=H_A.TRAINABLE_RELATION_EXTRACTION_DL,
        #     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.TRAINABLE_RELATION_EXTRACTION_DL],
        #
        #     trained_mirror_anno=H_A.RELATION_EXTRACTION_DL,
        #     trainable=True
        # ),
        H_A.SENTENCE_ENTITY_RESOLVER: partial(NluComponent,
                                              name=H_A.SENTENCE_ENTITY_RESOLVER,
                                              type=T.CHUNK_CLASSIFIER,
                                              get_pretrained_model=SentenceResolver.get_pretrained_model,
                                              get_trainable_model=SentenceResolver.get_default_trainable_model,
                                              pdf_extractor_methods={'default': resolver_conifg_with_metadata,
                                                                     'default_full': full_resolver_config, },
                                              pdf_col_name_substitutor=substitute_sentence_resolution_cols,
                                              output_level=L.CHUNK,
                                              node=NLP_HC_FEATURE_NODES.nodes[H_A.SENTENCE_ENTITY_RESOLVER],
                                              description='Deep Learning based entity resolver which extracts resolved entities directly from Sentence Embedding. No NER model_anno_obj required.',
                                              provider=ComponentBackends.hc,
                                              license=Licenses.hc,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              jsl_anno_class_id=H_A.SENTENCE_ENTITY_RESOLVER,
                                              jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                  H_A.SENTENCE_ENTITY_RESOLVER],

                                              trained_mirror_anno=H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER,
                                              is_storage_ref_consumer=True,
                                              has_storage_ref=True
                                              ),
        H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER: partial(NluComponent,
                                                        name=H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER,
                                                        type=T.CHUNK_CLASSIFIER,
                                                        get_pretrained_model=SentenceResolver.get_pretrained_model,
                                                        get_trainable_model=SentenceResolver.get_default_trainable_model,
                                                        pdf_extractor_methods={
                                                            'default': default_chunk_resolution_config,
                                                            'default_full': default_full_config, },
                                                        pdf_col_name_substitutor=substitute_sentence_resolution_cols,
                                                        output_level=L.RELATION,
                                                        node=NLP_HC_FEATURE_NODES.nodes[
                                                            H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER],
                                                        description='Trainable Deep Learning based entity resolver which extracts resolved entities directly from Sentence Embedding. No NER model_anno_obj required.',
                                                        provider=ComponentBackends.hc,
                                                        license=Licenses.hc,
                                                        computation_context=ComputeContexts.spark,
                                                        output_context=ComputeContexts.spark,
                                                        jsl_anno_class_id=H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER,
                                                        jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                            H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER],
                                                        trained_mirror_anno=H_A.SENTENCE_ENTITY_RESOLVER,
                                                        is_storage_ref_consumer=True,
                                                        trainable=True,
                                                        has_storage_ref=True
                                                        ),
        H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                           name=H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                           type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                           get_default_model=TokenBertHealthcare.get_default_model,
                                                           get_pretrained_model=TokenBertHealthcare.get_pretrained_model,
                                                           pdf_extractor_methods={
                                                               'default': default_token_classifier_config,
                                                               'default_full': default_full_config, },
                                                           pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                           output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                           node=NLP_HC_FEATURE_NODES.nodes[
                                                               H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION],
                                                           description='MedicalBertForTokenClassification can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                           provider=ComponentBackends.open_source,
                                                           license=Licenses.hc,
                                                           computation_context=ComputeContexts.spark,
                                                           output_context=ComputeContexts.spark,
                                                           jsl_anno_class_id=H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                           jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                               H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION],

                                                           ),

        H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                              name=H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                              type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                              get_default_model=SeqBertMedicalClassifier.get_default_model,
                                                              get_pretrained_model=SeqBertMedicalClassifier.get_pretrained_model,
                                                              pdf_extractor_methods={
                                                                  'default': default_classifier_dl_config,
                                                                  'default_full': default_full_config, },
                                                              pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                              output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                              # Handled like NER model_anno_obj
                                                              node=NLP_HC_FEATURE_NODES.nodes[
                                                                  H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                              description='Custom Architecture John Snow labs developed, called MedicalBertForSequenceClassification. It can load BERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                              provider=ComponentBackends.hc,
                                                              license=Licenses.hc,
                                                              computation_context=ComputeContexts.spark,
                                                              output_context=ComputeContexts.spark,
                                                              jsl_anno_class_id=H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                              jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                                  H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                              ),

        H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                                    name=H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                                    type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                                    get_default_model=SeqDilstilBertMedicalClassifier.get_default_model,
                                                                    get_pretrained_model=SeqDilstilBertMedicalClassifier.get_pretrained_model,
                                                                    pdf_extractor_methods={
                                                                        'default': default_classifier_dl_config,
                                                                        'default_full': default_full_config, },
                                                                    pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                                    # Handled like NER model_anno_obj
                                                                    node=NLP_HC_FEATURE_NODES.nodes[
                                                                        H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                                    description='Custom Architecture John Snow labs developed, called MedicalDistilBertForSequenceClassification. It can load DistilBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                                    provider=ComponentBackends.hc,
                                                                    license=Licenses.hc,
                                                                    computation_context=ComputeContexts.spark,
                                                                    output_context=ComputeContexts.spark,
                                                                    jsl_anno_class_id=H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                                    jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                                        H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                                    ),

        ######### OCR ##############
        O_A.IMAGE2TEXT: partial(NluComponent,
                                name=O_A.IMAGE2TEXT,
                                type=T.TEXT_RECOGNIZER,
                                get_default_model=Img2Text.get_default_model,
                                pdf_extractor_methods={'default': default_text_recognizer_config},
                                pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                                node=OCR_FEATURE_NODES.nodes[O_A.IMAGE2TEXT],
                                description='Recognize text from image files',
                                provider=ComponentBackends.ocr,
                                license=Licenses.ocr,
                                computation_context=ComputeContexts.spark,
                                output_context=ComputeContexts.spark,
                                jsl_anno_class_id=O_A.IMAGE2TEXT,
                                jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.IMAGE2TEXT],

                                applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', '.TIFF']
                                ),

        O_A.DOC2TEXT: partial(NluComponent,
                              name=O_A.DOC2TEXT,
                              type=T.TEXT_RECOGNIZER,
                              get_default_model=Doc2Text.get_default_model,
                              pdf_extractor_methods={'default': default_text_recognizer_config},
                              pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                              output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                              node=OCR_FEATURE_NODES.nodes[O_A.DOC2TEXT],
                              description='Recognize text from DOC/DOCX files',
                              provider=ComponentBackends.ocr,
                              license=Licenses.ocr,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=O_A.DOC2TEXT,
                              jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.DOC2TEXT],

                              applicable_file_types=['DOC', 'DOCX']
                              ),

        O_A.PDF2TEXT: partial(NluComponent,
                              name=O_A.PDF2TEXT,
                              type=T.TEXT_RECOGNIZER,
                              get_default_model=Pdf2Text.get_default_model,
                              pdf_extractor_methods={'default': default_text_recognizer_config},
                              pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                              output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                              node=OCR_FEATURE_NODES.nodes[O_A.PDF2TEXT],
                              description='Recognize text from PDF files',
                              provider=ComponentBackends.ocr,
                              license=Licenses.ocr,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=O_A.PDF2TEXT,
                              jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PDF2TEXT],
                              applicable_file_types=['PDF']
                              ),

        O_A.BINARY2IMAGE: partial(NluComponent,
                                  name=O_A.BINARY2IMAGE,
                                  type=T.HELPER_ANNO,
                                  get_default_model=Binary2Image.get_default_model,
                                  pdf_extractor_methods={'default': default_binary_to_image_config},
                                  pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                  output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                                  node=OCR_FEATURE_NODES.nodes[O_A.BINARY2IMAGE],
                                  description='Convert binary image data to OCR image Spark struct representation',
                                  provider=ComponentBackends.ocr,
                                  license=Licenses.ocr,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=O_A.BINARY2IMAGE,
                                  jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.BINARY2IMAGE],
                                  applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', 'TIFF']

                                  ),

        O_A.PDF2TEXT_TABLE: partial(NluComponent,
                                    name=O_A.PDF2TEXT_TABLE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=PDF2TextTable.get_default_model,
                                    pdf_extractor_methods={'default': default_binary_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.PDF2TEXT_TABLE],
                                    description='Extract Tables from PDFs with have highlightable text',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.PDF2TEXT_TABLE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PDF2TEXT_TABLE],
                                    applicable_file_types=['PDF']

                                    ),

        O_A.PPT2TEXT_TABLE: partial(NluComponent,
                                    name=O_A.PPT2TEXT_TABLE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=PPT2TextTable.get_default_model,
                                    pdf_extractor_methods={'default': default_binary_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.PPT2TEXT_TABLE],
                                    description='Extract Tables from PPT and PPTX files',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.PPT2TEXT_TABLE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PPT2TEXT_TABLE],
                                    applicable_file_types=['PPT', 'PPTX']
                                    ),

        O_A.DOC2TEXT_TABLE: partial(NluComponent,
                                    name=O_A.DOC2TEXT_TABLE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=Doc2TextTable.get_default_model,
                                    pdf_extractor_methods={'default': default_binary_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.DOC2TEXT_TABLE],
                                    description='Extract Tables from PPT and PPTX files',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.DOC2TEXT_TABLE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.DOC2TEXT_TABLE],
                                    applicable_file_types=['DOCX', 'DOC']
                                    ),

    }
