a
    hx                    @   s  d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	 ddl
mZmZmZmZ dd	lmZ erdd
lmZ ddlmZ G dd deeZe	eZeg dZeg dZeg dZeg dZeg dZeg dZedgZeg dZ edgZ!eg dZ"edgZ#eg dZ$eg dZ%eg dZ&edgZ'eg dZ(eg dZ)eg dZ*eg d Z+eg d!Z,eg d"Z-eg d#Z.eg d$Z/eg d%Z0ed&gZ1eg d'Z2eg d(Z3eg d)Z4eg d*Z5eg d+Z6eg d,Z7eg d-Z8eg d.Z9eg d/Z:ed0d1gZ;eg d2Z<eg d3Z=eg d4Z>eg d5Z?ed6gZ@eg d7ZAeg d8ZBed9d:gZCed;d<gZDed=gZEed>gZFed?gZGeeeZHeeeZIeeeZJeeeZKeeeZLeee ZMeee=ZNeee!ZOeee"ZPeee#ZQeee$ZReee%ZSeee&ZTeee(ZUeee'ZVeee2ZWeee3ZXeee)ZYeeeZZeeeZ[eee*Z\eee+Z]eee,Z^eee-Z_eee/Z`eee0Zaeee1Zbeee4Zceee5Zdeee6Zeeee7Zfeee8Zgeee.Zheee9Zieee:Zjeee;Zkeee<Zleee>Zmeee?Zneee@ZoeeeAZpeeeBZqeeeCZreeeDZseeeEZteeeFZueeeGZvG d@dA dAeZwG dBdC dCeZxG dDdE dEeZyG dFdG dGeZzG dHdI dIeZ{G dJdK dKeZ|ee|Z|G dLdM dMeZ}ee}dNdOZ}G dPdQ dQeZ~ee~dRdOZ~G dSdT dTeZeedUdOZG dVdW dWeZeedXdOZG dYdZ dZeZeed[d\d]ZG d^d_ d_eZeed`dOZG dadb dbeZeedcdOZG ddde deeZeedfdgd]ZG dhdi dieZeedjdkd]ZG dldm dmeZeedndod]ZG dpdq dqeZeedrdOZG dsdt dteZeedudOZG dvdw dweZeedxdOZG dydz dzeZeed{dOZG d|d} d}eZeed~dOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd deZG dd deZG dd deZG dd deZeeddOZG dd deZeeddOZG dd deZeeddOZG dd de~ZG dd deZg dZdS )zAuto Model class.    N)OrderedDict)TYPE_CHECKINGUnion   )logging   )_BaseAutoBackboneClass_BaseAutoModelClass_LazyAutoMappingauto_class_update)CONFIG_MAPPING_NAMES)GenerationMixin)PreTrainedModelc                   @   s   e Zd ZdS )_BaseModelWithGenerateN)__name__
__module____qualname__ r   r   b/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/auto/modeling_auto.pyr   %   s   r   (v  )Zaimv2Z
Aimv2ModelZaimv2_vision_modelZAimv2VisionModelalbertZAlbertModelalignZ
AlignModelZaltclipZAltCLIPModel)apertusZApertusModel)arceeZ
ArceeModel)ariaZ	AriaModel)	aria_textZAriaTextModel)audio-spectrogram-transformerZASTModel)Z
autoformerZAutoformerModel)
aya_visionZAyaVisionModel)bambaZ
BambaModelZbarkZ	BarkModel)bartZ	BartModelbeitZ	BeitModelbertZ	BertModel)bert-generationZBertGenerationEncoderbig_birdZBigBirdModel)bigbird_pegasusZBigBirdPegasusModel)biogptZBioGptModelbitZBitModel)bitnetZBitNetModel)
blenderbotZBlenderbotModel)blenderbot-smallZBlenderbotSmallModelblipZ	BlipModel)blip-2Z
Blip2Model)Zblip_2_qformerZBlip2QFormerModel)bloomZ
BloomModel)ZbridgetowerZBridgeTowerModel)brosZ	BrosModel)	camembertZCamembertModel)canineZCanineModel)	chameleonZChameleonModelZchinese_clipZChineseCLIPModel)Zchinese_clip_vision_modelZChineseCLIPVisionModel)clapZ	ClapModelclipZ	CLIPModelZclip_text_modelZCLIPTextModel)Zclip_vision_modelZCLIPVisionModelZclipsegZCLIPSegModel)ZclvpZ!ClvpModelForConditionalGeneration)
code_llama
LlamaModel)codegenZCodeGenModel)cohereZCohereModel)cohere2ZCohere2Modelcohere2_visionZCohere2VisionModelconditional_detrZConditionalDetrModel)convbertZConvBertModelconvnextZConvNextModel
convnextv2ZConvNextV2Model)cpmantZCpmAntModelZcsmZCsmForConditionalGeneration)ctrlZ	CTRLModel)cvtZCvtModel)d_fineZ
DFineModeldab-detrZDabDetrModelZdacZDacModel)data2vec-audioZData2VecAudioModeldata2vec-textZData2VecTextModeldata2vec-visionZData2VecVisionModel)dbrxZ	DbrxModeldebertaZDebertaModel
deberta-v2ZDebertaV2Model)Zdecision_transformerZDecisionTransformerModel)deepseek_v2ZDeepseekV2Model)deepseek_v3ZDeepseekV3Model)deepseek_vlZDeepseekVLModel)deepseek_vl_hybridZDeepseekVLHybridModeldeformable_detrZDeformableDetrModeldeitZ	DeiTModel	depth_proZDepthProModeldetaZ	DetaModeldetrZ	DetrModel)diaZDiaModel)	diffllamaZDiffLlamaModeldinatZ
DinatModeldinov2ZDinov2Modeldinov2_with_registersZDinov2WithRegistersModelZdinov3_convnextZDINOv3ConvNextModelZ
dinov3_vitZDINOv3ViTModel
distilbertZDistilBertModel)dogeZ	DogeModel)
donut-swinZDonutSwinModel)dots1Z
Dots1Model)ZdprZDPRQuestionEncoderdptZDPTModelefficientformerZEfficientFormerModel)efficientloftrZEfficientLoFTRModelefficientnetZEfficientNetModelelectraZElectraModel)emu3Z	Emu3Model)ZencodecZEncodecModel)ernieZ
ErnieModel)ernie4_5ZErnie4_5Model)ernie4_5_moeZErnie4_5_MoeModel)ernie_mZErnieMModel)esmZEsmModel)evollaZEvollaModel)exaone4ZExaone4Model)falconZFalconModel)	falcon_h1ZFalconH1Model)falcon_mambaZFalconMambaModelfastspeech2_conformerZFastSpeech2ConformerModelZ"fastspeech2_conformer_with_hifiganFastSpeech2ConformerWithHifiGanflaubertZFlaubertModel)flavaZ
FlavaModel)	florence2ZFlorence2Model)fnetZ	FNetModelfocalnetZFocalNetModel)fsmtZ	FSMTModel)funnel)ZFunnelModelZFunnelBaseModel)fuyuZ	FuyuModel)gemmaZ
GemmaModel)gemma2ZGemma2Model)gemma3ZGemma3Model)gemma3_textZGemma3TextModel)gemma3nZGemma3nModel)Zgemma3n_audioZGemma3nAudioEncoder)gemma3n_textZGemma3nTextModel)Zgemma3n_visionTimmWrapperModel)gitZGitModel)glmZGlmModel)glm4Z	Glm4Model)glm4_moeZGlm4MoeModel)glm4vZ
Glm4vModel)	glm4v_moeZGlm4vMoeModel)Zglm4v_moe_textZGlm4vMoeTextModel)Z
glm4v_textZGlm4vTextModelglpnZ	GLPNModel)got_ocr2ZGotOcr2Model)gpt-sw3	GPT2Model)gpt2r   )gpt_bigcodeZGPTBigCodeModel)gpt_neoZGPTNeoModel)gpt_neoxZGPTNeoXModel)gpt_neox_japaneseZGPTNeoXJapaneseModel)gpt_ossZGptOssModel)gptjZ	GPTJModelzgptsan-japaneseZ&GPTSanJapaneseForConditionalGeneration)graniteZGraniteModel)
granitemoeZGraniteMoeModel)granitemoehybridZGraniteMoeHybridModel)granitemoesharedZGraniteMoeSharedModel)Z
graphormerZGraphormerModel)grounding-dinoZGroundingDinoModel)ZgroupvitZGroupViTModel)heliumZHeliumModelhgnet_v2ZHGNetV2BackbonehieraZ
HieraModel)hubertZHubertModel)hunyuan_v1_denseZHunYuanDenseV1Model)hunyuan_v1_moeZHunYuanMoEV1ModelibertZ
IBertModel)ideficsZIdeficsModel)idefics2ZIdefics2Model)idefics3ZIdefics3Model)Zidefics3_visionZIdefics3VisionTransformerijepaZ
IJepaModelimagegptZImageGPTModel)ZinformerZInformerModel)instructblipZInstructBlipModel)instructblipvideoZInstructBlipVideoModel)internvlZInternVLModel)Zinternvl_visionZInternVLVisionModel)jambaZ
JambaModel)janusZ
JanusModel)jetmoeZJetMoeModel)ZjukeboxZJukeboxModel)kosmos-2ZKosmos2Model)
kosmos-2.5ZKosmos2_5Model)kyutai_speech_to_textZKyutaiSpeechToTextModel)layoutlmZLayoutLMModel)
layoutlmv2ZLayoutLMv2Model)
layoutlmv3ZLayoutLMv3Model)ledZLEDModellevitZ
LevitModel)lfm2Z	Lfm2ModelZ	lightglueZLightGlueForKeypointMatching)liltZ	LiltModel)llamarA   llama4ZLlama4ForConditionalGeneration)llama4_textLlama4TextModel)llavaZ
LlavaModel)
llava_nextZLlavaNextModel)llava_next_videoZLlavaNextVideoModel)llava_onevisionZLlavaOnevisionModel
longformerZLongformerModel)longt5ZLongT5Model)lukeZ	LukeModel)lxmertZLxmertModel)m2m_100ZM2M100Model)mambaZ
MambaModel)mamba2ZMamba2Model)marianZMarianModel)markuplmZMarkupLMModel)mask2formerZMask2FormerModel)
maskformerZMaskFormerModel)maskformer-swinZMaskFormerSwinModel)mbartZ
MBartModel)mctctZ
MCTCTModel)megaZ	MegaModel)megatron-bertZMegatronBertModel
metaclip_2ZMetaClip2Model)zmgp-strZMgpstrForSceneTextRecognition)ZmimiZ	MimiModel)minimaxZMiniMaxModel)mistralZMistralModel)mistral3ZMistral3Model)mixtralZMixtralModelZmlcdZMLCDVisionModel)mllamaZMllamaModel)mm-grounding-dinoZMMGroundingDinoModel
mobilebertZMobileBertModelmobilenet_v1ZMobileNetV1Modelmobilenet_v2ZMobileNetV2Model	mobilevitZMobileViTModelmobilevitv2ZMobileViTV2Model)
modernbertZModernBertModel)modernbert-decoderZModernBertDecoderModel)	moonshineZMoonshineModel)moshiZ
MoshiModel)mpnetZ
MPNetModel)mptZMptModel)mraZMraModel)mt5ZMT5Model)musicgenZMusicgenModel)musicgen_melodyZMusicgenMelodyModel)mvpZMvpModelnatZNatModel)nemotronZNemotronModel)nezhaZ
NezhaModel)nllb-moeZNllbMoeModelnystromformerZNystromformerModel)olmoZ	OlmoModel)olmo2Z
Olmo2Model)olmoeZ
OlmoeModelzomdet-turboZOmDetTurboForObjectDetection)	oneformerZOneFormerModel)
open-llamaZOpenLlamaModel)
openai-gptZOpenAIGPTModel)optZOPTModel)ovis2Z
Ovis2Model)owlv2Z
Owlv2Model)owlvitZOwlViTModel)	paligemmaZPaliGemmaModel)patchtsmixerZPatchTSMixerModel)patchtstZPatchTSTModel)pegasusZPegasusModel)	pegasus_xZPegasusXModel)	perceiverZPerceiverModel)Zperception_encoderZPerceptionEncoder)perception_lmZPerceptionLMModel)	persimmonZPersimmonModel)phiZPhiModel)phi3Z	Phi3Model)phi4_multimodalZPhi4MultimodalModel)phimoeZPhimoeModel)pixtralZPixtralVisionModel)plbartZPLBartModel
poolformerZPoolFormerModel)
prophetnetZProphetNetModelpvtZPvtModel)pvt_v2Z
PvtV2Model)qdqbertZQDQBertModel)qwen2Z
Qwen2Model)
qwen2_5_vlZQwen2_5_VLModel)Zqwen2_5_vl_textZQwen2_5_VLTextModel)Zqwen2_audio_encoderZQwen2AudioEncoder)	qwen2_moeZQwen2MoeModel)qwen2_vlZQwen2VLModel)Zqwen2_vl_textZQwen2VLTextModel)qwen3Z
Qwen3Model)	qwen3_moeZQwen3MoeModel)recurrent_gemmaZRecurrentGemmaModelreformerZReformerModelregnetZRegNetModelrembertZRemBertModelresnetZResNetModelZ	retribertZRetriBertModelrobertaZRobertaModelroberta-prelayernormZRobertaPreLayerNormModelroc_bertZRoCBertModelroformerZRoFormerModel)rt_detrZRTDetrModel)
rt_detr_v2ZRTDetrV2Model)rwkvZ	RwkvModelZsamZSamModelZsam2	Sam2Model)Zsam2_hiera_det_modelZSam2HieraDetModel)
sam2_videoZSam2VideoModel)Zsam2_vision_modelZSam2VisionModelZsam_hqZ
SamHQModel)Zsam_hq_vision_modelZSamHQVisionModel)Zsam_vision_modelZSamVisionModel)seamless_m4tZSeamlessM4TModel)seamless_m4t_v2ZSeamlessM4Tv2Model)seed_ossZSeedOssModel	segformerZSegformerModel)ZseggptZSegGptModel)sewZSEWModel)sew-dZ	SEWDModelsiglipZSiglipModelsiglip2ZSiglip2ModelZsiglip_vision_modelZSiglipVisionModel)smollm3ZSmolLM3Model)smolvlmZSmolVLMModel)Zsmolvlm_visionZSmolVLMVisionTransformer)speech_to_textZSpeech2TextModel)speecht5ZSpeechT5Model)splinterZSplinterModelsqueezebertZSqueezeBertModel)stablelmZStableLmModel)
starcoder2ZStarcoder2ModelswiftformerZSwiftFormerModelswinZ	SwinModelswin2srZSwin2SRModelswinv2ZSwinv2Model)switch_transformersZSwitchTransformersModel)t5ZT5Model)t5gemmaZT5GemmaModeltable-transformerZTableTransformerModel)tapasZ
TapasModel)textnetZTextNetModel)Ztime_series_transformerZTimeSeriesTransformerModel)timesfmZTimesFmModeltimesformerZTimesformerModelZtimm_backboneZTimmBackbonetimm_wrapperr   )Ztrajectory_transformerZTrajectoryTransformerModel)
transfo-xlZTransfoXLModel)tvltZ	TvltModel)ZtvpZTvpModel)udopZ	UdopModel)umt5Z	UMT5Model)	unispeechZUniSpeechModel)unispeech-satZUniSpeechSatModel)ZunivnetZUnivNetModelvanZVanModel)video_llavaZVideoLlavaModelvideomaeZVideoMAEModel)viltZ	ViltModel)vipllavaZVipLlavaModel)zvision-text-dual-encoderZVisionTextDualEncoderModel)visual_bertZVisualBertModelvitZViTModel
vit_hybridZViTHybridModelvit_maeZViTMAEModelvit_msnZViTMSNModelvitdetZVitDetModelZvitsZ	VitsModelvivitZ
VivitModel)vjepa2ZVJEPA2ModelZvoxtralZVoxtralForConditionalGeneration)Zvoxtral_encoderZVoxtralEncoder)wav2vec2ZWav2Vec2Model)wav2vec2-bertZWav2Vec2BertModel)wav2vec2-conformerZWav2Vec2ConformerModel)wavlmZ
WavLMModel)whisperZWhisperModel)ZxclipZ
XCLIPModel)ZxcodecZXcodecModel)xglmZ	XGLMModelxlmZXLMModel)xlm-prophetnetZXLMProphetNetModelxlm-robertaZXLMRobertaModelxlm-roberta-xlZXLMRobertaXLModel)xlnetZ
XLNetModel)xlstmZ
xLSTMModel)xmodZ	XmodModelyolosZ
YolosModel)yosoZ	YosoModel)zambaZ
ZambaModel)zamba2ZZamba2Model)X)r   ZAlbertForPreTrainingr#   ZBartForConditionalGeneration)r'   ZBertForPreTraining)r*   ZBigBirdForPreTrainingr5   ZBloomForCausalLMr7   ZCamembertForMaskedLMZcolpaliZColPaliForRetrieval)Zcolqwen2ZColQwen2ForRetrievalrP   ZCTRLLMHeadModelrX   ZData2VecTextForMaskedLMr]   ZDebertaForMaskedLMr_   ZDebertaV2ForMaskedLMry   ZDistilBertForMaskedLM)r   ZElectraForPreTraining)r   ZErnieForPreTrainingr   ZEvollaForProteinText2Textr   ZExaone4ForCausalLMr   ZFalconMambaForCausalLMr   ZFlaubertWithLMHeadModel)r   ZFlavaForPreTrainingr   Z!Florence2ForConditionalGeneration)r   ZFNetForPreTrainingr   ZFSMTForConditionalGeneration)r   ZFunnelForPreTrainingr   Gemma3ForConditionalGenerationr   GPT2LMHeadModelr   r  r   ZGPTBigCodeForCausalLMr   )r   ZHieraForPreTrainingr   ZIBertForMaskedLMr   ZIdeficsForVisionText2Textr   Z Idefics2ForConditionalGenerationr   Z Idefics3ForConditionalGenerationr   ZJanusForConditionalGenerationr   ZLayoutLMForMaskedLMr   LlavaForConditionalGenerationr   Z!LlavaNextForConditionalGenerationr   Z&LlavaNextVideoForConditionalGenerationr   Z&LlavaOnevisionForConditionalGenerationr   ZLongformerForMaskedLMr   ZLukeForMaskedLM)r   ZLxmertForPreTrainingr   ZMambaForCausalLMr   ZMamba2ForCausalLMr   ZMegaForMaskedLM)r   ZMegatronBertForPreTrainingr   Z Mistral3ForConditionalGenerationr  ZMllamaForConditionalGeneration)r  ZMobileBertForPreTrainingr  ZMPNetForMaskedLMr  ZMptForCausalLMr  ZMraForMaskedLMr  ZMvpForConditionalGeneration)r  ZNezhaForPreTrainingr  ZNllbMoeForConditionalGenerationr&  ZOpenAIGPTLMHeadModelr+  Z!PaliGemmaForConditionalGenerationZqwen2_audioZ"Qwen2AudioForConditionalGenerationrO  rQ  ZRobertaForMaskedLMrS  ZRobertaPreLayerNormForMaskedLM)rU  ZRoCBertForPreTrainingrZ  ZRwkvForCausalLM)rp  ZSplinterForPreTrainingrr  ZSqueezeBertForMaskedLMr}  Z*SwitchTransformersForConditionalGenerationr~  ZT5ForConditionalGenerationr  ZT5GemmaForConditionalGenerationr  ZTapasForMaskedLMr  ZTransfoXLLMHeadModel)r  ZTvltForPreTraining)r  ZUniSpeechForPreTraining)r  ZUniSpeechSatForPreTrainingr  Z"VideoLlavaForConditionalGeneration)r  ZVideoMAEForPreTrainingr  Z VipLlavaForConditionalGeneration)r  ZVisualBertForPreTraining)r  ZViTMAEForPreTrainingr  )r  ZWav2Vec2ForPreTraining)r  ZWav2Vec2ConformerForPreTrainingr  ZXLMWithLMHeadModelr  ZXLMRobertaForMaskedLMr  ZXLMRobertaXLForMaskedLMr  ZXLNetLMHeadModelr  ZxLSTMForCausalLMr  ZXmodForMaskedLM)Tr   ZAlbertForMaskedLMr  r'   ZBertForMaskedLMr*   ZBigBirdForMaskedLMr+   Z&BigBirdPegasusForConditionalGenerationr1   Z'BlenderbotSmallForConditionalGenerationr  r  rB   ZCodeGenForCausalLMrI   ZConvBertForMaskedLMrN   ZCpmAntForCausalLMr  r  r  r  rn   ZDiaForConditionalGenerationr  r   ZElectraForMaskedLMzencoder-decoderZEncoderDecoderModelr   ZErnieForMaskedLMr   ZEsmForMaskedLMr  r  r  r   ZFNetForMaskedLMr  r   ZFunnelForMaskedLMr   ZGitForCausalLMr  r  r  r   ZGPTNeoForCausalLMr   ZGPTNeoXForCausalLMr   ZGPTNeoXJapaneseForCausalLMr   ZGPTJForCausalLMr   r  r  r   ZLEDForConditionalGenerationr  r   ZLongT5ForConditionalGenerationr  r   ZM2M100ForConditionalGenerationr  r  r   ZMarianMTModelr  r   ZMegatronBertForCausalLMr  ZMobileBertForMaskedLMr  Z!MoonshineForConditionalGenerationr  r  r  r  r  ZNezhaForMaskedLMr  r  ZNystromformerForMaskedLMr  r/  Z PegasusXForConditionalGenerationr8  ZPLBartForConditionalGenerationZ	pop2pianoZ!Pop2PianoForConditionalGenerationr?  ZQDQBertForMaskedLMrH  ZReformerModelWithLMHeadrL  ZRemBertForMaskedLMr  r  rU  ZRoCBertForMaskedLMrW  ZRoFormerForMaskedLMr  rn  Z#Speech2TextForConditionalGenerationr  r  r  r  r  r  r  ZWav2Vec2ForMaskedLMr  ZWhisperForConditionalGenerationr  r  r  r  r  r  ZYosoForMaskedLM))r   ZApertusForCausalLM)r   ZArceeForCausalLM)r   ZAriaTextForCausalLM)r!   ZBambaForCausalLM)r#   ZBartForCausalLM)r'   ZBertLMHeadModel)r(   ZBertGenerationDecoder)r*   ZBigBirdForCausalLM)r+   ZBigBirdPegasusForCausalLM)r,   ZBioGptForCausalLM)r/   ZBitNetForCausalLM)r0   ZBlenderbotForCausalLM)r1   ZBlenderbotSmallForCausalLMr  )r7   ZCamembertForCausalLM)r@   LlamaForCausalLMr  )rC   ZCohereForCausalLM)rD   ZCohere2ForCausalLMr  r  )rX   ZData2VecTextForCausalLM)r[   ZDbrxForCausalLM)r`   ZDeepseekV2ForCausalLM)ra   ZDeepseekV3ForCausalLM)ro   ZDiffLlamaForCausalLM)rz   ZDogeForCausalLM)r|   ZDots1ForCausalLM)r   ZElectraForCausalLM)r   ZEmu3ForCausalLM)r   ZErnieForCausalLM)r   ZErnie4_5ForCausalLM)r   ZErnie4_5_MoeForCausalLMr  )r   ZFalconForCausalLM)r   ZFalconH1ForCausalLMr  r   ZFuyuForCausalLM)r   ZGemmaForCausalLM)r   ZGemma2ForCausalLMr  )r   ZGemma3ForCausalLMr   ZGemma3nForConditionalGeneration)r   ZGemma3nForCausalLMr  )r   ZGlmForCausalLM)r   ZGlm4ForCausalLM)r   ZGlm4MoeForCausalLMr   ZGotOcr2ForConditionalGenerationr  r  r  r  r  r  )r   ZGptOssForCausalLMr  )r   ZGraniteForCausalLM)r   ZGraniteMoeForCausalLM)r   ZGraniteMoeHybridForCausalLM)r   ZGraniteMoeSharedForCausalLM)r   ZHeliumForCausalLM)r   ZHunYuanDenseV1ForCausalLM)r   ZHunYuanMoEV1ForCausalLM)r   ZJambaForCausalLM)r   ZJetMoeForCausalLM)r   ZLfm2ForCausalLM)r   r%  )r   Llama4ForCausalLM)r   r)  r  r  )r   ZMarianForCausalLM)r   ZMBartForCausalLM)r   ZMegaForCausalLMr  )r   ZMiniMaxForCausalLM)r   ZMistralForCausalLM)r   ZMixtralForCausalLM)r  ZMllamaForCausalLM)r  ZModernBertDecoderForCausalLM)r  ZMoshiForCausalLMr  )r  ZMusicgenForCausalLM)r  ZMusicgenMelodyForCausalLM)r  ZMvpForCausalLM)r  ZNemotronForCausalLM)r   ZOlmoForCausalLM)r!  ZOlmo2ForCausalLM)r"  ZOlmoeForCausalLM)r%  ZOpenLlamaForCausalLMr  )r'  ZOPTForCausalLM)r.  ZPegasusForCausalLM)r2  ZPersimmonForCausalLM)r3  ZPhiForCausalLM)r4  ZPhi3ForCausalLM)r5  ZPhi4MultimodalForCausalLM)r6  ZPhimoeForCausalLM)r8  ZPLBartForCausalLM)r;  ZProphetNetForCausalLM)r?  ZQDQBertLMHeadModel)r@  ZQwen2ForCausalLM)rB  ZQwen2MoeForCausalLM)rD  ZQwen3ForCausalLM)rE  ZQwen3MoeForCausalLM)rF  ZRecurrentGemmaForCausalLMr  )rL  ZRemBertForCausalLM)rQ  ZRobertaForCausalLM)rS  ZRobertaPreLayerNormForCausalLM)rU  ZRoCBertForCausalLM)rW  ZRoFormerForCausalLMr  )rb  ZSeedOssForCausalLM)rl  ZSmolLM3ForCausalLM)Zspeech_to_text_2ZSpeech2Text2ForCausalLM)rs  ZStableLmForCausalLM)rt  ZStarcoder2ForCausalLMr  )ZtrocrZTrOCRForCausalLM)r  ZWhisperForCausalLM)r  ZXGLMForCausalLMr  )r  ZXLMProphetNetForCausalLM)r  ZXLMRobertaForCausalLM)r  ZXLMRobertaXLForCausalLMr  r  )r  ZXmodForCausalLM)r  ZZambaForCausalLM)r  ZZamba2ForCausalLM);r   r$   r-   rE   rG   rJ   rL   rS   rY   rd   rf   rh   rj   rl   rp   rr   rt   rv   rw   r}   r   r   r   r   r   r   r   r   )r   ZLlama4VisionModelr  )r  ZMllamaVisionModelr  r  r
  r  r  r9  r<  rI  rM  rc  rk  ru  rw  ry  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  ))rg   ZDeiTForMaskedImageModeling)r   ZFocalNetForMaskedImageModeling)rx  ZSwinForMaskedImageModeling)r|  ZSwinv2ForMaskedImageModeling)r  ZViTForMaskedImageModeling)r   ZImageGPTForCausalImageModeling)-)r%   ZBeitForImageClassification)r.   ZBitForImageClassification)r=   ZCLIPForImageClassification)rK   ZConvNextForImageClassification)rM   Z ConvNextV2ForImageClassification)rQ   ZCvtForImageClassification)rZ   Z$Data2VecVisionForImageClassification)rg   )ZDeiTForImageClassificationZ%DeiTForImageClassificationWithTeacher)rq   ZDinatForImageClassification)rs   ZDinov2ForImageClassification)ru   Z)Dinov2WithRegistersForImageClassification)r{   ZDonutSwinForImageClassification)r   )Z%EfficientFormerForImageClassificationZ0EfficientFormerForImageClassificationWithTeacher)r   Z"EfficientNetForImageClassification)r   ZFocalNetForImageClassification)r   ZHGNetV2ForImageClassification)r   ZHieraForImageClassification)r   ZIJepaForImageClassification)r   ZImageGPTForImageClassification)r   )ZLevitForImageClassificationZ&LevitForImageClassificationWithTeacher)r   ZMetaClip2ForImageClassification)r  Z!MobileNetV1ForImageClassification)r	  Z!MobileNetV2ForImageClassification)r  ZMobileViTForImageClassification)r  Z!MobileViTV2ForImageClassification)r  ZNatForImageClassification)r0  )Z&PerceiverForImageClassificationLearnedZ&PerceiverForImageClassificationFourierZ-PerceiverForImageClassificationConvProcessing)r:  Z PoolFormerForImageClassification)r=  ZPvtForImageClassification)r>  ZPvtV2ForImageClassification)rJ  ZRegNetForImageClassification)rN  ZResNetForImageClassification)rd  ZSegformerForImageClassification)shieldgemma2Z"ShieldGemma2ForImageClassification)rh  ZSiglipForImageClassification)rj  ZSiglip2ForImageClassification)rv  Z!SwiftFormerForImageClassification)rx  ZSwinForImageClassification)r|  ZSwinv2ForImageClassification)r  ZTextNetForImageClassification)r  Z!TimmWrapperForImageClassification)r  ZVanForImageClassification)r  ZViTForImageClassification)r  ZViTHybridForImageClassification)r  ZViTMSNForImageClassificationrm   ZDetrForSegmentation))r%   ZBeitForSemanticSegmentation)rZ   Z%Data2VecVisionForSemanticSegmentation)r~   ZDPTForSemanticSegmentation)r	  Z"MobileNetV2ForSemanticSegmentation)r  Z MobileViTForSemanticSegmentation)r  Z"MobileViTV2ForSemanticSegmentation)rd  Z SegformerForSemanticSegmentation)ZupernetZUperNetForSemanticSegmentationr   Z!MaskFormerForInstanceSegmentation)r+  )ZeomtZEomtForUniversalSegmentation)r   Z#Mask2FormerForUniversalSegmentationr,  )r$  Z!OneFormerForUniversalSegmentation))r  Z!TimesformerForVideoClassification)r  ZVideoMAEForVideoClassification)r  ZVivitForVideoClassification)r  ZVJEPA2ForVideoClassification)r3   ZBlipForConditionalGenerationr4   ZBlip2ForConditionalGenerationr9   Z!ChameleonForConditionalGenerationr  r  r  r   Z$InstructBlipForConditionalGeneration)r   Z)InstructBlipVideoForConditionalGenerationr   ZKosmos2ForConditionalGenerationr   Z!Kosmos2_5ForConditionalGenerationr  r  r  r  r  r  r(  ZOvis2ForConditionalGenerationr  Z
pix2structZ"Pix2StructForConditionalGenerationrA  Z"Qwen2_5_VLForConditionalGenerationrC  ZQwen2VLForConditionalGenerationr  r  zvision-encoder-decoderZVisionEncoderDecoderModelr  )-)r   ZAriaForConditionalGeneration)r    Z!AyaVisionForConditionalGenerationr-  r.  r/  )rF   Z%Cohere2VisionForConditionalGeneration)rb   Z"DeepseekVLForConditionalGeneration)rc   Z(DeepseekVLHybridForConditionalGeneration)r   ZEmu3ForConditionalGenerationr  r  r&  r  r'  r  )r   ZGlm4vForConditionalGeneration)r   Z Glm4vMoeForConditionalGenerationr(  r  r  r  r0  )r   Z InternVLForConditionalGenerationr  r1  r2  r   r  r  r  r  r  r  r3  r  )r1  Z$PerceptionLMForConditionalGenerationr4  )r7  r  r5  r6  )r*  r  )rm  ZSmolVLMForConditionalGeneration)r  ZUdopForConditionalGenerationr  r7  ).r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r   ZMBartForConditionalGenerationr  )r   ZMegatronBertForMaskedLMr  )r  ZModernBertForMaskedLMr  r  r  r  r  )r0  ZPerceiverForMaskedLMr  )rH  ZReformerForMaskedLMr  r  r  r  r   r  r  r"  r  r  r  r  r$  )
)rH   Z!ConditionalDetrForObjectDetection)rR   ZDFineForObjectDetection)rT   ZDabDetrForObjectDetection)re   Z DeformableDetrForObjectDetection)rk   ZDetaForObjectDetection)rm   ZDetrForObjectDetection)rX  ZRTDetrForObjectDetection)rY  ZRTDetrV2ForObjectDetection)r  Z"TableTransformerForObjectDetection)r  ZYolosForObjectDetection))r   ZGroundingDinoForObjectDetection)r  Z!MMGroundingDinoForObjectDetectionr#  )r)  ZOwlv2ForObjectDetection)r*  ZOwlViTForObjectDetection))Zdepth_anythingZDepthAnythingForDepthEstimation)ri   ZDepthProForDepthEstimation)r~   ZDPTForDepthEstimation)r   ZGLPNForDepthEstimation)Zprompt_depth_anythingZ%PromptDepthAnythingForDepthEstimation)ZzoedepthZZoeDepthForDepthEstimation)r  r  )r0   Z"BlenderbotForConditionalGenerationr   r  r  r   Zgranite_speechZ%GraniteSpeechForConditionalGenerationr  r  r  r  r8  )r  ZMT5ForConditionalGenerationr  r  )r.  ZPegasusForConditionalGenerationr  r  )r;  Z"ProphetNetForConditionalGenerationr  )r`  ZSeamlessM4TForTextToText)ra  ZSeamlessM4Tv2ForTextToTextr  r  r  )r  ZUMT5ForConditionalGenerationr  )r  Z%XLMProphetNetForConditionalGeneration)r  r9  )r   Z*KyutaiSpeechToTextForConditionalGenerationr  r  )r`  ZSeamlessM4TForSpeechToText)ra  ZSeamlessM4Tv2ForSpeechToText)zspeech-encoder-decoderZSpeechEncoderDecoderModelr!  )ro  ZSpeechT5ForSpeechToTextr#  )p)r   ZAlbertForSequenceClassification)r   ZArceeForSequenceClassification)r#   ZBartForSequenceClassification)r'   ZBertForSequenceClassification)r*   Z BigBirdForSequenceClassification)r+   Z'BigBirdPegasusForSequenceClassification)r,   ZBioGptForSequenceClassification)r5   ZBloomForSequenceClassification)r7   Z"CamembertForSequenceClassification)r8   ZCanineForSequenceClassification)r@   LlamaForSequenceClassification)rI   Z!ConvBertForSequenceClassification)rP   ZCTRLForSequenceClassification)rX   Z%Data2VecTextForSequenceClassification)r]   Z DebertaForSequenceClassification)r_   Z"DebertaV2ForSequenceClassification)r`   Z#DeepseekV2ForSequenceClassification)ra   Z#DeepseekV3ForSequenceClassification)ro   Z"DiffLlamaForSequenceClassification)ry   Z#DistilBertForSequenceClassification)rz   ZDogeForSequenceClassification)r   Z ElectraForSequenceClassification)r   ZErnieForSequenceClassification)r   ZErnieMForSequenceClassification)r   ZEsmForSequenceClassification)r   Z Exaone4ForSequenceClassification)r   ZFalconForSequenceClassification)r   Z!FlaubertForSequenceClassification)r   ZFNetForSequenceClassification)r   ZFunnelForSequenceClassification)r   ZGemmaForSequenceClassification)r   ZGemma2ForSequenceClassification)r   ZGemma3ForSequenceClassification)r   ZGlmForSequenceClassification)r   ZGlm4ForSequenceClassification)r   GPT2ForSequenceClassification)r   r;  )r   Z#GPTBigCodeForSequenceClassification)r   ZGPTNeoForSequenceClassification)r   Z GPTNeoXForSequenceClassification)r   ZGptOssForSequenceClassification)r   ZGPTJForSequenceClassification)r   ZHeliumForSequenceClassification)r   Z'HunYuanDenseV1ForSequenceClassification)r   Z%HunYuanMoEV1ForSequenceClassification)r   ZIBertForSequenceClassification)r   ZJambaForSequenceClassification)r   ZJetMoeForSequenceClassification)r   Z!LayoutLMForSequenceClassification)r   Z#LayoutLMv2ForSequenceClassification)r   Z#LayoutLMv3ForSequenceClassification)r   ZLEDForSequenceClassification)r   ZLiltForSequenceClassification)r   r:  )r   Z#LongformerForSequenceClassification)r   ZLukeForSequenceClassification)r   Z!MarkupLMForSequenceClassification)r   ZMBartForSequenceClassification)r   ZMegaForSequenceClassification)r   Z%MegatronBertForSequenceClassification)r   Z MiniMaxForSequenceClassification)r   Z MistralForSequenceClassification)r   Z MixtralForSequenceClassification)r  Z#MobileBertForSequenceClassification)r  Z#ModernBertForSequenceClassification)r  Z*ModernBertDecoderForSequenceClassification)r  ZMPNetForSequenceClassification)r  ZMptForSequenceClassification)r  ZMraForSequenceClassification)r  ZMT5ForSequenceClassification)r  ZMvpForSequenceClassification)r  Z!NemotronForSequenceClassification)r  ZNezhaForSequenceClassification)r  Z&NystromformerForSequenceClassification)r%  Z"OpenLlamaForSequenceClassification)r&  Z"OpenAIGPTForSequenceClassification)r'  ZOPTForSequenceClassification)r0  Z"PerceiverForSequenceClassification)r2  Z"PersimmonForSequenceClassification)r3  ZPhiForSequenceClassification)r4  ZPhi3ForSequenceClassification)r6  ZPhimoeForSequenceClassification)r8  ZPLBartForSequenceClassification)r?  Z QDQBertForSequenceClassification)r@  ZQwen2ForSequenceClassification)rB  Z!Qwen2MoeForSequenceClassification)rD  ZQwen3ForSequenceClassification)rE  Z!Qwen3MoeForSequenceClassification)rH  Z!ReformerForSequenceClassification)rL  Z RemBertForSequenceClassification)rQ  Z RobertaForSequenceClassification)rS  Z,RobertaPreLayerNormForSequenceClassification)rU  Z RoCBertForSequenceClassification)rW  Z!RoFormerForSequenceClassification)rb  Z SeedOssForSequenceClassification)rl  Z SmolLM3ForSequenceClassification)rr  Z$SqueezeBertForSequenceClassification)rs  Z!StableLmForSequenceClassification)rt  Z#Starcoder2ForSequenceClassification)r~  ZT5ForSequenceClassification)r  Z T5GemmaForSequenceClassification)r  ZTapasForSequenceClassification)r  Z"TransfoXLForSequenceClassification)r  ZUMT5ForSequenceClassification)r  ZXLMForSequenceClassification)r  Z#XLMRobertaForSequenceClassification)r  Z%XLMRobertaXLForSequenceClassification)r  ZXLNetForSequenceClassification)r  ZXmodForSequenceClassification)r  ZYosoForSequenceClassification)r  ZZambaForSequenceClassification)r  ZZamba2ForSequenceClassification)M)r   ZAlbertForQuestionAnswering)r   ZArceeForQuestionAnswering)r#   ZBartForQuestionAnswering)r'   ZBertForQuestionAnswering)r*   ZBigBirdForQuestionAnswering)r+   Z"BigBirdPegasusForQuestionAnswering)r5   ZBloomForQuestionAnswering)r7   ZCamembertForQuestionAnswering)r8   ZCanineForQuestionAnswering)rI   ZConvBertForQuestionAnswering)rX   Z Data2VecTextForQuestionAnswering)r]   ZDebertaForQuestionAnswering)r_   ZDebertaV2ForQuestionAnswering)ro   ZDiffLlamaForQuestionAnswering)ry   ZDistilBertForQuestionAnswering)r   ZElectraForQuestionAnswering)r   ZErnieForQuestionAnswering)r   ZErnieMForQuestionAnswering)r   ZExaone4ForQuestionAnswering)r   ZFalconForQuestionAnswering)r   Z"FlaubertForQuestionAnsweringSimple)r   ZFNetForQuestionAnswering)r   ZFunnelForQuestionAnswering)r   ZGPT2ForQuestionAnswering)r   ZGPTNeoForQuestionAnswering)r   ZGPTNeoXForQuestionAnswering)r   ZGPTJForQuestionAnswering)r   ZIBertForQuestionAnsweringr   ZLayoutLMv2ForQuestionAnsweringr   ZLayoutLMv3ForQuestionAnswering)r   ZLEDForQuestionAnswering)r   ZLiltForQuestionAnswering)r   ZLlamaForQuestionAnswering)r   ZLongformerForQuestionAnswering)r   ZLukeForQuestionAnswering)r   ZLxmertForQuestionAnswering)r   ZMarkupLMForQuestionAnswering)r   ZMBartForQuestionAnswering)r   ZMegaForQuestionAnswering)r   Z MegatronBertForQuestionAnswering)r   ZMiniMaxForQuestionAnswering)r   ZMistralForQuestionAnswering)r   ZMixtralForQuestionAnswering)r  ZMobileBertForQuestionAnswering)r  ZModernBertForQuestionAnswering)r  ZMPNetForQuestionAnswering)r  ZMptForQuestionAnswering)r  ZMraForQuestionAnswering)r  ZMT5ForQuestionAnswering)r  ZMvpForQuestionAnswering)r  ZNemotronForQuestionAnswering)r  ZNezhaForQuestionAnswering)r  Z!NystromformerForQuestionAnswering)r'  ZOPTForQuestionAnswering)r?  ZQDQBertForQuestionAnswering)r@  ZQwen2ForQuestionAnswering)rB  ZQwen2MoeForQuestionAnswering)rD  ZQwen3ForQuestionAnswering)rE  ZQwen3MoeForQuestionAnswering)rH  ZReformerForQuestionAnswering)rL  ZRemBertForQuestionAnswering)rQ  ZRobertaForQuestionAnswering)rS  Z'RobertaPreLayerNormForQuestionAnswering)rU  ZRoCBertForQuestionAnswering)rW  ZRoFormerForQuestionAnswering)rb  ZSeedOssForQuestionAnswering)rl  ZSmolLM3ForQuestionAnswering)rp  ZSplinterForQuestionAnswering)rr  ZSqueezeBertForQuestionAnswering)r~  ZT5ForQuestionAnswering)r  ZUMT5ForQuestionAnswering)r  ZXLMForQuestionAnsweringSimple)r  ZXLMRobertaForQuestionAnswering)r  Z XLMRobertaXLForQuestionAnswering)r  ZXLNetForQuestionAnsweringSimple)r  ZXmodForQuestionAnswering)r  ZYosoForQuestionAnswering)r  ZTapasForQuestionAnswering))r3   ZBlipForQuestionAnsweringr.  )r  ZViltForQuestionAnswering))r   ZLayoutLMForQuestionAnsweringr<  r=  )V)r   ZAlbertForTokenClassification)r   ZApertusForTokenClassification)r   ZArceeForTokenClassification)r'   ZBertForTokenClassification)r*   ZBigBirdForTokenClassification)r,   ZBioGptForTokenClassification)r5   ZBloomForTokenClassification)r6   ZBrosForTokenClassification)r7   ZCamembertForTokenClassification)r8   ZCanineForTokenClassification)rI   ZConvBertForTokenClassification)rX   Z"Data2VecTextForTokenClassification)r]   ZDebertaForTokenClassification)r_   ZDebertaV2ForTokenClassification)ro   ZDiffLlamaForTokenClassification)ry   Z DistilBertForTokenClassification)r   ZElectraForTokenClassification)r   ZErnieForTokenClassification)r   ZErnieMForTokenClassification)r   ZEsmForTokenClassification)r   ZExaone4ForTokenClassification)r   ZFalconForTokenClassification)r   ZFlaubertForTokenClassification)r   ZFNetForTokenClassification)r   ZFunnelForTokenClassification)r   ZGemmaForTokenClassification)r   ZGemma2ForTokenClassification)r   ZGlmForTokenClassification)r   ZGlm4ForTokenClassification)r   GPT2ForTokenClassification)r   r>  )r   Z GPTBigCodeForTokenClassification)r   ZGPTNeoForTokenClassification)r   ZGPTNeoXForTokenClassification)r   ZGptOssForTokenClassification)r   ZHeliumForTokenClassification)r   ZIBertForTokenClassification)r   ZLayoutLMForTokenClassification)r   Z LayoutLMv2ForTokenClassification)r   Z LayoutLMv3ForTokenClassification)r   ZLiltForTokenClassification)r   ZLlamaForTokenClassification)r   Z LongformerForTokenClassification)r   ZLukeForTokenClassification)r   ZMarkupLMForTokenClassification)r   ZMegaForTokenClassification)r   Z"MegatronBertForTokenClassification)r   ZMiniMaxForTokenClassification)r   ZMistralForTokenClassification)r   ZMixtralForTokenClassification)r  Z MobileBertForTokenClassification)r  Z ModernBertForTokenClassification)r  ZMPNetForTokenClassification)r  ZMptForTokenClassification)r  ZMraForTokenClassification)r  ZMT5ForTokenClassification)r  ZNemotronForTokenClassification)r  ZNezhaForTokenClassification)r  Z#NystromformerForTokenClassification)r2  ZPersimmonForTokenClassification)r3  ZPhiForTokenClassification)r4  ZPhi3ForTokenClassification)r?  ZQDQBertForTokenClassification)r@  ZQwen2ForTokenClassification)rB  ZQwen2MoeForTokenClassification)rD  ZQwen3ForTokenClassification)rE  ZQwen3MoeForTokenClassification)rL  ZRemBertForTokenClassification)rQ  ZRobertaForTokenClassification)rS  Z)RobertaPreLayerNormForTokenClassification)rU  ZRoCBertForTokenClassification)rW  ZRoFormerForTokenClassification)rb  ZSeedOssForTokenClassification)rl  ZSmolLM3ForTokenClassification)rr  Z!SqueezeBertForTokenClassification)rs  ZStableLmForTokenClassification)rt  Z Starcoder2ForTokenClassification)r~  ZT5ForTokenClassification)r  ZT5GemmaForTokenClassification)r  ZUMT5ForTokenClassification)r  ZXLMForTokenClassification)r  Z XLMRobertaForTokenClassification)r  Z"XLMRobertaXLForTokenClassification)r  ZXLNetForTokenClassification)r  ZXmodForTokenClassification)r  ZYosoForTokenClassification)')r   ZAlbertForMultipleChoice)r'   ZBertForMultipleChoice)r*   ZBigBirdForMultipleChoice)r7   ZCamembertForMultipleChoice)r8   ZCanineForMultipleChoice)rI   ZConvBertForMultipleChoice)rX   ZData2VecTextForMultipleChoice)r_   ZDebertaV2ForMultipleChoice)ry   ZDistilBertForMultipleChoice)r   ZElectraForMultipleChoice)r   ZErnieForMultipleChoice)r   ZErnieMForMultipleChoice)r   ZFlaubertForMultipleChoice)r   ZFNetForMultipleChoice)r   ZFunnelForMultipleChoice)r   ZIBertForMultipleChoice)r   ZLongformerForMultipleChoice)r   ZLukeForMultipleChoice)r   ZMegaForMultipleChoice)r   ZMegatronBertForMultipleChoice)r  ZMobileBertForMultipleChoice)r  ZModernBertForMultipleChoice)r  ZMPNetForMultipleChoice)r  ZMraForMultipleChoice)r  ZNezhaForMultipleChoice)r  ZNystromformerForMultipleChoice)r?  ZQDQBertForMultipleChoice)rL  ZRemBertForMultipleChoice)rQ  ZRobertaForMultipleChoice)rS  Z$RobertaPreLayerNormForMultipleChoice)rU  ZRoCBertForMultipleChoice)rW  ZRoFormerForMultipleChoice)rr  ZSqueezeBertForMultipleChoice)r  ZXLMForMultipleChoice)r  ZXLMRobertaForMultipleChoice)r  ZXLMRobertaXLForMultipleChoice)r  ZXLNetForMultipleChoice)r  ZXmodForMultipleChoice)r  ZYosoForMultipleChoice))r'   ZBertForNextSentencePrediction)r   ZErnieForNextSentencePrediction)r   ZFNetForNextSentencePrediction)r   Z%MegatronBertForNextSentencePrediction)r  Z#MobileBertForNextSentencePrediction)r  ZNezhaForNextSentencePrediction)r?  Z QDQBertForNextSentencePrediction))r   ZASTForAudioClassification)rV   Z&Data2VecAudioForSequenceClassification)r   ZHubertForSequenceClassification)re  ZSEWForSequenceClassification)rf  ZSEWDForSequenceClassification)r  Z"UniSpeechForSequenceClassification)r  Z%UniSpeechSatForSequenceClassification)r  Z!Wav2Vec2ForSequenceClassification)r  Z%Wav2Vec2BertForSequenceClassification)r  Z*Wav2Vec2ConformerForSequenceClassification)r  ZWavLMForSequenceClassification)r  ZWhisperForAudioClassification))rV   ZData2VecAudioForCTC)r   ZHubertForCTC)r   ZMCTCTForCTC)re  Z	SEWForCTC)rf  Z
SEWDForCTC)r  ZUniSpeechForCTC)r  ZUniSpeechSatForCTC)r  ZWav2Vec2ForCTC)r  ZWav2Vec2BertForCTC)r  ZWav2Vec2ConformerForCTC)r  ZWavLMForCTC))rV   Z(Data2VecAudioForAudioFrameClassification)r  Z'UniSpeechSatForAudioFrameClassification)r  Z#Wav2Vec2ForAudioFrameClassification)r  Z'Wav2Vec2BertForAudioFrameClassification)r  Z,Wav2Vec2ConformerForAudioFrameClassification)r  Z WavLMForAudioFrameClassification))rV   ZData2VecAudioForXVector)r  ZUniSpeechSatForXVector)r  ZWav2Vec2ForXVector)r  ZWav2Vec2BertForXVector)r  ZWav2Vec2ConformerForXVector)r  ZWavLMForXVectorr   )ro  ZSpeechT5ForTextToSpeech)
r"   rO   )r   r   r   )r  Z MusicgenForConditionalGeneration)r  Z&MusicgenMelodyForConditionalGeneration)Zqwen2_5_omniZ#Qwen2_5OmniForConditionalGeneration)r`  ZSeamlessM4TForTextToSpeech)ra  ZSeamlessM4Tv2ForTextToSpeechr  )
r   r   r2   )r4   ZBlip2ForImageTextRetrievalr:   r<   r?   r   rg  ri  ))r%   ZBeitBackbone)r.   ZBitBackbone)rK   ZConvNextBackbone)rM   ZConvNextV2Backbone)rq   ZDinatBackbone)rs   ZDinov2Backbone)ru   ZDinov2WithRegistersBackbone)r   ZFocalNetBackboner   )r   ZHieraBackbone)r   ZMaskFormerSwinBackbone)r  ZNatBackbone)r>  ZPvtV2Backbone)rN  ZResNetBackbone)Zrt_detr_resnetZRTDetrResNetBackbone)rx  ZSwinBackbone)r|  ZSwinv2Backbone)r  ZTextNetBackboner  )r  ZVitDetBackbone)Zvitpose_backboneZVitPoseBackbone)r[  r\  )r^  r]  r_  )Z
superpointZSuperPointForKeypointDetection))r   Z!EfficientLoFTRForKeypointMatchingr   )Z	superglueZSuperGlueForKeypointMatching)r   r&   r)   r>   rW   r\   r^   rx   r   )r   ZEmu3TextModelr   r   )r   r   r   )r  ZMllamaTextModelr  )r  ZMT5EncoderModelr  rG  rK  rP  rR  rT  rV  rq  )r~  ZT5EncoderModel)r  ZT5GemmaEncoderModel)r  ZUMT5EncoderModelr  r  r  )r,  Z'PatchTSMixerForTimeSeriesClassification)r-  ZPatchTSTForClassification)r,  ZPatchTSMixerForRegression)r-  ZPatchTSTForRegression)r  ZTimesFmModelForPrediction)rz  ZSwin2SRForImageSuperResolutionrU   c                   @   s   e Zd ZeZdS )AutoModelForMaskGenerationN)r   r   r   !MODEL_FOR_MASK_GENERATION_MAPPING_model_mappingr   r   r   r   r?  V  s   r?  c                   @   s   e Zd ZeZdS )AutoModelForKeypointDetectionN)r   r   r   $MODEL_FOR_KEYPOINT_DETECTION_MAPPINGrA  r   r   r   r   rB  Z  s   rB  c                   @   s   e Zd ZeZdS )AutoModelForKeypointMatchingN)r   r   r   #MODEL_FOR_KEYPOINT_MATCHING_MAPPINGrA  r   r   r   r   rD  ^  s   rD  c                   @   s   e Zd ZeZdS )AutoModelForTextEncodingN)r   r   r   MODEL_FOR_TEXT_ENCODING_MAPPINGrA  r   r   r   r   rF  b  s   rF  c                   @   s   e Zd ZeZdS )AutoModelForImageToImageN)r   r   r    MODEL_FOR_IMAGE_TO_IMAGE_MAPPINGrA  r   r   r   r   rH  f  s   rH  c                   @   s   e Zd ZeZdS )	AutoModelN)r   r   r   MODEL_MAPPINGrA  r   r   r   r   rJ  j  s   rJ  c                   @   s   e Zd ZeZdS )AutoModelForPreTrainingN)r   r   r   MODEL_FOR_PRETRAINING_MAPPINGrA  r   r   r   r   rL  q  s   rL  Zpretraining)head_docc                   @   s   e Zd ZeZdS )_AutoModelWithLMHeadN)r   r   r   MODEL_WITH_LM_HEAD_MAPPINGrA  r   r   r   r   rO  y  s   rO  zlanguage modelingc                       s@   e Zd ZeZeed  eee	j
e f dd fddZ  ZS )AutoModelForCausalLMr   clspretrained_model_name_or_pathreturnc                    s   t  j|g|R i |S Nsuperfrom_pretrainedrS  rT  Z
model_argskwargs	__class__r   r   rY    s    z$AutoModelForCausalLM.from_pretrained)r   r   r   MODEL_FOR_CAUSAL_LM_MAPPINGrA  classmethodtyper   strosPathLikerY  __classcell__r   r   r\  r   rQ    s   rQ  zcausal language modelingc                   @   s   e Zd ZeZdS )AutoModelForMaskedLMN)r   r   r   MODEL_FOR_MASKED_LM_MAPPINGrA  r   r   r   r   re    s   re  zmasked language modelingc                   @   s   e Zd ZeZdS )AutoModelForSeq2SeqLMN)r   r   r   &MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPINGrA  r   r   r   r   rg    s   rg  z&sequence-to-sequence language modelingzgoogle-t5/t5-base)rN  Zcheckpoint_for_examplec                   @   s   e Zd ZeZdS )"AutoModelForSequenceClassificationN)r   r   r   )MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPINGrA  r   r   r   r   ri    s   ri  zsequence classificationc                   @   s   e Zd ZeZdS )AutoModelForQuestionAnsweringN)r   r   r   $MODEL_FOR_QUESTION_ANSWERING_MAPPINGrA  r   r   r   r   rk    s   rk  zquestion answeringc                   @   s   e Zd ZeZdS )"AutoModelForTableQuestionAnsweringN)r   r   r   *MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPINGrA  r   r   r   r   rm    s   rm  ztable question answeringzgoogle/tapas-base-finetuned-wtqc                   @   s   e Zd ZeZdS )#AutoModelForVisualQuestionAnsweringN)r   r   r   +MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPINGrA  r   r   r   r   ro    s   ro  zvisual question answeringzdandelin/vilt-b32-finetuned-vqac                   @   s   e Zd ZeZdS )%AutoModelForDocumentQuestionAnsweringN)r   r   r   -MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPINGrA  r   r   r   r   rq    s   rq  zdocument question answeringz/impira/layoutlm-document-qa", revision="52e01b3c                   @   s   e Zd ZeZdS )AutoModelForTokenClassificationN)r   r   r   &MODEL_FOR_TOKEN_CLASSIFICATION_MAPPINGrA  r   r   r   r   rs    s   rs  ztoken classificationc                   @   s   e Zd ZeZdS )AutoModelForMultipleChoiceN)r   r   r   !MODEL_FOR_MULTIPLE_CHOICE_MAPPINGrA  r   r   r   r   ru    s   ru  zmultiple choicec                   @   s   e Zd ZeZdS )"AutoModelForNextSentencePredictionN)r   r   r   *MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPINGrA  r   r   r   r   rw    s   rw  znext sentence predictionc                   @   s   e Zd ZeZdS )AutoModelForImageClassificationN)r   r   r   &MODEL_FOR_IMAGE_CLASSIFICATION_MAPPINGrA  r   r   r   r   ry    s   ry  zimage classificationc                   @   s   e Zd ZeZdS )'AutoModelForZeroShotImageClassificationN)r   r   r   0MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPINGrA  r   r   r   r   r{    s   r{  zzero-shot image classificationc                   @   s   e Zd ZeZdS )AutoModelForImageSegmentationN)r   r   r   $MODEL_FOR_IMAGE_SEGMENTATION_MAPPINGrA  r   r   r   r   r}    s   r}  zimage segmentationc                   @   s   e Zd ZeZdS ) AutoModelForSemanticSegmentationN)r   r   r   'MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPINGrA  r   r   r   r   r    s   r  zsemantic segmentationc                   @   s   e Zd ZeZdS ) AutoModelForTimeSeriesPredictionN)r   r   r   (MODEL_FOR_TIME_SERIES_PREDICTION_MAPPINGrA  r   r   r   r   r    s   r  ztime-series predictionc                   @   s   e Zd ZeZdS )!AutoModelForUniversalSegmentationN)r   r   r   (MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPINGrA  r   r   r   r   r    s   r  zuniversal image segmentationc                   @   s   e Zd ZeZdS ) AutoModelForInstanceSegmentationN)r   r   r   'MODEL_FOR_INSTANCE_SEGMENTATION_MAPPINGrA  r   r   r   r   r    s   r  zinstance segmentationc                   @   s   e Zd ZeZdS )AutoModelForObjectDetectionN)r   r   r   "MODEL_FOR_OBJECT_DETECTION_MAPPINGrA  r   r   r   r   r  &  s   r  zobject detectionc                   @   s   e Zd ZeZdS )#AutoModelForZeroShotObjectDetectionN)r   r   r   ,MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPINGrA  r   r   r   r   r  -  s   r  zzero-shot object detectionc                   @   s   e Zd ZeZdS )AutoModelForDepthEstimationN)r   r   r   "MODEL_FOR_DEPTH_ESTIMATION_MAPPINGrA  r   r   r   r   r  6  s   r  zdepth estimationc                   @   s   e Zd ZeZdS )AutoModelForVideoClassificationN)r   r   r   &MODEL_FOR_VIDEO_CLASSIFICATION_MAPPINGrA  r   r   r   r   r  =  s   r  zvideo classificationc                   @   s   e Zd ZeZdS )_AutoModelForVision2SeqN)r   r   r   MODEL_FOR_VISION_2_SEQ_MAPPINGrA  r   r   r   r   r  E  s   r  zvision-to-text modelingc                       s@   e Zd ZeZeed  eee	j
e f dd fddZ  ZS )AutoModelForImageTextToTextr   rR  c                    s   t  j|g|R i |S rV  rW  rZ  r\  r   r   rY  P  s    z+AutoModelForImageTextToText.from_pretrained)r   r   r   $MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPINGrA  r_  r`  r   ra  rb  rc  rY  rd  r   r   r\  r   r  L  s   r  zimage-text-to-text modelingc                   @   s   e Zd ZeZdS )AutoModelForAudioClassificationN)r   r   r   &MODEL_FOR_AUDIO_CLASSIFICATION_MAPPINGrA  r   r   r   r   r  ]  s   r  zaudio classificationc                   @   s   e Zd ZeZdS )AutoModelForCTCN)r   r   r   MODEL_FOR_CTC_MAPPINGrA  r   r   r   r   r  d  s   r  z%connectionist temporal classificationc                   @   s   e Zd ZeZdS )AutoModelForSpeechSeq2SeqN)r   r   r   "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPINGrA  r   r   r   r   r  k  s   r  z,sequence-to-sequence speech-to-text modelingc                   @   s   e Zd ZeZdS )$AutoModelForAudioFrameClassificationN)r   r   r   ,MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPINGrA  r   r   r   r   r  t  s   r  z"audio frame (token) classificationc                   @   s   e Zd ZeZdS )AutoModelForAudioXVectorN)r   r   r   MODEL_FOR_AUDIO_XVECTOR_MAPPINGrA  r   r   r   r   r  }  s   r  c                   @   s   e Zd ZeZdS )AutoModelForTextToSpectrogramN)r   r   r   %MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPINGrA  r   r   r   r   r    s   r  c                   @   s   e Zd ZeZdS )AutoModelForTextToWaveformN)r   r   r   "MODEL_FOR_TEXT_TO_WAVEFORM_MAPPINGrA  r   r   r   r   r    s   r  c                   @   s   e Zd ZeZdS )AutoBackboneN)r   r   r   MODEL_FOR_BACKBONE_MAPPINGrA  r   r   r   r   r    s   r  zaudio retrieval via x-vectorc                   @   s   e Zd ZeZdS )AutoModelForMaskedImageModelingN)r   r   r   'MODEL_FOR_MASKED_IMAGE_MODELING_MAPPINGrA  r   r   r   r   r    s   r  zmasked image modelingc                   @   s   e Zd ZeZdS )AutoModelForAudioTokenizationN)r   r   r   $MODEL_FOR_AUDIO_TOKENIZATION_MAPPINGrA  r   r   r   r   r    s   r  z$audio tokenization through codebooksc                       s0   e Zd Ze fddZe fddZ  ZS )AutoModelWithLMHeadc                    s   t dt t |S NzThe class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.warningswarnFutureWarningrX  from_configrS  configr\  r   r   r    s
    zAutoModelWithLMHead.from_configc                    s&   t dt t j|g|R i |S r  r  r  r  rX  rY  rZ  r\  r   r   rY    s
    z#AutoModelWithLMHead.from_pretrainedr   r   r   r_  r  rY  rd  r   r   r\  r   r    s   	r  c                       s0   e Zd Ze fddZe fddZ  ZS )AutoModelForVision2Seqc                    s   t dt t |S NzThe class `AutoModelForVision2Seq` is deprecated and will be removed in v5.0. Please use `AutoModelForImageTextToText` instead.r  r  r\  r   r   r    s
    z"AutoModelForVision2Seq.from_configc                    s&   t dt t j|g|R i |S r  r  rZ  r\  r   r   rY    s
    z&AutoModelForVision2Seq.from_pretrainedr  r   r   r\  r   r    s   r  )Yr  r  r  r  r  'MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPINGr^  r  rr  r  rz  MODEL_FOR_IMAGE_MAPPINGr~  rI  rC  rE  r  r  rf  r@  rv  rx  r  rM  rl  r  rh  rj  r  rn  rG  r  r  r  rt  r  r  r  MODEL_FOR_RETRIEVAL_MAPPINGr  rp  rK  rP  r|  r  ,MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING(MODEL_FOR_TIME_SERIES_REGRESSION_MAPPINGrJ  r  r  r  r  r  rQ  r  r  ry  r}  rH  r  rB  rD  r?  rF  r  re  ru  rw  r  rL  rk  r  rg  ri  r  rm  r  r  r  rs  r  r  r  ro  rq  r  r{  r  r  )__doc__rb  r  collectionsr   typingr   r   utilsr   Zauto_factoryr   r	   r
   r   Zconfiguration_autor   Z
generationr   Zmodeling_utilsr   r   Z
get_loggerr   loggerZMODEL_MAPPING_NAMESZ#MODEL_FOR_PRETRAINING_MAPPING_NAMESZ MODEL_WITH_LM_HEAD_MAPPING_NAMESZ!MODEL_FOR_CAUSAL_LM_MAPPING_NAMESZMODEL_FOR_IMAGE_MAPPING_NAMESZ-MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMESZ-MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMESZ,MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMESZ*MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMESZ-MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMESZ-MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMESZ.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMESZ,MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMESZ$MODEL_FOR_VISION_2_SEQ_MAPPING_NAMESZ!MODEL_FOR_RETRIEVAL_MAPPING_NAMESZ*MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMESZ!MODEL_FOR_MASKED_LM_MAPPING_NAMESZ(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMESZ2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMESZ(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMESZ,MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMESZ(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMESZ/MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMESZ*MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMESZ0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESZ1MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMESZ3MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMESZ,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMESZ'MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMESZ0MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMESZ,MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMESZMODEL_FOR_CTC_MAPPING_NAMESZ2MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMESZ%MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMESZ+MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMESZ(MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMESZ6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMESZ MODEL_FOR_BACKBONE_MAPPING_NAMESZ'MODEL_FOR_MASK_GENERATION_MAPPING_NAMESZ*MODEL_FOR_KEYPOINT_DETECTION_MAPPING_NAMESZ)MODEL_FOR_KEYPOINT_MATCHING_MAPPING_NAMESZ%MODEL_FOR_TEXT_ENCODING_MAPPING_NAMESZ2MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING_NAMESZ.MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING_NAMESZ.MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMESZ&MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMESZ"MODEL_FOR_AUDIO_TOKENIZATION_NAMESrK  rM  rP  r^  r  rz  r|  r~  r  r  r  r  r  r  r  rp  rr  rf  r  r  r  r  r  rh  rj  rl  rn  rt  rv  rx  r  r  r  r  r  r  r  r  r@  rC  rE  rG  r  r  r  rI  r  r?  rB  rD  rF  rH  rJ  rL  rO  rQ  re  rg  ri  rk  rm  ro  rq  rs  ru  rw  ry  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __all__r   r   r   r   <module>   s  
  ~^Z AF	24#vS\-
$




















