"""
Integration with Deepspeed
"""

import copy
import importlib.metadata as importlib_metadata
import importlib.util
import weakref
from functools import partialmethod

from ..dependency_versions_check import dep_version_check
from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch
    from torch import nn


logger = logging.get_logger(__name__)


def is_deepspeed_available():
    package_exists = importlib.util.find_spec("deepspeed") is not None

    # Check that the distribution metadata is resolvable as well, so that a stray local "deepspeed"
    # directory is not mistaken for the installed library.
    if package_exists:
        try:
            _ = importlib_metadata.metadata("deepspeed")
            return True
        except importlib_metadata.PackageNotFoundError:
            return False


if is_accelerate_available() and is_deepspeed_available():
    from accelerate.utils.deepspeed import HfDeepSpeedConfig as DeepSpeedConfig
else:
    # Inherits from a dummy `object` if accelerate or deepspeed is not available, so that python
    # succeeds to import this file; the DeepSpeed glue code never actually uses this dummy base.
    from builtins import object as DeepSpeedConfig


class HfDeepSpeedConfig(DeepSpeedConfig):
    """
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`): path to DeepSpeed config file or dict.

    """

    def __init__(self, config_file_or_dict):
        # set global weakref object
        set_hf_deepspeed_config(self)
        dep_version_check("accelerate")
        dep_version_check("deepspeed")
        super().__init__(config_file_or_dict)
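
# Illustrative usage sketch (comments only, based on the docstring above): when running ZeRO-3
# outside of `Trainer`, `HfDeepSpeedConfig` must be constructed *before* `from_pretrained` and a
# reference to it must stay alive, so that weight loading goes through the DeepSpeed-aware path.
# `ds_config` below is a placeholder for a real DeepSpeed config dict or a path to a JSON file,
# and the checkpoint name is just an example.
#
#     from transformers import AutoModel
#     from transformers.integrations import HfDeepSpeedConfig
#
#     dschf = HfDeepSpeedConfig(ds_config)  # keep this object alive for as long as the model is used
#     model = AutoModel.from_pretrained("bigscience/T0_3B")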


class HfTrainerDeepSpeedConfig(HfDeepSpeedConfig):
    """
    The `HfTrainerDeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    """

    def __init__(self, config_file_or_dict):
        super().__init__(config_file_or_dict)
        self._dtype = None
        self.mismatches = []

    def dtype(self):
        if self._dtype is None:
            raise ValueError("trainer_config_process() wasn't called yet to tell dtype")
        return self._dtype

    def is_auto(self, ds_key_long):
        val = self.get_value(ds_key_long)
        if val is None:
            return False
        else:
            return val == "auto"

    def fill_match(self, ds_key_long, hf_val, hf_key=None, must_match=True):
        """
        A utility method that massages the config file and can optionally verify that the values match.

        1. Replace "auto" values with `TrainingArguments` value.

        2. If it wasn't "auto" and `must_match` is true, then check that DS config matches Trainer
        config values and if mismatched add the entry to `self.mismatches` - will assert during
        `trainer_config_finalize` for one or more mismatches.

        """
        config, ds_key = self.find_config_node(ds_key_long)
        if config is None:
            return

        if config.get(ds_key) == "auto":
            config[ds_key] = hf_val
            return

        if not must_match:
            return

        ds_val = config.get(ds_key)
        if ds_val is not None and ds_val != hf_val:
            self.mismatches.append(f"- ds {ds_key_long}={ds_val} vs hf {hf_key}={hf_val}")
    fill_only = partialmethod(fill_match, must_match=False)

    def trainer_config_process(self, args, auto_find_batch_size=False):
        """
        Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
        creation.
        """
        # DeepSpeed does:
        #   train_batch_size = world_size * train_micro_batch_size_per_gpu * gradient_accumulation_steps
        train_batch_size = args.world_size * args.per_device_train_batch_size * args.gradient_accumulation_steps
        self.fill_match(
            "train_micro_batch_size_per_gpu",
            args.per_device_train_batch_size,
            "per_device_train_batch_size",
            not auto_find_batch_size,
        )
        self.fill_match("gradient_accumulation_steps", args.gradient_accumulation_steps, "gradient_accumulation_steps")
        self.fill_match("train_batch_size", train_batch_size, "train_batch_size (calculated)", not auto_find_batch_size)
        self.fill_match("gradient_clipping", args.max_grad_norm, "max_grad_norm")

        self.fill_match("optimizer.params.lr", args.learning_rate, "learning_rate")
        self.fill_match("optimizer.params.betas", [args.adam_beta1, args.adam_beta2], "adam_beta1+adam_beta2")
        self.fill_match("optimizer.params.eps", args.adam_epsilon, "adam_epsilon")
        self.fill_match("optimizer.params.weight_decay", args.weight_decay, "weight_decay")

        self.fill_only("scheduler.params.warmup_min_lr", 0)  # not a trainer arg
        self.fill_match("scheduler.params.warmup_max_lr", args.learning_rate, "learning_rate")
        # total_num_steps and warmup_num_steps get set in trainer_config_finalize

        # fp16
        if args.fp16 or args.fp16_full_eval:
            fp16_backend = "apex" if args.fp16_backend == "apex" else "amp"
        else:
            fp16_backend = None

        if args.save_on_each_node:
            # deepspeed uses shared storage by default; override it when checkpoints should be saved on each node
            self.config["checkpoint"] = self.config.get("checkpoint", {})
            self.config["checkpoint"]["use_node_local_storage"] = args.save_on_each_node

        # amp: similar to the pytorch native amp - it has a bunch of optional params but we won't set
        # any here unless the user did the work
        self.fill_match(
            "fp16.enabled",
            ((args.fp16 or args.fp16_full_eval) and fp16_backend == "amp"),
            "fp16|fp16_full_eval+fp16_backend(amp)",
        )

        # apex: delegates amp work to apex (which needs to be available), but it cannot be used with any
        # ZeRO features
        self.fill_match("amp.enabled", fp16_backend == "apex", "fp16+fp16_backend(apex)")
        self.fill_match("amp.opt_level", args.fp16_opt_level, "fp16_opt_level")

        self.fill_match("bf16.enabled", (args.bf16 or args.bf16_full_eval), "bf16|bf16_full_eval")

        # deepspeed's default mode is fp16 unless there is a config that says differently
        if self.is_true("bf16.enabled"):
            self._dtype = torch.bfloat16
        elif self.is_false("fp16.enabled"):
            self._dtype = torch.float32
        else:
            self._dtype = torch.float16

    def trainer_config_finalize(self, args, model, num_training_steps):
        """
        This stage is run after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        """
        # zero: deal with config keys that use the `auto` value and rely on the model's hidden_size
        hidden_size_based_keys = [
            "zero_optimization.reduce_bucket_size",
            "zero_optimization.stage3_prefetch_bucket_size",
            "zero_optimization.stage3_param_persistence_threshold",
        ]
        hidden_size_auto_keys = [x for x in hidden_size_based_keys if self.is_auto(x)]

        if len(hidden_size_auto_keys) > 0:
            if hasattr(model.config, "hidden_size"):
                hidden_size = model.config.hidden_size
            elif hasattr(model.config, "hidden_sizes"):
                # if there are many hidden sizes pick the largest one
                hidden_size = max(model.config.hidden_sizes)
            elif hasattr(model.config, "text_config") and hasattr(model.config.text_config, "hidden_size"):
                hidden_size = model.config.text_config.hidden_size
            elif hasattr(model.config, "text_config") and hasattr(model.config.text_config, "hidden_sizes"):
                hidden_size = max(model.config.text_config.hidden_sizes)
            else:
                raise ValueError(
                    "The model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not"
                    " possible to automatically fill out the following `auto` entries in the DeepSpeed config file:"
                    f" {hidden_size_auto_keys}. You can fix that by replacing `auto` values for these keys with an"
                    " integer value of your choice."
                )

            self.fill_only("zero_optimization.reduce_bucket_size", hidden_size * hidden_size)
            if self.is_zero3():
                # automatically assign the optimal config values based on model config
                self.fill_only("zero_optimization.stage3_prefetch_bucket_size", int(0.9 * hidden_size * hidden_size))
                self.fill_only("zero_optimization.stage3_param_persistence_threshold", 10 * hidden_size)

        # scheduler
        self.fill_match("scheduler.params.total_num_steps", num_training_steps, "num_training_steps (calculated)")
        self.fill_match("scheduler.params.warmup_num_steps", args.get_warmup_steps(num_training_steps), "warmup_steps")

        if len(self.mismatches) > 0:
            mismatches = "\n".join(self.mismatches)
            raise ValueError(
                "Please correct the following DeepSpeed config values that mismatch TrainingArguments"
                f" values:\n{mismatches}\nThe easiest method is to set these DeepSpeed config values to 'auto'."
            )

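# Illustrative sketch (values are placeholders, not a recommendation) of the kind of DeepSpeed
# config this class massages: every "auto" below is resolved from `TrainingArguments` by
# `trainer_config_process()` / `trainer_config_finalize()` through `fill_match()` / `fill_only()`.
#
#     {
#         "train_micro_batch_size_per_gpu": "auto",
#         "gradient_accumulation_steps": "auto",
#         "gradient_clipping": "auto",
#         "optimizer": {"type": "AdamW",
#                       "params": {"lr": "auto", "betas": "auto", "eps": "auto", "weight_decay": "auto"}},
#         "scheduler": {"type": "WarmupLR",
#                       "params": {"warmup_min_lr": "auto", "warmup_max_lr": "auto", "warmup_num_steps": "auto"}},
#         "zero_optimization": {"stage": 3, "reduce_bucket_size": "auto"},
#         "bf16": {"enabled": "auto"},
#     }
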
# keep the config object global to be able to access it anywhere during TrainingArguments life-cycle
_hf_deepspeed_config_weak_ref = None


def set_hf_deepspeed_config(hf_deepspeed_config_obj):
    # this is a special weakref global object to allow us to get to Deepspeed config from APIs
    # that don't have an easy way to get to the Deepspeed config outside of the Trainer domain.
    global _hf_deepspeed_config_weak_ref
    # will go away automatically when HfDeepSpeedConfig is destroyed (when TrainingArguments is destroyed)
    _hf_deepspeed_config_weak_ref = weakref.ref(hf_deepspeed_config_obj)


def unset_hf_deepspeed_config():
    # useful for unit tests to ensure the global state doesn't leak - call from `tearDown` method
    global _hf_deepspeed_config_weak_ref
    _hf_deepspeed_config_weak_ref = None


def is_deepspeed_zero3_enabled():
    if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
        return _hf_deepspeed_config_weak_ref().is_zero3()
    else:
        return False


def deepspeed_config():
    if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
        return _hf_deepspeed_config_weak_ref().config
    else:
        return None


def _load_state_dict_into_zero3_model(model_to_load, state_dict):
    """
    Loads state dict into a model specifically for Zero3, since DeepSpeed does not support the `transformers`
    tensor parallelism API.

    Nearly identical code to PyTorch's `_load_from_state_dict`
    """
    # copy state_dict so `load` can modify it
    metadata = getattr(state_dict, "_metadata", None)
    state_dict = state_dict.copy()
    if metadata is not None:
        state_dict._metadata = metadata

    error_msgs = []

    # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants,
    # so we need to apply the function recursively.
    def load(module: nn.Module, state_dict, prefix="", assign_to_params_buffers=False):
        local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
        local_metadata["assign_to_params_buffers"] = assign_to_params_buffers

        args = (state_dict, prefix, local_metadata, True, [], [], error_msgs)
        # Parameters of module and children will start with prefix. We can exit early if there are none in this
        # state_dict
        if is_deepspeed_zero3_enabled() and len([key for key in state_dict if key.startswith(prefix)]) > 0:
            import deepspeed

            # In sharded models, each shard has only part of the full state_dict, so only gather
            # parameters that are in the current state_dict.
            named_parameters = dict(module.named_parameters(prefix=prefix[:-1], recurse=False))
            params_to_gather = [named_parameters[k] for k in state_dict.keys() if k in named_parameters]
            if len(params_to_gather) > 0:
                # because zero3 puts placeholders in model params, this context
                # manager gathers (unpartitions) the params of the current layer, then loads from
                # the state dict and then re-partitions them again
                with deepspeed.zero.GatheredParameters(params_to_gather, modifier_rank=0):
                    if torch.distributed.get_rank() == 0:
                        module._load_from_state_dict(*args)

        for name, child in module._modules.items():
            if child is not None:
                load(child, state_dict, prefix + name + ".", assign_to_params_buffers)

    load(model_to_load, state_dict, prefix="")

    return error_msgs


def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps, model_parameters):
    """
    A convenience wrapper that deals with optimizer and lr scheduler configuration.
    """
    from accelerate.utils import DummyOptim, DummyScheduler

    config = hf_deepspeed_config.config

    # Mixing and matching DS and HF schedulers/optimizers is supported, except that with ZeRO Offload
    # only optimizers that have both CPU and GPU implementations will work (except LAMB).

    optimizer = None
    if "optimizer" in config:
        if args.adafactor:
            raise ValueError(
                "--adafactor was passed, but also found `optimizer` configured in the DeepSpeed config. "
                "Only one optimizer can be configured."
            )
        optimizer = DummyOptim(params=model_parameters)
    else:
        if hf_deepspeed_config.is_offload():
            logger.info(
                "Detected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the"
                " custom optimizer has both CPU and GPU implementation (except LAMB)"
            )

        # ds supports Adam, OneBitAdam, and Lamb optimizers and can import other optimizers from torch.
        # But trainer uses AdamW by default.
        optimizer = trainer.create_optimizer()
        # To use other optimizers requires voiding warranty with: `zero_allow_untested_optimizer`
        config["zero_allow_untested_optimizer"] = True

    lr_scheduler = None
    if "scheduler" in config:
        lr_scheduler = DummyScheduler(optimizer)
    else:
        if isinstance(optimizer, DummyOptim):

            def _lr_scheduler_callable(optimizer):
                # create a shallow copy first, so later modifications do not affect the original trainer
                trainer_copy = copy.copy(trainer)
                # at the time _lr_scheduler_callable is called, trainer.lr_scheduler has already been set;
                # update it to None so that we can re-create a new scheduler
                trainer_copy.lr_scheduler = None
                lr_scheduler = trainer_copy.create_scheduler(
                    num_training_steps=num_training_steps, optimizer=optimizer
                )
                return lr_scheduler

            lr_scheduler = DummyScheduler(optimizer, lr_scheduler_callable=_lr_scheduler_callable)
        else:
            lr_scheduler = trainer.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer)

    return optimizer, lr_scheduler


def deepspeed_init(trainer, num_training_steps, inference=False):
    """
    Init DeepSpeed, after updating the DeepSpeed configuration with any relevant Trainer's args.

    If `resume_from_checkpoint` was passed then an attempt to resume from a previously saved checkpoint will be made.

    Args:
        trainer: Trainer object
        num_training_steps: per single gpu
        inference: launch in inference mode (no optimizer and no lr scheduler)

    Returns: optimizer, lr_scheduler

    We may use `deepspeed_init` more than once during the life of Trainer, when we do - it's a temp hack based on:
    https://github.com/deepspeedai/DeepSpeed/issues/1394#issuecomment-937405374 until Deepspeed fixes a bug where it
    can't resume from a checkpoint after it did some stepping https://github.com/deepspeedai/DeepSpeed/issues/1612

    """
    from deepspeed.utils import logger as ds_logger

    model = trainer.model
    args = trainer.args

    hf_deepspeed_config = trainer.accelerator.state.deepspeed_plugin.hf_ds_config

    # resume config update - some bits like `model` and `num_training_steps` only become available during train
    hf_deepspeed_config.trainer_config_finalize(args, model, num_training_steps)

    # set the Deepspeed log level consistent with the Trainer
    ds_logger.setLevel(args.get_process_log_level())

    if inference:
        # only ZeRO-3 makes sense for inference
        if not hf_deepspeed_config.is_zero3():
            raise ValueError("ZeRO inference only makes sense with ZeRO Stage 3 - please adjust your config")

        # in case the training config is re-used for inference
        hf_deepspeed_config.del_config_sub_tree("optimizer")
        hf_deepspeed_config.del_config_sub_tree("lr_scheduler")
        optimizer, lr_scheduler = None, None
        model_parameters = None
    else:
        trainer.optimizer = None  # important for when deepspeed_init is used as re-init
        deepspeed_tp_size = hf_deepspeed_config.config.get("tensor_parallel", {}).get("autotp_size", 0)
        if deepspeed_tp_size > 1:
            import deepspeed

            model = deepspeed.tp_model_init(model=model, tp_size=deepspeed_tp_size, dtype=hf_deepspeed_config.dtype())

        model_parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
        optimizer, lr_scheduler = deepspeed_optim_sched(
            trainer, hf_deepspeed_config, args, num_training_steps, model_parameters
        )

    return optimizer, lr_scheduler


def deepspeed_load_checkpoint(deepspeed_engine, checkpoint_path, load_module_strict=True):
    # it's possible that the user is trying to resume from model_path, which doesn't necessarily
    # contain a deepspeed checkpoint - e.g. examples just check if the dir exists and assume it's
    # a resume from a checkpoint and not just a local pretrained weight. So we check here if the
    # path contains what looks like a deepspeed checkpoint
    import glob

    deepspeed_checkpoint_dirs = sorted(glob.glob(f"{checkpoint_path}/global_step*"))

    if len(deepspeed_checkpoint_dirs) > 0:
        logger.info(f"Attempting to resume from {checkpoint_path}")
        # this magically updates self.optimizer and self.lr_scheduler
        load_path, _ = deepspeed_engine.load_checkpoint(
            checkpoint_path,
            load_module_strict=load_module_strict,
            load_optimizer_states=True,
            load_lr_scheduler_states=True,
        )
        if load_path is None:
            raise ValueError(f"[deepspeed] failed to resume from checkpoint {checkpoint_path}")
    else:
        raise ValueError(f"Can't find a valid checkpoint at {checkpoint_path}")
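

# Illustrative caller-side sketch (comments only; `trainer`, `num_training_steps` and
# `resume_from_checkpoint` are assumed to exist in the caller and are not defined here):
# inside `Trainer`, these helpers are typically wired up roughly as
#
#     optimizer, lr_scheduler = deepspeed_init(trainer, num_training_steps)
#     ...
#     if resume_from_checkpoint is not None:
#         deepspeed_load_checkpoint(trainer.model_wrapped, resume_from_checkpoint)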