diff --git a/llm/predict/test.py b/llm/predict/test.py
deleted file mode 100644
index 2a8e50986277..000000000000
--- a/llm/predict/test.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import sys
-sys.path.append('/home/ldn/baidu/pyreft/paddle-version/mypr/0705/PaddleNLP/llm')
-from utils.utils import dybatch_preprocess
diff --git a/llm/run_finetune copy.py b/llm/run_finetune copy.py
deleted file mode 100644
index 2a2cafccaeb5..000000000000
--- a/llm/run_finetune copy.py
+++ /dev/null
@@ -1,859 +0,0 @@
-# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import os
-import sys
-from functools import partial
-
-import paddle
-from utils.argument import (
-    DataArgument,
-    GenerateArgument,
-    ModelArgument,
-    QuantArgument,
-    ReftArgument,
-    TrainingArguments,
-)
-from utils.data import LoReftSupervisedDataset, get_convert_example
-from utils.utils import (
-    CausalLMTrainer,
-    ZeroPaddingIterDatasetCallback,
-    compute_metrics,
-    get_lora_target_modules,
-    get_prefix_tuning_params,
-    init_chat_template,
-)
-
-from paddlenlp.data import DataCollatorForSeq2Seq
-from paddlenlp.datasets import (
-    ZeroPaddingIterableDataset,
-    ZeroPaddingMapDataset,
-    load_dataset,
-)
-from paddlenlp.metrics import BLEU, Rouge1, Rouge2, RougeL
-from paddlenlp.peft import LoRAConfig, LoRAModel, PrefixConfig, PrefixModelForCausalLM
-from paddlenlp.peft.reft.pareft import (
-    LoreftIntervention,
-    ReftConfig,
-    ReftDataCollator,
-    ReftTrainerForCausalLM,
-    get_reft_model,
-)
-from paddlenlp.trainer import PdArgumentParser, get_last_checkpoint
-from paddlenlp.trainer.trainer_callback import TrainerState
-from paddlenlp.transformers import (
-    AutoConfig,
-    AutoModelForCausalLM,
-    AutoModelForCausalLMPipe,
-    AutoTokenizer,
-    Llama3Tokenizer,
-    LlamaForCausalLM,
-    LlamaForCausalLMPipe,
-    LlamaTokenizer,
-)
-from paddlenlp.transformers.configuration_utils import LlmMetaConfig
-from paddlenlp.utils.log import logger
-
-# Fine-tune Environment Variables to support sharding stage1 overlap optimization.
-os.environ["USE_CASUAL_MASK"] = "False"
-
-flash_mask_support_list = [LlamaForCausalLM, LlamaForCausalLMPipe]
-
-
-def main():
-    # Arguments
-    parser = PdArgumentParser(
-        (GenerateArgument, QuantArgument, ModelArgument, ReftArgument, DataArgument, TrainingArguments)
-    )
-    # Support format as "args.json --arg1 value1 --arg2 value2."
-    # In case of conflict, command line arguments take precedence.
-    if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
-        gen_args, quant_args, model_args, data_args, training_args = parser.parse_json_file_and_cmd_lines()
-    else:
-        gen_args, quant_args, model_args, data_args, training_args = parser.parse_args_into_dataclasses()
-    training_args.print_config(model_args, "Model")
-    training_args.print_config(data_args, "Data")
-    training_args.print_config(quant_args, "Quant")
-    training_args.print_config(gen_args, "Generation")
-
-    if sum([quant_args.do_ptq, quant_args.do_qat, quant_args.do_gptq, training_args.do_train]) > 1:
-        raise ValueError(
-            "--do_train, --do_ptq, --do_gptq and --do_qat cannot work at the same time. Please choose only one at a time"
-        )
-
-    # Setup GPU & distributed training
-    paddle.set_device(training_args.device)
-    logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, world_size: {training_args.world_size}, "
-        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16 or training_args.bf16}"
-    )
-
-    # reft method
-    if model_args.reft:
-        # load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_args.model_name,
-            model_max_length=data_args.max_length,
-            padding_side="right",
-            # use_fast=False,
-        )
-        tokenizer.pad_token = tokenizer.unk_token
-        layers = None
-        if model_args.layers != "all":
-            layers = [int(l) for l in layers.split(";")]
-        else:
-            layers = [l for l in range(model_args.num_hidden_layers)]
-        train_dataset = LoReftSupervisedDataset(
-            model_args.task,
-            os.path.join(model_args.data_dir, model_args.train_dataset),
-            tokenizer,
-            data_split="train",
-            seed=42,
-            max_n_example=model_args.max_n_train_example,
-            **{
-                "num_interventions": len(layers),
-                "position": model_args.position,
-                "share_weights": model_args.share_weights,
-                "test_split": model_args.test_split,
-            },
-        )
-
-        all_eval_datasets = {}
-        eval_datasets = model_args.eval_datasets.split(":")
-        for eval_dataset in eval_datasets:
-            test_splits = model_args.test_split.split(";")
-            all_eval_datasets[eval_dataset] = {}
-            for split in test_splits:
-                raw_eval = LoReftSupervisedDataset(
-                    model_args.task,
-                    os.path.join(model_args.data_dir, eval_dataset),
-                    tokenizer,
-                    data_split=split,
-                    seed=42,
-                    max_n_example=model_args.max_n_eval_example,
-                    **{
-                        "num_interventions": len(layers),
-                        "position": model_args.position,
-                        "share_weights": model_args.share_weights,
-                    },
-                )
-                all_eval_datasets[eval_dataset][split] = [raw_eval, raw_eval.raw_dataset]
-        eval_datasets = all_eval_datasets
-
-        # loading model
-        model = AutoModelForCausalLM.from_pretrained(
-            model_args.model_name,
-            dtype=paddle.bfloat16,  # save memory
-        )
-        # config = model.config
-        data_collator_fn = DataCollatorForSeq2Seq(
-            tokenizer=tokenizer, model=model, label_pad_token_id=-100, padding="longest"
-        )
-        data_collator = ReftDataCollator(data_collator=data_collator_fn)
-
-        # intervention config based on model type
-        intervention_dtype = paddle.bfloat16
-        representations = [
-            {
-                "layer": l,
-                "component": "block_output",
-                "low_rank_dimension": model_args.rank,
-                "intervention": LoreftIntervention(
-                    embed_dim=model_args.hidden_size,
-                    low_rank_dimension=model_args.rank,
-                    dropout=model_args.dropout,
-                    dtype=intervention_dtype,
-                    act_fn=model_args.act_fn,
-                    device="gpu",
-                    add_bias=model_args.add_bias,
-                ),
-            }
-            for l in layers
-        ]
-        print(representations)
-        reft_config = ReftConfig(representations=representations)
-        reft_model = get_reft_model(model, reft_config, set_device=True)
-        reft_model.print_trainable_parameters()
-        reft_model.model.train()
-        n_params = reft_model.count_parameters(include_model=False)
-        # # training args
-        import datetime
-
-        current_time = datetime.datetime.now()
-        run_name = current_time.strftime("%Y-%m-%d_%H-%M-%S")
-        training_args = TrainingArguments(
-            output_dir=f"{training_args.output_dir}/{run_name}",
-            run_name=run_name,
-            num_train_epochs=model_args.epochs,
-            per_device_train_batch_size=model_args.batch_size,
-            per_device_eval_batch_size=model_args.eval_batch_size,
-            gradient_accumulation_steps=training_args.gradient_accumulation_steps,
-            evaluation_strategy="no",
-            save_strategy="no",
-            metric_for_best_model=None,
-            load_best_model_at_end=False,
-            logging_strategy="steps",
-            save_total_limit=1,
-            logging_steps=10,
-            lr_scheduler_type="linear",
-            learning_rate=training_args.learning_rate,
-            warmup_ratio=0.01,
-            remove_unused_columns=False,
-        )
-        trainer = ReftTrainerForCausalLM(
-            model=reft_model,
-            tokenizer=tokenizer,
-            args=training_args,
-            train_dataset=train_dataset,
-            eval_dataset=None,
-            data_collator=data_collator,
-            compute_metrics=None,
-        )
-        trainer.train()
-        print("train end and start eval")
-        # dump config
-        args_dict = vars(model_args)
-        args_dict["n_params"] = int(n_params)
-        json_file_name = f"{training_args.output_dir}/args.json"
-        with open(json_file_name, "w") as json_file:
-            json.dump(args_dict, json_file, indent=4)
-
-        # save model
-        if model_args.save_model:
-            reft_model.save(f"{training_args.output_dir}/{run_name}")
-
-        # ensure everything is in eval mode
-        reft_model.model.eval()
-        for k, v in reft_model.interventions.items():
-            _ = v[0].eval()
-
-        print({"n_params": n_params})
-        # do eval
-        eval_results = {}
-        from utils.compute_metrics import compute_metrics_reft
-
-        for dataset_name in eval_datasets:
-            # split evalset into chunks
-            print(f"Evaluating on {dataset_name}")
-            for split, (eval_dataset, data_items) in eval_datasets[dataset_name].items():
-
-                generations, stats = compute_metrics_reft(
-                    task=model_args.task,
-                    dataset_name=dataset_name,
-                    intervenable=reft_model,
-                    tokenizer=tokenizer,
-                    eval_dataset=eval_dataset,
-                    data_items=data_items,
-                    trigger_tokens=model_args.trigger_tokens,
-                    run_name=run_name,
-                    batch_size=model_args.eval_batch_size,
-                    data_collator=None,
-                    split=split,
-                    greedy_decoding=model_args.greedy_decoding,
-                    temperature=model_args.temperature,
-                    top_p=0.00,
-                    top_k=0.00,
-                )
-
-                # log
-                eval_results.update(stats)
-                generations = stats if generations is None else generations
-                result_json_file_name = f"{training_args.output_dir}/{dataset_name}_{split}_outputs.json"
-                with open(result_json_file_name, "w") as json_file:
-                    json.dump(generations, json_file, indent=4)
-
-        # log final eval stats
-        result_json_file_name = f"{training_args.output_dir}/eval_results.json"
-        eval_results["n_params"] = int(n_params)
-        with open(result_json_file_name, "w") as json_file:
-            json.dump(eval_results, json_file, indent=4)
-
-        print(f"Training results can be found in {training_args.output_dir}/{run_name}")
-        exit(0)
-
-    # Detecting last checkpoint.
-    last_checkpoint = None
-    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
-        last_checkpoint = get_last_checkpoint(training_args.output_dir)
-        # if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 1:
-        #     raise ValueError(
-        #         f"Output directory ({training_args.output_dir}) already exists and is not empty. "
-        #         "Use --overwrite_output_dir to overcome."
-        #     )
-        if last_checkpoint is not None and training_args.resume_from_checkpoint is None:
-            logger.info(
-                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
-                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
-            )
-
-    # Load model
-    if training_args.fp16_opt_level == "O2":
-        if training_args.fp16:
-            dtype = "float16"
-        elif training_args.bf16:
-            dtype = "bfloat16"
-        else:
-            raise ValueError("Please specific dtype: --fp16 or --bf16")
-    else:
-        dtype = "float32"
-    quantization_config = dict(
-        weight_quantize_algo=model_args.weight_quantize_algo,
-        weight_blocksize=model_args.weight_blocksize,
-        weight_double_quant=model_args.weight_double_quant,
-        weight_double_quant_block_size=model_args.weight_double_quant_block_size,
-    )
-
-    model_config = AutoConfig.from_pretrained(
-        model_args.model_name_or_path,
-        dtype=dtype,
-        from_aistudio=model_args.from_aistudio,
-        quantization_config=quantization_config,
-    )
-
-    LlmMetaConfig.set_llm_config(model_config, training_args)
-
-    # Config for model using dropout, such as GPT.
-    if hasattr(model_config, "hidden_dropout_prob"):
-        model_config.hidden_dropout_prob = model_args.hidden_dropout_prob
-    if hasattr(model_config, "attention_probs_dropout_prob"):
-        model_config.attention_probs_dropout_prob = model_args.attention_probs_dropout_prob
-    if hasattr(model_config, "ignore_index"):
-        model_config.ignore_index = -100
-
-    if model_args.fuse_attention_qkv is not None:
-        model_config.fuse_attention_qkv = model_args.fuse_attention_qkv
-    if model_args.fuse_attention_ffn is not None:
-        model_config.fuse_attention_ffn = model_args.fuse_attention_ffn
-
-    model_config.seq_length = data_args.max_length
-
-    logger.info(f"Final model config: {model_config}")
-
-    model_class = AutoModelForCausalLM
-    if training_args.pipeline_parallel_degree > 1:
-        if data_args.eval_with_do_generation and training_args.do_eval:
-            raise ValueError("Plese set eval_with_do_generation to false in pipeline parallel mode.")
-
-        model_class = AutoModelForCausalLMPipe
-
-    if model_args.continue_training and not training_args.autotuner_benchmark:
-        model = model_class.from_pretrained(
-            model_args.model_name_or_path,
-            config=model_config,
-            from_aistudio=model_args.from_aistudio,
-        )
-    else:
-        # NOTE(gongenlei): new add autotuner_benchmark
-        model = model_class.from_config(model_config, dtype=dtype)
-
-    if model_args.flash_mask and (not data_args.zero_padding or not model.config.use_flash_attention):
-        logger.warning("`flash_mask` must use with zero padding and flash attention.")
-        data_args.zero_padding = True
-        model.config.use_flash_attention = True
-
-    if model_args.flash_mask and not any(isinstance(model, cls) for cls in flash_mask_support_list):
-        raise NotImplementedError(f"{model.__class__} not support flash mask.")
-
-    if training_args.do_train and model_args.neftune:
-        # Inspired by https://github.com/neelsjain/NEFTune
-        if hasattr(model, "get_input_embeddings"):
-
-            def neft_post_hook(module, input, output):
-                if module.training:
-                    mag_norm = model_args.neftune_noise_alpha / paddle.sqrt(
-                        paddle.to_tensor(output.shape[0] * output.shape[1], dtype="float32")
-                    )
-                    output = output + paddle.uniform(
-                        shape=output.shape, dtype=output.dtype, min=-mag_norm, max=mag_norm
-                    )
-                return output
-
-            neft_post_hook_handle = model.get_input_embeddings().register_forward_post_hook(neft_post_hook)
-        else:
-            raise NotImplementedError("Only support neftune for model with get_input_embeddings")
-
-    # Load tokenizer & dataset
-    tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, from_aistudio=model_args.from_aistudio)
-    # init chat_template for tokenizer
-    init_chat_template(tokenizer, model_args.model_name_or_path, data_args.chat_template)
-
-    # if using chat_template, data_args.eval_with_do_generation must be false
-    if tokenizer.chat_template is not None:
-        data_args.eval_with_do_generation = False
-
-    if isinstance(tokenizer, LlamaTokenizer) or isinstance(tokenizer, Llama3Tokenizer):
-        tokenizer.pad_token_id = tokenizer.eos_token_id
-
-    if data_args.dataset_name_or_path is None:
-        raise ValueError(f"Please specific dataset name or path (got {data_args.dataset_name_or_path})")
-    elif (
-        os.path.exists(os.path.join(data_args.dataset_name_or_path, "train.json"))
-        or os.path.exists(os.path.join(data_args.dataset_name_or_path, "dev.json"))
-        or os.path.exists(os.path.join(data_args.dataset_name_or_path, "quant.json"))
-    ):
-        if training_args.do_train or quant_args.do_qat:
-            train_ds = load_dataset(
-                "json",
-                data_files=os.path.join(data_args.dataset_name_or_path, "train.json"),
-                lazy=data_args.lazy,
-            )[0]
-        else:
-            train_ds = None
-        if training_args.do_eval:
-            dev_ds = load_dataset(
-                "json",
-                data_files=os.path.join(data_args.dataset_name_or_path, "dev.json"),
-                lazy=data_args.lazy,
-            )[0]
-        else:
-            dev_ds = None
-        if quant_args.do_ptq or quant_args.do_gptq:
-            if os.path.exists(os.path.join(data_args.dataset_name_or_path, "quant.json")):
-                ptq_ds = load_dataset(
-                    "json",
-                    data_files=os.path.join(data_args.dataset_name_or_path, "quant.json"),
-                    lazy=data_args.lazy,
-                )[0]
-            elif os.path.exists(os.path.join(data_args.dataset_name_or_path, "train.json")):
-                ptq_ds = load_dataset(
-                    "json",
-                    data_files=os.path.join(data_args.dataset_name_or_path, "train.json"),
-                    lazy=data_args.lazy,
-                )[0]
-                logger.info(
-                    f"Not found quant.json in {data_args.dataset_name_or_path}. Set train dataset as PTQ calibration dataset."
-                )
-            else:
-                raise ValueError(
-                    f"Quant strategy requires quant.json or train.json in {data_args.dataset_name_or_path}"
-                )
-        else:
-            ptq_ds = None
-    elif (
-        os.path.exists(os.path.join(data_args.dataset_name_or_path, "train"))
-        or os.path.exists(os.path.join(data_args.dataset_name_or_path, "dev"))
-        or os.path.exists(os.path.join(data_args.dataset_name_or_path, "quant"))
-    ):
-        import glob
-
-        if training_args.do_train or quant_args.do_qat:
-            train_ds = load_dataset(
-                "json",
-                data_files=glob.glob(os.path.join(data_args.dataset_name_or_path, "train", "*.json")),
-                lazy=data_args.lazy,
-            )[0]
-        else:
-            train_ds = None
-        if training_args.do_eval:
-            dev_ds = load_dataset(
-                "json",
-                data_files=glob.glob(os.path.join(data_args.dataset_name_or_path, "dev", "*.json")),
-                lazy=data_args.lazy,
-            )[0]
-        else:
-            dev_ds = None
-        if quant_args.do_ptq or quant_args.do_gptq:
-            if os.path.exists(os.path.join(data_args.dataset_name_or_path, "quant")):
-                ptq_ds = load_dataset(
-                    "json",
-                    data_files=glob.glob(os.path.join(data_args.dataset_name_or_path, "quant", "*.json")),
-                    lazy=data_args.lazy,
-                )[0]
-            elif os.path.exists(os.path.join(data_args.dataset_name_or_path, "train")):
-                ptq_ds = load_dataset(
-                    "json",
-                    data_files=glob.glob(os.path.join(data_args.dataset_name_or_path, "train", "*.json")),
-                    lazy=data_args.lazy,
-                )[0]
-                logger.info(
-                    f"Not found quant.json in {data_args.dataset_name_or_path}. Set train dataset as PTQ calibration dataset."
-                )
-            else:
-                raise ValueError(f"Quant strategy requires quant or train folder in {data_args.dataset_name_or_path}")
-        else:
-            ptq_ds = None
-    else:
-        if training_args.do_train or quant_args.do_qat:
-            train_ds = load_dataset(data_args.dataset_name_or_path, splits=["train"])[0]
-        else:
-            train_ds = None
-        if training_args.do_eval:
-            dev_ds = load_dataset(data_args.dataset_name_or_path, splits=["dev"])[0]
-        else:
-            dev_ds = None
-        if quant_args.do_ptq or quant_args.do_gptq:
-            ptq_ds = load_dataset(data_args.dataset_name_or_path, splits=["train"])[0]
-            logger.info("Set train dataset as PTQ calibration dataset.")
-        else:
-            ptq_ds = None
-    # TODO(ZHUI & sijunhe): Temporary implementation. Generalize this logic and move to Trainer later.
-    if training_args.resume_from_checkpoint is not None and data_args.lazy:
-        logger.info(
-            f"Loading from '{training_args.resume_from_checkpoint}' with `lazy=True`, manually skipping dataset and setting `ignore_data_skip` to True."
-        )
-        training_args.ignore_data_skip = True
-        state = TrainerState.load_from_json(os.path.join(training_args.resume_from_checkpoint, "trainer_state.json"))
-        if state.trial_params is not None and "zero_padding_global_step" in state.trial_params:
-            consumed_samples = state.trial_params["zero_padding_global_step"]
-        else:
-            consumed_samples = (
-                state.global_step
-                * training_args.per_device_train_batch_size
-                * training_args.gradient_accumulation_steps
-                * training_args.dataset_world_size
-            )
-        logger.info(
-            f"Skipping the first {consumed_samples} samples to warmup the dataset from checkpoint '{training_args.resume_from_checkpoint}'."
-        )
-        train_ds = train_ds.skip(consumed_samples)
-
-    if training_args.pipeline_parallel_degree > 1:
-        from utils.data import convert_example_common
-
-        trans_func = partial(convert_example_common, tokenizer=tokenizer, data_args=data_args)
-    else:
-        trans_func = partial(get_convert_example(model), tokenizer=tokenizer, data_args=data_args)
-
-    if data_args.zero_padding:
-        if (
-            model.base_model_prefix not in ["llama", "bloom", "chatglm", "chatglm_v2", "qwen", "mistral"]
-            and training_args.pipeline_parallel_degree < 1
-        ):
-            raise NotImplementedError(
-                "Zero Padding data stream is only implemented for LLaMA, Bloom, ChatGLM, QWen and Mistral so far."
-            )
-    train_ds = (
-        train_ds.map(
-            partial(trans_func, is_test=False, zero_padding=data_args.zero_padding, flash_mask=model_args.flash_mask)
-        )
-        if train_ds is not None
-        else None
-    )
-    ptq_ds = (
-        ptq_ds.map(
-            partial(trans_func, is_test=False, zero_padding=data_args.zero_padding, flash_mask=model_args.flash_mask)
-        )
-        if ptq_ds is not None
-        else None
-    )
-    eval_zero_padding = data_args.zero_padding
-    if data_args.zero_padding and data_args.eval_with_do_generation:
-        logger.warning(
-            "`zero_padding` conflicts with `eval_with_do_generation`. Setting zero_padding to False for the eval_dataset."
-        )
-        eval_zero_padding = False
-    dev_ds = (
-        dev_ds.map(
-            partial(
-                trans_func,
-                is_test=data_args.eval_with_do_generation,
-                zero_padding=eval_zero_padding,
-                flash_mask=model_args.flash_mask,
-            )
-        )
-        if dev_ds is not None
-        else None
-    )
-    if data_args.zero_padding:
-        if data_args.lazy:
-            intoken_dataset = ZeroPaddingIterableDataset
-        else:
-            intoken_dataset = ZeroPaddingMapDataset
-        logger.info("Creating Zero Padding Data Stream. This may take a few minutes.")
This may take a few minutes.") - train_ds = ( - intoken_dataset( - train_ds, - tokenizer=tokenizer, - max_length=data_args.max_length, - ) - if train_ds is not None - else None - ) - ptq_ds = ( - intoken_dataset( - ptq_ds, - tokenizer=tokenizer, - max_length=data_args.max_length, - ) - if ptq_ds is not None - else None - ) - - if eval_zero_padding: - dev_ds = ( - intoken_dataset( - dev_ds, - tokenizer=tokenizer, - max_length=data_args.max_length, - ) - if dev_ds is not None - else None - ) - - if model_args.prefix_tuning: - if training_args.pipeline_parallel_degree > 1: - raise NotImplementedError("Prefix tuning is not implemented for pipeline parallelism.") - - prefix_tuning_params = get_prefix_tuning_params(model) - prefix_config = PrefixConfig( - num_prefix_tokens=model_args.num_prefix_tokens, - num_attention_heads=prefix_tuning_params["num_attention_heads"], - num_hidden_layers=prefix_tuning_params["num_hidden_layers"], - hidden_size=prefix_tuning_params["hidden_size"], - multi_query_group_num=prefix_tuning_params["multi_query_group_num"], - dtype=dtype, - ) - if model_args.prefix_path is None: - model = PrefixModelForCausalLM( - model=model, - prefix_config=prefix_config, - postprocess_past_key_value=prefix_tuning_params["postprocess_past_key_value"], - ) - else: - model = PrefixModelForCausalLM.from_pretrained( - model=model, - prefix_path=model_args.prefix_path, - postprocess_past_key_value=prefix_tuning_params["postprocess_past_key_value"], - ) - model.print_trainable_parameters() - - if model_args.lora: - if training_args.sharding_parallel_degree > 1: - assert ( - "enable_stage1_overlap" not in training_args.sharding_parallel_config - ), "Currently not support enabling sharding_stage1_overlap in lora mode." - if model_args.lora_path is None: - target_modules = get_lora_target_modules(model) - lora_config = LoRAConfig( - target_modules=target_modules, - r=model_args.lora_rank, - lora_alpha=2 * model_args.lora_rank if not model_args.rslora else 4, - rslora=model_args.rslora, - lora_plus_scale=model_args.lora_plus_scale, - pissa=model_args.pissa, - merge_weights=False, - tensor_parallel_degree=training_args.tensor_parallel_degree, - dtype=dtype, - do_qat=quant_args.do_qat, - base_model_name_or_path=model_args.model_name_or_path, - use_quick_lora=model_args.use_quick_lora, - ) - model = LoRAModel(model, lora_config) - else: - model = LoRAModel.from_pretrained(model=model, lora_path=model_args.lora_path) - - model.print_trainable_parameters() - - def compute_metrics_do_generation(eval_preds): - rouge1 = Rouge1() - rouge2 = Rouge2() - rougel = RougeL() - bleu4 = BLEU(n_size=4) - - predictions = [x[x != -100].tolist() for x in eval_preds.predictions] - references = [x[x != -100].tolist() for x in eval_preds.label_ids] - - predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True, clean_up_tokenization_spaces=False) - references = tokenizer.batch_decode(references, skip_special_tokens=True, clean_up_tokenization_spaces=False) - if data_args.save_generation_output: - with open(os.path.join(training_args.output_dir, "generated_output.json"), "w", encoding="utf-8") as f: - for pred, ref in zip(predictions, references): - out = {"output": pred, "tgt": ref} - f.write(json.dumps(out, ensure_ascii=False) + "\n") - - # for pred in predictions: - rouge1_score = rouge1.score(predictions, references) - rouge2_score = rouge2.score(predictions, references) - for pred, ref in zip(predictions, references): - rougel.add_inst(pred, [ref]) - bleu4.add_inst(pred, [ref]) - return { - 
"rouge1": rouge1_score, - "rouge2": rouge2_score, - "rougel": rougel.score(), - "bleu4": bleu4.score(), - } - - # Create trainer - max_length = ( - data_args.max_length - if training_args.pipeline_parallel_degree > 1 or training_args.autotuner_benchmark - else None - ) # NOTE(gongenlei): new add autotuner_benchmark - padding = ( - "max_length" if training_args.pipeline_parallel_degree > 1 or training_args.autotuner_benchmark else True - ) # NOTE(gongenlei): new add autotuner_benchmark - if training_args.pipeline_parallel_degree > 1: - metrics = None - elif data_args.eval_with_do_generation: - metrics = compute_metrics_do_generation - else: - metrics = compute_metrics - - trainer = CausalLMTrainer( - model=model, - args=training_args, - train_dataset=train_ds, - eval_dataset=dev_ds, - tokenizer=tokenizer, - compute_metrics=metrics, - data_collator=DataCollatorForSeq2Seq( - tokenizer=tokenizer, - max_length=max_length, - padding=padding, - max_label_length=max_length, - return_tensors="np", - return_attention_mask=not model_args.flash_mask, - pad_to_multiple_of=data_args.pad_to_multiple_of, - ), - do_generation=data_args.eval_with_do_generation, - callbacks=[ZeroPaddingIterDatasetCallback()] if isinstance(train_ds, ZeroPaddingIterableDataset) else None, - gen_args=gen_args, - data_args=data_args, - ) - - # Train - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - if model_args.neftune: - neft_post_hook_handle.remove() - if training_args.benchmark: - total_effective_tokens = ( - sum([len(i["input_ids"]) for i in trainer.train_dataset]) * training_args.num_train_epochs - ) - effective_tokens_per_second = total_effective_tokens / train_result.metrics["train_runtime"] - logger.info(f"Effective_Tokens_per_second: {effective_tokens_per_second} ") - logger.info("Benchmark done.") - else: - if model_args.save_to_aistudio: - kwargs = {} - if model_args.aistudio_token is not None: - kwargs["token"] = model_args.aistudio_token - # PEFT Model only save PEFT parameters, if pretrained model obtains from aistudio - if model_args.from_aistudio and (model_args.lora or model_args.prefix_tuning): - kwargs["base_model"] = model_args.model_name_or_path - else: - trainer.tokenizer.save_to_aistudio( - repo_id=model_args.aistudio_repo_id, - private=model_args.aistudio_repo_private, - license=model_args.aistudio_repo_license, - exist_ok=True, - **kwargs, - ) - trainer.model.save_to_aistudio( - repo_id=model_args.aistudio_repo_id, - private=model_args.aistudio_repo_private, - license=model_args.aistudio_repo_license, - merge_tensor_parallel=training_args.tensor_parallel_degree > 1, - exist_ok=True, - **kwargs, - ) - - if not training_args.autotuner_benchmark: - trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1) - trainer.log_metrics("train", train_result.metrics) - trainer.save_metrics("train", train_result.metrics) - trainer.save_state() - - # QAT - if quant_args.do_qat: - from utils.quant import create_qat_model - - trainer.model = create_qat_model(quant_args, trainer.model, dtype) - train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) - trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1) - trainer.log_metrics("qat", train_result.metrics) - trainer.save_metrics("qat", 
-        trainer.save_state()
-
-    # PTQ
-    if quant_args.do_ptq:
-        if isinstance(model, LoRAModel):
-            raise NotImplementedError(
-                "PTQ strategy not supported for LoRA model. Please merge lora parameters to pretrain model first."
-            )
-        from utils.quant import (
-            apply_autoclip,
-            apply_ptq,
-            apply_shift,
-            apply_smooth,
-            get_ptq_model_config,
-        )
-
-        trainer.model.eval()
-        trainer.model.config.quantization_config.quant_type = quant_args.quant_type
-        trainer.model.config.quantization_config.smooth = quant_args.smooth
-        trainer.model.config.quantization_config.shift = quant_args.shift
-        trainer.model.config.quantization_config.shift_smooth_all_linears = (
-            quant_args.smooth_all_linears or quant_args.shift_all_linears
-        )
-        ptq_dataloader = trainer.get_ptq_dataloader(ptq_ds)
-        if quant_args.shift or quant_args.smooth:
-            ptq_model_config = get_ptq_model_config(trainer.model)
-
-        if quant_args.shift:
-            apply_shift(quant_args, trainer, ptq_dataloader, ptq_model_config)
-
-        if quant_args.smooth:
-            apply_smooth(quant_args, trainer, ptq_dataloader, ptq_model_config)
-
-        if quant_args.auto_clip:
-            apply_autoclip(quant_args, trainer, ptq_dataloader)
-
-        apply_ptq(quant_args, trainer, ptq_dataloader)
-        trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1)
-
-    if quant_args.do_gptq:
-        if isinstance(model, LoRAModel):
-            raise NotImplementedError(
-                "PTQ strategy not supported for LoRA model. Please merge lora parameters to pretrain model first."
-            )
-        from utils.quant import apply_gptq
-
-        ptq_dataloader = trainer.get_ptq_dataloader(ptq_ds)
-        apply_gptq(quant_args, trainer, ptq_dataloader)
-        trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1)
-
-    # Evaluation dev set
-    if training_args.do_eval:
-        eval_result = trainer.evaluate(dev_ds)
-        trainer.log_metrics("eval", eval_result)
-
-    # Evaluation test set
-    if training_args.do_predict:
-        test_ds = load_dataset(
-            "json",
-            data_files=os.path.join(data_args.dataset_name_or_path, "test.json"),
-            lazy=data_args.lazy,
-        )[0]
-
-        test_ds = test_ds.map(partial(trans_func, is_test=data_args.eval_with_do_generation))
-        if eval_zero_padding:
-            test_ds = intoken_dataset(
-                test_ds,
-                tokenizer=tokenizer,
-                max_length=data_args.max_length,
-            )
-        eval_result = trainer.predict(test_ds).metrics
-        trainer.log_metrics("test", eval_result)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tests/peft/test_reft.py b/tests/peft/test_reft.py
index 744e55390c55..9e5b00cfff90 100644
--- a/tests/peft/test_reft.py
+++ b/tests/peft/test_reft.py
@@ -37,7 +37,6 @@ def test_get_type_from_string(self):
 
 class TestLoReftIntervention(unittest.TestCase):
     def setUp(self):
-        # 在每个测试方法执行前调用,用于初始化测试环境
         self.kwargs = {
             "embed_dim": 256,
             "low_rank_dimension": 64,
@@ -53,19 +52,15 @@ def test_initialization(self):
         self.assertEqual(intervention.dropout.p, self.kwargs["dropout"])
 
     def test_forward(self):
-        # 测试前向传播方法
-        base = paddle.randn([10, self.kwargs["embed_dim"]])  # 示例输入
+        base = paddle.randn([10, self.kwargs["embed_dim"]])
         intervention = LoreftIntervention(**self.kwargs)
         output = intervention.forward(base)
-
-        # 添加具体的断言,验证前向传播的输出
         self.assertEqual(output.shape, base.shape)
         self.assertEqual(output.dtype, self.kwargs["dtype"])
 
 
 class TestTinyIntervention(unittest.TestCase):
     def setUp(self):
-        # 在每个测试方法执行前调用,用于初始化测试环境
         self.kwargs = {
             "embed_dim": 256,
             "low_rank_dimension": 64,
@@ -75,14 +70,9 @@ def setUp(self):
         }
 
     def test_initialization(self):
-        # 测试初始化过程
         intervention = TinyIntervention(**self.kwargs)
-
-        # 添加具体的断言,验证初始化的行为
         self.assertEqual(intervention.rank, self.kwargs["low_rank_dimension"])
         self.assertEqual(intervention.hidden_size, self.kwargs["embed_dim"])
-
-        # 验证参数的类型和形状
         self.assertEqual(intervention.param_A.shape, [self.kwargs["embed_dim"], self.kwargs["low_rank_dimension"]])
         self.assertEqual(intervention.param_B.shape, [self.kwargs["low_rank_dimension"], self.kwargs["embed_dim"]])
         self.assertEqual(intervention.param_a.shape, [self.kwargs["low_rank_dimension"]])