OpenRLHF: Loading a reward model causes ValueError: weight is on the meta device, we need a `value` to put in on 0
Hi! Thanks for your work on OpenRLHF. I trained a 4-bit Qwen-based reward model with this config (see the defaults):
parser.add_argument("--pretrain", type=str, default="Qwen/Qwen1.5-7B")
# parser.add_argument("--dataset", type=str, default="Anthropic/hh-rlhf")  # replaced by the dataset below
parser.add_argument("--dataset", type=str, default="nz/highest-number-rlhf")
parser.add_argument("--dataset_probs", type=str, default="1.0", help="sampling probs for datasets")
parser.add_argument("--save_path", type=str, default="./ckpt/qwen-7b-rm-highest-number")
parser.add_argument("--save_steps", type=int, default=-1)
parser.add_argument("--logging_steps", type=int, default=1)
parser.add_argument("--eval_steps", type=int, default=-1)
parser.add_argument("--ckpt_path", type=str, default="./ckpt/checkpoints_rm")
parser.add_argument("--max_ckpt_num", type=int, default=3)
parser.add_argument("--max_ckpt_mem", type=int, default=1000) # 1000GB
parser.add_argument("--max_epochs", type=int, default=1)
parser.add_argument("--micro_train_batch_size", type=int, default=8)
parser.add_argument("--train_batch_size", type=int, default=128)
parser.add_argument("--max_samples", type=int, default=1000000)
parser.add_argument("--load_checkpoint", action="store_true", default=False)
parser.add_argument("--max_norm", type=float, default=1.0)
parser.add_argument("--max_len", type=int, default=1024)
parser.add_argument("--l2", type=float, default=0.0)
parser.add_argument("--loss", type=str, default="sigmoid")
parser.add_argument("--gradient_checkpointing", action="store_true", default=True)
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--local_rank", type=int, default=0, help="local_rank for deepspeed")
parser.add_argument("--zero_stage", type=int, default=3)
parser.add_argument("--bf16", action="store_true", default=True)
parser.add_argument("--learning_rate", type=float, default=1e-5)
parser.add_argument("--zpg", type=int, default=1, help="ZeRO++ max partition size")
parser.add_argument("--adam_offload", action="store_true", default=True)
parser.add_argument("--flash_attn", action="store_true", default=True)
parser.add_argument("--compute_fp32_loss", action="store_true", default=False)
parser.add_argument("--margin_loss", action="store_true", default=False)
parser.add_argument("--aux_loss_coef", type=float, default=0)
parser.add_argument("--grad_accum_dtype", type=str, default=None)
parser.add_argument("--disable_trace_cache", action="store_true", default=False)
parser.add_argument("--load_in_4bit", action="store_true", default=True)
parser.add_argument("--lora_rank", type=int, default=0)
parser.add_argument("--lora_alpha", type=int, default=16)
parser.add_argument("--target_modules", type=list, default=None)
parser.add_argument("--bos_token", type=str, default=None)
parser.add_argument("--eos_token", type=str, default=None)
parser.add_argument("--pad_token", type=str, default=None)
parser.add_argument("--unk_token", type=str, default=None)
I now have the following files:
ls ckpt/qwen-7b-rm-highest-number/
added_tokens.json config.json merges.txt model-00001-of-00002.safetensors model-00002-of-00002.safetensors model.safetensors.index.json special_tokens_map.json tokenizer.json tokenizer_config.json vocab.json
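As a sanity check, the sharded safetensors index can be inspected to confirm the value head weights were actually saved; a small sketch (the key name "value_head" is an assumption about how OpenRLHF names the reward head):

import json

# List any value-head entries recorded in the checkpoint's weight map (hypothetical key name).
with open("ckpt/qwen-7b-rm-highest-number/model.safetensors.index.json") as f:
    weight_map = json.load(f)["weight_map"]
print([name for name in weight_map if "value_head" in name])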
I am trying to load the model this way:
from openrlhf.models import get_llm_for_sequence_regression
model = get_llm_for_sequence_regression("/home/nz/OpenRLHF/ckpt/qwen-7b-rm-highest-number", model_type='reward', init_value_head=True)
but I am getting this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[3], line 1
----> 1 model = get_llm_for_sequence_regression("/home/nz/OpenRLHF/examples/ckpt/qwen-7b-rm-highest-number", model_type='reward', init_value_head=False, device_map="auto")
File ~/miniconda3/envs/openrlhf/lib/python3.10/site-packages/openrlhf/models/model.py:116, in get_llm_for_sequence_regression(model_name_or_path, model_type, bf16, load_in_4bit, lora_rank, lora_alpha, target_modules, normalize_reward, use_flash_attention_2, ds_config, init_value_head, **kwargs)
113 else:
114 nf4_config = None
--> 116 model = cls_class.from_pretrained(
117 model_name_or_path,
118 config=config,
119 trust_remote_code=True,
120 torch_dtype="auto",
121 quantization_config=nf4_config,
122 **kwargs,
123 )
125 # LoRA
126 if lora_rank > 0:
File ~/miniconda3/envs/openrlhf/lib/python3.10/site-packages/transformers/modeling_utils.py:3919, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3917 if "skip_keys" in inspect.signature(dispatch_model).parameters:
3918 device_map_kwargs["skip_keys"] = model._skip_keys_device_placement
-> 3919 dispatch_model(model, **device_map_kwargs)
3921 if quantization_method_from_args == QuantizationMethod.GPTQ:
3922 if quantization_config.tokenizer is None:
File ~/.local/lib/python3.10/site-packages/accelerate/big_modeling.py:399, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
396 weights_map = None
398 tied_params = find_tied_parameters(model)
--> 399 attach_align_device_hook_on_blocks(
400 model,
401 execution_device=execution_device,
402 offload=offload,
403 offload_buffers=offload_buffers,
404 weights_map=weights_map,
405 skip_keys=skip_keys,
406 preload_module_classes=preload_module_classes,
407 )
409 # warn if there is any params on the meta device
410 offloaded_devices_str = " and ".join(
411 [device for device in set(device_map.values()) if device in ("cpu", "disk")]
412 )
File ~/.local/lib/python3.10/site-packages/accelerate/hooks.py:517, in attach_align_device_hook_on_blocks(module, execution_device, offload, weights_map, offload_buffers, module_name, skip_keys, preload_module_classes)
509 if module_name in execution_device and module_name in offload and not offload[module_name]:
510 hook = AlignDevicesHook(
511 execution_device=execution_device[module_name],
512 offload_buffers=offload_buffers,
(...)
515 skip_keys=skip_keys,
516 )
--> 517 add_hook_to_module(module, hook)
518 attach_execution_device_hook(module, execution_device[module_name])
519 elif module_name in execution_device and module_name in offload:
File ~/.local/lib/python3.10/site-packages/accelerate/hooks.py:156, in add_hook_to_module(module, hook, append)
153 old_forward = module.forward
154 module._old_forward = old_forward
--> 156 module = hook.init_hook(module)
157 module._hf_hook = hook
159 def new_forward(module, *args, **kwargs):
File ~/.local/lib/python3.10/site-packages/accelerate/hooks.py:254, in AlignDevicesHook.init_hook(self, module)
252 if not self.offload and self.execution_device is not None:
253 for name, _ in named_module_tensors(module, recurse=self.place_submodules):
--> 254 set_module_tensor_to_device(module, name, self.execution_device)
255 elif self.offload:
256 self.original_devices = {
257 name: param.device for name, param in named_module_tensors(module, recurse=self.place_submodules)
258 }
File ~/.local/lib/python3.10/site-packages/accelerate/utils/modeling.py:306, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics)
303 old_value = getattr(module, tensor_name)
305 if old_value.device == torch.device("meta") and device not in ["meta", torch.device("meta")] and value is None:
--> 306 raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {device}.")
308 if value is not None:
309 if old_value.shape != value.shape:
ValueError: weight is on the meta device, we need a `value` to put in on 0.
I guess I am doing something wrong when loading the model. Any help would be appreciated.
This is the correct way to load a pre-trained RM:
get_llm_for_sequence_regression("ckpt/llama-2-7b-rm", model_type='reward', init_value_head=False)
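Applied to the checkpoint from this issue, a minimal loading sketch; it only uses keyword arguments that appear in the get_llm_for_sequence_regression signature shown in the traceback, and bf16=True is an assumption to mirror the training default:

from openrlhf.models import get_llm_for_sequence_regression

# init_value_head=False: the trained value head weights are already in the saved checkpoint,
# so they are loaded from disk rather than re-initialized.
model = get_llm_for_sequence_regression(
    "/home/nz/OpenRLHF/ckpt/qwen-7b-rm-highest-number",
    model_type="reward",
    bf16=True,  # assumption: matches the bf16 training default above
    init_value_head=False,
)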