# 4位量化:常规的线性层被替换成了4位线性层(48)
# Load the OPT-350M checkpoint once, onto the first CUDA device, using the
# quantization settings in `nf4_config` (presumably a 4-bit NF4
# BitsAndBytesConfig -- confirm where it is defined). `compute_dtype` and
# `nf4_config` are expected to be defined outside this snippet.
model_q4 = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    device_map='cuda:0',
    torch_dtype=compute_dtype,
    quantization_config=nf4_config,
)

# get_memory_footprint is a method: it has to be called (and the print call
# closed) to report the model's memory usage, which Transformers returns in
# bytes.
print(model_q4.get_memory_footprint())