Share when the draft has no copy of its own, or when its copy matches the target.
Source code in vllm/v1/worker/gpu/spec_decode/eagle/utils.py
| def _should_share(eagle: nn.Module, flag: str, draft, target) -> bool:
"""Share when the draft has no own copy, or its copy matches the target."""
if not getattr(eagle, flag, False) or draft is None:
return True
if target is None:
return False
# torch.equal on GPU allocates a bool mask the size of the input.
# Use the faster GPU path when there is plenty of headroom;
# otherwise compare on CPU.
w = draft.weight
if w.is_cuda and torch.cuda.mem_get_info(w.device)[0] < w.numel() * 2:
return torch.equal(w.cpu(), target.weight.cpu())
return torch.equal(w, target.weight)
|