`vllm.ir` ¶

Modules:

op –
ops –
util –

Functions:

enable_torch_wrap –

Context manager to enable/disable torch custom op wrapping for vLLM IR ops.
register_op –

Register a new vLLM IR op.
set_default_torch_wrap –

Permanently set the torch wrap flag.

`enable_torch_wrap(enable=True)` ¶

Context manager to enable/disable torch custom op wrapping for vLLM IR ops. When torch wrapping is disabled, the torch custom op layer is skipped and IR ops dispatch directly to the implementation. Helpful for avoiding torch dispatch overhead in eager mode and avoiding the need for lowering for platforms not using Inductor.

Source code in vllm/ir/op.py

@contextlib.contextmanager
def enable_torch_wrap(enable: bool = True):
    """
    Temporarily override the torch custom op wrapping flag for vLLM IR ops.

    With wrapping disabled, the torch custom op layer is skipped and IR ops
    dispatch directly to the implementation. This avoids torch dispatch
    overhead in eager mode and removes the need for lowering on platforms
    that do not use Inductor.

    The flag value in effect on entry is restored on exit, including when
    the managed body raises.
    """
    global _ENABLE_TORCH_WRAP

    # Remember the current value and install the override in one step.
    previous, _ENABLE_TORCH_WRAP = _ENABLE_TORCH_WRAP, enable
    try:
        yield
    finally:
        # Always put the earlier value back, even on error.
        _ENABLE_TORCH_WRAP = previous

`register_op(f=None, *, name=None, activations=None, allow_inplace=False)` ¶

register_op(f: Callable[..., Any]) -> IrOp

register_op(
    *,
    name: str | None = None,
    activations: list[str] | None = None,
    allow_inplace: Literal[False] = False,
) -> Callable[[Callable[..., Any]], IrOp]

register_op(
    *,
    name: str | None = None,
    activations: list[str] | None = None,
    allow_inplace: Literal[True],
) -> Callable[[Callable[..., Any]], IrOpInplace]

Register a new vLLM IR op.

Parameters:

f ¶
(Callable | None, default: None ) –

the native implementation of the op
name ¶
(str | None, default: None ) –

the name of the op, defaults to the function name
activations ¶
(list[str] | None, default: None ) –

list of activation params, defaults to params starting with 'x'
allow_inplace ¶
(bool, default: False ) –

add a maybe_inplace overload that allows inplace impls

Returns:

IrOp | Callable[[Callable], IrOp] –

the IrOp object if f is provided, otherwise a decorator

Example usage:

```python
@vllm.ir.register_op
def my_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    return x + y


@vllm.ir.register_op(name="custom_mul")
def multiply(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    return x * y
```

Source code in vllm/ir/op.py

def register_op(
    f: Callable | None = None,
    *,
    name: str | None = None,
    activations: list[str] | None = None,
    allow_inplace: bool = False,
) -> "IrOp | Callable[[Callable], IrOp]":
    """
    Register a new vLLM IR op.

    Usable either as a bare decorator (`@register_op`) or as a decorator
    factory taking keyword arguments (`@register_op(name=...)`).

    Args:
        f: the native implementation of the op
        name: the name of the op, defaults to the function name
        activations: list of activation params, defaults to params starting with 'x'
        allow_inplace: add a maybe_inplace overload that allows inplace impls

    Returns:
        the IrOp object if f is provided, otherwise a decorator

    Raises:
        AssertionError: if an op with the same name is already registered.

    Example usage:
    ```python
    @vllm.ir.register_op
    def my_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return x + y


    @vllm.ir.register_op(name="custom_mul")
    def multiply(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return x * y
    ```
    """

    # How many trailing stack frames belong to the registration machinery.
    # Bare `@register_op` reaches `decorator` through `register_op`, so both
    # frames are internal. With `@register_op(...)` the caller invokes
    # `decorator` directly, so only that single frame is internal; dropping
    # two would discard the caller's registration site.
    internal_frames = 2 if f is not None else 1

    def decorator(_f: Callable) -> "IrOp":
        op_name: str = _f.__name__ if name is None else name
        _validate_name(op_name, "Op")
        assert op_name not in IrOp.registry, f"Op '{op_name}' is already registered."
        # Slice out the internal frames so the stack ends at the call site
        # that performed the registration.
        stack = traceback.format_stack()[:-internal_frames]
        if allow_inplace:
            op: IrOp = IrOpInplace(op_name, _f, activations, stack)
        else:
            op = IrOp(op_name, _f, activations, stack)
        IrOp.registry[op_name] = op
        return op

    if f is not None:
        return decorator(f)

    return decorator

`set_default_torch_wrap(enable=True)` ¶

Permanently set the torch wrap flag.

Source code in vllm/ir/op.py

def set_default_torch_wrap(enable: bool = True) -> None:
    """
    Set the module-level torch wrap flag to `enable`.

    The new value stays in effect until it is changed again; nothing is
    restored automatically by this function.
    """
    global _ENABLE_TORCH_WRAP
    _ENABLE_TORCH_WRAP = enable

`vllm.ir` ¶

`enable_torch_wrap(enable=True)` ¶

`register_op(f=None, *, name=None, activations=None, allow_inplace=False)` ¶

`f` ¶

`name` ¶

`activations` ¶

`allow_inplace` ¶

`set_default_torch_wrap(enable=True)` ¶

vllm.ir ¶

enable_torch_wrap(enable=True) ¶

register_op(f=None, *, name=None, activations=None, allow_inplace=False) ¶

f ¶

name ¶

activations ¶

allow_inplace ¶

set_default_torch_wrap(enable=True) ¶

`vllm.ir` ¶

`enable_torch_wrap(enable=True)` ¶

`register_op(f=None, *, name=None, activations=None, allow_inplace=False)` ¶

`f` ¶

`name` ¶

`activations` ¶

`allow_inplace` ¶

`set_default_torch_wrap(enable=True)` ¶