`vllm.config.profiler` ¶

Classes:

ProfilerConfig –

Dataclass which contains profiler config for the engine.

`ProfilerConfig` ¶

Dataclass which contains profiler config for the engine.

Methods:

compute_hash –

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

Attributes:

active_iterations (int) –

Number of active iterations for PyTorch profiler schedule.
delay_iterations (int) –

Number of engine iterations to skip before starting profiling.
ignore_frontend (bool) –

If True, disables the front-end profiling of AsyncLLM when using the 'torch' profiler.
max_iterations (int) –

Maximum number of engine iterations to profile after starting profiling.
profiler (ProfilerKind | None) –

Which profiler to use. Defaults to None. Options are: 'torch' (use PyTorch profiler) and 'cuda' (use CUDA profiler).
torch_profiler_dir (str) –

Directory to save torch profiler traces. Both AsyncLLM's CPU traces and worker's traces (CPU & GPU) will be saved under this directory.
torch_profiler_dump_cuda_time_total (bool) –

If True, dumps total CUDA time in torch profiler traces. Enabled by default.
torch_profiler_record_shapes (bool) –

If True, records tensor shapes in the torch profiler. Disabled by default.
torch_profiler_use_gzip (bool) –

If True, saves torch profiler traces in gzip format. Enabled by default.
torch_profiler_with_flops (bool) –

If True, enables FLOPS counting in the torch profiler. Disabled by default.
torch_profiler_with_memory (bool) –

If True, enables memory profiling in the torch profiler. Disabled by default.
torch_profiler_with_stack (bool) –

If True, enables stack tracing in the torch profiler. Enabled by default as it is useful for debugging.
wait_iterations (int) –

Number of wait iterations for PyTorch profiler schedule.
warmup_iterations (int) –

Number of warmup iterations for PyTorch profiler schedule.

Source code in vllm/config/profiler.py

@config
class ProfilerConfig:
    """Dataclass which contains profiler config for the engine.

    Groups the profiler selection, the torch profiler trace options and the
    iteration-based controls (delay/limit and wait/warmup/active schedule).
    Cross-field consistency is enforced by `_validate_profiler_config`.
    """

    profiler: ProfilerKind | None = None
    """Which profiler to use. Defaults to None. Options are:

    - 'torch': Use PyTorch profiler.
    - 'cuda': Use CUDA profiler."""

    torch_profiler_dir: str = ""
    """Directory to save torch profiler traces. Both AsyncLLM's CPU traces and
    worker's traces (CPU & GPU) will be saved under this directory. Note that
    it must be an absolute path."""

    torch_profiler_with_stack: bool = True
    """If `True`, enables stack tracing in the torch profiler. Enabled by default
    as it is useful for debugging. Can be disabled via 
    --profiler-config.torch_profiler_with_stack=false CLI flag."""

    torch_profiler_with_flops: bool = False
    """If `True`, enables FLOPS counting in the torch profiler. Disabled by default."""

    torch_profiler_use_gzip: bool = True
    """If `True`, saves torch profiler traces in gzip format. Enabled by default"""

    torch_profiler_dump_cuda_time_total: bool = True
    """If `True`, dumps total CUDA time in torch profiler traces. Enabled by default."""

    torch_profiler_record_shapes: bool = False
    """If `True`, records tensor shapes in the torch profiler. Disabled by default."""

    torch_profiler_with_memory: bool = False
    """If `True`, enables memory profiling in the torch profiler.
    Disabled by default."""

    ignore_frontend: bool = False
    """If `True`, disables the front-end profiling of AsyncLLM when using the
    'torch' profiler. This is needed to reduce overhead when using delay/limit options,
    since the front-end profiling does not track iterations and will capture the
    entire range.
    """

    delay_iterations: int = Field(default=0, ge=0)
    """Number of engine iterations to skip before starting profiling.
    Defaults to 0, meaning profiling starts immediately after receiving /start_profile.
    """

    max_iterations: int = Field(default=0, ge=0)
    """Maximum number of engine iterations to profile after starting profiling.
    Defaults to 0, meaning no limit.
    """

    warmup_iterations: int = Field(default=0, ge=0)
    """Number of warmup iterations for PyTorch profiler schedule.
    During warmup, the profiler runs but data is discarded. This helps reduce
    noise from JIT compilation and other one-time costs in the profiled trace.
    Defaults to 0 (schedule-based profiling disabled, recording all iterations).
    Set to a positive value (e.g., 2) to enable schedule-based profiling.
    """

    active_iterations: int = Field(default=5, ge=1)
    """Number of active iterations for PyTorch profiler schedule.
    This is the number of iterations where profiling data is actually collected.
    Defaults to 5 active iterations.
    """

    wait_iterations: int = Field(default=0, ge=0)
    """Number of wait iterations for PyTorch profiler schedule.
    During wait, the profiler is completely off with zero overhead.
    This allows skipping initial iterations before warmup begins.
    Defaults to 0 (no wait period).
    """

    def compute_hash(self) -> str:
        """Return the hash of the settings that shape the computation graph.

        WARNING: when a new field is added to this config and it affects the
        computation graph, it must be added to the factor list below.

        The hash is meant to identify every option that changes the structure
        of the graph between the input ids/embeddings and the final hidden
        states; anything before or after that range is excluded.

        Returns:
            Hex digest of the (currently empty) factor list.
        """
        # Nothing in this config influences the computation graph, so the
        # factor list is intentionally left empty.
        graph_factors: list[Any] = []
        digest = safe_hash(str(graph_factors).encode(), usedforsecurity=False)
        return digest.hexdigest()

    @model_validator(mode="after")
    def _validate_profiler_config(self) -> Self:
        """Check cross-field consistency and normalise the trace directory.

        Emits a one-time warning when the 'torch' profiler is combined with
        `delay_iterations`/`max_iterations` while front-end profiling is
        still enabled. A non-URI `torch_profiler_dir` is rewritten with `~`
        expanded and made absolute.

        Returns:
            This instance, possibly with `torch_profiler_dir` rewritten.

        Raises:
            ValueError: If `torch_profiler_dir` is set while the profiler is
                not 'torch', or if the profiler is 'torch' and no directory
                is given.
        """
        uses_torch = self.profiler == "torch"
        iteration_bounded = self.delay_iterations > 0 or self.max_iterations > 0
        if uses_torch and iteration_bounded and not self.ignore_frontend:
            logger.warning_once(
                "Using 'torch' profiler with delay_iterations or max_iterations "
                "while ignore_frontend is False may result in high overhead."
            )

        trace_dir = self.torch_profiler_dir
        if not trace_dir:
            if uses_torch:
                raise ValueError(
                    "torch_profiler_dir must be set when profiler is 'torch'"
                )
            return self

        if not uses_torch:
            raise ValueError(
                "torch_profiler_dir is only applicable when profiler is set to 'torch'"
            )

        # Support any URI scheme (gs://, s3://, hdfs://, etc.); such locations
        # are kept verbatim, only local paths are made absolute.
        if not _is_uri_path(trace_dir):
            expanded = os.path.expanduser(trace_dir)
            self.torch_profiler_dir = os.path.abspath(expanded)

        return self

`active_iterations = Field(default=5, ge=1)` `class-attribute` `instance-attribute` ¶

Number of active iterations for PyTorch profiler schedule. This is the number of iterations where profiling data is actually collected. Defaults to 5 active iterations.

`delay_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

Number of engine iterations to skip before starting profiling. Defaults to 0, meaning profiling starts immediately after receiving /start_profile.

`ignore_frontend = False` `class-attribute` `instance-attribute` ¶

If True, disables the front-end profiling of AsyncLLM when using the 'torch' profiler. This is needed to reduce overhead when using delay/limit options, since the front-end profiling does not track iterations and will capture the entire range.

`max_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

Maximum number of engine iterations to profile after starting profiling. Defaults to 0, meaning no limit.

`profiler = None` `class-attribute` `instance-attribute` ¶

Which profiler to use. Defaults to None. Options are:

'torch': Use PyTorch profiler.
'cuda': Use CUDA profiler.

`torch_profiler_dir = ''` `class-attribute` `instance-attribute` ¶

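Directory to save torch profiler traces. Both AsyncLLM's CPU traces and worker's traces (CPU & GPU) will be saved under this directory. Note that local paths are stored as absolute paths: '~' and relative paths are expanded during validation, while URI paths (e.g. gs://, s3://) are left unchanged.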

`torch_profiler_dump_cuda_time_total = True` `class-attribute` `instance-attribute` ¶

If True, dumps total CUDA time in torch profiler traces. Enabled by default.

`torch_profiler_record_shapes = False` `class-attribute` `instance-attribute` ¶

If True, records tensor shapes in the torch profiler. Disabled by default.

`torch_profiler_use_gzip = True` `class-attribute` `instance-attribute` ¶

If True, saves torch profiler traces in gzip format. Enabled by default.

`torch_profiler_with_flops = False` `class-attribute` `instance-attribute` ¶

If True, enables FLOPS counting in the torch profiler. Disabled by default.

`torch_profiler_with_memory = False` `class-attribute` `instance-attribute` ¶

If True, enables memory profiling in the torch profiler. Disabled by default.

`torch_profiler_with_stack = True` `class-attribute` `instance-attribute` ¶

If True, enables stack tracing in the torch profiler. Enabled by default as it is useful for debugging. Can be disabled via --profiler-config.torch_profiler_with_stack=false CLI flag.

`wait_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

Number of wait iterations for PyTorch profiler schedule. During wait, the profiler is completely off with zero overhead. This allows skipping initial iterations before warmup begins. Defaults to 0 (no wait period).

`warmup_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

Number of warmup iterations for PyTorch profiler schedule. During warmup, the profiler runs but data is discarded. This helps reduce noise from JIT compilation and other one-time costs in the profiled trace. Defaults to 0 (schedule-based profiling disabled, recording all iterations). Set to a positive value (e.g., 2) to enable schedule-based profiling.

`compute_hash()` ¶

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.

Source code in vllm/config/profiler.py

def compute_hash(self) -> str:
    """Return the hash of the settings that shape the computation graph.

    WARNING: when a new field is added to this config and it affects the
    computation graph, it must be added to the factor list below.

    The hash is meant to identify every option that changes the structure
    of the graph between the input ids/embeddings and the final hidden
    states; anything before or after that range is excluded.

    Returns:
        Hex digest of the (currently empty) factor list.
    """
    # Nothing in this config influences the computation graph, so the
    # factor list is intentionally left empty.
    graph_factors: list[Any] = []
    digest = safe_hash(str(graph_factors).encode(), usedforsecurity=False)
    return digest.hexdigest()

`_is_uri_path(path)` ¶

Check if path is a URI (scheme://...), excluding Windows drive letters.

Supports custom URI schemes like gs://, s3://, hdfs://, etc. These paths should not be converted to absolute paths.

Source code in vllm/config/profiler.py

def _is_uri_path(path: str) -> bool:
    """Check if path is a URI (scheme://...), excluding Windows drive letters.

    Supports custom URI schemes like gs://, s3://, hdfs://, etc.
    These paths should not be converted to absolute paths.
    """
    if "://" in path:
        scheme = path.split("://")[0]
        # Windows drive letters are single characters (e.g., C://)
        # Valid URI schemes have more than one character
        return len(scheme) > 1
    return False

vllm.config.profiler ¶

ProfilerConfig ¶

active_iterations = Field(default=5, ge=1) class-attribute instance-attribute ¶

delay_iterations = Field(default=0, ge=0) class-attribute instance-attribute ¶

ignore_frontend = False class-attribute instance-attribute ¶

max_iterations = Field(default=0, ge=0) class-attribute instance-attribute ¶

profiler = None class-attribute instance-attribute ¶

torch_profiler_dir = '' class-attribute instance-attribute ¶

torch_profiler_dump_cuda_time_total = True class-attribute instance-attribute ¶

torch_profiler_record_shapes = False class-attribute instance-attribute ¶

torch_profiler_use_gzip = True class-attribute instance-attribute ¶

torch_profiler_with_flops = False class-attribute instance-attribute ¶

torch_profiler_with_memory = False class-attribute instance-attribute ¶

torch_profiler_with_stack = True class-attribute instance-attribute ¶

wait_iterations = Field(default=0, ge=0) class-attribute instance-attribute ¶

warmup_iterations = Field(default=0, ge=0) class-attribute instance-attribute ¶

compute_hash() ¶

_is_uri_path(path) ¶

`vllm.config.profiler` ¶

`ProfilerConfig` ¶

`active_iterations = Field(default=5, ge=1)` `class-attribute` `instance-attribute` ¶

`delay_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

`ignore_frontend = False` `class-attribute` `instance-attribute` ¶

`max_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

`profiler = None` `class-attribute` `instance-attribute` ¶

`torch_profiler_dir = ''` `class-attribute` `instance-attribute` ¶

`torch_profiler_dump_cuda_time_total = True` `class-attribute` `instance-attribute` ¶

`torch_profiler_record_shapes = False` `class-attribute` `instance-attribute` ¶

`torch_profiler_use_gzip = True` `class-attribute` `instance-attribute` ¶

`torch_profiler_with_flops = False` `class-attribute` `instance-attribute` ¶

`torch_profiler_with_memory = False` `class-attribute` `instance-attribute` ¶

`torch_profiler_with_stack = True` `class-attribute` `instance-attribute` ¶

`wait_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

`warmup_iterations = Field(default=0, ge=0)` `class-attribute` `instance-attribute` ¶

`compute_hash()` ¶

`_is_uri_path(path)` ¶