Skip to content

vllm.tool_parsers.utils

Classes:

Functions:

UnexpectedAstError

Bases: Exception

Raised when the AST structure does not match the expected pythonic tool call format.

Source code in vllm/tool_parsers/utils.py
class UnexpectedAstError(Exception):
    """Error for model output whose AST is not a pythonic tool call.

    Raised when the AST structure does not match the expected pythonic
    tool call format.
    """

_ast_callable_dotted_name(node)

Return the dotted name for a call target, walking ast.Attribute chains so a.b.c(...) becomes "a.b.c".

Raises:

  • UnexpectedAstError

    If the chain does not bottom out in an ast.Name (e.g. subscript or call expression as receiver).

Source code in vllm/tool_parsers/utils.py
def _ast_callable_dotted_name(node: ast.expr) -> str:
    """Build the dotted path of a call target from its AST.

    ``ast.Attribute`` accesses are unwound from the outermost one inwards,
    so the callee of ``a.b.c(...)`` is rendered as ``"a.b.c"`` and a bare
    name ``foo`` as ``"foo"``.

    Args:
        node: The expression used as the call target.

    Returns:
        The name segments joined with ``"."``, root name first.

    Raises:
        UnexpectedAstError: If the innermost receiver is not an
            ``ast.Name`` (e.g. a subscript or a call expression).
    """
    # Attributes are visited outermost-first, so segments are gathered in
    # reverse order and flipped once the root of the chain is reached.
    reversed_segments: list[str] = []
    cursor: ast.expr = node
    while isinstance(cursor, ast.Attribute):
        reversed_segments.append(cursor.attr)
        cursor = cursor.value

    if isinstance(cursor, ast.Name):
        reversed_segments.append(cursor.id)
        return ".".join(reversed_segments[::-1])

    raise UnexpectedAstError("Invalid tool call name")

coerce_to_schema_type(value, schema_type)

Best-effort coercion of a raw string value to a JSON Schema type.

Tries each type in priority order (null > integer > number > boolean > object > array > string) and returns the first successful coercion. Falls back to the original string when no coercion succeeds.

Parameters:

  • value

    (str) –

    The raw string value from the model output.

  • schema_type

    (str | list[str]) –

    One or more JSON Schema type strings (e.g. "string" or ["string", "null"]).

Source code in vllm/tool_parsers/utils.py
def coerce_to_schema_type(value: str, schema_type: str | list[str]) -> Any:
    """Best-effort coercion of a raw string value to a JSON Schema type.

    Tries each requested type in priority order (null > integer > number >
    boolean > object > array > string) and returns the first successful
    coercion. Type names are stripped, lower-cased and passed through
    ``_TYPE_ALIASES`` before matching.

    Per-type rules:
        * ``null``: matches the text ``"null"`` case-insensitively.
        * ``integer``: whatever ``int(value)`` accepts.
        * ``number``: whatever ``float(value)`` accepts, returned as an
          ``int`` when the value is integral. Non-finite values (``nan``,
          ``inf``, or literals that overflow to infinity) are not coerced.
        * ``boolean``: ``"true"``/``"1"`` and ``"false"``/``"0"``,
          case-insensitive and ignoring surrounding whitespace.
        * ``object`` / ``array``: the result of ``json.loads(value)``.
        * ``string``: the value unchanged.

    When none of the requested types yields a result, ``json.loads(value)``
    is attempted as a last resort and the original string is returned if
    that fails too.

    Args:
        value: The raw string value from the model output.
        schema_type: One or more JSON Schema type strings
            (e.g. ``"string"`` or ``["string", "null"]``).

    Returns:
        The coerced value, or ``value`` itself when no coercion succeeds.
    """
    if isinstance(schema_type, str):
        schema_type = [schema_type]

    normalized_types: set[str] = set()
    for declared in schema_type:
        key = declared.strip().lower()
        normalized_types.add(_TYPE_ALIASES.get(key, key))

    # Priority: null > integer > number > boolean > object > array > string
    type_priority = (
        "null",
        "integer",
        "number",
        "boolean",
        "object",
        "array",
        "string",
    )

    for candidate_type in type_priority:
        if candidate_type not in normalized_types:
            continue

        if candidate_type == "null":
            if value.lower() == "null":
                return None
            continue
        if candidate_type == "string":
            return value
        if candidate_type == "integer":
            try:
                return int(value)
            except (ValueError, TypeError):
                continue
        if candidate_type == "number":
            try:
                val = float(value)
                # int(nan) raises ValueError and int(+/-inf) raises
                # OverflowError; both mean "not a usable JSON number", so
                # fall through to the next candidate instead of crashing.
                return val if val != int(val) else int(val)
            except (ValueError, TypeError, OverflowError):
                continue
        if candidate_type == "boolean":
            lower_val = value.lower().strip()
            if lower_val in ("true", "1"):
                return True
            if lower_val in ("false", "0"):
                return False
            continue
        if candidate_type in ("object", "array"):
            try:
                return json.loads(value)
            except (json.JSONDecodeError, ValueError, TypeError):
                continue

    # Nothing requested matched: try generic JSON before giving up.
    try:
        return json.loads(value)
    except (json.JSONDecodeError, ValueError):
        return value

compute_tool_delta(previously_sent_args, new_call, index, withheld_suffix)

Compute the incremental delta between previously streamed arguments and the current tool call state.

Returns:

  • DeltaToolCall | None

    A DeltaToolCall with only the new argument characters, or None if there is no difference from what was previously sent.

Source code in vllm/tool_parsers/utils.py
def compute_tool_delta(
    previously_sent_args: str,
    new_call: ToolCall,
    index: int,
    withheld_suffix: str,
) -> DeltaToolCall | None:
    """Work out which part of a tool call still has to be streamed.

    The serialized arguments of ``new_call`` are first trimmed of
    ``withheld_suffix`` (when one is given) and then compared against
    ``previously_sent_args``.

    Args:
        previously_sent_args: Argument text already emitted for this call.
        new_call: Current state of the tool call.
        index: Index placed on the returned delta.
        withheld_suffix: Trailing text of the arguments that must not be
            streamed yet; may be empty.

    Returns:
        When nothing has been sent so far, a delta carrying the call id,
        the function name and all streamable arguments. Otherwise a delta
        carrying only the argument text past what was already sent, or
        ``None`` if there is no such text.

    Raises:
        ValueError: If ``withheld_suffix`` is non-empty and the arguments
            do not end with it.
    """
    streamable_args = new_call.function.arguments

    if withheld_suffix:
        if streamable_args.endswith(withheld_suffix):
            streamable_args = streamable_args[: -len(withheld_suffix)]
        else:
            msg = (
                f"Tool call arguments '{streamable_args}' do not end with "
                f"expected withheld suffix '{withheld_suffix}'"
            )
            logger.error(msg)
            raise ValueError(msg)

    if previously_sent_args:
        # Follow-up chunk: only the not-yet-sent tail of the arguments.
        unsent = streamable_args[len(previously_sent_args) :]
        if not unsent:
            return None
        return DeltaToolCall(
            id=None,
            index=index,
            function=DeltaFunctionCall(arguments=unsent),
        )

    # First chunk for this call: include id, type and function name.
    opening_function = DeltaFunctionCall(
        name=new_call.function.name,
        arguments=streamable_args,
    )
    return DeltaToolCall(
        id=new_call.id,
        type="function",
        index=index,
        function=opening_function,
    )

extract_intermediate_diff(curr, old)

Given two strings, extract the difference in the middle between two strings that are known to have a common prefix and/or suffix.

This function is provided as a UTILITY for extracting information from JSON generated by partial_json_parser, to help in ensuring that the right tokens are returned in streaming, so that close-quotes, close-brackets and close-braces are not returned prematurely. The order of arguments IS important - the new version of the partially-parsed JSON must be the first argument, and the second argument must be from the previous generation.

What it returns, is tokens that should be streamed to the client.

e.g. extract_intermediate_diff('{"fruit": "apple"}', '{"fruit": "ap"}') -> 'ple'

Source code in vllm/tool_parsers/utils.py
def extract_intermediate_diff(curr: str, old: str) -> str:
    """
    Return the middle part of ``curr`` that is new relative to ``old``, for
    two strings that are known to share a common prefix and/or suffix.

    This function is provided as a UTILITY for extracting information from JSON
    generated by partial_json_parser, to help in ensuring that the right tokens
    are returned in streaming, so that close-quotes, close-brackets and
    close-braces are not returned prematurely. The order of arguments IS
    important - the new version of the partially-parsed JSON must be the first
    argument, and the second argument must be from the previous generation.

    What it returns, is tokens that should be streamed to the client.

    e.g. extract_intermediate_diff('{"fruit": "apple"}', '{"fruit": "ap"}')
        -> 'ple'

    """
    shared_tail = find_common_suffix(curr, old)

    # Both strings end with the shared tail, so trimming it from the right
    # leaves only the part that can differ.
    old_body = old.removesuffix(shared_tail)
    curr_body = curr.removesuffix(shared_tail)

    shared_head = find_common_prefix(curr, old_body)

    # Strip the shared head once, from the front only; if the trimmed text
    # is shorter than the head it is returned untouched.
    return curr_body.removeprefix(shared_head)

extract_types_from_schema(schema)

Extract all possible type strings from a JSON Schema definition.

Handles type (string or list), enum value inference, and recursive anyOf/oneOf/allOf. Returns ["string"] when no type information can be determined.

Source code in vllm/tool_parsers/utils.py
def extract_types_from_schema(schema: Any) -> list[str]:
    """Collect every type name a JSON Schema fragment may describe.

    Three sources are combined: the ``type`` keyword (a string or a list
    of strings), the Python types of the ``enum`` members, and the types
    found recursively under ``anyOf``/``oneOf``/``allOf``.

    Args:
        schema: A schema fragment; anything that is not a dict is treated
            as carrying no type information.

    Returns:
        The distinct type names in no particular order, or ``["string"]``
        when no type information can be determined.
    """
    if not isinstance(schema, dict):
        return ["string"]

    found: set[str] = set()

    declared = schema.get("type")
    if isinstance(declared, str):
        found.add(declared)
    elif isinstance(declared, list):
        found.update(entry for entry in declared if isinstance(entry, str))

    # ``bool`` is listed before ``int`` because bool is a subclass of int.
    enum_kinds = (
        (bool, "boolean"),
        (int, "integer"),
        (float, "number"),
        (str, "string"),
        (list, "array"),
        (dict, "object"),
    )
    members = schema.get("enum")
    if isinstance(members, list) and members:
        for member in members:
            if member is None:
                found.add("null")
                continue
            inferred = next(
                (name for py_type, name in enum_kinds if isinstance(member, py_type)),
                None,
            )
            if inferred is not None:
                found.add(inferred)

    for keyword in ("anyOf", "oneOf", "allOf"):
        branches = schema.get(keyword)
        if not isinstance(branches, list):
            continue
        for branch in branches:
            found.update(extract_types_from_schema(branch))

    return list(found) or ["string"]

find_common_prefix(s1, s2)

Finds a common prefix that is shared between two strings, if there is one. Order of arguments is NOT important.

This function is provided as a UTILITY for extracting information from JSON generated by partial_json_parser, to help in ensuring that the right tokens are returned in streaming, so that close-quotes, close-brackets and close-braces are not returned prematurely.

e.g. find_common_prefix('{"fruit": "ap"}', '{"fruit": "apple"}') -> '{"fruit": "ap'

Source code in vllm/tool_parsers/utils.py
def find_common_prefix(s1: str, s2: str) -> str:
    """
    Finds a common prefix that is shared between two strings, if there is one.
    Order of arguments is NOT important.

    This function is provided as a UTILITY for extracting information from JSON
    generated by partial_json_parser, to help in ensuring that the right tokens
    are returned in streaming, so that close-quotes, close-brackets and
    close-braces are not returned prematurely.

    e.g. find_common_prefix('{"fruit": "ap"}', '{"fruit": "apple"}') ->
    '{"fruit": "ap'

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The longest string that both inputs start with; ``""`` when they
        share no leading characters or either input is empty.
    """
    # Count matching leading characters and slice once, rather than
    # growing the result one character at a time.
    matched = 0
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return s1[:matched]

find_common_suffix(s1, s2)

Finds a common suffix shared between two strings, if there is one. Order of arguments is NOT important. Stops when the suffix ends OR it hits an alphanumeric character

e.g. find_common_suffix('{"fruit": "ap"}', '{"fruit": "apple"}') -> '"}'

Source code in vllm/tool_parsers/utils.py
def find_common_suffix(s1: str, s2: str) -> str:
    """
    Finds a common suffix shared between two strings, if there is one. Order of
    arguments is NOT important.
    Stops when the suffix ends OR it hits an alphanumeric character.

    e.g. find_common_suffix('{"fruit": "ap"}', '{"fruit": "apple"}') -> '"}'

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The longest run of trailing non-alphanumeric characters that both
        inputs end with; ``""`` when there is none.
    """
    # Count matching trailing characters and slice once, rather than
    # prepending to the result one character at a time.
    matched = 0
    for left, right in zip(reversed(s1), reversed(s2)):
        if left != right or left.isalnum():
            break
        matched += 1
    # Slice from an explicit start index: ``s1[-0:]`` would be the whole
    # string rather than the empty one.
    return s1[len(s1) - matched :]

find_tool_properties(tools, tool_name)

Find a tool by name and return its properties dict, or {}.

Source code in vllm/tool_parsers/utils.py
def find_tool_properties(
    tools: list[Tool] | None,
    tool_name: str,
) -> dict[str, Any]:
    """Look up a tool by name and return the ``properties`` of its parameters.

    Args:
        tools: Tools to search; may be ``None`` or empty.
        tool_name: Name to match against each tool's name as reported by
            ``_extract_tool_info``.

    Returns:
        The ``"properties"`` entry of the first matching tool's
        parameters, or ``{}`` when there are no tools, no tool matches,
        or the match has no parameters or no ``"properties"`` entry.
    """
    for tool in tools or ():
        name, params = _extract_tool_info(tool)
        if name != tool_name:
            continue
        # Only the first tool with a matching name is considered.
        if not params:
            return {}
        return params.get("properties", {})
    return {}

get_parameter_value(val)

Extract a Python literal value from an AST expression node.

Handles constants, dicts, lists, and JSON-style name literals (null, true, false) that some models produce instead of Python literals (None, True, False).

Raises:

  • UnexpectedAstError

    If the AST node is not a supported literal type.

Source code in vllm/tool_parsers/utils.py
def get_parameter_value(val: ast.expr) -> Any:
    """Extract a Python literal value from an AST expression node.

    Handles constants, signed numeric constants (``-1``, ``+2.5``), dicts,
    lists, and JSON-style name literals (null, true, false) that some
    models produce instead of Python literals (None, True, False).

    Args:
        val: The expression node of a single argument value.

    Returns:
        The plain Python value; dict and list contents are converted
        recursively.

    Raises:
        UnexpectedAstError: If the AST node is not a supported literal type,
            or a dict has a key that is not a literal.
    """
    if isinstance(val, ast.Constant):
        return val.value

    # A signed number is not an ast.Constant: Python parses ``-1`` as
    # UnaryOp(USub, Constant(1)). Accept unary +/- over a real numeric
    # constant only (bool is excluded so ``-True`` stays rejected).
    if (
        isinstance(val, ast.UnaryOp)
        and isinstance(val.op, (ast.UAdd, ast.USub))
        and isinstance(val.operand, ast.Constant)
        and isinstance(val.operand.value, (int, float))
        and not isinstance(val.operand.value, bool)
    ):
        magnitude = val.operand.value
        return -magnitude if isinstance(val.op, ast.USub) else magnitude

    if isinstance(val, ast.Dict):
        # ``**spread`` entries appear as a ``None`` key and are rejected
        # here together with any computed key.
        if not all(isinstance(k, ast.Constant) for k in val.keys):
            logger.warning(
                "Dict argument keys are not all literals: %s",
                ast.dump(val),
            )
            raise UnexpectedAstError("Dict tool call arguments must have literal keys")
        return {
            k.value: get_parameter_value(v)  # type: ignore
            for k, v in zip(val.keys, val.values)
        }

    if isinstance(val, ast.List):
        return [get_parameter_value(v) for v in val.elts]

    if isinstance(val, ast.Name) and val.id in _JSON_NAME_LITERALS:
        return _JSON_NAME_LITERALS[val.id]

    logger.warning(
        "Unsupported AST node type in tool call arguments: %s",
        ast.dump(val),
    )
    raise UnexpectedAstError("Tool call arguments must be literals")

handle_single_tool(call)

Convert a single AST function call node into a ToolCall object.

Accepts both bare names (foo(...)) and dotted attribute chains (a.b.c(...)); the resulting tool call name field preserves the dotted form.

Raises:

  • UnexpectedAstError

    If the call target is neither a simple name nor a chain of attribute accesses bottoming out in a name.

Source code in vllm/tool_parsers/utils.py
def handle_single_tool(call: ast.Call) -> ToolCall:
    """Turn one AST call expression into a ``ToolCall``.

    The call target may be a bare name (``foo(...)``) or a dotted
    attribute chain (``a.b.c(...)``); the dotted form is kept as the tool
    call ``name``. Only the keyword arguments of the call are read; they
    are converted with ``get_parameter_value`` and serialized to JSON.

    Args:
        call: The call expression to convert.

    Returns:
        A ``ToolCall`` of type ``"function"`` whose arguments are a JSON
        object string.

    Raises:
        UnexpectedAstError: If the call target is neither a simple name
            nor a chain of attribute accesses bottoming out in a name, or
            if an argument value is not a supported literal.
    """
    target = call.func
    if not isinstance(target, (ast.Name, ast.Attribute)):
        logger.warning(
            "Tool call has non-simple function name: %s",
            ast.dump(target),
        )
        raise UnexpectedAstError("Invalid tool call name")

    dotted_name = _ast_callable_dotted_name(target)
    parsed_kwargs = {kw.arg: get_parameter_value(kw.value) for kw in call.keywords}
    # ensure_ascii=False keeps non-ASCII argument text unescaped.
    serialized = json.dumps(parsed_kwargs, ensure_ascii=False)

    return ToolCall(
        type="function",
        function=FunctionCall(name=dotted_name, arguments=serialized),
    )

make_valid_python(text)

Attempt to close all open brackets/quotes to make partial Python valid.

Used during streaming to parse incomplete tool call expressions by appending the necessary closing characters.

Returns:

  • tuple[str, str] | None

    A tuple of (completed_text, added_suffix) if the text can be made valid, or None if the text is too incomplete to complete meaningfully (e.g. mid-parameter-name or mid-dict-key).

Raises:

  • UnexpectedAstError

    If mismatched brackets or parentheses are detected.

Source code in vllm/tool_parsers/utils.py
def make_valid_python(text: str) -> tuple[str, str] | None:
    """Attempt to close all open brackets/quotes to make partial Python valid.

    Used during streaming to parse incomplete tool call expressions by
    appending the necessary closing characters.

    Returns:
        A tuple of (completed_text, added_suffix) if the text can be
        made valid, or None if the text is too incomplete to complete
        meaningfully (e.g. mid-parameter-name or mid-dict-key).

    Raises:
        UnexpectedAstError: If mismatched brackets or parentheses
            are detected.
    """
    bracket_stack: list[str] = []
    for index, char in enumerate(text):
        if char in {"[", "(", "{"}:
            bracket_stack.append(char)
        elif char == "]":
            if not bracket_stack or bracket_stack.pop() != "[":
                raise UnexpectedAstError("Mismatched square brackets")
        elif char == ")":
            if not bracket_stack or bracket_stack.pop() != "(":
                raise UnexpectedAstError("Mismatched parentheses")
        elif char == "}":
            if not bracket_stack or bracket_stack.pop() != "{":
                raise UnexpectedAstError("Mismatched curly braces")
        elif char in {"'", '"'}:
            if bracket_stack and bracket_stack[-1] == char:
                if index > 0 and text[index - 1] == "\\":
                    pass
                else:
                    bracket_stack.pop()
            elif bracket_stack and bracket_stack[-1] in {"'", '"'}:
                pass
            else:
                bracket_stack.append(char)

    text = text.rstrip()
    if text.endswith("=") or text.endswith(":"):
        return None
    if bracket_stack and bracket_stack[-1] == "{":
        trailing_dict_text = text[: text.rfind("{")]
        num_keys = trailing_dict_text.count(":")
        num_values = trailing_dict_text.count(",")
        if num_keys <= num_values:
            return None
    if bracket_stack and bracket_stack[-1] == "(":
        trailing_params_text = text[: text.rfind("(")]
        num_full_param_names = trailing_params_text.count("=")
        num_full_param_values = trailing_params_text.count(",")
        if num_full_param_names <= num_full_param_values:
            return None
    if text.endswith(","):
        text = text[:-1]
    if (
        bracket_stack
        and bracket_stack[-1] == "["
        and not text.endswith("[")
        and not text.endswith(")")
    ):
        return None

    _CLOSING = {"[": "]", "(": ")", "{": "}", "'": "'", '"': '"'}
    added_text = ""
    for char in reversed(bracket_stack):
        added_text += _CLOSING[char]

    candidate = text + added_text

    # Streaming partial text can land in shapes the bracket-counting
    # heuristics above don't catch. Two failure modes:
    #   1. Mid-key inside a dict (`..., "k`) closes to `..., "k"}` — a
    #      syntactically invalid mixed dict/set.
    #   2. A bare string inside a dict (`{"k`) closes to `{"k"}` — valid
    #      Python but a *set* literal, which downstream tool-call AST
    #      handling rejects.
    # Validate the candidate parses, has a body, and contains no Set
    # nodes (pythonic tool calls always use dicts for `{...}`).
    try:
        module = ast.parse(candidate)
    except SyntaxError:
        return None
    if not module.body:
        return None
    for node in ast.walk(module):
        if isinstance(node, ast.Set):
            return None

    return candidate, added_text

partial_tag_overlap(text, tag)

Length of the longest prefix of tag that matches a suffix of text.

E.g. text ending in "<tool_" returns 6 when tag is "<tool_call>". Returns 0 when there is no overlap.

Source code in vllm/tool_parsers/utils.py
def partial_tag_overlap(text: str, tag: str) -> int:
    """Measure how much of the start of *tag* is present at the end of *text*.

    Only proper prefixes of *tag* are considered, so a *text* that ends
    with the complete tag is not reported through its full length.

    E.g. text ending in ``"<tool_"`` returns 6 when tag is ``"<tool_call>"``.

    Args:
        text: Text whose tail is inspected.
        tag: Tag that may have been partially received.

    Returns:
        Length of the longest proper prefix of *tag* that *text* ends
        with, or 0 when there is no overlap.
    """
    longest_possible = min(len(text), len(tag) - 1)
    # Try the longest candidate first so the first hit is the answer.
    hits = (
        size
        for size in range(longest_possible, 0, -1)
        if text.endswith(tag[:size])
    )
    return next(hits, 0)