`vllm.model_executor.layers.quantization.quark.schemes.quark_scheme` ¶

Classes:

QuarkScheme –

Abstract class used to describe the weight creation and forward pass of different quantization schemes supported by Quark.

`QuarkScheme` ¶

Bases: ABC

Abstract class used to describe the weight creation and forward pass of different quantization schemes supported by Quark.

Methods:

apply_weights –

Run the forward pass for the particular scheme. This is where scheme-specific dequant/quant steps/kernels should be applied.
create_weights –

Weight creation for the particular scheme. Inputs to this function are accepted as arbitrary positional and keyword arguments (`*args`, `**kwargs`).
get_min_capability –

Get minimum device capability.
process_weights_after_loading –

Called after weight loading is complete for any cleanup that needs to occur.

Source code in vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py

class QuarkScheme(ABC):
    """
    Abstract class used to describe the weight creation and forward pass
    of different quantization schemes supported by Quark.
    """

    @classmethod
    @abstractmethod
    def get_min_capability(cls) -> int:
        """
        Get minimum device capability.
        """
        raise NotImplementedError

    @abstractmethod
    def create_weights(self, *args, **kwargs):
        """
        Weight creation for the particular scheme. Inputs to this function

        """
        raise NotImplementedError

    @abstractmethod
    def apply_weights(
        self, layer: torch.nn.Module, x: torch.Tensor, bias: torch.Tensor | None
    ):
        """
        Run the forward pass for the particular scheme. This is where
        scheme-specific dequant/quant steps/kernels should be applied.

        Args:
            layer: torch.nn.Module with the registered weights and
                other parameters relevant to the particular scheme.
            x: input to the layer
            bias: bias parameter
        """
        raise NotImplementedError

    @abstractmethod
    def process_weights_after_loading(self, layer: torch.nn.Module):
        """
        Called after weight loading is complete for any cleanup that
        needs to occur.
        """
        raise NotImplementedError

`apply_weights(layer, x, bias)` `abstractmethod` ¶

Run the forward pass for the particular scheme. This is where scheme-specific dequant/quant steps/kernels should be applied.

Parameters:

layer ¶
(Module) –

torch.nn.Module with the registered weights and other parameters relevant to the particular scheme.
x ¶
(Tensor) –

input to the layer
bias ¶
(Tensor | None) –

bias parameter

Source code in vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py

@abstractmethod
def apply_weights(
    self, layer: torch.nn.Module, x: torch.Tensor, bias: torch.Tensor | None
):
    """
    Run the forward pass for the particular scheme. This is where
    scheme-specific dequant/quant steps/kernels should be applied.

    Args:
        layer: torch.nn.Module with the registered weights and
            other parameters relevant to the particular scheme.
        x: input to the layer
        bias: bias parameter
    """
    raise NotImplementedError

`create_weights(*args, **kwargs)` `abstractmethod` ¶

Weight creation for the particular scheme. Inputs to this function are accepted as arbitrary positional and keyword arguments (`*args`, `**kwargs`).

Source code in vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py

@abstractmethod
def create_weights(self, *args, **kwargs):
    """
    Weight creation for the particular scheme. Inputs to this function
    are accepted as arbitrary positional and keyword arguments; this
    abstract signature does not fix their names or types, so each
    concrete scheme defines what it expects.

    Raises:
        NotImplementedError: Always, in this abstract implementation.
    """
    raise NotImplementedError

`get_min_capability()` `abstractmethod` `classmethod` ¶

Get minimum device capability.

Source code in vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py

@classmethod
@abstractmethod
def get_min_capability(cls) -> int:
    """
    Report the minimum device capability the scheme requires.

    Returns:
        The minimum capability as an integer. NOTE(review): the
        encoding of this integer is not defined here -- confirm
        against the callers.

    Raises:
        NotImplementedError: Always, in this abstract implementation.
    """
    raise NotImplementedError

`process_weights_after_loading(layer)` `abstractmethod` ¶

Called after weight loading is complete for any cleanup that needs to occur.

Source code in vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py

@abstractmethod
def process_weights_after_loading(self, layer: torch.nn.Module):
    """
    Hook invoked once weight loading has finished, for any cleanup
    that needs to occur.

    Args:
        layer: torch.nn.Module whose weights have been loaded.

    Raises:
        NotImplementedError: Always, in this abstract implementation.
    """
    raise NotImplementedError

`vllm.model_executor.layers.quantization.quark.schemes.quark_scheme` ¶

`QuarkScheme` ¶

`apply_weights(layer, x, bias)` `abstractmethod` ¶

`layer` ¶

`x` ¶

`bias` ¶

`create_weights(*args, **kwargs)` `abstractmethod` ¶

`get_min_capability()` `abstractmethod` `classmethod` ¶

`process_weights_after_loading(layer)` `abstractmethod` ¶

vllm.model_executor.layers.quantization.quark.schemes.quark_scheme ¶

QuarkScheme ¶

apply_weights(layer, x, bias) abstractmethod ¶

layer ¶

x ¶

bias ¶

create_weights(*args, **kwargs) abstractmethod ¶

get_min_capability() abstractmethod classmethod ¶

process_weights_after_loading(layer) abstractmethod ¶

`vllm.model_executor.layers.quantization.quark.schemes.quark_scheme` ¶

`QuarkScheme` ¶

`apply_weights(layer, x, bias)` `abstractmethod` ¶

`layer` ¶

`x` ¶

`bias` ¶

`create_weights(*args, **kwargs)` `abstractmethod` ¶

`get_min_capability()` `abstractmethod` `classmethod` ¶

`process_weights_after_loading(layer)` `abstractmethod` ¶