modal.Cls

class Cls(modal.object.Object)

Cls adds method pooling and lifecycle hook behavior to modal.Function.

Generally, you will not construct a Cls directly. Instead, use the @app.cls() decorator on the App object.

hydrate

def hydrate(self, client: Optional[_Client] = None) -> Self:

Synchronize the local object with its identity on the Modal server.

It is rarely necessary to call this method explicitly, as most operations will lazily hydrate when needed. The main use case is when you need to access object metadata, such as its ID.

Added in v0.72.39: This method replaces the deprecated .resolve() method.

from_name

@classmethod
@renamed_parameter((2024, 12, 18), "tag", "name")
def from_name(
    cls: type["_Cls"],
    app_name: str,
    name: str,
    *,
    namespace=api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE,
    environment_name: Optional[str] = None,
    workspace: Optional[str] = None,  # Deprecated and unused
) -> "_Cls":

Reference a Cls from a deployed App by its name.

In contrast to modal.Cls.lookup, this is a lazy method that defers hydrating the local object with metadata from Modal servers until the first time it is actually used.

Model = modal.Cls.from_name("other-app", "Model")

with_options

@warn_on_renamed_autoscaler_settings
def with_options(
    self: "_Cls",
    *,
    cpu: Optional[Union[float, tuple[float, float]]] = None,
    memory: Optional[Union[int, tuple[int, int]]] = None,
    gpu: GPU_T = None,
    secrets: Collection[_Secret] = (),
    volumes: dict[Union[str, os.PathLike], _Volume] = {},
    retries: Optional[Union[int, Retries]] = None,
    max_containers: Optional[int] = None,  # Limit on the number of containers that can be concurrently running.
    buffer_containers: Optional[int] = None,  # Additional containers to scale up while Function is active.
    scaledown_window: Optional[int] = None,  # Max amount of time a container can remain idle before scaling down.
    timeout: Optional[int] = None,
    # The following parameters are deprecated
    concurrency_limit: Optional[int] = None,  # Now called `max_containers`
    container_idle_timeout: Optional[int] = None,  # Now called `scaledown_window`
    allow_concurrent_inputs: Optional[int] = None,  # See `.with_concurrency`
) -> "_Cls":

Create an instance of the Cls with configuration options overridden with new values.

Usage:

Model = modal.Cls.from_name("my_app", "Model")
ModelUsingGPU = Model.with_options(gpu="A100")
ModelUsingGPU().generate.remote(42)  # will run with an A100 GPU

with_concurrency

def with_concurrency(self: "_Cls", *, max_inputs: int, target_inputs: Optional[int] = None) -> "_Cls":

Create an instance of the Cls with input concurrency enabled or overridden with new values.

Usage:

Model = modal.Cls.from_name("my_app", "Model")
ModelUsingGPU = Model.with_options(gpu="A100").with_concurrency(max_inputs=100)
ModelUsingGPU().generate.remote(42)  # will run on an A100 GPU with input concurrency enabled

with_batching

def with_batching(self: "_Cls", *, max_batch_size: int, wait_ms: int) -> "_Cls":

Create an instance of the Cls with dynamic batching enabled or overridden with new values.

Usage:

Model = modal.Cls.from_name("my_app", "Model")
ModelUsingGPU = Model.with_options(gpu="A100").with_batching(max_batch_size=100, wait_ms=1000)
ModelUsingGPU().generate.remote(42)  # will run on an A100 GPU with dynamic batching enabled

lookup

@staticmethod
@renamed_parameter((2024, 12, 18), "tag", "name")
def lookup(
    app_name: str,
    name: str,
    namespace=api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE,
    client: Optional[_Client] = None,
    environment_name: Optional[str] = None,
    workspace: Optional[str] = None,  # Deprecated and unused
) -> "_Cls":

Lookup a Cls from a deployed App by its name.

DEPRECATED: This method is deprecated in favor of modal.Cls.from_name.

In contrast to modal.Cls.from_name, this is an eager method that will hydrate the local object with metadata from Modal servers.

Model = modal.Cls.from_name("other-app", "Model")
model = Model()
model.inference(...)