modal.Cls
class Cls(modal.object.Object)
Cls adds method pooling and lifecycle hook behavior to modal.Function.
Generally, you will not construct a Cls directly.
Instead, use the @app.cls() decorator on the App object.
hydrate
def hydrate(self, client: Optional[_Client] = None) -> Self:
Synchronize the local object with its identity on the Modal server.
It is rarely necessary to call this method explicitly, as most operations will lazily hydrate when needed. The main use case is when you need to access object metadata, such as its ID.
Added in v0.72.39: This method replaces the deprecated .resolve() method.
from_name
@classmethod
@renamed_parameter((2024, 12, 18), "tag", "name")
def from_name(
cls: type["_Cls"],
app_name: str,
name: str,
*,
namespace=api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE,
environment_name: Optional[str] = None,
workspace: Optional[str] = None, # Deprecated and unused
) -> "_Cls":
Reference a Cls from a deployed App by its name.
In contrast to modal.Cls.lookup, this is a lazy method that defers hydrating the local object with metadata from Modal servers until the first time it is actually used.
Model = modal.Cls.from_name("other-app", "Model")
with_options
@warn_on_renamed_autoscaler_settings
def with_options(
self: "_Cls",
*,
cpu: Optional[Union[float, tuple[float, float]]] = None,
memory: Optional[Union[int, tuple[int, int]]] = None,
gpu: GPU_T = None,
secrets: Collection[_Secret] = (),
volumes: dict[Union[str, os.PathLike], _Volume] = {},
retries: Optional[Union[int, Retries]] = None,
max_containers: Optional[int] = None, # Limit on the number of containers that can be concurrently running.
buffer_containers: Optional[int] = None, # Additional containers to scale up while Function is active.
scaledown_window: Optional[int] = None, # Max amount of time a container can remain idle before scaling down.
timeout: Optional[int] = None,
# The following parameters are deprecated
concurrency_limit: Optional[int] = None, # Now called `max_containers`
container_idle_timeout: Optional[int] = None, # Now called `scaledown_window`
allow_concurrent_inputs: Optional[int] = None, # See `.with_concurrency`
) -> "_Cls":
Create an instance of the Cls with configuration options overridden with new values.
Usage:
Model = modal.Cls.from_name("my_app", "Model")
ModelUsingGPU = Model.with_options(gpu="A100")
ModelUsingGPU().generate.remote(42) # will run with an A100 GPU
with_concurrency
def with_concurrency(self: "_Cls", *, max_inputs: int, target_inputs: Optional[int] = None) -> "_Cls":
Create an instance of the Cls with input concurrency enabled or overridden with new values.
Usage:
Model = modal.Cls.from_name("my_app", "Model")
ModelUsingGPU = Model.with_options(gpu="A100").with_concurrency(max_inputs=100)
ModelUsingGPU().generate.remote(42) # will run on an A100 GPU with input concurrency enabled
with_batching
def with_batching(self: "_Cls", *, max_batch_size: int, wait_ms: int) -> "_Cls":
Create an instance of the Cls with dynamic batching enabled or overridden with new values.
Usage:
Model = modal.Cls.from_name("my_app", "Model")
ModelUsingGPU = Model.with_options(gpu="A100").with_batching(max_batch_size=100, wait_ms=1000)
ModelUsingGPU().generate.remote(42) # will run on an A100 GPU with dynamic batching enabled
lookup
@staticmethod
@renamed_parameter((2024, 12, 18), "tag", "name")
def lookup(
app_name: str,
name: str,
namespace=api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE,
client: Optional[_Client] = None,
environment_name: Optional[str] = None,
workspace: Optional[str] = None, # Deprecated and unused
) -> "_Cls":
Lookup a Cls from a deployed App by its name.
DEPRECATED: This method is deprecated in favor of modal.Cls.from_name.
In contrast to modal.Cls.from_name, this is an eager method that will hydrate the local object with metadata from Modal servers.
Model = modal.Cls.from_name("other-app", "Model")
model = Model()
model.inference(...)