Introduction to Stable Diffusion 3.5 Large
Stable Diffusion 3.5 Large is the most powerful model in the Stable Diffusion family, aimed at professional use cases. Our example code runs a distilled variant, Stable Diffusion 3.5 Large Turbo, which generates high-quality images with exceptional prompt adherence in just four inference steps, making it considerably faster than the base Stable Diffusion 3.5 Large.
Example code for running Stable Diffusion 3.5 Large on Modal
To run the following code, you will need to:
- Create an account at modal.com
- Run `pip install modal` to install the modal Python package
- Run `modal setup` to authenticate (if this doesn't work, try `python -m modal setup`)
- Copy the code below into a file called `app.py`
- Run `modal run app.py`
Please note that this code does not come with a UI. For a more detailed example of how to run Stable Diffusion 3.5 Large Turbo as a CLI, API, and UI, refer here.
```python
import io
import random
from pathlib import Path

import modal

app = modal.App("stable-diffusion-large-model-library")
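
# Container image: Debian slim plus pinned versions of the inference stack.
# HF_HUB_ENABLE_HF_TRANSFER speeds up weight downloads from the Hugging Face Hub.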
image = (
    modal.Image.debian_slim(python_version="3.12")
    .pip_install(
        "accelerate==0.33.0",
        "diffusers==0.31.0",
        "fastapi[standard]==0.115.4",
        "huggingface-hub[hf_transfer]==0.25.2",
        "sentencepiece==0.2.0",
        "torch==2.5.1",
        "torchvision==0.20.1",
        "transformers~=4.44.0",
    )
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
)
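
# Imports inside image.imports() only run inside the container,
# so these libraries don't need to be installed locally.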
with image.imports():
    import diffusers
    import torch
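
# Wrap inference in a class so the model is loaded once per container
# and reused across calls. Each container gets one H100 GPU.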
@app.cls(
    image=image,
    gpu="H100",
    timeout=600,
)
class Inference:
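    # @modal.build() downloads the weights into the image at build time;
    # @modal.enter() loads them into memory when each container starts.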
    @modal.build()
    @modal.enter()
    def initialize(self):
        self.pipe = diffusers.StableDiffusion3Pipeline.from_pretrained(
            "adamo1139/stable-diffusion-3.5-large-turbo-ungated",
            revision="9ad870ac0b0e5e48ced156bb02f85d324b7275d2",
            torch_dtype=torch.bfloat16,
        )
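
    # Move the pipeline onto the GPU once the container has one attached.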
    @modal.enter()
    def move_to_gpu(self):
        self.pipe.to("cuda")

    @modal.method()
    def run(
        self, prompt: str, batch_size: int = 4, seed: int | None = None
    ) -> list[bytes]:
        seed = seed if seed is not None else random.randint(0, 2**32 - 1)
        print("seeding RNG with", seed)
        torch.manual_seed(seed)
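
        # The Turbo model is distilled to sample in four steps without
        # classifier-free guidance, hence num_inference_steps=4 and
        # guidance_scale=0.0.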
        images = self.pipe(
            prompt,
            num_images_per_prompt=batch_size,
            num_inference_steps=4,
            guidance_scale=0.0,
            max_sequence_length=512,
        ).images
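
        # Encode each PIL image as PNG bytes so the results can be sent
        # back to the caller.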
        image_output = []
        for image in images:
            with io.BytesIO() as buf:
                image.save(buf, format="PNG")
                image_output.append(buf.getvalue())
        torch.cuda.empty_cache()
        return image_output
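
# The local entrypoint runs on your machine when you call `modal run app.py`
# and triggers remote inference via .remote().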
@app.local_entrypoint()
def main(prompt: str = "A princess riding on a pony"):
    output_dir = Path("/tmp/stable-diffusion")
    output_dir.mkdir(exist_ok=True)
    images = Inference().run.remote(prompt, batch_size=1)
    for i, image_bytes in enumerate(images):
        output_path = output_dir / f"output_{i:02d}.png"
        output_path.write_bytes(image_bytes)
        print(f"Saved {output_path}")
```
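Because Modal maps the local entrypoint's parameters to CLI flags, you can pass your own prompt when invoking the script, for example `modal run app.py --prompt "a watercolor painting of a lighthouse at dawn"` (the prompt string here is just an illustration). If you need reproducible outputs, a small change to `main` that forwards a fixed `seed` to `run` will pin the RNG across calls.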