File: //snap/google-cloud-cli/current/lib/googlecloudsdk/command_lib/run/presets.yaml
# TODO(b/414798340): Modify this when PresetMetadata design is finalized.
# TODO(b/436350694): Add boolean for presets with ingress container.
# TODO(b/446719563): Use type and name instead of name and display_name. Use proper proto enums instead for type and data_type.
presets:
# Quickstart preset for services that run inference on AI models.
- name: ai-inference
  display_name: AI Inference
  # Quoted so the version is always read as a string.
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  description: 'Create a service for running inference on AI models.'
  supported_resources:
  - SERVICE
  # Label/value pairs; presumably the settings this preset applies --
  # TODO confirm against the consumer of this file.
  config_values:
    CPU limit: '4 vCPUs'
    Memory limit: '16GiB'
    GPU: '1 NVIDIA L4 (no zonal redundancy)'
    Billing: 'instance-based'
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=ai-inference'
# Quickstart preset that deploys the Ollama container image listed under
# config_values, with an optional Cloud Storage bucket parameter.
- name: ollama
  display_name: Ollama
  # Quoted so the version is always read as a string.
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  # Folded scalar: the lines below are joined with single spaces into one
  # string with no trailing newline.
  description: >-
    Inference server for open LLMs, using GPUs and Cloud Storage. Deploys the
    latest Ollama container, configured for Cloud Run with L4 GPUs and a Cloud
    Storage bucket for model storage.
  supported_resources:
  - SERVICE
  # The second usage example below passes this parameter as
  # --preset=ollama:bucket=<bucket-name>.
  parameters:
  - name: bucket
    label: GCS Bucket Name
    description: >-
      Connect your service to a Cloud Storage bucket. Models downloaded by
      Ollama will be stored in this bucket, improving overall performance and
      minimizing cold start times.
    type: GCS_BUCKET
    data_type: DATA_TYPE_STRING
  # Label/value pairs; presumably the settings this preset applies --
  # TODO confirm against the consumer of this file.
  config_values:
    Container Image: 'ollama/ollama:latest'
    CPU limit: '4 vCPUs'
    # Same label spelling as the ai-inference preset uses.
    Memory limit: '16GiB'
    GPU: '1 NVIDIA L4 (no zonal redundancy)'
    Billing: 'instance-based'
  # A list here: one invocation without the bucket parameter, one with it.
  example_gcloud_usage:
  - 'gcloud alpha run deploy <service-name> --preset=ollama'
  - 'gcloud alpha run deploy <service-name> --preset=ollama:bucket=<bucket-name>'
# Quickstart preset for an internal-ingress service that requires
# authentication (see config_values).
- name: private-service
  display_name: Private Service
  # Quoted so the version is always read as a string.
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  # Folded scalar: joined into one line with no trailing newline.
  description: >-
    Create a private, internal service with access control enforced based on
    identity and IAM roles.
  supported_resources:
  - SERVICE
  # Label/value pairs; presumably the settings this preset applies --
  # TODO confirm against the consumer of this file.
  config_values:
    Ingress: 'internal'
    Authentication: 'required'
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=private-service'
# Quickstart preset for a service with ingress "all" and public access
# allowed (see config_values).
- name: public-service
  display_name: Public Service
  # Quoted so the version is always read as a string.
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  # Folded scalar: joined into one line with no trailing newline.
  description: >-
    Publicly accessible endpoint, exposing your service to anyone on the
    internet, without any authentication.
  supported_resources:
  - SERVICE
  # Label/value pairs; presumably the settings this preset applies --
  # TODO confirm against the consumer of this file.
  config_values:
    Ingress: 'all'
    Authentication: 'allow public access'
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=public-service'
# Quickstart preset for a service that handles requests one at a time.
- name: single-concurrency
  display_name: Single Concurrency
  # Quoted so the version is always read as a string.
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  # Folded scalar: joined into one line with no trailing newline.
  description: >-
    Create a service where requests are handled strictly one at a time.
  supported_resources:
  - SERVICE
  # Label/value pairs; presumably the settings this preset applies --
  # TODO confirm against the consumer of this file.
  config_values:
    # Unquoted, so loaders read this value as an integer, not a string.
    Container Concurrency: 1
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=single-concurrency'