Server: Apache/2.4.65 (Ubuntu)
System: Linux ielts-store-v2 6.8.0-1036-gcp #38~22.04.1-Ubuntu SMP Thu Aug 14 01:19:18 UTC 2025 x86_64
User: root (0)
PHP: 7.2.34-54+ubuntu20.04.1+deb.sury.org+1
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,
File: //snap/google-cloud-cli/394/lib/googlecloudsdk/command_lib/run/presets.yaml
# TODO(b/414798340): Modify this when PresetMetadata design is finalized.
# TODO(b/436350694): Add boolean for presets with ingress container.
# TODO(b/446719563): Use type and name instead of name and display_name. Use proper proto enums instead for type and data_type.
presets:
- name: ai-inference
  display_name: AI Inference
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Create a service for running inference on AI models.
  supported_resources:
  - SERVICE
  config_values:
    CPU limit: 4 vCPUs
    Memory limit: 16GiB
    GPU: 1 NVIDIA L4 (no zonal redundancy)
    Billing: instance-based
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=ai-inference
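  # A rough equivalent using explicit flags, as a sketch only: the preset's actual
  # expansion is not shown in this file, flag availability may vary by release track,
  # and <inference-image> is a placeholder:
  #   gcloud run deploy <service-name> --image=<inference-image> \
  #     --cpu=4 --memory=16Gi --gpu=1 --gpu-type=nvidia-l4 \
  #     --no-gpu-zonal-redundancy --no-cpu-throttling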
- name: ollama
  display_name: Ollama
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Inference server for open LLMs, using GPUs and Cloud Storage. Deploys the latest Ollama container, configured for Cloud Run with L4 GPUs and a Cloud Storage bucket for model storage.
  supported_resources:
  - SERVICE
  parameters:
  - name: bucket
    label: GCS Bucket Name
    description: Connect your service to a Cloud Storage bucket. Models downloaded by Ollama will be stored in this bucket, improving overall performance and minimizing cold start times.
    type: GCS_BUCKET
    data_type: DATA_TYPE_STRING
  config_values:
    Container Image: ollama/ollama:latest
    CPU limit: 4 vCPUs
    Memory limit: 16GiB
    GPU: 1 NVIDIA L4 (no zonal redundancy)
    Billing: instance-based
  example_gcloud_usage:
  - gcloud alpha run deploy <service-name> --preset=ollama
  - gcloud alpha run deploy <service-name> --preset=ollama:bucket=<bucket-name>
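  # The bucket parameter presumably maps to a Cloud Storage volume mount; a hand-rolled
  # sketch, where the volume name and mount path (Ollama's default model directory)
  # are illustrative assumptions rather than the preset's actual expansion:
  #   gcloud run deploy <service-name> --image=ollama/ollama:latest \
  #     --cpu=4 --memory=16Gi --gpu=1 --gpu-type=nvidia-l4 \
  #     --add-volume=name=models,type=cloud-storage,bucket=<bucket-name> \
  #     --add-volume-mount=volume=models,mount-path=/root/.ollama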
- name: private-service
  display_name: Private Service
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Create a private, internal service with access control enforced based on identity and IAM roles.
  supported_resources:
  - SERVICE
  config_values:
    Ingress: internal
    Authentication: required
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=private-service
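  # Roughly equivalent explicit flags (a sketch, not necessarily the preset's exact expansion):
  #   gcloud run deploy <service-name> --ingress=internal --no-allow-unauthenticated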
- name: public-service
  display_name: Public Service
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Publicly accessible endpoint, exposing your service to anyone on the internet, without any authentication.
  supported_resources:
  - SERVICE
  config_values:
    Ingress: all
    Authentication: allow public access
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=public-service
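  # For comparison, the explicit flags for an open endpoint (sketch only):
  #   gcloud run deploy <service-name> --ingress=all --allow-unauthenticated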
- name: single-concurrency
  display_name: Single Concurrency
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Create a service where requests are handled strictly one at a time.
  supported_resources:
  - SERVICE
  config_values:
    Container Concurrency: 1
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=single-concurrency
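  # The same behavior can presumably be set directly with the concurrency flag:
  #   gcloud run deploy <service-name> --concurrency=1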