Server: Apache/2.4.65 (Ubuntu)
System: Linux ielts-store-v2 6.8.0-1036-gcp #38~22.04.1-Ubuntu SMP Thu Aug 14 01:19:18 UTC 2025 x86_64
User: root (0)
PHP: 7.2.34-54+ubuntu20.04.1+deb.sury.org+1
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,
File: //snap/google-cloud-cli/394/lib/googlecloudsdk/command_lib/run/presets.yaml
# TODO(b/414798340): Modify this when PresetMetadata design is finalized.
# TODO(b/436350694): Add boolean for presets with ingress container.
# TODO(b/446719563): Use type and name instead of name and display_name. Use proper proto enums instead for type and data_type.
presets:
- name: ai-inference
  display_name: AI Inference
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Create a service for running inference on AI models.
  supported_resources:
  - SERVICE
  config_values:
    CPU limit: 4 vCPUs
    Memory limit: 16GiB
    GPU: 1 NVIDIA L4 (no zonal redundancy)
    Billing: instance-based
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=ai-inference
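  # A rough equivalent using explicit flags, as a sketch only: the preset's actual
  # expansion is not shown in this file, flag availability may vary by release track,
  # and <inference-image> is a placeholder:
  #   gcloud run deploy <service-name> --image=<inference-image> \
  #     --cpu=4 --memory=16Gi --gpu=1 --gpu-type=nvidia-l4 \
  #     --no-gpu-zonal-redundancy --no-cpu-throttling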
- name: ollama
  display_name: Ollama
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Inference server for open LLMs, using GPUs and Cloud Storage. Deploys the latest Ollama container, configured for Cloud Run with L4 GPUs and a Cloud Storage bucket for model storage.
  supported_resources:
  - SERVICE
  parameters:
  - name: bucket
    label: GCS Bucket Name
    description: Connect your service to a Cloud Storage bucket. Models downloaded by Ollama will be stored in this bucket, improving overall performance and minimizing cold start times.
    type: GCS_BUCKET
    data_type: DATA_TYPE_STRING
  config_values:
    Container Image: ollama/ollama:latest
    CPU limit: 4 vCPUs
    Memory limit: 16GiB
    GPU: 1 NVIDIA L4 (no zonal redundancy)
    Billing: instance-based
  example_gcloud_usage:
  - gcloud alpha run deploy <service-name> --preset=ollama
  - gcloud alpha run deploy <service-name> --preset=ollama:bucket=<bucket-name>
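  # The bucket parameter presumably maps to a Cloud Storage volume mount; a hand-rolled
  # sketch, where the volume name and mount path (Ollama's default model directory)
  # are illustrative assumptions rather than the preset's actual expansion:
  #   gcloud run deploy <service-name> --image=ollama/ollama:latest \
  #     --cpu=4 --memory=16Gi --gpu=1 --gpu-type=nvidia-l4 \
  #     --add-volume=name=models,type=cloud-storage,bucket=<bucket-name> \
  #     --add-volume-mount=volume=models,mount-path=/root/.ollama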
- name: private-service
  display_name: Private Service
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Create a private, internal service with access control enforced based on identity and IAM roles.
  supported_resources:
  - SERVICE
  config_values:
    Ingress: internal
    Authentication: required
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=private-service
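  # Roughly equivalent explicit flags (a sketch, not necessarily the preset's exact expansion):
  #   gcloud run deploy <service-name> --ingress=internal --no-allow-unauthenticated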
- name: public-service
  display_name: Public Service
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Publicly accessible endpoint, exposing your service to anyone on the internet, without any authentication.
  supported_resources:
  - SERVICE
  config_values:
    Ingress: all
    Authentication: allow public access
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=public-service
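  # For comparison, the explicit flags for an open endpoint (sketch only):
  #   gcloud run deploy <service-name> --ingress=all --allow-unauthenticated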
- name: single-concurrency
  display_name: Single Concurrency
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: Create a service where requests are handled strictly one at a time.
  supported_resources:
  - SERVICE
  config_values:
    Container Concurrency: 1
  example_gcloud_usage: gcloud alpha run deploy <service-name> --preset=single-concurrency
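  # The same behavior can presumably be set directly with the concurrency flag:
  #   gcloud run deploy <service-name> --concurrency=1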