File: //snap/google-cloud-cli/396/lib/googlecloudsdk/api_lib/ai/model_garden/client.py
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for Vertex AI Model Garden APIs."""

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import re

from apitools.base.py import list_pager
from googlecloudsdk.api_lib.util import apis
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import flags
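
# Server-side filter expressions used by ListPublisherModels when listing
# Model Garden models.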
_HF_WILDCARD_FILTER = 'is_hf_wildcard(true)'
_NATIVE_MODEL_FILTER = 'is_hf_wildcard(false)'
_VERIFIED_DEPLOYMENT_FILTER = (
'labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED'
)


def IsHuggingFaceModel(model_name: str) -> bool:
"""Returns whether the model is a Hugging Face model."""
return bool(re.match(r'^[^/]+/[^/@]+$', model_name))


def IsCustomWeightsModel(model: str) -> bool:
"""Returns whether the model is a custom weights model."""
return bool(re.match(r'^gs://', model))


def DeployCustomWeightsModel(
messages,
projects_locations_service,
model,
machine_type,
accelerator_type,
accelerator_count,
project,
location,
):
"""Deploys a custom weights model."""
deploy_request = messages.GoogleCloudAiplatformV1beta1DeployRequest()
deploy_request.customModel = (
messages.GoogleCloudAiplatformV1beta1DeployRequestCustomModel(
gcsUri=model
)
)
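  # Only request dedicated resources when a machine shape was specified;
  # otherwise deployConfig is left unset and the service's defaults apply.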
if machine_type:
deploy_request.deployConfig = messages.GoogleCloudAiplatformV1beta1DeployRequestDeployConfig(
dedicatedResources=messages.GoogleCloudAiplatformV1beta1DedicatedResources(
machineSpec=messages.GoogleCloudAiplatformV1beta1MachineSpec(
machineType=machine_type,
acceleratorType=accelerator_type,
acceleratorCount=accelerator_count,
),
minReplicaCount=1,
),
)
request = messages.AiplatformProjectsLocationsDeployRequest(
destination=f'projects/{project}/locations/{location}',
googleCloudAiplatformV1beta1DeployRequest=deploy_request,
)
return projects_locations_service.Deploy(request)


class ModelGardenClient(object):
"""Client used for interacting with Model Garden APIs."""

  def __init__(self, version=constants.BETA_VERSION):
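    # Resolve the versioned AI Platform client; BETA maps to the v1beta1
    # surface used by the request messages below.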
client = apis.GetClientInstance(
constants.AI_PLATFORM_API_NAME,
constants.AI_PLATFORM_API_VERSION[version],
)
self._messages = client.MESSAGES_MODULE
self._publishers_models_service = client.publishers_models
self._projects_locations_service = client.projects_locations

  def GetPublisherModel(
self,
model_name,
is_hugging_face_model=False,
include_equivalent_model_garden_model_deployment_configs=True,
hugging_face_token=None,
):
"""Get a publisher model.

    Args:
model_name: The name of the model to get. The format should be
publishers/{publisher}/models/{model}
      is_hugging_face_model: Whether the model is a Hugging Face model.
include_equivalent_model_garden_model_deployment_configs: Whether to
include equivalent Model Garden model deployment configs for Hugging
Face models.
hugging_face_token: The Hugging Face access token to access the model
artifacts for gated models unverified by Model Garden.

    Returns:
A publisher model.
"""
request = self._messages.AiplatformPublishersModelsGetRequest(
name=model_name,
isHuggingFaceModel=is_hugging_face_model,
includeEquivalentModelGardenModelDeploymentConfigs=include_equivalent_model_garden_model_deployment_configs,
huggingFaceToken=hugging_face_token,
)
return self._publishers_models_service.Get(request)

  def Deploy(
self,
project,
location,
model,
accept_eula,
accelerator_type,
accelerator_count,
machine_type,
endpoint_display_name,
hugging_face_access_token,
spot,
reservation_affinity,
use_dedicated_endpoint,
enable_fast_tryout,
container_image_uri=None,
container_command=None,
container_args=None,
container_env_vars=None,
container_ports=None,
container_grpc_ports=None,
container_predict_route=None,
container_health_route=None,
container_deployment_timeout_seconds=None,
container_shared_memory_size_mb=None,
container_startup_probe_exec=None,
container_startup_probe_period_seconds=None,
container_startup_probe_timeout_seconds=None,
container_health_probe_exec=None,
container_health_probe_period_seconds=None,
container_health_probe_timeout_seconds=None,
):
"""Deploy an open weight model.
Args:
project: The project to deploy the model to.
location: The location to deploy the model to.
      model: The name of the model to deploy, or its GCS URI for custom
        weights.
accept_eula: Whether to accept the end-user license agreement.
accelerator_type: The type of accelerator to use.
accelerator_count: The number of accelerators to use.
machine_type: The type of machine to use.
endpoint_display_name: The display name of the endpoint.
hugging_face_access_token: The Hugging Face access token.
spot: Whether to deploy the model on Spot VMs.
reservation_affinity: The reservation affinity to use.
use_dedicated_endpoint: Whether to use a dedicated endpoint.
enable_fast_tryout: Whether to enable fast tryout.
container_image_uri: Immutable. URI of the Docker image to be used as the
custom container for serving predictions. This URI must identify an
image in Artifact Registry or Container Registry. Learn more about the
[container publishing requirements](https://cloud.google.com/vertex-
ai/docs/predictions/custom-container-requirements#publishing), including
permissions requirements for the Vertex AI Service Agent. The container
image is ingested upon ModelService.UploadModel, stored internally, and
this original path is afterwards not used. To learn about the
requirements for the Docker image itself, see [Custom container
requirements](https://cloud.google.com/vertex-
ai/docs/predictions/custom-container-requirements#). You can use the URI
to one of Vertex AI's [pre-built container images for
prediction](https://cloud.google.com/vertex-ai/docs/predictions/pre-
built-containers) in this field.
container_command: Specifies the command that runs when the container
starts. This overrides the container's [ENTRYPOINT](https://docs.docker.
com/engine/reference/builder/#entrypoint). Specify this field as an
array of executable and arguments, similar to a Docker `ENTRYPOINT`'s
"exec" form, not its "shell" form. If you do not specify this field,
then the container's `ENTRYPOINT` runs, in conjunction with the args
field or the container's
[`CMD`](https://docs.docker.com/engine/reference/builder/#cmd), if
either exists. If this field is not specified and the container does not
have an `ENTRYPOINT`, then refer to the Docker documentation about [how
`CMD` and `ENTRYPOINT`
interact](https://docs.docker.com/engine/reference/builder/#understand-
how-cmd-and-entrypoint-interact). If you specify this field, then you
can also specify the `args` field to provide additional arguments for
this command. However, if you specify this field, then the container's
`CMD` is ignored. See the [Kubernetes documentation about how the
`command` and `args` fields interact with a container's `ENTRYPOINT` and
`CMD`](https://kubernetes.io/docs/tasks/inject-data-application/define-
command-argument-container/#notes). In this field, you can reference
[environment variables set by Vertex
AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-
container-requirements#aip-variables) and environment variables set in
the env field. You cannot reference environment variables set in the
Docker image. In order for environment variables to be expanded,
        reference them by using the following syntax: $(VARIABLE_NAME) Note
that this differs from Bash variable expansion, which does not use
parentheses. If a variable cannot be resolved, the reference in the
input string is used unchanged. To avoid variable expansion, you can
escape this syntax with `$$`; for example: $$(VARIABLE_NAME) This field
corresponds to the `command` field of the Kubernetes Containers [v1 core
API](https://kubernetes.io/docs/reference/generated/kubernetes-
api/v1.23/#container-v1-core).
container_args: Specifies arguments for the command that runs when the
container starts. This overrides the container's
[`CMD`](https://docs.docker.com/engine/reference/builder/#cmd). Specify
this field as an array of executable and arguments, similar to a Docker
`CMD`'s "default parameters" form. If you don't specify this field but
do specify the command field, then the command from the `command` field
runs without any additional arguments. See the [Kubernetes documentation
about how the `command` and `args` fields interact with a container's
`ENTRYPOINT` and `CMD`](https://kubernetes.io/docs/tasks/inject-data-
application/define-command-argument-container/#notes). If you don't
specify this field and don't specify the `command` field, then the
container's
[`ENTRYPOINT`](https://docs.docker.com/engine/reference/builder/#cmd)
and `CMD` determine what runs based on their default behavior. See the
Docker documentation about [how `CMD` and `ENTRYPOINT`
interact](https://docs.docker.com/engine/reference/builder/#understand-
how-cmd-and-entrypoint-interact). In this field, you can reference
[environment variables set by Vertex
AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-
container-requirements#aip-variables) and environment variables set in
the env field. You cannot reference environment variables set in the
Docker image. In order for environment variables to be expanded,
        reference them by using the following syntax: $(VARIABLE_NAME) Note
that this differs from Bash variable expansion, which does not use
parentheses. If a variable cannot be resolved, the reference in the
input string is used unchanged. To avoid variable expansion, you can
escape this syntax with `$$`; for example: $$(VARIABLE_NAME) This field
corresponds to the `args` field of the Kubernetes Containers [v1 core
API](https://kubernetes.io/docs/reference/generated/kubernetes-
        api/v1.23/#container-v1-core).
container_env_vars: List of environment variables to set in the container.
After the container starts running, code running in the container can
read these environment variables. Additionally, the command and args
fields can reference these variables. Later entries in this list can
        also reference earlier entries. For example, the following sets
the variable `VAR_2` to have the value `foo bar`: ```json [ { "name":
"VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" }
] ``` If you switch the order of the variables in the example, then the
expansion does not occur. This field corresponds to the `env` field of
the Kubernetes Containers [v1 core
API](https://kubernetes.io/docs/reference/generated/kubernetes-
api/v1.23/#container-v1-core).
container_ports: List of ports to expose from the container. Vertex AI
sends any http prediction requests that it receives to the first port on
this list. Vertex AI also sends [liveness and health
checks](https://cloud.google.com/vertex-ai/docs/predictions/custom-
container-requirements#liveness) to this port. If you do not specify
        this field, it defaults to the following value: ```json [ { "containerPort":
8080 } ] ``` Vertex AI does not use ports other than the first one
listed. This field corresponds to the `ports` field of the Kubernetes
Containers [v1 core
API](https://kubernetes.io/docs/reference/generated/kubernetes-
api/v1.23/#container-v1-core).
container_grpc_ports: List of ports to expose from the container. Vertex
AI sends any grpc prediction requests that it receives to the first port
on this list. Vertex AI also sends [liveness and health
checks](https://cloud.google.com/vertex-ai/docs/predictions/custom-
container-requirements#liveness) to this port. If you do not specify
this field, gRPC requests to the container will be disabled. Vertex AI
does not use ports other than the first one listed. This field
corresponds to the `ports` field of the Kubernetes Containers [v1 core
API](https://kubernetes.io/docs/reference/generated/kubernetes-
api/v1.23/#container-v1-core).
container_predict_route: HTTP path on the container to send prediction
requests to. Vertex AI forwards requests sent using
projects.locations.endpoints.predict to this path on the container's IP
address and port. Vertex AI then returns the container's response in the
API response. For example, if you set this field to `/foo`, then when
Vertex AI receives a prediction request, it forwards the request body in
a POST request to the `/foo` path on the port of your container
specified by the first value of this `ModelContainerSpec`'s ports field.
If you don't specify this field, it defaults to the following value when
you deploy this Model to an Endpoint:
/v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The
placeholders in this value are replaced as follows: * ENDPOINT: The last
        segment (following `endpoints/`) of the `Endpoint.name` field of the
Endpoint where this Model has been deployed. (Vertex AI makes this value
available to your container code as the [`AIP_ENDPOINT_ID` environment
variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-
container-requirements#aip-variables).) * DEPLOYED_MODEL:
DeployedModel.id of the `DeployedModel`. (Vertex AI makes this value
available to your container code as the [`AIP_DEPLOYED_MODEL_ID`
environment variable](https://cloud.google.com/vertex-
ai/docs/predictions/custom-container-requirements#aip-variables).)
container_health_route: HTTP path on the container to send health checks
to. Vertex AI intermittently sends GET requests to this path on the
container's IP address and port to check that the container is healthy.
Read more about [health checks](https://cloud.google.com/vertex-
ai/docs/predictions/custom-container-requirements#health). For example,
if you set this field to `/bar`, then Vertex AI intermittently sends a
GET request to the `/bar` path on the port of your container specified
by the first value of this `ModelContainerSpec`'s ports field. If you
don't specify this field, it defaults to the following value when you
deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/
DEPLOYED_MODEL:predict The placeholders in this value are replaced as
        follows: * ENDPOINT: The last segment (following `endpoints/`) of the
        `Endpoint.name` field of the Endpoint where this Model has been
deployed. (Vertex AI makes this value available to your container code
as the [`AIP_ENDPOINT_ID` environment
variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-
container-requirements#aip-variables).) * DEPLOYED_MODEL:
DeployedModel.id of the `DeployedModel`. (Vertex AI makes this value
available to your container code as the [`AIP_DEPLOYED_MODEL_ID`
environment variable](https://cloud.google.com/vertex-
ai/docs/predictions/custom-container-requirements#aip-variables).)
container_deployment_timeout_seconds (int): Deployment timeout in seconds.
container_shared_memory_size_mb (int): The amount of the VM memory to
reserve as the shared memory for the model in megabytes.
container_startup_probe_exec (Sequence[str]): Exec specifies the action to
take. Used by startup probe. An example of this argument would be
["cat", "/tmp/healthy"]
container_startup_probe_period_seconds (int): How often (in seconds) to
        perform the startup probe. Defaults to 10 seconds. Minimum value is 1.
container_startup_probe_timeout_seconds (int): Number of seconds after
which the startup probe times out. Defaults to 1 second. Minimum value
is 1.
container_health_probe_exec (Sequence[str]): Exec specifies the action to
take. Used by health probe. An example of this argument would be ["cat",
"/tmp/healthy"]
container_health_probe_period_seconds (int): How often (in seconds) to
        perform the health probe. Defaults to 10 seconds. Minimum value is 1.
container_health_probe_timeout_seconds (int): Number of seconds after
which the health probe times out. Defaults to 1 second. Minimum value is
1.

    Returns:
The deploy long-running operation.
"""
container_spec = None
if container_image_uri:
container_spec = (
self._messages.GoogleCloudAiplatformV1beta1ModelContainerSpec(
healthRoute=container_health_route,
imageUri=container_image_uri,
predictRoute=container_predict_route,
)
)
if container_command:
container_spec.command = container_command
if container_args:
container_spec.args = container_args
if container_env_vars:
container_spec.env = [
self._messages.GoogleCloudAiplatformV1beta1EnvVar(
name=k, value=container_env_vars[k]
)
for k in container_env_vars
]
if container_ports:
container_spec.ports = [
self._messages.GoogleCloudAiplatformV1beta1Port(containerPort=port)
for port in container_ports
]
if container_grpc_ports:
container_spec.grpcPorts = [
self._messages.GoogleCloudAiplatformV1beta1Port(containerPort=port)
for port in container_grpc_ports
]
if container_deployment_timeout_seconds:
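      # The API expects a duration string such as '600s'.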
container_spec.deploymentTimeout = (
str(container_deployment_timeout_seconds) + 's'
)
if container_shared_memory_size_mb:
container_spec.sharedMemorySizeMb = container_shared_memory_size_mb
if (
container_startup_probe_exec
or container_startup_probe_period_seconds
or container_startup_probe_timeout_seconds
):
startup_probe_exec = None
if container_startup_probe_exec:
startup_probe_exec = (
self._messages.GoogleCloudAiplatformV1beta1ProbeExecAction(
command=container_startup_probe_exec
)
)
container_spec.startupProbe = (
self._messages.GoogleCloudAiplatformV1beta1Probe(
exec_=startup_probe_exec,
periodSeconds=container_startup_probe_period_seconds,
timeoutSeconds=container_startup_probe_timeout_seconds,
)
)
if (
container_health_probe_exec
or container_health_probe_period_seconds
or container_health_probe_timeout_seconds
):
health_probe_exec = None
if container_health_probe_exec:
health_probe_exec = (
self._messages.GoogleCloudAiplatformV1beta1ProbeExecAction(
command=container_health_probe_exec
)
)
container_spec.healthProbe = (
self._messages.GoogleCloudAiplatformV1beta1Probe(
exec_=health_probe_exec,
periodSeconds=container_health_probe_period_seconds,
timeoutSeconds=container_health_probe_timeout_seconds,
)
)
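    # Dispatch on the model reference: a gs:// URI deploys custom weights, an
    # '<owner>/<model>' ID deploys from Hugging Face, and anything else is
    # treated as a Model Garden publisher model resource name.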
if IsCustomWeightsModel(model):
return DeployCustomWeightsModel(
self._messages,
self._projects_locations_service,
model,
machine_type,
accelerator_type,
accelerator_count,
project,
location,
)
elif IsHuggingFaceModel(model):
deploy_request = self._messages.GoogleCloudAiplatformV1beta1DeployRequest(
huggingFaceModelId=model
)
else:
deploy_request = self._messages.GoogleCloudAiplatformV1beta1DeployRequest(
publisherModelName=model
)
deploy_request.modelConfig = (
self._messages.GoogleCloudAiplatformV1beta1DeployRequestModelConfig(
huggingFaceAccessToken=hugging_face_access_token,
acceptEula=accept_eula,
containerSpec=container_spec,
)
)
deploy_request.endpointConfig = (
self._messages.GoogleCloudAiplatformV1beta1DeployRequestEndpointConfig(
endpointDisplayName=endpoint_display_name,
dedicatedEndpointEnabled=use_dedicated_endpoint,
)
)
deploy_request.deployConfig = self._messages.GoogleCloudAiplatformV1beta1DeployRequestDeployConfig(
dedicatedResources=self._messages.GoogleCloudAiplatformV1beta1DedicatedResources(
machineSpec=self._messages.GoogleCloudAiplatformV1beta1MachineSpec(
machineType=machine_type,
acceleratorType=accelerator_type,
acceleratorCount=accelerator_count,
reservationAffinity=flags.ParseReservationAffinityFlag(
reservation_affinity, constants.BETA_VERSION
),
),
minReplicaCount=1,
spot=spot,
),
fastTryoutEnabled=enable_fast_tryout,
)
request = self._messages.AiplatformProjectsLocationsDeployRequest(
destination=f'projects/{project}/locations/{location}',
googleCloudAiplatformV1beta1DeployRequest=deploy_request,
)
return self._projects_locations_service.Deploy(request)

  def ListPublisherModels(
self,
limit=None,
batch_size=100,
list_hf_models=False,
model_filter=None,
):
"""List publisher models in Model Garden.

    Args:
limit: The maximum number of items to list. None if all available records
should be yielded.
batch_size: The number of items to list per page.
list_hf_models: Whether to only list Hugging Face models.
      model_filter: A filter on the model name, applied server-side.

    Returns:
      The list of publisher models in Model Garden.
"""
filter_str = _NATIVE_MODEL_FILTER
if list_hf_models:
filter_str = ' AND '.join(
[_HF_WILDCARD_FILTER, _VERIFIED_DEPLOYMENT_FILTER]
)
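    # Optionally narrow results with a case-insensitive substring match on the
    # model user ID or display name.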
if model_filter:
filter_str = (
f'{filter_str} AND (model_user_id=~"(?i).*{model_filter}.*" OR'
f' display_name=~"(?i).*{model_filter}.*")'
)
return list_pager.YieldFromList(
self._publishers_models_service,
self._messages.AiplatformPublishersModelsListRequest(
parent='publishers/*',
listAllVersions=True,
filter=filter_str,
),
field='publisherModels',
batch_size_attribute='pageSize',
batch_size=batch_size,
limit=limit,
)
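

# Illustrative usage (a sketch; the model names below are assumptions, not
# guaranteed to exist in Model Garden):
#
#   client = ModelGardenClient()
#   model = client.GetPublisherModel('publishers/google/models/gemma2')
#   for m in client.ListPublisherModels(limit=10, model_filter='llama'):
#     print(m.name)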