HEX

File: //snap/google-cloud-cli/current/lib/surface/dataplex/tasks/create.yaml
- release_tracks: [ALPHA, GA]
  help_text:
    brief: |
      Create a Dataplex task resource.
    description: |
      Create a Dataplex task resource.

      A task represents a user visible job that you want Dataplex to perform on a schedule.  It
      encapsulates your code, your parameters and the schedule.

      This task ID must follow these rules:
       o Must contain only lowercase letters, numbers, and hyphens.
       o Must start with a letter.
       o Must end with a number or a letter.
       o Must be between 1-63 characters.
       o Must be unique within the customer project / location.

    examples: |
      To create a Dataplex task `test-task` with ON_DEMAND trigger type,
      `dataplex-demo-test@test-project.iam.gserviceaccount.com` as execution service account and
      `gs://test-bucket/test-file.py` as spark python script file within lake `test-lake` in location `us-central1`.

          $ {command} test-task --location=us-central1 --lake=test-lake \
              --execution-service-account=dataplex-demo-test@test-project.iam.gserviceaccount.com \
              --spark-python-script-file=gs://test-bucket/test-file.py \
              --trigger-type=ON_DEMAND

      To create a Dataplex task `test-task` with RECURRING trigger type starting every
      hour at minute 0, `dataplex-demo-test@test-project.iam.gserviceaccount.com` as execution
      service account and `gs://test-bucket/test-file.py` as spark python script file within lake `test-lake` in
      location `us-central1`.

        $ {command} test-task --location=us-central1 --lake=test-lake \
              --execution-service-account=dataplex-demo-test@test-project.iam.gserviceaccount.com \
              --spark-python-script-file=gs://test-bucket/test-file.py \
              --trigger-type=RECURRING --trigger-schedule="0 * * * *"
  request:
    ALPHA:
      api_version: v1
    collection: dataplex.projects.locations.lakes.tasks
  arguments:
    resource:
      help_text: |
        Arguments and flags that specify the Dataplex Task you want to create.
      spec: !REF googlecloudsdk.command_lib.dataplex.resources:task
    params:
    - arg_name: description
      api_field: googleCloudDataplexV1Task.description
      help_text: |
        Description of the Dataplex task.
    - arg_name: display-name
      api_field: googleCloudDataplexV1Task.displayName
      help_text: |
        Display name of the Dataplex task.
    - group:   # trigger_spec
        help_text: |
          Spec related to Dataplex task scheduling and frequency settings.
        required: true
        params:
        - arg_name: trigger-type
          api_field: googleCloudDataplexV1Task.triggerSpec.type
          required: true
          help_text: |
            Trigger type of the user-specified Dataplex Task.
          choices:
          - arg_value: on-demand
            enum_value: ON_DEMAND
            help_text: |
              The `ON_DEMAND` trigger type runs the Dataplex task one time shortly after task creation.
          - arg_value: recurring
            enum_value: RECURRING
            help_text: |
              The `RECURRING` trigger type makes the task scheduled to run periodically.
        - arg_name: trigger-start-time
          api_field: googleCloudDataplexV1Task.triggerSpec.startTime
          help_text: |
            The first run of the task begins after this time. If not specified, an ON_DEMAND
            task runs when it is submitted and a RECURRING task runs based on the trigger
            schedule.
        - arg_name: trigger-disabled
          api_field: googleCloudDataplexV1Task.triggerSpec.disabled
          type: bool
          default: false
          help_text: |
            Prevent the task from executing.
            This does not cancel already running tasks. It is intended to temporarily
            disable RECURRING tasks.
        - arg_name: trigger-max-retires
          type: int
          api_field: googleCloudDataplexV1Task.triggerSpec.maxRetries
          help_text: |
            Number of retry attempts before aborting.
            Set to zero to never attempt to retry a failed task.
        - arg_name: trigger-schedule
          api_field: googleCloudDataplexV1Task.triggerSpec.schedule
          help_text: |
            Cron schedule (https://en.wikipedia.org/wiki/Cron) for running
            tasks periodically.
    - group:   # execution_spec
        help_text: |
          Spec related to how a task is executed.
        required: true
        params:
        - arg_name: execution-service-account
          api_field: googleCloudDataplexV1Task.executionSpec.serviceAccount
          required: true
          help_text: |
            Service account to use to execute a task.
        - arg_name: execution-project
          api_field: googleCloudDataplexV1Task.executionSpec.project
          help_text: |
            The project in which jobs are run.
            By default, the project containing the Lake is used.
            If a project is provided, the --execution-service-account must belong to this same
            project.
        - arg_name: execution-args
          api_field: googleCloudDataplexV1Task.executionSpec.args.additionalProperties
          metavar: KEY=VALUE
          type:
            arg_dict:
              flatten: true
              spec:
              - api_field: key
              - api_field: value
          help_text: |
            The arguments to pass to the task.
            The args can use placeholders of the format ${placeholder} as
            part of key/value string. These will be interpolated before passing the
            args to the driver. Currently supported placeholders:
            - ${task_id}
            - ${job_time}
            To pass positional args, set the key as TASK_ARGS. The value should be a
            comma-separated string of all the positional arguments.
            See https://cloud.google.com/sdk/gcloud/reference/topic/escaping for details on
            using a delimiter other than a comma. In case of
            other keys being present in the args, then TASK_ARGS will be passed as
            the last argument.
        - arg_name: max-job-execution-lifetime
          api_field: googleCloudDataplexV1Task.executionSpec.maxJobExecutionLifetime
          help_text: |
            The maximum duration before the job execution expires.
        - arg_name: kms-key
          api_field: googleCloudDataplexV1Task.executionSpec.kmsKey
          help_text: |
            The Cloud KMS key to use for encryption, of the form:
            projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}
    - group:   # task_config
        help_text: |
          Select which task you want to schedule and provide the required arguments for the task. The 2 types of tasks supported are:-
          - spark tasks
          - notebook tasks
        mutex: true
        required: true
        params:
        - group:  # notebook_task_config
            help_text: |
              Config related to running custom notebook tasks.
            params:
            - arg_name: notebook
              required: true
              api_field: googleCloudDataplexV1Task.notebook.notebook
              help_text: |
                Path to input notebook. This can be the Google Cloud Storage URI of the notebook file
                or the path to a Notebook Content.
                The execution args are accessible as environment variables
                (`TASK_key=value`).
            - arg_name: notebook-file-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.notebook.fileUris
              help_text: |
                Google Cloud Storage URIs of files to be placed in the working directory of each
                executor.
            - arg_name: notebook-archive-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.notebook.archiveUris
              help_text: |
                Google Cloud Storage URIs of archives to be extracted into the working directory of
                each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
                .zip.
            - group:   # notebook_infrastructure_spec
                params:
                - group:   # notebook_batch_compute_resource
                    help_text: |
                      Compute resources needed for a Task when using Dataproc Serverless.
                    params:
                    - arg_name: notebook-batch-executors-count
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.batch.executorsCount
                      type: int
                      help_text: |
                        Total number of job executors.
                    - arg_name: notebook-batch-max-executors-count
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.batch.maxExecutorsCount
                      type: int
                      help_text: |
                        Max configurable executors.
                        If max_executors_count > executors_count, then auto-scaling is enabled.
                - group:   # notebook_container_image_runtime
                    help_text: |
                      Container Image Runtime Configuration.
                    params:
                    - arg_name: notebook-container-image
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.containerImage.image
                      help_text: |
                        Optional custom container image for the job.
                    - arg_name: notebook-container-image-java-jars
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.containerImage.javaJars
                      repeated: true
                      help_text: |
                        A list of Java JARS to add to the classpath.
                        Valid input includes Cloud Storage URIs to Jar binaries.
                        For example, gs://bucket-name/my/path/to/file.jar
                    - arg_name: notebook-container-image-properties
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.containerImage.properties.additionalProperties
                      metavar: KEY=VALUE
                      type:
                        arg_dict:
                          flatten: true
                          spec:
                          - api_field: key
                          - api_field: value
                      help_text: |
                        The properties to set on daemon config files. Property keys are
                        specified in prefix:property format, for example core:hadoop.tmp.dir.
                        For more information, see Cluster properties
                        (https://cloud.google.com/dataproc/docs/concepts/cluster-properties)
                - group:   # notebook_vpc_network
                    help_text: |
                      Cloud VPC Network used to run the infrastructure.
                    params:
                    - group:
                        mutex: true
                        help_text: |
                          The Cloud VPC network identifier.
                        params:
                        - arg_name: notebook-vpc-network-name
                          api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.vpcNetwork.network
                          help_text: |
                            The Cloud VPC network in which the job is run. By default, the Cloud
                            VPC network named Default within the project is used.
                        - arg_name: notebook-vpc-sub-network-name
                          api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.vpcNetwork.subNetwork
                          help_text: |
                            The Cloud VPC sub-network in which the job is run.
                    - arg_name: notebook-vpc-network-tags
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.vpcNetwork.networkTags
                      repeated: true
                      help_text: |
                        List of network tags to apply to the job.
        - group:   # spark_task_config
            help_text: |
              Config related to running custom Spark tasks.
            params:
            - group:   # driver
                required: true
                mutex: true
                help_text: |
                  The specification of the main method to call to drive the
                  job. Specify either the jar file that contains the main class or the
                  main class name.
                params:
                - arg_name: spark-main-jar-file-uri
                  api_field: googleCloudDataplexV1Task.spark.mainJarFileUri
                  help_text: |
                    The Google Cloud Storage URI of the jar file that contains the main class.
                    The execution args are passed in as a sequence of named process
                    arguments (`--key=value`).
                - arg_name: spark-main-class
                  api_field: googleCloudDataplexV1Task.spark.mainClass
                  help_text: |
                    The name of the driver's main class. The jar file that contains the
                    class must be in the default CLASSPATH or specified in `jar_file_uris`.
                    The execution args are passed in as a sequence of named process
                    arguments (`--key=value`).
                - arg_name: spark-python-script-file
                  api_field: googleCloudDataplexV1Task.spark.pythonScriptFile
                  help_text: |
                    The Google Cloud Storage URI of the main Python file to use as the driver. Must
                    be a .py file.
                - arg_name: spark-sql-script-file
                  api_field: googleCloudDataplexV1Task.spark.sqlScriptFile
                  help_text: |
                    A reference to a query file. This can be the Google Cloud Storage URI of the query file
                    or it can the path to a SqlScript Content.
                - arg_name: spark-sql-script
                  api_field: googleCloudDataplexV1Task.spark.sqlScript
                  help_text: |
                    The SQL query text.
            - arg_name: spark-file-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.spark.fileUris
              help_text: |
                Google Cloud Storage URIs of files to be placed in the working directory of each
                executor.
            - arg_name: spark-archive-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.spark.archiveUris
              help_text: |
                Google Cloud Storage URIs of archives to be extracted into the working directory of
                each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
                .zip.
            - group:   # infrastructure_spec
                params:
                - group:   # batch_compute_resource
                    help_text: |
                      Compute resources needed for a Task when using Dataproc Serverless.
                    params:
                    - arg_name: batch-executors-count
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.batch.executorsCount
                      type: int
                      help_text: |
                        Total number of job executors.
                    - arg_name: batch-max-executors-count
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.batch.maxExecutorsCount
                      type: int
                      help_text: |
                        Max configurable executors.
                        If max_executors_count > executors_count, then auto-scaling is enabled.
                - group:   # container_image_runtime
                    help_text: |
                      Container Image Runtime Configuration.
                    params:
                    - arg_name: container-image
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.image
                      help_text: |
                        Optional custom container image for the job.
                    - arg_name: container-image-java-jars
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.javaJars
                      repeated: true
                      help_text: |
                        A list of Java JARS to add to the classpath.
                        Valid input includes Cloud Storage URIs to Jar binaries.
                        For example, gs://bucket-name/my/path/to/file.jar
                    - arg_name: container-image-python-packages
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.pythonPackages
                      repeated: true
                      help_text: |
                        A list of python packages to be installed.
                        Valid formats include Cloud Storage URI to a PIP installable library.
                        For example, gs://bucket-name/my/path/to/lib.tar.gz
                    - arg_name: container-image-properties
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.properties.additionalProperties
                      metavar: KEY=VALUE
                      type:
                        arg_dict:
                          flatten: true
                          spec:
                          - api_field: key
                          - api_field: value
                      help_text: |
                        The properties to set on daemon config files. Property keys are
                        specified in prefix:property format, for example core:hadoop.tmp.dir.
                        For more information, see Cluster properties
                        (https://cloud.google.com/dataproc/docs/concepts/cluster-properties)
                - group:   # vpc-network
                    help_text: |
                      Cloud VPC Network used to run the infrastructure.
                    params:
                    - group:
                        mutex: true
                        help_text: |
                          The Cloud VPC network identifier.
                        params:
                        - arg_name: vpc-network-name
                          api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.vpcNetwork.network
                          help_text: |
                            The Cloud VPC network in which the job is run. By default, the Cloud
                            VPC network named Default within the project is used.
                        - arg_name: vpc-sub-network-name
                          api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.vpcNetwork.subNetwork
                          help_text: |
                            The Cloud VPC sub-network in which the job is run.
                    - arg_name: vpc-network-tags
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.vpcNetwork.networkTags
                      repeated: true
                      help_text: |
                        List of network tags to apply to the job.
    labels:
      api_field: googleCloudDataplexV1Task.labels
  async:
    collection: dataplex.projects.locations.operations