HEX
Server: Apache/2.4.65 (Ubuntu)
System: Linux ielts-store-v2 6.8.0-1036-gcp #38~22.04.1-Ubuntu SMP Thu Aug 14 01:19:18 UTC 2025 x86_64
User: root (0)
PHP: 7.2.34-54+ubuntu20.04.1+deb.sury.org+1
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,
Upload Files
File: //snap/google-cloud-cli/current/lib/surface/dataproc/clusters/diagnose.py
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Diagnose cluster command."""

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from apitools.base.py import encoding

from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.util.apis import arg_utils
from googlecloudsdk.core import log
from googlecloudsdk.core.util import retry


@base.UniverseCompatible
class Diagnose(base.Command):
  """Run a detailed diagnostic on a cluster."""

  detailed_help = {
      'EXAMPLES': """
    To diagnose a cluster, run:

      $ {command} my-cluster --region=us-central1
"""
  }

  @classmethod
  def Args(cls, parser):
     # 26m is backend timeout + 4m for safety buffer.
    flags.AddTimeoutFlag(parser, default='30m')
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'diagnose', dataproc.api_version)
    Diagnose.addDiagnoseFlags(parser, dataproc)

  @staticmethod
  def _GetValidTarballAccessChoices(dataproc):
    tarball_access_enums = (
        dataproc.messages.DiagnoseClusterRequest.TarballAccessValueValuesEnum
    )
    return [
        arg_utils.ChoiceToEnumName(n)
        for n in tarball_access_enums.names()
        if n != 'TARBALL_ACCESS_UNSPECIFIED'
    ]

  @staticmethod
  def addDiagnoseFlags(parser, dataproc):
    parser.add_argument(
        '--tarball-access',
        type=arg_utils.ChoiceToEnumName,
        choices=Diagnose._GetValidTarballAccessChoices(dataproc),
        help='Target access privileges for diagnostic tarball.')
    parser.add_argument(
        '--start-time',
        help='Time instant to start the diagnosis from (in ' +
        '%Y-%m-%dT%H:%M:%S.%fZ format).')
    parser.add_argument(
        '--end-time',
        help='Time instant to stop the diagnosis at (in ' +
        '%Y-%m-%dT%H:%M:%S.%fZ format).')
    parser.add_argument(
        '--job-id',
        hidden=True,
        help='The job on which to perform the diagnosis.',
        action=actions.DeprecationAction(
            '--job-id',
            warn=(
                'The {flag_name} option is deprecated and will be removed in'
                ' upcoming release; use --job-ids instead.'
            ),
            removed=False,
        ),
    )
    parser.add_argument(
        '--yarn-application-id',
        hidden=True,
        help='The yarn application on which to perform the diagnosis.',
        action=actions.DeprecationAction(
            '--yarn-application-id',
            warn=(
                'The {flag_name} option is deprecated and will be removed in'
                ' upcoming release; use --yarn-application-ids instead.'
            ),
            removed=False,
        ),
    )
    parser.add_argument(
        '--workers',
        hidden=True,
        help='A list of workers in the cluster to run the diagnostic script ' +
        'on.')
    parser.add_argument(
        '--job-ids',
        help='A list of jobs on which to perform the diagnosis.',
    )
    parser.add_argument(
        '--yarn-application-ids',
        help='A list of yarn applications on which to perform the diagnosis.',
    )
    parser.add_argument(
        '--tarball-gcs-dir',
        help='The output Cloud Storage directory for the diagnostic tarball. ' +
        'If not specified, a task-specific directory in the cluster\'s ' +
        'staging bucket will be used.'
    )

  def Run(self, args):
    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = args.CONCEPTS.cluster.Parse()

    request = None
    diagnose_request = dataproc.messages.DiagnoseClusterRequest(
        job=args.job_id, yarnApplicationId=args.yarn_application_id
    )
    diagnose_request.diagnosisInterval = dataproc.messages.Interval(
        startTime=args.start_time,
        endTime=args.end_time
    )
    if args.job_ids is not None:
      diagnose_request.jobs.extend(args.job_ids.split(','))
    if args.yarn_application_ids is not None:
      diagnose_request.yarnApplicationIds.extend(
          args.yarn_application_ids.split(','))
    if args.workers is not None:
      diagnose_request.workers.extend(args.workers.split(','))
    if args.tarball_access is not None:
      tarball_access = arg_utils.ChoiceToEnum(
          args.tarball_access,
          dataproc.messages.DiagnoseClusterRequest.TarballAccessValueValuesEnum)
      diagnose_request.tarballAccess = tarball_access
    if args.tarball_gcs_dir is not None:
      diagnose_request.tarballGcsDir = args.tarball_gcs_dir

    request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        diagnoseClusterRequest=diagnose_request)

    operation = dataproc.client.projects_regions_clusters.Diagnose(request)
    # TODO(b/36052522): Stream output during polling.
    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster diagnose operation',
        timeout_s=args.timeout)

    if not operation.response:
      raise exceptions.OperationError('Operation is missing response')

    properties = encoding.MessageToDict(operation.response)
    output_uri = properties['outputUri']

    if not output_uri:
      raise exceptions.OperationError('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
        output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
      read_retrier.RetryOnResult(
          lambda: driver_log_stream.ReadIntoWritable(log.err),
          sleep_ms=100,
          should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
      log.warning(
          'Diagnostic finished successfully, '
          'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri