HEX
Server: Apache/2.4.65 (Ubuntu)
System: Linux ielts-store-v2 6.8.0-1036-gcp #38~22.04.1-Ubuntu SMP Thu Aug 14 01:19:18 UTC 2025 x86_64
User: root (0)
PHP: 7.2.34-54+ubuntu20.04.1+deb.sury.org+1
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,
Upload Files
File: //snap/google-cloud-cli/396/lib/googlecloudsdk/command_lib/compute/vm_status_troubleshooter.py
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Troubleshoot VPC setting for ssh connection."""

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import datetime

from cloudsdk.google.protobuf import timestamp_pb2
from googlecloudsdk.api_lib.services import enable_api
from googlecloudsdk.api_lib.util import apis
from googlecloudsdk.command_lib.compute import ssh_troubleshooter
from googlecloudsdk.command_lib.compute import ssh_troubleshooter_utils
from googlecloudsdk.core import log
from googlecloudsdk.core.console import console_io

_API_MONITORING_CLIENT_NAME = 'monitoring'
_API_MONITORING_VERSION_V3 = 'v3'
_API_COMPUTE_CLIENT_NAME = 'compute'
_API_CLIENT_VERSION_V1 = 'v1'

MONITORING_API = 'monitoring.googleapis.com'

VM_STATUS_MESSAGE = (
    'The VM may not be running. Try restarting it. If this doesn\'t work, the '
    'VM may be in a panic state.\n'
    'Help for restarting a VM: '
    'https://cloud.google.com/compute/docs/instances/stop-start-instance\n')

CPU_METRICS = 'compute.googleapis.com/instance/cpu/utilization'
CPU_MESSAGE = (
    'VM CPU utilization is high, which may cause slow SSH connections. Stop '
    'your VM instance, increase the number of CPUs, and then restart it.\nHelp '
    'for stopping a VM: '
    'https://cloud.google.com/compute/docs/instances/stop-start-instance\n')

FILTER_TEMPLATE = (
    'metric.type = "{metrics_type}" AND '
    'metric.label.instance_name = "{instance_name}"')

CPU_THRESHOLD = 0.99

DISK_ERROR_PATTERN = [
    'No usable temporary directory found in',
    'No space left on device',
]

DISK_MESSAGE = (
    'The VM may need additional disk space. Resize and then restart the VM, '
    'or run a startup script to free up space.\n'
    'Disk: {0}\n'
    'Help for resizing a boot disk: '
    'https://cloud.google.com/sdk/gcloud/reference/compute/disks/resize\n'
    'Help for running a startup script: '
    'https://cloud.google.com/compute/docs/startupscript\n'
    'Help for additional troubleshooting of full disks: '
    'https://cloud.google.com/compute/docs/troubleshooting/troubleshooting-disk-full-resize#filesystem')  # pylint: disable=line-too-long


class VMStatusTroubleshooter(ssh_troubleshooter.SshTroubleshooter):
  """Check VM status.

  Performance cpu, memory, disk status checking.

  Attributes:
    project: The project object.
    zone: str, the zone name.
    instance: The instance object.
  """

  def __init__(self, project, zone, instance):
    self.project = project
    self.zone = zone
    self.instance = instance
    self.monitoring_client = apis.GetClientInstance(_API_MONITORING_CLIENT_NAME,
                                                    _API_MONITORING_VERSION_V3)
    self.monitoring_message = apis.GetMessagesModule(
        _API_MONITORING_CLIENT_NAME, _API_MONITORING_VERSION_V3)
    self.compute_client = apis.GetClientInstance(_API_COMPUTE_CLIENT_NAME,
                                                 _API_CLIENT_VERSION_V1)
    self.compute_message = apis.GetMessagesModule(_API_COMPUTE_CLIENT_NAME,
                                                  _API_CLIENT_VERSION_V1)
    self.issues = {}

  def check_prerequisite(self):
    log.status.Print('---- Checking VM status ----')
    msg = 'The Monitoring API is needed to check the VM\'s Status.'
    prompt = (
        "If not already enabled, is it OK to enable it and check the VM's"
        ' Status?'
    )
    cancel = 'Test skipped.'
    try:
      prompt_continue = console_io.PromptContinue(
          message=msg,
          prompt_string=prompt,
          cancel_on_no=True,
          cancel_string=cancel)
      self.skip_troubleshoot = not prompt_continue
    except console_io.OperationCancelledError:
      self.skip_troubleshoot = True

    if self.skip_troubleshoot:
      return

    # Enable API
    enable_api.EnableServiceIfDisabled(self.project.name, MONITORING_API)

  def cleanup_resources(self):
    return

  def troubleshoot(self):
    if self.skip_troubleshoot:
      return
    self._CheckVMStatus()
    self._CheckCpuStatus()
    self._CheckDiskStatus()
    log.status.Print('VM status: {0} issue(s) found.\n'.format(
        len(self.issues)))
    # Prompt appropriate messages to user.
    for message in self.issues.values():
      log.status.Print(message)

  def _CheckVMStatus(self):
    if self.instance.status != self.compute_message.Instance.StatusValueValuesEnum.RUNNING:  # pylint: disable=line-too-long
      self.issues['vm_status'] = VM_STATUS_MESSAGE

  def _CheckCpuStatus(self):
    """Check cpu utilization."""
    cpu_utilizatian = self._GetCpuUtilization()
    if cpu_utilizatian > CPU_THRESHOLD:
      self.issues['cpu'] = CPU_MESSAGE

  def _GetCpuUtilization(self):
    """Get CPU utilization from Cloud Monitoring API."""
    request = self._CreateTimeSeriesListRequest(CPU_METRICS)

    response = self.monitoring_client.projects_timeSeries.List(request=request)
    if response.timeSeries:
      points = response.timeSeries[0].points
      return sum(point.value.doubleValue for point in points) / len(points)
    return 0.0

  def _CheckDiskStatus(self):
    sc_log = ssh_troubleshooter_utils.GetSerialConsoleLog(
        self.compute_client, self.compute_message, self.instance.name,
        self.project.name, self.zone)
    if ssh_troubleshooter_utils.SearchPatternErrorInLog(DISK_ERROR_PATTERN,
                                                        sc_log):
      self.issues['disk'] = DISK_MESSAGE.format(self.instance.disks[0].source)

  def _CreateTimeSeriesListRequest(self, metrics_type):
    """Create a MonitoringProjectsTimeSeriesListRequest.

    Args:
      metrics_type: str, https://cloud.google.com/monitoring/api/metrics

    Returns:
      MonitoringProjectsTimeSeriesListRequest, input message for
      ProjectsTimeSeriesService List method.
    """
    current_time = datetime.datetime.now(datetime.timezone.utc)
    tp_proto_end_time = timestamp_pb2.Timestamp()
    tp_proto_end_time.FromDatetime(current_time)
    tp_proto_start_time = timestamp_pb2.Timestamp()
    tp_proto_start_time.FromDatetime(current_time -
                                     datetime.timedelta(seconds=600))
    return self.monitoring_message.MonitoringProjectsTimeSeriesListRequest(
        name='projects/{project}'.format(project=self.project.name),
        filter=FILTER_TEMPLATE.format(
            metrics_type=metrics_type, instance_name=self.instance.name),
        interval_endTime=tp_proto_end_time.ToJsonString(),
        interval_startTime=tp_proto_start_time.ToJsonString())