HEX

File: //snap/google-cloud-cli/current/lib/surface/spanner/samples/init.py
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command for spanner samples init."""

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import json
import os
import textwrap

from apitools.base.py import exceptions as apitools_exceptions
from googlecloudsdk.api_lib.spanner import database_operations
from googlecloudsdk.api_lib.spanner import database_sessions
from googlecloudsdk.api_lib.spanner import databases
from googlecloudsdk.api_lib.spanner import instances
from googlecloudsdk.api_lib.storage import storage_api
from googlecloudsdk.api_lib.storage import storage_util
from googlecloudsdk.calliope import base
from googlecloudsdk.calliope import exceptions as calliope_exceptions
from googlecloudsdk.command_lib.spanner import ddl_parser
from googlecloudsdk.command_lib.spanner import samples
from googlecloudsdk.core import log
from googlecloudsdk.core import properties
from googlecloudsdk.core import resources
from googlecloudsdk.core.console import progress_tracker
from googlecloudsdk.core.util import files
from googlecloudsdk.core.util import retry


def check_instance(instance_id):
  """Verify that the given Cloud Spanner instance can be fetched.

  Args:
    instance_id: ID of the instance, passed to instances.Get.

  Raises:
    ValueError: If the lookup fails with an HTTP "not found" error. The
      message includes the gcloud command that creates the instance.
  """
  try:
    instances.Get(instance_id)
  except apitools_exceptions.HttpNotFoundError:
    # The template is filled in first and dedented afterwards.
    message_template = """\
        Instance '{instance_id}' does not exist. Create it with:

        $ gcloud spanner instances create {instance_id}
        """
    raise ValueError(
        textwrap.dedent(message_template.format(instance_id=instance_id)))


def download_sample_files(appname):
  """Fetch the schema, binaries and sample data of a sample app from GCS.

  The complete list of (GCS object, local path) pairs is built first. When
  every local path already exists nothing is downloaded; when at least one is
  missing, every file is downloaded again, overwriting local copies.

  Args:
    appname: The name of the sample app, should exist in samples.APP_NAMES
  """
  client = storage_api.StorageClient()
  bucket = storage_util.BucketReference.FromUrl(samples.GCS_BUCKET)

  # Each entry pairs a GCS object reference with its local destination. The
  # schema file is always the first entry.
  schema_ref = storage_util.ObjectReference.FromBucketRef(
      bucket, samples.get_gcs_schema_name(appname))
  downloads = [(schema_ref, samples.get_local_schema_path(appname))]

  bin_messages = client.ListBucket(
      bucket, prefix=samples.get_gcs_bin_prefix(appname))
  local_bin_dir = samples.get_local_bin_path(appname)
  for bin_message in bin_messages:
    # An empty last path component marks a folder entry, not a file.
    if bin_message.name.split('/')[-1]:
      bin_ref = storage_util.ObjectReference.FromMessage(bin_message)
      bin_file_name = bin_ref.name.split('/')[-1]
      downloads.append((bin_ref, os.path.join(local_bin_dir, bin_file_name)))

  if samples.has_sample_data_statements(appname):
    data_prefix = samples.get_gcs_data_insert_statements_prefix(appname)
    for data_message in client.ListBucket(bucket, prefix=data_prefix):
      data_ref = storage_util.ObjectReference.FromMessage(data_message)
      # Folder entries end with '/'. `os.path.isdir` cannot be used for this
      # check because these are GCS object names, not local paths.
      if not data_ref.name.endswith('/'):
        local_data_dir = samples.get_local_data_insert_statements_path(
            appname)
        data_file_name = data_ref.name.split('/')[-1]
        downloads.append(
            (data_ref, os.path.join(local_data_dir, data_file_name)))

  # Nothing to do when every file is already present locally.
  if all(os.path.exists(destination) for _, destination in downloads):
    return

  # At least one file is missing: fetch the whole set again.
  log.status.Print('Downloading files for the {} sample app'.format(appname))
  for remote_ref, destination in downloads:
    log.status.Print('Downloading {}'.format(destination))
    parent_dir = os.path.dirname(destination)
    if not os.path.exists(parent_dir):
      files.MakeDir(parent_dir)
    client.CopyFileFromGCS(remote_ref, destination, overwrite=True)


def _get_http_error_message(content, fallback):
  """Extract `error.message` from a JSON error body, or return `fallback`.

  Args:
    content: The raw body of an HTTP error response; expected to be a JSON
      document of the form {"error": {"message": ...}}.
    fallback: The message to return when `content` is not valid JSON, does not
      have the expected shape, or carries an empty message.

  Returns:
    The server-provided message when available, `fallback` otherwise.
  """
  try:
    message = json.loads(content)['error']['message']
  except (ValueError, KeyError, TypeError):
    # ValueError: body is not valid JSON. KeyError / TypeError: the decoded
    # payload (or `content` itself) does not have the expected structure.
    message = None
  return message or fallback


def _create_db_op(instance_ref, database_id, statements, database_dialect):
  """Wrapper over databases.Create with error handling.

  Every failure is reported as a ValueError so that callers only need to
  handle a single exception type.

  Args:
    instance_ref: Resource reference of the instance that will hold the
      database; its `instancesId` is used in the error message.
    database_id: ID of the database to create.
    statements: DDL statements passed to databases.Create.
    database_dialect: Dialect passed to databases.Create.

  Returns:
    Whatever databases.Create returns (the caller awaits it as an operation).

  Raises:
    ValueError: If the database already exists (HTTP conflict), if the API
      returns any other HTTP error, or if any other exception occurs.
  """
  generic_message = "Failed to create database '{}'.".format(database_id)
  try:
    return databases.Create(
        instance_ref,
        database_id,
        statements,
        database_dialect=database_dialect)
  except apitools_exceptions.HttpConflictError:
    raise ValueError(
        textwrap.dedent("""\
        Database '{database_id}' exists already. Delete it with:

        $ gcloud spanner databases delete {database_id} --instance={instance_id}
        """.format(
            database_id=database_id, instance_id=instance_ref.instancesId)))
  except apitools_exceptions.HttpError as ex:
    # The error body may not be the expected JSON document. Parse it
    # defensively so a malformed body cannot raise a second, unrelated
    # exception from inside this handler.
    raise ValueError(_get_http_error_message(ex.content, generic_message))
  except Exception:  # pylint: disable=broad-except
    raise ValueError(generic_message)


def insert_sample_data_in_one_file(appname, file_name, session_ref):
  """Execute every INSERT statement found in one local data file.

  The file is split on newlines; each line that starts with 'INSERT' is
  executed as a separate statement and all other lines are ignored.

  Args:
    appname: The name of the sample app.
    file_name: Path of the local file holding the insert statements.
    session_ref: Reference of the session used to execute the statements.

  Raises:
    ValueError: If the sample app has no sample data statements.
  """
  if not samples.has_sample_data_statements(appname):
    raise ValueError('{} cannot pre-populate data.'.format(appname))

  def _always_retry(exc_type, *args):
    """Retry predicate that accepts every exception."""
    return True

  file_contents = files.ReadFileContents(file_name)
  for line in file_contents.split('\n'):
    # An empty line never starts with 'INSERT', so this single test skips
    # both blank lines and non-insert lines.
    if line.startswith('INSERT'):
      # Use a retryer to handle 409 txn abort errors that tend to happen
      # when db is just created and group assignment contends with insert
      # and commit dual-trip txns.
      retryer = retry.Retryer(max_retrials=5)
      retryer.RetryOnException(
          database_sessions.ExecuteSql,
          args=[line, 'NORMAL', session_ref],
          should_retry_if=_always_retry,
          sleep_ms=2000,
      )


def insert_sample_data(appname, database_id, session_ref):
  """Populate the database from the app's local insert-statement files.

  Each file listed under the app's local data directory is handed to
  insert_sample_data_in_one_file, all inside a single progress tracker.

  Args:
    appname: The name of the sample app.
    database_id: ID of the database, used only in the progress message.
    session_ref: Reference of the session used to execute the statements.

  Raises:
    ValueError: If the sample app has no sample data statements.
  """
  if not samples.has_sample_data_statements(appname):
    raise ValueError('{} cannot pre-populate data.'.format(appname))

  tracker = progress_tracker.ProgressTracker(
      'Populating data into `{}`'.format(database_id),
      aborted_message='Aborting wait for data population.\n',
  )
  with tracker:
    statements_dir = samples.get_local_data_insert_statements_path(appname)
    for statements_file in files.GetDirectoryTreeListing(statements_dir):
      insert_sample_data_in_one_file(appname, statements_file, session_ref)


def check_create_db(appname, instance_ref, database_id):
  """Create the DB if it doesn't exist already, raise otherwise.

  The schema is read from the sample app's local schema file. For GoogleSQL
  apps the parsed DDL is passed to the create call. For PostgreSQL apps the
  database is created without extra statements and the schema is applied
  through a separate UpdateDdl call.

  Args:
    appname: The name of the sample app.
    instance_ref: Resource reference of the instance that will hold the
      database.
    database_id: ID of the database to create.

  Raises:
    ValueError: Propagated from _create_db_op when the database cannot be
      created (for example because it already exists).
  """
  schema_file = samples.get_local_schema_path(appname)
  database_dialect = samples.get_database_dialect(appname)

  schema = files.ReadFileContents(schema_file)
  # Special case for POSTGRESQL dialect:
  # a. CreateDatabase does not support additional_statements. Instead a
  #    separate call to UpdateDDL is used.
  # b. ddl_parser only supports GSQL; instead remove comment lines, then
  #    split on ';'.
  if database_dialect == databases.DATABASE_DIALECT_POSTGRESQL:
    create_ddl = []
    # Remove comments
    schema = '\n'.join(
        [line for line in schema.split('\n') if not line.startswith('--')])
    # TODO(b/195711543): This would be incorrect if ';' is inside strings
    # and / or comments.
    # Drop whitespace-only fragments as well as empty ones: the text after
    # the final ';' is typically just a newline and is not a DDL statement.
    update_ddl = [stmt for stmt in schema.split(';') if stmt.strip()]
  else:
    create_ddl = ddl_parser.PreprocessDDLWithParser(schema)
    update_ddl = []

  create_op = _create_db_op(instance_ref, database_id, create_ddl,
                            database_dialect)
  database_operations.Await(create_op,
                            "Creating database '{}'".format(database_id))
  if update_ddl:
    database_ref = resources.REGISTRY.Parse(
        database_id,
        params={
            'instancesId': instance_ref.instancesId,
            'projectsId': instance_ref.projectsId,
        },
        collection='spanner.projects.instances.databases')
    update_op = databases.UpdateDdl(database_ref, update_ddl)
    database_operations.Await(update_op,
                              "Updating database '{}'".format(database_id))


@base.DefaultUniverseOnly
class Init(base.Command):
  """Initialize a Cloud Spanner sample app.

  This command creates a Cloud Spanner database in the given instance for the
  sample app and loads any initial data required by the application.
  """

  detailed_help = {
      'EXAMPLES': textwrap.dedent("""\
          To initialize the 'finance' sample app using instance
          'my-instance', run:

          $ {command} finance --instance-id=my-instance

          To initialize the 'finance-graph' sample app using instance
          'my-instance', run:

          $ {command} finance-graph --instance-id=my-instance
        """),
  }

  @staticmethod
  def Args(parser):
    """Args is called by calliope to gather arguments for this command.

    Registers the positional `appname` plus the required `--instance-id` and
    the optional `--database-id` flags.

    Args:
      parser: An argparse parser that you can use to add arguments that go on
        the command line after this command. Positional arguments are allowed.
    """
    parser.add_argument(
        'appname', help='The sample app name, e.g. "finance", "finance-graph".'
    )
    parser.add_argument(
        '--instance-id',
        required=True,
        type=str,
        help='The Cloud Spanner instance ID for the sample app.')
    parser.add_argument(
        '--database-id',
        type=str,
        help='ID of the new Cloud Spanner database to create for the sample '
        'app.')

  def Run(self, args):
    """This is what gets called when the user runs this command.

    Validates the app name and the instance, downloads the sample files,
    creates the database and, for apps that ship sample data statements,
    populates the database through a temporary session.

    Args:
      args: an argparse namespace. All the arguments that were provided to this
        command invocation.

    Returns:
      A message string: either a confirmation that initialization is done, or
      the command line that starts the backend gRPC service.

    Raises:
      BadArgumentException: If the app name is invalid, the instance does not
        exist, or the database cannot be created.
      SystemError: If populating the sample data fails.
    """
    appname = args.appname
    try:
      samples.check_appname(appname)
    except ValueError as ex:
      raise calliope_exceptions.BadArgumentException('APPNAME', ex)

    instance_id = args.instance_id
    instance_ref = resources.REGISTRY.Parse(
        instance_id,
        params={
            'projectsId': properties.VALUES.core.project.GetOrFail,
        },
        collection='spanner.projects.instances')

    # Fall back to the app's default database ID when none was given.
    if args.database_id is not None:
      database_id = args.database_id
    else:
      database_id = samples.get_db_id_for_app(appname)

    # Check that the instance exists
    log.status.Print("Checking instance '{}'".format(instance_id))
    try:
      check_instance(instance_id)
    except ValueError as ex:
      raise calliope_exceptions.BadArgumentException('--instance-id', ex)

    # Download any missing sample app binaries from GCS, including the schema
    # file we need to create the DB
    download_sample_files(appname)

    # Create the sample app DB
    log.status.Print(
        "Initializing database '{database_id}' for sample app '{appname}'"
        .format(database_id=database_id, appname=appname))
    try:
      check_create_db(appname, instance_ref, database_id)
    except ValueError as ex:
      raise calliope_exceptions.BadArgumentException('--database-id', ex)

    if samples.has_sample_data_statements(appname):
      database_ref = resources.REGISTRY.Parse(
          database_id,
          params={
              'instancesId': instance_ref.instancesId,
              'projectsId': instance_ref.projectsId,
          },
          collection='spanner.projects.instances.databases',
      )
      session = database_sessions.Create(database_ref)
      session_ref = resources.REGISTRY.ParseRelativeName(
          relative_name=session.name,
          collection='spanner.projects.instances.databases.sessions',
      )
      try:
        insert_sample_data(appname, database_id, session_ref)
      except Exception as ex:  # pylint: disable=broad-except
        # Keep the SystemError type, but report the underlying cause so the
        # user can tell why the data population failed.
        raise SystemError(
            'Failed to insert data for the database. Please fallback to '
            'manually insert. Error: {}'.format(ex)
        )
      else:
        return textwrap.dedent("""\
            Initialization done for your Spanner database.
            """)
      finally:
        # The session is only needed for the inserts; delete it whether or
        # not they succeeded.
        database_sessions.Delete(session_ref)
    else:
      backend_args = '{appname} --instance-id={instance_id}'.format(
          appname=appname, instance_id=instance_id
      )
      # Only mention the database when the user chose a non-default one.
      if args.database_id is not None:
        backend_args += ' --database-id {}'.format(database_id)
      return textwrap.dedent("""\
          Initialization done. Next, start the backend gRPC service with:

          $ gcloud spanner samples backend {}
          """.format(backend_args))