File: //snap/google-cloud-cli/396/platform/bq/clients/client_dataset.py
#!/usr/bin/env python
"""The BigQuery CLI dataset client library."""

import datetime
from typing import Dict, List, NamedTuple, Optional
from googleapiclient import discovery
from clients import utils as bq_client_utils
from frontend import utils as frontend_utils
from utils import bq_error
from utils import bq_id_utils
from utils import bq_processor_utils

EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME = 'externalCatalogDatasetOptions'


def GetDataset(apiclient: discovery.Resource, reference, dataset_view=None):
  """Get dataset with dataset_view parameter."""
  request = dict(reference)
  request['accessPolicyVersion'] = (
      bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  )
  if dataset_view is not None:
    request['datasetView'] = dataset_view
  return apiclient.datasets().get(**request).execute()
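

# Usage sketch (added for illustration; not part of the upstream module):
# fetch a dataset and read a couple of its fields. Assumes `apiclient` is an
# authenticated BigQuery discovery.Resource and `dataset_ref` is an
# ApiClientHelper.DatasetReference built by the caller; both names are
# hypothetical.
def _example_get_dataset(apiclient: discovery.Resource, dataset_ref):
  # GetDataset attaches the maximum supported IAM policy version itself, so
  # callers only pass the reference (and, optionally, a dataset view).
  dataset = GetDataset(apiclient, dataset_ref)
  return dataset.get('location'), dataset.get('etag')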


def ListDatasets(
    apiclient: discovery.Resource,
    id_fallbacks: NamedTuple(
        'IDS',
        [
            ('project_id', Optional[str]),
        ],
    ),
    reference: Optional[bq_id_utils.ApiClientHelper.ProjectReference] = None,
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    list_all: Optional[bool] = None,
    filter_expression: Optional[str] = None,
):
  """List the datasets associated with this reference."""
  return ListDatasetsWithTokenAndUnreachable(
      apiclient,
      id_fallbacks,
      reference,
      max_results,
      page_token,
      list_all,
      filter_expression,
  )['datasets']
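

# Usage sketch (illustration only): the id_fallbacks argument is any named
# tuple exposing a `project_id` attribute; a minimal one is built here with
# typing.NamedTuple. `apiclient` is assumed to be an authenticated BigQuery
# discovery.Resource.
def _example_list_datasets(apiclient: discovery.Resource, project_id: str):
  fallbacks = NamedTuple('IDS', [('project_id', Optional[str])])(
      project_id=project_id
  )
  # With reference=None the project is resolved from the fallbacks.
  return ListDatasets(apiclient, fallbacks, max_results=100)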


def ListDatasetsWithTokenAndUnreachable(
    apiclient: discovery.Resource,
    id_fallbacks: NamedTuple(
        'IDS',
        [
            ('project_id', Optional[str]),
        ],
    ),
    reference: Optional[bq_id_utils.ApiClientHelper.ProjectReference] = None,
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    list_all: Optional[bool] = None,
    filter_expression: Optional[str] = None,
):
  """List the datasets associated with this reference."""
  reference = bq_client_utils.NormalizeProjectReference(
      id_fallbacks=id_fallbacks, reference=reference
  )
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.ProjectReference,
      method='ListDatasets',
  )
  request = bq_processor_utils.PrepareListRequest(
      reference, max_results, page_token, filter_expression
  )
  if list_all is not None:
    request['all'] = list_all
  result = apiclient.datasets().list(**request).execute()
  dataset_list = result.get('datasets', [])
  unreachable_set = set(result.get('unreachable', []))
  next_token = result.get('nextPageToken', None)
  if max_results is not None:
    while 'nextPageToken' in result and len(dataset_list) < max_results:
      request['maxResults'] = max_results - len(dataset_list)
      request['pageToken'] = result['nextPageToken']
      result = apiclient.datasets().list(**request).execute()
      dataset_list.extend(result.get('datasets', []))
      unreachable_set.update(result.get('unreachable', []))
      next_token = result.get('nextPageToken', None)
  response = dict(datasets=dataset_list)
  if next_token:
    response['token'] = next_token
  if unreachable_set:
    response['unreachable'] = list(unreachable_set)
  return response
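

# Usage sketch (illustration only): drain every page of results, collecting
# datasets and any locations the API reported as unreachable. `fallbacks` and
# `project_ref` are assumed to be built as elsewhere in this module.
def _example_list_all_pages(
    apiclient: discovery.Resource, fallbacks, project_ref
):
  datasets = []
  unreachable = set()
  page_token = None
  while True:
    response = ListDatasetsWithTokenAndUnreachable(
        apiclient, fallbacks, reference=project_ref, page_token=page_token
    )
    datasets.extend(response.get('datasets', []))
    unreachable.update(response.get('unreachable', []))
    page_token = response.get('token')
    if not page_token:
      return {'datasets': datasets, 'unreachable': sorted(unreachable)}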


def GetDatasetIAMPolicy(apiclient, reference):
  """Gets IAM policy for the given dataset resource.

  Arguments:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference for the dataset resource.

  Returns:
    The IAM policy attached to the given dataset resource.

  Raises:
    BigqueryTypeError: if reference is not a DatasetReference.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='GetDatasetIAMPolicy',
  )
  formatted_resource = 'projects/%s/datasets/%s' % (
      reference.projectId,
      reference.datasetId,
  )
  body = {
      'options': {
          'requestedPolicyVersion': (
              bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
          )
      }
  }
  return (
      apiclient.datasets()
      .getIamPolicy(
          resource=formatted_resource,
          body=body,
      )
      .execute()
  )


def SetDatasetIAMPolicy(apiclient: discovery.Resource, reference, policy):
  """Sets IAM policy for the given dataset resource.

  Arguments:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference for the dataset resource.
    policy: The policy string in JSON format.

  Returns:
    The updated IAM policy attached to the given dataset resource.

  Raises:
    BigqueryTypeError: if reference is not a DatasetReference.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='SetDatasetIAMPolicy',
  )
  formatted_resource = 'projects/%s/datasets/%s' % (
      reference.projectId,
      reference.datasetId,
  )
  request = {'policy': policy}
  return (
      apiclient.datasets()
      .setIamPolicy(body=request, resource=formatted_resource)
      .execute()
  )
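

# Usage sketch (illustration only): read-modify-write of a dataset's IAM
# policy. The role and member strings are standard IAM formats used as
# examples; `member` might be e.g. 'user:alice@example.com'.
def _example_grant_dataset_viewer(
    apiclient: discovery.Resource, dataset_ref, member: str
):
  policy = GetDatasetIAMPolicy(apiclient, dataset_ref)
  policy.setdefault('bindings', []).append(
      {'role': 'roles/bigquery.dataViewer', 'members': [member]}
  )
  # The policy carries the etag it was read with, so the write fails rather
  # than clobbering a concurrent modification.
  return SetDatasetIAMPolicy(apiclient, dataset_ref, policy)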


def DatasetExists(
    apiclient: discovery.Resource,
    reference: 'bq_id_utils.ApiClientHelper.DatasetReference',
) -> bool:
  """Returns true if a dataset exists."""
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='DatasetExists',
  )
  try:
    apiclient.datasets().get(**dict(reference)).execute()
    return True
  except bq_error.BigqueryNotFoundError:
    return False


def GetDatasetRegion(
    apiclient: discovery.Resource,
    reference: 'bq_id_utils.ApiClientHelper.DatasetReference',
) -> Optional[str]:
  """Returns the region of a dataset as a string."""
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='GetDatasetRegion',
  )
  try:
    return apiclient.datasets().get(**dict(reference)).execute()['location']
  except bq_error.BigqueryNotFoundError:
    return None
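

# Usage sketch (illustration only): combine the two probes above to report a
# dataset's region only when the dataset actually exists.
def _example_region_if_exists(
    apiclient: discovery.Resource, dataset_ref
) -> Optional[str]:
  if not DatasetExists(apiclient, dataset_ref):
    return None
  return GetDatasetRegion(apiclient, dataset_ref)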


# TODO(b/191712821): add tags modification here. During the Preview, tags are
# not modifiable via the BigQuery UI/CLI, only via Resource Manager.
def CreateDataset(
    apiclient: discovery.Resource,
    reference,
    ignore_existing=False,
    description=None,
    display_name=None,
    acl=None,
    default_table_expiration_ms=None,
    default_partition_expiration_ms=None,
    data_location=None,
    labels=None,
    default_kms_key=None,
    source_dataset_reference=None,
    external_source=None,
    connection_id=None,
    external_catalog_dataset_options=None,
    max_time_travel_hours=None,
    storage_billing_model=None,
    resource_tags=None,
):
  """Create a dataset corresponding to DatasetReference.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The DatasetReference to create.
    ignore_existing: (boolean, default False) If False, raise an exception if
      the dataset already exists.
    description: An optional dataset description.
    display_name: An optional friendly name for the dataset.
    acl: An optional ACL for the dataset, as a list of dicts.
    default_table_expiration_ms: Default expiration time to apply to new tables
      in this dataset.
    default_partition_expiration_ms: Default partition expiration time to apply
      to new partitioned tables in this dataset.
    data_location: Location where the data in this dataset should be stored.
      Must be either 'EU' or 'US'. If specified, the project that owns the
      dataset must be enabled for data location.
    labels: An optional dict of labels.
    default_kms_key: An optional KMS key that will apply to all newly created
      tables in the dataset if no explicit key is supplied in the create
      request.
    source_dataset_reference: An optional ApiClientHelper.DatasetReference that
      will be the source of this linked dataset.
    external_source: External source that backs this dataset.
    connection_id: Connection used for accessing the external_source.
    external_catalog_dataset_options: An optional JSON string or file path
      containing the external catalog dataset options to create.
    max_time_travel_hours: Optional. Define the max time travel in hours. The
      value can be from 48 to 168 hours (2 to 7 days). The default value is 168
      hours if this is not set.
    storage_billing_model: Optional. Sets the storage billing model for the
      dataset.
    resource_tags: An optional dict of tags to attach to the dataset.

  Raises:
    BigqueryTypeError: If reference is not an ApiClientHelper.DatasetReference,
      if source_dataset_reference is provided but is not a
      bq_id_utils.ApiClientHelper.DatasetReference, if both
      external_dataset_reference and source_dataset_reference are provided,
      or if not all required arguments for an external dataset are provided.
    BigqueryDuplicateError: If reference exists and ignore_existing is False.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='CreateDataset',
  )

  body = bq_processor_utils.ConstructObjectInfo(reference)
  if display_name is not None:
    body['friendlyName'] = display_name
  if description is not None:
    body['description'] = description
  if acl is not None:
    body['access'] = acl
  if default_table_expiration_ms is not None:
    body['defaultTableExpirationMs'] = default_table_expiration_ms
  if default_partition_expiration_ms is not None:
    body['defaultPartitionExpirationMs'] = default_partition_expiration_ms
  if default_kms_key is not None:
    body['defaultEncryptionConfiguration'] = {'kmsKeyName': default_kms_key}
  if data_location is not None:
    body['location'] = data_location
  if labels:
    body['labels'] = {}
    for label_key, label_value in labels.items():
      body['labels'][label_key] = label_value
  if source_dataset_reference is not None:
    bq_id_utils.typecheck(
        source_dataset_reference,
        bq_id_utils.ApiClientHelper.DatasetReference,
        method='CreateDataset',
    )
    body['linkedDatasetSource'] = {
        'sourceDataset': bq_processor_utils.ConstructObjectInfo(
            source_dataset_reference
        )['datasetReference']
    }
  # externalDatasetReference can only be specified for external datasets; it
  # cannot be used for regular or linked datasets, so it is only set when an
  # external_source is given.
  if external_source:
    body['externalDatasetReference'] = {
        'externalSource': external_source,
        'connection': connection_id,
    }
  if external_catalog_dataset_options is not None:
    body[EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME] = frontend_utils.GetJson(
        external_catalog_dataset_options
    )
  if max_time_travel_hours is not None:
    body['maxTimeTravelHours'] = max_time_travel_hours
  if storage_billing_model is not None:
    body['storageBillingModel'] = storage_billing_model
  if resource_tags is not None:
    body['resourceTags'] = resource_tags

  args = dict(reference.GetProjectReference())
  args['accessPolicyVersion'] = bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  try:
    apiclient.datasets().insert(body=body, **args).execute()
  except bq_error.BigqueryDuplicateError:
    if not ignore_existing:
      raise
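

# Usage sketch (illustration only): create a labelled dataset with a default
# table expiration, ignoring the call when the dataset already exists. The
# location, label, and expiration values are example inputs.
def _example_create_dataset(apiclient: discovery.Resource, dataset_ref):
  CreateDataset(
      apiclient,
      dataset_ref,
      ignore_existing=True,
      description='Scratch dataset for ad-hoc exports.',
      labels={'env': 'dev'},
      default_table_expiration_ms=7 * 24 * 60 * 60 * 1000,  # 7 days
      data_location='US',
  )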


def UpdateDataset(
    apiclient: discovery.Resource,
    reference: 'bq_id_utils.ApiClientHelper.DatasetReference',
    description: Optional[str] = None,
    display_name: Optional[str] = None,
    acl=None,
    default_table_expiration_ms=None,
    default_partition_expiration_ms=None,
    labels_to_set=None,
    label_keys_to_remove=None,
    etag=None,
    default_kms_key=None,
    max_time_travel_hours=None,
    storage_billing_model=None,
    tags_to_attach: Optional[Dict[str, str]] = None,
    tags_to_remove: Optional[List[str]] = None,
    clear_all_tags: Optional[bool] = False,
    external_catalog_dataset_options: Optional[str] = None,
    update_mode: Optional[bq_client_utils.UpdateMode] = None,
):
  """Updates a dataset.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The DatasetReference to update.
    description: An optional dataset description.
    display_name: An optional friendly name for the dataset.
    acl: An optional ACL for the dataset, as a list of dicts.
    default_table_expiration_ms: Optional number of milliseconds for the default
      expiration duration for new tables created in this dataset.
    default_partition_expiration_ms: Optional number of milliseconds for the
      default partition expiration duration for new partitioned tables created
      in this dataset.
    labels_to_set: An optional dict of labels to set on this dataset.
    label_keys_to_remove: An optional list of label keys to remove from this
      dataset.
    etag: If set, checks that etag in the existing dataset matches.
    default_kms_key: An optional KMS key that will apply to all newly created
      tables in the dataset if no explicit key is supplied in the create
      request.
    max_time_travel_hours: Optional. Define the max time travel in hours. The
      value can be from 48 to 168 hours (2 to 7 days). The default value is 168
      hours if this is not set.
    storage_billing_model: Optional. Sets the storage billing model for the
      dataset.
    tags_to_attach: An optional dict of tags to attach to the dataset.
    tags_to_remove: An optional list of tag keys to remove from the dataset.
    clear_all_tags: If set, clears all the tags attached to the dataset.
    external_catalog_dataset_options: An optional JSON string or file path
      containing the external catalog dataset options to update.
    update_mode: An optional flag indicating which dataset fields to update:
      metadata fields only, ACL fields only, or both metadata and ACL fields.

  Raises:
    BigqueryTypeError: If reference is not a DatasetReference.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='UpdateDataset',
  )

  # Get the existing dataset and associated ETag.
  dataset = _ExecuteGetDatasetRequest(apiclient, reference, etag)

  # Merge in the changes.
  if display_name is not None:
    dataset['friendlyName'] = display_name
  if description is not None:
    dataset['description'] = description
  if acl is not None:
    dataset['access'] = acl
  if default_table_expiration_ms is not None:
    dataset['defaultTableExpirationMs'] = default_table_expiration_ms
  if default_partition_expiration_ms is not None:
    if default_partition_expiration_ms == 0:
      dataset['defaultPartitionExpirationMs'] = None
    else:
      dataset['defaultPartitionExpirationMs'] = default_partition_expiration_ms
  if default_kms_key is not None:
    dataset['defaultEncryptionConfiguration'] = {'kmsKeyName': default_kms_key}
  if 'labels' not in dataset:
    dataset['labels'] = {}
  if labels_to_set:
    for label_key, label_value in labels_to_set.items():
      dataset['labels'][label_key] = label_value
  if label_keys_to_remove:
    for label_key in label_keys_to_remove:
      dataset['labels'][label_key] = None
  if max_time_travel_hours is not None:
    dataset['maxTimeTravelHours'] = max_time_travel_hours
  if storage_billing_model is not None:
    dataset['storageBillingModel'] = storage_billing_model
  resource_tags = {}
  if clear_all_tags and 'resourceTags' in dataset:
    for tag in dataset['resourceTags']:
      resource_tags[tag] = None
  else:
    for tag in tags_to_remove or []:
      resource_tags[tag] = None
  for tag in tags_to_attach or {}:
    resource_tags[tag] = tags_to_attach[tag]
  # resourceTags is used to add a new tag binding, update the value of an
  # existing tag, and remove a tag binding.
  dataset['resourceTags'] = resource_tags

  if external_catalog_dataset_options is not None:
    dataset.setdefault(EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME, {})
    current_options = dataset[EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME]
    dataset[EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME] = (
        frontend_utils.UpdateExternalCatalogDatasetOptions(
            current_options, external_catalog_dataset_options
        )
    )

  _ExecutePatchDatasetRequest(
      apiclient,
      reference,
      dataset,
      etag,
      update_mode,
  )
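

# Usage sketch (illustration only): update a description, set one label, and
# remove another. The description and label keys are example inputs.
def _example_update_dataset(apiclient: discovery.Resource, dataset_ref):
  UpdateDataset(
      apiclient,
      dataset_ref,
      description='Refreshed nightly by the export pipeline.',
      labels_to_set={'team': 'data-eng'},
      label_keys_to_remove=['deprecated'],
  )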


def _ExecuteGetDatasetRequest(
    apiclient: discovery.Resource,
    reference,
    etag: Optional[str] = None,
):
  """Executes request to get dataset.

  Args:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference to get.
    etag: if set, checks that etag in the existing dataset matches.

  Returns:
    The result of executing the request, if it succeeds.
  """
  args = dict(reference)
  args['accessPolicyVersion'] = bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  get_request = apiclient.datasets().get(**args)
  if etag:
    get_request.headers['If-Match'] = etag
  dataset = get_request.execute()
  return dataset


def _ExecutePatchDatasetRequest(
    apiclient: discovery.Resource,
    reference,
    dataset,
    etag: Optional[str] = None,
    update_mode: Optional[bq_client_utils.UpdateMode] = None,
):
  """Executes request to patch dataset.

  Args:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference to patch.
    dataset: the body of the request.
    etag: if set, checks that etag in the existing dataset matches.
    update_mode: a flag indicating which dataset fields to update.
  """
  parameters = dict(reference)
  parameters['accessPolicyVersion'] = (
      bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  )
  if update_mode is not None:
    parameters['updateMode'] = update_mode.value

  request = apiclient.datasets().patch(body=dataset, **parameters)

  # Perform a conditional update to protect against concurrent
  # modifications to this dataset.  By placing the ETag returned in
  # the get operation into the If-Match header, the API server will
  # make sure the dataset hasn't changed.  If there is a conflicting
  # change, this update will fail with a "Precondition failed"
  # error.
  if etag or dataset['etag']:
    request.headers['If-Match'] = etag if etag else dataset['etag']
  request.execute()
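

# Illustrative sketch of the conditional-update pattern implemented above:
# read the dataset, mutate it, and patch it back with its etag so that a
# concurrent change makes the patch fail instead of being overwritten.
def _example_conditional_description_update(
    apiclient: discovery.Resource, dataset_ref, new_description: str
):
  current = _ExecuteGetDatasetRequest(apiclient, dataset_ref)
  current['description'] = new_description
  _ExecutePatchDatasetRequest(
      apiclient, dataset_ref, current, etag=current.get('etag')
  )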


def DeleteDataset(
    apiclient: discovery.Resource,
    reference: bq_id_utils.ApiClientHelper.DatasetReference,
    ignore_not_found: bool = False,
    delete_contents: Optional[bool] = None,
) -> None:
  """Deletes DatasetReference reference.

  Args:
    apiclient: the api client to make the request with.
    reference: the DatasetReference to delete.
    ignore_not_found: Whether to ignore "not found" errors.
    delete_contents: [Boolean] Whether to delete the contents of non-empty
      datasets. If not specified, the server default applies and the delete
      fails when the dataset still contains tables.

  Raises:
    BigqueryTypeError: if reference is not a DatasetReference.
    bq_error.BigqueryNotFoundError: if reference does not exist and
      ignore_not_found is False.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='DeleteDataset',
  )

  args = dict(reference)

  if delete_contents is not None:
    args['deleteContents'] = delete_contents
  try:
    apiclient.datasets().delete(**args).execute()
  except bq_error.BigqueryNotFoundError:
    if not ignore_not_found:
      raise
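

# Usage sketch (illustration only): delete a dataset together with any tables
# it still contains, treating "already gone" as success.
def _example_force_delete_dataset(apiclient: discovery.Resource, dataset_ref):
  DeleteDataset(
      apiclient,
      dataset_ref,
      ignore_not_found=True,
      delete_contents=True,
  )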


def UndeleteDataset(
    apiclient: discovery.Resource,
    dataset_reference: bq_id_utils.ApiClientHelper.DatasetReference,
    timestamp: Optional[datetime.datetime] = None,
) -> bool:
  """Undeletes a dataset.

  Args:
    apiclient: The api client to make the request with.
    dataset_reference: [Type:
      bq_id_utils.ApiClientHelper.DatasetReference] DatasetReference of the
      dataset to be undeleted.
    timestamp: [Type: Optional[datetime.datetime]] Timestamp for which the
      dataset version is to be undeleted.

  Returns:
    The response from the undelete request.

  Raises:
    BigqueryDuplicateError: when the dataset to be undeleted already exists.
  """
  try:
    args = dict(dataset_reference)
    if timestamp:
      args['body'] = {
          'deletionTime': frontend_utils.FormatRfc3339(timestamp).replace(
              '+00:00', ''
          )
      }
    return apiclient.datasets().undelete(**args).execute()

  except bq_error.BigqueryDuplicateError as e:
    raise e
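

# Usage sketch (illustration only): undelete the dataset version that existed
# one hour ago. The offset is an example; any timestamp within the dataset's
# time travel window could be used.
def _example_undelete_dataset(apiclient: discovery.Resource, dataset_ref):
  one_hour_ago = datetime.datetime.now(
      datetime.timezone.utc
  ) - datetime.timedelta(hours=1)
  return UndeleteDataset(apiclient, dataset_ref, timestamp=one_hour_ago)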