// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1beta2;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

// Code-generation options for the Go and Java bindings of this package.
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1beta2";


// The ClusterController service provides methods to manage clusters
// of Google Compute Engine instances.
//
// Every method is addressed by project and Cloud Dataproc region; methods
// that act on a single cluster additionally take the cluster name in the
// URL path.
service ClusterController {
  // Creates a cluster in a project.
  //
  // The `cluster` field of the request is sent as the HTTP request body.
  // Returns a long-running operation.
  rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
  }

  // Updates a cluster in a project.
  //
  // The `cluster` field of the request is sent as the HTTP request body.
  // Returns a long-running operation.
  rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
  }

  // Deletes a cluster in a project.
  //
  // Returns a long-running operation.
  rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
  }

  // Lists the clusters in a region of a project.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1beta2/projects/{project_id}/regions/{region}/clusters"
    };
  }

  // Gets cluster diagnostic information.
  // After the operation completes, the Operation.response field
  // contains `DiagnoseClusterResults`.
  rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
  }
}

// Describes the identifying information, config, and status of
// a cluster of Google Compute Engine instances.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1;

  // Required. The cluster name. Cluster names within a project must be
  // unique. Names of deleted clusters can be reused.
  string cluster_name = 2;

  // Required. The cluster config. Note that Cloud Dataproc may set
  // default values, and values may change when clusters are updated.
  ClusterConfig config = 3;

  // Optional. The labels to associate with this cluster.
  //
  // * Label **keys** must contain 1 to 63 characters, and must conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * Label **values** may be empty, but, if present, must contain 1 to 63
  //   characters, and must conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  //
  // No more than 32 labels can be associated with a cluster.
  map<string, string> labels = 8;

  // Output-only. The cluster status.
  ClusterStatus status = 4;

  // Output-only. The previous cluster statuses.
  repeated ClusterStatus status_history = 7;

  // Output-only. A cluster UUID (Universally Unique Identifier). Cloud
  // Dataproc generates this value when it creates the cluster.
  string cluster_uuid = 6;

  // Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9;
}

// The cluster config.
message ClusterConfig {
  // Optional. A Google Cloud Storage staging bucket used for sharing generated
  // SSH keys and config. If you do not specify a staging bucket, Cloud
  // Dataproc will determine an appropriate Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the Google
  // Compute Engine zone where your cluster is deployed, and then it will create
  // and manage this project-level, per-location bucket for you.
  string config_bucket = 1;

  // Required. The shared Google Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8;

  // Optional. The Google Compute Engine config settings for
  // the master instance in a cluster.
  InstanceGroupConfig master_config = 9;

  // Optional. The Google Compute Engine config settings for
  // worker instances in a cluster.
  InstanceGroupConfig worker_config = 10;

  // Optional. The Google Compute Engine config settings for
  // additional worker instances in a cluster.
  InstanceGroupConfig secondary_worker_config = 12;

  // Optional. The config settings for software inside the cluster.
  SoftwareConfig software_config = 13;

  // Optional. The config settings for the cluster's auto-delete schedule.
  LifecycleConfig lifecycle_config = 14;

  // Optional. Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's <code>role</code> metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use
  // `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11;
}

// Common config settings for resources of Google Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // Optional. The zone where the Google Compute Engine cluster will be located.
  // On a create request, it is required in the "global" region. If omitted
  // in a non-global Cloud Dataproc region, the service will pick a zone in the
  // corresponding Compute Engine region. On a get request, zone will always be
  // present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `us-central1-f`
  string zone_uri = 1;

  // Optional. The Google Compute Engine network to be used for machine
  // communications. Cannot be specified with `subnetwork_uri`. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](/compute/docs/subnetworks) for more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`
  // * `projects/[project_id]/regions/global/default`
  // * `default`
  string network_uri = 2;

  // Optional. The Google Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with `network_uri`.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`
  // * `projects/[project_id]/regions/us-east1/sub0`
  // * `sub0`
  string subnetwork_uri = 6;

  // Optional. If true, all instances in the cluster will only have internal IP
  // addresses. By default, clusters are not restricted to internal IP addresses,
  // and will have ephemeral external IP addresses assigned to each instance.
  // This `internal_ip_only` restriction can only be enabled for subnetwork
  // enabled networks, and all off-cluster dependencies must be configured to be
  // accessible without external IP addresses.
  bool internal_ip_only = 7;

  // Optional. The service account of the instances. Defaults to the default
  // Google Compute Engine service account. Custom service accounts need
  // permissions equivalent to the following IAM roles:
  //
  // * roles/logging.logWriter
  // * roles/storage.objectAdmin
  //
  // (see https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts
  // for more information).
  //
  // Example: `[account_id]@[project_id].iam.gserviceaccount.com`
  string service_account = 8;

  // Optional. The URIs of service account scopes to be included in Google
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3;

  // The Google Compute Engine tags to add to all instances (see
  // [Tagging instances](/compute/docs/label-or-tag-resources#tags)).
  repeated string tags = 4;

  // The Google Compute Engine metadata entries to add to all instances (see
  // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5;
}

// The config settings for Google Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
  // Optional. The number of VM instances in the instance group.
  // For master instance groups, must be set to 1.
  int32 num_instances = 1;

  // Optional. The list of instance names. Cloud Dataproc derives the names from
  // `cluster_name`, `num_instances`, and the instance group if not set by the
  // user (recommended practice is to let Cloud Dataproc derive the names).
  repeated string instance_names = 2;

  // Output-only. The Google Compute Engine image resource used for cluster
  // instances. Inferred from `SoftwareConfig.image_version`.
  string image_uri = 3;

  // Optional. The Google Compute Engine machine type used for cluster instances.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
  // * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
  // * `n1-standard-2`
  string machine_type_uri = 4;

  // Optional. Disk option config settings.
  DiskConfig disk_config = 5;

  // Optional. Specifies that this instance group contains preemptible instances.
  bool is_preemptible = 6;

  // Output-only. The config for the Google Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7;

  // Optional. The Google Compute Engine accelerator configuration for these
  // instances.
  //
  // **Beta Feature**: This feature is still under development. It may be
  // changed before final release.
  repeated AcceleratorConfig accelerators = 8;
}

// Specifies the resources used to actively manage an instance group.
// Both fields are output-only names of Compute Engine resources.
message ManagedGroupConfig {
  // Output-only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1;

  // Output-only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2;
}

// Specifies the type and number of accelerator cards attached to the instances
// of an instance group (see [GPUs on Compute Engine](/compute/docs/gpus/)).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Google Compute Engine AcceleratorTypes](/compute/docs/reference/beta/acceleratorTypes).
  //
  // Examples:
  //
  // * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
  // * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
  // * `nvidia-tesla-k80`
  string accelerator_type_uri = 1;

  // The number of accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}

// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // Optional. Size in GB of the boot disk (default is 500GB).
  int32 boot_disk_size_gb = 1;

  // Optional. Number of attached SSDs, from 0 to 4 (default is 0).
  //
  // * If SSDs are not attached, the boot disk is used to store runtime logs
  //   and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
  //   data.
  // * If one or more SSDs are attached, this runtime bulk data is spread
  //   across them, and the boot disk contains only basic config and installed
  //   binaries.
  int32 num_local_ssds = 2;
}

// Specifies the schedule configuration for automatic cluster deletion.
message LifecycleConfig {
  // Optional. The longest duration that the cluster may stay idle; once this
  // threshold is passed, the cluster is auto-deleted.
  google.protobuf.Duration idle_delete_ttl = 1;

  // The scheduled end of the cluster's life, given either as an absolute time
  // or as a duration. Because these fields share a oneof, at most one of them
  // can be set.
  oneof ttl {
    // Optional. The time when the cluster will be auto-deleted.
    google.protobuf.Timestamp auto_delete_time = 2;

    // Optional. The lifetime of the cluster; the cluster will be auto-deleted
    // at the end of this duration.
    google.protobuf.Duration auto_delete_ttl = 3;
  }
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. The Google Cloud Storage URI of the executable file.
  string executable_file = 1;

  // Optional. The amount of time the executable has to complete. Default is
  // 10 minutes. Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable has not completed by the end of the timeout
  // period.
  google.protobuf.Duration execution_timeout = 2;
}

// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;
  }

  // The cluster substate: additional detail that qualifies `State`.
  enum Substate {
    // No substate is specified.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state
    // (for example, critical daemons are not running or HDFS capacity is
    // exhausted).
    //
    // Applies to RUNNING state.
    UNHEALTHY = 1;

    // The agent-reported status is out of date (may occur if
    // Cloud Dataproc loses communication with Agent).
    //
    // Applies to RUNNING state.
    STALE_STATUS = 2;
  }

  // Output-only. The cluster's state.
  State state = 1;

  // Output-only. Optional details of the cluster's state.
  string detail = 2;

  // Output-only. The time when this state was entered.
  google.protobuf.Timestamp state_start_time = 3;

  // Output-only. Additional state information that includes
  // status reported by the agent.
  Substate substate = 4;
}

// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // Optional. The version of software inside the cluster. It must match the
  // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the
  // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)).
  string image_version = 1;

  // Optional. The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, such as
  // `core:fs.defaultFS`. The following are the supported prefixes
  // and the config files they map to:
  //
  // * capacity-scheduler: `capacity-scheduler.xml`
  // * core:   `core-site.xml`
  // * distcp: `distcp-default.xml`
  // * hdfs:   `hdfs-site.xml`
  // * hive:   `hive-site.xml`
  // * mapred: `mapred-site.xml`
  // * pig:    `pig.properties`
  // * spark:  `spark-defaults.conf`
  // * yarn:   `yarn-site.xml`
  //
  // For more information, see
  // [Cluster properties](/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2;
}

// Contains cluster daemon metrics, such as HDFS and YARN stats.
// Each metric set is a map from a metric name to a 64-bit integer value.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
  // The HDFS metrics.
  map<string, int64> hdfs_metrics = 1;

  // The YARN metrics.
  map<string, int64> yarn_metrics = 2;
}

// A request to create a cluster.
// Request message for `ClusterController.CreateCluster`.
message CreateClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster to create.
  Cluster cluster = 2;
}

// A request to update a cluster.
// Request message for `ClusterController.UpdateCluster`.
message UpdateClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 5;

  // Required. The cluster name.
  string cluster_name = 2;

  // Required. The changes to the cluster.
  Cluster cluster = 3;

  // Optional. Timeout for graceful YARN decommissioning. Graceful
  // decommissioning allows removing nodes from the cluster without
  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
  // in progress to finish before forcefully removing nodes (and potentially
  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
  // the maximum allowed timeout is 1 day.
  //
  // Only supported on Dataproc image versions 1.2 and higher.
  google.protobuf.Duration graceful_decommission_timeout = 6;

  // Required. Specifies the path, relative to <code>Cluster</code>, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the <code>update_mask</code> parameter would be
  // specified as <code>config.worker_config.num_instances</code>,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Similarly, to change the number of preemptible workers in a cluster to 5,
  // the <code>update_mask</code> parameter would be
  // <code>config.secondary_worker_config.num_instances</code>,
  // and the `PATCH` request body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // <strong>Note:</strong> currently only some fields can be updated:
  //
  // |Mask|Purpose|
  // |`labels`|Updates labels|
  // |`config.worker_config.num_instances`|Resize primary worker group|
  // |`config.secondary_worker_config.num_instances`|Resize secondary worker group|
  google.protobuf.FieldMask update_mask = 4;
}

// A request to delete a cluster.
// Request message for `ClusterController.DeleteCluster`.
message DeleteClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster name.
  string cluster_name = 2;

  // Optional. Specifying the `cluster_uuid` means the RPC should fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not
  // exist.
  string cluster_uuid = 4;
}

// A request to get the resource representation for a cluster in a project.
// Request message for `ClusterController.GetCluster`.
message GetClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster name.
  string cluster_name = 2;
}

// A request to list the clusters in a project.
// Request message for `ClusterController.ListClusters`.
message ListClustersRequest {
  // Required. The ID of the Google Cloud Platform project that the clusters
  // belong to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 4;

  // Optional. A filter constraining the clusters to list. Filters are
  // case-sensitive and have the following syntax:
  //
  //     field = value [AND [field = value]] ...
  //
  // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
  // and `[KEY]` is a label key. **value** can be `*` to match all values.
  // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
  // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
  // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
  // contains the `DELETING` and `ERROR` states.
  // `clusterName` is the name of the cluster provided at creation time.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  //     status.state = ACTIVE AND clusterName = mycluster
  //     AND labels.env = staging AND labels.starred = *
  string filter = 5;

  // Optional. The standard List page size.
  int32 page_size = 2;

  // Optional. The standard List page token.
  string page_token = 3;
}

// The list of all clusters in a project.
// Response message for `ClusterController.ListClusters`.
message ListClustersResponse {
  // Output-only. The clusters in the project.
  repeated Cluster clusters = 1;

  // Output-only. This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListClustersRequest</code>.
  string next_page_token = 2;
}

// A request to collect cluster diagnostic information.
// Request message for `ClusterController.DiagnoseCluster`.
message DiagnoseClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster name.
  string cluster_name = 2;
}

// The location of the diagnostic output collected for a cluster.
message DiagnoseClusterResults {
  // Output-only. The Google Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of the collected
  // diagnostics.
  string output_uri = 1;
}