// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1beta2;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1beta2";

// The ClusterControllerService provides methods to manage clusters
// of Google Compute Engine instances.
service ClusterController {
  // Creates a cluster in a project.
  rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
  }

  // Updates a cluster in a project.
  rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
  }

  // Deletes a cluster in a project.
  rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
  }

  // Lists all regions/{region}/clusters in a project.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1beta2/projects/{project_id}/regions/{region}/clusters"
    };
  }

  // Gets cluster diagnostic information.
  // After the operation completes, the Operation.response field
  // contains `DiagnoseClusterResults`.
  rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
  }
}

// Describes the identifying information, config, and status of
// a cluster of Google Compute Engine instances.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1;

  // Required. The cluster name. Cluster names within a project must be
  // unique. Names of deleted clusters can be reused.
  string cluster_name = 2;

  // Required. The cluster config. Note that Cloud Dataproc may set
  // default values, and values may change when clusters are updated.
  ClusterConfig config = 3;

  // Optional. The labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // No more than 32 labels can be associated with a cluster.
  map<string, string> labels = 8;

  // Output-only. Cluster status.
  ClusterStatus status = 4;

  // Output-only. The previous cluster status.
  repeated ClusterStatus status_history = 7;

  // Output-only. A cluster UUID (Universally Unique Identifier). Cloud
  // Dataproc generates this value when it creates the cluster.
  string cluster_uuid = 6;

  // Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It may
  // be changed before final release.
  ClusterMetrics metrics = 9;
}

// The cluster config.
message ClusterConfig {
  // Optional. A Google Cloud Storage staging bucket used for sharing generated
  // SSH keys and config. If you do not specify a staging bucket, Cloud
  // Dataproc will determine an appropriate Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the Google
  // Compute Engine zone where your cluster is deployed, and then it will create
  // and manage this project-level, per-location bucket for you.
  string config_bucket = 1;

  // Required. The shared Google Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8;

  // Optional. The Google Compute Engine config settings for
  // the master instance in a cluster.
  InstanceGroupConfig master_config = 9;

  // Optional. The Google Compute Engine config settings for
  // worker instances in a cluster.
  InstanceGroupConfig worker_config = 10;

  // Optional. The Google Compute Engine config settings for
  // additional worker instances in a cluster.
  InstanceGroupConfig secondary_worker_config = 12;

  // Optional. The config settings for software inside the cluster.
  SoftwareConfig software_config = 13;

  // Optional. The config setting for auto delete cluster schedule.
  LifecycleConfig lifecycle_config = 14;

  // Optional. Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's role metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use
  // `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11;
}

// Common config settings for resources of Google Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // Optional. The zone where the Google Compute Engine cluster will be located.
  // On a create request, it is required in the "global" region. If omitted
  // in a non-global Cloud Dataproc region, the service will pick a zone in the
  // corresponding Compute Engine region. On a get request, zone will always be
  // present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `us-central1-f`
  string zone_uri = 1;

  // Optional. The Google Compute Engine network to be used for machine
  // communications. Cannot be specified with subnetwork_uri. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](/compute/docs/subnetworks) for more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`
  // * `projects/[project_id]/regions/global/default`
  // * `default`
  string network_uri = 2;

  // Optional. The Google Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with network_uri.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`
  // * `projects/[project_id]/regions/us-east1/sub0`
  // * `sub0`
  string subnetwork_uri = 6;

  // Optional. If true, all instances in the cluster will only have internal IP
  // addresses. By default, clusters are not restricted to internal IP addresses,
  // and will have ephemeral external IP addresses assigned to each instance.
  // This `internal_ip_only` restriction can only be enabled for subnetwork
  // enabled networks, and all off-cluster dependencies must be configured to be
  // accessible without external IP addresses.
  bool internal_ip_only = 7;

  // Optional. The service account of the instances. Defaults to the default
  // Google Compute Engine service account. Custom service accounts need
  // permissions equivalent to the following IAM roles:
  //
  // * roles/logging.logWriter
  // * roles/storage.objectAdmin
  //
  // (see https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts
  // for more information).
  // Example: `[account_id]@[project_id].iam.gserviceaccount.com`
  string service_account = 8;

  // Optional. The URIs of service account scopes to be included in Google
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3;

  // The Google Compute Engine tags to add to all instances (see
  // [Tagging instances](/compute/docs/label-or-tag-resources#tags)).
  repeated string tags = 4;

  // The Google Compute Engine metadata entries to add to all instances (see
  // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5;
}

// The config settings for Google Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
  // Optional. The number of VM instances in the instance group.
  // For master instance groups, must be set to 1.
  int32 num_instances = 1;

  // Optional. The list of instance names. Cloud Dataproc derives the names from
  // `cluster_name`, `num_instances`, and the instance group if not set by user
  // (recommended practice is to let Cloud Dataproc derive the name).
  repeated string instance_names = 2;

  // Output-only. The Google Compute Engine image resource used for cluster
  // instances. Inferred from `SoftwareConfig.image_version`.
  string image_uri = 3;

  // Optional. The Google Compute Engine machine type used for cluster instances.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
  // * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
  // * `n1-standard-2`
  string machine_type_uri = 4;

  // Optional. Disk option config settings.
  DiskConfig disk_config = 5;

  // Optional. Specifies that this instance group contains preemptible instances.
  bool is_preemptible = 6;

  // Output-only. The config for Google Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7;

  // Optional. The Google Compute Engine accelerator configuration for these
  // instances.
  //
  // **Beta Feature**: This feature is still under development. It may be
  // changed before final release.
  repeated AcceleratorConfig accelerators = 8;
}

// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // Output-only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1;

  // Output-only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2;
}

// Specifies the type and number of accelerator cards attached to the instances
// of an instance group (see [GPUs on Compute Engine](/compute/docs/gpus/)).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Google Compute Engine AcceleratorTypes](/compute/docs/reference/beta/acceleratorTypes).
  //
  // Examples:
  //
  // * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
  // * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
  // * `nvidia-tesla-k80`
  string accelerator_type_uri = 1;

  // The number of the accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}

// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // Optional. Size in GB of the boot disk (default is 500GB).
  int32 boot_disk_size_gb = 1;

  // Optional. Number of attached SSDs, from 0 to 4 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  int32 num_local_ssds = 2;
}

// Specifies the cluster auto delete related schedule configuration.
message LifecycleConfig {
  // Optional. The longest duration that cluster would keep alive while staying
  // idle; passing this threshold will cause cluster to be auto-deleted.
  google.protobuf.Duration idle_delete_ttl = 1;

  // The auto-delete deadline, given either as an absolute time or as a
  // lifetime duration. At most one of the two members can be set.
  oneof ttl {
    // Optional. The time when cluster will be auto-deleted.
    google.protobuf.Timestamp auto_delete_time = 2;

    // Optional. The life duration of cluster, the cluster will be auto-deleted
    // at the end of this duration.
    google.protobuf.Duration auto_delete_ttl = 3;
  }
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. Google Cloud Storage URI of executable file.
  string executable_file = 1;

  // Optional. Amount of time executable has to complete. Default is
  // 10 minutes. Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable is not completed at end of the timeout period.
  google.protobuf.Duration execution_timeout = 2;
}

// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;
  }

  // Additional detail that qualifies the cluster state.
  enum Substate {
    // No substate is specified.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state
    // (for example, critical daemons are not running or HDFS capacity is
    // exhausted).
    //
    // Applies to RUNNING state.
    UNHEALTHY = 1;

    // The agent-reported status is out of date (may occur if
    // Cloud Dataproc loses communication with Agent).
    //
    // Applies to RUNNING state.
    STALE_STATUS = 2;
  }

  // Output-only. The cluster's state.
  State state = 1;

  // Output-only. Optional details of cluster's state.
  string detail = 2;

  // Output-only. Time when this state was entered.
  google.protobuf.Timestamp state_start_time = 3;

  // Output-only. Additional state information that includes
  // status reported by the agent.
  Substate substate = 4;
}

// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // Optional. The version of software inside the cluster. It must match the
  // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the
  // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)).
  string image_version = 1;

  // Optional. The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, such as
  // `core:fs.defaultFS`. The following are supported prefixes
  // and their mappings:
  //
  // * capacity-scheduler: `capacity-scheduler.xml`
  // * core:   `core-site.xml`
  // * distcp: `distcp-default.xml`
  // * hdfs:   `hdfs-site.xml`
  // * hive:   `hive-site.xml`
  // * mapred: `mapred-site.xml`
  // * pig:    `pig.properties`
  // * spark:  `spark-defaults.conf`
  // * yarn:   `yarn-site.xml`
  //
  // For more information, see
  // [Cluster properties](/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2;
}

// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
  // The HDFS metrics.
  map<string, int64> hdfs_metrics = 1;

  // The YARN metrics.
  map<string, int64> yarn_metrics = 2;
}

// A request to create a cluster.
message CreateClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster to create.
  Cluster cluster = 2;
}

// A request to update a cluster.
message UpdateClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 5;

  // Required. The cluster name.
  string cluster_name = 2;

  // Required. The changes to the cluster.
  Cluster cluster = 3;

  // Optional. Timeout for graceful YARN decommissioning. Graceful
  // decommissioning allows removing nodes from the cluster without
  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
  // in progress to finish before forcefully removing nodes (and potentially
  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
  // the maximum allowed timeout is 1 day.
  //
  // Only supported on Dataproc image versions 1.2 and higher.
  google.protobuf.Duration graceful_decommission_timeout = 6;

  // Required. Specifies the path, relative to `Cluster`, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the `update_mask` parameter would be
  // specified as `config.worker_config.num_instances`,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Similarly, to change the number of preemptible workers in a cluster to 5,
  // the `update_mask` parameter would be
  // `config.secondary_worker_config.num_instances`, and the `PATCH` request
  // body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Note: currently only some fields can be updated:
  //
  // |Mask|Purpose|
  // |`labels`|Updates labels|
  // |`config.worker_config.num_instances`|Resize primary worker group|
  // |`config.secondary_worker_config.num_instances`|Resize secondary worker group|
  google.protobuf.FieldMask update_mask = 4;
}

// A request to delete a cluster.
message DeleteClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster name.
  string cluster_name = 2;

  // Optional. Specifying the `cluster_uuid` means the RPC should fail
  // (with error NOT_FOUND) if cluster with specified UUID does not exist.
  string cluster_uuid = 4;
}

// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster name.
  string cluster_name = 2;
}

// A request to list the clusters in a project.
message ListClustersRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 4;

  // Optional. A filter constraining the clusters to list. Filters are
  // case-sensitive and have the following syntax:
  //
  //     field = value [AND [field = value]] ...
  //
  // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
  // and `[KEY]` is a label key. **value** can be `*` to match all values.
  // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
  // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
  // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
  // contains the `DELETING` and `ERROR` states.
  // `clusterName` is the name of the cluster provided at creation time.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  //     status.state = ACTIVE AND clusterName = mycluster
  //     AND labels.env = staging AND labels.starred = *
  string filter = 5;

  // Optional. The standard List page size.
  int32 page_size = 2;

  // Optional. The standard List page token.
  string page_token = 3;
}

// The list of all clusters in a project.
message ListClustersResponse {
  // Output-only. The clusters in the project.
  repeated Cluster clusters = 1;

  // Output-only. This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent `ListClustersRequest`.
  string next_page_token = 2;
}

// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // Required. The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // Required. The cluster name.
  string cluster_name = 2;
}

// The location of diagnostic output.
message DiagnoseClusterResults {
  // Output-only. The Google Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of collected
  // diagnostics.
  string output_uri = 1;
}