Files
2026-04-05 16:14:49 -04:00

1964 lines
67 KiB
Protocol Buffer

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.privacy.dlp.v2beta2;
import "google/api/annotations.proto";
import "google/privacy/dlp/v2beta2/storage.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/date.proto";
import "google/type/dayofweek.proto";
import "google/type/timeofday.proto";
// Code generation options. Each one sets the namespace, package, or outer
// class name under which the C#, Go, Java, and PHP sources generated from this
// file are placed. These strings are part of the published client surface;
// changing them moves or renames the generated types.
option csharp_namespace = "Google.Cloud.Dlp.V2Beta2";
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2beta2;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpProto";
option java_package = "com.google.privacy.dlp.v2beta2";
option php_namespace = "Google\\Cloud\\Dlp\\V2beta2";
// The DLP API is a service that allows clients
// to detect the presence of Personally Identifiable Information (PII) and other
// privacy-sensitive data in user-supplied, unstructured data streams, like text
// blocks or images.
// The service also includes methods for sensitive data redaction and
// scheduling of data scans on Google Cloud Platform based data sets.
//
// Template methods are bound twice over HTTP, once under `organizations/*`
// and once under `projects/*`; all other methods are bound under
// `projects/*` only, except ListInfoTypes, which has no parent.
service DlpService {
  // Finds potentially sensitive info in content.
  // This method has limits on input size, processing time, and output size.
  // [How-to guide for text](/dlp/docs/inspecting-text), [How-to guide for
  // images](/dlp/docs/inspecting-images)
  rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=projects/*}/content:inspect"
      body: "*"
    };
  }

  // Redacts potentially sensitive info from an image.
  // This method has limits on input size, processing time, and output size.
  // [How-to guide](/dlp/docs/redacting-sensitive-data-images)
  rpc RedactImage(RedactImageRequest) returns (RedactImageResponse) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=projects/*}/image:redact"
      body: "*"
    };
  }

  // De-identifies potentially sensitive info from a ContentItem.
  // This method has limits on input size and output size.
  // [How-to guide](/dlp/docs/deidentify-sensitive-data)
  rpc DeidentifyContent(DeidentifyContentRequest)
      returns (DeidentifyContentResponse) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=projects/*}/content:deidentify"
      body: "*"
    };
  }

  // Re-identifies content that has been de-identified.
  rpc ReidentifyContent(ReidentifyContentRequest)
      returns (ReidentifyContentResponse) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=projects/*}/content:reidentify"
      body: "*"
    };
  }

  // Schedules a job scanning content in a Google Cloud Platform data
  // repository. [How-to guide](/dlp/docs/inspecting-storage)
  rpc InspectDataSource(InspectDataSourceRequest) returns (DlpJob) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=projects/*}/dataSource:inspect"
      body: "*"
    };
  }

  // Schedules a job to compute risk analysis metrics over content in a Google
  // Cloud Platform repository. [How-to guide](/dlp/docs/compute-risk-analysis)
  rpc AnalyzeDataSourceRisk(AnalyzeDataSourceRiskRequest) returns (DlpJob) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=projects/*}/dataSource:analyze"
      body: "*"
    };
  }

  // Returns sensitive information types DLP supports.
  rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2beta2/infoTypes"
    };
  }

  // Creates an inspect template for re-using frequently used configuration
  // for inspecting content, images, and storage.
  rpc CreateInspectTemplate(CreateInspectTemplateRequest)
      returns (InspectTemplate) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=organizations/*}/inspectTemplates"
      body: "*"
      additional_bindings {
        post: "/v2beta2/{parent=projects/*}/inspectTemplates"
        body: "*"
      }
    };
  }

  // Updates the inspect template.
  rpc UpdateInspectTemplate(UpdateInspectTemplateRequest)
      returns (InspectTemplate) {
    option (google.api.http) = {
      patch: "/v2beta2/{name=organizations/*/inspectTemplates/*}"
      body: "*"
      additional_bindings {
        patch: "/v2beta2/{name=projects/*/inspectTemplates/*}"
        body: "*"
      }
    };
  }

  // Gets an inspect template.
  rpc GetInspectTemplate(GetInspectTemplateRequest) returns (InspectTemplate) {
    option (google.api.http) = {
      get: "/v2beta2/{name=organizations/*/inspectTemplates/*}"
      additional_bindings {
        get: "/v2beta2/{name=projects/*/inspectTemplates/*}"
      }
    };
  }

  // Lists inspect templates.
  rpc ListInspectTemplates(ListInspectTemplatesRequest)
      returns (ListInspectTemplatesResponse) {
    option (google.api.http) = {
      get: "/v2beta2/{parent=organizations/*}/inspectTemplates"
      additional_bindings {
        get: "/v2beta2/{parent=projects/*}/inspectTemplates"
      }
    };
  }

  // Deletes an inspect template.
  rpc DeleteInspectTemplate(DeleteInspectTemplateRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2beta2/{name=organizations/*/inspectTemplates/*}"
      additional_bindings {
        delete: "/v2beta2/{name=projects/*/inspectTemplates/*}"
      }
    };
  }

  // Creates a de-identify template for re-using frequently used configuration
  // for de-identifying content, images, and storage.
  rpc CreateDeidentifyTemplate(CreateDeidentifyTemplateRequest)
      returns (DeidentifyTemplate) {
    option (google.api.http) = {
      post: "/v2beta2/{parent=organizations/*}/deidentifyTemplates"
      body: "*"
      additional_bindings {
        post: "/v2beta2/{parent=projects/*}/deidentifyTemplates"
        body: "*"
      }
    };
  }

  // Updates the de-identify template.
  rpc UpdateDeidentifyTemplate(UpdateDeidentifyTemplateRequest)
      returns (DeidentifyTemplate) {
    option (google.api.http) = {
      patch: "/v2beta2/{name=organizations/*/deidentifyTemplates/*}"
      body: "*"
      additional_bindings {
        patch: "/v2beta2/{name=projects/*/deidentifyTemplates/*}"
        body: "*"
      }
    };
  }

  // Gets a de-identify template.
  rpc GetDeidentifyTemplate(GetDeidentifyTemplateRequest)
      returns (DeidentifyTemplate) {
    option (google.api.http) = {
      get: "/v2beta2/{name=organizations/*/deidentifyTemplates/*}"
      additional_bindings {
        get: "/v2beta2/{name=projects/*/deidentifyTemplates/*}"
      }
    };
  }

  // Lists de-identify templates.
  rpc ListDeidentifyTemplates(ListDeidentifyTemplatesRequest)
      returns (ListDeidentifyTemplatesResponse) {
    option (google.api.http) = {
      get: "/v2beta2/{parent=organizations/*}/deidentifyTemplates"
      additional_bindings {
        get: "/v2beta2/{parent=projects/*}/deidentifyTemplates"
      }
    };
  }

  // Deletes a de-identify template.
  rpc DeleteDeidentifyTemplate(DeleteDeidentifyTemplateRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2beta2/{name=organizations/*/deidentifyTemplates/*}"
      additional_bindings {
        delete: "/v2beta2/{name=projects/*/deidentifyTemplates/*}"
      }
    };
  }

  // Lists DlpJobs that match the specified filter in the request.
  rpc ListDlpJobs(ListDlpJobsRequest) returns (ListDlpJobsResponse) {
    option (google.api.http) = {
      get: "/v2beta2/{parent=projects/*}/dlpJobs"
    };
  }

  // Gets the latest state of a long-running DlpJob.
  rpc GetDlpJob(GetDlpJobRequest) returns (DlpJob) {
    option (google.api.http) = {
      get: "/v2beta2/{name=projects/*/dlpJobs/*}"
    };
  }

  // Deletes a long-running DlpJob. This method indicates that the client is
  // no longer interested in the DlpJob result. The job will be cancelled if
  // possible.
  rpc DeleteDlpJob(DeleteDlpJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2beta2/{name=projects/*/dlpJobs/*}"
    };
  }

  // Starts asynchronous cancellation on a long-running DlpJob. The server
  // makes a best effort to cancel the DlpJob, but success is not
  // guaranteed.
  rpc CancelDlpJob(CancelDlpJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      post: "/v2beta2/{name=projects/*/dlpJobs/*}:cancel"
      body: "*"
    };
  }
}
// Describes how the scanning process is configured.
// When used with redactContent, only info_types and min_likelihood are
// currently used.
message InspectConfig {
  // Caps on the number of findings that are returned.
  message FindingLimits {
    // Max findings configuration per infoType, per content item or long
    // running DlpJob.
    message InfoTypeLimit {
      // The infoType this limit applies to. Provide at most one limit per
      // info_type. When info_type is not set, the DLP API applies the limit
      // to every info_type that is found but not named in another
      // InfoTypeLimit.
      InfoType info_type = 1;

      // Maximum number of findings for the given infoType.
      int32 max_findings = 2;
    }

    // Maximum number of findings returned for each item scanned.
    int32 max_findings_per_item = 1;

    // Maximum total number of findings returned per request/job.
    int32 max_findings_per_request = 2;

    // Findings limits that apply to specific infoTypes.
    repeated InfoTypeLimit max_findings_per_info_type = 3;
  }

  // Restricts which info_types are looked for. Values must match InfoType
  // values returned by ListInfoTypes or found in documentation.
  // Leaving info_types empty runs all enabled detectors.
  repeated InfoType info_types = 1;

  // Only findings at or above this likelihood are returned. The default is
  // POSSIBLE.
  Likelihood min_likelihood = 2;

  // Limits on the number of findings returned; see FindingLimits.
  FindingLimits limits = 3;

  // When true, a contextual quote from the data that triggered a finding is
  // included in the response; see Finding.quote.
  bool include_quote = 4;

  // When true, the type information of the findings is left out.
  bool exclude_info_types = 5;

  // Custom infoTypes supplied by the user.
  repeated CustomInfoType custom_info_types = 6;
}
// Wrapper around the content to inspect.
message ContentItem {
  // Content type, as defined in the Content-Type HTTP header.
  // Supported types are: all "text" types, octet streams, PNG images,
  // JPEG images.
  string type = 1;

  // The item's data: a byte array, a UTF-8 string, or a structured table.
  oneof data_item {
    // Content data to inspect or redact.
    bytes data = 2;

    // String data to inspect or redact.
    string value = 3;

    // Structured content for inspection.
    Table table = 4;
  }
}
// Structured content to inspect. A request may carry at most 50,000 `Value`s.
message Table {
  // One row of the table.
  message Row {
    // The values held by this row.
    repeated Value values = 1;
  }

  // Header fields of the table.
  repeated FieldId headers = 1;

  // Rows of the table.
  repeated Row rows = 2;
}
// Every finding produced for one scanned item.
message InspectResult {
  // The findings for the item.
  repeated Finding findings = 1;

  // When true, the item may have more findings than were returned, and the
  // returned findings are an arbitrary subset of all findings.
  // Truncation can happen because the input items were too large, or because
  // the server reached the maximum amount of resources allowed for a single
  // API call. For best results, divide the input into smaller batches.
  bool findings_truncated = 2;
}
// One piece of potentially sensitive content.
message Finding {
  // The content that was found. Content that is not textual may be converted
  // to a textual representation here.
  // Provided if requested by the `InspectConfig` and the finding is
  // less than or equal to 4096 bytes long. If the finding exceeds 4096 bytes
  // in length, the quote may be omitted.
  string quote = 1;

  // The type of content that might have been found.
  // Provided if requested by the `InspectConfig`.
  InfoType info_type = 2;

  // Estimated likelihood that the `info_type` is correct.
  Likelihood likelihood = 3;

  // Where the content was found.
  Location location = 4;

  // Time at which the finding was detected.
  google.protobuf.Timestamp create_time = 6;
}
// Describes where a finding is located.
message Location {
  // Zero-based byte offsets delimiting the finding, relative to the
  // finding's containing element.
  // When the content is not textual, the offsets refer to the UTF-8 encoded
  // textual representation of the content.
  // Omitted if content is an image.
  Range byte_range = 1;

  // Unicode character offsets delimiting the finding, relative to the
  // finding's containing element.
  // Provided when the content is text.
  Range codepoint_range = 2;

  // The area within the image that contained the finding.
  // Provided when the content is an image.
  repeated ImageLocation image_boxes = 3;

  // Pointer to the record in storage that contained the field the finding
  // was found in.
  // Provided when the finding's containing element is a property
  // of a storage object.
  RecordKey record_key = 4;

  // Pointer to the property or cell that contained the finding.
  // Provided when the finding's containing element is a cell in a table
  // or a property of storage object.
  FieldId field_id = 5;

  // Pointer to the row of the table that contained the finding.
  // Provided when the finding's containing element is a cell of a table.
  TableLocation table_location = 6;
}
// Where a finding sits inside a table.
message TableLocation {
  // Zero-based index of the row that holds the finding.
  int64 row_index = 1;
}
// Generic half-open interval [start, end).
message Range {
  // Index of the first character of the range (inclusive).
  int64 start = 1;

  // Index just past the last character of the range (exclusive).
  int64 end = 2;
}
// Bounding box around text detected within an image.
message ImageLocation {
  // Top coordinate of the bounding box. (0,0) is upper left.
  int32 top = 1;

  // Left coordinate of the bounding box. (0,0) is upper left.
  int32 left = 2;

  // Bounding box width, in pixels.
  int32 width = 3;

  // Bounding box height, in pixels.
  int32 height = 4;
}
// Request to search an image for potentially sensitive info and redact it,
// using a default or a provided redaction color.
message RedactImageRequest {
  // Controls how redaction of images is carried out.
  message ImageRedactionConfig {
    // Type of information to redact from images.
    oneof target {
      // Only one per info_type should be provided per request. If not
      // specified, and redact_all_text is false, the DLP API will redact all
      // text that it matches against all info_types that are found, but not
      // specified in another ImageRedactionConfig.
      InfoType info_type = 1;

      // If true, all text found in the image is redacted, whether or not it
      // matches an info_type.
      bool redact_all_text = 2;
    }

    // Color used when redacting content from an image. Defaults to black when
    // not specified.
    Color redaction_color = 3;
  }

  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for the inspector.
  InspectConfig inspect_config = 2;

  // Content type, as defined in the Content-Type HTTP header.
  // Supported types are: PNG, JPEG, SVG, & BMP.
  string image_type = 3;

  // Bytes of the image to redact.
  bytes image_data = 4;

  // Specifies what content to redact from images.
  repeated ImageRedactionConfig image_redaction_configs = 5;
}
// A color expressed in the RGB color space.
message Color {
  // Red component, as a value in the interval [0, 1].
  float red = 1;

  // Green component, as a value in the interval [0, 1].
  float green = 2;

  // Blue component, as a value in the interval [0, 1].
  float blue = 3;
}
// Outcome of redacting an image.
message RedactImageResponse {
  // The redacted image. Its type is the same as that of the original image.
  bytes redacted_image = 1;

  // If an image was being inspected and the InspectConfig's include_quote was
  // set to true, this field holds all text, if any, that was found in the
  // image.
  string extracted_text = 2;
}
// Request to de-identify a ContentItem.
message DeidentifyContentRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for the de-identification of the content item.
  // Items specified here override the template referenced by the
  // deidentify_template_name argument.
  DeidentifyConfig deidentify_config = 2;

  // Configuration for the inspector.
  // Items specified here override the template referenced by the
  // inspect_template_name argument.
  InspectConfig inspect_config = 3;

  // The item to de-identify. Will be treated as text.
  ContentItem item = 4;

  // Optional template to use. Any configuration directly specified in
  // inspect_config overrides what is set in the template. Singular fields
  // that are set in this request replace their corresponding fields in the
  // template. Repeated fields are appended. Singular sub-messages and groups
  // are recursively merged.
  string inspect_template_name = 5;

  // Optional template to use. Any configuration directly specified in
  // deidentify_config overrides what is set in the template. Singular fields
  // that are set in this request replace their corresponding fields in the
  // template. Repeated fields are appended. Singular sub-messages and groups
  // are recursively merged.
  string deidentify_template_name = 6;
}
// Outcome of de-identifying a ContentItem.
message DeidentifyContentResponse {
  // The item after de-identification.
  ContentItem item = 1;

  // Overview of the changes that were made to the `item`.
  TransformationOverview overview = 2;
}
// Request to re-identify a ContentItem.
message ReidentifyContentRequest {
  // The parent resource name.
  string parent = 1;

  // Configuration for the re-identification of the content item.
  // This field uses the same proto message type as de-identification, but
  // here it describes the reversal of a previous de-identification.
  // Re-identification is performed by examining the transformations used to
  // de-identify the items and executing the reverse. This requires that only
  // reversible transformations be provided here. The reversible
  // transformations are:
  //
  //  - `CryptoReplaceFfxFpeConfig`
  DeidentifyConfig reidentify_config = 2;

  // Configuration for the inspector.
  InspectConfig inspect_config = 3;

  // The item to re-identify. Will be treated as text.
  ContentItem item = 4;

  // Optional template to use. Any configuration directly specified in
  // `inspect_config` overrides what is set in the template. Singular fields
  // that are set in this request replace their corresponding fields in the
  // template. Repeated fields are appended. Singular sub-messages and groups
  // are recursively merged.
  string inspect_template_name = 5;

  // Optional template to use. References an instance of `DeidentifyTemplate`.
  // Any configuration directly specified in `reidentify_config` or
  // `inspect_config` overrides what is set in the template. Singular fields
  // that are set in this request replace their corresponding fields in the
  // template. Repeated fields are appended. Singular sub-messages and groups
  // are recursively merged.
  string reidentify_template_name = 6;
}
// Outcome of re-identifying an item.
message ReidentifyContentResponse {
  // The item after re-identification.
  ContentItem item = 1;

  // Overview of the changes that were made to the `item`.
  TransformationOverview overview = 2;
}
// Request to look for potentially sensitive info in a ContentItem.
message InspectContentRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for the inspector. What is specified here overrides
  // the template referenced by the inspect_template_name argument.
  InspectConfig inspect_config = 2;

  // The item to inspect.
  ContentItem item = 3;

  // Optional template to use. Any configuration directly specified in
  // inspect_config overrides what is set in the template. Singular fields
  // that are set in this request replace their corresponding fields in the
  // template. Repeated fields are appended. Singular sub-messages and groups
  // are recursively merged.
  string inspect_template_name = 4;
}
// Outcome of inspecting an item.
message InspectContentResponse {
  // The findings.
  InspectResult result = 1;
}
// Request to schedule a scan of a data subset from a Google Cloud Platform
// data repository.
message InspectDataSourceRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for the job.
  InspectJobConfig job_config = 2;
}
// Cloud repository in which output is stored.
message OutputStorageConfig {
  // The kind of storage the output is written to.
  oneof type {
    // Store findings in a new table in an existing dataset. If table_id is
    // not set, a new one is generated for you with the following format:
    // dlp_googleapis_yyyy_mm_dd_[dlp_job_id]. Pacific timezone is used for
    // generating the date details.
    BigQueryTable table = 1;
  }
}
// Statistics about one specific InfoType.
message InfoTypeStatistics {
  // The type of finding this stat is for.
  InfoType info_type = 1;

  // How many findings there were for this infoType.
  int64 count = 2;
}
// The results of an inspect DataSource job.
message InspectDataSourceDetails {
  // The configuration the job was run with.
  message RequestedOptions {
    // If run with an inspect template, a snapshot of its state at the time of
    // this run.
    InspectTemplate snapshot_inspect_template = 1;

    // The inspect job configuration.
    InspectJobConfig job_config = 3;
  }

  // Summary of the outcome of the inspect job.
  message Result {
    // Total size in bytes that were processed.
    int64 processed_bytes = 1;

    // Estimate of the number of bytes to process.
    int64 total_estimated_bytes = 2;

    // Statistics of how many instances of each info type were found during
    // the inspect job.
    repeated InfoTypeStatistics info_type_stats = 3;
  }

  // The configuration used for this job.
  RequestedOptions requested_options = 2;

  // A summary of the outcome of this inspect job.
  Result result = 3;
}
// Description of an InfoType.
message InfoTypeDescription {
  // Internal name of the infoType.
  string name = 1;

  // Human readable form of the infoType name.
  string display_name = 2;

  // The parts of the API that support this InfoType.
  repeated InfoTypeSupportedBy supported_by = 3;
}
// Request for the list of infoTypes.
message ListInfoTypesRequest {
  // Optional BCP-47 language code for localized infoType friendly
  // names. When omitted, or when localized strings are not available,
  // en-US strings are returned.
  string language_code = 1;

  // Optional filter that returns only the infoTypes supported by certain
  // parts of the API. Defaults to supported_by=INSPECT.
  string filter = 2;
}
// Response to the ListInfoTypes request.
message ListInfoTypesResponse {
  // The set of sensitive infoTypes.
  repeated InfoTypeDescription info_types = 1;
}
// Request to create a risk analysis DlpJob.
message AnalyzeDataSourceRiskRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for this risk analysis job.
  RiskAnalysisJobConfig job_config = 2;
}
// Configuration of a risk analysis job.
message RiskAnalysisJobConfig {
  // The privacy metric to compute.
  PrivacyMetric privacy_metric = 1;

  // The input dataset over which metrics are computed.
  BigQueryTable source_table = 2;
}
// Privacy metric to compute for reidentification risk analysis.
message PrivacyMetric {
  // Computes numerical stats over a single column, including min, max, and
  // quantiles.
  message NumericalStatsConfig {
    // Field to compute numerical stats on. Supported types are
    // integer, float, date, datetime, timestamp, time.
    FieldId field = 1;
  }

  // Computes categorical stats over a single column, including the number of
  // distinct values and the value count distribution.
  message CategoricalStatsConfig {
    // Field to compute categorical stats on. All column types are
    // supported except for arrays and structs. However, it may be more
    // informative to use NumericalStats when the field type is supported,
    // depending on the data.
    FieldId field = 1;
  }

  // k-anonymity metric, used for analysis of reidentification risk.
  message KAnonymityConfig {
    // Set of fields to compute k-anonymity over. When multiple fields are
    // specified, they are considered a single composite key. Structs and
    // repeated data types are not supported; however, nested fields are
    // supported so long as they are not structs themselves or nested within
    // a repeated field.
    repeated FieldId quasi_ids = 1;

    // Optional message indicating that each distinct entity_id should not
    // contribute to the k-anonymity count more than once per equivalence
    // class. If an entity_id appears on several rows with different
    // quasi-identifier tuples, it will contribute to each count exactly once.
    //
    // This can lead to unexpected results. Consider a table where ID 1 is
    // associated to quasi-identifier "foo", ID 2 to "bar", and ID 3 to *both*
    // quasi-identifiers "foo" and "bar" (on separate rows), and where this ID
    // is used as entity_id. Then, the anonymity value associated to ID 3 will
    // be 2, even if it is the only ID to be associated to both values "foo"
    // and "bar".
    EntityId entity_id = 2;
  }

  // l-diversity metric, used for analysis of reidentification risk.
  message LDiversityConfig {
    // Set of quasi-identifiers indicating how equivalence classes are
    // defined for the l-diversity computation. When multiple fields are
    // specified, they are considered a single composite key.
    repeated FieldId quasi_ids = 1;

    // Sensitive field for computing the l-value.
    FieldId sensitive_attribute = 2;
  }

  // Reidentifiability metric. This corresponds to a risk model similar to what
  // is called "journalist risk" in the literature, except the attack dataset is
  // statistically modeled instead of being perfectly known. This can be done
  // using publicly available data (like the US Census), or using a custom
  // statistical model (indicated as one or several BigQuery tables), or by
  // extrapolating from the distribution of values in the input dataset.
  message KMapEstimationConfig {
    // A column with a semantic tag attached.
    message TaggedField {
      // Identifies the column. [required]
      FieldId field = 1;

      // Semantic tag that identifies what a column contains, to determine
      // which statistical model to use to estimate the reidentifiability of
      // each value. [required]
      oneof tag {
        // A column can be tagged with an InfoType to use the relevant public
        // dataset as a statistical model of population, if available. We
        // currently support US ZIP codes, region codes, ages and genders.
        // To programmatically obtain the list of supported InfoTypes, use
        // ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
        InfoType info_type = 2;

        // A column can be tagged with a custom tag. In this case, the user
        // must indicate an auxiliary table that contains statistical
        // information on the possible values of this column (below).
        string custom_tag = 3;

        // If no semantic tag is indicated, we infer the statistical model
        // from the distribution of values in the input data.
        google.protobuf.Empty inferred = 4;
      }
    }

    // An auxiliary table contains statistical information on the relative
    // frequency of different quasi-identifiers values. It has one or several
    // quasi-identifiers columns, and one column that indicates the relative
    // frequency of each quasi-identifier tuple.
    // If a tuple is present in the data but not in the auxiliary table, the
    // corresponding relative frequency is assumed to be zero (and thus, the
    // tuple is highly reidentifiable).
    message AuxiliaryTable {
      // A quasi-identifier column has a custom_tag, used to know which column
      // in the data corresponds to which column in the statistical model.
      message QuasiIdField {
        // Identifies the quasi-identifier column.
        FieldId field = 1;

        // Tag that ties this column to the matching column in the data.
        string custom_tag = 2;
      }

      // Auxiliary table location. [required]
      BigQueryTable table = 3;

      // Quasi-identifier columns. [required]
      repeated QuasiIdField quasi_ids = 1;

      // The relative frequency column must contain a floating-point number
      // between 0 and 1 (inclusive). Null values are assumed to be zero.
      // [required]
      FieldId relative_frequency = 2;
    }

    // Fields considered to be quasi-identifiers. No two columns can have the
    // same tag. [required]
    repeated TaggedField quasi_ids = 1;

    // ISO 3166-1 alpha-2 region code to use in the statistical modeling.
    // Required if no column is tagged with a region-specific InfoType (like
    // US_ZIP_5) or a region code.
    string region_code = 2;

    // Several auxiliary tables can be used in the analysis. Each custom_tag
    // used to tag a quasi-identifiers column must appear in exactly one column
    // of one auxiliary table.
    repeated AuxiliaryTable auxiliary_tables = 3;
  }

  // The metric to compute; at most one of these configurations is set.
  oneof type {
    // Configuration for the numerical stats metric.
    NumericalStatsConfig numerical_stats_config = 1;

    // Configuration for the categorical stats metric.
    CategoricalStatsConfig categorical_stats_config = 2;

    // Configuration for the k-anonymity metric.
    KAnonymityConfig k_anonymity_config = 3;

    // Configuration for the l-diversity metric.
    LDiversityConfig l_diversity_config = 4;

    // Configuration for the k-map estimation metric.
    KMapEstimationConfig k_map_estimation_config = 5;
  }
}
// Result of a risk analysis operation request.
message AnalyzeDataSourceRiskDetails {
// Result of the numerical stats computation.
message NumericalStatsResult {
// Minimum value appearing in the column.
Value min_value = 1;
// Maximum value appearing in the column.
Value max_value = 2;
// List of 99 values that partition the set of field values into 100 equal
// sized buckets.
repeated Value quantile_values = 4;
}
// Result of the categorical stats computation.
message CategoricalStatsResult {
message CategoricalStatsHistogramBucket {
// Lower bound on the value frequency of the values in this bucket.
int64 value_frequency_lower_bound = 1;
// Upper bound on the value frequency of the values in this bucket.
int64 value_frequency_upper_bound = 2;
// Total number of values in this bucket.
int64 bucket_size = 3;
// Sample of value frequencies in this bucket. The total number of
// values returned per bucket is capped at 20.
repeated ValueFrequency bucket_values = 4;
}
// Histogram of value frequencies in the column.
repeated CategoricalStatsHistogramBucket value_frequency_histogram_buckets = 5;
}
// Result of the k-anonymity computation.
message KAnonymityResult {
// The set of columns' values that share the same ldiversity value
message KAnonymityEquivalenceClass {
// Set of values defining the equivalence class. One value per
// quasi-identifier column in the original KAnonymity metric message.
// The order is always the same as the original request.
repeated Value quasi_ids_values = 1;
// Size of the equivalence class, for example number of rows with the
// above set of values.
int64 equivalence_class_size = 2;
}
message KAnonymityHistogramBucket {
// Lower bound on the size of the equivalence classes in this bucket.
int64 equivalence_class_size_lower_bound = 1;
// Upper bound on the size of the equivalence classes in this bucket.
int64 equivalence_class_size_upper_bound = 2;
// Total number of equivalence classes in this bucket.
int64 bucket_size = 3;
// Sample of equivalence classes in this bucket. The total number of
// classes returned per bucket is capped at 20.
repeated KAnonymityEquivalenceClass bucket_values = 4;
}
// Histogram of k-anonymity equivalence classes.
repeated KAnonymityHistogramBucket equivalence_class_histogram_buckets = 5;
}
// Result of the l-diversity computation.
message LDiversityResult {
// The set of columns' values that share the same l-diversity value.
message LDiversityEquivalenceClass {
// Quasi-identifier values defining the k-anonymity equivalence
// class. The order is always the same as the original request.
repeated Value quasi_ids_values = 1;
// Size of the k-anonymity equivalence class.
int64 equivalence_class_size = 2;
// Number of distinct sensitive values in this equivalence class.
int64 num_distinct_sensitive_values = 3;
// Estimated frequencies of top sensitive values.
repeated ValueFrequency top_sensitive_values = 4;
}
// One bucket of the l-diversity histogram: summarizes the equivalence
// classes whose sensitive value frequencies fall between the two bounds
// below.
message LDiversityHistogramBucket {
// Lower bound on the sensitive value frequencies of the equivalence
// classes in this bucket.
int64 sensitive_value_frequency_lower_bound = 1;
// Upper bound on the sensitive value frequencies of the equivalence
// classes in this bucket.
int64 sensitive_value_frequency_upper_bound = 2;
// Total number of equivalence classes in this bucket.
int64 bucket_size = 3;
// Sample of equivalence classes in this bucket. The total number of
// classes returned per bucket is capped at 20, so this list can be shorter
// than `bucket_size`.
repeated LDiversityEquivalenceClass bucket_values = 4;
}
// Histogram of l-diversity equivalence class sensitive value frequencies.
repeated LDiversityHistogramBucket sensitive_value_frequency_histogram_buckets = 5;
}
// Result of the reidentifiability analysis. Note that these results are an
// estimation, not exact values.
message KMapEstimationResult {
// A tuple of values for the quasi-identifier columns.
message KMapEstimationQuasiIdValues {
// The quasi-identifier values.
repeated Value quasi_ids_values = 1;
// The estimated anonymity for these quasi-identifier values.
int64 estimated_anonymity = 2;
}
// A KMapEstimationHistogramBucket message with the following values:
// min_anonymity: 3
// max_anonymity: 5
// bucket_size: 42
// means that there are 42 records whose quasi-identifier values correspond
// to 3, 4 or 5 people in the overlying population. An important particular
// case is when min_anonymity = max_anonymity = 1: the bucket_size field then
// corresponds to the number of uniquely identifiable records.
message KMapEstimationHistogramBucket {
// Always positive.
int64 min_anonymity = 1;
// Always greater than or equal to min_anonymity.
int64 max_anonymity = 2;
// Number of records within these anonymity bounds.
int64 bucket_size = 5;
// Sample of quasi-identifier tuple values in this bucket. The total
// number of classes returned per bucket is capped at 20.
repeated KMapEstimationQuasiIdValues bucket_values = 6;
}
// The intervals [min_anonymity, max_anonymity] do not overlap. If a value
// doesn't correspond to any such interval, the associated bucket_size is
// zero. For example, the following records:
// {min_anonymity: 1, max_anonymity: 1, bucket_size: 17}
// {min_anonymity: 2, max_anonymity: 3, bucket_size: 42}
// {min_anonymity: 5, max_anonymity: 10, bucket_size: 99}
// mean that there are no records with an estimated anonymity of 4, or
// larger than 10.
repeated KMapEstimationHistogramBucket k_map_estimation_histogram = 1;
}
// Privacy metric to compute.
PrivacyMetric requested_privacy_metric = 1;
// Input dataset to compute metrics over.
BigQueryTable requested_source_table = 2;
// Values associated with this metric.
oneof result {
NumericalStatsResult numerical_stats_result = 3;
CategoricalStatsResult categorical_stats_result = 4;
KAnonymityResult k_anonymity_result = 5;
LDiversityResult l_diversity_result = 6;
KMapEstimationResult k_map_estimation_result = 7;
}
}
// A value of a field, paired with the number of times it occurs.
message ValueFrequency {
// A value contained in the field in question.
Value value = 1;
// How many times the value is contained in the field.
int64 count = 2;
}
// Set of primitive values supported by the system.
// Note that for the purposes of inspection or transformation, the number
// of bytes considered to comprise a 'Value' is based on its representation
// as a UTF-8 encoded string. For example, if 'integer_value' is set to
// 123456789, the number of bytes would be counted as 9, even though an
// int64 only holds up to 8 bytes of data.
message Value {
// The concrete representation of the value. As members of a `oneof`, at most
// one of these fields is set at a time.
oneof type {
// A signed 64-bit integer.
int64 integer_value = 1;
// A double-precision floating point number.
double float_value = 2;
// A text string.
string string_value = 3;
// A boolean.
bool boolean_value = 4;
// A point in time.
google.protobuf.Timestamp timestamp_value = 5;
// A time of day.
google.type.TimeOfDay time_value = 6;
// A calendar date.
google.type.Date date_value = 7;
}
}
// The configuration that controls how the data will change.
message DeidentifyConfig {
// How to treat the dataset. As members of a `oneof`, at most one of these
// fields is set at a time.
oneof transformation {
// Treat the dataset as free-form text and apply the same free text
// transformation everywhere.
InfoTypeTransformations info_type_transformations = 1;
// Treat the dataset as structured. Transformations can be applied to
// specific locations within structured datasets, such as transforming
// a column within a table.
RecordTransformations record_transformations = 2;
}
}
// A rule for transforming a value.
message PrimitiveTransformation {
// The transformation to apply. As members of a `oneof`, at most one of these
// configurations is set at a time.
oneof transformation {
// Replace the value with a given `Value`.
ReplaceValueConfig replace_config = 1;
// Redact the value.
RedactConfig redact_config = 2;
// Partially mask the value with a fixed character.
CharacterMaskConfig character_mask_config = 3;
// Replace the value with a surrogate using FPE with the FFX mode of
// operation.
CryptoReplaceFfxFpeConfig crypto_replace_ffx_fpe_config = 4;
// Bucket the value based on fixed size ranges.
FixedSizeBucketingConfig fixed_size_bucketing_config = 5;
// Bucket the value based on ranges provided by the user.
BucketingConfig bucketing_config = 6;
// Replace a matching finding with the name of its info_type.
ReplaceWithInfoTypeConfig replace_with_info_type_config = 7;
// Extract or preserve a portion of a date or time value.
TimePartConfig time_part_config = 8;
// Replace the value with a surrogate generated via cryptographic hashing.
CryptoHashConfig crypto_hash_config = 9;
}
}
// For use with `Date`, `Timestamp`, and `TimeOfDay`, extract or preserve a
// portion of the value.
message TimePartConfig {
// The portions of a date or time value that can be selected. Each value
// below is annotated with a bracketed numeric range for that part.
enum TimePart {
// No part specified; this is the default (zero) value.
TIME_PART_UNSPECIFIED = 0;
// [0-9999]
YEAR = 1;
// [1-12]
MONTH = 2;
// [1-31]
DAY_OF_MONTH = 3;
// [1-7]
DAY_OF_WEEK = 4;
// [1-52]
WEEK_OF_YEAR = 5;
// [0-23]
HOUR_OF_DAY = 6;
}
// The portion of the value to extract or preserve.
TimePart part_to_extract = 1;
}
// Pseudonymization method that generates surrogates via cryptographic hashing.
// Uses SHA-256.
// The key size must be either 32 or 64 bytes.
// Outputs a 32 byte digest as an uppercase hex string
// (for example, 41D1567F7F99F1DC2A5FAB886DEE5BEE).
// NOTE(review): the example above is 32 hex characters (16 bytes), which is
// shorter than a 32 byte digest rendered in hex would be; confirm the actual
// output format and correct either the example or the stated digest size.
// Currently, only string and integer values can be hashed.
message CryptoHashConfig {
// The key used by the hash function.
CryptoKey crypto_key = 1;
}
// Replace each input value with a given `Value`.
message ReplaceValueConfig {
// The `Value` that each input value is replaced with.
Value new_value = 1;
}
// Replace each matching finding with the name of the info_type.
message ReplaceWithInfoTypeConfig {
// This message declares no fields.
}
// Redact a given value. For example, if used with an `InfoTypeTransformation`
// transforming PHONE_NUMBER, and input 'My phone number is 206-555-0123', the
// output would be 'My phone number is '.
message RedactConfig {
// This message declares no fields.
}
// Characters to skip when doing deidentification of a value. These will be left
// alone and skipped.
message CharsToIgnore {
// Predefined groups of characters that can be skipped.
enum CommonCharsToIgnore {
// No group specified; this is the default (zero) value.
COMMON_CHARS_TO_IGNORE_UNSPECIFIED = 0;
// 0-9
NUMERIC = 1;
// A-Z
ALPHA_UPPER_CASE = 2;
// a-z
ALPHA_LOWER_CASE = 3;
// US Punctuation, one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
PUNCTUATION = 4;
// Whitespace character, one of [ \t\n\x0B\f\r]
WHITESPACE = 5;
}
// The characters to skip, given either explicitly or as a predefined group.
// As members of a `oneof`, at most one of these fields is set at a time.
oneof characters {
// The characters to skip, provided as a string.
string characters_to_skip = 1;
// One of the predefined groups of characters to skip.
CommonCharsToIgnore common_characters_to_ignore = 2;
}
}
// Partially mask a string by replacing a given number of characters with a
// fixed character. Masking can start from the beginning or end of the string.
// This can be used on data of any type (numbers, longs, and so on) and when
// de-identifying structured data we'll attempt to preserve the original data's
// type. (This allows you to take a long like 123 and modify it to a string like
// **3.)
message CharacterMaskConfig {
// Character to mask the sensitive values, for example "*" for an
// alphabetic string such as name, or "0" for a numeric string such as ZIP
// code or credit card number. String must have length 1. If not supplied, we
// will default to "*" for strings, 0 for digits.
string masking_character = 1;
// Number of characters to mask. If not set, all matching chars will be
// masked. Skipped characters do not count towards this tally.
int32 number_to_mask = 2;
// Mask characters in reverse order. For example, if `masking_character` is
// '0', `number_to_mask` is 14, and `reverse_order` is false, then
// 1234-5678-9012-3456 -> 00000000000000-3456
// If `masking_character` is '*', `number_to_mask` is 3, and `reverse_order`
// is true, then 12345 -> 12***
bool reverse_order = 3;
// When masking a string, items in this list will be skipped when replacing.
// For example, if your string is 555-555-5555 and you ask us to skip `-` and
// mask 5 chars with * we would produce ***-**5-5555.
repeated CharsToIgnore characters_to_ignore = 4;
}
// Buckets values based on fixed size ranges. The
// Bucketing transformation can provide all of this functionality,
// but requires more configuration. This message is provided as a convenience to
// the user for simple bucketing strategies.
//
// The transformed value will be a hyphenated string of
// <lower_bound>-<upper_bound>; for example, if lower_bound = 10 and
// upper_bound = 20, all values that are within this bucket will be replaced
// with "10-20".
//
// This can be used on data of type: double, long.
//
// If the bound Value type differs from the type of data
// being transformed, we will first attempt converting the type of the data to
// be transformed to match the type of the bound before comparing.
message FixedSizeBucketingConfig {
// Lower bound value of buckets. All values less than `lower_bound` are
// grouped together into a single bucket; for example if `lower_bound` = 10,
// then all values less than 10 are replaced with the value “-10”. [Required].
Value lower_bound = 1;
// Upper bound value of buckets. All values greater than upper_bound are
// grouped together into a single bucket; for example if `upper_bound` = 89,
// then all values greater than 89 are replaced with the value “89+”.
// [Required].
Value upper_bound = 2;
// Size of each bucket (except for minimum and maximum buckets). So if
// `lower_bound` = 10, `upper_bound` = 89, and `bucket_size` = 10, then the
// following buckets would be used: -10, 10-20, 20-30, 30-40, 40-50, 50-60,
// 60-70, 70-80, 80-89, 89+. Precision up to 2 decimals works. [Required].
double bucket_size = 3;
}
// Generalization function that buckets values based on ranges. The ranges and
// replacement values are dynamically provided by the user for custom behavior,
// such as:
//
// - 1-30 -> LOW
// - 31-65 -> MEDIUM
// - 66-100 -> HIGH
//
// This can be used on data of type: number, long, string, timestamp.
// If the bound `Value` type differs from the type of data being transformed, we
// will first attempt converting the type of the data to be transformed to match
// the type of the bound before comparing.
message BucketingConfig {
// Bucket is represented as a range, along with replacement values.
message Bucket {
// Lower bound of the range, inclusive. Type should be the same as max if
// used.
Value min = 1;
// Upper bound of the range, exclusive; type must match min.
Value max = 2;
// Replacement value for this bucket. If not provided
// the default behavior will be to hyphenate the min-max range.
Value replacement_value = 3;
}
// Set of buckets. Ranges must be non-overlapping.
repeated Bucket buckets = 1;
}
// Replaces an identifier with a surrogate using FPE with the FFX
// mode of operation; however when used in the `ReidentifyContent` API method,
// it serves the opposite function by reversing the surrogate back into
// the original identifier.
// The identifier must be encoded as ASCII.
// For a given crypto key and context, the same identifier will be
// replaced with the same surrogate.
// Identifiers must be at least two characters long.
// In the case that the identifier is the empty string, it will be skipped.
message CryptoReplaceFfxFpeConfig {
// These are commonly used subsets of the alphabet that the FFX mode
// natively supports. In the algorithm, the alphabet is selected using
// the "radix". Therefore each corresponds to a particular radix.
enum FfxCommonNativeAlphabet {
// No alphabet specified; this is the default (zero) value.
FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED = 0;
// [0-9] (radix of 10)
NUMERIC = 1;
// [0-9A-F] (radix of 16)
HEXADECIMAL = 2;
// [0-9A-Z] (radix of 36)
UPPER_CASE_ALPHA_NUMERIC = 3;
// [0-9A-Za-z] (radix of 62)
ALPHA_NUMERIC = 4;
}
// The key used by the encryption algorithm. [required]
CryptoKey crypto_key = 1;
// The 'tweak', a context may be used for higher security since the same
// identifier in two different contexts won't be given the same surrogate. If
// the context is not set, a default tweak will be used.
//
// If the context is set but:
//
// 1. there is no record present when transforming a given value or
// 2. the field is not present when transforming a given value,
//
// a default tweak will be used.
//
// Note that case (1) is expected when an `InfoTypeTransformation` is
// applied to both structured and non-structured `ContentItem`s.
// Currently, the referenced field may be of value type integer or string.
//
// The tweak is constructed as a sequence of bytes in big endian byte order
// such that:
//
// - a 64 bit integer is encoded followed by a single byte of value 1
// - a string is encoded in UTF-8 format followed by a single byte of value
// 2
FieldId context = 2;
// Selects the alphabet, in one of three ways. As members of a `oneof`, at
// most one of these fields is set at a time.
oneof alphabet {
// One of the commonly used alphabets that the FFX mode natively supports.
FfxCommonNativeAlphabet common_alphabet = 4;
// This is supported by mapping these to the alphanumeric characters
// that the FFX mode natively supports. This happens before/after
// encryption/decryption.
// Each character listed must appear only once.
// Number of characters must be in the range [2, 62].
// This must be encoded as ASCII.
// The order of characters does not matter.
string custom_alphabet = 5;
// The native way to select the alphabet. Must be in the range [2, 62].
int32 radix = 6;
}
// The custom infoType to annotate the surrogate with.
// This annotation will be applied to the surrogate by prefixing it with
// the name of the custom infoType followed by the number of
// characters comprising the surrogate. The following scheme defines the
// format: info_type_name(surrogate_character_count):surrogate
//
// For example, if the name of custom infoType is 'MY_TOKEN_INFO_TYPE' and
// the surrogate is 'abc', the full replacement value
// will be: 'MY_TOKEN_INFO_TYPE(3):abc'
//
// This annotation identifies the surrogate when inspecting content using the
// custom infoType
// [`SurrogateType`](/dlp/docs/reference/rest/v2beta2/InspectConfig#surrogatetype).
// This facilitates reversal of the surrogate when it occurs in free text.
//
// In order for inspection to work properly, the name of this infoType must
// not occur naturally anywhere in your data; otherwise, inspection may
// find a surrogate that does not correspond to an actual identifier.
// Therefore, choose your custom infoType name carefully after considering
// what your data looks like. One way to select a name that has a high chance
// of yielding reliable detection is to include one or more unicode characters
// that are highly improbable to exist in your data.
// For example, assuming your data is entered from a regular ASCII keyboard,
// the symbol with the hex code point 29DD might be used like so:
// ⧝MY_TOKEN_TYPE
InfoType surrogate_info_type = 8;
}
// This is a data encryption key (DEK) (as opposed to
// a key encryption key (KEK) stored by KMS).
// When using KMS to wrap/unwrap DEKs, be sure to set an appropriate
// IAM policy on the KMS CryptoKey (KEK) to ensure an attacker cannot
// unwrap the data crypto key.
message CryptoKey {
// Where the key comes from. As members of a `oneof`, at most one of these
// fields is set at a time.
oneof source {
// A random data crypto key that is generated for the request and discarded
// after the request finishes.
TransientCryptoKey transient = 1;
// A raw key supplied directly.
UnwrappedCryptoKey unwrapped = 2;
// An existing data crypto key wrapped by KMS.
KmsWrappedCryptoKey kms_wrapped = 3;
}
}
// Use this to have a random data crypto key generated.
// It will be discarded after the request finishes.
message TransientCryptoKey {
// Name of the key. [required]
// This is an arbitrary string used to differentiate different keys.
// A unique key is generated per name: two separate `TransientCryptoKey`
// protos share the same generated key if their names are the same.
// When the data crypto key is generated, this name is not used in any way
// (repeating the API call will result in a different key being generated).
string name = 1;
}
// A raw (unwrapped) data crypto key.
// Using raw keys is prone to security risks due to accidentally
// leaking the key. Choose another type of key if possible.
message UnwrappedCryptoKey {
// The AES 128/192/256 bit key (that is, 16, 24, or 32 bytes). [required]
bytes key = 1;
}
// Include to use an existing data crypto key wrapped by KMS.
// Authorization requires the following IAM permissions when sending a request
// to perform a crypto transformation using a kms-wrapped crypto key:
// dlp.kms.encrypt
message KmsWrappedCryptoKey {
// The wrapped data crypto key, as raw bytes. [required]
bytes wrapped_key = 1;
// The resource name of the KMS CryptoKey to use for unwrapping. [required]
string crypto_key_name = 2;
}
// A type of transformation that will scan unstructured text and
// apply various `PrimitiveTransformation`s to each finding, where the
// transformation is applied only to values that were identified as a specific
// info_type.
message InfoTypeTransformations {
// A transformation to apply to text that is identified as a specific
// info_type.
message InfoTypeTransformation {
// InfoTypes to apply the transformation to. Empty list will match all
// available infoTypes for this transformation.
repeated InfoType info_types = 1;
// Primitive transformation to apply to the infoType. [required]
PrimitiveTransformation primitive_transformation = 2;
}
// Transformation for each infoType. Cannot specify more than one
// for a given infoType. [required]
repeated InfoTypeTransformation transformations = 1;
}
// The transformation to apply to the field.
message FieldTransformation {
// Input field(s) to apply the transformation to. [required]
repeated FieldId fields = 1;
// Only apply the transformation if the condition evaluates to true for the
// given `RecordCondition`. The conditions are allowed to reference fields
// that are not used in the actual transformation. [optional]
//
// Example Use Cases:
//
// - Apply a different bucket transformation to an age column if the zip code
// column for the same record is within a specific range.
// - Redact a field if the date of birth field is greater than 85.
RecordCondition condition = 3;
// Transformation to apply. [required]
// As members of a `oneof`, at most one of these fields is set at a time.
oneof transformation {
// Apply the transformation to the entire field.
PrimitiveTransformation primitive_transformation = 4;
// Treat the contents of the field as free text, and selectively
// transform content that matches an `InfoType`.
InfoTypeTransformations info_type_transformations = 5;
}
}
// A type of transformation that is applied over structured data such as a
// table.
message RecordTransformations {
// Transform the record by applying various field transformations.
repeated FieldTransformation field_transformations = 1;
// Configuration defining which records get suppressed entirely. Records that
// match any suppression rule are omitted from the output. [optional]
repeated RecordSuppression record_suppressions = 2;
}
// Configuration to suppress records whose suppression conditions evaluate to
// true.
message RecordSuppression {
// A condition that, when it evaluates to true, results in the record being
// suppressed from the transformed content.
RecordCondition condition = 1;
}
// A condition for determining whether a transformation should be applied to
// a field.
message RecordCondition {
// The field type of `value` and `field` do not need to match to be
// considered equal, but not all comparisons are possible.
//
// A `value` of type:
//
// - `string` can be compared against all other types
// - `boolean` can only be compared against other booleans
// - `integer` can be compared against doubles or a string if the string value
// can be parsed as an integer.
// - `double` can be compared against integers or a string if the string can
// be parsed as a double.
// - `Timestamp` can be compared against strings in RFC 3339 date string
// format.
// - `TimeOfDay` can be compared against timestamps and strings in the format
// of 'HH:mm:ss'.
//
// If we fail to compare due to type mismatch, a warning will be given and
// the condition will evaluate to false.
message Condition {
// Field within the record this condition is evaluated against. [required]
FieldId field = 1;
// Operator used to compare the field or infoType to the value. [required]
RelationalOperator operator = 3;
// Value to compare against. [Required, except for `EXISTS` tests.]
Value value = 4;
}
// A collection of conditions.
message Conditions {
// The individual conditions in the collection.
repeated Condition conditions = 1;
}
// An expression, consisting of an operator and conditions.
message Expressions {
// Logical operators that can combine the results of conditions.
enum LogicalOperator {
// No operator specified; this is the default (zero) value.
LOGICAL_OPERATOR_UNSPECIFIED = 0;
// Logical AND.
AND = 1;
}
// The operator to apply to the result of conditions. Default and currently
// only supported value is `AND`.
LogicalOperator logical_operator = 1;
// The operands of the expression. This `oneof` currently has a single
// member.
oneof type {
// The conditions that the operator is applied to.
Conditions conditions = 3;
}
}
// An expression.
Expressions expressions = 3;
}
// Overview of the modifications that occurred.
message TransformationOverview {
// Total size in bytes that were transformed in some way.
int64 transformed_bytes = 2;
// Transformations applied to the dataset, one summary per entry.
repeated TransformationSummary transformation_summaries = 3;
}
// Summary of a single transformation.
// Only one of 'transformation', 'field_transformations', or 'record_suppress'
// will be set.
message TransformationSummary {
// A collection that informs the user the number of times a particular
// `TransformationResultCode` and error details occurred.
message SummaryResult {
// Number of times this result code and details occurred.
int64 count = 1;
// The outcome being counted.
TransformationResultCode code = 2;
// A place for warnings or errors to show up if a transformation didn't
// work as expected.
string details = 3;
}
// Possible outcomes of transformations.
enum TransformationResultCode {
// No outcome specified; this is the default (zero) value.
TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;
// The transformation succeeded.
SUCCESS = 1;
// The transformation resulted in an error.
ERROR = 2;
}
// Set if the transformation was limited to a specific info_type.
InfoType info_type = 1;
// Set if the transformation was limited to a specific FieldId.
FieldId field = 2;
// The specific transformation these stats apply to.
PrimitiveTransformation transformation = 3;
// The field transformation that was applied.
// If multiple field transformations are requested for a single field,
// this list will contain all of them; otherwise, only one is supplied.
repeated FieldTransformation field_transformations = 5;
// The specific suppression option these stats apply to.
RecordSuppression record_suppress = 6;
// Counts of outcomes, one entry per result code and details combination.
repeated SummaryResult results = 4;
// Total size in bytes that were transformed in some way.
int64 transformed_bytes = 7;
}
// The inspectTemplate contains a configuration (set of types of sensitive data
// to be detected) to be used anywhere you otherwise would normally specify
// InspectConfig.
message InspectTemplate {
// The template name. Output only.
//
// The template will have one of the following formats:
// `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
// `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`
string name = 1;
// Display name (max 256 chars).
string display_name = 2;
// Short description (max 256 chars).
string description = 3;
// The creation timestamp of an inspectTemplate. Output only.
google.protobuf.Timestamp create_time = 4;
// The last update timestamp of an inspectTemplate. Output only.
google.protobuf.Timestamp update_time = 5;
// The core content of the template. Configuration of the scanning process.
InspectConfig inspect_config = 6;
}
// The DeidentifyTemplate contains instructions on how to deidentify content.
message DeidentifyTemplate {
// The template name. Output only.
//
// The template will have one of the following formats:
// `projects/PROJECT_ID/deidentifyTemplates/TEMPLATE_ID` OR
// `organizations/ORGANIZATION_ID/deidentifyTemplates/TEMPLATE_ID`
string name = 1;
// Display name (max 256 chars).
string display_name = 2;
// Short description (max 256 chars).
string description = 3;
// The creation timestamp of a deidentifyTemplate. Output only.
google.protobuf.Timestamp create_time = 4;
// The last update timestamp of a deidentifyTemplate. Output only.
google.protobuf.Timestamp update_time = 5;
// The core content of the template.
DeidentifyConfig deidentify_config = 6;
}
// Request message for CreateInspectTemplate.
message CreateInspectTemplateRequest {
// The parent resource name, for example `projects/my-project-id` or
// `organizations/my-org-id`.
string parent = 1;
// The InspectTemplate to create.
InspectTemplate inspect_template = 2;
// The template id can contain uppercase and lowercase letters,
// numbers, and hyphens; that is, it must match the regular
// expression: `[a-zA-Z\\d-]+`. The maximum length is 100
// characters. Can be empty to allow the system to generate one.
string template_id = 3;
}
// Request message for UpdateInspectTemplate.
message UpdateInspectTemplateRequest {
// Resource name of organization and inspectTemplate to be updated, for
// example `organizations/433245324/inspectTemplates/432452342` or
// `projects/project-id/inspectTemplates/432452342`.
string name = 1;
// New InspectTemplate value.
InspectTemplate inspect_template = 2;
// Mask to control which fields get updated.
google.protobuf.FieldMask update_mask = 3;
}
// Request message for GetInspectTemplate.
message GetInspectTemplateRequest {
// Resource name of the organization and inspectTemplate to be read, for
// example `organizations/433245324/inspectTemplates/432452342` or
// `projects/project-id/inspectTemplates/432452342`.
string name = 1;
}
// Request message for ListInspectTemplates.
message ListInspectTemplatesRequest {
// The parent resource name, for example `projects/my-project-id` or
// `organizations/my-org-id`.
string parent = 1;
// Optional page token to continue retrieval. Comes from previous call
// to `ListInspectTemplates`.
string page_token = 2;
// Optional size of the page; can be limited by the server. If zero, the
// server returns a page of max size 100.
int32 page_size = 3;
}
// Response message for ListInspectTemplates.
message ListInspectTemplatesResponse {
// List of inspectTemplates, up to page_size in ListInspectTemplatesRequest.
repeated InspectTemplate inspect_templates = 1;
// If the next page is available, the page token to use in the following
// ListInspectTemplates request.
string next_page_token = 2;
}
// Request message for DeleteInspectTemplate.
message DeleteInspectTemplateRequest {
// Resource name of the organization and inspectTemplate to be deleted, for
// example `organizations/433245324/inspectTemplates/432452342` or
// `projects/project-id/inspectTemplates/432452342`.
string name = 1;
}
// Configuration of an inspect job: the data to scan, how and what to scan
// for, and where to put the findings.
message InspectJobConfig {
// The data to scan.
StorageConfig storage_config = 1;
// Where to put the findings.
OutputStorageConfig output_config = 2;
// How and what to scan for.
InspectConfig inspect_config = 3;
// If provided, will be used as the default for all values in InspectConfig.
// `inspect_config` will be merged into the values persisted as part of the
// template.
string inspect_template_name = 4;
}
// Combines all of the information about a DLP job.
message DlpJob {
// Possible states of a job.
enum JobState {
// No state specified; this is the default (zero) value.
JOB_STATE_UNSPECIFIED = 0;
// The job has not yet started.
PENDING = 1;
// The job is currently running.
RUNNING = 2;
// The job is no longer running.
DONE = 3;
// The job was canceled before it could complete.
CANCELED = 4;
// The job had an error and did not complete.
FAILED = 5;
}
// The server-assigned name.
string name = 1;
// The type of job.
DlpJobType type = 2;
// State of a job.
JobState state = 3;
// Job-type-specific results. As members of a `oneof`, at most one of these
// fields is set at a time.
oneof details {
// Results from analyzing risk of a data source.
AnalyzeDataSourceRiskDetails risk_details = 4;
// Results from inspecting a data source.
InspectDataSourceDetails inspect_details = 5;
}
// Time when the job was created.
google.protobuf.Timestamp create_time = 6;
// Time when the job started.
google.protobuf.Timestamp start_time = 7;
// Time when the job finished.
google.protobuf.Timestamp end_time = 8;
// A stream of errors encountered running the job.
repeated google.rpc.Status error_results = 9;
}
// The request message for [DlpJobs.GetDlpJob][].
message GetDlpJobRequest {
// The name of the DlpJob resource to retrieve.
string name = 1;
}
// The request message for listing DLP jobs.
message ListDlpJobsRequest {
// The parent resource name, for example `projects/my-project-id`.
string parent = 4;
// Optional. Allows filtering.
//
// Supported syntax:
//
// * Filter expressions are made up of one or more restrictions.
// * Restrictions can be combined by `AND` or `OR` logical operators. A
// sequence of restrictions implicitly uses `AND`.
// * A restriction has the form of `<field> <operator> <value>`.
// * Supported fields/values for inspect jobs:
// - `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED
// - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY
// * Supported fields for risk analysis jobs:
// - `state` - RUNNING|CANCELED|FINISHED|FAILED
// * The operator must be `=` or `!=`.
//
// Examples:
//
// * inspected_storage = cloud_storage AND state = done
// * inspected_storage = cloud_storage OR inspected_storage = bigquery
// * inspected_storage = cloud_storage AND (state = done OR state = canceled)
//
// NOTE(review): the supported `state` values above list `FINISHED`, while the
// examples use `done` and `DlpJob.JobState` declares `DONE`; confirm which
// spelling the filter accepts.
//
// The length of this field should be no more than 500 characters.
string filter = 1;
// The standard list page size.
int32 page_size = 2;
// The standard list page token.
string page_token = 3;
// The type of job. Defaults to `DlpJobType.INSPECT_JOB`.
DlpJobType type = 5;
}
// The response message for listing DLP jobs.
message ListDlpJobsResponse {
// A list of DlpJobs that match the specified filter in the request.
repeated DlpJob jobs = 1;
// The standard List next-page token.
string next_page_token = 2;
}
// The request message for canceling a DLP job.
message CancelDlpJobRequest {
// The name of the DlpJob resource to be canceled.
string name = 1;
}
// The request message for deleting a DLP job.
message DeleteDlpJobRequest {
// The name of the DlpJob resource to be deleted.
string name = 1;
}
// Request message for CreateDeidentifyTemplate.
message CreateDeidentifyTemplateRequest {
// The parent resource name, for example `projects/my-project-id` or
// `organizations/my-org-id`.
string parent = 1;
// The DeidentifyTemplate to create.
DeidentifyTemplate deidentify_template = 2;
// The template id can contain uppercase and lowercase letters,
// numbers, and hyphens; that is, it must match the regular
// expression: `[a-zA-Z\\d-]+`. The maximum length is 100
// characters. Can be empty to allow the system to generate one.
string template_id = 3;
}
// Request message for UpdateDeidentifyTemplate.
message UpdateDeidentifyTemplateRequest {
// Resource name of organization and deidentify template to be updated, for
// example `organizations/433245324/deidentifyTemplates/432452342` or
// `projects/project-id/deidentifyTemplates/432452342`.
string name = 1;
// New DeidentifyTemplate value.
DeidentifyTemplate deidentify_template = 2;
// Mask to control which fields get updated.
google.protobuf.FieldMask update_mask = 3;
}
// Request message for GetDeidentifyTemplate.
message GetDeidentifyTemplateRequest {
// Resource name of the organization and deidentify template to be read, for
// example `organizations/433245324/deidentifyTemplates/432452342` or
// `projects/project-id/deidentifyTemplates/432452342`.
string name = 1;
}
// Request message for ListDeidentifyTemplates.
message ListDeidentifyTemplatesRequest {
// The parent resource name, for example `projects/my-project-id` or
// `organizations/my-org-id`.
string parent = 1;
// Optional page token to continue retrieval. Comes from previous call
// to `ListDeidentifyTemplates`.
string page_token = 2;
// Optional size of the page; can be limited by the server. If zero, the
// server returns a page of max size 100.
int32 page_size = 3;
}
// Response message for ListDeidentifyTemplates.
message ListDeidentifyTemplatesResponse {
// List of deidentify templates, up to page_size in
// ListDeidentifyTemplatesRequest.
repeated DeidentifyTemplate deidentify_templates = 1;
// If the next page is available, the page token to use in the following
// ListDeidentifyTemplates request.
string next_page_token = 2;
}
// Request message for DeleteDeidentifyTemplate.
message DeleteDeidentifyTemplateRequest {
// Resource name of the organization and deidentify template to be deleted,
// for example `organizations/433245324/deidentifyTemplates/432452342` or
// `projects/project-id/deidentifyTemplates/432452342`.
string name = 1;
}
// Parts of the APIs which use certain infoTypes.
enum InfoTypeSupportedBy {
// No API part specified; this is the default (zero) value.
ENUM_TYPE_UNSPECIFIED = 0;
// Supported by the inspect operations.
INSPECT = 1;
// Supported by the risk analysis operations.
RISK_ANALYSIS = 2;
}
// Operators available for comparing the value of fields.
enum RelationalOperator {
// No operator specified; this is the default (zero) value.
RELATIONAL_OPERATOR_UNSPECIFIED = 0;
// Equal to.
EQUAL_TO = 1;
// Not equal to.
NOT_EQUAL_TO = 2;
// Greater than.
GREATER_THAN = 3;
// Less than.
LESS_THAN = 4;
// Greater than or equal to.
GREATER_THAN_OR_EQUALS = 5;
// Less than or equal to.
LESS_THAN_OR_EQUALS = 6;
// Exists.
EXISTS = 7;
}
// An enum to represent the various types of DLP jobs.
enum DlpJobType {
// No job type specified; this is the default (zero) value.
DLP_JOB_TYPE_UNSPECIFIED = 0;
// The job inspected Google Cloud for sensitive data.
INSPECT_JOB = 1;
// The job executed a Risk Analysis computation.
RISK_ANALYSIS_JOB = 2;
}