Files
2026-04-05 16:14:49 -04:00

1330 lines
43 KiB
Protocol Buffer

// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.privacy.dlp.v2beta1;
import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/privacy/dlp/v2beta1/storage.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/type/date.proto";
import "google/type/timeofday.proto";
option csharp_namespace = "Google.Cloud.Dlp.V2Beta1";
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2beta1;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpProto";
option java_package = "com.google.privacy.dlp.v2beta1";
option php_namespace = "Google\\Cloud\\Dlp\\V2beta1";
// The DLP API detects Personally Identifiable Information (PII) and other
// privacy-sensitive data in user-supplied, unstructured data streams such as
// text blocks or images.
//
// Beyond detection, the service offers methods for redacting sensitive data
// and for scheduling data scans on Google Cloud Platform based data sets.
service DlpService {
  // Finds potentially sensitive info in a list of strings.
  //
  // Limits apply to input size, processing time, and output size.
  rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:inspect"
      body: "*"
    };
  }

  // Redacts potentially sensitive info from a list of strings.
  //
  // Limits apply to input size, processing time, and output size.
  rpc RedactContent(RedactContentRequest) returns (RedactContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:redact"
      body: "*"
    };
  }

  // De-identifies potentially sensitive info from a list of strings.
  //
  // Limits apply to input size and output size.
  rpc DeidentifyContent(DeidentifyContentRequest)
      returns (DeidentifyContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:deidentify"
      body: "*"
    };
  }

  // Schedules a job that scans content in a Google Cloud Platform data
  // repository.
  rpc CreateInspectOperation(CreateInspectOperationRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2beta1/inspect/operations"
      body: "*"
    };
  }

  // Schedules a job that computes risk analysis metrics over content in a
  // Google Cloud Platform repository.
  rpc AnalyzeDataSourceRisk(AnalyzeDataSourceRiskRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2beta1/dataSource:analyze"
      body: "*"
    };
  }

  // Returns the list of results for the given inspect operation result set id.
  rpc ListInspectFindings(ListInspectFindingsRequest)
      returns (ListInspectFindingsResponse) {
    option (google.api.http) = {
      get: "/v2beta1/{name=inspect/results/*}/findings"
    };
  }

  // Returns the sensitive information types for the given category.
  rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories/{category=*}/infoTypes"
    };
  }

  // Returns the list of root categories of sensitive information.
  rpc ListRootCategories(ListRootCategoriesRequest)
      returns (ListRootCategoriesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories"
    };
  }
}
// Describes how the scanning process is configured.
//
// When used with redactContent, only info_types and min_likelihood are
// currently used.
message InspectConfig {
  // Cap on the number of findings for one info type, applied per content item
  // or per long running operation.
  message InfoTypeLimit {
    // Info type that this findings limit applies to. Provide at most one limit
    // per info_type. An InfoTypeLimit without an info_type is applied by the
    // DLP API to all info_types that are found but not specified in another
    // InfoTypeLimit.
    InfoType info_type = 1;

    // Maximum number of findings for the given infoType.
    int32 max_findings = 2;
  }

  // Info types to look for. Values must correspond to InfoType values returned
  // by ListInfoTypes or found in documentation. Leaving this empty runs all
  // enabled detectors.
  repeated InfoType info_types = 1;

  // Likelihood threshold: only findings at or above it are returned.
  Likelihood min_likelihood = 2;

  // Cap on the number of findings per content item or long running operation.
  int32 max_findings = 3;

  // When true, the response includes a contextual quote from the data that
  // triggered a finding; see Finding.quote.
  bool include_quote = 4;

  // When true, type information is left out of the findings.
  bool exclude_types = 6;

  // Findings limits configured for specific info types.
  repeated InfoTypeLimit info_type_limits = 7;

  // Custom info types supplied by the user.
  repeated CustomInfoType custom_info_types = 8;
}
// Extra configuration that applies to inspect long running operations.
message OperationConfig {
  // Maximum number of findings per file, Datastore entity, or database row.
  int64 max_item_findings = 1;
}
// Wrapper around a single piece of content to inspect.
message ContentItem {
  // Content type, as defined in the Content-Type HTTP header.
  // Supported types are: all "text" types, octet streams, PNG images,
  // JPEG images.
  string type = 1;

  // Payload of the item: a byte array, a UTF-8 string, or a structured table.
  oneof data_item {
    // Content data to inspect or redact.
    bytes data = 2;

    // String data to inspect or redact.
    string value = 3;

    // Structured content for inspection.
    Table table = 4;
  }
}
// Structured content to inspect. Up to 50,000 `Value`s per request allowed.
message Table {
  // One row of the table.
  message Row {
    // Values held by this row.
    // NOTE(review): presumably one value per entry in `headers`, in the same
    // order; confirm against the service documentation.
    repeated Value values = 1;
  }

  // Field identifiers that head the table.
  repeated FieldId headers = 1;

  // Rows of the table.
  repeated Row rows = 2;
}
// Every finding produced for one scanned item.
message InspectResult {
  // Findings for the item.
  repeated Finding findings = 1;

  // When true, the item might have more findings than were returned, and the
  // returned findings are an arbitrary subset of all findings. Truncation can
  // happen because the input items were too large, or because the server
  // reached the maximum amount of resources allowed for a single API call.
  // For best results, divide the input into smaller batches.
  bool findings_truncated = 2;
}
// Describes a single finding within a string or image.
message Finding {
  // The specific string that may be potentially sensitive info.
  string quote = 1;

  // The specific type of info the string might be.
  InfoType info_type = 2;

  // Estimate of how likely it is that the info_type is correct.
  Likelihood likelihood = 3;

  // Where the info was found.
  Location location = 4;

  // Time at which the finding was detected.
  google.protobuf.Timestamp create_time = 6;
}
// Pinpoints where a finding sits within its source item.
message Location {
  // Zero-based byte offsets within a content item.
  Range byte_range = 1;

  // Character offsets within a content item; included when the content type
  // is text. The default charset is assumed to be UTF-8.
  Range codepoint_range = 2;

  // Location within an image's pixels.
  repeated ImageLocation image_boxes = 3;

  // Key of the finding.
  RecordKey record_key = 4;

  // Field id of the field containing the finding.
  FieldId field_id = 5;

  // Location within a `Table` carried in `ContentItem.table`.
  TableLocation table_location = 6;
}
// Position of a finding within a `Table` carried in `ContentItem.table`.
message TableLocation {
  // Zero-based index of the row in which the finding is located.
  int64 row_index = 1;
}
// Generic half-open interval [start, end).
message Range {
  // Index of the first element of the range (inclusive).
  int64 start = 1;

  // End of the range (exclusive): the index just past the last element.
  int64 end = 2;
}
// Bounding box that encloses detected text within an image.
message ImageLocation {
  // Top coordinate of the bounding box. (0,0) is the upper left.
  int32 top = 1;

  // Left coordinate of the bounding box. (0,0) is the upper left.
  int32 left = 2;

  // Width of the bounding box, in pixels.
  int32 width = 3;

  // Height of the bounding box, in pixels.
  int32 height = 4;
}
// Request to search a list of items for potentially sensitive info and
// replace it with default or caller-provided content.
message RedactContentRequest {
  // Pairs an info type with the text that replaces findings of that type.
  message ReplaceConfig {
    // Type of information to replace. Provide at most one ReplaceConfig per
    // info_type. A ReplaceConfig without an info_type is matched by the DLP
    // API against all info_types that are found but not specified in another
    // ReplaceConfig.
    InfoType info_type = 1;

    // Content that replaces sensitive information of the given type.
    // Max 256 chars.
    string replace_with = 2;
  }

  // Controls how redaction of images is carried out.
  message ImageRedactionConfig {
    // Type of information to redact from images.
    oneof target {
      // Only one per info_type should be provided per request. If not
      // specified, and redact_all_text is false, the DLP API will redact all
      // text that it matches against all info_types that are found, but not
      // specified in another ImageRedactionConfig.
      InfoType info_type = 1;

      // If true, all text found in the image is redacted, regardless of
      // whether it matches an info_type.
      bool redact_all_text = 2;
    }

    // Color used when redacting content from an image. Defaults to black when
    // not specified.
    Color redaction_color = 3;
  }

  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // The list of items to inspect. Up to 100 are allowed per request.
  repeated ContentItem items = 2;

  // The strings to replace text findings with. At least one of these, or one
  // ImageRedactionConfig if redacting images, must be specified.
  repeated ReplaceConfig replace_configs = 3;

  // Specifies what content to redact from images.
  repeated ImageRedactionConfig image_redaction_configs = 4;
}
// A color expressed in the RGB color space.
message Color {
  // Amount of red in the color, as a value in the interval [0, 1].
  float red = 1;

  // Amount of green in the color, as a value in the interval [0, 1].
  float green = 2;

  // Amount of blue in the color, as a value in the interval [0, 1].
  float blue = 3;
}
// Outcome of redacting a list of items.
message RedactContentResponse {
  // The redacted content.
  repeated ContentItem items = 1;
}
// Request to de-identify a list of items.
message DeidentifyContentRequest {
  // How the list of content items is to be de-identified.
  DeidentifyConfig deidentify_config = 1;

  // Configuration for the inspector.
  InspectConfig inspect_config = 2;

  // The list of items to inspect. Up to 100 are allowed per request.
  // All items will be treated as text/*.
  repeated ContentItem items = 3;
}
// Outcome of de-identifying a list of items.
message DeidentifyContentResponse {
  // Content items returned by the de-identification.
  repeated ContentItem items = 1;

  // A review of the transformations that took place for each item.
  repeated DeidentificationSummary summaries = 2;
}
// Request to search a list of items for potentially sensitive info.
message InspectContentRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // The list of items to inspect. Items in a single request are considered
  // "related" unless inspect_config.independent_inputs is true.
  // Up to 100 are allowed per request.
  //
  // NOTE(review): InspectConfig as declared in this file has no
  // `independent_inputs` field; confirm whether that sentence is stale.
  repeated ContentItem items = 2;
}
// Outcome of inspecting a list of items.
message InspectContentResponse {
  // One result per content_item of the request, listed in the same order as
  // the request.
  repeated InspectResult results = 1;
}
// Request for scheduling a scan of a data subset from a Google Cloud Platform
// data repository.
message CreateInspectOperationRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // Specification of the data set to process.
  StorageConfig storage_config = 2;

  // Optional location in which to store findings.
  OutputStorageConfig output_config = 3;

  // Additional configuration settings for long running operations.
  OperationConfig operation_config = 5;
}
// Cloud repository in which output is stored.
message OutputStorageConfig {
  // Destination of the output; exactly one kind may be set.
  oneof type {
    // Store findings in a new table in the dataset.
    BigQueryTable table = 1;

    // The path to a Google Cloud Storage location to store output.
    // The bucket must already exist, and the Google APIs service account for
    // DLP must have permission to write to the given bucket.
    // Results are split over multiple csv files with each file name matching
    // the pattern "[operation_id]_[count].csv", for example
    // `3094877188788974909_1.csv`. The `operation_id` matches the
    // identifier for the Operation, and the `count` is a counter used for
    // tracking the number of files written.
    //
    // The CSV file(s) contain the following columns regardless of storage type
    // scanned:
    //
    // - id
    // - info_type
    // - likelihood
    // - byte size of finding
    // - quote
    // - timestamp
    //
    // For Cloud Storage the next columns are:
    //
    // - file_path
    // - start_offset
    //
    // For Cloud Datastore the next columns are:
    //
    // - project_id
    // - namespace_id
    // - path
    // - column_name
    // - offset
    //
    // For BigQuery the next columns are:
    //
    // - row_number
    // - project_id
    // - dataset_id
    // - table_id
    CloudStoragePath storage_path = 2;
  }
}
// Statistics about one specific InfoType.
message InfoTypeStatistics {
  // The type of finding this stat is for.
  InfoType info_type = 1;

  // Number of findings for this info type.
  int64 count = 2;
}
// Metadata returned within GetOperation for an inspect request.
message InspectOperationMetadata {
  // Total size, in bytes, of the data that was processed.
  int64 processed_bytes = 1;

  // Estimate of the number of bytes to process.
  int64 total_estimated_bytes = 4;

  // Statistics for individual InfoTypes.
  repeated InfoTypeStatistics info_type_stats = 2;

  // The time at which this request was started.
  google.protobuf.Timestamp create_time = 3;

  // The inspect config used to create the Operation.
  InspectConfig request_inspect_config = 5;

  // The storage config used to create the Operation.
  StorageConfig request_storage_config = 6;

  // Optional location in which to store findings.
  OutputStorageConfig request_output_config = 7;
}
// The operational data.
message InspectOperationResult {
  // Server-assigned name, unique only within the same service that originally
  // returns it. With the default HTTP mapping, the `name` should have the
  // format of `inspect/results/{id}`.
  string name = 1;
}
// Request for the list of results in a given inspect operation.
message ListInspectFindingsRequest {
  // Identifier of the results set returned as metadata of
  // the longrunning operation created by a call to InspectDataSource.
  // Should be in the format of `inspect/results/{id}`.
  //
  // NOTE(review): DlpService declares no `InspectDataSource` method; the
  // intended call is probably `CreateInspectOperation`. Confirm and update.
  string name = 1;

  // Maximum number of results to return.
  // If 0, the implementation selects a reasonable value.
  int32 page_size = 2;

  // The value returned by the last `ListInspectFindingsResponse`. Supplying
  // it marks this call as a continuation of a prior `ListInspectFindings`
  // call, so the system should return the next page of data.
  string page_token = 3;

  // Restricts findings to items that match. Supports info_type and
  // likelihood.
  //
  // Examples:
  //
  // - info_type=EMAIL_ADDRESS
  // - info_type=PHONE_NUMBER,EMAIL_ADDRESS
  // - likelihood=VERY_LIKELY
  // - likelihood=VERY_LIKELY,LIKELY
  // - info_type=EMAIL_ADDRESS,likelihood=VERY_LIKELY,LIKELY
  string filter = 4;
}
// Response to the ListInspectFindings request.
message ListInspectFindingsResponse {
  // The results.
  InspectResult result = 1;

  // When not empty, there may be more results that match the request; pass
  // this value in a new `ListInspectFindingsRequest`.
  string next_page_token = 2;
}
// Describes an information type (infoType).
message InfoTypeDescription {
  // Internal name of the infoType.
  string name = 1;

  // Human readable form of the infoType name.
  string display_name = 2;

  // Categories to which this infoType belongs.
  repeated CategoryDescription categories = 3;
}
// Request for the list of info types belonging to a given category,
// or all supported info types if no category is specified.
message ListInfoTypesRequest {
  // Category name as returned by ListRootCategories.
  string category = 1;

  // Optional BCP-47 language code for localized info type friendly names.
  // When omitted, or when localized strings are not available, en-US strings
  // will be returned.
  string language_code = 2;
}
// Response to the ListInfoTypes request.
message ListInfoTypesResponse {
  // Set of sensitive info types belonging to a category.
  repeated InfoTypeDescription info_types = 1;
}
// Describes an info type category.
message CategoryDescription {
  // Internal name of the category.
  string name = 1;

  // Human readable form of the category name.
  string display_name = 2;
}
// Request for the root categories of info types supported by the API.
// Example values might include "FINANCE", "HEALTH", "FAST", "DEFAULT".
message ListRootCategoriesRequest {
  // Optional language code for localized friendly category names.
  // When omitted, or when localized strings are not available, en-US strings
  // will be returned.
  string language_code = 1;
}
// Response for the ListRootCategories request.
message ListRootCategoriesResponse {
  // List of all info type categories supported by the API.
  repeated CategoryDescription categories = 1;
}
// Request that creates a risk analysis operation.
message AnalyzeDataSourceRiskRequest {
  // Privacy metric to compute.
  PrivacyMetric privacy_metric = 1;

  // Input dataset over which the metrics are computed.
  BigQueryTable source_table = 3;
}
// Privacy metric to compute for reidentification risk analysis.
message PrivacyMetric {
  // Compute numerical stats over an individual column, including
  // min, max, and quantiles.
  message NumericalStatsConfig {
    // Field to compute numerical stats on. Supported types are
    // integer, float, date, datetime, timestamp, time.
    FieldId field = 1;
  }

  // Compute categorical stats over an individual column, including
  // number of distinct values and value count distribution.
  message CategoricalStatsConfig {
    // Field to compute categorical stats on. All column types are supported
    // except for arrays and structs. However, depending on the data, it may
    // be more informative to use NumericalStats when the field type is
    // supported.
    FieldId field = 1;
  }

  // k-anonymity metric, used for analysis of reidentification risk.
  message KAnonymityConfig {
    // Set of fields to compute k-anonymity over. Multiple fields are treated
    // as a single composite key. Structs and repeated data types are not
    // supported; however, nested fields are supported so long as they are not
    // structs themselves or nested within a repeated field.
    repeated FieldId quasi_ids = 1;

    // Optional message indicating that each distinct `EntityId` should not
    // contribute to the k-anonymity count more than once per equivalence
    // class.
    EntityId entity_id = 2;
  }

  // l-diversity metric, used for analysis of reidentification risk.
  message LDiversityConfig {
    // Set of quasi-identifiers indicating how equivalence classes are
    // defined for the l-diversity computation. Multiple fields are treated
    // as a single composite key.
    repeated FieldId quasi_ids = 1;

    // Sensitive field for computing the l-value.
    FieldId sensitive_attribute = 2;
  }

  // The metric to compute; exactly one configuration may be set.
  oneof type {
    // Configuration for computing numerical stats.
    NumericalStatsConfig numerical_stats_config = 1;

    // Configuration for computing categorical stats.
    CategoricalStatsConfig categorical_stats_config = 2;

    // Configuration for the k-anonymity metric.
    KAnonymityConfig k_anonymity_config = 3;

    // Configuration for the l-diversity metric.
    LDiversityConfig l_diversity_config = 4;
  }
}
// Metadata returned within the
// [`riskAnalysis.operations.get`](/dlp/docs/reference/rest/v2beta1/riskAnalysis.operations/get)
// for risk analysis.
message RiskAnalysisOperationMetadata {
  // The time at which this request was started.
  google.protobuf.Timestamp create_time = 1;

  // Privacy metric to compute.
  PrivacyMetric requested_privacy_metric = 2;

  // Input dataset over which the metrics are computed.
  BigQueryTable requested_source_table = 3;
}
// Result of a risk analysis
// [`Operation`](/dlp/docs/reference/rest/v2beta1/inspect.operations)
// request.
//
// NOTE(review): the link above targets `inspect.operations` although this
// message is about risk analysis; confirm whether
// `riskAnalysis.operations` was intended.
message RiskAnalysisOperationResult {
  // Result of the numerical stats computation.
  message NumericalStatsResult {
    // Minimum value appearing in the column.
    Value min_value = 1;

    // Maximum value appearing in the column.
    Value max_value = 2;

    // List of 99 values that partition the set of field values into 100
    // equal sized buckets.
    repeated Value quantile_values = 4;
  }

  // Result of the categorical stats computation.
  message CategoricalStatsResult {
    // Histogram bucket of value frequencies in the column.
    message CategoricalStatsHistogramBucket {
      // Lower bound on the value frequency of the values in this bucket.
      int64 value_frequency_lower_bound = 1;

      // Upper bound on the value frequency of the values in this bucket.
      int64 value_frequency_upper_bound = 2;

      // Total number of records in this bucket.
      int64 bucket_size = 3;

      // Sample of value frequencies in this bucket. At most 20 values are
      // returned per bucket.
      repeated ValueFrequency bucket_values = 4;
    }

    // Histogram of value frequencies in the column.
    repeated CategoricalStatsHistogramBucket
        value_frequency_histogram_buckets = 5;
  }

  // Result of the k-anonymity computation.
  message KAnonymityResult {
    // The set of columns' values that share the same k-anonymity value.
    message KAnonymityEquivalenceClass {
      // Set of values defining the equivalence class. One value per
      // quasi-identifier column in the original KAnonymity metric message.
      // The order is always the same as the original request.
      repeated Value quasi_ids_values = 1;

      // Size of the equivalence class, for example number of rows with the
      // above set of values.
      int64 equivalence_class_size = 2;
    }

    // Histogram bucket of equivalence class sizes in the table.
    message KAnonymityHistogramBucket {
      // Lower bound on the size of the equivalence classes in this bucket.
      int64 equivalence_class_size_lower_bound = 1;

      // Upper bound on the size of the equivalence classes in this bucket.
      int64 equivalence_class_size_upper_bound = 2;

      // Total number of records in this bucket.
      int64 bucket_size = 3;

      // Sample of equivalence classes in this bucket. At most 20 classes are
      // returned per bucket.
      repeated KAnonymityEquivalenceClass bucket_values = 4;
    }

    // Histogram of k-anonymity equivalence classes.
    repeated KAnonymityHistogramBucket equivalence_class_histogram_buckets = 5;
  }

  // Result of the l-diversity computation.
  message LDiversityResult {
    // The set of columns' values that share the same l-diversity value.
    message LDiversityEquivalenceClass {
      // Quasi-identifier values defining the k-anonymity equivalence
      // class. The order is always the same as the original request.
      repeated Value quasi_ids_values = 1;

      // Size of the k-anonymity equivalence class.
      int64 equivalence_class_size = 2;

      // Number of distinct sensitive values in this equivalence class.
      int64 num_distinct_sensitive_values = 3;

      // Estimated frequencies of top sensitive values.
      repeated ValueFrequency top_sensitive_values = 4;
    }

    // Histogram bucket of sensitive value frequencies in the table.
    message LDiversityHistogramBucket {
      // Lower bound on the sensitive value frequencies of the equivalence
      // classes in this bucket.
      int64 sensitive_value_frequency_lower_bound = 1;

      // Upper bound on the sensitive value frequencies of the equivalence
      // classes in this bucket.
      int64 sensitive_value_frequency_upper_bound = 2;

      // Total number of records in this bucket.
      int64 bucket_size = 3;

      // Sample of equivalence classes in this bucket. At most 20 classes are
      // returned per bucket.
      repeated LDiversityEquivalenceClass bucket_values = 4;
    }

    // Histogram of l-diversity equivalence class sensitive value frequencies.
    repeated LDiversityHistogramBucket
        sensitive_value_frequency_histogram_buckets = 5;
  }

  // Values associated with this metric.
  oneof result {
    // Outcome of a numerical stats computation.
    NumericalStatsResult numerical_stats_result = 3;

    // Outcome of a categorical stats computation.
    CategoricalStatsResult categorical_stats_result = 4;

    // Outcome of a k-anonymity computation.
    KAnonymityResult k_anonymity_result = 5;

    // Outcome of an l-diversity computation.
    LDiversityResult l_diversity_result = 6;
  }
}
// A value of a field, together with its frequency.
message ValueFrequency {
  // A value contained in the field in question.
  Value value = 1;

  // Number of times the value is contained in the field.
  int64 count = 2;
}
// Set of primitive values supported by the system.
message Value {
  // The concrete value; exactly one member may be set.
  oneof type {
    // 64-bit integer value.
    int64 integer_value = 1;

    // Double-precision floating point value.
    double float_value = 2;

    // String value.
    string string_value = 3;

    // Boolean value.
    bool boolean_value = 4;

    // Timestamp value.
    google.protobuf.Timestamp timestamp_value = 5;

    // Time-of-day value.
    google.type.TimeOfDay time_value = 6;

    // Date value.
    google.type.Date date_value = 7;
  }
}
// The configuration that controls how the data will change.
message DeidentifyConfig {
  // The kind of transformation to apply; exactly one member may be set.
  oneof transformation {
    // Treat the dataset as free-form text and apply the same free text
    // transformation everywhere.
    InfoTypeTransformations info_type_transformations = 1;

    // Treat the dataset as structured. Transformations can be applied to
    // specific locations within structured datasets, such as transforming
    // a column within a table.
    RecordTransformations record_transformations = 2;
  }
}
// A rule for transforming a value.
message PrimitiveTransformation {
  // The transformation to apply; exactly one member may be set.
  oneof transformation {
    // Replace the value with a given `Value`.
    ReplaceValueConfig replace_config = 1;

    // Redact the value.
    RedactConfig redact_config = 2;

    // Partially mask the value with a fixed character.
    CharacterMaskConfig character_mask_config = 3;

    // Replace the value with a surrogate using FPE with the FFX mode of
    // operation.
    CryptoReplaceFfxFpeConfig crypto_replace_ffx_fpe_config = 4;

    // Bucket the value based on fixed size ranges.
    FixedSizeBucketingConfig fixed_size_bucketing_config = 5;

    // Bucket the value based on user-provided ranges.
    BucketingConfig bucketing_config = 6;

    // Replace the finding with the name of its info_type.
    ReplaceWithInfoTypeConfig replace_with_info_type_config = 7;

    // Extract or preserve a portion of a date/time value.
    TimePartConfig time_part_config = 8;

    // Replace the value with a surrogate generated by cryptographic hashing.
    CryptoHashConfig crypto_hash_config = 9;
  }
}
// For use with `Date`, `Timestamp`, and `TimeOfDay`: extract or preserve a
// portion of the value.
message TimePartConfig {
  // Portions of a date/time value that can be selected.
  enum TimePart {
    TIME_PART_UNSPECIFIED = 0;

    // [0-9999]
    YEAR = 1;

    // [1-12]
    MONTH = 2;

    // [1-31]
    DAY_OF_MONTH = 3;

    // [1-7]
    DAY_OF_WEEK = 4;

    // [1-52]
    // NOTE(review): some week-numbering schemes have a week 53; confirm the
    // upper bound.
    WEEK_OF_YEAR = 5;

    // [0-24]
    // NOTE(review): an hour of day normally spans 0-23; confirm whether 24
    // can really be produced.
    HOUR_OF_DAY = 6;
  }

  // The portion of the value to extract or preserve.
  TimePart part_to_extract = 1;
}
// Pseudonymization method that generates surrogates via cryptographic hashing.
// Uses SHA-256.
// Outputs a 32 byte digest as an uppercase hex string
// (for example, 41D1567F7F99F1DC2A5FAB886DEE5BEE).
// Currently, only string and integer values can be hashed.
//
// NOTE(review): the example above has 32 hex characters, i.e. 16 bytes, while
// the text says "32 byte digest"; confirm which of the two is accurate.
message CryptoHashConfig {
  // The key used by the hash function.
  CryptoKey crypto_key = 1;
}
// Replace each input value with a given `Value`.
message ReplaceValueConfig {
  // The value that takes the place of the input.
  Value new_value = 1;
}
// Replace each matching finding with the name of the info_type.
//
// This message intentionally declares no fields.
message ReplaceWithInfoTypeConfig {}
// Redact a given value. For example, if used with an `InfoTypeTransformation`
// transforming PHONE_NUMBER, and input 'My phone number is 206-555-0123', the
// output would be 'My phone number is '.
//
// This message intentionally declares no fields.
message RedactConfig {}
// Characters to skip when doing deidentification of a value. These will be
// left alone and skipped.
message CharsToIgnore {
  // Predefined groups of characters.
  enum CharacterGroup {
    CHARACTER_GROUP_UNSPECIFIED = 0;

    // 0-9
    NUMERIC = 1;

    // A-Z
    ALPHA_UPPER_CASE = 2;

    // a-z
    ALPHA_LOWER_CASE = 3;

    // US Punctuation, one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
    PUNCTUATION = 4;

    // Whitespace character, one of [ \t\n\x0B\f\r]
    WHITESPACE = 5;
  }

  // The characters to skip, given either explicitly or as a predefined group.
  oneof characters {
    // Explicit string of characters to skip.
    string characters_to_skip = 1;

    // Predefined group of characters to skip.
    CharacterGroup common_characters_to_ignore = 2;
  }
}
// Partially mask a string by replacing a given number of characters with a
// fixed character. Masking can start from the beginning or end of the string.
// This can be used on data of any type (numbers, longs, and so on) and when
// de-identifying structured data we'll attempt to preserve the original data's
// type. (This allows you to take a long like 123 and modify it to a string
// like **3.)
message CharacterMaskConfig {
  // Character to mask the sensitive values - for example, "*" for an
  // alphabetic string such as name, or "0" for a numeric string such as ZIP
  // code or credit card number. String must have length 1. If not supplied,
  // we will default to "*" for strings, 0 for digits.
  string masking_character = 1;

  // Number of characters to mask. If not set, all matching chars will be
  // masked. Skipped characters do not count towards this tally.
  int32 number_to_mask = 2;

  // Mask characters in reverse order. For example, if `masking_character` is
  // '0', `number_to_mask` is 14, and `reverse_order` is false, then
  // 1234-5678-9012-3456 -> 00000000000000-3456
  // If `masking_character` is '*', `number_to_mask` is 3, and `reverse_order`
  // is true, then 12345 -> 12***
  bool reverse_order = 3;

  // When masking a string, items in this list will be skipped when replacing.
  // For example, if your string is 555-555-5555 and you ask us to skip `-`
  // and mask 5 chars with * we would produce ***-**5-5555.
  repeated CharsToIgnore characters_to_ignore = 4;
}
// Buckets values based on fixed size ranges. The Bucketing transformation can
// provide all of this functionality, but requires more configuration. This
// message is provided as a convenience to the user for simple bucketing
// strategies.
// The resulting value will be a hyphenated string of
// lower_bound-upper_bound.
// This can be used on data of type: double, long.
// If the bound Value type differs from the type of data being transformed, we
// will first attempt converting the type of the data to be transformed to
// match the type of the bound before comparing.
message FixedSizeBucketingConfig {
  // Lower bound value of buckets. All values less than `lower_bound` are
  // grouped together into a single bucket; for example if `lower_bound` = 10,
  // then all values less than 10 are replaced with the value "-10".
  // [Required].
  Value lower_bound = 1;

  // Upper bound value of buckets. All values greater than upper_bound are
  // grouped together into a single bucket; for example if `upper_bound` = 89,
  // then all values greater than 89 are replaced with the value "89+".
  // [Required].
  Value upper_bound = 2;

  // Size of each bucket (except for minimum and maximum buckets). So if
  // `lower_bound` = 10, `upper_bound` = 89, and `bucket_size` = 10, then the
  // following buckets would be used: -10, 10-20, 20-30, 30-40, 40-50, 50-60,
  // 60-70, 70-80, 80-89, 89+. Precision up to 2 decimals works. [Required].
  double bucket_size = 3;
}
// Generalization function that buckets values based on ranges. The ranges and
// replacement values are dynamically provided by the user for custom behavior,
// such as 1-30 -> LOW, 31-65 -> MEDIUM, 66-100 -> HIGH.
// This can be used on data of type: number, long, string, timestamp.
// If the bound `Value` type differs from the type of data being transformed,
// we will first attempt converting the type of the data to be transformed to
// match the type of the bound before comparing.
message BucketingConfig {
  // A bucket represented as a range, along with its replacement value.
  message Bucket {
    // Lower bound of the range, inclusive. Type should be the same as max if
    // used.
    Value min = 1;

    // Upper bound of the range, exclusive; type must match min.
    Value max = 2;

    // Replacement value for this bucket. If not provided, the default
    // behavior will be to hyphenate the min-max range.
    Value replacement_value = 3;
  }

  // The buckets to apply. Ranges must be non-overlapping.
  repeated Bucket buckets = 1;
}
// Replaces an identifier with a surrogate using FPE with the FFX mode of
// operation.
//
// - The identifier must be representable by the US-ASCII character set.
// - For a given crypto key and context, the same identifier will be replaced
//   with the same surrogate.
// - Identifiers must be at least two characters long.
// - An identifier that is the empty string will be skipped.
message CryptoReplaceFfxFpeConfig {
  // Commonly used subsets of the alphabet that the FFX mode natively
  // supports. In the algorithm, the alphabet is selected using the "radix",
  // so each value corresponds to a particular radix.
  enum FfxCommonNativeAlphabet {
    FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED = 0;

    // [0-9] (radix of 10)
    NUMERIC = 1;

    // [0-9A-F] (radix of 16)
    HEXADECIMAL = 2;

    // [0-9A-Z] (radix of 36)
    UPPER_CASE_ALPHA_NUMERIC = 3;

    // [0-9A-Za-z] (radix of 62)
    ALPHA_NUMERIC = 4;
  }

  // The key used by the encryption algorithm. [required]
  CryptoKey crypto_key = 1;

  // The context, also known as the 'tweak', as in tweakable encryption.
  //
  // A context may be used for higher security, since the same identifier in
  // two different contexts will likely be given a distinct surrogate. The
  // principle is that this likelihood is inversely related to the ratio of
  // the number of distinct identifiers per context over the number of
  // possible surrogates: as long as this ratio is small, the likelihood is
  // large.
  //
  // If the context is not set, a default tweak will be used.
  // If the context is set but:
  //
  // 1. there is no record present when transforming a given value or
  // 2. the field is not present when transforming a given value,
  //
  // a default tweak will be used.
  //
  // Note that case (1) is expected when an `InfoTypeTransformation` is
  // applied to both structured and non-structured `ContentItem`s.
  // Currently, the referenced field may be of value type integer or string.
  //
  // The tweak is constructed as a sequence of bytes in big endian byte order
  // such that:
  //
  // - a 64 bit integer is encoded followed by a single byte of value 1
  // - a string is encoded in UTF-8 format followed by a single byte of value 2
  FieldId context = 2;

  // The alphabet to use; exactly one way of selecting it may be set.
  oneof alphabet {
    // One of the commonly used native alphabets.
    FfxCommonNativeAlphabet common_alphabet = 4;

    // A custom alphabet. It is supported by mapping its characters to the
    // alphanumeric characters that the FFX mode natively supports. This
    // mapping happens before/after encryption/decryption.
    //
    // - Each character listed must appear only once.
    // - The number of characters must be in the range [2, 62].
    // - The alphabet must be encoded as ASCII.
    // - The order of characters does not matter.
    string custom_alphabet = 5;

    // The native way to select the alphabet. Must be in the range [2, 62].
    int32 radix = 6;
  }
}
// A data encryption key (DEK), as opposed to a key encryption key (KEK)
// stored by KMS.
//
// When KMS is used to wrap/unwrap DEKs, be sure to set an appropriate IAM
// policy on the KMS CryptoKey (KEK), so that an attacker cannot unwrap the
// data crypto key.
message CryptoKey {
  // Where the data crypto key comes from.
  oneof source {
    TransientCryptoKey transient = 1;

    UnwrappedCryptoKey unwrapped = 2;

    KmsWrappedCryptoKey kms_wrapped = 3;
  }
}
// Use this to have a random data crypto key generated. The key is discarded
// once the operation/request finishes.
message TransientCryptoKey {
  // Name of the key. [required]
  //
  // An arbitrary string used to tell different keys apart. One unique key is
  // generated per name: two separate `TransientCryptoKey` protos that carry
  // the same name share the same generated key.
  //
  // The name is not used in any way when the data crypto key is generated
  // (repeating the api call will result in a different key being generated).
  string name = 1;
}
// A raw (unwrapped) crypto key. Using raw keys is prone to security risks
// because the key may be leaked accidentally. Choose another type of key if
// possible.
message UnwrappedCryptoKey {
  // The AES 128/192/256 bit key. [required]
  bytes key = 1;
}
// Include to use an existing data crypto key wrapped by KMS.
//
// Sending a request that performs a crypto transformation with a kms-wrapped
// crypto key requires the following IAM permissions for authorization:
// dlp.kms.encrypt
message KmsWrappedCryptoKey {
  // The wrapped data crypto key. [required]
  bytes wrapped_key = 1;

  // The resource name of the KMS CryptoKey to use for unwrapping. [required]
  string crypto_key_name = 2;
}
// A type of transformation that scans unstructured text and applies various
// `PrimitiveTransformation`s to each finding. A transformation is applied
// only to values that were identified as a specific info_type.
message InfoTypeTransformations {
  // A transformation to apply to text that is identified as a specific
  // info_type.
  message InfoTypeTransformation {
    // Info types to apply the transformation to. An empty list matches all
    // available info types for this transformation.
    repeated InfoType info_types = 1;

    // Primitive transformation to apply to the info type. [required]
    PrimitiveTransformation primitive_transformation = 2;
  }

  // Transformation for each info type. No more than one may be specified for
  // a given info type. [required]
  repeated InfoTypeTransformation transformations = 1;
}
// The transformation to apply to the field.
message FieldTransformation {
  // Input field(s) to apply the transformation to. [required]
  repeated FieldId fields = 1;

  // When set, the transformation is applied only if this `RecordCondition`
  // evaluates to true. The conditions are allowed to reference fields that
  // are not used in the actual transformation. [optional]
  //
  // Example Use Cases:
  //
  // - Apply a different bucket transformation to an age column if the zip
  //   code column for the same record is within a specific range.
  // - Redact a field if the date of birth field is greater than 85.
  RecordCondition condition = 3;

  // Transformation to apply. [required]
  oneof transformation {
    // Apply the transformation to the entire field.
    PrimitiveTransformation primitive_transformation = 4;

    // Treat the contents of the field as free text, and selectively
    // transform content that matches an `InfoType`.
    InfoTypeTransformations info_type_transformations = 5;
  }
}
// A type of transformation that is applied over structured data, such as a
// table.
message RecordTransformations {
  // Field transformations to apply in order to transform the record.
  repeated FieldTransformation field_transformations = 1;

  // Configuration defining which records get suppressed entirely. A record
  // that matches any suppression rule is omitted from the output. [optional]
  repeated RecordSuppression record_suppressions = 2;
}
// Configuration to suppress records whose suppression conditions evaluate to
// true.
message RecordSuppression {
  // The suppression condition evaluated against a record.
  RecordCondition condition = 1;
}
// A condition for determining whether a transformation should be applied to
// a field.
message RecordCondition {
  // A single comparison between a field of the record and a value.
  //
  // The field type of `value` and `field` do not need to match to be
  // considered equal, but not all comparisons are possible.
  //
  // A `value` of type:
  //
  // - `string` can be compared against all other types
  // - `boolean` can only be compared against other booleans
  // - `integer` can be compared against doubles or a string if the string
  //   value can be parsed as an integer.
  // - `double` can be compared against integers or a string if the string
  //   can be parsed as a double.
  // - `Timestamp` can be compared against strings in RFC 3339 date string
  //   format.
  // - `TimeOfDay` can be compared against timestamps and strings in the
  //   format of 'HH:mm:ss'.
  //
  // If the comparison fails due to a type mismatch, a warning will be given
  // and the condition will evaluate to false.
  message Condition {
    // Field within the record this condition is evaluated against. [required]
    FieldId field = 1;

    // Operator used to compare the field or info type to the value.
    // [required]
    RelationalOperator operator = 3;

    // Value to compare against. [Required, except for `EXISTS` tests.]
    Value value = 4;
  }

  // A list of `Condition`s.
  message Conditions {
    repeated Condition conditions = 1;
  }

  // A collection of expressions.
  message Expressions {
    // Operators that can be applied to the result of conditions.
    enum LogicalOperator {
      LOGICAL_OPERATOR_UNSPECIFIED = 0;

      AND = 1;
    }

    // The operator to apply to the result of conditions. `AND` is the default
    // and currently the only supported value.
    LogicalOperator logical_operator = 1;

    oneof type {
      Conditions conditions = 3;
    }
  }

  Expressions expressions = 3;
}
// High level summary of deidentification.
message DeidentificationSummary {
  // Total size, in bytes, of the data that was transformed in some way.
  int64 transformed_bytes = 2;

  // Transformations applied to the dataset.
  repeated TransformationSummary transformation_summaries = 3;
}
// Summary of a single transformation.
message TransformationSummary {
  // Informs the user of the number of times a particular
  // `TransformationResultCode` and error details occurred.
  message SummaryResult {
    int64 count = 1;

    TransformationResultCode code = 2;

    // A place for warnings or errors to show up if a transformation didn't
    // work as expected.
    string details = 3;
  }

  // Possible outcomes of transformations.
  enum TransformationResultCode {
    TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;

    SUCCESS = 1;

    ERROR = 2;
  }

  // Set if the transformation was limited to a specific info_type.
  InfoType info_type = 1;

  // Set if the transformation was limited to a specific FieldId.
  FieldId field = 2;

  // The specific transformation these stats apply to.
  PrimitiveTransformation transformation = 3;

  // The field transformation that was applied. This list contains more than
  // one entry only in the case of errors.
  repeated FieldTransformation field_transformations = 5;

  // The specific suppression option these stats apply to.
  RecordSuppression record_suppress = 6;

  repeated SummaryResult results = 4;
}
// Categorization of results by how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
enum Likelihood {
  // Default value; information with all likelihoods is included.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Few matching elements.
  VERY_UNLIKELY = 1;

  UNLIKELY = 2;

  // Some matching elements.
  POSSIBLE = 3;

  LIKELY = 4;

  // Many matching elements.
  VERY_LIKELY = 5;
}
// Operators available for comparing the value of fields.
enum RelationalOperator {
  RELATIONAL_OPERATOR_UNSPECIFIED = 0;

  // Equal.
  EQUAL_TO = 1;

  // Not equal to.
  NOT_EQUAL_TO = 2;

  // Greater than.
  GREATER_THAN = 3;

  // Less than.
  LESS_THAN = 4;

  // Greater than or equals.
  GREATER_THAN_OR_EQUALS = 5;

  // Less than or equals.
  LESS_THAN_OR_EQUALS = 6;

  // Exists.
  EXISTS = 7;
}