// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.privacy.dlp.v2beta1;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/privacy/dlp/v2beta1/storage.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/type/date.proto";
import "google/type/timeofday.proto";

option csharp_namespace = "Google.Cloud.Dlp.V2Beta1";
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2beta1;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpProto";
option java_package = "com.google.privacy.dlp.v2beta1";
option php_namespace = "Google\\Cloud\\Dlp\\V2beta1";

// The DLP API is a service that allows clients
// to detect the presence of Personally Identifiable Information (PII) and other
// privacy-sensitive data in user-supplied, unstructured data streams, like text
// blocks or images.
// The service also includes methods for sensitive data redaction and
// scheduling of data scans on Google Cloud Platform based data sets.
service DlpService {
  // Finds potentially sensitive info in a list of strings.
  // This method has limits on input size, processing time, and output size.
  rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:inspect"
      body: "*"
    };
  }

  // Redacts potentially sensitive info from a list of strings.
  // This method has limits on input size, processing time, and output size.
  rpc RedactContent(RedactContentRequest) returns (RedactContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:redact"
      body: "*"
    };
  }

  // De-identifies potentially sensitive info from a list of strings.
  // This method has limits on input size and output size.
  rpc DeidentifyContent(DeidentifyContentRequest)
      returns (DeidentifyContentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/content:deidentify"
      body: "*"
    };
  }

  // Schedules a job scanning content in a Google Cloud Platform data
  // repository.
  rpc CreateInspectOperation(CreateInspectOperationRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2beta1/inspect/operations"
      body: "*"
    };
  }

  // Schedules a job to compute risk analysis metrics over content in a Google
  // Cloud Platform repository.
  rpc AnalyzeDataSourceRisk(AnalyzeDataSourceRiskRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2beta1/dataSource:analyze"
      body: "*"
    };
  }

  // Returns list of results for given inspect operation result set id.
  rpc ListInspectFindings(ListInspectFindingsRequest)
      returns (ListInspectFindingsResponse) {
    option (google.api.http) = {
      get: "/v2beta1/{name=inspect/results/*}/findings"
    };
  }

  // Returns sensitive information types for given category.
  rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories/{category=*}/infoTypes"
    };
  }

  // Returns the list of root categories of sensitive information.
  rpc ListRootCategories(ListRootCategoriesRequest)
      returns (ListRootCategoriesResponse) {
    option (google.api.http) = {
      get: "/v2beta1/rootCategories"
    };
  }
}

// Configuration description of the scanning process.
// When used with redactContent only info_types and min_likelihood are currently
// used.
message InspectConfig {
  // Max findings configuration per info type, per content item or long running
  // operation.
  message InfoTypeLimit {
    // Type of information the findings limit applies to. Only one limit per
    // info_type should be provided. If InfoTypeLimit does not have an
    // info_type, the DLP API applies the limit against all info_types that are
    // found but not specified in another InfoTypeLimit.
    InfoType info_type = 1;

    // Max findings limit for the given infoType.
    int32 max_findings = 2;
  }

  // Restricts what info_types to look for. The values must correspond to
  // InfoType values returned by ListInfoTypes or found in documentation.
  // Empty info_types runs all enabled detectors.
  repeated InfoType info_types = 1;

  // Only returns findings equal to or above this threshold.
  Likelihood min_likelihood = 2;

  // Limits the number of findings per content item or long running operation.
  int32 max_findings = 3;

  // When true, a contextual quote from the data that triggered a finding is
  // included in the response; see Finding.quote.
  bool include_quote = 4;

  // When true, excludes type information of the findings.
  bool exclude_types = 6;

  // Configuration of findings limit given for specified info types.
  repeated InfoTypeLimit info_type_limits = 7;

  // Custom info types provided by the user.
  repeated CustomInfoType custom_info_types = 8;
}

// Additional configuration for inspect long running operations.
message OperationConfig {
  // Max number of findings per file, Datastore entity, or database row.
  int64 max_item_findings = 1;
}

// Container structure for the content to inspect.
message ContentItem {
  // Type of the content, as defined in Content-Type HTTP header.
  // Supported types are: all "text" types, octet streams, PNG images,
  // JPEG images.
  string type = 1;

  // Data of the item either in the byte array or UTF-8 string form.
  oneof data_item {
    // Content data to inspect or redact.
    bytes data = 2;

    // String data to inspect or redact.
    string value = 3;

    // Structured content for inspection.
    Table table = 4;
  }
}

// Structured content to inspect. Up to 50,000 `Value`s per request allowed.
message Table {
  // The values of a single row of the table.
  message Row {
    // Values contained in this row.
    repeated Value values = 1;
  }

  // Field identifiers serving as the headers of the table.
  repeated FieldId headers = 1;

  // Rows of the table.
  repeated Row rows = 2;
}

// All the findings for a single scanned item.
message InspectResult {
  // List of findings for an item.
  repeated Finding findings = 1;

  // If true, then this item might have more findings than were returned,
  // and the findings returned are an arbitrary subset of all findings.
  // The findings list might be truncated because the input items were too
  // large, or because the server reached the maximum amount of resources
  // allowed for a single API call. For best results, divide the input into
  // smaller batches.
  bool findings_truncated = 2;
}

// Container structure describing a single finding within a string or image.
message Finding {
  // The specific string that may be potentially sensitive info.
  string quote = 1;

  // The specific type of info the string might be.
  InfoType info_type = 2;

  // Estimate of how likely it is that the info_type is correct.
  Likelihood likelihood = 3;

  // Location of the info found.
  Location location = 4;

  // Timestamp when finding was detected.
  google.protobuf.Timestamp create_time = 6;
}

// Specifies the location of a finding within its source item.
message Location {
  // Zero-based byte offsets within a content item.
  Range byte_range = 1;

  // Character offsets within a content item, included when the content type
  // is text. Default charset assumed to be UTF-8.
  Range codepoint_range = 2;

  // Location within an image's pixels.
  repeated ImageLocation image_boxes = 3;

  // Key of the finding.
  RecordKey record_key = 4;

  // Field id of the field containing the finding.
  FieldId field_id = 5;

  // Location within a `ContentItem.Table`.
  TableLocation table_location = 6;
}

// Location of a finding within a `ContentItem.Table`.
message TableLocation {
  // The zero-based index of the row where the finding is located.
  int64 row_index = 1;
}

// Generic half-open interval [start, end)
message Range {
  // Index of the first character of the range (inclusive).
  int64 start = 1;

  // Index of the last character of the range (exclusive).
  int64 end = 2;
}

// Bounding box encompassing detected text within an image.
message ImageLocation {
  // Top coordinate of the bounding box. (0,0) is upper left.
  int32 top = 1;

  // Left coordinate of the bounding box. (0,0) is upper left.
  int32 left = 2;

  // Width of the bounding box in pixels.
  int32 width = 3;

  // Height of the bounding box in pixels.
  int32 height = 4;
}

// Request to search for potentially sensitive info in a list of items
// and replace it with a default or provided content.
message RedactContentRequest {
  // Replacement text to use for findings of a given info type.
  message ReplaceConfig {
    // Type of information to replace. Only one ReplaceConfig per info_type
    // should be provided. If ReplaceConfig does not have an info_type, the DLP
    // API matches it against all info_types that are found but not specified in
    // another ReplaceConfig.
    InfoType info_type = 1;

    // Content replacing sensitive information of given type. Max 256 chars.
    string replace_with = 2;
  }

  // Configuration for determining how redaction of images should occur.
  message ImageRedactionConfig {
    // Type of information to redact from images.
    oneof target {
      // Only one per info_type should be provided per request. If not
      // specified, and redact_all_text is false, the DLP API will redact all
      // text that it matches against all info_types that are found, but not
      // specified in another ImageRedactionConfig.
      InfoType info_type = 1;

      // If true, all text found in the image, regardless whether it matches an
      // info_type, is redacted.
      bool redact_all_text = 2;
    }

    // The color to use when redacting content from an image. If not specified,
    // the default is black.
    Color redaction_color = 3;
  }

  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // The list of items to inspect. Up to 100 are allowed per request.
  repeated ContentItem items = 2;

  // The strings to replace findings text with. Must specify at least
  // one of these or one ImageRedactionConfig if redacting images.
  repeated ReplaceConfig replace_configs = 3;

  // The configuration for specifying what content to redact from images.
  repeated ImageRedactionConfig image_redaction_configs = 4;
}

// Represents a color in the RGB color space.
message Color {
  // The amount of red in the color as a value in the interval [0, 1].
  float red = 1;

  // The amount of green in the color as a value in the interval [0, 1].
  float green = 2;

  // The amount of blue in the color as a value in the interval [0, 1].
  float blue = 3;
}

// Results of redacting a list of items.
message RedactContentResponse {
  // The redacted content.
  repeated ContentItem items = 1;
}

// Request to de-identify a list of items.
message DeidentifyContentRequest {
  // Configuration for the de-identification of the list of content items.
  DeidentifyConfig deidentify_config = 1;

  // Configuration for the inspector.
  InspectConfig inspect_config = 2;

  // The list of items to inspect. Up to 100 are allowed per request.
  // All items will be treated as text/*.
  repeated ContentItem items = 3;
}

// Results of de-identifying a list of items.
message DeidentifyContentResponse {
  // The de-identified items.
  repeated ContentItem items = 1;

  // A review of the transformations that took place for each item.
  repeated DeidentificationSummary summaries = 2;
}

// Request to search for potentially sensitive info in a list of items.
message InspectContentRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // The list of items to inspect. Items in a single request are
  // considered "related" unless inspect_config.independent_inputs is true.
  // Up to 100 are allowed per request.
  //
  // NOTE(review): `InspectConfig` as declared in this file has no
  // `independent_inputs` field; confirm whether this reference is stale.
  repeated ContentItem items = 2;
}

// Results of inspecting a list of items.
message InspectContentResponse {
  // Each content_item from the request has a result in this list, in the
  // same order as the request.
  repeated InspectResult results = 1;
}

// Request for scheduling a scan of a data subset from a Google Cloud Platform
// data repository.
message CreateInspectOperationRequest {
  // Configuration for the inspector.
  InspectConfig inspect_config = 1;

  // Specification of the data set to process.
  StorageConfig storage_config = 2;

  // Optional location to store findings.
  OutputStorageConfig output_config = 3;

  // Additional configuration settings for long running operations.
  OperationConfig operation_config = 5;
}

// Cloud repository for storing output.
message OutputStorageConfig {
  // Destination of the output; at most one may be set.
  oneof type {
    // Store findings in a new table in the dataset.
    BigQueryTable table = 1;

    // The path to a Google Cloud Storage location to store output.
    // The bucket must already exist and
    // the Google APIs service account for DLP must have write permission to
    // write to the given bucket.
    // Results are split over multiple csv files with each file name matching
    // the pattern "[operation_id]_[count].csv", for example
    // `3094877188788974909_1.csv`. The `operation_id` matches the
    // identifier for the Operation, and the `count` is a counter used for
    // tracking the number of files written.
    //
    // The CSV file(s) contain the following columns regardless of storage type
    // scanned:
    // - id
    // - info_type
    // - likelihood
    // - byte size of finding
    // - quote
    // - timestamp
    //
    // For Cloud Storage the next columns are:
    //
    // - file_path
    // - start_offset
    //
    // For Cloud Datastore the next columns are:
    //
    // - project_id
    // - namespace_id
    // - path
    // - column_name
    // - offset
    //
    // For BigQuery the next columns are:
    //
    // - row_number
    // - project_id
    // - dataset_id
    // - table_id
    CloudStoragePath storage_path = 2;
  }
}

// Statistics regarding a specific InfoType.
message InfoTypeStatistics {
  // The type of finding this stat is for.
  InfoType info_type = 1;

  // Number of findings for this info type.
  int64 count = 2;
}

// Metadata returned within GetOperation for an inspect request.
message InspectOperationMetadata {
  // Total size in bytes that were processed.
  int64 processed_bytes = 1;

  // Estimate of the number of bytes to process.
  int64 total_estimated_bytes = 4;

  // Finding statistics, one entry per info type.
  repeated InfoTypeStatistics info_type_stats = 2;

  // The time at which this request was started.
  google.protobuf.Timestamp create_time = 3;

  // The inspect config used to create the Operation.
  InspectConfig request_inspect_config = 5;

  // The storage config used to create the Operation.
  StorageConfig request_storage_config = 6;

  // Optional location to store findings.
  OutputStorageConfig request_output_config = 7;
}

// The operational data.
message InspectOperationResult {
  // The server-assigned name, which is only unique within the same service that
  // originally returns it. If you use the default HTTP mapping, the
  // `name` should have the format of `inspect/results/{id}`.
  string name = 1;
}

// Request for the list of results in a given inspect operation.
message ListInspectFindingsRequest {
  // Identifier of the results set returned as metadata of
  // the longrunning operation created by a call to CreateInspectOperation.
  // Should be in the format of `inspect/results/{id}`.
  string name = 1;

  // Maximum number of results to return.
  // If 0, the implementation selects a reasonable value.
  int32 page_size = 2;

  // The value returned by the last `ListInspectFindingsResponse`; indicates
  // that this is a continuation of a prior `ListInspectFindings` call, and that
  // the system should return the next page of data.
  string page_token = 3;

  // Restricts findings to items that match. Supports info_type and likelihood.
  //
  // Examples:
  //
  // - info_type=EMAIL_ADDRESS
  // - info_type=PHONE_NUMBER,EMAIL_ADDRESS
  // - likelihood=VERY_LIKELY
  // - likelihood=VERY_LIKELY,LIKELY
  // - info_type=EMAIL_ADDRESS,likelihood=VERY_LIKELY,LIKELY
  string filter = 4;
}

// Response to the ListInspectFindings request.
message ListInspectFindingsResponse {
  // The results.
  InspectResult result = 1;

  // If not empty, indicates that there may be more results that match the
  // request; this value should be passed in a new `ListInspectFindingsRequest`.
  string next_page_token = 2;
}

// Description of the information type (infoType).
message InfoTypeDescription {
  // Internal name of the infoType.
  string name = 1;

  // Human readable form of the infoType name.
  string display_name = 2;

  // List of categories this infoType belongs to.
  repeated CategoryDescription categories = 3;
}

// Request for the list of info types belonging to a given category,
// or all supported info types if no category is specified.
message ListInfoTypesRequest {
  // Category name as returned by ListRootCategories.
  string category = 1;

  // Optional BCP-47 language code for localized info type friendly
  // names. If omitted, or if localized strings are not available,
  // en-US strings will be returned.
  string language_code = 2;
}

// Response to the ListInfoTypes request.
message ListInfoTypesResponse {
  // Set of sensitive info types belonging to a category.
  repeated InfoTypeDescription info_types = 1;
}

// Info Type Category description.
message CategoryDescription {
  // Internal name of the category.
  string name = 1;

  // Human readable form of the category name.
  string display_name = 2;
}

// Request for root categories of Info Types supported by the API.
// Example values might include "FINANCE", "HEALTH", "FAST", "DEFAULT".
message ListRootCategoriesRequest {
  // Optional language code for localized friendly category names.
  // If omitted or if localized strings are not available,
  // en-US strings will be returned.
  string language_code = 1;
}

// Response for ListRootCategories request.
message ListRootCategoriesResponse {
  // List of all info type categories supported by the API.
  repeated CategoryDescription categories = 1;
}

// Request for creating a risk analysis operation.
message AnalyzeDataSourceRiskRequest {
  // Privacy metric to compute.
  PrivacyMetric privacy_metric = 1;

  // Input dataset to compute metrics over.
  BigQueryTable source_table = 3;
}

// Privacy metric to compute for reidentification risk analysis.
message PrivacyMetric {
  // Compute numerical stats over an individual column, including
  // min, max, and quantiles.
  message NumericalStatsConfig {
    // Field to compute numerical stats on. Supported types are
    // integer, float, date, datetime, timestamp, time.
    FieldId field = 1;
  }

  // Compute categorical stats over an individual column, including
  // number of distinct values and value count distribution.
  message CategoricalStatsConfig {
    // Field to compute categorical stats on. All column types are
    // supported except for arrays and structs. However, it may be more
    // informative to use NumericalStats when the field type is supported,
    // depending on the data.
    FieldId field = 1;
  }

  // k-anonymity metric, used for analysis of reidentification risk.
  message KAnonymityConfig {
    // Set of fields to compute k-anonymity over. When multiple fields are
    // specified, they are considered a single composite key. Structs and
    // repeated data types are not supported; however, nested fields are
    // supported so long as they are not structs themselves or nested within
    // a repeated field.
    repeated FieldId quasi_ids = 1;

    // Optional message indicating that each distinct `EntityId` should not
    // contribute to the k-anonymity count more than once per equivalence class.
    EntityId entity_id = 2;
  }

  // l-diversity metric, used for analysis of reidentification risk.
  message LDiversityConfig {
    // Set of quasi-identifiers indicating how equivalence classes are
    // defined for the l-diversity computation. When multiple fields are
    // specified, they are considered a single composite key.
    repeated FieldId quasi_ids = 1;

    // Sensitive field for computing the l-value.
    FieldId sensitive_attribute = 2;
  }

  // The metric configuration to use; at most one may be set.
  oneof type {
    NumericalStatsConfig numerical_stats_config = 1;

    CategoricalStatsConfig categorical_stats_config = 2;

    KAnonymityConfig k_anonymity_config = 3;

    LDiversityConfig l_diversity_config = 4;
  }
}

// Metadata returned within the
// [`riskAnalysis.operations.get`](/dlp/docs/reference/rest/v2beta1/riskAnalysis.operations/get)
// for risk analysis.
message RiskAnalysisOperationMetadata {
  // The time at which this request was started.
  google.protobuf.Timestamp create_time = 1;

  // Privacy metric to compute.
  PrivacyMetric requested_privacy_metric = 2;

  // Input dataset to compute metrics over.
  BigQueryTable requested_source_table = 3;
}

// Result of a risk analysis
// [`Operation`](/dlp/docs/reference/rest/v2beta1/inspect.operations)
// request.
message RiskAnalysisOperationResult {
  // Result of the numerical stats computation.
  message NumericalStatsResult {
    // Minimum value appearing in the column.
    Value min_value = 1;

    // Maximum value appearing in the column.
    Value max_value = 2;

    // List of 99 values that partition the set of field values into 100 equal
    // sized buckets.
    repeated Value quantile_values = 4;
  }

  // Result of the categorical stats computation.
  message CategoricalStatsResult {
    // Histogram bucket of value frequencies in the column.
    message CategoricalStatsHistogramBucket {
      // Lower bound on the value frequency of the values in this bucket.
      int64 value_frequency_lower_bound = 1;

      // Upper bound on the value frequency of the values in this bucket.
      int64 value_frequency_upper_bound = 2;

      // Total number of records in this bucket.
      int64 bucket_size = 3;

      // Sample of value frequencies in this bucket. The total number of
      // values returned per bucket is capped at 20.
      repeated ValueFrequency bucket_values = 4;
    }

    // Histogram of value frequencies in the column.
    repeated CategoricalStatsHistogramBucket value_frequency_histogram_buckets = 5;
  }

  // Result of the k-anonymity computation.
  message KAnonymityResult {
    // The set of columns' values that share the same k-anonymity value.
    message KAnonymityEquivalenceClass {
      // Set of values defining the equivalence class. One value per
      // quasi-identifier column in the original KAnonymity metric message.
      // The order is always the same as the original request.
      repeated Value quasi_ids_values = 1;

      // Size of the equivalence class, for example number of rows with the
      // above set of values.
      int64 equivalence_class_size = 2;
    }

    // Histogram bucket of equivalence class sizes in the table.
    message KAnonymityHistogramBucket {
      // Lower bound on the size of the equivalence classes in this bucket.
      int64 equivalence_class_size_lower_bound = 1;

      // Upper bound on the size of the equivalence classes in this bucket.
      int64 equivalence_class_size_upper_bound = 2;

      // Total number of records in this bucket.
      int64 bucket_size = 3;

      // Sample of equivalence classes in this bucket. The total number of
      // classes returned per bucket is capped at 20.
      repeated KAnonymityEquivalenceClass bucket_values = 4;
    }

    // Histogram of k-anonymity equivalence classes.
    repeated KAnonymityHistogramBucket equivalence_class_histogram_buckets = 5;
  }

  // Result of the l-diversity computation.
  message LDiversityResult {
    // The set of columns' values that share the same l-diversity value.
    message LDiversityEquivalenceClass {
      // Quasi-identifier values defining the k-anonymity equivalence
      // class. The order is always the same as the original request.
      repeated Value quasi_ids_values = 1;

      // Size of the k-anonymity equivalence class.
      int64 equivalence_class_size = 2;

      // Number of distinct sensitive values in this equivalence class.
      int64 num_distinct_sensitive_values = 3;

      // Estimated frequencies of top sensitive values.
      repeated ValueFrequency top_sensitive_values = 4;
    }

    // Histogram bucket of sensitive value frequencies in the table.
    message LDiversityHistogramBucket {
      // Lower bound on the sensitive value frequencies of the equivalence
      // classes in this bucket.
      int64 sensitive_value_frequency_lower_bound = 1;

      // Upper bound on the sensitive value frequencies of the equivalence
      // classes in this bucket.
      int64 sensitive_value_frequency_upper_bound = 2;

      // Total number of records in this bucket.
      int64 bucket_size = 3;

      // Sample of equivalence classes in this bucket. The total number of
      // classes returned per bucket is capped at 20.
      repeated LDiversityEquivalenceClass bucket_values = 4;
    }

    // Histogram of l-diversity equivalence class sensitive value frequencies.
    repeated LDiversityHistogramBucket sensitive_value_frequency_histogram_buckets = 5;
  }

  // Values associated with this metric.
  oneof result {
    NumericalStatsResult numerical_stats_result = 3;

    CategoricalStatsResult categorical_stats_result = 4;

    KAnonymityResult k_anonymity_result = 5;

    LDiversityResult l_diversity_result = 6;
  }
}

// A value of a field, including its frequency.
message ValueFrequency {
  // A value contained in the field in question.
  Value value = 1;

  // How many times the value is contained in the field.
  int64 count = 2;
}

// Set of primitive values supported by the system.
message Value {
  // The held value; at most one of the alternatives may be set.
  oneof type {
    int64 integer_value = 1;

    double float_value = 2;

    string string_value = 3;

    bool boolean_value = 4;

    google.protobuf.Timestamp timestamp_value = 5;

    google.type.TimeOfDay time_value = 6;

    google.type.Date date_value = 7;
  }
}

// The configuration that controls how the data will change.
message DeidentifyConfig {
  oneof transformation {
    // Treat the dataset as free-form text and apply the same free text
    // transformation everywhere.
    InfoTypeTransformations info_type_transformations = 1;

    // Treat the dataset as structured. Transformations can be applied to
    // specific locations within structured datasets, such as transforming
    // a column within a table.
    RecordTransformations record_transformations = 2;
  }
}

// A rule for transforming a value.
message PrimitiveTransformation {
  // The transformation to apply; at most one may be set.
  oneof transformation {
    ReplaceValueConfig replace_config = 1;

    RedactConfig redact_config = 2;

    CharacterMaskConfig character_mask_config = 3;

    CryptoReplaceFfxFpeConfig crypto_replace_ffx_fpe_config = 4;

    FixedSizeBucketingConfig fixed_size_bucketing_config = 5;

    BucketingConfig bucketing_config = 6;

    ReplaceWithInfoTypeConfig replace_with_info_type_config = 7;

    TimePartConfig time_part_config = 8;

    CryptoHashConfig crypto_hash_config = 9;
  }
}

// For use with `Date`, `Timestamp`, and `TimeOfDay`, extract or preserve a
// portion of the value.
message TimePartConfig {
  // The portion of a date/time value that can be extracted.
  enum TimePart {
    TIME_PART_UNSPECIFIED = 0;

    // [0-9999]
    YEAR = 1;

    // [1-12]
    MONTH = 2;

    // [1-31]
    DAY_OF_MONTH = 3;

    // [1-7]
    DAY_OF_WEEK = 4;

    // [1-52]
    WEEK_OF_YEAR = 5;

    // [0-23]
    HOUR_OF_DAY = 6;
  }

  // The part of the value to extract.
  TimePart part_to_extract = 1;
}

// Pseudonymization method that generates surrogates via cryptographic hashing.
// Uses SHA-256.
// Outputs a 32 byte digest as an uppercase hex string
// (for example, 41D1567F7F99F1DC2A5FAB886DEE5BEE).
// Currently, only string and integer values can be hashed.
message CryptoHashConfig {
  // The key used by the hash function.
  CryptoKey crypto_key = 1;
}

// Replace each input value with a given `Value`.
message ReplaceValueConfig {
  // Value to replace it with.
  Value new_value = 1;
}

// Replace each matching finding with the name of the info_type.
message ReplaceWithInfoTypeConfig {
}

// Redact a given value. For example, if used with an `InfoTypeTransformation`
// transforming PHONE_NUMBER, and input 'My phone number is 206-555-0123', the
// output would be 'My phone number is '.
message RedactConfig {
}

// Characters to skip when doing deidentification of a value. These will be left
// alone and skipped.
message CharsToIgnore {
  // Predefined groups of characters that can be skipped.
  enum CharacterGroup {
    CHARACTER_GROUP_UNSPECIFIED = 0;

    // 0-9
    NUMERIC = 1;

    // A-Z
    ALPHA_UPPER_CASE = 2;

    // a-z
    ALPHA_LOWER_CASE = 3;

    // US Punctuation, one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
    PUNCTUATION = 4;

    // Whitespace character, one of [ \t\n\x0B\f\r]
    WHITESPACE = 5;
  }

  // The characters to skip, given either literally or as a predefined group.
  oneof characters {
    string characters_to_skip = 1;

    CharacterGroup common_characters_to_ignore = 2;
  }
}

// Partially mask a string by replacing a given number of characters with a
// fixed character. Masking can start from the beginning or end of the string.
// This can be used on data of any type (numbers, longs, and so on) and when
// de-identifying structured data we'll attempt to preserve the original data's
// type. (This allows you to take a long like 123 and modify it to a string like
// **3.)
message CharacterMaskConfig {
  // Character to mask the sensitive values—for example, "*" for an
  // alphabetic string such as name, or "0" for a numeric string such as ZIP
  // code or credit card number. String must have length 1. If not supplied, we
  // will default to "*" for strings, 0 for digits.
  string masking_character = 1;

  // Number of characters to mask. If not set, all matching chars will be
  // masked. Skipped characters do not count towards this tally.
  int32 number_to_mask = 2;

  // Mask characters in reverse order. For example, if `masking_character` is
  // '0', `number_to_mask` is 14, and `reverse_order` is false, then
  // 1234-5678-9012-3456 -> 00000000000000-3456
  // If `masking_character` is '*', `number_to_mask` is 3, and `reverse_order`
  // is true, then 12345 -> 12***
  bool reverse_order = 3;

  // When masking a string, items in this list will be skipped when replacing.
  // For example, if your string is 555-555-5555 and you ask us to skip `-` and
  // mask 5 chars with * we would produce ***-*55-5555.
  repeated CharsToIgnore characters_to_ignore = 4;
}

// Buckets values based on fixed size ranges.
// The Bucketing transformation can provide all of this functionality,
// but requires more configuration. This message is provided as a convenience to
// the user for simple bucketing strategies.
// The resulting value will be a hyphenated string of
// lower_bound-upper_bound.
// This can be used on data of type: double, long.
// If the bound Value type differs from the type of data
// being transformed, we will first attempt converting the type of the data to
// be transformed to match the type of the bound before comparing.
message FixedSizeBucketingConfig {
  // Lower bound value of buckets. All values less than `lower_bound` are
  // grouped together into a single bucket; for example if `lower_bound` = 10,
  // then all values less than 10 are replaced with the value “-10”. [Required].
  Value lower_bound = 1;

  // Upper bound value of buckets. All values greater than upper_bound are
  // grouped together into a single bucket; for example if `upper_bound` = 89,
  // then all values greater than 89 are replaced with the value “89+”.
  // [Required].
  Value upper_bound = 2;

  // Size of each bucket (except for minimum and maximum buckets). So if
  // `lower_bound` = 10, `upper_bound` = 89, and `bucket_size` = 10, then the
  // following buckets would be used: -10, 10-20, 20-30, 30-40, 40-50, 50-60,
  // 60-70, 70-80, 80-89, 89+. Precision up to 2 decimals works. [Required].
  double bucket_size = 3;
}

// Generalization function that buckets values based on ranges. The ranges and
// replacement values are dynamically provided by the user for custom behavior,
// such as 1-30 -> LOW 31-65 -> MEDIUM 66-100 -> HIGH
// This can be used on
// data of type: number, long, string, timestamp.
// If the bound `Value` type differs from the type of data being transformed, we
// will first attempt converting the type of the data to be transformed to match
// the type of the bound before comparing.
message BucketingConfig {
  // Buckets represented as ranges, along with replacement values. Ranges must
  // be non-overlapping.
  message Bucket {
    // Lower bound of the range, inclusive. Type should be the same as max if
    // used.
    Value min = 1;

    // Upper bound of the range, exclusive; type must match min.
    Value max = 2;

    // Replacement value for this bucket. If not provided
    // the default behavior will be to hyphenate the min-max range.
    Value replacement_value = 3;
  }

  // The buckets to sort values into.
  repeated Bucket buckets = 1;
}

// Replaces an identifier with a surrogate using FPE with the FFX
// mode of operation.
// The identifier must be representable by the US-ASCII character set.
// For a given crypto key and context, the same identifier will be
// replaced with the same surrogate.
// Identifiers must be at least two characters long.
// In the case that the identifier is the empty string, it will be skipped.
message CryptoReplaceFfxFpeConfig {
  // These are commonly used subsets of the alphabet that the FFX mode
  // natively supports. In the algorithm, the alphabet is selected using
  // the "radix". Therefore each corresponds to particular radix.
  enum FfxCommonNativeAlphabet {
    FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED = 0;

    // [0-9] (radix of 10)
    NUMERIC = 1;

    // [0-9A-F] (radix of 16)
    HEXADECIMAL = 2;

    // [0-9A-Z] (radix of 36)
    UPPER_CASE_ALPHA_NUMERIC = 3;

    // [0-9A-Za-z] (radix of 62)
    ALPHA_NUMERIC = 4;
  }

  // The key used by the encryption algorithm. [required]
  CryptoKey crypto_key = 1;

  // A context may be used for higher security since the same
  // identifier in two different contexts likely will be given a distinct
  // surrogate. The principle is that the likeliness is inversely related
  // to the ratio of the number of distinct identifiers per context over the
  // number of possible surrogates: As long as this ratio is small, the
  // likelihood is large.
  //
  // If the context is not set, a default tweak will be used.
  // If the context is set but:
  //
  // 1. there is no record present when transforming a given value or
  // 2. the field is not present when transforming a given value,
  //
  // a default tweak will be used.
  //
  // Note that case (1) is expected when an `InfoTypeTransformation` is
  // applied to both structured and non-structured `ContentItem`s.
  // Currently, the referenced field may be of value type integer or string.
  //
  // The tweak is constructed as a sequence of bytes in big endian byte order
  // such that:
  //
  // - a 64 bit integer is encoded followed by a single byte of value 1
  // - a string is encoded in UTF-8 format followed by a single byte of value 2
  //
  // This is also known as the 'tweak', as in tweakable encryption.
  FieldId context = 2;

  // How the alphabet is selected; at most one may be set.
  oneof alphabet {
    FfxCommonNativeAlphabet common_alphabet = 4;

    // This is supported by mapping these to the alphanumeric characters
    // that the FFX mode natively supports. This happens before/after
    // encryption/decryption.
    // Each character listed must appear only once.
    // Number of characters must be in the range [2, 62].
    // This must be encoded as ASCII.
    // The order of characters does not matter.
    string custom_alphabet = 5;

    // The native way to select the alphabet. Must be in the range [2, 62].
    int32 radix = 6;
  }
}

// This is a data encryption key (DEK) (as opposed to
// a key encryption key (KEK) stored by KMS).
// When using KMS to wrap/unwrap DEKs, be sure to set an appropriate
// IAM policy on the KMS CryptoKey (KEK) to ensure an attacker cannot
// unwrap the data crypto key.
message CryptoKey {
  // Where the key comes from; at most one may be set.
  oneof source {
    TransientCryptoKey transient = 1;

    UnwrappedCryptoKey unwrapped = 2;

    KmsWrappedCryptoKey kms_wrapped = 3;
  }
}

// Use this to have a random data crypto key generated.
// It will be discarded after the operation/request finishes.
message TransientCryptoKey {
  // Name of the key. [required]
  // This is an arbitrary string used to differentiate different keys.
  // A unique key is generated per name: two separate `TransientCryptoKey`
  // protos share the same generated key if their names are the same.
  // When the data crypto key is generated, this name is not used in any way
  // (repeating the api call will result in a different key being generated).
  string name = 1;
}

// Using raw keys is prone to security risks due to accidentally
// leaking the key. Choose another type of key if possible.
message UnwrappedCryptoKey {
  // The AES 128/192/256 bit key. [required]
  bytes key = 1;
}

// Include to use an existing data crypto key wrapped by KMS.
// Authorization requires the following IAM permissions when sending a request
// to perform a crypto transformation using a kms-wrapped crypto key:
// dlp.kms.encrypt
message KmsWrappedCryptoKey {
  // The wrapped data crypto key. [required]
  bytes wrapped_key = 1;

  // The resource name of the KMS CryptoKey to use for unwrapping. [required]
  string crypto_key_name = 2;
}

// A type of transformation that will scan unstructured text and
// apply various `PrimitiveTransformation`s to each finding, where the
// transformation is applied to only values that were identified as a specific
// info_type.
message InfoTypeTransformations {
  // A transformation to apply to text that is identified as a specific
  // info_type.
  message InfoTypeTransformation {
    // Info types to apply the transformation to. Empty list will match all
    // available info types for this transformation.
    repeated InfoType info_types = 1;

    // Primitive transformation to apply to the info type. [required]
    PrimitiveTransformation primitive_transformation = 2;
  }

  // Transformation for each info type. Cannot specify more than one
  // for a given info type. [required]
  repeated InfoTypeTransformation transformations = 1;
}

// The transformation to apply to the field.
message FieldTransformation {
  // Input field(s) to apply the transformation to. [required]
  repeated FieldId fields = 1;

  // Only apply the transformation if the condition evaluates to true for the
  // given `RecordCondition`. The conditions are allowed to reference fields
  // that are not used in the actual transformation.
[optional] // // Example Use Cases: // // - Apply a different bucket transformation to an age column if the zip code // column for the same record is within a specific range. // - Redact a field if the date of birth field is greater than 85. RecordCondition condition = 3; // Transformation to apply. [required] oneof transformation { // Apply the transformation to the entire field. PrimitiveTransformation primitive_transformation = 4; // Treat the contents of the field as free text, and selectively // transform content that matches an `InfoType`. InfoTypeTransformations info_type_transformations = 5; } } // A type of transformation that is applied over structured data such as a // table. message RecordTransformations { // Transform the record by applying various field transformations. repeated FieldTransformation field_transformations = 1; // Configuration defining which records get suppressed entirely. Records that // match any suppression rule are omitted from the output [optional]. repeated RecordSuppression record_suppressions = 2; } // Configuration to suppress records whose suppression conditions evaluate to // true. message RecordSuppression { RecordCondition condition = 1; } // A condition for determining whether a transformation should be applied to // a field. message RecordCondition { // The field type of `value` and `field` do not need to match to be // considered equal, but not all comparisons are possible. // // A `value` of type: // // - `string` can be compared against all other types // - `boolean` can only be compared against other booleans // - `integer` can be compared against doubles or a string if the string value // can be parsed as an integer. // - `double` can be compared against integers or a string if the string can // be parsed as a double. // - `Timestamp` can be compared against strings in RFC 3339 date string // format. // - `TimeOfDay` can be compared against timestamps and strings in the format // of 'HH:mm:ss'. 
// // If we fail to compare do to type mismatch, a warning will be given and // the condition will evaluate to false. message Condition { // Field within the record this condition is evaluated against. [required] FieldId field = 1; // Operator used to compare the field or info type to the value. [required] RelationalOperator operator = 3; // Value to compare against. [Required, except for `EXISTS` tests.] Value value = 4; } message Conditions { repeated Condition conditions = 1; } // A collection of expressions message Expressions { enum LogicalOperator { LOGICAL_OPERATOR_UNSPECIFIED = 0; AND = 1; } // The operator to apply to the result of conditions. Default and currently // only supported value is `AND`. LogicalOperator logical_operator = 1; oneof type { Conditions conditions = 3; } } Expressions expressions = 3; } // High level summary of deidentification. message DeidentificationSummary { // Total size in bytes that were transformed in some way. int64 transformed_bytes = 2; // Transformations applied to the dataset. repeated TransformationSummary transformation_summaries = 3; } // Summary of a single tranformation. message TransformationSummary { // A collection that informs the user the number of times a particular // `TransformationResultCode` and error details occurred. message SummaryResult { int64 count = 1; TransformationResultCode code = 2; // A place for warnings or errors to show up if a transformation didn't // work as expected. string details = 3; } // Possible outcomes of transformations. enum TransformationResultCode { TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0; SUCCESS = 1; ERROR = 2; } // Set if the transformation was limited to a specific info_type. InfoType info_type = 1; // Set if the transformation was limited to a specific FieldId. FieldId field = 2; // The specific transformation these stats apply to. PrimitiveTransformation transformation = 3; // The field transformation that was applied. 
This list will contain // multiple only in the case of errors. repeated FieldTransformation field_transformations = 5; // The specific suppression option these stats apply to. RecordSuppression record_suppress = 6; repeated SummaryResult results = 4; } // Categorization of results based on how likely they are to represent a match, // based on the number of elements they contain which imply a match. enum Likelihood { // Default value; information with all likelihoods is included. LIKELIHOOD_UNSPECIFIED = 0; // Few matching elements. VERY_UNLIKELY = 1; UNLIKELY = 2; // Some matching elements. POSSIBLE = 3; LIKELY = 4; // Many matching elements. VERY_LIKELY = 5; } // Operators available for comparing the value of fields. enum RelationalOperator { RELATIONAL_OPERATOR_UNSPECIFIED = 0; // Equal. EQUAL_TO = 1; // Not equal to. NOT_EQUAL_TO = 2; // Greater than. GREATER_THAN = 3; // Less than. LESS_THAN = 4; // Greater than or equals. GREATER_THAN_OR_EQUALS = 5; // Less than or equals. LESS_THAN_OR_EQUALS = 6; // Exists EXISTS = 7; }