Module aws_lambda_powertools.utilities.data_masking
Sub-modules
aws_lambda_powertools.utilities.data_masking.base
aws_lambda_powertools.utilities.data_masking.constants
aws_lambda_powertools.utilities.data_masking.exceptions
aws_lambda_powertools.utilities.data_masking.provider
Classes
class DataMasking (provider: BaseProvider | None = None, raise_on_missing_field: bool = True)
-
The DataMasking class orchestrates erasing, encrypting, and decrypting for the base provider.
Example:
from aws_lambda_powertools.utilities.data_masking.base import DataMasking

def lambda_handler(event, context):
    masker = DataMasking()

    data = {
        "project": "powertools",
        "sensitive": "password"
    }

    erased = masker.erase(data, fields=["sensitive"])

    return erased
Expand source code
class DataMasking:
    """
    The DataMasking class orchestrates erasing, encrypting, and decrypting
    for the base provider.

    Example:
    ```
    from aws_lambda_powertools.utilities.data_masking.base import DataMasking

    def lambda_handler(event, context):
        masker = DataMasking()

        data = {
            "project": "powertools",
            "sensitive": "password"
        }

        erased = masker.erase(data,fields=["sensitive"])

        return erased

    ```
    """

    def __init__(
        self,
        provider: BaseProvider | None = None,
        raise_on_missing_field: bool = True,
    ):
        """
        Parameters
        ----------
        provider : BaseProvider | None
            Provider that performs the actual erase/encrypt/decrypt work.
            A default ``BaseProvider()`` is created when omitted.
        raise_on_missing_field : bool
            When True, a field expression that matches nothing raises
            ``DataMaskingFieldNotFoundError``; when False a warning is emitted instead.
        """
        self.provider = provider or BaseProvider()
        # NOTE: we depend on Provider to not confuse customers in passing the same 2 serializers in 2 places
        self.json_serializer = self.provider.json_serializer
        self.json_deserializer = self.provider.json_deserializer
        self.raise_on_missing_field = raise_on_missing_field

    def encrypt(
        self,
        data: dict | Mapping | Sequence | Number,
        provider_options: dict | None = None,
        **encryption_context: str,
    ) -> str:
        """Run the provider's ``encrypt`` action over the entire ``data`` (no field selection)."""
        return self._apply_action(
            data=data,
            fields=None,
            action=self.provider.encrypt,
            provider_options=provider_options or {},
            **encryption_context,
        )

    def decrypt(
        self,
        data,
        provider_options: dict | None = None,
        **encryption_context: str,
    ) -> Any:
        """Run the provider's ``decrypt`` action over the entire ``data`` (no field selection)."""
        return self._apply_action(
            data=data,
            fields=None,
            action=self.provider.decrypt,
            provider_options=provider_options or {},
            **encryption_context,
        )

    @overload
    def erase(self, data, fields: None) -> str: ...

    @overload
    def erase(self, data: list, fields: list[str]) -> list[str]: ...

    @overload
    def erase(self, data: tuple, fields: list[str]) -> tuple[str]: ...

    @overload
    def erase(self, data: dict, fields: list[str]) -> dict: ...

    def erase(self, data: Sequence | Mapping, fields: list[str] | None = None) -> str | list[str] | tuple[str] | dict:
        """
        Run the provider's ``erase`` action.

        With ``fields=None`` the action receives the whole ``data``; otherwise it is
        applied only to the values matched by each field expression.
        """
        return self._apply_action(data=data, fields=fields, action=self.provider.erase)

    def _apply_action(
        self,
        data,
        fields: list[str] | None,
        action: Callable,
        provider_options: dict | None = None,
        **encryption_context: str,
    ):
        """
        Helper method to determine whether to apply a given action to the entire input data
        or to specific fields if the 'fields' argument is specified.

        Parameters
        ----------
        data : str | dict
            The input data to process.
        fields : list[str] | None
            A list of fields to apply the action to. If 'None', the action is applied to the entire 'data'.
        action : Callable
            The action to apply to the data. It should be a callable that performs an operation on the data
            and returns the modified value.
        provider_options : dict
            Provider specific keyword arguments to propagate; used as an escape hatch.
        encryption_context: str
            Encryption context to use in encrypt and decrypt operations.

        Returns
        -------
        any
            The modified data after applying the action.
        """

        # An empty list is "not None": it is routed to the per-field path, which rejects it.
        if fields is not None:
            logger.debug(f"Running action {action.__name__} with fields {fields}")
            return self._apply_action_to_fields(
                data=data,
                fields=fields,
                action=action,
                provider_options=provider_options,
                **encryption_context,
            )
        else:
            logger.debug(f"Running action {action.__name__} with the entire data")
            return action(data=data, provider_options=provider_options, **encryption_context)

    def _apply_action_to_fields(
        self,
        data: dict | str,
        fields: list,
        action: Callable,
        provider_options: dict | None = None,
        **encryption_context: str,
    ) -> dict | str:
        """
        This method takes the input data, which can be either a dictionary or a JSON string,
        and erases, encrypts, or decrypts the specified fields.

        Parameters
        ----------
        data : dict | str
            The input data to process. It can be either a dictionary or a JSON string.
        fields : list
            A list of fields to apply the action to. Each field is an expression handed to
            ``parse`` (e.g. a dotted path such as ``'a.b.c'`` for nested keys).
        action : Callable
            The action to apply to the fields. It should be a callable that takes the current
            value of the field as the first argument and any additional arguments that might be required
            for the action. It performs an operation on the current value using the provided arguments and
            returns the modified value.
        provider_options : dict
            Optional dictionary representing additional options for the action.
        **encryption_context: str
            Additional keyword arguments collected into a dictionary.

        Returns
        -------
        dict | str
            The parsed data after applying the action to the specified fields.

        Raises
        ------
        ValueError
            If 'fields' is empty (or None).
        DataMaskingUnsupportedTypeError
            If the 'data' parameter is not a traversable type (dict or str).
        DataMaskingFieldNotFoundError
            If a field expression matches nothing and ``raise_on_missing_field`` is True.

        Example
        -------
        ```python
        >>> data = {'a': {'b': {'c': 1}}, 'x': {'y': 2}}
        >>> fields = ['a.b.c', 'x.y']
        # The function will transform the value at 'a.b.c' (1) and 'x.y' (2)
        # and store the result as:
        new_dict = {'a': {'b': {'c': '*****'}}, 'x': {'y': '*****'}}
        ```
        """

        data_parsed: dict = self._normalize_data_to_parse(fields, data)

        # For in-place updates, json_parse accepts a callback function
        # this function must receive 3 args: field_value, fields, field_name
        # We create a partial callback to pre-populate known options (action, provider opts, enc ctx).
        # None of these depend on the field being processed, so it is built once, outside the loop.
        update_callback = functools.partial(
            self._call_action,
            action=action,
            provider_options=provider_options,
            **encryption_context,  # type: ignore[arg-type]
        )

        # Iterate over each field to be parsed.
        for field_parse in fields:
            # Parse the field expression using a 'parse' function.
            json_parse = parse(field_parse)
            # Find the corresponding keys in the normalized data using the parsed expression.
            result_parse = json_parse.find(data_parsed)

            if not result_parse:
                # NOTE(review): both messages below embed the full parsed payload, which may
                # contain the very values being masked - consider whether that is acceptable.
                if self.raise_on_missing_field:
                    # If the data for the field is not found, raise an exception.
                    raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}")
                else:
                    # If the data for the field is not found, warning.
                    warnings.warn(f"Field or expression {field_parse} not found in {data_parsed}", stacklevel=2)

            json_parse.update(
                data_parsed,
                lambda field_value, fields, field_name: update_callback(field_value, fields, field_name),  # type: ignore[misc]
            )

        return data_parsed

    @staticmethod
    def _call_action(
        field_value: Any,
        fields: dict[str, Any],
        field_name: str,
        action: Callable,
        provider_options: dict[str, Any] | None = None,
        **encryption_context,
    ) -> Any:
        """
        Apply a specified action to a field value and update the fields dictionary.

        Params:
        --------
        - field_value: Current value of the field being processed.
        - fields: Dictionary representing the fields being processed (mutable).
        - field_name: Name of the field being processed.
        - action: Callable (function or method) to be applied to the field_value.
        - provider_options: Optional dictionary representing additional options for the action.
        - **encryption_context: Additional keyword arguments collected into a dictionary.

        Returns:
        - fields[field_name]: Returns the processed field value
        """
        # Write the result back into the container, then hand the same value to the caller.
        fields[field_name] = action(field_value, provider_options=provider_options, **encryption_context)
        return fields[field_name]

    def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict:
        """
        Turn ``data`` into a parsed structure that field expressions can be applied to.

        Raises ``ValueError`` when ``fields`` is empty and
        ``DataMaskingUnsupportedTypeError`` when ``data`` is neither ``str`` nor ``dict``.
        """
        if not fields:
            raise ValueError("No fields specified.")

        if isinstance(data, str):
            # Parse JSON string as dictionary
            data_parsed = self.json_deserializer(data)
        elif isinstance(data, dict):
            # Convert the data to a JSON string in case it contains non-string keys (e.g., ints)
            # Parse the JSON string back into a dictionary
            data_parsed = self.json_deserializer(self.json_serializer(data))
        else:
            raise DataMaskingUnsupportedTypeError(
                f"Unsupported data type. Expected a traversable type (dict or str), but got {type(data)}.",
            )

        return data_parsed
Methods
def decrypt(self, data, provider_options: dict | None = None, **encryption_context: str) ‑> Any
def encrypt(self, data: dict | Mapping | Sequence | Number, provider_options: dict | None = None, **encryption_context: str) ‑> str
def erase(self, data: Sequence | Mapping, fields: list[str] | None = None) ‑> str | list[str] | tuple[str] | dict