Module aws_lambda_powertools.utilities.batch
Batch processing utility
Expand source code
# -*- coding: utf-8 -*-
"""
Batch processing utility
"""
from aws_lambda_powertools.utilities.batch.base import (
BasePartialProcessor,
BatchProcessor,
EventType,
FailureResponse,
SuccessResponse,
batch_processor,
)
from aws_lambda_powertools.utilities.batch.exceptions import ExceptionInfo
__all__ = (
"BatchProcessor",
"BasePartialProcessor",
"ExceptionInfo",
"EventType",
"FailureResponse",
"SuccessResponse",
"batch_processor",
)
Sub-modules
aws_lambda_powertools.utilities.batch.base
-
Batch processing utilities
aws_lambda_powertools.utilities.batch.exceptions
-
Batch processing exceptions
Functions
def batch_processor(handler: Callable, event: Dict[~KT, ~VT], context: LambdaContext, record_handler: Callable, processor: BasePartialProcessor)
-
Middleware to handle batch event processing
Parameters
handler
:Callable
- Lambda's handler
event
:Dict
- Lambda's Event
context
:LambdaContext
- Lambda's Context
record_handler
:Callable
- Callable to process each record from the batch
processor
:BasePartialProcessor
- Batch Processor to handle partial failure cases
Examples
Processes Lambda's event with a BasePartialProcessor
>>> from aws_lambda_powertools.utilities.batch import batch_processor, BatchProcessor >>> >>> def record_handler(record): >>> return record["body"] >>> >>> @batch_processor(record_handler=record_handler, processor=BatchProcessor()) >>> def handler(event, context): >>> return {"StatusCode": 200}
Limitations
- Async batch processors
Expand source code
@lambda_handler_decorator def batch_processor( handler: Callable, event: Dict, context: LambdaContext, record_handler: Callable, processor: BasePartialProcessor ): """ Middleware to handle batch event processing Parameters ---------- handler: Callable Lambda's handler event: Dict Lambda's Event context: LambdaContext Lambda's Context record_handler: Callable Callable to process each record from the batch processor: BasePartialProcessor Batch Processor to handle partial failure cases Examples -------- **Processes Lambda's event with a BasePartialProcessor** >>> from aws_lambda_powertools.utilities.batch import batch_processor, BatchProcessor >>> >>> def record_handler(record): >>> return record["body"] >>> >>> @batch_processor(record_handler=record_handler, processor=BatchProcessor()) >>> def handler(event, context): >>> return {"StatusCode": 200} Limitations ----------- * Async batch processors """ records = event["Records"] with processor(records, record_handler, lambda_context=context): processor.process() return handler(event, context)
Classes
class BasePartialProcessor
-
Abstract class for batch processors.
Expand source code
class BasePartialProcessor(ABC): """ Abstract class for batch processors. """ lambda_context: LambdaContext def __init__(self): self.success_messages: List[BatchEventTypes] = [] self.fail_messages: List[BatchEventTypes] = [] self.exceptions: List[ExceptionInfo] = [] @abstractmethod def _prepare(self): """ Prepare context manager. """ raise NotImplementedError() @abstractmethod def _clean(self): """ Clear context manager. """ raise NotImplementedError() @abstractmethod def _process_record(self, record: dict): """ Process record with handler. """ raise NotImplementedError() def process(self) -> List[Tuple]: """ Call instance's handler for each record. """ return [self._process_record(record) for record in self.records] def __enter__(self): self._prepare() return self def __exit__(self, exception_type, exception_value, traceback): self._clean() def __call__(self, records: List[dict], handler: Callable, lambda_context: Optional[LambdaContext] = None): """ Set instance attributes before execution Parameters ---------- records: List[dict] List with objects to be processed. handler: Callable Callable to process "records" entries. """ self.records = records self.handler = handler # NOTE: If a record handler has `lambda_context` parameter in its function signature, we inject it. # This is the earliest we can inspect for signature to prevent impacting performance. # # Mechanism: # # 1. When using the `@batch_processor` decorator, this happens automatically. # 2. When using the context manager, customers have to include `lambda_context` param. # # Scenario: Injects Lambda context # # def record_handler(record, lambda_context): ... # noqa: E800 # with processor(records=batch, handler=record_handler, lambda_context=context): ... # noqa: E800 # # Scenario: Does NOT inject Lambda context (default) # # def record_handler(record): pass # noqa: E800 # with processor(records=batch, handler=record_handler): ... # noqa: E800 # if lambda_context is None: self._handler_accepts_lambda_context = False else: self.lambda_context = lambda_context self._handler_accepts_lambda_context = "lambda_context" in inspect.signature(self.handler).parameters return self def success_handler(self, record, result: Any) -> SuccessResponse: """ Keeps track of batch records that were processed successfully Parameters ---------- record: Any record that succeeded processing result: Any result from record handler Returns ------- SuccessResponse "success", result, original record """ entry = ("success", result, record) self.success_messages.append(record) return entry def failure_handler(self, record, exception: ExceptionInfo) -> FailureResponse: """ Keeps track of batch records that failed processing Parameters ---------- record: Any record that failed processing exception: ExceptionInfo Exception information containing type, value, and traceback (sys.exc_info()) Returns ------- FailureResponse "fail", exceptions args, original record """ exception_string = f"{exception[0]}:{exception[1]}" entry = ("fail", exception_string, record) logger.debug(f"Record processing exception: {exception_string}") self.exceptions.append(exception) self.fail_messages.append(record) return entry
Ancestors
- abc.ABC
Subclasses
Class variables
var lambda_context : LambdaContext
Methods
def failure_handler(self, record, exception: Tuple[Optional[Type[BaseException]], Optional[BaseException], Optional[traceback]]) ‑> Tuple[str, str, Union[SQSRecord, KinesisStreamRecord, DynamoDBRecord, Union[Type[SqsRecordModel], Type[DynamoDBStreamRecordModel], Type[KinesisDataStreamRecord], None]]]
-
Keeps track of batch records that failed processing
Parameters
record
:Any
- record that failed processing
exception
:ExceptionInfo
- Exception information containing type, value, and traceback (sys.exc_info())
Returns
FailureResponse
- "fail", exceptions args, original record
Expand source code
def failure_handler(self, record, exception: ExceptionInfo) -> FailureResponse: """ Keeps track of batch records that failed processing Parameters ---------- record: Any record that failed processing exception: ExceptionInfo Exception information containing type, value, and traceback (sys.exc_info()) Returns ------- FailureResponse "fail", exceptions args, original record """ exception_string = f"{exception[0]}:{exception[1]}" entry = ("fail", exception_string, record) logger.debug(f"Record processing exception: {exception_string}") self.exceptions.append(exception) self.fail_messages.append(record) return entry
def process(self) ‑> List[Tuple[]]
-
Call instance's handler for each record.
Expand source code
def process(self) -> List[Tuple]: """ Call instance's handler for each record. """ return [self._process_record(record) for record in self.records]
def success_handler(self, record, result: Any) ‑> Tuple[str, Any, Union[SQSRecord, KinesisStreamRecord, DynamoDBRecord, Union[Type[SqsRecordModel], Type[DynamoDBStreamRecordModel], Type[KinesisDataStreamRecord], None]]]
-
Keeps track of batch records that were processed successfully
Parameters
record
:Any
- record that succeeded processing
result
:Any
- result from record handler
Returns
SuccessResponse
- "success", result, original record
Expand source code
def success_handler(self, record, result: Any) -> SuccessResponse: """ Keeps track of batch records that were processed successfully Parameters ---------- record: Any record that succeeded processing result: Any result from record handler Returns ------- SuccessResponse "success", result, original record """ entry = ("success", result, record) self.success_messages.append(record) return entry
class BatchProcessor (event_type: EventType, model: Optional[ForwardRef('BatchTypeModels')] = None)
-
Process native partial responses from SQS, Kinesis Data Streams, and DynamoDB.
Example
Process batch triggered by SQS
import json from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord from aws_lambda_powertools.utilities.typing import LambdaContext processor = BatchProcessor(event_type=EventType.SQS) tracer = Tracer() logger = Logger() @tracer.capture_method def record_handler(record: SQSRecord): payload: str = record.body if payload: item: dict = json.loads(payload) ... @logger.inject_lambda_context @tracer.capture_lambda_handler @batch_processor(record_handler=record_handler, processor=processor) def lambda_handler(event, context: LambdaContext): return processor.response()
Process batch triggered by Kinesis Data Streams
import json from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord from aws_lambda_powertools.utilities.typing import LambdaContext processor = BatchProcessor(event_type=EventType.KinesisDataStreams) tracer = Tracer() logger = Logger() @tracer.capture_method def record_handler(record: KinesisStreamRecord): logger.info(record.kinesis.data_as_text) payload: dict = record.kinesis.data_as_json() ... @logger.inject_lambda_context @tracer.capture_lambda_handler @batch_processor(record_handler=record_handler, processor=processor) def lambda_handler(event, context: LambdaContext): return processor.response()
Process batch triggered by DynamoDB Data Streams
import json from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord from aws_lambda_powertools.utilities.typing import LambdaContext processor = BatchProcessor(event_type=EventType.DynamoDBStreams) tracer = Tracer() logger = Logger() @tracer.capture_method def record_handler(record: DynamoDBRecord): logger.info(record.dynamodb.new_image) payload: dict = json.loads(record.dynamodb.new_image.get("item")) # alternatively: # changes: Dict[str, Any] = record.dynamodb.new_image # noqa: E800 # payload = change.get("Message") -> "<payload>" ... @logger.inject_lambda_context @tracer.capture_lambda_handler def lambda_handler(event, context: LambdaContext): batch = event["Records"] with processor(records=batch, processor=processor): processed_messages = processor.process() # kick off processing, return list[tuple] return processor.response()
Raises
BatchProcessingError
- When all batch records fail processing
Process batch and partially report failed items
Parameters
event_type
:EventType
- Whether this is a SQS, DynamoDB Streams, or Kinesis Data Stream event
model
:Optional["BatchTypeModels"]
- Parser's data model using either SqsRecordModel, DynamoDBStreamRecordModel, KinesisDataStreamRecord
Exceptions
BatchProcessingError Raised when the entire batch has failed processing
Expand source code
class BatchProcessor(BasePartialProcessor): """Process native partial responses from SQS, Kinesis Data Streams, and DynamoDB. Example ------- ## Process batch triggered by SQS ```python import json from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord from aws_lambda_powertools.utilities.typing import LambdaContext processor = BatchProcessor(event_type=EventType.SQS) tracer = Tracer() logger = Logger() @tracer.capture_method def record_handler(record: SQSRecord): payload: str = record.body if payload: item: dict = json.loads(payload) ... @logger.inject_lambda_context @tracer.capture_lambda_handler @batch_processor(record_handler=record_handler, processor=processor) def lambda_handler(event, context: LambdaContext): return processor.response() ``` ## Process batch triggered by Kinesis Data Streams ```python import json from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord from aws_lambda_powertools.utilities.typing import LambdaContext processor = BatchProcessor(event_type=EventType.KinesisDataStreams) tracer = Tracer() logger = Logger() @tracer.capture_method def record_handler(record: KinesisStreamRecord): logger.info(record.kinesis.data_as_text) payload: dict = record.kinesis.data_as_json() ... @logger.inject_lambda_context @tracer.capture_lambda_handler @batch_processor(record_handler=record_handler, processor=processor) def lambda_handler(event, context: LambdaContext): return processor.response() ``` ## Process batch triggered by DynamoDB Data Streams ```python import json from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord from aws_lambda_powertools.utilities.typing import LambdaContext processor = BatchProcessor(event_type=EventType.DynamoDBStreams) tracer = Tracer() logger = Logger() @tracer.capture_method def record_handler(record: DynamoDBRecord): logger.info(record.dynamodb.new_image) payload: dict = json.loads(record.dynamodb.new_image.get("item")) # alternatively: # changes: Dict[str, Any] = record.dynamodb.new_image # noqa: E800 # payload = change.get("Message") -> "<payload>" ... @logger.inject_lambda_context @tracer.capture_lambda_handler def lambda_handler(event, context: LambdaContext): batch = event["Records"] with processor(records=batch, processor=processor): processed_messages = processor.process() # kick off processing, return list[tuple] return processor.response() ``` Raises ------ BatchProcessingError When all batch records fail processing """ DEFAULT_RESPONSE: Dict[str, List[Optional[dict]]] = {"batchItemFailures": []} def __init__(self, event_type: EventType, model: Optional["BatchTypeModels"] = None): """Process batch and partially report failed items Parameters ---------- event_type: EventType Whether this is a SQS, DynamoDB Streams, or Kinesis Data Stream event model: Optional["BatchTypeModels"] Parser's data model using either SqsRecordModel, DynamoDBStreamRecordModel, KinesisDataStreamRecord Exceptions ---------- BatchProcessingError Raised when the entire batch has failed processing """ self.event_type = event_type self.model = model self.batch_response = copy.deepcopy(self.DEFAULT_RESPONSE) self._COLLECTOR_MAPPING = { EventType.SQS: self._collect_sqs_failures, EventType.KinesisDataStreams: self._collect_kinesis_failures, EventType.DynamoDBStreams: self._collect_dynamodb_failures, } self._DATA_CLASS_MAPPING = { EventType.SQS: SQSRecord, EventType.KinesisDataStreams: KinesisStreamRecord, EventType.DynamoDBStreams: DynamoDBRecord, } super().__init__() def response(self): """Batch items that failed processing, if any""" return self.batch_response def _prepare(self): """ Remove results from previous execution. """ self.success_messages.clear() self.fail_messages.clear() self.exceptions.clear() self.batch_response = copy.deepcopy(self.DEFAULT_RESPONSE) def _process_record(self, record: dict) -> Union[SuccessResponse, FailureResponse]: """ Process a record with instance's handler Parameters ---------- record: dict A batch record to be processed. """ data = self._to_batch_type(record=record, event_type=self.event_type, model=self.model) try: if self._handler_accepts_lambda_context: result = self.handler(record=data, lambda_context=self.lambda_context) else: result = self.handler(record=data) return self.success_handler(record=record, result=result) except Exception: return self.failure_handler(record=data, exception=sys.exc_info()) def _clean(self): """ Report messages to be deleted in case of partial failure. """ if not self._has_messages_to_report(): return if self._entire_batch_failed(): raise BatchProcessingError( msg=f"All records failed processing. {len(self.exceptions)} individual errors logged " f"separately below.", child_exceptions=self.exceptions, ) messages = self._get_messages_to_report() self.batch_response = {"batchItemFailures": messages} def _has_messages_to_report(self) -> bool: if self.fail_messages: return True logger.debug(f"All {len(self.success_messages)} records successfully processed") return False def _entire_batch_failed(self) -> bool: return len(self.exceptions) == len(self.records) def _get_messages_to_report(self) -> List[Dict[str, str]]: """ Format messages to use in batch deletion """ return self._COLLECTOR_MAPPING[self.event_type]() # Event Source Data Classes follow python idioms for fields # while Parser/Pydantic follows the event field names to the latter def _collect_sqs_failures(self): failures = [] for msg in self.fail_messages: msg_id = msg.messageId if self.model else msg.message_id failures.append({"itemIdentifier": msg_id}) return failures def _collect_kinesis_failures(self): failures = [] for msg in self.fail_messages: msg_id = msg.kinesis.sequenceNumber if self.model else msg.kinesis.sequence_number failures.append({"itemIdentifier": msg_id}) return failures def _collect_dynamodb_failures(self): failures = [] for msg in self.fail_messages: msg_id = msg.dynamodb.SequenceNumber if self.model else msg.dynamodb.sequence_number failures.append({"itemIdentifier": msg_id}) return failures @overload def _to_batch_type(self, record: dict, event_type: EventType, model: "BatchTypeModels") -> "BatchTypeModels": ... # pragma: no cover @overload def _to_batch_type(self, record: dict, event_type: EventType) -> EventSourceDataClassTypes: ... # pragma: no cover def _to_batch_type(self, record: dict, event_type: EventType, model: Optional["BatchTypeModels"] = None): if model is not None: return model.parse_obj(record) return self._DATA_CLASS_MAPPING[event_type](record)
Ancestors
- BasePartialProcessor
- abc.ABC
Class variables
var DEFAULT_RESPONSE : Dict[str, List[Optional[dict]]]
Methods
def response(self)
-
Batch items that failed processing, if any
Expand source code
def response(self): """Batch items that failed processing, if any""" return self.batch_response
Inherited members
class EventType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Expand source code
class EventType(Enum): SQS = "SQS" KinesisDataStreams = "KinesisDataStreams" DynamoDBStreams = "DynamoDBStreams"
Ancestors
- enum.Enum
Class variables
var DynamoDBStreams
var KinesisDataStreams
var SQS