diff --git a/lib_afc_mosaic/universe_map_file.py b/lib_afc_mosaic/universe_map_file.py index 7d913ad..ee3f384 100644 --- a/lib_afc_mosaic/universe_map_file.py +++ b/lib_afc_mosaic/universe_map_file.py @@ -6,6 +6,7 @@ import json import os import logging import uuid +import time import phonenumbers @@ -54,7 +55,8 @@ class AFCMUniverseMapFile: "incoming_count": { "total": 0 }, "removed_count": { "total": 0 }, "cleaned_count": { "total": 0 }, - "removed_details": {} + "removed_details": {}, + "timings": {} } @@ -201,6 +203,7 @@ class AFCMUniverseMapFile: :param voterset_filename: The VoterSet filename to process """ + overall_time_start = time.time() if 'mapping' not in self.mosaic_file_list[voterset_filename]: logging.debug("Missing 'mapping' key, skipping %s", voterset_filename) @@ -230,6 +233,7 @@ class AFCMUniverseMapFile: amplify_rows = [] removed_rows = [] + reading_time_start = time.time() try: voterset_filename_s3_key = os.path.join(self.metadata.get("s3_key", ""), voterset_filename) file_data_stream = self.voterset_s3_connection.get_object(self.voterset_s3_bucket, @@ -238,12 +242,14 @@ class AFCMUniverseMapFile: raise Exception(f"universe_map_file.process_file: Failed to get {self.voterset_s3_bucket}/{voterset_filename_s3_key}: {ex}") from ex rows = read_voterdata_csv_stream(file_data_stream, csv_keys_needed) + reading_time = time.time() - reading_time_start logging.debug("rows = %s", len(rows)) # Update incoming stats after reading this file map self.stats["incoming_count"][voterset_filename] = len(rows) self.stats["incoming_count"]["total"] += self.stats["incoming_count"][voterset_filename] + processing_time_start = time.time() for i,r in enumerate(rows): if 'combine' in mapping['Cell_Phone']: @@ -342,10 +348,12 @@ class AFCMUniverseMapFile: amplify_rows.append(new_row) self.final_rows.append(new_row) + processing_time = time.time() - processing_time_start (filename_prefix, extension) = os.path.splitext(voterset_filename) # Need to write *_PREPPED.csv + writing_cleaned_time_start = time.time() prep_file_name = f"{filename_prefix}_PREPPED.csv" prep_full_pathname = f"/tmp/{prep_file_name}" if len(amplify_rows) > 0: @@ -362,8 +370,10 @@ class AFCMUniverseMapFile: # remove the temp file now os.remove(prep_full_pathname) + writing_cleaned_time = time.time() - writing_cleaned_time_start # Need to write *_REMOVED.csv + writing_removed_time_start = time.time() removed_file_name = f"{filename_prefix}_REMOVED.csv" removed_full_pathname = f"/tmp/{removed_file_name}" if len(removed_rows) > 0: @@ -380,17 +390,28 @@ class AFCMUniverseMapFile: # remove the temp file now os.remove(removed_full_pathname) + writing_removed_time = time.time() - writing_removed_time_start self.removed_row_count += len(removed_rows) self.processed_row_count += len(rows) - # Update cleaned & removed stats for this file + # Update stats for this file self.stats['cleaned_count'][voterset_filename] = len(amplify_rows) self.stats['cleaned_count']["total"] += self.stats["cleaned_count"][voterset_filename] self.stats['removed_count'][voterset_filename] = len(removed_rows) self.stats['removed_count']["total"] += self.stats["removed_count"][voterset_filename] + overall_time = time.time() - overall_time_start + + self.stats["timings"][voterset_filename] = { + "reading": reading_time, + "processing": processing_time, + "writing cleaned file": writing_cleaned_time, + "writing removed file": writing_removed_time, + "file total": overall_time + } + return None