Added 'timings' to the file processing stats

This commit is contained in:
Rick Ross 2023-12-08 16:26:09 -08:00
parent 580c25f36e
commit 4073ca5a9f
1 changed file with 23 additions and 2 deletions

View File

@ -6,6 +6,7 @@ import json
import os
import logging
import uuid
import time
import phonenumbers
@ -54,7 +55,8 @@ class AFCMUniverseMapFile:
"incoming_count": { "total": 0 },
"removed_count": { "total": 0 },
"cleaned_count": { "total": 0 },
"removed_details": {}
"removed_details": {},
"timings": {}
}
@ -201,6 +203,7 @@ class AFCMUniverseMapFile:
:param voterset_filename: The VoterSet filename to process
"""
overall_time_start = time.time()
if 'mapping' not in self.mosaic_file_list[voterset_filename]:
logging.debug("Missing 'mapping' key, skipping %s", voterset_filename)
@ -230,6 +233,7 @@ class AFCMUniverseMapFile:
amplify_rows = []
removed_rows = []
reading_time_start = time.time()
try:
voterset_filename_s3_key = os.path.join(self.metadata.get("s3_key", ""), voterset_filename)
file_data_stream = self.voterset_s3_connection.get_object(self.voterset_s3_bucket,
@ -238,12 +242,14 @@ class AFCMUniverseMapFile:
raise Exception(f"universe_map_file.process_file: Failed to get {self.voterset_s3_bucket}/{voterset_filename_s3_key}: {ex}") from ex
rows = read_voterdata_csv_stream(file_data_stream, csv_keys_needed)
reading_time = time.time() - reading_time_start
logging.debug("rows = %s", len(rows))
# Update incoming stats after reading this file map
self.stats["incoming_count"][voterset_filename] = len(rows)
self.stats["incoming_count"]["total"] += self.stats["incoming_count"][voterset_filename]
processing_time_start = time.time()
for i,r in enumerate(rows):
if 'combine' in mapping['Cell_Phone']:
@ -342,10 +348,12 @@ class AFCMUniverseMapFile:
amplify_rows.append(new_row)
self.final_rows.append(new_row)
processing_time = time.time() - processing_time_start
(filename_prefix, extension) = os.path.splitext(voterset_filename)
# Need to write *_PREPPED.csv
writing_cleaned_time_start = time.time()
prep_file_name = f"{filename_prefix}_PREPPED.csv"
prep_full_pathname = f"/tmp/{prep_file_name}"
if len(amplify_rows) > 0:
@ -362,8 +370,10 @@ class AFCMUniverseMapFile:
# remove the temp file now
os.remove(prep_full_pathname)
writing_cleaned_time = time.time() - writing_cleaned_time_start
# Need to write *_REMOVED.csv
writing_removed_time_start = time.time()
removed_file_name = f"{filename_prefix}_REMOVED.csv"
removed_full_pathname = f"/tmp/{removed_file_name}"
if len(removed_rows) > 0:
@ -380,17 +390,28 @@ class AFCMUniverseMapFile:
# remove the temp file now
os.remove(removed_full_pathname)
writing_removed_time = time.time() - writing_removed_time_start
self.removed_row_count += len(removed_rows)
self.processed_row_count += len(rows)
# Update cleaned & removed stats for this file
# Update stats for this file
self.stats['cleaned_count'][voterset_filename] = len(amplify_rows)
self.stats['cleaned_count']["total"] += self.stats["cleaned_count"][voterset_filename]
self.stats['removed_count'][voterset_filename] = len(removed_rows)
self.stats['removed_count']["total"] += self.stats["removed_count"][voterset_filename]
overall_time = time.time() - overall_time_start
self.stats["timings"][voterset_filename] = {
"reading": reading_time,
"processing": processing_time,
"writing cleaned file": writing_cleaned_time,
"writing removed file": writing_removed_time,
"file total": overall_time
}
return None