Added 'timings' to the file processing stats
commit 4073ca5a9f
parent 580c25f36e
@@ -6,6 +6,7 @@ import json
 import os
 import logging
 import uuid
+import time
 
 import phonenumbers
 
@@ -54,7 +55,8 @@ class AFCMUniverseMapFile:
             "incoming_count": { "total": 0 },
             "removed_count": { "total": 0 },
             "cleaned_count": { "total": 0 },
-            "removed_details": {}
+            "removed_details": {},
+            "timings": {}
         }
 
 
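For orientation, here is a sketch of the shape the stats dictionary takes once a file has been processed with the new key in place. The filename and numbers are illustrative assumptions, not values from this commit:

# Illustrative only: hypothetical filename and counts, showing the intended shape
example_stats = {
    "incoming_count": { "total": 1000, "voters_example.csv": 1000 },
    "removed_count": { "total": 25, "voters_example.csv": 25 },
    "cleaned_count": { "total": 975, "voters_example.csv": 975 },
    "removed_details": {},
    "timings": {
        "voters_example.csv": {
            "reading": 1.2,                 # seconds, taken from time.time() deltas
            "processing": 3.4,
            "writing cleaned file": 0.8,
            "writing removed file": 0.1,
            "file total": 5.6
        }
    }
}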
@@ -201,6 +203,7 @@ class AFCMUniverseMapFile:
 
         :param voterset_filename: The VoterSet filename to process
         """
+        overall_time_start = time.time()
 
         if 'mapping' not in self.mosaic_file_list[voterset_filename]:
             logging.debug("Missing 'mapping' key, skipping %s", voterset_filename)
@@ -230,6 +233,7 @@ class AFCMUniverseMapFile:
         amplify_rows = []
         removed_rows = []
 
+        reading_time_start = time.time()
         try:
             voterset_filename_s3_key = os.path.join(self.metadata.get("s3_key", ""), voterset_filename)
             file_data_stream = self.voterset_s3_connection.get_object(self.voterset_s3_bucket,
@@ -238,12 +242,14 @@ class AFCMUniverseMapFile:
             raise Exception(f"universe_map_file.process_file: Failed to get {self.voterset_s3_bucket}/{voterset_filename_s3_key}: {ex}") from ex
 
         rows = read_voterdata_csv_stream(file_data_stream, csv_keys_needed)
+        reading_time = time.time() - reading_time_start
         logging.debug("rows = %s", len(rows))
 
         # Update incoming stats after reading this file map
         self.stats["incoming_count"][voterset_filename] = len(rows)
         self.stats["incoming_count"]["total"] += self.stats["incoming_count"][voterset_filename]
 
+        processing_time_start = time.time()
         for i,r in enumerate(rows):
 
             if 'combine' in mapping['Cell_Phone']:
@@ -342,10 +348,12 @@ class AFCMUniverseMapFile:
             amplify_rows.append(new_row)
             self.final_rows.append(new_row)
 
+        processing_time = time.time() - processing_time_start
 
         (filename_prefix, extension) = os.path.splitext(voterset_filename)
 
         # Need to write *_PREPPED.csv
+        writing_cleaned_time_start = time.time()
         prep_file_name = f"{filename_prefix}_PREPPED.csv"
         prep_full_pathname = f"/tmp/{prep_file_name}"
         if len(amplify_rows) > 0:
@@ -362,8 +370,10 @@ class AFCMUniverseMapFile:
 
             # remove the temp file now
             os.remove(prep_full_pathname)
+        writing_cleaned_time = time.time() - writing_cleaned_time_start
 
         # Need to write *_REMOVED.csv
+        writing_removed_time_start = time.time()
         removed_file_name = f"{filename_prefix}_REMOVED.csv"
         removed_full_pathname = f"/tmp/{removed_file_name}"
         if len(removed_rows) > 0:
@@ -380,17 +390,28 @@ class AFCMUniverseMapFile:
 
             # remove the temp file now
             os.remove(removed_full_pathname)
+        writing_removed_time = time.time() - writing_removed_time_start
 
         self.removed_row_count += len(removed_rows)
         self.processed_row_count += len(rows)
 
-        # Update cleaned & removed stats for this file
+        # Update stats for this file
         self.stats['cleaned_count'][voterset_filename] = len(amplify_rows)
         self.stats['cleaned_count']["total"] += self.stats["cleaned_count"][voterset_filename]
 
         self.stats['removed_count'][voterset_filename] = len(removed_rows)
         self.stats['removed_count']["total"] += self.stats["removed_count"][voterset_filename]
 
+        overall_time = time.time() - overall_time_start
+
+        self.stats["timings"][voterset_filename] = {
+            "reading": reading_time,
+            "processing": processing_time,
+            "writing cleaned file": writing_cleaned_time,
+            "writing removed file": writing_removed_time,
+            "file total": overall_time
+        }
+
         return None
 
 
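Aside from the diff itself, here is a minimal, self-contained sketch of the start/stop wall-clock pattern this commit applies around each phase. The timed helper and the stand-in workloads are purely illustrative assumptions; the commit times its phases inline with time.time() rather than through a helper:

import time
from contextlib import contextmanager

@contextmanager
def timed(timings, label):
    # Record the wall-clock duration of the wrapped block under `label`.
    start = time.time()
    try:
        yield
    finally:
        timings[label] = time.time() - start

# Hypothetical usage mirroring the phases timed in process_file()
timings = {}
with timed(timings, "reading"):
    rows = list(range(100000))                  # stand-in for reading the CSV stream
with timed(timings, "processing"):
    cleaned = [r for r in rows if r % 40 != 0]  # stand-in for per-row cleanup
print(timings)                                  # e.g. {'reading': 0.002, 'processing': 0.008}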