Added per-file 'timings' (reading, processing, writing, total) to the file processing stats
This commit is contained in:
parent
580c25f36e
commit
4073ca5a9f
|
|
@ -6,6 +6,7 @@ import json
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
|
import time
|
||||||
|
|
||||||
import phonenumbers
|
import phonenumbers
|
||||||
|
|
||||||
|
|
@ -54,7 +55,8 @@ class AFCMUniverseMapFile:
|
||||||
"incoming_count": { "total": 0 },
|
"incoming_count": { "total": 0 },
|
||||||
"removed_count": { "total": 0 },
|
"removed_count": { "total": 0 },
|
||||||
"cleaned_count": { "total": 0 },
|
"cleaned_count": { "total": 0 },
|
||||||
"removed_details": {}
|
"removed_details": {},
|
||||||
|
"timings": {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -201,6 +203,7 @@ class AFCMUniverseMapFile:
|
||||||
|
|
||||||
:param voterset_filename: The VoterSet filename to process
|
:param voterset_filename: The VoterSet filename to process
|
||||||
"""
|
"""
|
||||||
|
overall_time_start = time.time()
|
||||||
|
|
||||||
if 'mapping' not in self.mosaic_file_list[voterset_filename]:
|
if 'mapping' not in self.mosaic_file_list[voterset_filename]:
|
||||||
logging.debug("Missing 'mapping' key, skipping %s", voterset_filename)
|
logging.debug("Missing 'mapping' key, skipping %s", voterset_filename)
|
||||||
|
|
@ -230,6 +233,7 @@ class AFCMUniverseMapFile:
|
||||||
amplify_rows = []
|
amplify_rows = []
|
||||||
removed_rows = []
|
removed_rows = []
|
||||||
|
|
||||||
|
reading_time_start = time.time()
|
||||||
try:
|
try:
|
||||||
voterset_filename_s3_key = os.path.join(self.metadata.get("s3_key", ""), voterset_filename)
|
voterset_filename_s3_key = os.path.join(self.metadata.get("s3_key", ""), voterset_filename)
|
||||||
file_data_stream = self.voterset_s3_connection.get_object(self.voterset_s3_bucket,
|
file_data_stream = self.voterset_s3_connection.get_object(self.voterset_s3_bucket,
|
||||||
|
|
@ -238,12 +242,14 @@ class AFCMUniverseMapFile:
|
||||||
raise Exception(f"universe_map_file.process_file: Failed to get {self.voterset_s3_bucket}/{voterset_filename_s3_key}: {ex}") from ex
|
raise Exception(f"universe_map_file.process_file: Failed to get {self.voterset_s3_bucket}/{voterset_filename_s3_key}: {ex}") from ex
|
||||||
|
|
||||||
rows = read_voterdata_csv_stream(file_data_stream, csv_keys_needed)
|
rows = read_voterdata_csv_stream(file_data_stream, csv_keys_needed)
|
||||||
|
reading_time = time.time() - reading_time_start
|
||||||
logging.debug("rows = %s", len(rows))
|
logging.debug("rows = %s", len(rows))
|
||||||
|
|
||||||
# Update incoming stats after reading this file map
|
# Update incoming stats after reading this file map
|
||||||
self.stats["incoming_count"][voterset_filename] = len(rows)
|
self.stats["incoming_count"][voterset_filename] = len(rows)
|
||||||
self.stats["incoming_count"]["total"] += self.stats["incoming_count"][voterset_filename]
|
self.stats["incoming_count"]["total"] += self.stats["incoming_count"][voterset_filename]
|
||||||
|
|
||||||
|
processing_time_start = time.time()
|
||||||
for i,r in enumerate(rows):
|
for i,r in enumerate(rows):
|
||||||
|
|
||||||
if 'combine' in mapping['Cell_Phone']:
|
if 'combine' in mapping['Cell_Phone']:
|
||||||
|
|
@ -342,10 +348,12 @@ class AFCMUniverseMapFile:
|
||||||
amplify_rows.append(new_row)
|
amplify_rows.append(new_row)
|
||||||
self.final_rows.append(new_row)
|
self.final_rows.append(new_row)
|
||||||
|
|
||||||
|
processing_time = time.time() - processing_time_start
|
||||||
|
|
||||||
(filename_prefix, extension) = os.path.splitext(voterset_filename)
|
(filename_prefix, extension) = os.path.splitext(voterset_filename)
|
||||||
|
|
||||||
# Need to write *_PREPPED.csv
|
# Need to write *_PREPPED.csv
|
||||||
|
writing_cleaned_time_start = time.time()
|
||||||
prep_file_name = f"{filename_prefix}_PREPPED.csv"
|
prep_file_name = f"{filename_prefix}_PREPPED.csv"
|
||||||
prep_full_pathname = f"/tmp/{prep_file_name}"
|
prep_full_pathname = f"/tmp/{prep_file_name}"
|
||||||
if len(amplify_rows) > 0:
|
if len(amplify_rows) > 0:
|
||||||
|
|
@ -362,8 +370,10 @@ class AFCMUniverseMapFile:
|
||||||
|
|
||||||
# remove the temp file now
|
# remove the temp file now
|
||||||
os.remove(prep_full_pathname)
|
os.remove(prep_full_pathname)
|
||||||
|
writing_cleaned_time = time.time() - writing_cleaned_time_start
|
||||||
|
|
||||||
# Need to write *_REMOVED.csv
|
# Need to write *_REMOVED.csv
|
||||||
|
writing_removed_time_start = time.time()
|
||||||
removed_file_name = f"{filename_prefix}_REMOVED.csv"
|
removed_file_name = f"{filename_prefix}_REMOVED.csv"
|
||||||
removed_full_pathname = f"/tmp/{removed_file_name}"
|
removed_full_pathname = f"/tmp/{removed_file_name}"
|
||||||
if len(removed_rows) > 0:
|
if len(removed_rows) > 0:
|
||||||
|
|
@ -380,17 +390,28 @@ class AFCMUniverseMapFile:
|
||||||
|
|
||||||
# remove the temp file now
|
# remove the temp file now
|
||||||
os.remove(removed_full_pathname)
|
os.remove(removed_full_pathname)
|
||||||
|
writing_removed_time = time.time() - writing_removed_time_start
|
||||||
|
|
||||||
self.removed_row_count += len(removed_rows)
|
self.removed_row_count += len(removed_rows)
|
||||||
self.processed_row_count += len(rows)
|
self.processed_row_count += len(rows)
|
||||||
|
|
||||||
# Update cleaned & removed stats for this file
|
# Update stats for this file
|
||||||
self.stats['cleaned_count'][voterset_filename] = len(amplify_rows)
|
self.stats['cleaned_count'][voterset_filename] = len(amplify_rows)
|
||||||
self.stats['cleaned_count']["total"] += self.stats["cleaned_count"][voterset_filename]
|
self.stats['cleaned_count']["total"] += self.stats["cleaned_count"][voterset_filename]
|
||||||
|
|
||||||
self.stats['removed_count'][voterset_filename] = len(removed_rows)
|
self.stats['removed_count'][voterset_filename] = len(removed_rows)
|
||||||
self.stats['removed_count']["total"] += self.stats["removed_count"][voterset_filename]
|
self.stats['removed_count']["total"] += self.stats["removed_count"][voterset_filename]
|
||||||
|
|
||||||
|
overall_time = time.time() - overall_time_start
|
||||||
|
|
||||||
|
self.stats["timings"][voterset_filename] = {
|
||||||
|
"reading": reading_time,
|
||||||
|
"processing": processing_time,
|
||||||
|
"writing cleaned file": writing_cleaned_time,
|
||||||
|
"writing removed file": writing_removed_time,
|
||||||
|
"file total": overall_time
|
||||||
|
}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue