Example input
Programming Language: [Python]
Databases: [MySQL]
Input File Name: [immi_data_csv.csv]
Header Data: [Full Name, City, State, Passport Number, Passport Valid From, Passport Valid To, Country of Citizenship, Visa Type, Visa Valid From, Visa Valid To]
Header Mandatory Data: [Full Name]
Primary Key/Unique Columns for Each Row: [Full Name, Passport Number]
Example output
Data Load Program for CSV/Excel to MySQL Database
"""
Program Summary: This program reads data from a CSV file and loads it into a MySQL database.
It validates the mandatory fields and the unique key columns for each row, sorts the data on the
unique key columns, and writes any duplicate rows to a duplicate-record file. It creates a table
based on the input file's header data and loads the unique records into the table using an UPSERT.
Finally, it generates a summary report with the job start time, end time, duration, number of
records bypassed as duplicates, number of records inserted, number of records updated, and other
relevant information.
Programming Language: Python
Database: MySQL
Input File Name: immi_data_csv.csv
Header Data: Full Name, City, State, Passport Number, Passport Valid From, Passport Valid To,
Country of Citizenship, Visa Type, Visa Valid From, Visa Valid To
Header Mandatory Data: Full Name
Primary Key/Unique Columns for Each Row: Full Name, Passport Number
"""
import csv
import os
import time
from datetime import datetime, timedelta
import mysql.connector
# Function to read the input file
def read_input_file(file_path):
    try:
        with open(file_path, 'r') as f:
            reader = csv.DictReader(f)
            data = [row for row in reader]
            return data
    except FileNotFoundError:
        print(f'ERROR: {file_path} not found')
        return None
    except PermissionError:
        print(f'ERROR: Access denied for {file_path}')
        return None
    except Exception as e:
        print(f'ERROR: {e}')
        return None
# Function to validate mandatory fields
def validate_mandatory_fields(data):
    mandatory_fields = ['Full Name']
    for row in data:
        for field in mandatory_fields:
            if row[field] == '':
                print(f'ERROR: {field} is mandatory for {row}')
                return False
    return True
# Function to validate unique fields and write duplicate rows to a file
def validate_unique_fields(data):
    unique_fields = ['Full Name', 'Passport Number']
    sorted_data = sorted(data, key=lambda x: (x[unique_fields[0]], x[unique_fields[1]]))
    seen_keys = set()
    unique_data = []
    duplicates = []
    for row in sorted_data:
        key = (row[unique_fields[0]], row[unique_fields[1]])
        if key in seen_keys:
            # Later occurrences of an already-seen key are bypassed as duplicates
            duplicates.append(row)
        else:
            seen_keys.add(key)
            unique_data.append(row)
    if duplicates:
        duplicate_file_name = f'{int(time.time())}_immi_data_csv_DupRec.csv'
        with open(duplicate_file_name, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=sorted_data[0].keys())
            writer.writeheader()
            writer.writerows(duplicates)
        print(f'INFO: Duplicate records written to {duplicate_file_name}')
    process_file_name = f'{int(time.time())}_immi_data_csv_Process.csv'
    with open(process_file_name, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=unique_data[0].keys())
        writer.writeheader()
        writer.writerows(unique_data)
    print(f'INFO: Unique records written to {process_file_name}')
    return process_file_name
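
# The program summary above says the processed file is loaded with an UPSERT.
# A minimal sketch of that step is shown below, assuming a table named immi_data
# whose unique key covers Full Name and Passport Number, and placeholder
# connection settings; these names are illustrative assumptions only.
def load_to_mysql_sketch(process_file_name):
    conn = mysql.connector.connect(host='localhost', user='root',
                                   password='password', database='immi_db')  # assumed credentials
    cursor = conn.cursor()
    with open(process_file_name, 'r') as f:
        reader = csv.DictReader(f)
        columns = reader.fieldnames
        col_list = ', '.join(f'`{c}`' for c in columns)
        placeholders = ', '.join(['%s'] * len(columns))
        # ON DUPLICATE KEY UPDATE makes the INSERT behave as an UPSERT: rows whose
        # unique key already exists in the table are updated instead of rejected.
        updates = ', '.join(f'`{c}` = VALUES(`{c}`)' for c in columns)
        sql = (f'INSERT INTO immi_data ({col_list}) VALUES ({placeholders}) '
               f'ON DUPLICATE KEY UPDATE {updates}')
        for row in reader:
            cursor.execute(sql, [row[c] for c in columns])
            # In MySQL, cursor.rowcount is 1 for an inserted row and 2 for an updated
            # row, which is one way the summary report could split inserts from updates.
    conn.commit()
    cursor.close()
    conn.close()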