Prompt Details
Model
(gemini-3.0-flash)
Token size
437
Example input
[Insert file name with extension, e.g., data.csv, report.xlsx, export.json]: "donnees_test.csv"
[Insert list of columns to analyze]: "Produit", "Ventes_EUR", "Unites_Vendues", "Satisfaction"
[Describe charts needed, e.g., Bar chart and Boxplot]: "Bar chart of total sales per product, and a boxplot of satisfaction levels"
Example output
# -*- coding: utf-8 -*-
"""
Created on Fri May 1 12:19:47 2026
@author: Ben Luc
"""
import os
import sys
import logging
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Root logger setup: emit INFO-and-above records to stdout, each prefixed
# with a timestamp and the severity level.
logging.basicConfig(
    handlers=[logging.StreamHandler(stream=sys.stdout)],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
class DataCleaner:
    """
    Load a CSV file and clean a chosen subset of its columns.

    Cleaning consists of keeping only the requested columns, dropping rows
    whose identifier column is null, coercing the numeric columns to numbers
    (unparsable entries become missing) and imputing missing numeric values
    with the column median. No outlier treatment is performed.
    """

    def __init__(
        self,
        file_path: str,
        required_columns: list,
        *,
        id_column: str = "Produit",
        numeric_columns=("Ventes_EUR", "Unites_Vendues", "Satisfaction"),
    ):
        """
        Initializes the DataCleaner with file path and target columns.

        :param file_path: Path to the input CSV file.
        :param required_columns: Columns to keep. Any iterable of names is
            accepted; duplicates are ignored and order is preserved.
        :param id_column: Column whose null rows are dropped (only applied
            when it is one of the required columns).
        :param numeric_columns: Columns coerced to numeric and median-imputed
            (only those that are also required columns are touched).
        """
        self.file_path = file_path
        # Copy so later changes to the caller's list cannot affect us, and
        # de-duplicate because selecting the same column twice would make
        # ``frame[col]`` return a DataFrame instead of a Series.
        self.required_columns = list(dict.fromkeys(required_columns))
        self.id_column = id_column
        self.numeric_columns = tuple(numeric_columns)
        self.df = None

    def load_data(self) -> pd.DataFrame:
        """
        Loads the CSV data into a pandas DataFrame and stores it on ``self.df``.

        :return: Loaded pandas DataFrame.
        :raises FileNotFoundError: If the specified file path does not exist.
        """
        logging.info("Attempting to load file from: %s", self.file_path)
        try:
            if not os.path.exists(self.file_path):
                raise FileNotFoundError(f"The file {self.file_path} was not found.")
            self.df = pd.read_csv(self.file_path)
            logging.info("Data successfully loaded.")
            return self.df
        except Exception as e:
            # Log for traceability, then let the caller decide what to do.
            logging.error("Error loading data: %s", e)
            raise

    def process_data(self) -> pd.DataFrame:
        """
        Filters required columns and handles missing values.

        - Keeps only the required columns.
        - Drops rows where the identifier column is null.
        - Coerces numeric columns to numbers and fills their missing values
          with the column median.

        The data is loaded first if ``load_data`` has not been called yet.
        The cleaned frame replaces ``self.df``.

        :return: Cleaned pandas DataFrame.
        :raises KeyError: If any required column is absent from the dataset.
        """
        if self.df is None:
            self.load_data()
        logging.info("Starting data cleaning and preprocessing.")
        try:
            # Check for column existence
            missing_cols = [
                col for col in self.required_columns if col not in self.df.columns
            ]
            if missing_cols:
                raise KeyError(f"Missing required columns in dataset: {missing_cols}")

            # Subset DataFrame to target columns
            cleaned_df = self.df[self.required_columns].copy()

            # Remove rows where the identifier is null
            if self.id_column in cleaned_df.columns:
                cleaned_df = cleaned_df.dropna(subset=[self.id_column])

            # Handle missing values for numerical columns
            for col in self.numeric_columns:
                if col not in cleaned_df.columns:
                    continue
                cleaned_df[col] = pd.to_numeric(cleaned_df[col], errors="coerce")
                median_val = cleaned_df[col].median()
                if pd.isna(median_val):
                    # No valid number in the column: there is nothing to
                    # impute with, so say so instead of failing silently.
                    logging.warning(
                        "Column %s has no numeric values; "
                        "missing entries are left unfilled.",
                        col,
                    )
                    continue
                cleaned_df[col] = cleaned_df[col].fillna(median_val)

            self.df = cleaned_df
            logging.info("Data cleaning completed successfully.")
            return self.df
        except Exception as e:
            logging.error("Error processing data: %s", e)
            raise
def generate_visualizations(df: pd.DataFrame) -> None:
    """
    Draw a two-panel figure from the cleaned data and display it.

    Left panel: bar chart of ``Ventes_EUR`` summed per ``Produit``.
    Right panel: boxplot of the ``Satisfaction`` column.

    Any exception is logged and re-raised to the caller.

    :param df: Cleaned pandas DataFrame.
    """
    logging.info("Generating visualizations.")
    try:
        sns.set_theme(style="whitegrid")
        figure, (sales_ax, satisfaction_ax) = plt.subplots(1, 2, figsize=(16, 6))
        title_style = {"fontsize": 14, "fontweight": "bold"}

        # Left panel: total sales per product.
        totals = df.groupby("Produit")["Ventes_EUR"].sum()
        bar_options = dict(
            data=totals.reset_index(),
            x="Produit",
            y="Ventes_EUR",
            ax=sales_ax,
            palette="viridis",
            hue="Produit",
            legend=False,
        )
        sns.barplot(**bar_options)
        sales_ax.set_title("Total des Ventes par Produit", **title_style)
        sales_ax.set_xlabel("Produit", fontsize=12)
        sales_ax.set_ylabel("Ventes (EUR)", fontsize=12)
        sales_ax.tick_params(axis='x', rotation=45)

        # Right panel: spread of satisfaction scores.
        box_options = dict(
            data=df,
            y="Satisfaction",
            ax=satisfaction_ax,
            color="#4c72b0",
            width=0.4,
        )
        sns.boxplot(**box_options)
        satisfaction_ax.set_title(
            "Distribution du Niveau de Satisfaction", **title_style
        )
        satisfaction_ax.set_ylabel("Satisfaction (Score)", fontsize=12)

        plt.tight_layout()
        logging.info("Visualizations successfully generated.")
        plt.show()
    except Exception as err:
        logging.error(f"Error generating visualizations: {err!s}")
        raise
def main():
    """
    Run the full pipeline: clean the CSV data, then plot it.

    Any exception raised along the way is logged at CRITICAL level and the
    process exits with status 1.
    """
    csv_path = "donnees_test.csv"
    columns = ["Produit", "Ventes_EUR", "Unites_Vendues", "Satisfaction"]
    try:
        frame = DataCleaner(csv_path, columns).process_data()
        generate_visualizations(frame)
        logging.info("Pipeline executed successfully.")
    except Exception as err:
        logging.critical(f"Pipeline execution failed: {err!s}")
        sys.exit(1)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
By purchasing this prompt, you agree to our terms of service
GEMINI-3.0-FLASH
Stop wasting time writing boilerplate code for data analysis. This master prompt generates a sophisticated, production-ready Python script tailored to your specific dataset.
Key Features:
- Format Agnostic
- Robust Data Cleaning
- Professional Visualization
- Enterprise Ready
Perfect for data scientists, financial analysts, researchers, and developers who need reliable, modular code in seconds.
...more
Added 6 days ago
