Prompt Details

Model

(gemini-3.0-flash)

Token size

437

Example input

[Insert file name with extension, e.g., data.csv, report.xlsx, export.json]: "donnees_test.csv" [Insert list of columns to analyze]: "Produit", "Ventes_EUR", "Unites_Vendues", "Satisfaction" [Describe charts needed, e.g., Bar chart and Boxplot]: "Bar chart of total sales per product, and a boxplot of satisfaction levels"

Example output

# -*- coding: utf-8 -*-
"""
Load, clean and visualise a tabular sales dataset.

Created on Fri May 1 12:19:47 2026

@author: Ben Luc
"""

import logging
import os
import sys
from typing import Optional, Sequence

import pandas as pd

# Configure logging for production tracking
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

# Defaults that reproduce the columns of the example dataset.
DEFAULT_KEY_COLUMN = "Produit"
DEFAULT_NUMERIC_COLUMNS = ("Ventes_EUR", "Unites_Vendues", "Satisfaction")


class DataCleaner:
    """Load a CSV file and reduce it to a cleaned set of target columns."""

    def __init__(
        self,
        file_path: str,
        required_columns: list,
        key_column: str = DEFAULT_KEY_COLUMN,
        numeric_columns: Optional[Sequence[str]] = None,
    ):
        """
        Initialise the cleaner.

        :param file_path: Path to the input CSV file.
        :param required_columns: Columns that must exist and are kept.
        :param key_column: Categorical identifier; rows where it is null
            are dropped. Defaults to ``"Produit"``.
        :param numeric_columns: Columns coerced to numeric and imputed
            with their median. Defaults to ``DEFAULT_NUMERIC_COLUMNS``.
        """
        self.file_path = file_path
        self.required_columns = required_columns
        self.key_column = key_column
        self.numeric_columns = tuple(
            DEFAULT_NUMERIC_COLUMNS if numeric_columns is None else numeric_columns
        )
        self.df: Optional[pd.DataFrame] = None

    def load_data(self) -> pd.DataFrame:
        """
        Load the CSV file into ``self.df``.

        :return: The loaded DataFrame.
        :raises FileNotFoundError: If ``file_path`` does not exist.
        """
        logger.info("Attempting to load file from: %s", self.file_path)
        try:
            if not os.path.exists(self.file_path):
                raise FileNotFoundError(
                    f"The file {self.file_path} was not found."
                )
            self.df = pd.read_csv(self.file_path)
            logger.info("Data successfully loaded.")
            return self.df
        except Exception as e:
            logger.error("Error loading data: %s", e)
            raise

    def process_data(self) -> pd.DataFrame:
        """
        Subset to the required columns and fill missing values.

        Steps, in order:

        - Load the file first if no data has been loaded yet.
        - Keep only ``required_columns``.
        - Drop rows whose ``key_column`` is null.
        - Coerce each of ``numeric_columns`` to numeric (unparseable
          values become NaN) and fill NaN with the column median.

        No outlier handling is performed.

        :return: The cleaned DataFrame (also stored in ``self.df``).
        :raises KeyError: If a required column is absent from the data.
        """
        if self.df is None:
            self.load_data()

        logger.info("Starting data cleaning and preprocessing.")
        try:
            # list() so that a tuple of names is treated as a column
            # selection rather than as a single (multi-index) key.
            wanted = list(self.required_columns)

            missing_cols = [col for col in wanted if col not in self.df.columns]
            if missing_cols:
                raise KeyError(
                    f"Missing required columns in dataset: {missing_cols}"
                )

            cleaned_df = self.df[wanted].copy()

            # Remove rows where the critical categorical identifier is null
            if self.key_column in cleaned_df.columns:
                cleaned_df = cleaned_df.dropna(subset=[self.key_column])

            for col in self.numeric_columns:
                if col not in cleaned_df.columns:
                    continue
                cleaned_df[col] = pd.to_numeric(cleaned_df[col], errors="coerce")
                median_val = cleaned_df[col].median()
                if pd.isna(median_val):
                    # No usable value at all: imputing would be a silent
                    # no-op, so surface it instead.
                    logger.warning(
                        "Column %s has no numeric values; "
                        "missing values left as NaN.",
                        col,
                    )
                    continue
                cleaned_df[col] = cleaned_df[col].fillna(median_val)

            self.df = cleaned_df
            logger.info("Data cleaning completed successfully.")
            return self.df
        except Exception as e:
            logger.error("Error processing data: %s", e)
            raise


def generate_visualizations(df: pd.DataFrame) -> None:
    """
    Draw and display two charts side by side.

    1. A bar chart of total ``Ventes_EUR`` per ``Produit``.
    2. A boxplot of ``Satisfaction``.

    :param df: Cleaned DataFrame containing the columns named above.
    """
    # Imported here so the cleaning code stays importable on machines
    # without the plotting stack installed.
    import matplotlib.pyplot as plt
    import seaborn as sns

    logger.info("Generating visualizations.")
    fig = None
    try:
        sns.set_theme(style="whitegrid")
        fig, axes = plt.subplots(1, 2, figsize=(16, 6))

        # Chart 1: Bar chart of total sales per product
        sales_per_product = (
            df.groupby("Produit")["Ventes_EUR"].sum().reset_index()
        )
        sns.barplot(
            data=sales_per_product,
            x="Produit",
            y="Ventes_EUR",
            ax=axes[0],
            palette="viridis",
            hue="Produit",
            legend=False,
        )
        axes[0].set_title(
            "Total des Ventes par Produit", fontsize=14, fontweight="bold"
        )
        axes[0].set_xlabel("Produit", fontsize=12)
        axes[0].set_ylabel("Ventes (EUR)", fontsize=12)
        axes[0].tick_params(axis="x", rotation=45)

        # Chart 2: Boxplot of satisfaction levels
        sns.boxplot(
            data=df,
            y="Satisfaction",
            ax=axes[1],
            color="#4c72b0",
            width=0.4,
        )
        axes[1].set_title(
            "Distribution du Niveau de Satisfaction",
            fontsize=14,
            fontweight="bold",
        )
        axes[1].set_ylabel("Satisfaction (Score)", fontsize=12)

        plt.tight_layout()
        logger.info("Visualizations successfully generated.")
        plt.show()
    except Exception as e:
        logger.error("Error generating visualizations: %s", e)
        raise
    finally:
        # Release the figure once show() has returned. In interactive
        # mode show() does not block, so closing would remove a window
        # the user is still looking at.
        if fig is not None and not plt.isinteractive():
            plt.close(fig)


def main():
    """Run the full pipeline: load, clean, then plot; exit 1 on failure."""
    data_source = "donnees_test.csv"
    target_columns = ["Produit", "Ventes_EUR", "Unites_Vendues", "Satisfaction"]

    try:
        cleaner = DataCleaner(data_source, target_columns)
        cleaned_data = cleaner.process_data()
        generate_visualizations(cleaned_data)
        logger.info("Pipeline executed successfully.")
    except Exception as e:
        logger.critical("Pipeline execution failed: %s", e)
        sys.exit(1)


if __name__ == "__main__":
    main()

♊️ Gemini

Advanced Python Data Pipeline Automated

Name: Advanced Python Data Pipeline Automated Gemini Prompt
Brand: PromptBase
Price: 2.99 USD
Availability: InStock
Author: benlucsg

@benlucsg

$2.99

What do I get when I buy a prompt?

Add to Cart

Or get it with Select10 Select prompts/mo — under $2 each

Learn more

Instant access

Commercial use

Money‑back

By purchasing this prompt, you agree to our terms of service

GEMINI-3.0-FLASH

Stop wasting time writing boilerplate code for data analysis. This master prompt generates a sophisticated, production-ready Python script tailored to your specific dataset. Key Features: Format Agnostic, Robust Data Cleaning, Professional Visualization, Enterprise Ready. Perfect for data scientists, financial analysts, researchers, and developers who need reliable, modular code in seconds.

...more

Added over 1 month ago

Browse Marketplace