Create a CSV Cleaning and SQLite Saver Tool with Tkinter and Pandas DataFrame


Cleaning of CSV data by using DataFrame through Tkinter Interface

Select any CSV file by using the file browser on click of a button. Use the read_csv() method to create a Pandas Dataframe.
Clean the DataFrame by removing duplicate rows and managing missing data.
Finally the data is stored in a selected SQLite database file.
Sample CSV file with missing data and duplicate rows for testing script.

1. Importing Libraries


    import tkinter as tk
    from tkinter import filedialog, messagebox, simpledialog
    import pandas as pd
    from sqlalchemy import create_engine
    
  • Tkinter is used for the GUI to interact with users (file loading, buttons, etc.).
  • Pandas is used for data cleaning and manipulation.
  • SQLAlchemy is used for saving cleaned data into an SQLite database.

2. Defining Global Variables


    global df, engine
    df = None  # Stores the loaded data as a Pandas DataFrame
    engine = None  # SQLite engine for saving data
    
  • df will store the loaded CSV data as a Pandas DataFrame.
  • engine will store the SQLite database connection.

3. Loading a CSV File


    def load_csv():
        """Load a CSV file into a Pandas DataFrame."""
        global df
        file_path = filedialog.askopenfilename(
            filetypes=[("CSV files", "*.csv")]
        )
        if file_path:  
            try:
                df = pd.read_csv(file_path)
                text_area.delete("1.0", tk.END)
                text_area.insert(tk.END, f"CSV loaded successfully!\n\n{df.head()}")
                messagebox.showinfo("Success", "CSV file loaded successfully!")
            except Exception as e:
                messagebox.showerror("Error", f"Failed to load CSV: {e}")
    

4. Handling Missing Values


    def handle_missing_values():
        """Handle missing values in the DataFrame."""
        global df
        if df is None:
            messagebox.showwarning("Warning", "No data loaded. Load a CSV file first.")
            return
        option = simpledialog.askstring(
            "Handle Missing Values",
            "Choose an option:\n1. Replace with 0\n2. Drop rows"
        )
        if option == "1":
            df.fillna(0, inplace=True)
        elif option == "2":
            df.dropna(inplace=True)
    
  • fillna : Allows the user to handle missing data by either replacing it with `0` or dropping rows with missing values.
  • Prompts the user to choose one of these options using a dialog box.

5. Removing Duplicate Rows


    def remove_duplicates():
        """Remove duplicate rows from the DataFrame."""
        global df
        if df is None:
            messagebox.showwarning("Warning", "No data loaded. Load a CSV file first.")
            return
        duplicates = df.duplicated().sum()
        if duplicates > 0:
            df.drop_duplicates(inplace=True)
    
  • duplicated() : Getting duplicate rows from a DataFrame.
  • drop_duplicates() : Removes duplicate rows in the DataFrame and updates it.
  • Informs the user about the number of duplicate rows removed.

6. Saving Data to SQLite


    def save_to_sqlite():
        """Save the DataFrame to an SQLite database."""
        global df, engine

        if df is None:
            messagebox.showwarning(
                "Warning", 
                "No data loaded. Load a CSV file first."
            )
            return
        
        # Prompt user to select or create a SQLite database file
        file_path = filedialog.asksaveasfilename(
            defaultextension=".db", 
            filetypes=[("SQLite Database", "*.db")]
        )
        
        if file_path:
            try:
                # Connect to the SQLite database file
                engine = create_engine(f"sqlite:///{file_path}")
                
                # Prompt user for table name
                table_name = simpledialog.askstring(
                    "Input", 
                    "Enter the table name to save data:"
                )
                
                if not table_name:
                    messagebox.showerror(
                        "Error", 
                        "Table name cannot be empty. Please try again."
                    )
                    return
                
                # Save the DataFrame to the SQLite database
                df.to_sql(table_name,engine,if_exists="replace",index=False)
                
                # Notify the user of success
                messagebox.showinfo(
                    "Success", 
                    f"Data saved to '{table_name}' table in the SQLite database."
                )
            except Exception as e:
                # Notify user of any errors
                messagebox.showerror(
                    "Error", 
                    f"Failed to save data to SQLite: {e}"
                )
        else:
            # If no file path is selected, show an error
            messagebox.showwarning(
                "Warning", 
                "No database file selected. Operation cancelled."
            )
    

7. Tkinter GUI Setup


    my_w = tk.Tk()  
    my_w.title("CSV Cleaning and SQLite Saver")
    


CSV data upload with Tkinter and Pandas to save cleaned data in SQLite : A Practical Python Tutorial


import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import pandas as pd
from sqlalchemy import create_engine

# Global variables
df = None  # Pandas DataFrame for storing the loaded data
engine = None  # SQLite database engine for saving data

# Function to load a CSV file
def load_csv():
    """Loads a CSV file into a Pandas DataFrame."""
    global df
    file_path = filedialog.askopenfilename(
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")]
    )
    if file_path:  # Check if a file was selected
        try:
            # Load the selected CSV file into a DataFrame
            df = pd.read_csv(file_path)
            # Clear and update the text area with the first few rows of the DataFrame
            text_area.delete("1.0", tk.END)
            text_area.insert(tk.END, f"CSV loaded successfully!\n\n{df.head()}")
            messagebox.showinfo("Success", "CSV file loaded successfully!")
        except Exception as e:
            # Show error message if the file fails to load
            messagebox.showerror("Error", f"Failed to load CSV: {e}")

# Function to handle missing values
def handle_missing_values():
    """Handles missing values in the DataFrame."""
    global df
    if df is None:
        # Warn the user if no data is loaded
        messagebox.showwarning("Warning", "No data loaded. Load a CSV file first.")
        return
    # Prompt user to choose how to handle missing values
    option = simpledialog.askstring(
        "Handle Missing Values",
        "Choose an option:\n1. Replace with 0\n2. Drop rows",
    )
    if option == "1":
        # Replace missing values with 0
        df.fillna(0, inplace=True)
        text_area.delete("1.0", tk.END)
        text_area.insert(tk.END, f"Missing values replaced with 0.\n\n{df.head()}")
    elif option == "2":
        # Drop rows containing missing values
        df.dropna(inplace=True)
        text_area.delete("1.0", tk.END)
        text_area.insert(tk.END, f"Rows with missing values dropped.\n\n{df.head()}")
    else:
        # Handle invalid input
        messagebox.showerror("Error", "Invalid option selected!")

# Function to remove duplicate entries
def remove_duplicates():
    """Removes duplicate rows from the DataFrame."""
    global df
    if df is None:
        # Warn the user if no data is loaded
        messagebox.showwarning("Warning", "No data loaded. Load a CSV file first.")
        return
    # Count the number of duplicate rows
    duplicates = df.duplicated().sum()
    if duplicates > 0:
        # Remove duplicates and update the DataFrame
        df.drop_duplicates(inplace=True)
        text_area.delete("1.0", tk.END)
        text_area.insert(tk.END, f"Duplicates removed: {duplicates}\n\n{df.head()}")
    else:
        # Notify the user if no duplicates are found
        messagebox.showinfo("Info", "No duplicate rows found.")

# Function to save cleaned data to SQLite
def save_to_sqlite():
    """Saves the cleaned DataFrame to an SQLite database."""
    global df, engine
    if df is None:
        # Warn the user if no data is loaded
        messagebox.showwarning("Warning", "No data loaded. Load a CSV file first.")
        return
    # Open a file dialog to specify the SQLite database file
    file_path = filedialog.asksaveasfilename(
        defaultextension=".db", filetypes=[("SQLite Database", "*.db")]
    )
    if file_path:
        try:
            # Create an SQLite database engine
            engine = create_engine(f"sqlite:///{file_path}")
            # Prompt user to enter the table name
            table_name = simpledialog.askstring("Input", "Enter the table name:")
            if table_name:
                # Save the DataFrame to the SQLite table
                df.to_sql(table_name, engine, if_exists="replace", index=False)
                messagebox.showinfo(
                    "Success", f"Data saved to SQLite database as '{table_name}' table."
                )
            else:
                # Handle empty table name
                messagebox.showerror("Error", "Table name cannot be empty.")
        except Exception as e:
            # Show error message if saving fails
            messagebox.showerror("Error", f"Failed to save data to SQLite: {e}")

# GUI Setup
my_w = tk.Tk()  # Create the main window
my_w.title("CSV Cleaning and SQLite Saver www.plus2net.com")
my_w.geometry("800x600")  # Set the dimensions of the main window

# Frame for buttons
frame = tk.Frame(my_w)
frame.pack(pady=10)  # Add padding to separate from the top

# Buttons for operations
btn_load = tk.Button(frame, text="Load CSV", 
        command=load_csv, width=15, bg="lightblue")
btn_load.grid(row=0, column=0, padx=5)

btn_missing = tk.Button(frame, text="Handle Missing Values", 
        command=handle_missing_values, width=20, bg="lightgreen")
btn_missing.grid(row=0, column=1, padx=5)

btn_duplicates = tk.Button(frame, text="Remove Duplicates", 
        command=remove_duplicates, width=20, bg="lightyellow")
btn_duplicates.grid(row=0, column=2, padx=5)

btn_save_sqlite = tk.Button(frame, text="Save to SQLite", 
        command=save_to_sqlite, width=15, bg="lightcoral")
btn_save_sqlite.grid(row=0, column=3, padx=5)

# Text area for output
text_area = tk.Text(my_w, wrap="word", font=("Arial", 10))
text_area.pack(expand=True, fill="both", padx=10, pady=10)

# Start the GUI event loop
my_w.mainloop()

Export SQLite database table data to CSV file by using Dataframe
Tkinter Projects Projects Tkinter using Pandas DataFrame

Subscribe to our YouTube Channel here


Subscribe

* indicates required
Subscribe to plus2net

    plus2net.com







    Python Video Tutorials
    Python SQLite Video Tutorials
    Python MySQL Video Tutorials
    Python Tkinter Video Tutorials
    We use cookies to improve your browsing experience. . Learn more
    HTML MySQL PHP JavaScript ASP Photoshop Articles FORUM . Contact us
    ©2000-2024 plus2net.com All rights reserved worldwide Privacy Policy Disclaimer