# data-comparison/gui_app.py
# 2025-08-20 14:03:31 +07:00
#
# 319 lines
# 12 KiB
# Python

import tkinter as tk
from itertools import islice
from pathlib import Path
from tkinter import ttk, filedialog, messagebox

import pandas as pd

from data_comparator import KSTCoordiComparator
class DataComparisonGUI:
    """Tkinter front end for the KST vs Coordi comparison of an Excel file.

    The window offers a file picker, an "Analyze Data" button, a notebook
    with a summary tab and three result tables (matched, KST only, Coordi
    only) and a status bar.  The comparison itself is delegated to
    ``KSTCoordiComparator``; this class only drives it and renders what it
    returns.

    Attributes created for each result table (``matched``, ``kst_only``,
    ``coordi_only``) follow the pattern ``<name>_tree`` and
    ``<name>_count_var``.
    """

    # Column identifier -> initial pixel width, shared by all result tables.
    TABLE_COLUMNS = {
        "Title": 300,
        "Episode": 100,
        "Sheet": 120,
        "Row": 80,
        "Reason": 300,
    }

    # The matched tab only lists this many rows; its counter still shows
    # the full number of matches.
    MATCHED_PREVIEW_LIMIT = 100

    def __init__(self, root):
        """Configure the top-level window *root* and build all widgets."""
        self.root = root
        self.root.title("KST vs Coordi Data Comparison Tool")
        self.root.geometry("1200x800")
        # Both stay None until an analysis has completed successfully.
        self.comparator = None
        self.comparison_data = None
        self.setup_ui()

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------
    def setup_ui(self):
        """Build the title, file selection row, result notebook and status bar."""
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=tk.NSEW)

        # Let the main frame follow the window size and give the notebook
        # row (row 2) all extra vertical space.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        main_frame.columnconfigure(1, weight=1)
        main_frame.rowconfigure(2, weight=1)

        title_label = ttk.Label(
            main_frame,
            text="KST vs Coordi Data Comparison",
            font=("Arial", 16, "bold"),
        )
        title_label.grid(row=0, column=0, columnspan=3, pady=(0, 20))

        # File selection row: label, path entry, Browse and Analyze buttons.
        file_frame = ttk.LabelFrame(main_frame, text="File Selection", padding="10")
        file_frame.grid(row=1, column=0, columnspan=3, sticky=tk.EW, pady=(0, 10))
        file_frame.columnconfigure(1, weight=1)

        ttk.Label(file_frame, text="Excel File:").grid(
            row=0, column=0, sticky=tk.W, padx=(0, 10)
        )
        self.file_path_var = tk.StringVar(value="data/sample-data.xlsx")
        self.file_entry = ttk.Entry(
            file_frame, textvariable=self.file_path_var, width=50
        )
        self.file_entry.grid(row=0, column=1, sticky=tk.EW, padx=(0, 10))

        browse_btn = ttk.Button(file_frame, text="Browse", command=self.browse_file)
        browse_btn.grid(row=0, column=2)
        analyze_btn = ttk.Button(
            file_frame, text="Analyze Data", command=self.analyze_data
        )
        analyze_btn.grid(row=0, column=3, padx=(10, 0))

        # Results notebook; tab order is the order of the calls below.
        self.notebook = ttk.Notebook(main_frame)
        self.notebook.grid(row=2, column=0, columnspan=3, sticky=tk.NSEW)
        self.create_summary_tab()
        self.create_matched_tab()
        self.create_kst_only_tab()
        self.create_coordi_only_tab()

        self.status_var = tk.StringVar(
            value="Ready - Select an Excel file and click 'Analyze Data'"
        )
        status_bar = ttk.Label(
            main_frame, textvariable=self.status_var, relief=tk.SUNKEN
        )
        status_bar.grid(row=3, column=0, columnspan=3, sticky=tk.EW, pady=(10, 0))

    def create_summary_tab(self):
        """Add the "Summary" tab with the summary and reconciliation text boxes."""
        summary_frame = ttk.Frame(self.notebook)
        self.notebook.add(summary_frame, text="Summary")

        summary_frame.columnconfigure(0, weight=1)
        # NOTE(review): the weight is on row 1 (reconciliation), while only
        # the row 0 frame is sticky in all directions -- confirm this is the
        # intended row before changing it.
        summary_frame.rowconfigure(1, weight=1)

        summary_text_frame = ttk.LabelFrame(
            summary_frame, text="Comparison Summary", padding="10"
        )
        summary_text_frame.grid(row=0, column=0, sticky=tk.NSEW, padx=10, pady=10)
        summary_text_frame.columnconfigure(0, weight=1)
        summary_text_frame.rowconfigure(0, weight=1)

        self.summary_text = tk.Text(summary_text_frame, wrap=tk.WORD, height=15)
        summary_scrollbar = ttk.Scrollbar(
            summary_text_frame, orient=tk.VERTICAL, command=self.summary_text.yview
        )
        self.summary_text.configure(yscrollcommand=summary_scrollbar.set)
        self.summary_text.grid(row=0, column=0, sticky=tk.NSEW)
        summary_scrollbar.grid(row=0, column=1, sticky=tk.NS)

        reconcile_frame = ttk.LabelFrame(
            summary_frame, text="Reconciliation Results", padding="10"
        )
        reconcile_frame.grid(row=1, column=0, sticky=tk.EW, padx=10, pady=(0, 10))
        reconcile_frame.columnconfigure(0, weight=1)

        self.reconcile_text = tk.Text(reconcile_frame, wrap=tk.WORD, height=8)
        reconcile_scrollbar = ttk.Scrollbar(
            reconcile_frame, orient=tk.VERTICAL, command=self.reconcile_text.yview
        )
        self.reconcile_text.configure(yscrollcommand=reconcile_scrollbar.set)
        self.reconcile_text.grid(row=0, column=0, sticky=tk.EW)
        reconcile_scrollbar.grid(row=0, column=1, sticky=tk.NS)

    def create_matched_tab(self):
        """Add the "Matched Items" tab and its table."""
        matched_frame = ttk.Frame(self.notebook)
        self.notebook.add(matched_frame, text="Matched Items")
        self.create_data_table(matched_frame, "matched")

    def create_kst_only_tab(self):
        """Add the "KST Only" tab and its table."""
        kst_frame = ttk.Frame(self.notebook)
        self.notebook.add(kst_frame, text="KST Only")
        self.create_data_table(kst_frame, "kst_only")

    def create_coordi_only_tab(self):
        """Add the "Coordi Only" tab and its table."""
        coordi_frame = ttk.Frame(self.notebook)
        self.notebook.add(coordi_frame, text="Coordi Only")
        self.create_data_table(coordi_frame, "coordi_only")

    def create_data_table(self, parent, table_type):
        """Build a counter label and a scrollable table inside *parent*.

        The widgets are published on the instance as
        ``<table_type>_count_var`` (a ``tk.StringVar``) and
        ``<table_type>_tree`` (a ``ttk.Treeview``) so the update methods can
        reach them later.
        """
        parent.columnconfigure(0, weight=1)
        parent.rowconfigure(1, weight=1)

        # Counter line above the table.
        info_frame = ttk.Frame(parent)
        info_frame.grid(row=0, column=0, sticky=tk.EW, padx=10, pady=10)
        info_frame.columnconfigure(1, weight=1)

        ttk.Label(info_frame, text="Count:").grid(row=0, column=0, padx=(0, 10))
        count_var = tk.StringVar(value="0")
        setattr(self, f"{table_type}_count_var", count_var)
        count_display = ttk.Label(
            info_frame, textvariable=count_var, font=("Arial", 10, "bold")
        )
        count_display.grid(row=0, column=1, sticky=tk.W)

        # Table with vertical and horizontal scrollbars.
        table_frame = ttk.Frame(parent)
        table_frame.grid(row=1, column=0, sticky=tk.NSEW, padx=10, pady=(0, 10))
        table_frame.columnconfigure(0, weight=1)
        table_frame.rowconfigure(0, weight=1)

        tree = ttk.Treeview(
            table_frame,
            columns=tuple(self.TABLE_COLUMNS),
            show="headings",
            height=20,
        )
        for column_name, width in self.TABLE_COLUMNS.items():
            tree.heading(column_name, text=column_name)
            tree.column(column_name, width=width)

        v_scrollbar = ttk.Scrollbar(
            table_frame, orient=tk.VERTICAL, command=tree.yview
        )
        h_scrollbar = ttk.Scrollbar(
            table_frame, orient=tk.HORIZONTAL, command=tree.xview
        )
        tree.configure(
            yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set
        )
        tree.grid(row=0, column=0, sticky=tk.NSEW)
        v_scrollbar.grid(row=0, column=1, sticky=tk.NS)
        h_scrollbar.grid(row=1, column=0, sticky=tk.EW)

        setattr(self, f"{table_type}_tree", tree)

    # ------------------------------------------------------------------
    # User actions
    # ------------------------------------------------------------------
    def browse_file(self):
        """Ask for an Excel file and copy the chosen path into the entry.

        Cancelling the dialog leaves the current path untouched.
        """
        file_path = filedialog.askopenfilename(
            title="Select Excel File",
            filetypes=[("Excel files", "*.xlsx *.xls"), ("All files", "*.*")],
        )
        if file_path:
            self.file_path_var.set(file_path)

    def analyze_data(self):
        """Run the comparison on the selected file and refresh every tab.

        Every failure is reported through an error dialog.  Once the
        analysis has started, a failure also sets the status bar to
        "Analysis failed", so it never stays on "Analyzing data...".
        ``self.comparator`` and ``self.comparison_data`` are replaced
        together, and only after the new comparison succeeded.
        """
        file_path = self.file_path_var.get().strip()
        if not file_path:
            messagebox.showerror("Error", "Please select an Excel file")
            return
        # is_file() also rejects directories, which cannot be analysed.
        if not Path(file_path).is_file():
            messagebox.showerror("Error", f"File not found: {file_path}")
            return

        self.status_var.set("Analyzing data...")
        # Redraw the status bar without dispatching user events, so a
        # second click cannot re-enter this handler mid-analysis.
        self.root.update_idletasks()

        # Top-level UI boundary: any error from the comparator is shown to
        # the user instead of propagating into the Tk event loop.
        try:
            comparator = KSTCoordiComparator(file_path)
            if not comparator.load_data():
                messagebox.showerror("Error", "Failed to load Excel data")
                self.status_var.set("Analysis failed")
                return

            comparison_data = comparator.get_comparison_summary()
            self.comparator = comparator
            self.comparison_data = comparison_data

            self.update_summary()
            self.update_data_tables()
            self.status_var.set("Analysis complete!")
        except Exception as e:
            messagebox.showerror("Error", f"Analysis failed: {e}")
            self.status_var.set("Analysis failed")

    # ------------------------------------------------------------------
    # Rendering of results
    # ------------------------------------------------------------------
    def update_summary(self):
        """Rewrite both text boxes of the summary tab from ``comparison_data``.

        Does nothing when no comparison data is available.
        """
        if not self.comparison_data:
            return

        data = self.comparison_data
        # Build both texts first so a malformed result cannot leave the
        # tab half updated.
        summary = self._format_summary(data)
        reconcile_info = self._format_reconciliation(data['reconciliation'])

        self._replace_text(self.summary_text, summary)
        self._replace_text(self.reconcile_text, reconcile_info)

    def update_data_tables(self):
        """Refill the matched, KST-only and Coordi-only tables.

        Does nothing when no comparison data is available.  The matched
        table shows at most ``MATCHED_PREVIEW_LIMIT`` rows while its counter
        shows the full matched count.
        """
        if not self.comparison_data:
            return

        mismatches = self.comparison_data['mismatch_details']

        matched_count = self.comparison_data['matched_items_count']
        self.matched_count_var.set(f"{matched_count:,}")

        matched_rows = []
        if self.comparator:
            categorization = self.comparator.categorize_mismatches()
            preview = islice(
                categorization['matched_items'], self.MATCHED_PREVIEW_LIMIT
            )
            # row_index + 1: presumably turns a 0-based index into a
            # 1-based row number -- confirm against the comparator.
            matched_rows = [
                (
                    item.title,
                    item.episode,
                    item.source_sheet,
                    item.row_index + 1,
                    "Perfect match",
                )
                for item in preview
            ]
        self._fill_tree(self.matched_tree, matched_rows)

        self._show_mismatches(
            self.kst_only_tree, self.kst_only_count_var, mismatches['kst_only']
        )
        self._show_mismatches(
            self.coordi_only_tree,
            self.coordi_only_count_var,
            mismatches['coordi_only'],
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _format_summary(data):
        """Return the text for the "Comparison Summary" box."""
        counts = data['original_counts']
        mismatches = data['mismatches']
        total_mismatches = (
            mismatches['kst_only_count']
            + mismatches['coordi_only_count']
            + mismatches['kst_duplicates_count']
            + mismatches['coordi_duplicates_count']
        )
        lines = [
            "COMPARISON SUMMARY",
            "=" * 50,
            "Original Counts:",
            f"KST Total: {counts['kst_total']:,}",
            f"Coordi Total: {counts['coordi_total']:,}",
            f"Matched Items: {data['matched_items_count']:,}",
            "Mismatches:",
            f"KST Only: {mismatches['kst_only_count']:,}",
            f"Coordi Only: {mismatches['coordi_only_count']:,}",
            f"KST Duplicates: {mismatches['kst_duplicates_count']:,}",
            f"Coordi Duplicates: {mismatches['coordi_duplicates_count']:,}",
            f"Total Mismatches: {total_mismatches:,}",
            "",  # keeps the trailing newline of the text block
        ]
        return "\n".join(lines)

    @staticmethod
    def _format_reconciliation(reconcile):
        """Return the text for the "Reconciliation Results" box."""
        if reconcile['counts_match_after_reconciliation']:
            verdict = '✅ YES'
        else:
            verdict = '❌ NO'
        lines = [
            "RECONCILIATION RESULTS",
            "=" * 40,
            "After excluding mismatches:",
            f"KST Count: {reconcile['reconciled_kst_count']:,}",
            f"Coordi Count: {reconcile['reconciled_coordi_count']:,}",
            f"Counts Match: {verdict}",
            "Items to exclude:",
            f"From KST: {reconcile['items_to_exclude_from_kst']:,}",
            f"From Coordi: {reconcile['items_to_exclude_from_coordi']:,}",
            "Final Result: Both datasets will have "
            f"{reconcile['reconciled_kst_count']:,} matching items "
            "after reconciliation.",
            "",  # keeps the trailing newline of the text block
        ]
        return "\n".join(lines)

    @staticmethod
    def _replace_text(widget, content):
        """Replace the whole content of a ``tk.Text`` widget with *content*."""
        # "1.0" is the Tk text index of line 1, character 0.
        widget.delete("1.0", tk.END)
        widget.insert(tk.END, content)

    @staticmethod
    def _fill_tree(tree, rows):
        """Remove every row from *tree* and append *rows* in order."""
        tree.delete(*tree.get_children())
        for row in rows:
            tree.insert("", tk.END, values=row)

    def _show_mismatches(self, tree, count_var, entries):
        """Show the mismatch dicts *entries* in *tree* and update its counter."""
        count_var.set(f"{len(entries):,}")
        self._fill_tree(
            tree,
            (
                (
                    entry['title'],
                    entry['episode'],
                    entry['sheet'],
                    entry['row_index'] + 1,
                    entry['reason'],
                )
                for entry in entries
            ),
        )
def main():
    """Open the comparison window and block until it is closed."""
    window = tk.Tk()
    # Keep a reference to the GUI object for the lifetime of the event loop.
    gui = DataComparisonGUI(window)
    window.mainloop()


# Start the GUI only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()