"""Tkinter GUI for comparing KST and Coordi data loaded from an Excel file."""

import tkinter as tk
from itertools import islice
from pathlib import Path
from tkinter import ttk, filedialog, messagebox

import pandas as pd

from data_comparator import KSTCoordiComparator


class DataComparisonGUI:
    """Tkinter front end for the KST vs Coordi comparison.

    The window contains a file selector, a notebook with a summary tab and
    three result tables (matched, KST only, Coordi only) and a status bar.
    The comparison itself is delegated to ``KSTCoordiComparator``.
    """

    # Number of matched rows listed in the "Matched Items" table. The count
    # label above the table still reports the full total.
    MATCHED_PREVIEW_LIMIT = 100

    # Heading text -> initial pixel width, shared by every result table.
    TABLE_COLUMNS = {
        "Title": 300,
        "Episode": 100,
        "Sheet": 120,
        "Row": 80,
        "Reason": 300,
    }

    # Grid "sticky" combinations used throughout the layout.
    _STICKY_ALL = (tk.W, tk.E, tk.N, tk.S)
    _STICKY_HORIZONTAL = (tk.W, tk.E)
    _STICKY_VERTICAL = (tk.N, tk.S)

    def __init__(self, root):
        """Configure the hosting window and build every widget.

        Args:
            root: The Tk window that hosts the GUI.
        """
        self.root = root
        self.root.title("KST vs Coordi Data Comparison Tool")
        self.root.geometry("1200x800")

        # Both are assigned by analyze_data().
        self.comparator = None
        self.comparison_data = None

        self.setup_ui()

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------
    def setup_ui(self):
        """Lay out the title, file selector, result notebook and status bar."""
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=self._STICKY_ALL)

        # The main frame follows the window size; inside it the notebook
        # row (2) and the middle column (1) absorb the spare space.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        main_frame.columnconfigure(1, weight=1)
        main_frame.rowconfigure(2, weight=1)

        title_label = ttk.Label(
            main_frame,
            text="KST vs Coordi Data Comparison",
            font=("Arial", 16, "bold"),
        )
        title_label.grid(row=0, column=0, columnspan=3, pady=(0, 20))

        self._build_file_selector(main_frame)

        self.notebook = ttk.Notebook(main_frame)
        self.notebook.grid(row=2, column=0, columnspan=3, sticky=self._STICKY_ALL)

        self.create_summary_tab()
        self.create_matched_tab()
        self.create_kst_only_tab()
        self.create_coordi_only_tab()

        self.status_var = tk.StringVar(
            value="Ready - Select an Excel file and click 'Analyze Data'"
        )
        status_bar = ttk.Label(main_frame, textvariable=self.status_var, relief=tk.SUNKEN)
        status_bar.grid(
            row=3, column=0, columnspan=3, sticky=self._STICKY_HORIZONTAL, pady=(10, 0)
        )

    def _build_file_selector(self, parent):
        """Build the "File Selection" row: label, path entry and two buttons."""
        file_frame = ttk.LabelFrame(parent, text="File Selection", padding="10")
        file_frame.grid(
            row=1, column=0, columnspan=3, sticky=self._STICKY_HORIZONTAL, pady=(0, 10)
        )
        file_frame.columnconfigure(1, weight=1)  # the path entry takes spare width

        ttk.Label(file_frame, text="Excel File:").grid(
            row=0, column=0, sticky=tk.W, padx=(0, 10)
        )

        self.file_path_var = tk.StringVar(value="data/sample-data.xlsx")
        self.file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=50)
        self.file_entry.grid(row=0, column=1, sticky=self._STICKY_HORIZONTAL, padx=(0, 10))

        browse_btn = ttk.Button(file_frame, text="Browse", command=self.browse_file)
        browse_btn.grid(row=0, column=2)

        analyze_btn = ttk.Button(file_frame, text="Analyze Data", command=self.analyze_data)
        analyze_btn.grid(row=0, column=3, padx=(10, 0))

    def _add_scrolled_text(self, parent, height, sticky):
        """Create a word-wrapping Text with a vertical scrollbar in ``parent``.

        Args:
            parent: Container widget; the text goes in grid cell (0, 0) and
                the scrollbar in cell (0, 1).
            height: Height of the text widget in text lines.
            sticky: Grid ``sticky`` value applied to the text widget.

        Returns:
            The created ``tk.Text`` widget.
        """
        text = tk.Text(parent, wrap=tk.WORD, height=height)
        scrollbar = ttk.Scrollbar(parent, orient=tk.VERTICAL, command=text.yview)
        text.configure(yscrollcommand=scrollbar.set)

        text.grid(row=0, column=0, sticky=sticky)
        scrollbar.grid(row=0, column=1, sticky=self._STICKY_VERTICAL)
        return text

    def create_summary_tab(self):
        """Add the "Summary" tab with the summary and reconciliation texts."""
        summary_frame = ttk.Frame(self.notebook)
        self.notebook.add(summary_frame, text="Summary")

        summary_frame.columnconfigure(0, weight=1)
        # Row 0 holds the only frame that is sticky on N/S, so it is the row
        # that must receive the spare height for the summary text to grow.
        summary_frame.rowconfigure(0, weight=1)

        summary_text_frame = ttk.LabelFrame(
            summary_frame, text="Comparison Summary", padding="10"
        )
        summary_text_frame.grid(
            row=0, column=0, sticky=self._STICKY_ALL, padx=10, pady=10
        )
        summary_text_frame.columnconfigure(0, weight=1)
        summary_text_frame.rowconfigure(0, weight=1)
        self.summary_text = self._add_scrolled_text(
            summary_text_frame, height=15, sticky=self._STICKY_ALL
        )

        reconcile_frame = ttk.LabelFrame(
            summary_frame, text="Reconciliation Results", padding="10"
        )
        reconcile_frame.grid(
            row=1, column=0, sticky=self._STICKY_HORIZONTAL, padx=10, pady=(0, 10)
        )
        reconcile_frame.columnconfigure(0, weight=1)
        self.reconcile_text = self._add_scrolled_text(
            reconcile_frame, height=8, sticky=self._STICKY_HORIZONTAL
        )

    def _add_table_tab(self, tab_title, table_type):
        """Add a notebook tab named ``tab_title`` holding one result table."""
        frame = ttk.Frame(self.notebook)
        self.notebook.add(frame, text=tab_title)
        self.create_data_table(frame, table_type)

    def create_matched_tab(self):
        """Add the "Matched Items" tab (table type ``matched``)."""
        self._add_table_tab("Matched Items", "matched")

    def create_kst_only_tab(self):
        """Add the "KST Only" tab (table type ``kst_only``)."""
        self._add_table_tab("KST Only", "kst_only")

    def create_coordi_only_tab(self):
        """Add the "Coordi Only" tab (table type ``coordi_only``)."""
        self._add_table_tab("Coordi Only", "coordi_only")

    def create_data_table(self, parent, table_type):
        """Build a count label and a scrollable table inside ``parent``.

        The widgets are published on the instance under names derived from
        ``table_type``: ``self.<table_type>_count_var`` (a ``tk.StringVar``)
        and ``self.<table_type>_tree`` (a ``ttk.Treeview``).

        Args:
            parent: Frame that receives the widgets.
            table_type: Prefix for the two instance attributes.
        """
        parent.columnconfigure(0, weight=1)
        parent.rowconfigure(1, weight=1)  # the table row takes the spare height

        # Count line above the table.
        info_frame = ttk.Frame(parent)
        info_frame.grid(row=0, column=0, sticky=self._STICKY_HORIZONTAL, padx=10, pady=10)
        info_frame.columnconfigure(1, weight=1)

        ttk.Label(info_frame, text="Count:").grid(row=0, column=0, padx=(0, 10))
        count_var = tk.StringVar(value="0")
        count_display = ttk.Label(
            info_frame, textvariable=count_var, font=("Arial", 10, "bold")
        )
        count_display.grid(row=0, column=1, sticky=tk.W)

        # Table with vertical and horizontal scrollbars.
        table_frame = ttk.Frame(parent)
        table_frame.grid(
            row=1, column=0, sticky=self._STICKY_ALL, padx=10, pady=(0, 10)
        )
        table_frame.columnconfigure(0, weight=1)
        table_frame.rowconfigure(0, weight=1)

        tree = ttk.Treeview(
            table_frame,
            columns=tuple(self.TABLE_COLUMNS),
            show="headings",
            height=20,
        )
        for column_name, width in self.TABLE_COLUMNS.items():
            tree.heading(column_name, text=column_name)
            tree.column(column_name, width=width)

        v_scrollbar = ttk.Scrollbar(table_frame, orient=tk.VERTICAL, command=tree.yview)
        h_scrollbar = ttk.Scrollbar(table_frame, orient=tk.HORIZONTAL, command=tree.xview)
        tree.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)

        tree.grid(row=0, column=0, sticky=self._STICKY_ALL)
        v_scrollbar.grid(row=0, column=1, sticky=self._STICKY_VERTICAL)
        h_scrollbar.grid(row=1, column=0, sticky=self._STICKY_HORIZONTAL)

        setattr(self, f"{table_type}_count_var", count_var)
        setattr(self, f"{table_type}_tree", tree)

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------
    def browse_file(self):
        """Ask for an Excel file and copy the chosen path into the entry.

        The entry is left untouched when the dialog returns an empty path.
        """
        file_path = filedialog.askopenfilename(
            title="Select Excel File",
            filetypes=[("Excel files", "*.xlsx *.xls"), ("All files", "*.*")],
        )
        if file_path:
            self.file_path_var.set(file_path)

    def analyze_data(self):
        """Run the comparison for the selected file and refresh every view.

        Shows an error dialog and returns early when no path is entered or
        the path does not exist. Any failure during loading or analysis is
        reported in a dialog and the status bar is set to "Analysis failed".
        """
        file_path = self.file_path_var.get().strip()

        if not file_path:
            messagebox.showerror("Error", "Please select an Excel file")
            return

        if not Path(file_path).exists():
            messagebox.showerror("Error", f"File not found: {file_path}")
            return

        self.status_var.set("Analyzing data...")
        # Repaint the status bar before the blocking work starts.
        # update_idletasks() redraws without dispatching queued user events,
        # so a pending click cannot re-enter this method mid-analysis.
        self.root.update_idletasks()

        try:
            self.comparator = KSTCoordiComparator(file_path)
            if not self.comparator.load_data():
                messagebox.showerror("Error", "Failed to load Excel data")
                # Do not leave the status bar stuck on "Analyzing data...".
                self.status_var.set("Analysis failed")
                return

            self.comparison_data = self.comparator.get_comparison_summary()
            self.update_summary()
            self.update_data_tables()
        except Exception as exc:
            # Top-level UI boundary: report the problem instead of letting it
            # disappear inside the Tk callback machinery.
            messagebox.showerror("Error", f"Analysis failed: {exc}")
            self.status_var.set("Analysis failed")
        else:
            self.status_var.set("Analysis complete!")

    # ------------------------------------------------------------------
    # View refresh
    # ------------------------------------------------------------------
    @staticmethod
    def _format_summary(data):
        """Return the text of the "Comparison Summary" box for ``data``."""
        counts = data['original_counts']
        mismatches = data['mismatches']
        total_mismatches = (
            mismatches['kst_only_count']
            + mismatches['coordi_only_count']
            + mismatches['kst_duplicates_count']
            + mismatches['coordi_duplicates_count']
        )
        lines = [
            "COMPARISON SUMMARY",
            "=" * 50,
            "",
            "Original Counts:",
            f"KST Total: {counts['kst_total']:,}",
            f"Coordi Total: {counts['coordi_total']:,}",
            "",
            f"Matched Items: {data['matched_items_count']:,}",
            "",
            "Mismatches:",
            f"KST Only: {mismatches['kst_only_count']:,}",
            f"Coordi Only: {mismatches['coordi_only_count']:,}",
            f"KST Duplicates: {mismatches['kst_duplicates_count']:,}",
            f"Coordi Duplicates: {mismatches['coordi_duplicates_count']:,}",
            "",
            f"Total Mismatches: {total_mismatches:,}",
            "",  # keeps the trailing newline
        ]
        return "\n".join(lines)

    @staticmethod
    def _format_reconciliation(reconcile):
        """Return the text of the "Reconciliation Results" box."""
        counts_match = '✅ YES' if reconcile['counts_match_after_reconciliation'] else '❌ NO'
        lines = [
            "RECONCILIATION RESULTS",
            "=" * 40,
            "",
            "After excluding mismatches:",
            f"KST Count: {reconcile['reconciled_kst_count']:,}",
            f"Coordi Count: {reconcile['reconciled_coordi_count']:,}",
            f"Counts Match: {counts_match}",
            "",
            "Items to exclude:",
            f"From KST: {reconcile['items_to_exclude_from_kst']:,}",
            f"From Coordi: {reconcile['items_to_exclude_from_coordi']:,}",
            "",
            "Final Result: Both datasets will have "
            f"{reconcile['reconciled_kst_count']:,} matching items after reconciliation.",
            "",  # keeps the trailing newline
        ]
        return "\n".join(lines)

    def update_summary(self):
        """Rewrite both text boxes on the Summary tab from ``comparison_data``.

        Does nothing while ``comparison_data`` is empty or ``None``.
        """
        if not self.comparison_data:
            return

        # Clear previous content.
        self.summary_text.delete("1.0", tk.END)
        self.reconcile_text.delete("1.0", tk.END)

        data = self.comparison_data
        self.summary_text.insert(tk.END, self._format_summary(data))
        self.reconcile_text.insert(
            tk.END, self._format_reconciliation(data['reconciliation'])
        )

    @staticmethod
    def _replace_tree_rows(tree, rows):
        """Remove every row from ``tree`` and insert ``rows`` in order."""
        tree.delete(*tree.get_children())
        for row in rows:
            tree.insert("", tk.END, values=row)

    @staticmethod
    def _mismatch_row(mismatch):
        """Convert one mismatch dict into a table row with a 1-based row number."""
        return (
            mismatch['title'],
            mismatch['episode'],
            mismatch['sheet'],
            mismatch['row_index'] + 1,
            mismatch['reason'],
        )

    def update_data_tables(self):
        """Refresh the three result tables and their count labels.

        Does nothing while ``comparison_data`` is empty or ``None``. The
        matched table lists at most ``MATCHED_PREVIEW_LIMIT`` rows, while its
        count label shows the full ``matched_items_count``.
        """
        if not self.comparison_data:
            return

        mismatches = self.comparison_data['mismatch_details']

        # Matched items: full count in the label, a bounded preview in the table.
        matched_count = self.comparison_data['matched_items_count']
        self.matched_count_var.set(f"{matched_count:,}")

        matched_rows = []
        if self.comparator:
            matched_items = self.comparator.categorize_mismatches()['matched_items']
            # islice takes the preview without copying the whole collection.
            matched_rows = [
                (item.title, item.episode, item.source_sheet, item.row_index + 1,
                 "Perfect match")
                for item in islice(matched_items, self.MATCHED_PREVIEW_LIMIT)
            ]
        self._replace_tree_rows(self.matched_tree, matched_rows)

        # Items present only in KST.
        kst_only = mismatches['kst_only']
        self.kst_only_count_var.set(f"{len(kst_only):,}")
        self._replace_tree_rows(
            self.kst_only_tree, (self._mismatch_row(entry) for entry in kst_only)
        )

        # Items present only in Coordi.
        coordi_only = mismatches['coordi_only']
        self.coordi_only_count_var.set(f"{len(coordi_only):,}")
        self._replace_tree_rows(
            self.coordi_only_tree, (self._mismatch_row(entry) for entry in coordi_only)
        )


def main():
    """Create the Tk root window, build the comparison GUI in it and run the event loop."""
    window = tk.Tk()
    # Constructing the GUI object populates the window with its widgets.
    gui = DataComparisonGUI(window)
    window.mainloop()


if __name__ == "__main__":
    # Launch the application only when this file is executed as a script.
    main()