compare data logic
This commit is contained in:
parent
47097f6be4
commit
1f88db5fb9
@ -1,6 +1,6 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import Dict, List, Tuple, Any, Set
|
from typing import Dict, List, Tuple, Any
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -48,6 +48,8 @@ class KSTCoordiComparator:
|
|||||||
coordi_items = set()
|
coordi_items = set()
|
||||||
kst_details = []
|
kst_details = []
|
||||||
coordi_details = []
|
coordi_details = []
|
||||||
|
kst_all_items = [] # Keep all items including duplicates
|
||||||
|
coordi_all_items = [] # Keep all items including duplicates
|
||||||
|
|
||||||
for sheet_name, df in self.data.items():
|
for sheet_name, df in self.data.items():
|
||||||
columns = df.columns.tolist()
|
columns = df.columns.tolist()
|
||||||
@ -96,6 +98,7 @@ class KSTCoordiComparator:
|
|||||||
if has_kst_data:
|
if has_kst_data:
|
||||||
item = ComparisonItem(kst_title, kst_episode, sheet_name, idx)
|
item = ComparisonItem(kst_title, kst_episode, sheet_name, idx)
|
||||||
kst_items.add(item)
|
kst_items.add(item)
|
||||||
|
kst_all_items.append(item) # Keep all items for duplicate detection
|
||||||
kst_details.append({
|
kst_details.append({
|
||||||
'title': kst_title,
|
'title': kst_title,
|
||||||
'episode': kst_episode,
|
'episode': kst_episode,
|
||||||
@ -122,6 +125,7 @@ class KSTCoordiComparator:
|
|||||||
if has_coordi_data:
|
if has_coordi_data:
|
||||||
item = ComparisonItem(coordi_title, coordi_episode, sheet_name, idx)
|
item = ComparisonItem(coordi_title, coordi_episode, sheet_name, idx)
|
||||||
coordi_items.add(item)
|
coordi_items.add(item)
|
||||||
|
coordi_all_items.append(item) # Keep all items for duplicate detection
|
||||||
coordi_details.append({
|
coordi_details.append({
|
||||||
'title': coordi_title,
|
'title': coordi_title,
|
||||||
'episode': coordi_episode,
|
'episode': coordi_episode,
|
||||||
@ -135,12 +139,16 @@ class KSTCoordiComparator:
|
|||||||
|
|
||||||
self.kst_items = kst_items
|
self.kst_items = kst_items
|
||||||
self.coordi_items = coordi_items
|
self.coordi_items = coordi_items
|
||||||
|
self.kst_all_items = kst_all_items # Store for duplicate detection
|
||||||
|
self.coordi_all_items = coordi_all_items # Store for duplicate detection
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'kst_items': kst_items,
|
'kst_items': kst_items,
|
||||||
'coordi_items': coordi_items,
|
'coordi_items': coordi_items,
|
||||||
'kst_details': kst_details,
|
'kst_details': kst_details,
|
||||||
'coordi_details': coordi_details
|
'coordi_details': coordi_details,
|
||||||
|
'kst_all_items': kst_all_items,
|
||||||
|
'coordi_all_items': coordi_all_items
|
||||||
}
|
}
|
||||||
|
|
||||||
def categorize_mismatches(self) -> Dict[str, Any]:
|
def categorize_mismatches(self) -> Dict[str, Any]:
|
||||||
@ -154,8 +162,8 @@ class KSTCoordiComparator:
|
|||||||
coordi_only_items = self.coordi_items - self.kst_items
|
coordi_only_items = self.coordi_items - self.kst_items
|
||||||
|
|
||||||
# Find duplicates within each dataset
|
# Find duplicates within each dataset
|
||||||
kst_duplicates = self._find_duplicates_in_set(self.kst_items)
|
kst_duplicates = self._find_duplicates_in_list(self.kst_all_items)
|
||||||
coordi_duplicates = self._find_duplicates_in_set(self.coordi_items)
|
coordi_duplicates = self._find_duplicates_in_list(self.coordi_all_items)
|
||||||
|
|
||||||
categorization = {
|
categorization = {
|
||||||
'matched_items': list(matched_items),
|
'matched_items': list(matched_items),
|
||||||
@ -190,10 +198,8 @@ class KSTCoordiComparator:
|
|||||||
|
|
||||||
return categorization
|
return categorization
|
||||||
|
|
||||||
def _find_duplicates_in_set(self, items_set: Set[ComparisonItem]) -> List[ComparisonItem]:
|
def _find_duplicates_in_list(self, items_list: List[ComparisonItem]) -> List[ComparisonItem]:
|
||||||
"""Find duplicate items within a dataset"""
|
"""Find duplicate items within a dataset"""
|
||||||
# Convert to list to check for duplicates
|
|
||||||
items_list = list(items_set)
|
|
||||||
seen = set()
|
seen = set()
|
||||||
duplicates = []
|
duplicates = []
|
||||||
|
|
||||||
|
|||||||
484
gui_app.py
484
gui_app.py
@ -1,319 +1,319 @@
|
|||||||
import tkinter as tk
|
# import tkinter as tk
|
||||||
from tkinter import ttk, filedialog, messagebox
|
# from tkinter import ttk, filedialog, messagebox
|
||||||
import pandas as pd
|
# import pandas as pd
|
||||||
from pathlib import Path
|
# from pathlib import Path
|
||||||
from data_comparator import KSTCoordiComparator
|
# from data_comparator import KSTCoordiComparator
|
||||||
|
|
||||||
class DataComparisonGUI:
|
# class DataComparisonGUI:
|
||||||
def __init__(self, root):
|
# def __init__(self, root):
|
||||||
self.root = root
|
# self.root = root
|
||||||
self.root.title("KST vs Coordi Data Comparison Tool")
|
# self.root.title("KST vs Coordi Data Comparison Tool")
|
||||||
self.root.geometry("1200x800")
|
# self.root.geometry("1200x800")
|
||||||
|
|
||||||
self.comparator = None
|
# self.comparator = None
|
||||||
self.comparison_data = None
|
# self.comparison_data = None
|
||||||
|
|
||||||
self.setup_ui()
|
# self.setup_ui()
|
||||||
|
|
||||||
def setup_ui(self):
|
# def setup_ui(self):
|
||||||
# Main container
|
# # Main container
|
||||||
main_frame = ttk.Frame(self.root, padding="10")
|
# main_frame = ttk.Frame(self.root, padding="10")
|
||||||
main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
|
# main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
|
||||||
|
|
||||||
# Configure grid weights
|
# # Configure grid weights
|
||||||
self.root.columnconfigure(0, weight=1)
|
# self.root.columnconfigure(0, weight=1)
|
||||||
self.root.rowconfigure(0, weight=1)
|
# self.root.rowconfigure(0, weight=1)
|
||||||
main_frame.columnconfigure(1, weight=1)
|
# main_frame.columnconfigure(1, weight=1)
|
||||||
main_frame.rowconfigure(2, weight=1)
|
# main_frame.rowconfigure(2, weight=1)
|
||||||
|
|
||||||
# Title
|
# # Title
|
||||||
title_label = ttk.Label(main_frame, text="KST vs Coordi Data Comparison",
|
# title_label = ttk.Label(main_frame, text="KST vs Coordi Data Comparison",
|
||||||
font=("Arial", 16, "bold"))
|
# font=("Arial", 16, "bold"))
|
||||||
title_label.grid(row=0, column=0, columnspan=3, pady=(0, 20))
|
# title_label.grid(row=0, column=0, columnspan=3, pady=(0, 20))
|
||||||
|
|
||||||
# File selection frame
|
# # File selection frame
|
||||||
file_frame = ttk.LabelFrame(main_frame, text="File Selection", padding="10")
|
# file_frame = ttk.LabelFrame(main_frame, text="File Selection", padding="10")
|
||||||
file_frame.grid(row=1, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(0, 10))
|
# file_frame.grid(row=1, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(0, 10))
|
||||||
file_frame.columnconfigure(1, weight=1)
|
# file_frame.columnconfigure(1, weight=1)
|
||||||
|
|
||||||
ttk.Label(file_frame, text="Excel File:").grid(row=0, column=0, sticky=tk.W, padx=(0, 10))
|
# ttk.Label(file_frame, text="Excel File:").grid(row=0, column=0, sticky=tk.W, padx=(0, 10))
|
||||||
|
|
||||||
self.file_path_var = tk.StringVar(value="data/sample-data.xlsx")
|
# self.file_path_var = tk.StringVar(value="data/sample-data.xlsx")
|
||||||
self.file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=50)
|
# self.file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=50)
|
||||||
self.file_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=(0, 10))
|
# self.file_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=(0, 10))
|
||||||
|
|
||||||
browse_btn = ttk.Button(file_frame, text="Browse", command=self.browse_file)
|
# browse_btn = ttk.Button(file_frame, text="Browse", command=self.browse_file)
|
||||||
browse_btn.grid(row=0, column=2)
|
# browse_btn.grid(row=0, column=2)
|
||||||
|
|
||||||
analyze_btn = ttk.Button(file_frame, text="Analyze Data", command=self.analyze_data)
|
# analyze_btn = ttk.Button(file_frame, text="Analyze Data", command=self.analyze_data)
|
||||||
analyze_btn.grid(row=0, column=3, padx=(10, 0))
|
# analyze_btn.grid(row=0, column=3, padx=(10, 0))
|
||||||
|
|
||||||
# Results notebook (tabs)
|
# # Results notebook (tabs)
|
||||||
self.notebook = ttk.Notebook(main_frame)
|
# self.notebook = ttk.Notebook(main_frame)
|
||||||
self.notebook.grid(row=2, column=0, columnspan=3, sticky=(tk.W, tk.E, tk.N, tk.S))
|
# self.notebook.grid(row=2, column=0, columnspan=3, sticky=(tk.W, tk.E, tk.N, tk.S))
|
||||||
|
|
||||||
# Create tabs
|
# # Create tabs
|
||||||
self.create_summary_tab()
|
# self.create_summary_tab()
|
||||||
self.create_matched_tab()
|
# self.create_matched_tab()
|
||||||
self.create_kst_only_tab()
|
# self.create_kst_only_tab()
|
||||||
self.create_coordi_only_tab()
|
# self.create_coordi_only_tab()
|
||||||
|
|
||||||
# Status bar
|
# # Status bar
|
||||||
self.status_var = tk.StringVar(value="Ready - Select an Excel file and click 'Analyze Data'")
|
# self.status_var = tk.StringVar(value="Ready - Select an Excel file and click 'Analyze Data'")
|
||||||
status_bar = ttk.Label(main_frame, textvariable=self.status_var, relief=tk.SUNKEN)
|
# status_bar = ttk.Label(main_frame, textvariable=self.status_var, relief=tk.SUNKEN)
|
||||||
status_bar.grid(row=3, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(10, 0))
|
# status_bar.grid(row=3, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(10, 0))
|
||||||
|
|
||||||
def create_summary_tab(self):
|
# def create_summary_tab(self):
|
||||||
# Summary tab
|
# # Summary tab
|
||||||
summary_frame = ttk.Frame(self.notebook)
|
# summary_frame = ttk.Frame(self.notebook)
|
||||||
self.notebook.add(summary_frame, text="Summary")
|
# self.notebook.add(summary_frame, text="Summary")
|
||||||
|
|
||||||
# Configure grid
|
# # Configure grid
|
||||||
summary_frame.columnconfigure(0, weight=1)
|
# summary_frame.columnconfigure(0, weight=1)
|
||||||
summary_frame.rowconfigure(1, weight=1)
|
# summary_frame.rowconfigure(1, weight=1)
|
||||||
|
|
||||||
# Summary text widget
|
# # Summary text widget
|
||||||
summary_text_frame = ttk.LabelFrame(summary_frame, text="Comparison Summary", padding="10")
|
# summary_text_frame = ttk.LabelFrame(summary_frame, text="Comparison Summary", padding="10")
|
||||||
summary_text_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=10, pady=10)
|
# summary_text_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=10, pady=10)
|
||||||
summary_text_frame.columnconfigure(0, weight=1)
|
# summary_text_frame.columnconfigure(0, weight=1)
|
||||||
summary_text_frame.rowconfigure(0, weight=1)
|
# summary_text_frame.rowconfigure(0, weight=1)
|
||||||
|
|
||||||
self.summary_text = tk.Text(summary_text_frame, wrap=tk.WORD, height=15)
|
# self.summary_text = tk.Text(summary_text_frame, wrap=tk.WORD, height=15)
|
||||||
summary_scrollbar = ttk.Scrollbar(summary_text_frame, orient=tk.VERTICAL, command=self.summary_text.yview)
|
# summary_scrollbar = ttk.Scrollbar(summary_text_frame, orient=tk.VERTICAL, command=self.summary_text.yview)
|
||||||
self.summary_text.configure(yscrollcommand=summary_scrollbar.set)
|
# self.summary_text.configure(yscrollcommand=summary_scrollbar.set)
|
||||||
|
|
||||||
self.summary_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
|
# self.summary_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
|
||||||
summary_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
|
# summary_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
|
||||||
|
|
||||||
# Reconciliation info
|
# # Reconciliation info
|
||||||
reconcile_frame = ttk.LabelFrame(summary_frame, text="Reconciliation Results", padding="10")
|
# reconcile_frame = ttk.LabelFrame(summary_frame, text="Reconciliation Results", padding="10")
|
||||||
reconcile_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), padx=10, pady=(0, 10))
|
# reconcile_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), padx=10, pady=(0, 10))
|
||||||
|
|
||||||
self.reconcile_text = tk.Text(reconcile_frame, wrap=tk.WORD, height=8)
|
# self.reconcile_text = tk.Text(reconcile_frame, wrap=tk.WORD, height=8)
|
||||||
reconcile_scrollbar = ttk.Scrollbar(reconcile_frame, orient=tk.VERTICAL, command=self.reconcile_text.yview)
|
# reconcile_scrollbar = ttk.Scrollbar(reconcile_frame, orient=tk.VERTICAL, command=self.reconcile_text.yview)
|
||||||
self.reconcile_text.configure(yscrollcommand=reconcile_scrollbar.set)
|
# self.reconcile_text.configure(yscrollcommand=reconcile_scrollbar.set)
|
||||||
|
|
||||||
self.reconcile_text.grid(row=0, column=0, sticky=(tk.W, tk.E))
|
# self.reconcile_text.grid(row=0, column=0, sticky=(tk.W, tk.E))
|
||||||
reconcile_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
|
# reconcile_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
|
||||||
|
|
||||||
reconcile_frame.columnconfigure(0, weight=1)
|
# reconcile_frame.columnconfigure(0, weight=1)
|
||||||
|
|
||||||
def create_matched_tab(self):
|
# def create_matched_tab(self):
|
||||||
matched_frame = ttk.Frame(self.notebook)
|
# matched_frame = ttk.Frame(self.notebook)
|
||||||
self.notebook.add(matched_frame, text="Matched Items")
|
# self.notebook.add(matched_frame, text="Matched Items")
|
||||||
|
|
||||||
self.create_data_table(matched_frame, "matched")
|
# self.create_data_table(matched_frame, "matched")
|
||||||
|
|
||||||
def create_kst_only_tab(self):
|
# def create_kst_only_tab(self):
|
||||||
kst_frame = ttk.Frame(self.notebook)
|
# kst_frame = ttk.Frame(self.notebook)
|
||||||
self.notebook.add(kst_frame, text="KST Only")
|
# self.notebook.add(kst_frame, text="KST Only")
|
||||||
|
|
||||||
self.create_data_table(kst_frame, "kst_only")
|
# self.create_data_table(kst_frame, "kst_only")
|
||||||
|
|
||||||
def create_coordi_only_tab(self):
|
# def create_coordi_only_tab(self):
|
||||||
coordi_frame = ttk.Frame(self.notebook)
|
# coordi_frame = ttk.Frame(self.notebook)
|
||||||
self.notebook.add(coordi_frame, text="Coordi Only")
|
# self.notebook.add(coordi_frame, text="Coordi Only")
|
||||||
|
|
||||||
self.create_data_table(coordi_frame, "coordi_only")
|
# self.create_data_table(coordi_frame, "coordi_only")
|
||||||
|
|
||||||
def create_data_table(self, parent, table_type):
|
# def create_data_table(self, parent, table_type):
|
||||||
# Configure grid
|
# # Configure grid
|
||||||
parent.columnconfigure(0, weight=1)
|
# parent.columnconfigure(0, weight=1)
|
||||||
parent.rowconfigure(1, weight=1)
|
# parent.rowconfigure(1, weight=1)
|
||||||
|
|
||||||
# Info label
|
# # Info label
|
||||||
info_frame = ttk.Frame(parent)
|
# info_frame = ttk.Frame(parent)
|
||||||
info_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), padx=10, pady=10)
|
# info_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), padx=10, pady=10)
|
||||||
info_frame.columnconfigure(1, weight=1)
|
# info_frame.columnconfigure(1, weight=1)
|
||||||
|
|
||||||
count_label = ttk.Label(info_frame, text="Count:")
|
# count_label = ttk.Label(info_frame, text="Count:")
|
||||||
count_label.grid(row=0, column=0, padx=(0, 10))
|
# count_label.grid(row=0, column=0, padx=(0, 10))
|
||||||
|
|
||||||
count_var = tk.StringVar(value="0")
|
# count_var = tk.StringVar(value="0")
|
||||||
setattr(self, f"{table_type}_count_var", count_var)
|
# setattr(self, f"{table_type}_count_var", count_var)
|
||||||
count_display = ttk.Label(info_frame, textvariable=count_var, font=("Arial", 10, "bold"))
|
# count_display = ttk.Label(info_frame, textvariable=count_var, font=("Arial", 10, "bold"))
|
||||||
count_display.grid(row=0, column=1, sticky=tk.W)
|
# count_display.grid(row=0, column=1, sticky=tk.W)
|
||||||
|
|
||||||
# Table frame
|
# # Table frame
|
||||||
table_frame = ttk.Frame(parent)
|
# table_frame = ttk.Frame(parent)
|
||||||
table_frame.grid(row=1, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=10, pady=(0, 10))
|
# table_frame.grid(row=1, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=10, pady=(0, 10))
|
||||||
table_frame.columnconfigure(0, weight=1)
|
# table_frame.columnconfigure(0, weight=1)
|
||||||
table_frame.rowconfigure(0, weight=1)
|
# table_frame.rowconfigure(0, weight=1)
|
||||||
|
|
||||||
# Create treeview
|
# # Create treeview
|
||||||
columns = ("Title", "Episode", "Sheet", "Row", "Reason")
|
# columns = ("Title", "Episode", "Sheet", "Row", "Reason")
|
||||||
tree = ttk.Treeview(table_frame, columns=columns, show="headings", height=20)
|
# tree = ttk.Treeview(table_frame, columns=columns, show="headings", height=20)
|
||||||
|
|
||||||
# Configure columns
|
# # Configure columns
|
||||||
tree.heading("Title", text="Title")
|
# tree.heading("Title", text="Title")
|
||||||
tree.heading("Episode", text="Episode")
|
# tree.heading("Episode", text="Episode")
|
||||||
tree.heading("Sheet", text="Sheet")
|
# tree.heading("Sheet", text="Sheet")
|
||||||
tree.heading("Row", text="Row")
|
# tree.heading("Row", text="Row")
|
||||||
tree.heading("Reason", text="Reason")
|
# tree.heading("Reason", text="Reason")
|
||||||
|
|
||||||
tree.column("Title", width=300)
|
# tree.column("Title", width=300)
|
||||||
tree.column("Episode", width=100)
|
# tree.column("Episode", width=100)
|
||||||
tree.column("Sheet", width=120)
|
# tree.column("Sheet", width=120)
|
||||||
tree.column("Row", width=80)
|
# tree.column("Row", width=80)
|
||||||
tree.column("Reason", width=300)
|
# tree.column("Reason", width=300)
|
||||||
|
|
||||||
# Scrollbars
|
# # Scrollbars
|
||||||
v_scrollbar = ttk.Scrollbar(table_frame, orient=tk.VERTICAL, command=tree.yview)
|
# v_scrollbar = ttk.Scrollbar(table_frame, orient=tk.VERTICAL, command=tree.yview)
|
||||||
h_scrollbar = ttk.Scrollbar(table_frame, orient=tk.HORIZONTAL, command=tree.xview)
|
# h_scrollbar = ttk.Scrollbar(table_frame, orient=tk.HORIZONTAL, command=tree.xview)
|
||||||
tree.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
|
# tree.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
|
||||||
|
|
||||||
tree.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
|
# tree.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
|
||||||
v_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
|
# v_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
|
||||||
h_scrollbar.grid(row=1, column=0, sticky=(tk.W, tk.E))
|
# h_scrollbar.grid(row=1, column=0, sticky=(tk.W, tk.E))
|
||||||
|
|
||||||
# Store tree widget
|
# # Store tree widget
|
||||||
setattr(self, f"{table_type}_tree", tree)
|
# setattr(self, f"{table_type}_tree", tree)
|
||||||
|
|
||||||
def browse_file(self):
|
# def browse_file(self):
|
||||||
file_path = filedialog.askopenfilename(
|
# file_path = filedialog.askopenfilename(
|
||||||
title="Select Excel File",
|
# title="Select Excel File",
|
||||||
filetypes=[("Excel files", "*.xlsx *.xls"), ("All files", "*.*")]
|
# filetypes=[("Excel files", "*.xlsx *.xls"), ("All files", "*.*")]
|
||||||
)
|
# )
|
||||||
if file_path:
|
# if file_path:
|
||||||
self.file_path_var.set(file_path)
|
# self.file_path_var.set(file_path)
|
||||||
|
|
||||||
def analyze_data(self):
|
# def analyze_data(self):
|
||||||
file_path = self.file_path_var.get().strip()
|
# file_path = self.file_path_var.get().strip()
|
||||||
|
|
||||||
if not file_path:
|
# if not file_path:
|
||||||
messagebox.showerror("Error", "Please select an Excel file")
|
# messagebox.showerror("Error", "Please select an Excel file")
|
||||||
return
|
# return
|
||||||
|
|
||||||
if not Path(file_path).exists():
|
# if not Path(file_path).exists():
|
||||||
messagebox.showerror("Error", f"File not found: {file_path}")
|
# messagebox.showerror("Error", f"File not found: {file_path}")
|
||||||
return
|
# return
|
||||||
|
|
||||||
try:
|
# try:
|
||||||
self.status_var.set("Analyzing data...")
|
# self.status_var.set("Analyzing data...")
|
||||||
self.root.update()
|
# self.root.update()
|
||||||
|
|
||||||
# Create comparator and analyze
|
# # Create comparator and analyze
|
||||||
self.comparator = KSTCoordiComparator(file_path)
|
# self.comparator = KSTCoordiComparator(file_path)
|
||||||
if not self.comparator.load_data():
|
# if not self.comparator.load_data():
|
||||||
messagebox.showerror("Error", "Failed to load Excel data")
|
# messagebox.showerror("Error", "Failed to load Excel data")
|
||||||
return
|
# return
|
||||||
|
|
||||||
# Get comparison results
|
# # Get comparison results
|
||||||
self.comparison_data = self.comparator.get_comparison_summary()
|
# self.comparison_data = self.comparator.get_comparison_summary()
|
||||||
|
|
||||||
# Update GUI
|
# # Update GUI
|
||||||
self.update_summary()
|
# self.update_summary()
|
||||||
self.update_data_tables()
|
# self.update_data_tables()
|
||||||
|
|
||||||
self.status_var.set("Analysis complete!")
|
# self.status_var.set("Analysis complete!")
|
||||||
|
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
messagebox.showerror("Error", f"Analysis failed: {str(e)}")
|
# messagebox.showerror("Error", f"Analysis failed: {str(e)}")
|
||||||
self.status_var.set("Analysis failed")
|
# self.status_var.set("Analysis failed")
|
||||||
|
|
||||||
def update_summary(self):
|
# def update_summary(self):
|
||||||
if not self.comparison_data:
|
# if not self.comparison_data:
|
||||||
return
|
# return
|
||||||
|
|
||||||
# Clear previous content
|
# # Clear previous content
|
||||||
self.summary_text.delete(1.0, tk.END)
|
# self.summary_text.delete(1.0, tk.END)
|
||||||
self.reconcile_text.delete(1.0, tk.END)
|
# self.reconcile_text.delete(1.0, tk.END)
|
||||||
|
|
||||||
data = self.comparison_data
|
# data = self.comparison_data
|
||||||
|
|
||||||
# Summary text
|
# # Summary text
|
||||||
summary = f"""COMPARISON SUMMARY
|
# summary = f"""COMPARISON SUMMARY
|
||||||
{'='*50}
|
# {'='*50}
|
||||||
|
|
||||||
Original Counts:
|
# Original Counts:
|
||||||
KST Total: {data['original_counts']['kst_total']:,}
|
# KST Total: {data['original_counts']['kst_total']:,}
|
||||||
Coordi Total: {data['original_counts']['coordi_total']:,}
|
# Coordi Total: {data['original_counts']['coordi_total']:,}
|
||||||
|
|
||||||
Matched Items: {data['matched_items_count']:,}
|
# Matched Items: {data['matched_items_count']:,}
|
||||||
|
|
||||||
Mismatches:
|
# Mismatches:
|
||||||
KST Only: {data['mismatches']['kst_only_count']:,}
|
# KST Only: {data['mismatches']['kst_only_count']:,}
|
||||||
Coordi Only: {data['mismatches']['coordi_only_count']:,}
|
# Coordi Only: {data['mismatches']['coordi_only_count']:,}
|
||||||
KST Duplicates: {data['mismatches']['kst_duplicates_count']:,}
|
# KST Duplicates: {data['mismatches']['kst_duplicates_count']:,}
|
||||||
Coordi Duplicates: {data['mismatches']['coordi_duplicates_count']:,}
|
# Coordi Duplicates: {data['mismatches']['coordi_duplicates_count']:,}
|
||||||
|
|
||||||
Total Mismatches: {data['mismatches']['kst_only_count'] + data['mismatches']['coordi_only_count'] + data['mismatches']['kst_duplicates_count'] + data['mismatches']['coordi_duplicates_count']:,}
|
# Total Mismatches: {data['mismatches']['kst_only_count'] + data['mismatches']['coordi_only_count'] + data['mismatches']['kst_duplicates_count'] + data['mismatches']['coordi_duplicates_count']:,}
|
||||||
"""
|
# """
|
||||||
|
|
||||||
self.summary_text.insert(tk.END, summary)
|
# self.summary_text.insert(tk.END, summary)
|
||||||
|
|
||||||
# Reconciliation text
|
# # Reconciliation text
|
||||||
reconcile = data['reconciliation']
|
# reconcile = data['reconciliation']
|
||||||
reconcile_info = f"""RECONCILIATION RESULTS
|
# reconcile_info = f"""RECONCILIATION RESULTS
|
||||||
{'='*40}
|
# {'='*40}
|
||||||
|
|
||||||
After excluding mismatches:
|
# After excluding mismatches:
|
||||||
KST Count: {reconcile['reconciled_kst_count']:,}
|
# KST Count: {reconcile['reconciled_kst_count']:,}
|
||||||
Coordi Count: {reconcile['reconciled_coordi_count']:,}
|
# Coordi Count: {reconcile['reconciled_coordi_count']:,}
|
||||||
Counts Match: {'✅ YES' if reconcile['counts_match_after_reconciliation'] else '❌ NO'}
|
# Counts Match: {'✅ YES' if reconcile['counts_match_after_reconciliation'] else '❌ NO'}
|
||||||
|
|
||||||
Items to exclude:
|
# Items to exclude:
|
||||||
From KST: {reconcile['items_to_exclude_from_kst']:,}
|
# From KST: {reconcile['items_to_exclude_from_kst']:,}
|
||||||
From Coordi: {reconcile['items_to_exclude_from_coordi']:,}
|
# From Coordi: {reconcile['items_to_exclude_from_coordi']:,}
|
||||||
|
|
||||||
Final Result: Both datasets will have {reconcile['reconciled_kst_count']:,} matching items after reconciliation.
|
# Final Result: Both datasets will have {reconcile['reconciled_kst_count']:,} matching items after reconciliation.
|
||||||
"""
|
# """
|
||||||
|
|
||||||
self.reconcile_text.insert(tk.END, reconcile_info)
|
# self.reconcile_text.insert(tk.END, reconcile_info)
|
||||||
|
|
||||||
def update_data_tables(self):
|
# def update_data_tables(self):
|
||||||
if not self.comparison_data:
|
# if not self.comparison_data:
|
||||||
return
|
# return
|
||||||
|
|
||||||
mismatches = self.comparison_data['mismatch_details']
|
# mismatches = self.comparison_data['mismatch_details']
|
||||||
|
|
||||||
# Update matched items (create from intersection)
|
# # Update matched items (create from intersection)
|
||||||
matched_count = self.comparison_data['matched_items_count']
|
# matched_count = self.comparison_data['matched_items_count']
|
||||||
self.matched_count_var.set(f"{matched_count:,}")
|
# self.matched_count_var.set(f"{matched_count:,}")
|
||||||
|
|
||||||
# Clear matched tree
|
# # Clear matched tree
|
||||||
for item in self.matched_tree.get_children():
|
# for item in self.matched_tree.get_children():
|
||||||
self.matched_tree.delete(item)
|
# self.matched_tree.delete(item)
|
||||||
|
|
||||||
# Add matched items (we'll show the first few as examples)
|
# # Add matched items (we'll show the first few as examples)
|
||||||
if self.comparator:
|
# if self.comparator:
|
||||||
categorization = self.comparator.categorize_mismatches()
|
# categorization = self.comparator.categorize_mismatches()
|
||||||
matched_items = categorization['matched_items']
|
# matched_items = categorization['matched_items']
|
||||||
for i, item in enumerate(list(matched_items)[:100]): # Show first 100
|
# for i, item in enumerate(list(matched_items)[:100]): # Show first 100
|
||||||
self.matched_tree.insert("", tk.END, values=(
|
# self.matched_tree.insert("", tk.END, values=(
|
||||||
item.title, item.episode, item.source_sheet, item.row_index + 1, "Perfect match"
|
# item.title, item.episode, item.source_sheet, item.row_index + 1, "Perfect match"
|
||||||
))
|
# ))
|
||||||
|
|
||||||
# Update KST only
|
# # Update KST only
|
||||||
kst_only = mismatches['kst_only']
|
# kst_only = mismatches['kst_only']
|
||||||
self.kst_only_count_var.set(f"{len(kst_only):,}")
|
# self.kst_only_count_var.set(f"{len(kst_only):,}")
|
||||||
|
|
||||||
for item in self.kst_only_tree.get_children():
|
# for item in self.kst_only_tree.get_children():
|
||||||
self.kst_only_tree.delete(item)
|
# self.kst_only_tree.delete(item)
|
||||||
|
|
||||||
for mismatch in kst_only:
|
# for mismatch in kst_only:
|
||||||
self.kst_only_tree.insert("", tk.END, values=(
|
# self.kst_only_tree.insert("", tk.END, values=(
|
||||||
mismatch['title'], mismatch['episode'], mismatch['sheet'],
|
# mismatch['title'], mismatch['episode'], mismatch['sheet'],
|
||||||
mismatch['row_index'] + 1, mismatch['reason']
|
# mismatch['row_index'] + 1, mismatch['reason']
|
||||||
))
|
# ))
|
||||||
|
|
||||||
# Update Coordi only
|
# # Update Coordi only
|
||||||
coordi_only = mismatches['coordi_only']
|
# coordi_only = mismatches['coordi_only']
|
||||||
self.coordi_only_count_var.set(f"{len(coordi_only):,}")
|
# self.coordi_only_count_var.set(f"{len(coordi_only):,}")
|
||||||
|
|
||||||
for item in self.coordi_only_tree.get_children():
|
# for item in self.coordi_only_tree.get_children():
|
||||||
self.coordi_only_tree.delete(item)
|
# self.coordi_only_tree.delete(item)
|
||||||
|
|
||||||
for mismatch in coordi_only:
|
# for mismatch in coordi_only:
|
||||||
self.coordi_only_tree.insert("", tk.END, values=(
|
# self.coordi_only_tree.insert("", tk.END, values=(
|
||||||
mismatch['title'], mismatch['episode'], mismatch['sheet'],
|
# mismatch['title'], mismatch['episode'], mismatch['sheet'],
|
||||||
mismatch['row_index'] + 1, mismatch['reason']
|
# mismatch['row_index'] + 1, mismatch['reason']
|
||||||
))
|
# ))
|
||||||
|
|
||||||
def main():
|
# def main():
|
||||||
root = tk.Tk()
|
# root = tk.Tk()
|
||||||
app = DataComparisonGUI(root)
|
# app = DataComparisonGUI(root)
|
||||||
root.mainloop()
|
# root.mainloop()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
main()
|
# main()
|
||||||
@ -404,6 +404,7 @@
|
|||||||
// Update count displays
|
// Update count displays
|
||||||
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
||||||
|
|
||||||
|
// Count all different items including duplicates
|
||||||
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
||||||
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
|
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
|
||||||
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
||||||
@ -443,50 +444,44 @@
|
|||||||
const tbody = document.getElementById('different-table');
|
const tbody = document.getElementById('different-table');
|
||||||
tbody.innerHTML = '';
|
tbody.innerHTML = '';
|
||||||
|
|
||||||
// Combine all mismatches into one array for sorting
|
// Create sets of duplicate items for highlighting
|
||||||
|
const kstDuplicateKeys = new Set();
|
||||||
|
const coordiDuplicateKeys = new Set();
|
||||||
|
|
||||||
|
mismatchDetails.kst_duplicates.forEach(item => {
|
||||||
|
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
mismatchDetails.coordi_duplicates.forEach(item => {
|
||||||
|
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Combine only KST-only and Coordi-only items (like before)
|
||||||
const allDifferences = [];
|
const allDifferences = [];
|
||||||
|
|
||||||
// Add KST-only items
|
// Add KST-only items
|
||||||
mismatchDetails.kst_only.forEach(item => {
|
mismatchDetails.kst_only.forEach(item => {
|
||||||
|
const key = `${item.title}_${item.episode}`;
|
||||||
allDifferences.push({
|
allDifferences.push({
|
||||||
kstData: `${item.title} - Episode ${item.episode}`,
|
kstData: `${item.title} - Episode ${item.episode}`,
|
||||||
coordiData: '',
|
coordiData: '',
|
||||||
reason: 'Only appears in KST',
|
reason: 'Only appears in KST',
|
||||||
sortTitle: item.title,
|
sortTitle: item.title,
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
sortEpisode: parseFloat(item.episode) || 0,
|
||||||
|
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add Coordi-only items
|
// Add Coordi-only items
|
||||||
mismatchDetails.coordi_only.forEach(item => {
|
mismatchDetails.coordi_only.forEach(item => {
|
||||||
|
const key = `${item.title}_${item.episode}`;
|
||||||
allDifferences.push({
|
allDifferences.push({
|
||||||
kstData: '',
|
kstData: '',
|
||||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
coordiData: `${item.title} - Episode ${item.episode}`,
|
||||||
reason: 'Only appears in Coordi',
|
reason: 'Only appears in Coordi',
|
||||||
sortTitle: item.title,
|
sortTitle: item.title,
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
sortEpisode: parseFloat(item.episode) || 0,
|
||||||
});
|
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||||
});
|
|
||||||
|
|
||||||
// Add KST duplicates
|
|
||||||
mismatchDetails.kst_duplicates.forEach(item => {
|
|
||||||
allDifferences.push({
|
|
||||||
kstData: `${item.title} - Episode ${item.episode}`,
|
|
||||||
coordiData: '',
|
|
||||||
reason: 'Duplicate in KST',
|
|
||||||
sortTitle: item.title,
|
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
// Add Coordi duplicates
|
|
||||||
mismatchDetails.coordi_duplicates.forEach(item => {
|
|
||||||
allDifferences.push({
|
|
||||||
kstData: '',
|
|
||||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
|
||||||
reason: 'Duplicate in Coordi',
|
|
||||||
sortTitle: item.title,
|
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -497,12 +492,18 @@
|
|||||||
return a.sortEpisode - b.sortEpisode;
|
return a.sortEpisode - b.sortEpisode;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Populate table
|
// Populate table with highlighting
|
||||||
allDifferences.forEach(diff => {
|
allDifferences.forEach(diff => {
|
||||||
const row = tbody.insertRow();
|
const row = tbody.insertRow();
|
||||||
row.insertCell(0).textContent = diff.kstData;
|
row.insertCell(0).textContent = diff.kstData;
|
||||||
row.insertCell(1).textContent = diff.coordiData;
|
row.insertCell(1).textContent = diff.coordiData;
|
||||||
row.insertCell(2).textContent = diff.reason;
|
row.insertCell(2).textContent = diff.reason;
|
||||||
|
|
||||||
|
// Highlight row in yellow if it's also a duplicate
|
||||||
|
if (diff.isDuplicate) {
|
||||||
|
row.style.backgroundColor = '#fff3cd'; // Light yellow
|
||||||
|
row.title = 'This item also has duplicates in the dataset';
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
64
test_duplicates.py
Normal file
64
test_duplicates.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
from data_comparator import KSTCoordiComparator
|
||||||
|
|
||||||
|
def test_duplicate_detection():
|
||||||
|
comparator = KSTCoordiComparator('data/sample-data.xlsx')
|
||||||
|
if comparator.load_data():
|
||||||
|
print("=== DUPLICATE DETECTION TEST ===")
|
||||||
|
|
||||||
|
# Get the data extraction results
|
||||||
|
data = comparator.extract_kst_coordi_items()
|
||||||
|
|
||||||
|
print(f"Total KST items (unique): {len(data['kst_items'])}")
|
||||||
|
print(f"Total KST items (all): {len(data['kst_all_items'])}")
|
||||||
|
print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
|
||||||
|
print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")
|
||||||
|
|
||||||
|
# Check for duplicates
|
||||||
|
categorization = comparator.categorize_mismatches()
|
||||||
|
|
||||||
|
print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
|
||||||
|
print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")
|
||||||
|
|
||||||
|
# Show sample duplicates
|
||||||
|
if categorization['kst_duplicates']:
|
||||||
|
print("\nSample KST duplicates:")
|
||||||
|
for i, dup in enumerate(categorization['kst_duplicates'][:3]):
|
||||||
|
print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
|
||||||
|
|
||||||
|
if categorization['coordi_duplicates']:
|
||||||
|
print("\nSample Coordi duplicates:")
|
||||||
|
for i, dup in enumerate(categorization['coordi_duplicates'][:3]):
|
||||||
|
print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
|
||||||
|
|
||||||
|
# Check for the specific example: 백라이트 - Episode 53-1x(휴재)
|
||||||
|
mismatch_details = comparator.generate_mismatch_details()
|
||||||
|
|
||||||
|
print(f"\nLooking for '백라이트 - Episode 53-1x(휴재)':")
|
||||||
|
|
||||||
|
# Check in KST-only
|
||||||
|
backlight_kst_only = [item for item in mismatch_details['kst_only']
|
||||||
|
if '백라이트' in item['title'] and '53-1x' in item['episode']]
|
||||||
|
|
||||||
|
# Check in KST duplicates
|
||||||
|
backlight_kst_dup = [item for item in mismatch_details['kst_duplicates']
|
||||||
|
if '백라이트' in item['title'] and '53-1x' in item['episode']]
|
||||||
|
|
||||||
|
print(f" Found in KST-only: {len(backlight_kst_only)}")
|
||||||
|
print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
|
||||||
|
|
||||||
|
if backlight_kst_only:
|
||||||
|
print(f" KST-only details: {backlight_kst_only[0]}")
|
||||||
|
if backlight_kst_dup:
|
||||||
|
print(f" KST duplicate details: {backlight_kst_dup[0]}")
|
||||||
|
|
||||||
|
# Test the web interface logic
|
||||||
|
print(f"\n=== Testing Web Interface Logic ===")
|
||||||
|
summary = comparator.get_comparison_summary()
|
||||||
|
print(f"Web interface will show:")
|
||||||
|
print(f" Total different items: {summary['mismatches']['kst_only_count'] + summary['mismatches']['coordi_only_count'] + summary['mismatches']['kst_duplicates_count'] + summary['mismatches']['coordi_duplicates_count']}")
|
||||||
|
|
||||||
|
print("\n✓ Duplicate detection test complete!")
|
||||||
|
print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_duplicate_detection()
|
||||||
52
test_final_duplicate_fix.py
Normal file
52
test_final_duplicate_fix.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
def test_final_duplicate_fix():
|
||||||
|
print("=== FINAL DUPLICATE FIX TEST ===")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Test the analyze endpoint
|
||||||
|
response = requests.post('http://localhost:8081/analyze',
|
||||||
|
json={'file_path': 'data/sample-data.xlsx'},
|
||||||
|
timeout=30)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
data = response.json()
|
||||||
|
if data.get('success'):
|
||||||
|
results = data['results']
|
||||||
|
|
||||||
|
print("✓ Analysis successful!")
|
||||||
|
print(f" Matched items: {results['matched_items_count']}")
|
||||||
|
print(f" KST only: {results['mismatches']['kst_only_count']}")
|
||||||
|
print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
|
||||||
|
print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
|
||||||
|
print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
|
||||||
|
|
||||||
|
total_different = (results['mismatches']['kst_only_count'] +
|
||||||
|
results['mismatches']['coordi_only_count'] +
|
||||||
|
results['mismatches']['kst_duplicates_count'] +
|
||||||
|
results['mismatches']['coordi_duplicates_count'])
|
||||||
|
print(f" Total different items: {total_different}")
|
||||||
|
|
||||||
|
# Check for the specific example
|
||||||
|
kst_duplicates = results['mismatch_details']['kst_duplicates']
|
||||||
|
backlight_duplicates = [item for item in kst_duplicates
|
||||||
|
if '백라이트' in item['title'] and '53-1x' in item['episode']]
|
||||||
|
|
||||||
|
if backlight_duplicates:
|
||||||
|
print(f"\n✓ Found 백라이트 duplicates: {len(backlight_duplicates)}")
|
||||||
|
print(f" Example: {backlight_duplicates[0]['title']} - Episode {backlight_duplicates[0]['episode']}")
|
||||||
|
|
||||||
|
print(f"\n✓ Web interface ready at http://localhost:8081")
|
||||||
|
print("✓ The 'Different' tab will now show combined reasons like:")
|
||||||
|
print(" 백라이트 - Episode 53-1x(휴재) | (empty) | Only appears in KST + Duplicate in KST")
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"✗ Analysis failed: {data.get('error')}")
|
||||||
|
else:
|
||||||
|
print(f"✗ Request failed: {response.status_code}")
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
print(f"✗ Request failed: {e}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_final_duplicate_fix()
|
||||||
68
test_simplified_duplicates.py
Normal file
68
test_simplified_duplicates.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
def test_simplified_duplicates():
|
||||||
|
print("=== SIMPLIFIED DUPLICATE DISPLAY TEST ===")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Test the analyze endpoint
|
||||||
|
response = requests.post('http://localhost:8081/analyze',
|
||||||
|
json={'file_path': 'data/sample-data.xlsx'},
|
||||||
|
timeout=30)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
data = response.json()
|
||||||
|
if data.get('success'):
|
||||||
|
results = data['results']
|
||||||
|
|
||||||
|
print("✓ Analysis successful!")
|
||||||
|
print(f" Matched items: {results['matched_items_count']}")
|
||||||
|
print(f" KST only: {results['mismatches']['kst_only_count']}")
|
||||||
|
print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
|
||||||
|
print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
|
||||||
|
print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
|
||||||
|
|
||||||
|
# What the count will show
|
||||||
|
total_count = (results['mismatches']['kst_only_count'] +
|
||||||
|
results['mismatches']['coordi_only_count'] +
|
||||||
|
results['mismatches']['kst_duplicates_count'] +
|
||||||
|
results['mismatches']['coordi_duplicates_count'])
|
||||||
|
|
||||||
|
# What the table will show
|
||||||
|
table_rows = results['mismatches']['kst_only_count'] + results['mismatches']['coordi_only_count']
|
||||||
|
|
||||||
|
print(f"\n📊 DISPLAY LOGIC:")
|
||||||
|
print(f" Count badge shows: {total_count} items (all different items)")
|
||||||
|
print(f" Table shows: {table_rows} rows (only KST-only + Coordi-only)")
|
||||||
|
print(f" Yellow highlights: Items that are also duplicates")
|
||||||
|
|
||||||
|
# Check for 백라이트 example
|
||||||
|
kst_only = results['mismatch_details']['kst_only']
|
||||||
|
kst_duplicates = results['mismatch_details']['kst_duplicates']
|
||||||
|
|
||||||
|
backlight_kst_only = [item for item in kst_only
|
||||||
|
if '백라이트' in item['title'] and '53-1x' in item['episode']]
|
||||||
|
backlight_kst_dup = [item for item in kst_duplicates
|
||||||
|
if '백라이트' in item['title'] and '53-1x' in item['episode']]
|
||||||
|
|
||||||
|
if backlight_kst_only and backlight_kst_dup:
|
||||||
|
print(f"\n✓ 백라이트 example works:")
|
||||||
|
print(f" - Appears in table (KST-only): YES")
|
||||||
|
print(f" - Will be highlighted yellow: YES (also duplicate)")
|
||||||
|
print(f" - Contributes to count: 2 items (1 KST-only + 1 duplicate)")
|
||||||
|
|
||||||
|
print(f"\n✓ Web interface ready at http://localhost:8081")
|
||||||
|
print("✓ Check the 'Different' tab:")
|
||||||
|
print(" - Count shows all different items")
|
||||||
|
print(" - Table shows only KST-only + Coordi-only")
|
||||||
|
print(" - Yellow rows = items that also have duplicates")
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"✗ Analysis failed: {data.get('error')}")
|
||||||
|
else:
|
||||||
|
print(f"✗ Request failed: {response.status_code}")
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
print(f"✗ Request failed: {e}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_simplified_duplicates()
|
||||||
57
web_gui.py
57
web_gui.py
@ -512,6 +512,7 @@ def create_templates_dir():
|
|||||||
// Update count displays
|
// Update count displays
|
||||||
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
||||||
|
|
||||||
|
// Count all different items including duplicates
|
||||||
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
||||||
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
|
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
|
||||||
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
||||||
@ -551,50 +552,44 @@ def create_templates_dir():
|
|||||||
const tbody = document.getElementById('different-table');
|
const tbody = document.getElementById('different-table');
|
||||||
tbody.innerHTML = '';
|
tbody.innerHTML = '';
|
||||||
|
|
||||||
// Combine all mismatches into one array for sorting
|
// Create sets of duplicate items for highlighting
|
||||||
|
const kstDuplicateKeys = new Set();
|
||||||
|
const coordiDuplicateKeys = new Set();
|
||||||
|
|
||||||
|
mismatchDetails.kst_duplicates.forEach(item => {
|
||||||
|
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
mismatchDetails.coordi_duplicates.forEach(item => {
|
||||||
|
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Combine only KST-only and Coordi-only items (like before)
|
||||||
const allDifferences = [];
|
const allDifferences = [];
|
||||||
|
|
||||||
// Add KST-only items
|
// Add KST-only items
|
||||||
mismatchDetails.kst_only.forEach(item => {
|
mismatchDetails.kst_only.forEach(item => {
|
||||||
|
const key = `${item.title}_${item.episode}`;
|
||||||
allDifferences.push({
|
allDifferences.push({
|
||||||
kstData: `${item.title} - Episode ${item.episode}`,
|
kstData: `${item.title} - Episode ${item.episode}`,
|
||||||
coordiData: '',
|
coordiData: '',
|
||||||
reason: 'Only appears in KST',
|
reason: 'Only appears in KST',
|
||||||
sortTitle: item.title,
|
sortTitle: item.title,
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
sortEpisode: parseFloat(item.episode) || 0,
|
||||||
|
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add Coordi-only items
|
// Add Coordi-only items
|
||||||
mismatchDetails.coordi_only.forEach(item => {
|
mismatchDetails.coordi_only.forEach(item => {
|
||||||
|
const key = `${item.title}_${item.episode}`;
|
||||||
allDifferences.push({
|
allDifferences.push({
|
||||||
kstData: '',
|
kstData: '',
|
||||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
coordiData: `${item.title} - Episode ${item.episode}`,
|
||||||
reason: 'Only appears in Coordi',
|
reason: 'Only appears in Coordi',
|
||||||
sortTitle: item.title,
|
sortTitle: item.title,
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
sortEpisode: parseFloat(item.episode) || 0,
|
||||||
});
|
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||||
});
|
|
||||||
|
|
||||||
// Add KST duplicates
|
|
||||||
mismatchDetails.kst_duplicates.forEach(item => {
|
|
||||||
allDifferences.push({
|
|
||||||
kstData: `${item.title} - Episode ${item.episode}`,
|
|
||||||
coordiData: '',
|
|
||||||
reason: 'Duplicate in KST',
|
|
||||||
sortTitle: item.title,
|
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
// Add Coordi duplicates
|
|
||||||
mismatchDetails.coordi_duplicates.forEach(item => {
|
|
||||||
allDifferences.push({
|
|
||||||
kstData: '',
|
|
||||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
|
||||||
reason: 'Duplicate in Coordi',
|
|
||||||
sortTitle: item.title,
|
|
||||||
sortEpisode: parseFloat(item.episode) || 0
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -605,12 +600,18 @@ def create_templates_dir():
|
|||||||
return a.sortEpisode - b.sortEpisode;
|
return a.sortEpisode - b.sortEpisode;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Populate table
|
// Populate table with highlighting
|
||||||
allDifferences.forEach(diff => {
|
allDifferences.forEach(diff => {
|
||||||
const row = tbody.insertRow();
|
const row = tbody.insertRow();
|
||||||
row.insertCell(0).textContent = diff.kstData;
|
row.insertCell(0).textContent = diff.kstData;
|
||||||
row.insertCell(1).textContent = diff.coordiData;
|
row.insertCell(1).textContent = diff.coordiData;
|
||||||
row.insertCell(2).textContent = diff.reason;
|
row.insertCell(2).textContent = diff.reason;
|
||||||
|
|
||||||
|
// Highlight row in yellow if it's also a duplicate
|
||||||
|
if (diff.isDuplicate) {
|
||||||
|
row.style.backgroundColor = '#fff3cd'; // Light yellow
|
||||||
|
row.title = 'This item also has duplicates in the dataset';
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -630,8 +631,8 @@ def main():
|
|||||||
create_templates_dir()
|
create_templates_dir()
|
||||||
|
|
||||||
print("Starting web-based GUI...")
|
print("Starting web-based GUI...")
|
||||||
print("Open your browser and go to: http://localhost:8080")
|
print("Open your browser and go to: http://localhost:8081")
|
||||||
app.run(debug=True, host='0.0.0.0', port=8080)
|
app.run(debug=True, host='0.0.0.0', port=8081)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
Loading…
Reference in New Issue
Block a user