From 8d0351622bdc221b6c4bc0172de7a6eab1a4365a Mon Sep 17 00:00:00 2001 From: arthur Date: Thu, 21 Aug 2025 15:29:10 +0700 Subject: [PATCH] add visuallize tab --- data_comparator.py | 165 ++++++++++++++++++++++++++++++++++++++++++- templates/index.html | 83 ++++++++++++++++++++++ web_gui.py | 87 +++++++++++++++++++++++ 3 files changed, 332 insertions(+), 3 deletions(-) diff --git a/data_comparator.py b/data_comparator.py index 0c78606..b5005c2 100644 --- a/data_comparator.py +++ b/data_comparator.py @@ -3,7 +3,25 @@ import numpy as np from typing import Dict, List, Tuple, Any from dataclasses import dataclass -@dataclass +def normalize_episode(episode: str) -> str: + """Normalize episode numbers to handle cases like '54' vs '54.0'""" + if not episode or episode.strip() == '': + return episode + + try: + # Convert to float first to handle both int and float formats + episode_float = float(episode.strip()) + + # If it's a whole number (like 54.0), convert to int format + if episode_float.is_integer(): + return str(int(episode_float)) + else: + # Keep decimal format for non-whole numbers + return str(episode_float) + except (ValueError, TypeError): + # If conversion fails, return original episode string + return episode.strip() + class ComparisonItem: """Represents a single item for comparison""" title: str @@ -11,6 +29,12 @@ class ComparisonItem: source_sheet: str row_index: int + def __init__(self, title: str, episode: str, source_sheet: str, row_index: int): + self.title = title + self.episode = normalize_episode(episode) # Normalize episode on creation + self.source_sheet = source_sheet + self.row_index = row_index + def __hash__(self): return hash((self.title, self.episode)) @@ -340,7 +364,7 @@ class KSTCoordiComparator: return mismatch_details - def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]: + def get_comparison_summary(self, sheet_filter: str | None = None) -> Dict[str, Any]: """Get a comprehensive summary of the comparison for a specific sheet only""" # Get sheet names for filtering options sheet_names = list(self.data.keys()) if self.data else [] @@ -467,9 +491,144 @@ class KSTCoordiComparator: return grouped + def generate_visualize_data(self, sheet_filter: str | None = None) -> List[Dict[str, Any]]: + """Generate data structure for Excel-like visualization""" + # Get comparison data for the specified sheet + summary = self.get_comparison_summary(sheet_filter) + mismatch_details = summary['mismatch_details'] + + visualize_rows = [] + + # Helper function to create a row + def create_row(coordi_title="", coordi_chapter="", kst_title="", kst_chapter="", + row_type="matched", reason="", title_for_sort=""): + return { + 'coordi_title': coordi_title, + 'coordi_chapter': coordi_chapter, + 'kst_title': kst_title, + 'kst_chapter': kst_chapter, + 'row_type': row_type, + 'reason': reason, + 'title_for_sort': title_for_sort or coordi_title or kst_title, + 'priority': 1 if row_type != 'matched' else 2 # Mismatches first + } + + # 1. Handle Coordi-only items + for item in mismatch_details['coordi_only']: + visualize_rows.append(create_row( + coordi_title=item['title'], + coordi_chapter=item['episode'], + row_type='coordi_only', + reason='Only in Coordi' + )) + + # 2. Handle KST-only items + for item in mismatch_details['kst_only']: + visualize_rows.append(create_row( + kst_title=item['title'], + kst_chapter=item['episode'], + row_type='kst_only', + reason='Only in KST' + )) + + # 3. Handle Mixed duplicates (exists in both but duplicated on one side) + mixed_items = {} # Group by title+episode + for item in mismatch_details['mixed_duplicates']: + key = f"{item['title']}_{item['episode']}" + if key not in mixed_items: + mixed_items[key] = { + 'title': item['title'], + 'episode': item['episode'], + 'has_kst_duplicate': False, + 'has_coordi_duplicate': False + } + + if item['duplicate_side'] == 'KST': + mixed_items[key]['has_kst_duplicate'] = True + elif item['duplicate_side'] == 'COORDI': + mixed_items[key]['has_coordi_duplicate'] = True + + for key, item in mixed_items.items(): + # First row: show it exists in both + visualize_rows.append(create_row( + coordi_title=item['title'], + coordi_chapter=item['episode'], + kst_title=item['title'], + kst_chapter=item['episode'], + row_type='mixed_duplicate', + reason='Mixed duplicate' + )) + + # Additional rows for duplicates + if item['has_kst_duplicate']: + visualize_rows.append(create_row( + kst_title=item['title'], + kst_chapter=item['episode'], + row_type='mixed_duplicate', + reason='Duplicate in KST', + title_for_sort=item['title'] + )) + + if item['has_coordi_duplicate']: + visualize_rows.append(create_row( + coordi_title=item['title'], + coordi_chapter=item['episode'], + row_type='mixed_duplicate', + reason='Duplicate in Coordi', + title_for_sort=item['title'] + )) + + # 4. Handle Pure duplicates + for item in mismatch_details['kst_duplicates']: + visualize_rows.append(create_row( + kst_title=item['title'], + kst_chapter=item['episode'], + row_type='pure_duplicate', + reason='Duplicate in KST' + )) + + for item in mismatch_details['coordi_duplicates']: + visualize_rows.append(create_row( + coordi_title=item['title'], + coordi_chapter=item['episode'], + row_type='pure_duplicate', + reason='Duplicate in Coordi' + )) + + # 5. Handle Matched items (perfect matches) + matched_by_title = summary['grouped_by_title']['matched_by_title'] + for title, items in matched_by_title.items(): + for item in items: + visualize_rows.append(create_row( + coordi_title=item['title'], + coordi_chapter=item['episode'], + kst_title=item['title'], + kst_chapter=item['episode'], + row_type='matched', + reason='Perfect match' + )) + + # Sort: Mismatches first (priority 1), then matches (priority 2), then by Korean title + chapter + def sort_key(x): + # Extract episode number for proper numeric sorting + coordi_episode = x.get('coordi_chapter', '') or '' + kst_episode = x.get('kst_chapter', '') or '' + episode = coordi_episode or kst_episode + + # Try to convert episode to number for proper sorting, fallback to string + try: + episode_num = float(episode) if episode else 0 + except (ValueError, TypeError): + episode_num = 0 + + return (x['priority'], x['title_for_sort'], episode_num) + + visualize_rows.sort(key=sort_key) + + return visualize_rows - def print_comparison_summary(self, sheet_filter: str = None): + def print_comparison_summary(self, sheet_filter: str | None = None): """Print a formatted summary of the comparison for a specific sheet""" summary = self.get_comparison_summary(sheet_filter) diff --git a/templates/index.html b/templates/index.html index bb50abd..4c437d2 100644 --- a/templates/index.html +++ b/templates/index.html @@ -173,6 +173,32 @@ border: 1px solid #ddd; border-radius: 4px; } + + /* Vibrant color styles for Visualize tab */ + .coordi-only-row { + background-color: #ff4444 !important; /* Bright red */ + color: white; + } + + .kst-only-row { + background-color: #4488ff !important; /* Bright blue */ + color: white; + } + + .mixed-duplicate-row { + background-color: #ff8800 !important; /* Bright orange */ + color: white; + } + + .pure-duplicate-row { + background-color: #8844ff !important; /* Bright purple */ + color: white; + } + + .matched-row { + background-color: white !important; /* White background */ + color: black; + } @@ -203,6 +229,7 @@
Summary
Different
+
Visualize
@@ -255,6 +282,25 @@
+ +
+

Data

+
+ + + + + + + + + + + + +
Coordi TitleCoordi ChapterKST TitleKST ChapterStatus
+
+
@@ -468,6 +514,9 @@ // Update Different tab updateDifferentTable(results.mismatch_details); + + // Update Visualize tab + updateVisualizeTable(results.visualize_data); } function updateSummaryTable(matchedData) { @@ -587,6 +636,40 @@ }); } + function updateVisualizeTable(visualizeData) { + const tbody = document.getElementById('visualize-table-body'); + tbody.innerHTML = ''; + + // Data is already sorted by the backend (mismatches first, then matches, all by Korean title) + visualizeData.forEach(row => { + const tr = tbody.insertRow(); + tr.insertCell(0).textContent = row.coordi_title || ''; + tr.insertCell(1).textContent = row.coordi_chapter || ''; + tr.insertCell(2).textContent = row.kst_title || ''; + tr.insertCell(3).textContent = row.kst_chapter || ''; + tr.insertCell(4).textContent = row.reason || ''; + + // Apply vibrant color highlighting based on row type + switch (row.row_type) { + case 'coordi_only': + tr.className = 'coordi-only-row'; + break; + case 'kst_only': + tr.className = 'kst-only-row'; + break; + case 'mixed_duplicate': + tr.className = 'mixed-duplicate-row'; + break; + case 'pure_duplicate': + tr.className = 'pure-duplicate-row'; + break; + case 'matched': + tr.className = 'matched-row'; + break; + } + }); + } + // Auto-analyze on page load with default file window.onload = function() { // Initialize sheet filter with loading state diff --git a/web_gui.py b/web_gui.py index 0510d9f..0e5a547 100644 --- a/web_gui.py +++ b/web_gui.py @@ -61,6 +61,10 @@ def analyze_data(): # Add matched data to results comparison_results['matched_data'] = matched_items_data + # Generate visualize data + visualize_data = comparator_instance.generate_visualize_data(sheet_filter) + comparison_results['visualize_data'] = visualize_data + return jsonify({ 'success': True, 'results': comparison_results @@ -307,6 +311,32 @@ def create_templates_dir(): border: 1px solid #ddd; border-radius: 4px; } + + /* Vibrant color styles for Visualize tab */ + .coordi-only-row { + background-color: #ff4444 !important; /* Bright red */ + color: white; + } + + .kst-only-row { + background-color: #4488ff !important; /* Bright blue */ + color: white; + } + + .mixed-duplicate-row { + background-color: #ff8800 !important; /* Bright orange */ + color: white; + } + + .pure-duplicate-row { + background-color: #8844ff !important; /* Bright purple */ + color: white; + } + + .matched-row { + background-color: white !important; /* White background */ + color: black; + } @@ -337,6 +367,7 @@ def create_templates_dir():
Summary
Different
+
Visualize
@@ -389,6 +420,25 @@ def create_templates_dir():
+ +
+

Data

+
+ + + + + + + + + + + + +
Coordi TitleCoordi ChapterKST TitleKST ChapterStatus
+
+
@@ -602,6 +652,9 @@ def create_templates_dir(): // Update Different tab updateDifferentTable(results.mismatch_details); + + // Update Visualize tab + updateVisualizeTable(results.visualize_data); } function updateSummaryTable(matchedData) { @@ -721,6 +774,40 @@ def create_templates_dir(): }); } + function updateVisualizeTable(visualizeData) { + const tbody = document.getElementById('visualize-table-body'); + tbody.innerHTML = ''; + + // Data is already sorted by the backend (mismatches first, then matches, all by Korean title) + visualizeData.forEach(row => { + const tr = tbody.insertRow(); + tr.insertCell(0).textContent = row.coordi_title || ''; + tr.insertCell(1).textContent = row.coordi_chapter || ''; + tr.insertCell(2).textContent = row.kst_title || ''; + tr.insertCell(3).textContent = row.kst_chapter || ''; + tr.insertCell(4).textContent = row.reason || ''; + + // Apply vibrant color highlighting based on row type + switch (row.row_type) { + case 'coordi_only': + tr.className = 'coordi-only-row'; + break; + case 'kst_only': + tr.className = 'kst-only-row'; + break; + case 'mixed_duplicate': + tr.className = 'mixed-duplicate-row'; + break; + case 'pure_duplicate': + tr.className = 'pure-duplicate-row'; + break; + case 'matched': + tr.className = 'matched-row'; + break; + } + }); + } + // Auto-analyze on page load with default file window.onload = function() { // Initialize sheet filter with loading state