Merge pull request 'dev/viettran' (#2) from dev/viettran into main
Reviewed-on: #2
This commit is contained in:
commit
c946d3c871
BIN
data/Compare DE.xlsx
Normal file
BIN
data/Compare DE.xlsx
Normal file
Binary file not shown.
@ -3,7 +3,25 @@ import numpy as np
|
|||||||
from typing import Dict, List, Tuple, Any
|
from typing import Dict, List, Tuple, Any
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@dataclass
|
def normalize_episode(episode: str) -> str:
    """Return a canonical text form of an episode label.

    Numeric labels that denote the same number are mapped to one spelling so
    that '54', '54.0' and ' 54 ' all compare equal after normalization.

    Args:
        episode: The raw episode label. Non-string values (for example an
            int or float cell value) are converted with ``str()`` first
            instead of failing on ``.strip()``.

    Returns:
        * ``None``, the empty string and whitespace-only strings are returned
          unchanged.
        * Whole numbers are returned without a fractional part ('54.0' -> '54').
        * Other finite numbers are returned as ``str(float(...))``
          ('54.50' -> '54.5').
        * Anything that is not a finite number is returned stripped of
          surrounding whitespace.
    """
    # Function-scope import: only this block is being edited, not the
    # file's top-level import section.
    import math

    if episode is None:
        return episode
    if not isinstance(episode, str):
        episode = str(episode)

    text = episode.strip()
    if not text:
        return episode

    # Exact integer path first: going through float would corrupt integers
    # larger than 2**53.
    try:
        return str(int(text))
    except ValueError:
        pass

    try:
        number = float(text)
    except ValueError:
        # Not numeric at all (e.g. '53-1x'): keep the label, minus padding.
        return text

    # float() also accepts 'nan', 'inf', 'Infinity', ...; those are labels,
    # not episode numbers, so they must not be rewritten.
    if not math.isfinite(number):
        return text
    if number.is_integer():
        return str(int(number))
    return str(number)
|
||||||
|
|
||||||
class ComparisonItem:
|
class ComparisonItem:
|
||||||
"""Represents a single item for comparison"""
|
"""Represents a single item for comparison"""
|
||||||
title: str
|
title: str
|
||||||
@ -11,6 +29,12 @@ class ComparisonItem:
|
|||||||
source_sheet: str
|
source_sheet: str
|
||||||
row_index: int
|
row_index: int
|
||||||
|
|
||||||
|
def __init__(self, title: str, episode: str, source_sheet: str, row_index: int):
|
||||||
|
self.title = title
|
||||||
|
self.episode = normalize_episode(episode) # Normalize episode on creation
|
||||||
|
self.source_sheet = source_sheet
|
||||||
|
self.row_index = row_index
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return hash((self.title, self.episode))
|
return hash((self.title, self.episode))
|
||||||
|
|
||||||
@ -167,15 +191,23 @@ class KSTCoordiComparator:
|
|||||||
kst_all_items = sheet_data['kst_all_items']
|
kst_all_items = sheet_data['kst_all_items']
|
||||||
coordi_all_items = sheet_data['coordi_all_items']
|
coordi_all_items = sheet_data['coordi_all_items']
|
||||||
|
|
||||||
# Find overlaps and differences
|
# Find duplicates within each dataset first
|
||||||
matched_items = kst_items.intersection(coordi_items)
|
|
||||||
kst_only_items = kst_items - coordi_items
|
|
||||||
coordi_only_items = coordi_items - kst_items
|
|
||||||
|
|
||||||
# Find duplicates within each dataset - FIXED LOGIC
|
|
||||||
kst_duplicates = self._find_duplicates_in_list(kst_all_items)
|
kst_duplicates = self._find_duplicates_in_list(kst_all_items)
|
||||||
coordi_duplicates = self._find_duplicates_in_list(coordi_all_items)
|
coordi_duplicates = self._find_duplicates_in_list(coordi_all_items)
|
||||||
|
|
||||||
|
# Create sets of items that have duplicates (to exclude from "only" lists)
|
||||||
|
kst_duplicate_keys = {(item.title, item.episode) for item in kst_duplicates}
|
||||||
|
coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_duplicates}
|
||||||
|
|
||||||
|
# Find overlaps and differences - exclude items that have duplicates
|
||||||
|
matched_items = kst_items.intersection(coordi_items)
|
||||||
|
|
||||||
|
# For "only" items: exclude those that have duplicates within their own dataset
|
||||||
|
kst_only_items = {item for item in kst_items - coordi_items
|
||||||
|
if (item.title, item.episode) not in kst_duplicate_keys}
|
||||||
|
coordi_only_items = {item for item in coordi_items - kst_items
|
||||||
|
if (item.title, item.episode) not in coordi_duplicate_keys}
|
||||||
|
|
||||||
categorization = {
|
categorization = {
|
||||||
'matched_items': list(matched_items),
|
'matched_items': list(matched_items),
|
||||||
'kst_only_items': list(kst_only_items),
|
'kst_only_items': list(kst_only_items),
|
||||||
@ -245,10 +277,16 @@ class KSTCoordiComparator:
|
|||||||
kst_duplicate_keys = {(item.title, item.episode) for item in kst_sheet_duplicates}
|
kst_duplicate_keys = {(item.title, item.episode) for item in kst_sheet_duplicates}
|
||||||
coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_sheet_duplicates}
|
coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_sheet_duplicates}
|
||||||
|
|
||||||
|
# Count actual instances for each item
|
||||||
|
from collections import Counter
|
||||||
|
kst_counts = Counter((item.title, item.episode) for item in kst_sheet_items)
|
||||||
|
coordi_counts = Counter((item.title, item.episode) for item in coordi_sheet_items)
|
||||||
|
|
||||||
# Find matched items that also have duplicates within the same sheet
|
# Find matched items that also have duplicates within the same sheet
|
||||||
for title, episode in matched_in_sheet:
|
for title, episode in matched_in_sheet:
|
||||||
# Check if this matched item has duplicates in KST within this sheet
|
# Check if this matched item has duplicates in KST within this sheet
|
||||||
if (title, episode) in kst_duplicate_keys:
|
if (title, episode) in kst_duplicate_keys:
|
||||||
|
kst_count = kst_counts[(title, episode)]
|
||||||
mixed_duplicates.append({
|
mixed_duplicates.append({
|
||||||
'title': title,
|
'title': title,
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
@ -256,11 +294,13 @@ class KSTCoordiComparator:
|
|||||||
'row_index': None, # Could get from items if needed
|
'row_index': None, # Could get from items if needed
|
||||||
'reason': f'Item exists in both datasets but has duplicates in KST within {sheet_filter}',
|
'reason': f'Item exists in both datasets but has duplicates in KST within {sheet_filter}',
|
||||||
'mismatch_type': 'MIXED_DUPLICATE_KST',
|
'mismatch_type': 'MIXED_DUPLICATE_KST',
|
||||||
'duplicate_side': 'KST'
|
'duplicate_side': 'KST',
|
||||||
|
'duplicate_count': kst_count
|
||||||
})
|
})
|
||||||
|
|
||||||
# Check if this matched item has duplicates in Coordi within this sheet
|
# Check if this matched item has duplicates in Coordi within this sheet
|
||||||
if (title, episode) in coordi_duplicate_keys:
|
if (title, episode) in coordi_duplicate_keys:
|
||||||
|
coordi_count = coordi_counts[(title, episode)]
|
||||||
mixed_duplicates.append({
|
mixed_duplicates.append({
|
||||||
'title': title,
|
'title': title,
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
@ -268,7 +308,8 @@ class KSTCoordiComparator:
|
|||||||
'row_index': None, # Could get from items if needed
|
'row_index': None, # Could get from items if needed
|
||||||
'reason': f'Item exists in both datasets but has duplicates in Coordi within {sheet_filter}',
|
'reason': f'Item exists in both datasets but has duplicates in Coordi within {sheet_filter}',
|
||||||
'mismatch_type': 'MIXED_DUPLICATE_COORDI',
|
'mismatch_type': 'MIXED_DUPLICATE_COORDI',
|
||||||
'duplicate_side': 'COORDI'
|
'duplicate_side': 'COORDI',
|
||||||
|
'duplicate_count': coordi_count
|
||||||
})
|
})
|
||||||
|
|
||||||
return mixed_duplicates
|
return mixed_duplicates
|
||||||
@ -340,7 +381,7 @@ class KSTCoordiComparator:
|
|||||||
|
|
||||||
return mismatch_details
|
return mismatch_details
|
||||||
|
|
||||||
def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]:
|
def get_comparison_summary(self, sheet_filter: str | None = None) -> Dict[str, Any]:
|
||||||
"""Get a comprehensive summary of the comparison for a specific sheet only"""
|
"""Get a comprehensive summary of the comparison for a specific sheet only"""
|
||||||
# Get sheet names for filtering options
|
# Get sheet names for filtering options
|
||||||
sheet_names = list(self.data.keys()) if self.data else []
|
sheet_names = list(self.data.keys()) if self.data else []
|
||||||
@ -467,9 +508,146 @@ class KSTCoordiComparator:
|
|||||||
|
|
||||||
return grouped
|
return grouped
|
||||||
|
|
||||||
|
def generate_visualize_data(self, sheet_filter: str | None = None) -> List[Dict[str, Any]]:
|
||||||
|
"""Generate data structure for Excel-like visualization"""
|
||||||
|
# Get comparison data for the specified sheet
|
||||||
|
summary = self.get_comparison_summary(sheet_filter)
|
||||||
|
mismatch_details = summary['mismatch_details']
|
||||||
|
|
||||||
|
visualize_rows = []
|
||||||
|
|
||||||
|
# Helper function to create a row
|
||||||
|
def create_row(coordi_title="", coordi_chapter="", kst_title="", kst_chapter="",
|
||||||
|
row_type="matched", reason="", title_for_sort=""):
|
||||||
|
return {
|
||||||
|
'coordi_title': coordi_title,
|
||||||
|
'coordi_chapter': coordi_chapter,
|
||||||
|
'kst_title': kst_title,
|
||||||
|
'kst_chapter': kst_chapter,
|
||||||
|
'row_type': row_type,
|
||||||
|
'reason': reason,
|
||||||
|
'title_for_sort': title_for_sort or coordi_title or kst_title,
|
||||||
|
'priority': 1 if row_type != 'matched' else 2 # Mismatches first
|
||||||
|
}
|
||||||
|
|
||||||
|
# 1. Handle Coordi-only items
|
||||||
|
for item in mismatch_details['coordi_only']:
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
coordi_title=item['title'],
|
||||||
|
coordi_chapter=item['episode'],
|
||||||
|
row_type='coordi_only',
|
||||||
|
reason='Only in Coordi'
|
||||||
|
))
|
||||||
|
|
||||||
|
# 2. Handle KST-only items
|
||||||
|
for item in mismatch_details['kst_only']:
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
kst_title=item['title'],
|
||||||
|
kst_chapter=item['episode'],
|
||||||
|
row_type='kst_only',
|
||||||
|
reason='Only in KST'
|
||||||
|
))
|
||||||
|
|
||||||
|
# 3. Handle Mixed duplicates (exists in both but duplicated on one side)
|
||||||
|
mixed_items = {} # Group by title+episode
|
||||||
|
for item in mismatch_details['mixed_duplicates']:
|
||||||
|
key = f"{item['title']}_{item['episode']}"
|
||||||
|
if key not in mixed_items:
|
||||||
|
mixed_items[key] = {
|
||||||
|
'title': item['title'],
|
||||||
|
'episode': item['episode'],
|
||||||
|
'kst_duplicate_count': 0,
|
||||||
|
'coordi_duplicate_count': 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Count the actual duplicates for each side
|
||||||
|
if item['duplicate_side'] == 'KST':
|
||||||
|
mixed_items[key]['kst_duplicate_count'] = item.get('duplicate_count', 1)
|
||||||
|
elif item['duplicate_side'] == 'COORDI':
|
||||||
|
mixed_items[key]['coordi_duplicate_count'] = item.get('duplicate_count', 1)
|
||||||
|
|
||||||
|
for key, item in mixed_items.items():
|
||||||
|
# First row: show it exists in both
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
coordi_title=item['title'],
|
||||||
|
coordi_chapter=item['episode'],
|
||||||
|
kst_title=item['title'],
|
||||||
|
kst_chapter=item['episode'],
|
||||||
|
row_type='mixed_duplicate',
|
||||||
|
reason='Mixed duplicate'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Additional rows for KST duplicates (count - 1 since first is already shown)
|
||||||
|
for i in range(max(0, item['kst_duplicate_count'] - 1)):
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
kst_title=item['title'],
|
||||||
|
kst_chapter=item['episode'],
|
||||||
|
row_type='mixed_duplicate',
|
||||||
|
reason='Duplicate in KST',
|
||||||
|
title_for_sort=item['title']
|
||||||
|
))
|
||||||
|
|
||||||
|
# Additional rows for Coordi duplicates (count - 1 since first is already shown)
|
||||||
|
for i in range(max(0, item['coordi_duplicate_count'] - 1)):
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
coordi_title=item['title'],
|
||||||
|
coordi_chapter=item['episode'],
|
||||||
|
row_type='mixed_duplicate',
|
||||||
|
reason='Duplicate in Coordi',
|
||||||
|
title_for_sort=item['title']
|
||||||
|
))
|
||||||
|
|
||||||
|
# 4. Handle Pure duplicates
|
||||||
|
for item in mismatch_details['kst_duplicates']:
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
kst_title=item['title'],
|
||||||
|
kst_chapter=item['episode'],
|
||||||
|
row_type='pure_duplicate',
|
||||||
|
reason='Duplicate in KST'
|
||||||
|
))
|
||||||
|
|
||||||
|
for item in mismatch_details['coordi_duplicates']:
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
coordi_title=item['title'],
|
||||||
|
coordi_chapter=item['episode'],
|
||||||
|
row_type='pure_duplicate',
|
||||||
|
reason='Duplicate in Coordi'
|
||||||
|
))
|
||||||
|
|
||||||
|
# 5. Handle Matched items (perfect matches)
|
||||||
|
matched_by_title = summary['grouped_by_title']['matched_by_title']
|
||||||
|
for title, items in matched_by_title.items():
|
||||||
|
for item in items:
|
||||||
|
visualize_rows.append(create_row(
|
||||||
|
coordi_title=item['title'],
|
||||||
|
coordi_chapter=item['episode'],
|
||||||
|
kst_title=item['title'],
|
||||||
|
kst_chapter=item['episode'],
|
||||||
|
row_type='matched',
|
||||||
|
reason='Perfect match'
|
||||||
|
))
|
||||||
|
|
||||||
|
# Sort: Mismatches first (priority 1), then matches (priority 2), then by Korean title + chapter
|
||||||
|
def sort_key(x):
|
||||||
|
# Extract episode number for proper numeric sorting
|
||||||
|
coordi_episode = x.get('coordi_chapter', '') or ''
|
||||||
|
kst_episode = x.get('kst_chapter', '') or ''
|
||||||
|
episode = coordi_episode or kst_episode
|
||||||
|
|
||||||
|
# Try to convert episode to number for proper sorting, fallback to string
|
||||||
|
try:
|
||||||
|
episode_num = float(episode) if episode else 0
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
episode_num = 0
|
||||||
|
|
||||||
|
return (x['priority'], x['title_for_sort'], episode_num)
|
||||||
|
|
||||||
|
visualize_rows.sort(key=sort_key)
|
||||||
|
|
||||||
|
return visualize_rows
|
||||||
|
|
||||||
|
|
||||||
def print_comparison_summary(self, sheet_filter: str = None):
|
def print_comparison_summary(self, sheet_filter: str | None = None):
|
||||||
"""Print a formatted summary of the comparison for a specific sheet"""
|
"""Print a formatted summary of the comparison for a specific sheet"""
|
||||||
summary = self.get_comparison_summary(sheet_filter)
|
summary = self.get_comparison_summary(sheet_filter)
|
||||||
|
|
||||||
|
|||||||
@ -173,6 +173,32 @@
|
|||||||
border: 1px solid #ddd;
|
border: 1px solid #ddd;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Vibrant color styles for Visualize tab */
|
||||||
|
.coordi-only-row {
|
||||||
|
background-color: #ff4444 !important; /* Bright red */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.kst-only-row {
|
||||||
|
background-color: #4488ff !important; /* Bright blue */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mixed-duplicate-row {
|
||||||
|
background-color: #ff8800 !important; /* Bright orange */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.pure-duplicate-row {
|
||||||
|
background-color: #8844ff !important; /* Bright purple */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.matched-row {
|
||||||
|
background-color: white !important; /* White background */
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
@ -203,6 +229,7 @@
|
|||||||
<div class="tabs">
|
<div class="tabs">
|
||||||
<div class="tab active" onclick="showTab('summary')">Summary</div>
|
<div class="tab active" onclick="showTab('summary')">Summary</div>
|
||||||
<div class="tab" onclick="showTab('different')">Different</div>
|
<div class="tab" onclick="showTab('different')">Different</div>
|
||||||
|
<div class="tab" onclick="showTab('visualize')">Visualize</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="summary" class="tab-content active">
|
<div id="summary" class="tab-content active">
|
||||||
@ -255,6 +282,25 @@
|
|||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div id="visualize" class="tab-content">
|
||||||
|
<h3>Data </h3>
|
||||||
|
<div class="table-container">
|
||||||
|
<table id="visualize-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Coordi Title</th>
|
||||||
|
<th>Coordi Chapter</th>
|
||||||
|
<th>KST Title</th>
|
||||||
|
<th>KST Chapter</th>
|
||||||
|
<th>Status</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="visualize-table-body">
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -468,6 +514,9 @@
|
|||||||
|
|
||||||
// Update Different tab
|
// Update Different tab
|
||||||
updateDifferentTable(results.mismatch_details);
|
updateDifferentTable(results.mismatch_details);
|
||||||
|
|
||||||
|
// Update Visualize tab
|
||||||
|
updateVisualizeTable(results.visualize_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
function updateSummaryTable(matchedData) {
|
function updateSummaryTable(matchedData) {
|
||||||
@ -587,6 +636,40 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the body of the Visualize table from the backend's row list.
 *
 * Each row object supplies coordi_title, coordi_chapter, kst_title,
 * kst_chapter and reason (rendered in that column order) plus a row_type
 * that selects the row's CSS class. Rows are rendered in the order given.
 *
 * @param {Array<Object>} [visualizeData] Row objects; a missing or null
 *     value just clears the table instead of throwing.
 */
function updateVisualizeTable(visualizeData) {
    // row_type -> CSS class. A Map keeps the exact-match semantics of the
    // switch statement it replaces; unknown types get no class.
    const rowClasses = new Map([
        ['coordi_only', 'coordi-only-row'],
        ['kst_only', 'kst-only-row'],
        ['mixed_duplicate', 'mixed-duplicate-row'],
        ['pure_duplicate', 'pure-duplicate-row'],
        ['matched', 'matched-row']
    ]);

    const tbody = document.getElementById('visualize-table-body');
    tbody.innerHTML = '';

    (visualizeData || []).forEach(row => {
        const tr = tbody.insertRow();
        const columns = [
            row.coordi_title,
            row.coordi_chapter,
            row.kst_title,
            row.kst_chapter,
            row.reason
        ];
        columns.forEach((value, index) => {
            // textContent (not innerHTML) so titles are never parsed as markup.
            tr.insertCell(index).textContent = value || '';
        });

        const cssClass = rowClasses.get(row.row_type);
        if (cssClass !== undefined) {
            tr.className = cssClass;
        }
    });
}
|
||||||
|
|
||||||
// Auto-analyze on page load with default file
|
// Auto-analyze on page load with default file
|
||||||
window.onload = function() {
|
window.onload = function() {
|
||||||
// Initialize sheet filter with loading state
|
// Initialize sheet filter with loading state
|
||||||
|
|||||||
@ -1,101 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from data_comparator import KSTCoordiComparator
|
|
||||||
|
|
||||||
def test_ba_confirmed_cases():
    """Print a report comparing detected duplicates with the BA-confirmed ones.

    Loads ``data/sample-data.xlsx``, reads the comparison summary of the
    'US URGENT' and 'TH URGENT' sheets and prints, for every expected
    (title, episode) pair, whether it was found. Nothing is asserted or
    returned; the function stops early if the data cannot be loaded.
    """
    print("Testing BA confirmed duplicate cases...")

    comparator = KSTCoordiComparator("data/sample-data.xlsx")
    loaded = comparator.load_data()
    if not loaded:
        print("Failed to load data!")
        return

    # ---- US URGENT ----
    print("\n=== US URGENT Sheet - BA Confirmed Cases ===")
    us_details = comparator.get_comparison_summary('US URGENT')['mismatch_details']
    us_coordi_dups = us_details['coordi_duplicates']
    us_mixed_dups = us_details['mixed_duplicates']

    wanted_coordi = [('금수의 영역', '17'), ('신결', '23')]
    wanted_mixed = [('트윈 가이드', '31')]

    print("Coordi duplicates found:")
    seen_coordi = []
    for entry in us_coordi_dups:
        seen_coordi.append((entry['title'], entry['episode']))
        print(f" - {entry['title']} - Episode {entry['episode']}")

    print("\nMixed duplicates found:")
    seen_mixed = []
    for entry in us_mixed_dups:
        seen_mixed.append((entry['title'], entry['episode']))
        print(f" - {entry['title']} - Episode {entry['episode']} ({entry['reason']})")

    print("\n✓ Verification:")
    for title, episode in wanted_coordi:
        if (title, episode) not in seen_coordi:
            print(f" ✗ Missing expected Coordi duplicate: {title} - Episode {episode}")
        else:
            print(f" ✓ Found expected Coordi duplicate: {title} - Episode {episode}")

    for title, episode in wanted_mixed:
        if (title, episode) not in seen_mixed:
            print(f" ✗ Missing expected mixed duplicate: {title} - Episode {episode}")
        else:
            print(f" ✓ Found expected mixed duplicate: {title} - Episode {episode}")

    # ---- TH URGENT ----
    print("\n=== TH URGENT Sheet - BA Confirmed Cases ===")
    th_details = comparator.get_comparison_summary('TH URGENT')['mismatch_details']
    th_kst_dups = th_details['kst_duplicates']
    th_coordi_only = th_details['coordi_only']

    wanted_kst = [('백라이트', '53-1x(휴재)')]

    print("KST duplicates found:")
    seen_kst = []
    for entry in th_kst_dups:
        seen_kst.append((entry['title'], entry['episode']))
        print(f" - {entry['title']} - Episode {entry['episode']}")

    # The KST-side duplicate must not also be listed as a Coordi-only item.
    print("\nChecking that 백라이트 - Episode 53-1x(휴재) doesn't appear in Coordi:")
    leaked_into_coordi = any(
        entry['title'] == '백라이트' and entry['episode'] == '53-1x(휴재)'
        for entry in th_coordi_only
    )
    if leaked_into_coordi:
        print(" ✗ 백라이트 - Episode 53-1x(휴재) incorrectly appears in Coordi data")
    else:
        print(" ✓ 백라이트 - Episode 53-1x(휴재) correctly does NOT appear in Coordi data")

    print("\n✓ Verification:")
    for title, episode in wanted_kst:
        if (title, episode) not in seen_kst:
            print(f" ✗ Missing expected KST duplicate: {title} - Episode {episode}")
        else:
            print(f" ✓ Found expected KST duplicate: {title} - Episode {episode}")

    print("\n✓ All BA confirmed cases tested!")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
test_ba_confirmed_cases()
|
|
||||||
@ -1,57 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from data_comparator import KSTCoordiComparator
|
|
||||||
|
|
||||||
def test_sheet_filtering():
    """Print a report exercising ``get_comparison_summary`` per sheet.

    Loads ``data/sample-data.xlsx`` and then:
      1. requests the summary without a sheet filter,
      2. requests it for the second sheet (skipped if there is only one),
      3. prints the per-sheet duplicate counts for every sheet.
    Failures in steps 1 and 2 are caught and printed, not raised; nothing
    is asserted or returned.
    """
    print("Testing sheet filtering functionality...")

    comparator = KSTCoordiComparator("data/sample-data.xlsx")
    loaded = comparator.load_data()
    if not loaded:
        print("Failed to load data!")
        return

    print(f"Available sheets: {list(comparator.data.keys())}")

    def run_case(number, sheet_label, *filter_args):
        """Fetch one summary and print its headline counts, or the failure."""
        try:
            result = comparator.get_comparison_summary(*filter_args)
            print(f"{sheet_label}: {result['current_sheet_filter']}")
            counts = result['original_counts']
            print(f"KST total: {counts['kst_total']}")
            print(f"Coordi total: {counts['coordi_total']}")
            print(f"Matched: {result['matched_items_count']}")
            print(f"✓ Test {number} passed")
        except Exception as e:
            print(f"✗ Test {number} failed: {e}")

    # Test 1: no filter argument at all
    print("\n=== TEST 1: No sheet filter (should default to first sheet) ===")
    run_case(1, "Default sheet selected")

    # Test 2: explicit filter, only possible with at least two sheets
    sheet_names = list(comparator.data.keys())
    if len(sheet_names) <= 1:
        print("\n=== TEST 2: Skipped (only one sheet available) ===")
    else:
        second_sheet = sheet_names[1]
        print(f"\n=== TEST 2: Specific sheet filter ({second_sheet}) ===")
        run_case(2, "Selected sheet", second_sheet)

    # Test 3: duplicate counts reported sheet by sheet
    print("\n=== TEST 3: Verify duplicate detection within single sheets only ===")
    for sheet_name in sheet_names:
        mismatches = comparator.get_comparison_summary(sheet_name)['mismatches']
        print(f"Sheet '{sheet_name}':")
        print(f" KST duplicates: {mismatches['kst_duplicates_count']}")
        print(f" Coordi duplicates: {mismatches['coordi_duplicates_count']}")

    print("\n✓ All tests completed!")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
test_sheet_filtering()
|
|
||||||
87
web_gui.py
87
web_gui.py
@ -61,6 +61,10 @@ def analyze_data():
|
|||||||
# Add matched data to results
|
# Add matched data to results
|
||||||
comparison_results['matched_data'] = matched_items_data
|
comparison_results['matched_data'] = matched_items_data
|
||||||
|
|
||||||
|
# Generate visualize data
|
||||||
|
visualize_data = comparator_instance.generate_visualize_data(sheet_filter)
|
||||||
|
comparison_results['visualize_data'] = visualize_data
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': True,
|
'success': True,
|
||||||
'results': comparison_results
|
'results': comparison_results
|
||||||
@ -307,6 +311,32 @@ def create_templates_dir():
|
|||||||
border: 1px solid #ddd;
|
border: 1px solid #ddd;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Vibrant color styles for Visualize tab */
|
||||||
|
.coordi-only-row {
|
||||||
|
background-color: #ff4444 !important; /* Bright red */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.kst-only-row {
|
||||||
|
background-color: #4488ff !important; /* Bright blue */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mixed-duplicate-row {
|
||||||
|
background-color: #ff8800 !important; /* Bright orange */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.pure-duplicate-row {
|
||||||
|
background-color: #8844ff !important; /* Bright purple */
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
.matched-row {
|
||||||
|
background-color: white !important; /* White background */
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
@ -337,6 +367,7 @@ def create_templates_dir():
|
|||||||
<div class="tabs">
|
<div class="tabs">
|
||||||
<div class="tab active" onclick="showTab('summary')">Summary</div>
|
<div class="tab active" onclick="showTab('summary')">Summary</div>
|
||||||
<div class="tab" onclick="showTab('different')">Different</div>
|
<div class="tab" onclick="showTab('different')">Different</div>
|
||||||
|
<div class="tab" onclick="showTab('visualize')">Visualize</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="summary" class="tab-content active">
|
<div id="summary" class="tab-content active">
|
||||||
@ -389,6 +420,25 @@ def create_templates_dir():
|
|||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div id="visualize" class="tab-content">
|
||||||
|
<h3>Data </h3>
|
||||||
|
<div class="table-container">
|
||||||
|
<table id="visualize-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Coordi Title</th>
|
||||||
|
<th>Coordi Chapter</th>
|
||||||
|
<th>KST Title</th>
|
||||||
|
<th>KST Chapter</th>
|
||||||
|
<th>Status</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="visualize-table-body">
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -602,6 +652,9 @@ def create_templates_dir():
|
|||||||
|
|
||||||
// Update Different tab
|
// Update Different tab
|
||||||
updateDifferentTable(results.mismatch_details);
|
updateDifferentTable(results.mismatch_details);
|
||||||
|
|
||||||
|
// Update Visualize tab
|
||||||
|
updateVisualizeTable(results.visualize_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
function updateSummaryTable(matchedData) {
|
function updateSummaryTable(matchedData) {
|
||||||
@ -721,6 +774,40 @@ def create_templates_dir():
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function updateVisualizeTable(visualizeData) {
|
||||||
|
const tbody = document.getElementById('visualize-table-body');
|
||||||
|
tbody.innerHTML = '';
|
||||||
|
|
||||||
|
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
|
||||||
|
visualizeData.forEach(row => {
|
||||||
|
const tr = tbody.insertRow();
|
||||||
|
tr.insertCell(0).textContent = row.coordi_title || '';
|
||||||
|
tr.insertCell(1).textContent = row.coordi_chapter || '';
|
||||||
|
tr.insertCell(2).textContent = row.kst_title || '';
|
||||||
|
tr.insertCell(3).textContent = row.kst_chapter || '';
|
||||||
|
tr.insertCell(4).textContent = row.reason || '';
|
||||||
|
|
||||||
|
// Apply vibrant color highlighting based on row type
|
||||||
|
switch (row.row_type) {
|
||||||
|
case 'coordi_only':
|
||||||
|
tr.className = 'coordi-only-row';
|
||||||
|
break;
|
||||||
|
case 'kst_only':
|
||||||
|
tr.className = 'kst-only-row';
|
||||||
|
break;
|
||||||
|
case 'mixed_duplicate':
|
||||||
|
tr.className = 'mixed-duplicate-row';
|
||||||
|
break;
|
||||||
|
case 'pure_duplicate':
|
||||||
|
tr.className = 'pure-duplicate-row';
|
||||||
|
break;
|
||||||
|
case 'matched':
|
||||||
|
tr.className = 'matched-row';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Auto-analyze on page load with default file
|
// Auto-analyze on page load with default file
|
||||||
window.onload = function() {
|
window.onload = function() {
|
||||||
// Initialize sheet filter with loading state
|
// Initialize sheet filter with loading state
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user