diff --git a/data_comparator.py b/data_comparator.py
index 5bc2440..e5fb884 100644
--- a/data_comparator.py
+++ b/data_comparator.py
@@ -212,6 +212,59 @@ class KSTCoordiComparator:
 
         return duplicates
+
+    def _find_sheet_specific_mixed_duplicates(self, sheet_filter: str) -> List[Dict]:
+        """Find mixed duplicates within a specific sheet only"""
+        if not sheet_filter:
+            return []
+
+        mixed_duplicates = []
+
+        # Extract items specific to this sheet
+        extract_results = self.extract_kst_coordi_items()
+        kst_sheet_items = [item for item in extract_results['kst_all_items'] if item.source_sheet == sheet_filter]
+        coordi_sheet_items = [item for item in extract_results['coordi_all_items'] if item.source_sheet == sheet_filter]
+
+        # Find duplicates within this sheet
+        kst_sheet_duplicates = self._find_duplicates_in_list(kst_sheet_items)
+        coordi_sheet_duplicates = self._find_duplicates_in_list(coordi_sheet_items)
+
+        # Create sets for items that exist in both KST and Coordi within this sheet
+        kst_sheet_set = {(item.title, item.episode) for item in kst_sheet_items}
+        coordi_sheet_set = {(item.title, item.episode) for item in coordi_sheet_items}
+        matched_in_sheet = kst_sheet_set.intersection(coordi_sheet_set)
+
+        # Create sets of duplicate keys within this sheet
+        kst_duplicate_keys = {(item.title, item.episode) for item in kst_sheet_duplicates}
+        coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_sheet_duplicates}
+
+        # Find matched items that also have duplicates within the same sheet
+        for title, episode in matched_in_sheet:
+            # Check if this matched item has duplicates in KST within this sheet
+            if (title, episode) in kst_duplicate_keys:
+                mixed_duplicates.append({
+                    'title': title,
+                    'episode': episode,
+                    'sheet': sheet_filter,
+                    'row_index': None,  # Could get from items if needed
+                    'reason': f'Item exists in both datasets but has duplicates in KST within {sheet_filter}',
+                    'mismatch_type': 'MIXED_DUPLICATE_KST',
+                    'duplicate_side': 'KST'
+                })
+
+            # Check if this matched item has duplicates in Coordi within this sheet
+            if (title, episode) in coordi_duplicate_keys:
+                mixed_duplicates.append({
+                    'title': title,
+                    'episode': episode,
+                    'sheet': sheet_filter,
+                    'row_index': None,  # Could get from items if needed
+                    'reason': f'Item exists in both datasets but has duplicates in Coordi within {sheet_filter}',
+                    'mismatch_type': 'MIXED_DUPLICATE_COORDI',
+                    'duplicate_side': 'COORDI'
+                })
+
+        return mixed_duplicates
+
     def generate_mismatch_details(self) -> Dict[str, List[Dict]]:
         """Generate detailed information about each type of mismatch with reasons"""
         categorization = self.categorize_mismatches()
@@ -220,7 +273,8 @@ class KSTCoordiComparator:
             'kst_only': [],
             'coordi_only': [],
             'kst_duplicates': [],
-            'coordi_duplicates': []
+            'coordi_duplicates': [],
+            'mixed_duplicates': []
         }
 
         # KST-only items
@@ -267,38 +321,40 @@ class KSTCoordiComparator:
                     'mismatch_type': 'COORDI_DUPLICATE'
                 })
 
+        # Mixed duplicates will be calculated per sheet in get_comparison_summary
+        mismatch_details['mixed_duplicates'] = []
+
         return mismatch_details
 
     def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]:
-        """Get a comprehensive summary of the comparison, optionally filtered by sheet"""
+        """Get a comprehensive summary of the comparison, filtered by a specific sheet"""
+        # Get sheet names for filtering options
+        sheet_names = list(self.data.keys()) if self.data else []
+
+        # If no sheet filter provided, default to first sheet
+        if not sheet_filter:
+            sheet_filter = sheet_names[0] if sheet_names else None
+
+        if not sheet_filter:
+            raise ValueError("No sheets available or sheet filter not specified")
+
         categorization = self.categorize_mismatches()
         mismatch_details = self.generate_mismatch_details()
         grouped_data = self.group_by_title()
 
-        # Get sheet names for filtering options
-        sheet_names = list(self.data.keys()) if self.data else []
+        # Always apply sheet filtering (no more "All Sheets" option)
+        mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
+        grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
 
-        # Apply sheet filtering if specified
-        if sheet_filter and sheet_filter != 'All Sheets':
-            mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
-            grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
-
-            # Recalculate counts for filtered data
-            filtered_counts = self.calculate_filtered_counts(mismatch_details)
-        else:
-            filtered_counts = {
-                'kst_total': categorization['counts']['total_kst'],
-                'coordi_total': categorization['counts']['total_coordi'],
-                'matched': categorization['counts']['matched'],
-                'kst_only_count': categorization['counts']['kst_only'],
-                'coordi_only_count': categorization['counts']['coordi_only'],
-                'kst_duplicates_count': categorization['counts']['kst_duplicates_count'],
-                'coordi_duplicates_count': categorization['counts']['coordi_duplicates_count']
-            }
+        # Calculate mixed duplicates specific to this sheet
+        mismatch_details['mixed_duplicates'] = self._find_sheet_specific_mixed_duplicates(sheet_filter)
+
+        # Recalculate counts for filtered data
+        filtered_counts = self.calculate_filtered_counts(mismatch_details)
 
         summary = {
             'sheet_names': sheet_names,
-            'current_sheet_filter': sheet_filter or 'All Sheets',
+            'current_sheet_filter': sheet_filter,
             'original_counts': {
                 'kst_total': filtered_counts['kst_total'],
                 'coordi_total': filtered_counts['coordi_total']
@@ -372,7 +428,8 @@ class KSTCoordiComparator:
             'kst_only_count': len(filtered_mismatch_details['kst_only']),
             'coordi_only_count': len(filtered_mismatch_details['coordi_only']),
             'kst_duplicates_count': len(filtered_mismatch_details['kst_duplicates']),
-            'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates'])
+            'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates']),
+            'mixed_duplicates_count': len(filtered_mismatch_details.get('mixed_duplicates', []))
         }
 
     def group_by_title(self) -> Dict[str, Any]:
diff --git a/templates/index.html b/templates/index.html
index 4b99942..c7404e6 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -183,7 +183,7 @@
@@ -275,7 +275,7 @@
                 },
                 body: JSON.stringify({
                     file_path: filePath,
-                    sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+                    sheet_filter: sheetFilter
                 })
             })
             .then(response => response.json())
@@ -300,13 +300,14 @@
         function updateSheetFilter(sheetNames, currentFilter) {
             const select = document.getElementById('sheetFilter');
-            select.innerHTML = '';
+            select.innerHTML = '';
 
-            sheetNames.forEach(sheetName => {
+            sheetNames.forEach((sheetName, index) => {
                 const option = document.createElement('option');
                 option.value = sheetName;
                 option.textContent = sheetName;
-                if (sheetName === currentFilter) {
+                // Select the first sheet by default, or the current filter if specified
+                if (sheetName === currentFilter || (!currentFilter && index === 0)) {
                     option.selected = true;
                 }
                 select.appendChild(option);
@@ -372,7 +373,7 @@
                 },
                 body: JSON.stringify({
                     file_path: data.file_path,
-                    sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+                    sheet_filter: sheetFilter
                 })
             });
         } else {
@@ -404,9 +405,10 @@
             // Update count displays
             document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
 
-            // Count all different items including duplicates
+            // Count all different items including duplicates and mixed duplicates
             const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
-                results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
+                results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
+                (results.mismatches.mixed_duplicates_count || 0);
             document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
 
             // Update Summary tab (matched items)
@@ -444,47 +446,70 @@
             const tbody = document.getElementById('different-table');
             tbody.innerHTML = '';
 
-            // Create sets of duplicate items for highlighting
-            const kstDuplicateKeys = new Set();
-            const coordiDuplicateKeys = new Set();
-
-            mismatchDetails.kst_duplicates.forEach(item => {
-                kstDuplicateKeys.add(`${item.title}_${item.episode}`);
-            });
-
-            mismatchDetails.coordi_duplicates.forEach(item => {
-                coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
-            });
-
-            // Combine only KST-only and Coordi-only items (like before)
             const allDifferences = [];
 
-            // Add KST-only items
+            // Add KST-only items (no special highlighting)
             mismatchDetails.kst_only.forEach(item => {
-                const key = `${item.title}_${item.episode}`;
                 allDifferences.push({
                     kstData: `${item.title} - Episode ${item.episode}`,
                     coordiData: '',
                     reason: 'Only appears in KST',
                     sortTitle: item.title,
                     sortEpisode: parseFloat(item.episode) || 0,
-                    isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
+                    highlightType: 'none'
                 });
             });
 
-            // Add Coordi-only items
+            // Add Coordi-only items (no special highlighting)
             mismatchDetails.coordi_only.forEach(item => {
-                const key = `${item.title}_${item.episode}`;
                 allDifferences.push({
                     kstData: '',
                     coordiData: `${item.title} - Episode ${item.episode}`,
                     reason: 'Only appears in Coordi',
                     sortTitle: item.title,
                     sortEpisode: parseFloat(item.episode) || 0,
-                    isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
+                    highlightType: 'none'
                 });
             });
 
+            // Add KST duplicates (red highlighting)
+            mismatchDetails.kst_duplicates.forEach(item => {
+                allDifferences.push({
+                    kstData: `${item.title} - Episode ${item.episode}`,
+                    coordiData: '',
+                    reason: 'Duplicate entry in KST data',
+                    sortTitle: item.title,
+                    sortEpisode: parseFloat(item.episode) || 0,
+                    highlightType: 'red'
+                });
+            });
+
+            // Add Coordi duplicates (red highlighting)
+            mismatchDetails.coordi_duplicates.forEach(item => {
+                allDifferences.push({
+                    kstData: '',
+                    coordiData: `${item.title} - Episode ${item.episode}`,
+                    reason: 'Duplicate entry in Coordi data',
+                    sortTitle: item.title,
+                    sortEpisode: parseFloat(item.episode) || 0,
+                    highlightType: 'red'
+                });
+            });
+
+            // Add mixed duplicates (yellow highlighting)
+            if (mismatchDetails.mixed_duplicates) {
+                mismatchDetails.mixed_duplicates.forEach(item => {
+                    allDifferences.push({
+                        kstData: item.duplicate_side === 'KST' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
+                        coordiData: item.duplicate_side === 'COORDI' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
+                        reason: item.reason,
+                        sortTitle: item.title,
+                        sortEpisode: parseFloat(item.episode) || 0,
+                        highlightType: 'yellow'
+                    });
+                });
+            }
+
             // Sort by Korean title + episode
             allDifferences.sort((a, b) => {
                 const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@@ -499,10 +524,13 @@
                 row.insertCell(1).textContent = diff.coordiData;
                 row.insertCell(2).textContent = diff.reason;
 
-                // Highlight row in yellow if it's also a duplicate
-                if (diff.isDuplicate) {
+                // Apply highlighting based on type
+                if (diff.highlightType === 'red') {
+                    row.style.backgroundColor = '#f8d7da'; // Light red
+                    row.title = 'Pure duplicate entry';
+                } else if (diff.highlightType === 'yellow') {
                     row.style.backgroundColor = '#fff3cd'; // Light yellow
-                    row.title = 'This item also has duplicates in the dataset';
+                    row.title = 'Item exists in both datasets but has duplicates on one side';
                 }
             });
         }
diff --git a/test_duplicates.py b/test_duplicates.py
deleted file mode 100644
index d239999..0000000
--- a/test_duplicates.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from data_comparator import KSTCoordiComparator
-
-def test_duplicate_detection():
-    comparator = KSTCoordiComparator('data/sample-data.xlsx')
-    if comparator.load_data():
-        print("=== DUPLICATE DETECTION TEST ===")
-
-        # Get the data extraction results
-        data = comparator.extract_kst_coordi_items()
-
-        print(f"Total KST items (unique): {len(data['kst_items'])}")
-        print(f"Total KST items (all): {len(data['kst_all_items'])}")
-        print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
-        print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")
-
-        # Check for duplicates
-        categorization = comparator.categorize_mismatches()
-
-        print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
-        print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")
-
-        # Show sample duplicates
-        if categorization['kst_duplicates']:
-            print("\nSample KST duplicates:")
-            for i, dup in enumerate(categorization['kst_duplicates'][:3]):
-                print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
-
-        if categorization['coordi_duplicates']:
-            print("\nSample Coordi duplicates:")
-            for i, dup in enumerate(categorization['coordi_duplicates'][:3]):
-                print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
-
-        # Check for the specific example: 백라이트 - Episode 53-1x(휴재)
-        mismatch_details = comparator.generate_mismatch_details()
-
-        print(f"\nLooking for '백라이트 - Episode 53-1x(휴재)':")
-
-        # Check in KST-only
-        backlight_kst_only = [item for item in mismatch_details['kst_only']
-                              if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
-        # Check in KST duplicates
-        backlight_kst_dup = [item for item in mismatch_details['kst_duplicates']
-                             if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
-        print(f" Found in KST-only: {len(backlight_kst_only)}")
-        print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
-
-        if backlight_kst_only:
-            print(f" KST-only details: {backlight_kst_only[0]}")
-        if backlight_kst_dup:
-            print(f" KST duplicate details: {backlight_kst_dup[0]}")
-
-        # Test the web interface logic
-        print(f"\n=== Testing Web Interface Logic ===")
-        summary = comparator.get_comparison_summary()
-        print(f"Web interface will show:")
-        print(f" Total different items: {summary['mismatches']['kst_only_count'] + summary['mismatches']['coordi_only_count'] + summary['mismatches']['kst_duplicates_count'] + summary['mismatches']['coordi_duplicates_count']}")
-
-        print("\n✓ Duplicate detection test complete!")
-        print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
-
-if __name__ == "__main__":
-    test_duplicate_detection()
\ No newline at end of file
diff --git a/test_final_duplicate_fix.py b/test_final_duplicate_fix.py
deleted file mode 100644
index 52d02f9..0000000
--- a/test_final_duplicate_fix.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import requests
-
-def test_final_duplicate_fix():
-    print("=== FINAL DUPLICATE FIX TEST ===")
-
-    try:
-        # Test the analyze endpoint
-        response = requests.post('http://localhost:8081/analyze',
-                                 json={'file_path': 'data/sample-data.xlsx'},
-                                 timeout=30)
-
-        if response.status_code == 200:
-            data = response.json()
-            if data.get('success'):
-                results = data['results']
-
-                print("✓ Analysis successful!")
-                print(f" Matched items: {results['matched_items_count']}")
-                print(f" KST only: {results['mismatches']['kst_only_count']}")
-                print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
-                print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
-                print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
-
-                total_different = (results['mismatches']['kst_only_count'] +
-                                   results['mismatches']['coordi_only_count'] +
-                                   results['mismatches']['kst_duplicates_count'] +
-                                   results['mismatches']['coordi_duplicates_count'])
-                print(f" Total different items: {total_different}")
-
-                # Check for the specific example
-                kst_duplicates = results['mismatch_details']['kst_duplicates']
-                backlight_duplicates = [item for item in kst_duplicates
-                                        if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
-                if backlight_duplicates:
-                    print(f"\n✓ Found 백라이트 duplicates: {len(backlight_duplicates)}")
-                    print(f" Example: {backlight_duplicates[0]['title']} - Episode {backlight_duplicates[0]['episode']}")
-
-                print(f"\n✓ Web interface ready at http://localhost:8081")
-                print("✓ The 'Different' tab will now show combined reasons like:")
-                print(" 백라이트 - Episode 53-1x(휴재) | (empty) | Only appears in KST + Duplicate in KST")
-
-            else:
-                print(f"✗ Analysis failed: {data.get('error')}")
-        else:
-            print(f"✗ Request failed: {response.status_code}")
-
-    except requests.exceptions.RequestException as e:
failed: {e}") - -if __name__ == "__main__": - test_final_duplicate_fix() \ No newline at end of file diff --git a/test_sheet_filtering.py b/test_sheet_filtering.py new file mode 100644 index 0000000..0d1ef12 --- /dev/null +++ b/test_sheet_filtering.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +from data_comparator import KSTCoordiComparator + +def test_sheet_filtering(): + """Test that sheet filtering works correctly and defaults to first sheet""" + print("Testing sheet filtering functionality...") + + # Create comparator and load data + comparator = KSTCoordiComparator("data/sample-data.xlsx") + if not comparator.load_data(): + print("Failed to load data!") + return + + print(f"Available sheets: {list(comparator.data.keys())}") + + # Test 1: No sheet filter provided (should default to first sheet) + print("\n=== TEST 1: No sheet filter (should default to first sheet) ===") + try: + summary1 = comparator.get_comparison_summary() + print(f"Default sheet selected: {summary1['current_sheet_filter']}") + print(f"KST total: {summary1['original_counts']['kst_total']}") + print(f"Coordi total: {summary1['original_counts']['coordi_total']}") + print(f"Matched: {summary1['matched_items_count']}") + print("✓ Test 1 passed") + except Exception as e: + print(f"✗ Test 1 failed: {e}") + + # Test 2: Specific sheet filter + sheet_names = list(comparator.data.keys()) + if len(sheet_names) > 1: + second_sheet = sheet_names[1] + print(f"\n=== TEST 2: Specific sheet filter ({second_sheet}) ===") + try: + summary2 = comparator.get_comparison_summary(second_sheet) + print(f"Selected sheet: {summary2['current_sheet_filter']}") + print(f"KST total: {summary2['original_counts']['kst_total']}") + print(f"Coordi total: {summary2['original_counts']['coordi_total']}") + print(f"Matched: {summary2['matched_items_count']}") + print("✓ Test 2 passed") + except Exception as e: + print(f"✗ Test 2 failed: {e}") + else: + print("\n=== TEST 2: Skipped (only one sheet available) ===") + + # Test 3: Verify no duplicates across sheets (this was the original problem) + print(f"\n=== TEST 3: Verify duplicate detection within single sheets only ===") + for sheet_name in sheet_names: + summary = comparator.get_comparison_summary(sheet_name) + print(f"Sheet '{sheet_name}':") + print(f" KST duplicates: {summary['mismatches']['kst_duplicates_count']}") + print(f" Coordi duplicates: {summary['mismatches']['coordi_duplicates_count']}") + + print("\n✓ All tests completed!") + +if __name__ == "__main__": + test_sheet_filtering() \ No newline at end of file diff --git a/test_simplified_duplicates.py b/test_simplified_duplicates.py deleted file mode 100644 index 08aa534..0000000 --- a/test_simplified_duplicates.py +++ /dev/null @@ -1,68 +0,0 @@ -import requests - -def test_simplified_duplicates(): - print("=== SIMPLIFIED DUPLICATE DISPLAY TEST ===") - - try: - # Test the analyze endpoint - response = requests.post('http://localhost:8081/analyze', - json={'file_path': 'data/sample-data.xlsx'}, - timeout=30) - - if response.status_code == 200: - data = response.json() - if data.get('success'): - results = data['results'] - - print("✓ Analysis successful!") - print(f" Matched items: {results['matched_items_count']}") - print(f" KST only: {results['mismatches']['kst_only_count']}") - print(f" Coordi only: {results['mismatches']['coordi_only_count']}") - print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}") - print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}") - - # What the count will show - 
-                total_count = (results['mismatches']['kst_only_count'] +
-                               results['mismatches']['coordi_only_count'] +
-                               results['mismatches']['kst_duplicates_count'] +
-                               results['mismatches']['coordi_duplicates_count'])
-
-                # What the table will show
-                table_rows = results['mismatches']['kst_only_count'] + results['mismatches']['coordi_only_count']
-
-                print(f"\n📊 DISPLAY LOGIC:")
-                print(f" Count badge shows: {total_count} items (all different items)")
-                print(f" Table shows: {table_rows} rows (only KST-only + Coordi-only)")
-                print(f" Yellow highlights: Items that are also duplicates")
-
-                # Check for 백라이트 example
-                kst_only = results['mismatch_details']['kst_only']
-                kst_duplicates = results['mismatch_details']['kst_duplicates']
-
-                backlight_kst_only = [item for item in kst_only
-                                      if '백라이트' in item['title'] and '53-1x' in item['episode']]
-                backlight_kst_dup = [item for item in kst_duplicates
-                                     if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
-                if backlight_kst_only and backlight_kst_dup:
-                    print(f"\n✓ 백라이트 example works:")
-                    print(f" - Appears in table (KST-only): YES")
-                    print(f" - Will be highlighted yellow: YES (also duplicate)")
-                    print(f" - Contributes to count: 2 items (1 KST-only + 1 duplicate)")
-
-                print(f"\n✓ Web interface ready at http://localhost:8081")
-                print("✓ Check the 'Different' tab:")
-                print(" - Count shows all different items")
-                print(" - Table shows only KST-only + Coordi-only")
-                print(" - Yellow rows = items that also have duplicates")
-
-            else:
-                print(f"✗ Analysis failed: {data.get('error')}")
-        else:
-            print(f"✗ Request failed: {response.status_code}")
-
-    except requests.exceptions.RequestException as e:
-        print(f"✗ Request failed: {e}")
-
-if __name__ == "__main__":
-    test_simplified_duplicates()
\ No newline at end of file
diff --git a/web_gui.py b/web_gui.py
index 9783fa4..6bf5fcd 100644
--- a/web_gui.py
+++ b/web_gui.py
@@ -42,7 +42,7 @@ def analyze_data():
         matched_items = list(categorization['matched_items'])
 
         # Filter matched items by sheet if specified
-        if sheet_filter and sheet_filter != 'All Sheets':
+        if sheet_filter:
             matched_items = [item for item in matched_items if item.source_sheet == sheet_filter]
 
         # Format matched items for JSON (limit to first 500 for performance)
@@ -291,7 +291,7 @@ def create_templates_dir():
@@ -383,7 +383,7 @@ def create_templates_dir():
                 },
                 body: JSON.stringify({
                     file_path: filePath,
-                    sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+                    sheet_filter: sheetFilter
                 })
             })
             .then(response => response.json())
@@ -408,13 +408,14 @@ def create_templates_dir():
         function updateSheetFilter(sheetNames, currentFilter) {
             const select = document.getElementById('sheetFilter');
-            select.innerHTML = '';
+            select.innerHTML = '';
 
-            sheetNames.forEach(sheetName => {
+            sheetNames.forEach((sheetName, index) => {
                 const option = document.createElement('option');
                 option.value = sheetName;
                 option.textContent = sheetName;
-                if (sheetName === currentFilter) {
+                // Select the first sheet by default, or the current filter if specified
+                if (sheetName === currentFilter || (!currentFilter && index === 0)) {
                     option.selected = true;
                 }
                 select.appendChild(option);
@@ -480,7 +481,7 @@ def create_templates_dir():
                 },
                 body: JSON.stringify({
                     file_path: data.file_path,
-                    sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+                    sheet_filter: sheetFilter
                 })
             });
         } else {
@@ -512,9 +513,10 @@ def create_templates_dir():
             // Update count displays
             document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
 
-            // Count all different items including duplicates
+            // Count all different items including duplicates and mixed duplicates
             const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
-                results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
+                results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
+                (results.mismatches.mixed_duplicates_count || 0);
             document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
 
             // Update Summary tab (matched items)
@@ -552,47 +554,70 @@ def create_templates_dir():
             const tbody = document.getElementById('different-table');
             tbody.innerHTML = '';
 
-            // Create sets of duplicate items for highlighting
-            const kstDuplicateKeys = new Set();
-            const coordiDuplicateKeys = new Set();
-
-            mismatchDetails.kst_duplicates.forEach(item => {
-                kstDuplicateKeys.add(`${item.title}_${item.episode}`);
-            });
-
-            mismatchDetails.coordi_duplicates.forEach(item => {
-                coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
-            });
-
-            // Combine only KST-only and Coordi-only items (like before)
             const allDifferences = [];
 
-            // Add KST-only items
+            // Add KST-only items (no special highlighting)
             mismatchDetails.kst_only.forEach(item => {
-                const key = `${item.title}_${item.episode}`;
                 allDifferences.push({
                     kstData: `${item.title} - Episode ${item.episode}`,
                     coordiData: '',
                     reason: 'Only appears in KST',
                     sortTitle: item.title,
                     sortEpisode: parseFloat(item.episode) || 0,
-                    isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
+                    highlightType: 'none'
                 });
             });
 
-            // Add Coordi-only items
+            // Add Coordi-only items (no special highlighting)
             mismatchDetails.coordi_only.forEach(item => {
-                const key = `${item.title}_${item.episode}`;
                 allDifferences.push({
                     kstData: '',
                     coordiData: `${item.title} - Episode ${item.episode}`,
                     reason: 'Only appears in Coordi',
                     sortTitle: item.title,
                     sortEpisode: parseFloat(item.episode) || 0,
-                    isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
+                    highlightType: 'none'
                 });
             });
 
+            // Add KST duplicates (red highlighting)
+            mismatchDetails.kst_duplicates.forEach(item => {
+                allDifferences.push({
+                    kstData: `${item.title} - Episode ${item.episode}`,
+                    coordiData: '',
+                    reason: 'Duplicate entry in KST data',
+                    sortTitle: item.title,
+                    sortEpisode: parseFloat(item.episode) || 0,
+                    highlightType: 'red'
+                });
+            });
+
+            // Add Coordi duplicates (red highlighting)
+            mismatchDetails.coordi_duplicates.forEach(item => {
+                allDifferences.push({
+                    kstData: '',
+                    coordiData: `${item.title} - Episode ${item.episode}`,
+                    reason: 'Duplicate entry in Coordi data',
+                    sortTitle: item.title,
+                    sortEpisode: parseFloat(item.episode) || 0,
+                    highlightType: 'red'
+                });
+            });
+
+            // Add mixed duplicates (yellow highlighting)
+            if (mismatchDetails.mixed_duplicates) {
+                mismatchDetails.mixed_duplicates.forEach(item => {
+                    allDifferences.push({
+                        kstData: item.duplicate_side === 'KST' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
+                        coordiData: item.duplicate_side === 'COORDI' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
+                        reason: item.reason,
+                        sortTitle: item.title,
+                        sortEpisode: parseFloat(item.episode) || 0,
+                        highlightType: 'yellow'
+                    });
+                });
+            }
+
             // Sort by Korean title + episode
             allDifferences.sort((a, b) => {
                 const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@@ -607,10 +632,13 @@ def create_templates_dir():
                 row.insertCell(1).textContent = diff.coordiData;
                 row.insertCell(2).textContent = diff.reason;
 
-                // Highlight row in yellow if it's also a duplicate
-                if (diff.isDuplicate) {
+                // Apply highlighting based on type
+                if (diff.highlightType === 'red') {
+                    row.style.backgroundColor = '#f8d7da'; // Light red
+                    row.title = 'Pure duplicate entry';
+                } else if (diff.highlightType === 'yellow') {
                     row.style.backgroundColor = '#fff3cd'; // Light yellow
-                    row.title = 'This item also has duplicates in the dataset';
+                    row.title = 'Item exists in both datasets but has duplicates on one side';
                 }
             });
         }