fix logic

arthur 2025-08-20 15:38:04 +07:00
parent 1f88db5fb9
commit ed3655d1c9
7 changed files with 254 additions and 268 deletions


@ -212,6 +212,59 @@ class KSTCoordiComparator:
return duplicates
def _find_sheet_specific_mixed_duplicates(self, sheet_filter: str) -> List[Dict]:
"""Find mixed duplicates within a specific sheet only"""
if not sheet_filter:
return []
mixed_duplicates = []
# Extract items specific to this sheet
extract_results = self.extract_kst_coordi_items()
kst_sheet_items = [item for item in extract_results['kst_all_items'] if item.source_sheet == sheet_filter]
coordi_sheet_items = [item for item in extract_results['coordi_all_items'] if item.source_sheet == sheet_filter]
# Find duplicates within this sheet
kst_sheet_duplicates = self._find_duplicates_in_list(kst_sheet_items)
coordi_sheet_duplicates = self._find_duplicates_in_list(coordi_sheet_items)
# Create sets for items that exist in both KST and Coordi within this sheet
kst_sheet_set = {(item.title, item.episode) for item in kst_sheet_items}
coordi_sheet_set = {(item.title, item.episode) for item in coordi_sheet_items}
matched_in_sheet = kst_sheet_set.intersection(coordi_sheet_set)
# Create sets of duplicate keys within this sheet
kst_duplicate_keys = {(item.title, item.episode) for item in kst_sheet_duplicates}
coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_sheet_duplicates}
# Find matched items that also have duplicates within the same sheet
for title, episode in matched_in_sheet:
# Check if this matched item has duplicates in KST within this sheet
if (title, episode) in kst_duplicate_keys:
mixed_duplicates.append({
'title': title,
'episode': episode,
'sheet': sheet_filter,
'row_index': None, # Could get from items if needed
'reason': f'Item exists in both datasets but has duplicates in KST within {sheet_filter}',
'mismatch_type': 'MIXED_DUPLICATE_KST',
'duplicate_side': 'KST'
})
# Check if this matched item has duplicates in Coordi within this sheet
if (title, episode) in coordi_duplicate_keys:
mixed_duplicates.append({
'title': title,
'episode': episode,
'sheet': sheet_filter,
'row_index': None, # Could get from items if needed
'reason': f'Item exists in both datasets but has duplicates in Coordi within {sheet_filter}',
'mismatch_type': 'MIXED_DUPLICATE_COORDI',
'duplicate_side': 'COORDI'
})
return mixed_duplicates
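_find_duplicates_in_list is called above but does not appear in this hunk. As a point of reference, a minimal sketch of the behaviour the new method relies on (an assumption, not the actual helper), where each extracted item is expected to expose title and episode attributes:

from collections import Counter
from typing import List

def _find_duplicates_in_list(items: List) -> List:
    """Hypothetical stand-in for the helper used above: return every item
    whose (title, episode) key occurs more than once in the list."""
    counts = Counter((item.title, item.episode) for item in items)
    return [item for item in items if counts[(item.title, item.episode)] > 1]

Whether the real helper reports every occurrence or skips the first does not matter for the logic above, since its output is immediately reduced to a set of (title, episode) keys.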
def generate_mismatch_details(self) -> Dict[str, List[Dict]]:
"""Generate detailed information about each type of mismatch with reasons"""
categorization = self.categorize_mismatches()
@ -220,7 +273,8 @@ class KSTCoordiComparator:
'kst_only': [],
'coordi_only': [],
'kst_duplicates': [],
'coordi_duplicates': []
'coordi_duplicates': [],
'mixed_duplicates': []
}
# KST-only items
@ -267,38 +321,40 @@ class KSTCoordiComparator:
'mismatch_type': 'COORDI_DUPLICATE'
})
# Mixed duplicates will be calculated per sheet in get_comparison_summary
mismatch_details['mixed_duplicates'] = []
return mismatch_details
def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]:
"""Get a comprehensive summary of the comparison, optionally filtered by sheet"""
"""Get a comprehensive summary of the comparison, filtered by a specific sheet"""
# Get sheet names for filtering options
sheet_names = list(self.data.keys()) if self.data else []
# If no sheet filter provided, default to first sheet
if not sheet_filter:
sheet_filter = sheet_names[0] if sheet_names else None
if not sheet_filter:
raise ValueError("No sheets available or sheet filter not specified")
categorization = self.categorize_mismatches()
mismatch_details = self.generate_mismatch_details()
grouped_data = self.group_by_title()
# Get sheet names for filtering options
sheet_names = list(self.data.keys()) if self.data else []
# Always apply sheet filtering (no more "All Sheets" option)
mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
# Apply sheet filtering if specified
if sheet_filter and sheet_filter != 'All Sheets':
mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
# Recalculate counts for filtered data
filtered_counts = self.calculate_filtered_counts(mismatch_details)
else:
filtered_counts = {
'kst_total': categorization['counts']['total_kst'],
'coordi_total': categorization['counts']['total_coordi'],
'matched': categorization['counts']['matched'],
'kst_only_count': categorization['counts']['kst_only'],
'coordi_only_count': categorization['counts']['coordi_only'],
'kst_duplicates_count': categorization['counts']['kst_duplicates_count'],
'coordi_duplicates_count': categorization['counts']['coordi_duplicates_count']
}
# Calculate mixed duplicates specific to this sheet
mismatch_details['mixed_duplicates'] = self._find_sheet_specific_mixed_duplicates(sheet_filter)
# Recalculate counts for filtered data
filtered_counts = self.calculate_filtered_counts(mismatch_details)
summary = {
'sheet_names': sheet_names,
'current_sheet_filter': sheet_filter or 'All Sheets',
'current_sheet_filter': sheet_filter,
'original_counts': {
'kst_total': filtered_counts['kst_total'],
'coordi_total': filtered_counts['coordi_total']
@ -372,7 +428,8 @@ class KSTCoordiComparator:
'kst_only_count': len(filtered_mismatch_details['kst_only']),
'coordi_only_count': len(filtered_mismatch_details['coordi_only']),
'kst_duplicates_count': len(filtered_mismatch_details['kst_duplicates']),
'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates'])
'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates']),
'mixed_duplicates_count': len(filtered_mismatch_details.get('mixed_duplicates', []))
}
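With mixed_duplicates_count added here, the front end's "Different" badge becomes the sum of all five counters. The same figure can be reproduced server-side, for example when logging a sheet's summary; a small sketch assuming the counts dictionary shown above:

def total_different(counts: dict) -> int:
    """Mirror of the UI arithmetic: KST-only + Coordi-only + both duplicate
    buckets + mixed duplicates (0 if the key is missing)."""
    return (counts['kst_only_count']
            + counts['coordi_only_count']
            + counts['kst_duplicates_count']
            + counts['coordi_duplicates_count']
            + counts.get('mixed_duplicates_count', 0))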
def group_by_title(self) -> Dict[str, Any]:


@ -183,7 +183,7 @@
<div class="file-input" style="margin-top: 10px;">
<label for="sheetFilter">Sheet Filter:</label>
<select id="sheetFilter" onchange="filterBySheet()" disabled>
<option value="All Sheets">All Sheets</option>
<!-- Options will be populated dynamically -->
</select>
</div>
<div id="status"></div>
@ -275,7 +275,7 @@
},
body: JSON.stringify({
file_path: filePath,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
})
.then(response => response.json())
@ -300,13 +300,14 @@
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
select.innerHTML = '<option value="All Sheets">All Sheets</option>';
select.innerHTML = '';
sheetNames.forEach(sheetName => {
sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
if (sheetName === currentFilter) {
// Select the first sheet by default, or the current filter if specified
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
@ -372,7 +373,7 @@
},
body: JSON.stringify({
file_path: data.file_path,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
});
} else {
@ -404,9 +405,10 @@
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
// Count all different items including duplicates
// Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
(results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update Summary tab (matched items)
@ -444,47 +446,70 @@
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
// Create sets of duplicate items for highlighting
const kstDuplicateKeys = new Set();
const coordiDuplicateKeys = new Set();
mismatchDetails.kst_duplicates.forEach(item => {
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
});
mismatchDetails.coordi_duplicates.forEach(item => {
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
});
// Combine only KST-only and Coordi-only items (like before)
const allDifferences = [];
// Add KST-only items
// Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add Coordi-only items
// Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add KST duplicates (red highlighting)
mismatchDetails.kst_duplicates.forEach(item => {
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Duplicate entry in KST data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add Coordi duplicates (red highlighting)
mismatchDetails.coordi_duplicates.forEach(item => {
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Duplicate entry in Coordi data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add mixed duplicates (yellow highlighting)
if (mismatchDetails.mixed_duplicates) {
mismatchDetails.mixed_duplicates.forEach(item => {
allDifferences.push({
// A mixed duplicate exists in both datasets, so show it in both columns
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: `${item.title} - Episode ${item.episode}`,
reason: item.reason,
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'yellow'
});
});
}
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@ -499,10 +524,13 @@
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
// Highlight row in yellow if it's also a duplicate
if (diff.isDuplicate) {
// Apply highlighting based on type
if (diff.highlightType === 'red') {
row.style.backgroundColor = '#f8d7da'; // Light red
row.title = 'Pure duplicate entry';
} else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
row.title = 'This item also has duplicates in the dataset';
row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}


@ -1,64 +0,0 @@
from data_comparator import KSTCoordiComparator
def test_duplicate_detection():
comparator = KSTCoordiComparator('data/sample-data.xlsx')
if comparator.load_data():
print("=== DUPLICATE DETECTION TEST ===")
# Get the data extraction results
data = comparator.extract_kst_coordi_items()
print(f"Total KST items (unique): {len(data['kst_items'])}")
print(f"Total KST items (all): {len(data['kst_all_items'])}")
print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")
# Check for duplicates
categorization = comparator.categorize_mismatches()
print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")
# Show sample duplicates
if categorization['kst_duplicates']:
print("\nSample KST duplicates:")
for i, dup in enumerate(categorization['kst_duplicates'][:3]):
print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
if categorization['coordi_duplicates']:
print("\nSample Coordi duplicates:")
for i, dup in enumerate(categorization['coordi_duplicates'][:3]):
print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
# Check for the specific example: 백라이트 - Episode 53-1x(휴재)
mismatch_details = comparator.generate_mismatch_details()
print(f"\nLooking for '백라이트 - Episode 53-1x(휴재)':")
# Check in KST-only
backlight_kst_only = [item for item in mismatch_details['kst_only']
if '백라이트' in item['title'] and '53-1x' in item['episode']]
# Check in KST duplicates
backlight_kst_dup = [item for item in mismatch_details['kst_duplicates']
if '백라이트' in item['title'] and '53-1x' in item['episode']]
print(f" Found in KST-only: {len(backlight_kst_only)}")
print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
if backlight_kst_only:
print(f" KST-only details: {backlight_kst_only[0]}")
if backlight_kst_dup:
print(f" KST duplicate details: {backlight_kst_dup[0]}")
# Test the web interface logic
print(f"\n=== Testing Web Interface Logic ===")
summary = comparator.get_comparison_summary()
print(f"Web interface will show:")
print(f" Total different items: {summary['mismatches']['kst_only_count'] + summary['mismatches']['coordi_only_count'] + summary['mismatches']['kst_duplicates_count'] + summary['mismatches']['coordi_duplicates_count']}")
print("\n✓ Duplicate detection test complete!")
print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
if __name__ == "__main__":
test_duplicate_detection()


@ -1,52 +0,0 @@
import requests
def test_final_duplicate_fix():
print("=== FINAL DUPLICATE FIX TEST ===")
try:
# Test the analyze endpoint
response = requests.post('http://localhost:8081/analyze',
json={'file_path': 'data/sample-data.xlsx'},
timeout=30)
if response.status_code == 200:
data = response.json()
if data.get('success'):
results = data['results']
print("✓ Analysis successful!")
print(f" Matched items: {results['matched_items_count']}")
print(f" KST only: {results['mismatches']['kst_only_count']}")
print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
total_different = (results['mismatches']['kst_only_count'] +
results['mismatches']['coordi_only_count'] +
results['mismatches']['kst_duplicates_count'] +
results['mismatches']['coordi_duplicates_count'])
print(f" Total different items: {total_different}")
# Check for the specific example
kst_duplicates = results['mismatch_details']['kst_duplicates']
backlight_duplicates = [item for item in kst_duplicates
if '백라이트' in item['title'] and '53-1x' in item['episode']]
if backlight_duplicates:
print(f"\n✓ Found 백라이트 duplicates: {len(backlight_duplicates)}")
print(f" Example: {backlight_duplicates[0]['title']} - Episode {backlight_duplicates[0]['episode']}")
print(f"\n✓ Web interface ready at http://localhost:8081")
print("✓ The 'Different' tab will now show combined reasons like:")
print(" 백라이트 - Episode 53-1x(휴재) | (empty) | Only appears in KST + Duplicate in KST")
else:
print(f"✗ Analysis failed: {data.get('error')}")
else:
print(f"✗ Request failed: {response.status_code}")
except requests.exceptions.RequestException as e:
print(f"✗ Request failed: {e}")
if __name__ == "__main__":
test_final_duplicate_fix()

test_sheet_filtering.py (new file, 57 lines)

@ -0,0 +1,57 @@
#!/usr/bin/env python3
from data_comparator import KSTCoordiComparator
def test_sheet_filtering():
"""Test that sheet filtering works correctly and defaults to first sheet"""
print("Testing sheet filtering functionality...")
# Create comparator and load data
comparator = KSTCoordiComparator("data/sample-data.xlsx")
if not comparator.load_data():
print("Failed to load data!")
return
print(f"Available sheets: {list(comparator.data.keys())}")
# Test 1: No sheet filter provided (should default to first sheet)
print("\n=== TEST 1: No sheet filter (should default to first sheet) ===")
try:
summary1 = comparator.get_comparison_summary()
print(f"Default sheet selected: {summary1['current_sheet_filter']}")
print(f"KST total: {summary1['original_counts']['kst_total']}")
print(f"Coordi total: {summary1['original_counts']['coordi_total']}")
print(f"Matched: {summary1['matched_items_count']}")
print("✓ Test 1 passed")
except Exception as e:
print(f"✗ Test 1 failed: {e}")
# Test 2: Specific sheet filter
sheet_names = list(comparator.data.keys())
if len(sheet_names) > 1:
second_sheet = sheet_names[1]
print(f"\n=== TEST 2: Specific sheet filter ({second_sheet}) ===")
try:
summary2 = comparator.get_comparison_summary(second_sheet)
print(f"Selected sheet: {summary2['current_sheet_filter']}")
print(f"KST total: {summary2['original_counts']['kst_total']}")
print(f"Coordi total: {summary2['original_counts']['coordi_total']}")
print(f"Matched: {summary2['matched_items_count']}")
print("✓ Test 2 passed")
except Exception as e:
print(f"✗ Test 2 failed: {e}")
else:
print("\n=== TEST 2: Skipped (only one sheet available) ===")
# Test 3: Verify no duplicates across sheets (this was the original problem)
print(f"\n=== TEST 3: Verify duplicate detection within single sheets only ===")
for sheet_name in sheet_names:
summary = comparator.get_comparison_summary(sheet_name)
print(f"Sheet '{sheet_name}':")
print(f" KST duplicates: {summary['mismatches']['kst_duplicates_count']}")
print(f" Coordi duplicates: {summary['mismatches']['coordi_duplicates_count']}")
print("\n✓ All tests completed!")
if __name__ == "__main__":
test_sheet_filtering()
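The script above prints its results for manual inspection; if these checks are ever moved under a test runner such as pytest, the defaulting behaviour can be asserted directly. A sketch, assuming only the API exercised in this file:

def test_default_sheet_is_first():
    comparator = KSTCoordiComparator("data/sample-data.xlsx")
    assert comparator.load_data(), "failed to load workbook"
    first_sheet = list(comparator.data.keys())[0]
    summary = comparator.get_comparison_summary()
    # No argument given, so the summary must fall back to the first sheet
    assert summary['current_sheet_filter'] == first_sheet
    # Mixed duplicates are now computed per sheet, so the counter must exist
    assert 'mixed_duplicates_count' in summary['mismatches']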


@ -1,68 +0,0 @@
import requests
def test_simplified_duplicates():
print("=== SIMPLIFIED DUPLICATE DISPLAY TEST ===")
try:
# Test the analyze endpoint
response = requests.post('http://localhost:8081/analyze',
json={'file_path': 'data/sample-data.xlsx'},
timeout=30)
if response.status_code == 200:
data = response.json()
if data.get('success'):
results = data['results']
print("✓ Analysis successful!")
print(f" Matched items: {results['matched_items_count']}")
print(f" KST only: {results['mismatches']['kst_only_count']}")
print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
# What the count will show
total_count = (results['mismatches']['kst_only_count'] +
results['mismatches']['coordi_only_count'] +
results['mismatches']['kst_duplicates_count'] +
results['mismatches']['coordi_duplicates_count'])
# What the table will show
table_rows = results['mismatches']['kst_only_count'] + results['mismatches']['coordi_only_count']
print(f"\n📊 DISPLAY LOGIC:")
print(f" Count badge shows: {total_count} items (all different items)")
print(f" Table shows: {table_rows} rows (only KST-only + Coordi-only)")
print(f" Yellow highlights: Items that are also duplicates")
# Check for 백라이트 example
kst_only = results['mismatch_details']['kst_only']
kst_duplicates = results['mismatch_details']['kst_duplicates']
backlight_kst_only = [item for item in kst_only
if '백라이트' in item['title'] and '53-1x' in item['episode']]
backlight_kst_dup = [item for item in kst_duplicates
if '백라이트' in item['title'] and '53-1x' in item['episode']]
if backlight_kst_only and backlight_kst_dup:
print(f"\n✓ 백라이트 example works:")
print(f" - Appears in table (KST-only): YES")
print(f" - Will be highlighted yellow: YES (also duplicate)")
print(f" - Contributes to count: 2 items (1 KST-only + 1 duplicate)")
print(f"\n✓ Web interface ready at http://localhost:8081")
print("✓ Check the 'Different' tab:")
print(" - Count shows all different items")
print(" - Table shows only KST-only + Coordi-only")
print(" - Yellow rows = items that also have duplicates")
else:
print(f"✗ Analysis failed: {data.get('error')}")
else:
print(f"✗ Request failed: {response.status_code}")
except requests.exceptions.RequestException as e:
print(f"✗ Request failed: {e}")
if __name__ == "__main__":
test_simplified_duplicates()


@ -42,7 +42,7 @@ def analyze_data():
matched_items = list(categorization['matched_items'])
# Filter matched items by sheet if specified
if sheet_filter and sheet_filter != 'All Sheets':
if sheet_filter:
matched_items = [item for item in matched_items if item.source_sheet == sheet_filter]
# Format matched items for JSON (limit to first 500 for performance)
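With the "All Sheets" sentinel gone, callers of the route now send a concrete sheet name in the request body. A sketch of exercising the endpoint in the style of the removed test scripts; the port and the sheet name are illustrative placeholders, not values taken from the workbook:

import requests

resp = requests.post('http://localhost:8080/analyze',
                     json={'file_path': 'data/sample-data.xlsx',
                           'sheet_filter': 'Sheet1'},  # hypothetical sheet name
                     timeout=30)
data = resp.json()
if data.get('success'):
    mismatches = data['results']['mismatches']
    print('mixed duplicates:', mismatches.get('mixed_duplicates_count', 0))
else:
    print('analysis failed:', data.get('error'))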
@ -291,7 +291,7 @@ def create_templates_dir():
<div class="file-input" style="margin-top: 10px;">
<label for="sheetFilter">Sheet Filter:</label>
<select id="sheetFilter" onchange="filterBySheet()" disabled>
<option value="All Sheets">All Sheets</option>
<!-- Options will be populated dynamically -->
</select>
</div>
<div id="status"></div>
@ -383,7 +383,7 @@ def create_templates_dir():
},
body: JSON.stringify({
file_path: filePath,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
})
.then(response => response.json())
@ -408,13 +408,14 @@ def create_templates_dir():
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
select.innerHTML = '<option value="All Sheets">All Sheets</option>';
select.innerHTML = '';
sheetNames.forEach(sheetName => {
sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
if (sheetName === currentFilter) {
// Select the first sheet by default, or the current filter if specified
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
@ -480,7 +481,7 @@ def create_templates_dir():
},
body: JSON.stringify({
file_path: data.file_path,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
});
} else {
@ -512,9 +513,10 @@ def create_templates_dir():
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
// Count all different items including duplicates
// Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
(results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update Summary tab (matched items)
@ -552,47 +554,70 @@ def create_templates_dir():
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
// Create sets of duplicate items for highlighting
const kstDuplicateKeys = new Set();
const coordiDuplicateKeys = new Set();
mismatchDetails.kst_duplicates.forEach(item => {
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
});
mismatchDetails.coordi_duplicates.forEach(item => {
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
});
// Combine only KST-only and Coordi-only items (like before)
const allDifferences = [];
// Add KST-only items
// Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add Coordi-only items
// Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add KST duplicates (red highlighting)
mismatchDetails.kst_duplicates.forEach(item => {
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Duplicate entry in KST data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add Coordi duplicates (red highlighting)
mismatchDetails.coordi_duplicates.forEach(item => {
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Duplicate entry in Coordi data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add mixed duplicates (yellow highlighting)
if (mismatchDetails.mixed_duplicates) {
mismatchDetails.mixed_duplicates.forEach(item => {
allDifferences.push({
// A mixed duplicate exists in both datasets, so show it in both columns
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: `${item.title} - Episode ${item.episode}`,
reason: item.reason,
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'yellow'
});
});
}
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@ -607,10 +632,13 @@ def create_templates_dir():
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
// Highlight row in yellow if it's also a duplicate
if (diff.isDuplicate) {
// Apply highlighting based on type
if (diff.highlightType === 'red') {
row.style.backgroundColor = '#f8d7da'; // Light red
row.title = 'Pure duplicate entry';
} else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
row.title = 'This item also has duplicates in the dataset';
row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}
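For reference, the highlighting scheme that both copies of populateDifferentTable now apply, summarised as a small mapping (a sketch; the colour values are the ones used in the JavaScript above):

# Category -> (highlight, row background) as applied in the "Different" table
HIGHLIGHT_RULES = {
    'kst_only':          ('none',   None),
    'coordi_only':       ('none',   None),
    'kst_duplicates':    ('red',    '#f8d7da'),  # pure duplicate entry
    'coordi_duplicates': ('red',    '#f8d7da'),
    'mixed_duplicates':  ('yellow', '#fff3cd'),  # in both datasets, duplicated on one side
}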