diff --git a/data_comparator.py b/data_comparator.py
index 5bc2440..e5fb884 100644
--- a/data_comparator.py
+++ b/data_comparator.py
@@ -212,6 +212,59 @@ class KSTCoordiComparator:
return duplicates
+ def _find_sheet_specific_mixed_duplicates(self, sheet_filter: str) -> List[Dict]:
+ """Find mixed duplicates within a specific sheet only"""
+ if not sheet_filter:
+ return []
+
+ mixed_duplicates = []
+
+ # Extract items specific to this sheet
+ extract_results = self.extract_kst_coordi_items()
+ kst_sheet_items = [item for item in extract_results['kst_all_items'] if item.source_sheet == sheet_filter]
+ coordi_sheet_items = [item for item in extract_results['coordi_all_items'] if item.source_sheet == sheet_filter]
+
+ # Find duplicates within this sheet
+ kst_sheet_duplicates = self._find_duplicates_in_list(kst_sheet_items)
+ coordi_sheet_duplicates = self._find_duplicates_in_list(coordi_sheet_items)
+
+ # Create sets for items that exist in both KST and Coordi within this sheet
+ kst_sheet_set = {(item.title, item.episode) for item in kst_sheet_items}
+ coordi_sheet_set = {(item.title, item.episode) for item in coordi_sheet_items}
+ matched_in_sheet = kst_sheet_set.intersection(coordi_sheet_set)
+
+ # Create sets of duplicate keys within this sheet
+ kst_duplicate_keys = {(item.title, item.episode) for item in kst_sheet_duplicates}
+ coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_sheet_duplicates}
+
+ # Find matched items that also have duplicates within the same sheet
+ for title, episode in matched_in_sheet:
+ # Check if this matched item has duplicates in KST within this sheet
+ if (title, episode) in kst_duplicate_keys:
+ mixed_duplicates.append({
+ 'title': title,
+ 'episode': episode,
+ 'sheet': sheet_filter,
+                    'row_index': None,  # Row index not resolved here; could be looked up from the item objects if needed
+ 'reason': f'Item exists in both datasets but has duplicates in KST within {sheet_filter}',
+ 'mismatch_type': 'MIXED_DUPLICATE_KST',
+ 'duplicate_side': 'KST'
+ })
+
+ # Check if this matched item has duplicates in Coordi within this sheet
+ if (title, episode) in coordi_duplicate_keys:
+ mixed_duplicates.append({
+ 'title': title,
+ 'episode': episode,
+ 'sheet': sheet_filter,
+                    'row_index': None,  # Row index not resolved here; could be looked up from the item objects if needed
+ 'reason': f'Item exists in both datasets but has duplicates in Coordi within {sheet_filter}',
+ 'mismatch_type': 'MIXED_DUPLICATE_COORDI',
+ 'duplicate_side': 'COORDI'
+ })
+
+ return mixed_duplicates
+
def generate_mismatch_details(self) -> Dict[str, List[Dict]]:
"""Generate detailed information about each type of mismatch with reasons"""
categorization = self.categorize_mismatches()
@@ -220,7 +273,8 @@ class KSTCoordiComparator:
'kst_only': [],
'coordi_only': [],
'kst_duplicates': [],
- 'coordi_duplicates': []
+ 'coordi_duplicates': [],
+ 'mixed_duplicates': []
}
# KST-only items
@@ -267,38 +321,40 @@ class KSTCoordiComparator:
'mismatch_type': 'COORDI_DUPLICATE'
})
+ # Mixed duplicates will be calculated per sheet in get_comparison_summary
+ mismatch_details['mixed_duplicates'] = []
+
return mismatch_details
def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]:
- """Get a comprehensive summary of the comparison, optionally filtered by sheet"""
+ """Get a comprehensive summary of the comparison, filtered by a specific sheet"""
+ # Get sheet names for filtering options
+ sheet_names = list(self.data.keys()) if self.data else []
+
+ # If no sheet filter provided, default to first sheet
+ if not sheet_filter:
+ sheet_filter = sheet_names[0] if sheet_names else None
+
+ if not sheet_filter:
+ raise ValueError("No sheets available or sheet filter not specified")
+
categorization = self.categorize_mismatches()
mismatch_details = self.generate_mismatch_details()
grouped_data = self.group_by_title()
- # Get sheet names for filtering options
- sheet_names = list(self.data.keys()) if self.data else []
+ # Always apply sheet filtering (no more "All Sheets" option)
+ mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
+ grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
- # Apply sheet filtering if specified
- if sheet_filter and sheet_filter != 'All Sheets':
- mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
- grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
-
- # Recalculate counts for filtered data
- filtered_counts = self.calculate_filtered_counts(mismatch_details)
- else:
- filtered_counts = {
- 'kst_total': categorization['counts']['total_kst'],
- 'coordi_total': categorization['counts']['total_coordi'],
- 'matched': categorization['counts']['matched'],
- 'kst_only_count': categorization['counts']['kst_only'],
- 'coordi_only_count': categorization['counts']['coordi_only'],
- 'kst_duplicates_count': categorization['counts']['kst_duplicates_count'],
- 'coordi_duplicates_count': categorization['counts']['coordi_duplicates_count']
- }
+ # Calculate mixed duplicates specific to this sheet
+ mismatch_details['mixed_duplicates'] = self._find_sheet_specific_mixed_duplicates(sheet_filter)
+
+ # Recalculate counts for filtered data
+ filtered_counts = self.calculate_filtered_counts(mismatch_details)
summary = {
'sheet_names': sheet_names,
- 'current_sheet_filter': sheet_filter or 'All Sheets',
+ 'current_sheet_filter': sheet_filter,
'original_counts': {
'kst_total': filtered_counts['kst_total'],
'coordi_total': filtered_counts['coordi_total']
@@ -372,7 +428,8 @@ class KSTCoordiComparator:
'kst_only_count': len(filtered_mismatch_details['kst_only']),
'coordi_only_count': len(filtered_mismatch_details['coordi_only']),
'kst_duplicates_count': len(filtered_mismatch_details['kst_duplicates']),
- 'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates'])
+ 'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates']),
+ 'mixed_duplicates_count': len(filtered_mismatch_details.get('mixed_duplicates', []))
}
def group_by_title(self) -> Dict[str, Any]:
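
A minimal sketch of the rule the new `_find_sheet_specific_mixed_duplicates` method applies, shown on hand-made `(title, episode)` keys for a single sheet. The real method works on the comparator's extracted item objects and delegates duplicate detection to `_find_duplicates_in_list`; the `Counter`-based helper and the sample tuples below are illustrative assumptions only.

```python
# Illustrative only: mirrors the set logic of _find_sheet_specific_mixed_duplicates
# on plain (title, episode) tuples for one sheet; the real code uses the extracted
# item objects and _find_duplicates_in_list.
from collections import Counter

def find_mixed_duplicates(kst_keys, coordi_keys):
    """kst_keys / coordi_keys: (title, episode) tuples taken from a single sheet."""
    kst_counts = Counter(kst_keys)
    coordi_counts = Counter(coordi_keys)

    matched = set(kst_counts) & set(coordi_counts)                # present on both sides
    kst_dups = {k for k, n in kst_counts.items() if n > 1}        # duplicated within KST
    coordi_dups = {k for k, n in coordi_counts.items() if n > 1}  # duplicated within Coordi

    mixed = []
    for key in sorted(matched):
        if key in kst_dups:
            mixed.append((key, 'KST'))
        if key in coordi_dups:
            mixed.append((key, 'COORDI'))
    return mixed

# ('백라이트', '53-1x(휴재)') is matched on both sides but duplicated in KST,
# so it is reported once as a KST-side mixed duplicate.
print(find_mixed_duplicates(
    [('백라이트', '53-1x(휴재)'), ('백라이트', '53-1x(휴재)'), ('타이틀A', '1')],
    [('백라이트', '53-1x(휴재)'), ('타이틀B', '2')],
))
```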
diff --git a/templates/index.html b/templates/index.html
index 4b99942..c7404e6 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -275,7 +275,7 @@
},
body: JSON.stringify({
file_path: filePath,
- sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+ sheet_filter: sheetFilter
})
})
.then(response => response.json())
@@ -300,13 +300,14 @@
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
- select.innerHTML = '';
+ select.innerHTML = '';
- sheetNames.forEach(sheetName => {
+ sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
- if (sheetName === currentFilter) {
+                        // Select the current filter if specified, otherwise default to the first sheet
+ if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
@@ -372,7 +373,7 @@
},
body: JSON.stringify({
file_path: data.file_path,
- sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+ sheet_filter: sheetFilter
})
});
} else {
@@ -404,9 +405,10 @@
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
- // Count all different items including duplicates
+ // Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
- results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
+ results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
+ (results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update Summary tab (matched items)
@@ -444,47 +446,70 @@
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
- // Create sets of duplicate items for highlighting
- const kstDuplicateKeys = new Set();
- const coordiDuplicateKeys = new Set();
-
- mismatchDetails.kst_duplicates.forEach(item => {
- kstDuplicateKeys.add(`${item.title}_${item.episode}`);
- });
-
- mismatchDetails.coordi_duplicates.forEach(item => {
- coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
- });
-
- // Combine only KST-only and Coordi-only items (like before)
const allDifferences = [];
- // Add KST-only items
+ // Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
- const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
- isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
+ highlightType: 'none'
});
});
- // Add Coordi-only items
+ // Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
- const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
- isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
+ highlightType: 'none'
});
});
+ // Add KST duplicates (red highlighting)
+ mismatchDetails.kst_duplicates.forEach(item => {
+ allDifferences.push({
+ kstData: `${item.title} - Episode ${item.episode}`,
+ coordiData: '',
+ reason: 'Duplicate entry in KST data',
+ sortTitle: item.title,
+ sortEpisode: parseFloat(item.episode) || 0,
+ highlightType: 'red'
+ });
+ });
+
+ // Add Coordi duplicates (red highlighting)
+ mismatchDetails.coordi_duplicates.forEach(item => {
+ allDifferences.push({
+ kstData: '',
+ coordiData: `${item.title} - Episode ${item.episode}`,
+ reason: 'Duplicate entry in Coordi data',
+ sortTitle: item.title,
+ sortEpisode: parseFloat(item.episode) || 0,
+ highlightType: 'red'
+ });
+ });
+
+ // Add mixed duplicates (yellow highlighting)
+ if (mismatchDetails.mixed_duplicates) {
+ mismatchDetails.mixed_duplicates.forEach(item => {
+ allDifferences.push({
+                        kstData: `${item.title} - Episode ${item.episode}`,
+                        coordiData: `${item.title} - Episode ${item.episode}`,
+ reason: item.reason,
+ sortTitle: item.title,
+ sortEpisode: parseFloat(item.episode) || 0,
+ highlightType: 'yellow'
+ });
+ });
+ }
+
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@@ -499,10 +524,13 @@
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
- // Highlight row in yellow if it's also a duplicate
- if (diff.isDuplicate) {
+ // Apply highlighting based on type
+ if (diff.highlightType === 'red') {
+ row.style.backgroundColor = '#f8d7da'; // Light red
+ row.title = 'Pure duplicate entry';
+ } else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
- row.title = 'This item also has duplicates in the dataset';
+ row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}
diff --git a/test_duplicates.py b/test_duplicates.py
deleted file mode 100644
index d239999..0000000
--- a/test_duplicates.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from data_comparator import KSTCoordiComparator
-
-def test_duplicate_detection():
- comparator = KSTCoordiComparator('data/sample-data.xlsx')
- if comparator.load_data():
- print("=== DUPLICATE DETECTION TEST ===")
-
- # Get the data extraction results
- data = comparator.extract_kst_coordi_items()
-
- print(f"Total KST items (unique): {len(data['kst_items'])}")
- print(f"Total KST items (all): {len(data['kst_all_items'])}")
- print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
- print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")
-
- # Check for duplicates
- categorization = comparator.categorize_mismatches()
-
- print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
- print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")
-
- # Show sample duplicates
- if categorization['kst_duplicates']:
- print("\nSample KST duplicates:")
- for i, dup in enumerate(categorization['kst_duplicates'][:3]):
- print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
-
- if categorization['coordi_duplicates']:
- print("\nSample Coordi duplicates:")
- for i, dup in enumerate(categorization['coordi_duplicates'][:3]):
- print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
-
- # Check for the specific example: 백라이트 - Episode 53-1x(휴재)
- mismatch_details = comparator.generate_mismatch_details()
-
- print(f"\nLooking for '백라이트 - Episode 53-1x(휴재)':")
-
- # Check in KST-only
- backlight_kst_only = [item for item in mismatch_details['kst_only']
- if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
- # Check in KST duplicates
- backlight_kst_dup = [item for item in mismatch_details['kst_duplicates']
- if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
- print(f" Found in KST-only: {len(backlight_kst_only)}")
- print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
-
- if backlight_kst_only:
- print(f" KST-only details: {backlight_kst_only[0]}")
- if backlight_kst_dup:
- print(f" KST duplicate details: {backlight_kst_dup[0]}")
-
- # Test the web interface logic
- print(f"\n=== Testing Web Interface Logic ===")
- summary = comparator.get_comparison_summary()
- print(f"Web interface will show:")
- print(f" Total different items: {summary['mismatches']['kst_only_count'] + summary['mismatches']['coordi_only_count'] + summary['mismatches']['kst_duplicates_count'] + summary['mismatches']['coordi_duplicates_count']}")
-
- print("\n✓ Duplicate detection test complete!")
- print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
-
-if __name__ == "__main__":
- test_duplicate_detection()
\ No newline at end of file
diff --git a/test_final_duplicate_fix.py b/test_final_duplicate_fix.py
deleted file mode 100644
index 52d02f9..0000000
--- a/test_final_duplicate_fix.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import requests
-
-def test_final_duplicate_fix():
- print("=== FINAL DUPLICATE FIX TEST ===")
-
- try:
- # Test the analyze endpoint
- response = requests.post('http://localhost:8081/analyze',
- json={'file_path': 'data/sample-data.xlsx'},
- timeout=30)
-
- if response.status_code == 200:
- data = response.json()
- if data.get('success'):
- results = data['results']
-
- print("✓ Analysis successful!")
- print(f" Matched items: {results['matched_items_count']}")
- print(f" KST only: {results['mismatches']['kst_only_count']}")
- print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
- print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
- print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
-
- total_different = (results['mismatches']['kst_only_count'] +
- results['mismatches']['coordi_only_count'] +
- results['mismatches']['kst_duplicates_count'] +
- results['mismatches']['coordi_duplicates_count'])
- print(f" Total different items: {total_different}")
-
- # Check for the specific example
- kst_duplicates = results['mismatch_details']['kst_duplicates']
- backlight_duplicates = [item for item in kst_duplicates
- if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
- if backlight_duplicates:
- print(f"\n✓ Found 백라이트 duplicates: {len(backlight_duplicates)}")
- print(f" Example: {backlight_duplicates[0]['title']} - Episode {backlight_duplicates[0]['episode']}")
-
- print(f"\n✓ Web interface ready at http://localhost:8081")
- print("✓ The 'Different' tab will now show combined reasons like:")
- print(" 백라이트 - Episode 53-1x(휴재) | (empty) | Only appears in KST + Duplicate in KST")
-
- else:
- print(f"✗ Analysis failed: {data.get('error')}")
- else:
- print(f"✗ Request failed: {response.status_code}")
-
- except requests.exceptions.RequestException as e:
- print(f"✗ Request failed: {e}")
-
-if __name__ == "__main__":
- test_final_duplicate_fix()
\ No newline at end of file
diff --git a/test_sheet_filtering.py b/test_sheet_filtering.py
new file mode 100644
index 0000000..0d1ef12
--- /dev/null
+++ b/test_sheet_filtering.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+from data_comparator import KSTCoordiComparator
+
+def test_sheet_filtering():
+ """Test that sheet filtering works correctly and defaults to first sheet"""
+ print("Testing sheet filtering functionality...")
+
+ # Create comparator and load data
+ comparator = KSTCoordiComparator("data/sample-data.xlsx")
+ if not comparator.load_data():
+ print("Failed to load data!")
+ return
+
+ print(f"Available sheets: {list(comparator.data.keys())}")
+
+ # Test 1: No sheet filter provided (should default to first sheet)
+ print("\n=== TEST 1: No sheet filter (should default to first sheet) ===")
+ try:
+ summary1 = comparator.get_comparison_summary()
+ print(f"Default sheet selected: {summary1['current_sheet_filter']}")
+ print(f"KST total: {summary1['original_counts']['kst_total']}")
+ print(f"Coordi total: {summary1['original_counts']['coordi_total']}")
+ print(f"Matched: {summary1['matched_items_count']}")
+ print("✓ Test 1 passed")
+ except Exception as e:
+ print(f"✗ Test 1 failed: {e}")
+
+ # Test 2: Specific sheet filter
+ sheet_names = list(comparator.data.keys())
+ if len(sheet_names) > 1:
+ second_sheet = sheet_names[1]
+ print(f"\n=== TEST 2: Specific sheet filter ({second_sheet}) ===")
+ try:
+ summary2 = comparator.get_comparison_summary(second_sheet)
+ print(f"Selected sheet: {summary2['current_sheet_filter']}")
+ print(f"KST total: {summary2['original_counts']['kst_total']}")
+ print(f"Coordi total: {summary2['original_counts']['coordi_total']}")
+ print(f"Matched: {summary2['matched_items_count']}")
+ print("✓ Test 2 passed")
+ except Exception as e:
+ print(f"✗ Test 2 failed: {e}")
+ else:
+ print("\n=== TEST 2: Skipped (only one sheet available) ===")
+
+    # Test 3: Verify duplicates are only detected within a single sheet (cross-sheet duplicates were the original problem)
+ print(f"\n=== TEST 3: Verify duplicate detection within single sheets only ===")
+ for sheet_name in sheet_names:
+ summary = comparator.get_comparison_summary(sheet_name)
+ print(f"Sheet '{sheet_name}':")
+ print(f" KST duplicates: {summary['mismatches']['kst_duplicates_count']}")
+ print(f" Coordi duplicates: {summary['mismatches']['coordi_duplicates_count']}")
+
+ print("\n✓ All tests completed!")
+
+if __name__ == "__main__":
+ test_sheet_filtering()
\ No newline at end of file
diff --git a/test_simplified_duplicates.py b/test_simplified_duplicates.py
deleted file mode 100644
index 08aa534..0000000
--- a/test_simplified_duplicates.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import requests
-
-def test_simplified_duplicates():
- print("=== SIMPLIFIED DUPLICATE DISPLAY TEST ===")
-
- try:
- # Test the analyze endpoint
- response = requests.post('http://localhost:8081/analyze',
- json={'file_path': 'data/sample-data.xlsx'},
- timeout=30)
-
- if response.status_code == 200:
- data = response.json()
- if data.get('success'):
- results = data['results']
-
- print("✓ Analysis successful!")
- print(f" Matched items: {results['matched_items_count']}")
- print(f" KST only: {results['mismatches']['kst_only_count']}")
- print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
- print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
- print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
-
- # What the count will show
- total_count = (results['mismatches']['kst_only_count'] +
- results['mismatches']['coordi_only_count'] +
- results['mismatches']['kst_duplicates_count'] +
- results['mismatches']['coordi_duplicates_count'])
-
- # What the table will show
- table_rows = results['mismatches']['kst_only_count'] + results['mismatches']['coordi_only_count']
-
- print(f"\n📊 DISPLAY LOGIC:")
- print(f" Count badge shows: {total_count} items (all different items)")
- print(f" Table shows: {table_rows} rows (only KST-only + Coordi-only)")
- print(f" Yellow highlights: Items that are also duplicates")
-
- # Check for 백라이트 example
- kst_only = results['mismatch_details']['kst_only']
- kst_duplicates = results['mismatch_details']['kst_duplicates']
-
- backlight_kst_only = [item for item in kst_only
- if '백라이트' in item['title'] and '53-1x' in item['episode']]
- backlight_kst_dup = [item for item in kst_duplicates
- if '백라이트' in item['title'] and '53-1x' in item['episode']]
-
- if backlight_kst_only and backlight_kst_dup:
- print(f"\n✓ 백라이트 example works:")
- print(f" - Appears in table (KST-only): YES")
- print(f" - Will be highlighted yellow: YES (also duplicate)")
- print(f" - Contributes to count: 2 items (1 KST-only + 1 duplicate)")
-
- print(f"\n✓ Web interface ready at http://localhost:8081")
- print("✓ Check the 'Different' tab:")
- print(" - Count shows all different items")
- print(" - Table shows only KST-only + Coordi-only")
- print(" - Yellow rows = items that also have duplicates")
-
- else:
- print(f"✗ Analysis failed: {data.get('error')}")
- else:
- print(f"✗ Request failed: {response.status_code}")
-
- except requests.exceptions.RequestException as e:
- print(f"✗ Request failed: {e}")
-
-if __name__ == "__main__":
- test_simplified_duplicates()
\ No newline at end of file
diff --git a/web_gui.py b/web_gui.py
index 9783fa4..6bf5fcd 100644
--- a/web_gui.py
+++ b/web_gui.py
@@ -42,7 +42,7 @@ def analyze_data():
matched_items = list(categorization['matched_items'])
# Filter matched items by sheet if specified
- if sheet_filter and sheet_filter != 'All Sheets':
+ if sheet_filter:
matched_items = [item for item in matched_items if item.source_sheet == sheet_filter]
# Format matched items for JSON (limit to first 500 for performance)
@@ -383,7 +383,7 @@ def create_templates_dir():
},
body: JSON.stringify({
file_path: filePath,
- sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+ sheet_filter: sheetFilter
})
})
.then(response => response.json())
@@ -408,13 +408,14 @@ def create_templates_dir():
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
- select.innerHTML = '';
+ select.innerHTML = '';
- sheetNames.forEach(sheetName => {
+ sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
- if (sheetName === currentFilter) {
+                    // Select the current filter if specified, otherwise default to the first sheet
+ if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
@@ -480,7 +481,7 @@ def create_templates_dir():
},
body: JSON.stringify({
file_path: data.file_path,
- sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
+ sheet_filter: sheetFilter
})
});
} else {
@@ -512,9 +513,10 @@ def create_templates_dir():
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
- // Count all different items including duplicates
+ // Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
- results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
+ results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
+ (results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update Summary tab (matched items)
@@ -552,47 +554,70 @@ def create_templates_dir():
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
- // Create sets of duplicate items for highlighting
- const kstDuplicateKeys = new Set();
- const coordiDuplicateKeys = new Set();
-
- mismatchDetails.kst_duplicates.forEach(item => {
- kstDuplicateKeys.add(`${item.title}_${item.episode}`);
- });
-
- mismatchDetails.coordi_duplicates.forEach(item => {
- coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
- });
-
- // Combine only KST-only and Coordi-only items (like before)
const allDifferences = [];
- // Add KST-only items
+ // Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
- const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
- isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
+ highlightType: 'none'
});
});
- // Add Coordi-only items
+ // Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
- const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
- isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
+ highlightType: 'none'
});
});
+ // Add KST duplicates (red highlighting)
+ mismatchDetails.kst_duplicates.forEach(item => {
+ allDifferences.push({
+ kstData: `${item.title} - Episode ${item.episode}`,
+ coordiData: '',
+ reason: 'Duplicate entry in KST data',
+ sortTitle: item.title,
+ sortEpisode: parseFloat(item.episode) || 0,
+ highlightType: 'red'
+ });
+ });
+
+ // Add Coordi duplicates (red highlighting)
+ mismatchDetails.coordi_duplicates.forEach(item => {
+ allDifferences.push({
+ kstData: '',
+ coordiData: `${item.title} - Episode ${item.episode}`,
+ reason: 'Duplicate entry in Coordi data',
+ sortTitle: item.title,
+ sortEpisode: parseFloat(item.episode) || 0,
+ highlightType: 'red'
+ });
+ });
+
+ // Add mixed duplicates (yellow highlighting)
+ if (mismatchDetails.mixed_duplicates) {
+ mismatchDetails.mixed_duplicates.forEach(item => {
+ allDifferences.push({
+                    kstData: `${item.title} - Episode ${item.episode}`,
+                    coordiData: `${item.title} - Episode ${item.episode}`,
+ reason: item.reason,
+ sortTitle: item.title,
+ sortEpisode: parseFloat(item.episode) || 0,
+ highlightType: 'yellow'
+ });
+ });
+ }
+
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@@ -607,10 +632,13 @@ def create_templates_dir():
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
- // Highlight row in yellow if it's also a duplicate
- if (diff.isDuplicate) {
+ // Apply highlighting based on type
+ if (diff.highlightType === 'red') {
+ row.style.backgroundColor = '#f8d7da'; // Light red
+ row.title = 'Pure duplicate entry';
+ } else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
- row.title = 'This item also has duplicates in the dataset';
+ row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}
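
For reference, a sketch of the record shape the new mixed-duplicate entries carry and how the "Different" badge count is assembled. The field names come from `_find_sheet_specific_mixed_duplicates` and `calculate_filtered_counts`; the sheet name and the count values below are made-up placeholders.

```python
# Field names match _find_sheet_specific_mixed_duplicates; the values are placeholders
# ('Sheet1' and the example counts are assumptions, not taken from real data).
mixed_duplicate = {
    'title': '백라이트',
    'episode': '53-1x(휴재)',
    'sheet': 'Sheet1',
    'row_index': None,
    'reason': 'Item exists in both datasets but has duplicates in KST within Sheet1',
    'mismatch_type': 'MIXED_DUPLICATE_KST',
    'duplicate_side': 'KST',
}

# The badge mirrors the JS totalDifferent sum, now including the new category.
mismatches = {
    'kst_only_count': 3,
    'coordi_only_count': 2,
    'kst_duplicates_count': 1,
    'coordi_duplicates_count': 0,
    'mixed_duplicates_count': 1,
}
total_different = sum(mismatches.values())
print(total_different)  # 7 with these example counts
```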