fix logic

arthur 2025-08-20 15:38:04 +07:00
parent 1f88db5fb9
commit ed3655d1c9
7 changed files with 254 additions and 268 deletions


@ -212,6 +212,59 @@ class KSTCoordiComparator:
return duplicates
def _find_sheet_specific_mixed_duplicates(self, sheet_filter: str) -> List[Dict]:
"""Find mixed duplicates within a specific sheet only"""
if not sheet_filter:
return []
mixed_duplicates = []
# Extract items specific to this sheet
extract_results = self.extract_kst_coordi_items()
kst_sheet_items = [item for item in extract_results['kst_all_items'] if item.source_sheet == sheet_filter]
coordi_sheet_items = [item for item in extract_results['coordi_all_items'] if item.source_sheet == sheet_filter]
# Find duplicates within this sheet
kst_sheet_duplicates = self._find_duplicates_in_list(kst_sheet_items)
coordi_sheet_duplicates = self._find_duplicates_in_list(coordi_sheet_items)
# Create sets for items that exist in both KST and Coordi within this sheet
kst_sheet_set = {(item.title, item.episode) for item in kst_sheet_items}
coordi_sheet_set = {(item.title, item.episode) for item in coordi_sheet_items}
matched_in_sheet = kst_sheet_set.intersection(coordi_sheet_set)
# Create sets of duplicate keys within this sheet
kst_duplicate_keys = {(item.title, item.episode) for item in kst_sheet_duplicates}
coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_sheet_duplicates}
# Find matched items that also have duplicates within the same sheet
for title, episode in matched_in_sheet:
# Check if this matched item has duplicates in KST within this sheet
if (title, episode) in kst_duplicate_keys:
mixed_duplicates.append({
'title': title,
'episode': episode,
'sheet': sheet_filter,
'row_index': None, # Could get from items if needed
'reason': f'Item exists in both datasets but has duplicates in KST within {sheet_filter}',
'mismatch_type': 'MIXED_DUPLICATE_KST',
'duplicate_side': 'KST'
})
# Check if this matched item has duplicates in Coordi within this sheet
if (title, episode) in coordi_duplicate_keys:
mixed_duplicates.append({
'title': title,
'episode': episode,
'sheet': sheet_filter,
'row_index': None, # Could get from items if needed
'reason': f'Item exists in both datasets but has duplicates in Coordi within {sheet_filter}',
'mismatch_type': 'MIXED_DUPLICATE_COORDI',
'duplicate_side': 'COORDI'
})
return mixed_duplicates
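_find_duplicates_in_list is called above but does not appear in this hunk. As a point of reference, a minimal sketch of the behaviour the new method relies on (an assumption, not the actual helper), where each extracted item is expected to expose title and episode attributes:

from collections import Counter
from typing import List

def _find_duplicates_in_list(items: List) -> List:
    """Hypothetical stand-in for the helper used above: return every item
    whose (title, episode) key occurs more than once in the list."""
    counts = Counter((item.title, item.episode) for item in items)
    return [item for item in items if counts[(item.title, item.episode)] > 1]

Whether the real helper reports every occurrence or skips the first does not matter for the logic above, since its output is immediately reduced to a set of (title, episode) keys.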
def generate_mismatch_details(self) -> Dict[str, List[Dict]]:
"""Generate detailed information about each type of mismatch with reasons"""
categorization = self.categorize_mismatches()
@ -220,7 +273,8 @@ class KSTCoordiComparator:
'kst_only': [],
'coordi_only': [],
'kst_duplicates': [],
'coordi_duplicates': []
'coordi_duplicates': [],
'mixed_duplicates': []
}
# KST-only items
@ -267,38 +321,40 @@ class KSTCoordiComparator:
'mismatch_type': 'COORDI_DUPLICATE'
})
# Mixed duplicates will be calculated per sheet in get_comparison_summary
mismatch_details['mixed_duplicates'] = []
return mismatch_details
def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]:
"""Get a comprehensive summary of the comparison, optionally filtered by sheet"""
"""Get a comprehensive summary of the comparison, filtered by a specific sheet"""
# Get sheet names for filtering options
sheet_names = list(self.data.keys()) if self.data else []
# If no sheet filter provided, default to first sheet
if not sheet_filter:
sheet_filter = sheet_names[0] if sheet_names else None
if not sheet_filter:
raise ValueError("No sheets available or sheet filter not specified")
categorization = self.categorize_mismatches()
mismatch_details = self.generate_mismatch_details()
grouped_data = self.group_by_title()
# Get sheet names for filtering options
sheet_names = list(self.data.keys()) if self.data else []
# Always apply sheet filtering (no more "All Sheets" option)
mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
# Apply sheet filtering if specified
if sheet_filter and sheet_filter != 'All Sheets':
mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
# Recalculate counts for filtered data
filtered_counts = self.calculate_filtered_counts(mismatch_details)
else:
filtered_counts = {
'kst_total': categorization['counts']['total_kst'],
'coordi_total': categorization['counts']['total_coordi'],
'matched': categorization['counts']['matched'],
'kst_only_count': categorization['counts']['kst_only'],
'coordi_only_count': categorization['counts']['coordi_only'],
'kst_duplicates_count': categorization['counts']['kst_duplicates_count'],
'coordi_duplicates_count': categorization['counts']['coordi_duplicates_count']
}
# Calculate mixed duplicates specific to this sheet
mismatch_details['mixed_duplicates'] = self._find_sheet_specific_mixed_duplicates(sheet_filter)
# Recalculate counts for filtered data
filtered_counts = self.calculate_filtered_counts(mismatch_details)
summary = {
'sheet_names': sheet_names,
'current_sheet_filter': sheet_filter or 'All Sheets',
'current_sheet_filter': sheet_filter,
'original_counts': {
'kst_total': filtered_counts['kst_total'],
'coordi_total': filtered_counts['coordi_total']
@ -372,7 +428,8 @@ class KSTCoordiComparator:
'kst_only_count': len(filtered_mismatch_details['kst_only']),
'coordi_only_count': len(filtered_mismatch_details['coordi_only']),
'kst_duplicates_count': len(filtered_mismatch_details['kst_duplicates']),
'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates'])
'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates']),
'mixed_duplicates_count': len(filtered_mismatch_details.get('mixed_duplicates', []))
}
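With mixed_duplicates_count added here, the front end's "Different" badge becomes the sum of all five counters. The same figure can be reproduced server-side, for example when logging a sheet's summary; a small sketch assuming the counts dictionary shown above:

def total_different(counts: dict) -> int:
    """Mirror of the UI arithmetic: KST-only + Coordi-only + both duplicate
    buckets + mixed duplicates (0 if the key is missing)."""
    return (counts['kst_only_count']
            + counts['coordi_only_count']
            + counts['kst_duplicates_count']
            + counts['coordi_duplicates_count']
            + counts.get('mixed_duplicates_count', 0))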
def group_by_title(self) -> Dict[str, Any]:


@ -183,7 +183,7 @@
<div class="file-input" style="margin-top: 10px;">
<label for="sheetFilter">Sheet Filter:</label>
<select id="sheetFilter" onchange="filterBySheet()" disabled>
<option value="All Sheets">All Sheets</option>
<!-- Options will be populated dynamically -->
</select>
</div>
<div id="status"></div>
@ -275,7 +275,7 @@
},
body: JSON.stringify({
file_path: filePath,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
})
.then(response => response.json())
@ -300,13 +300,14 @@
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
select.innerHTML = '<option value="All Sheets">All Sheets</option>';
select.innerHTML = '';
sheetNames.forEach(sheetName => {
sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
if (sheetName === currentFilter) {
// Select the first sheet by default, or the current filter if specified
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
@ -372,7 +373,7 @@
},
body: JSON.stringify({
file_path: data.file_path,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
});
} else {
@ -404,9 +405,10 @@
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
// Count all different items including duplicates
// Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
(results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update Summary tab (matched items)
@ -444,47 +446,70 @@
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
// Create sets of duplicate items for highlighting
const kstDuplicateKeys = new Set();
const coordiDuplicateKeys = new Set();
mismatchDetails.kst_duplicates.forEach(item => {
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
});
mismatchDetails.coordi_duplicates.forEach(item => {
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
});
// Combine only KST-only and Coordi-only items (like before)
const allDifferences = [];
// Add KST-only items
// Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add Coordi-only items
// Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add KST duplicates (red highlighting)
mismatchDetails.kst_duplicates.forEach(item => {
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Duplicate entry in KST data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add Coordi duplicates (red highlighting)
mismatchDetails.coordi_duplicates.forEach(item => {
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Duplicate entry in Coordi data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add mixed duplicates (yellow highlighting)
if (mismatchDetails.mixed_duplicates) {
mismatchDetails.mixed_duplicates.forEach(item => {
allDifferences.push({
// A mixed duplicate exists in both datasets, so show it in both columns
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: `${item.title} - Episode ${item.episode}`,
reason: item.reason,
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'yellow'
});
});
}
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@ -499,10 +524,13 @@
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
// Highlight row in yellow if it's also a duplicate
if (diff.isDuplicate) {
// Apply highlighting based on type
if (diff.highlightType === 'red') {
row.style.backgroundColor = '#f8d7da'; // Light red
row.title = 'Pure duplicate entry';
} else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
row.title = 'This item also has duplicates in the dataset';
row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}


@ -1,64 +0,0 @@
from data_comparator import KSTCoordiComparator
def test_duplicate_detection():
comparator = KSTCoordiComparator('data/sample-data.xlsx')
if comparator.load_data():
print("=== DUPLICATE DETECTION TEST ===")
# Get the data extraction results
data = comparator.extract_kst_coordi_items()
print(f"Total KST items (unique): {len(data['kst_items'])}")
print(f"Total KST items (all): {len(data['kst_all_items'])}")
print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")
# Check for duplicates
categorization = comparator.categorize_mismatches()
print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")
# Show sample duplicates
if categorization['kst_duplicates']:
print("\nSample KST duplicates:")
for i, dup in enumerate(categorization['kst_duplicates'][:3]):
print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
if categorization['coordi_duplicates']:
print("\nSample Coordi duplicates:")
for i, dup in enumerate(categorization['coordi_duplicates'][:3]):
print(f" {i+1}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")
# Check for the specific example: 백라이트 - Episode 53-1x(휴재)
mismatch_details = comparator.generate_mismatch_details()
print(f"\nLooking for '백라이트 - Episode 53-1x(휴재)':")
# Check in KST-only
backlight_kst_only = [item for item in mismatch_details['kst_only']
if '백라이트' in item['title'] and '53-1x' in item['episode']]
# Check in KST duplicates
backlight_kst_dup = [item for item in mismatch_details['kst_duplicates']
if '백라이트' in item['title'] and '53-1x' in item['episode']]
print(f" Found in KST-only: {len(backlight_kst_only)}")
print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
if backlight_kst_only:
print(f" KST-only details: {backlight_kst_only[0]}")
if backlight_kst_dup:
print(f" KST duplicate details: {backlight_kst_dup[0]}")
# Test the web interface logic
print(f"\n=== Testing Web Interface Logic ===")
summary = comparator.get_comparison_summary()
print(f"Web interface will show:")
print(f" Total different items: {summary['mismatches']['kst_only_count'] + summary['mismatches']['coordi_only_count'] + summary['mismatches']['kst_duplicates_count'] + summary['mismatches']['coordi_duplicates_count']}")
print("\n✓ Duplicate detection test complete!")
print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
if __name__ == "__main__":
test_duplicate_detection()


@ -1,52 +0,0 @@
import requests
def test_final_duplicate_fix():
print("=== FINAL DUPLICATE FIX TEST ===")
try:
# Test the analyze endpoint
response = requests.post('http://localhost:8081/analyze',
json={'file_path': 'data/sample-data.xlsx'},
timeout=30)
if response.status_code == 200:
data = response.json()
if data.get('success'):
results = data['results']
print("✓ Analysis successful!")
print(f" Matched items: {results['matched_items_count']}")
print(f" KST only: {results['mismatches']['kst_only_count']}")
print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
total_different = (results['mismatches']['kst_only_count'] +
results['mismatches']['coordi_only_count'] +
results['mismatches']['kst_duplicates_count'] +
results['mismatches']['coordi_duplicates_count'])
print(f" Total different items: {total_different}")
# Check for the specific example
kst_duplicates = results['mismatch_details']['kst_duplicates']
backlight_duplicates = [item for item in kst_duplicates
if '백라이트' in item['title'] and '53-1x' in item['episode']]
if backlight_duplicates:
print(f"\n✓ Found 백라이트 duplicates: {len(backlight_duplicates)}")
print(f" Example: {backlight_duplicates[0]['title']} - Episode {backlight_duplicates[0]['episode']}")
print(f"\n✓ Web interface ready at http://localhost:8081")
print("✓ The 'Different' tab will now show combined reasons like:")
print(" 백라이트 - Episode 53-1x(휴재) | (empty) | Only appears in KST + Duplicate in KST")
else:
print(f"✗ Analysis failed: {data.get('error')}")
else:
print(f"✗ Request failed: {response.status_code}")
except requests.exceptions.RequestException as e:
print(f"✗ Request failed: {e}")
if __name__ == "__main__":
test_final_duplicate_fix()

test_sheet_filtering.py (new file, 57 lines)

@ -0,0 +1,57 @@
#!/usr/bin/env python3
from data_comparator import KSTCoordiComparator
def test_sheet_filtering():
"""Test that sheet filtering works correctly and defaults to first sheet"""
print("Testing sheet filtering functionality...")
# Create comparator and load data
comparator = KSTCoordiComparator("data/sample-data.xlsx")
if not comparator.load_data():
print("Failed to load data!")
return
print(f"Available sheets: {list(comparator.data.keys())}")
# Test 1: No sheet filter provided (should default to first sheet)
print("\n=== TEST 1: No sheet filter (should default to first sheet) ===")
try:
summary1 = comparator.get_comparison_summary()
print(f"Default sheet selected: {summary1['current_sheet_filter']}")
print(f"KST total: {summary1['original_counts']['kst_total']}")
print(f"Coordi total: {summary1['original_counts']['coordi_total']}")
print(f"Matched: {summary1['matched_items_count']}")
print("✓ Test 1 passed")
except Exception as e:
print(f"✗ Test 1 failed: {e}")
# Test 2: Specific sheet filter
sheet_names = list(comparator.data.keys())
if len(sheet_names) > 1:
second_sheet = sheet_names[1]
print(f"\n=== TEST 2: Specific sheet filter ({second_sheet}) ===")
try:
summary2 = comparator.get_comparison_summary(second_sheet)
print(f"Selected sheet: {summary2['current_sheet_filter']}")
print(f"KST total: {summary2['original_counts']['kst_total']}")
print(f"Coordi total: {summary2['original_counts']['coordi_total']}")
print(f"Matched: {summary2['matched_items_count']}")
print("✓ Test 2 passed")
except Exception as e:
print(f"✗ Test 2 failed: {e}")
else:
print("\n=== TEST 2: Skipped (only one sheet available) ===")
# Test 3: Verify no duplicates across sheets (this was the original problem)
print(f"\n=== TEST 3: Verify duplicate detection within single sheets only ===")
for sheet_name in sheet_names:
summary = comparator.get_comparison_summary(sheet_name)
print(f"Sheet '{sheet_name}':")
print(f" KST duplicates: {summary['mismatches']['kst_duplicates_count']}")
print(f" Coordi duplicates: {summary['mismatches']['coordi_duplicates_count']}")
print("\n✓ All tests completed!")
if __name__ == "__main__":
test_sheet_filtering()
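The script above prints its results for manual inspection; if these checks are ever moved under a test runner such as pytest, the defaulting behaviour can be asserted directly. A sketch, assuming only the API exercised in this file:

def test_default_sheet_is_first():
    comparator = KSTCoordiComparator("data/sample-data.xlsx")
    assert comparator.load_data(), "failed to load workbook"
    first_sheet = list(comparator.data.keys())[0]
    summary = comparator.get_comparison_summary()
    # No argument given, so the summary must fall back to the first sheet
    assert summary['current_sheet_filter'] == first_sheet
    # Mixed duplicates are now computed per sheet, so the counter must exist
    assert 'mixed_duplicates_count' in summary['mismatches']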


@ -1,68 +0,0 @@
import requests
def test_simplified_duplicates():
print("=== SIMPLIFIED DUPLICATE DISPLAY TEST ===")
try:
# Test the analyze endpoint
response = requests.post('http://localhost:8081/analyze',
json={'file_path': 'data/sample-data.xlsx'},
timeout=30)
if response.status_code == 200:
data = response.json()
if data.get('success'):
results = data['results']
print("✓ Analysis successful!")
print(f" Matched items: {results['matched_items_count']}")
print(f" KST only: {results['mismatches']['kst_only_count']}")
print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")
# What the count will show
total_count = (results['mismatches']['kst_only_count'] +
results['mismatches']['coordi_only_count'] +
results['mismatches']['kst_duplicates_count'] +
results['mismatches']['coordi_duplicates_count'])
# What the table will show
table_rows = results['mismatches']['kst_only_count'] + results['mismatches']['coordi_only_count']
print(f"\n📊 DISPLAY LOGIC:")
print(f" Count badge shows: {total_count} items (all different items)")
print(f" Table shows: {table_rows} rows (only KST-only + Coordi-only)")
print(f" Yellow highlights: Items that are also duplicates")
# Check for 백라이트 example
kst_only = results['mismatch_details']['kst_only']
kst_duplicates = results['mismatch_details']['kst_duplicates']
backlight_kst_only = [item for item in kst_only
if '백라이트' in item['title'] and '53-1x' in item['episode']]
backlight_kst_dup = [item for item in kst_duplicates
if '백라이트' in item['title'] and '53-1x' in item['episode']]
if backlight_kst_only and backlight_kst_dup:
print(f"\n✓ 백라이트 example works:")
print(f" - Appears in table (KST-only): YES")
print(f" - Will be highlighted yellow: YES (also duplicate)")
print(f" - Contributes to count: 2 items (1 KST-only + 1 duplicate)")
print(f"\n✓ Web interface ready at http://localhost:8081")
print("✓ Check the 'Different' tab:")
print(" - Count shows all different items")
print(" - Table shows only KST-only + Coordi-only")
print(" - Yellow rows = items that also have duplicates")
else:
print(f"✗ Analysis failed: {data.get('error')}")
else:
print(f"✗ Request failed: {response.status_code}")
except requests.exceptions.RequestException as e:
print(f"✗ Request failed: {e}")
if __name__ == "__main__":
test_simplified_duplicates()


@ -42,7 +42,7 @@ def analyze_data():
matched_items = list(categorization['matched_items'])
# Filter matched items by sheet if specified
if sheet_filter and sheet_filter != 'All Sheets':
if sheet_filter:
matched_items = [item for item in matched_items if item.source_sheet == sheet_filter]
# Format matched items for JSON (limit to first 500 for performance)
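With the "All Sheets" sentinel gone, callers of the route now send a concrete sheet name in the request body. A sketch of exercising the endpoint in the style of the removed test scripts; the port and the sheet name are illustrative placeholders, not values taken from the workbook:

import requests

resp = requests.post('http://localhost:8080/analyze',
                     json={'file_path': 'data/sample-data.xlsx',
                           'sheet_filter': 'Sheet1'},  # hypothetical sheet name
                     timeout=30)
data = resp.json()
if data.get('success'):
    mismatches = data['results']['mismatches']
    print('mixed duplicates:', mismatches.get('mixed_duplicates_count', 0))
else:
    print('analysis failed:', data.get('error'))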
@ -291,7 +291,7 @@ def create_templates_dir():
<div class="file-input" style="margin-top: 10px;">
<label for="sheetFilter">Sheet Filter:</label>
<select id="sheetFilter" onchange="filterBySheet()" disabled>
<option value="All Sheets">All Sheets</option>
<!-- Options will be populated dynamically -->
</select>
</div>
<div id="status"></div>
@ -383,7 +383,7 @@ def create_templates_dir():
},
body: JSON.stringify({
file_path: filePath,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
})
.then(response => response.json())
@ -408,13 +408,14 @@ def create_templates_dir():
function updateSheetFilter(sheetNames, currentFilter) {
const select = document.getElementById('sheetFilter');
select.innerHTML = '<option value="All Sheets">All Sheets</option>';
select.innerHTML = '';
sheetNames.forEach(sheetName => {
sheetNames.forEach((sheetName, index) => {
const option = document.createElement('option');
option.value = sheetName;
option.textContent = sheetName;
if (sheetName === currentFilter) {
// Select the first sheet by default, or the current filter if specified
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
option.selected = true;
}
select.appendChild(option);
@ -480,7 +481,7 @@ def create_templates_dir():
},
body: JSON.stringify({
file_path: data.file_path,
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
sheet_filter: sheetFilter
})
});
} else {
@ -512,9 +513,10 @@ def create_templates_dir():
// Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
// Count all different items including duplicates
// Count all different items including duplicates and mixed duplicates
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
(results.mismatches.mixed_duplicates_count || 0);
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
// Update Summary tab (matched items)
@ -552,47 +554,70 @@ def create_templates_dir():
const tbody = document.getElementById('different-table');
tbody.innerHTML = '';
// Create sets of duplicate items for highlighting
const kstDuplicateKeys = new Set();
const coordiDuplicateKeys = new Set();
mismatchDetails.kst_duplicates.forEach(item => {
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
});
mismatchDetails.coordi_duplicates.forEach(item => {
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
});
// Combine only KST-only and Coordi-only items (like before)
const allDifferences = [];
// Add KST-only items
// Add KST-only items (no special highlighting)
mismatchDetails.kst_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Only appears in KST',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add Coordi-only items
// Add Coordi-only items (no special highlighting)
mismatchDetails.coordi_only.forEach(item => {
const key = `${item.title}_${item.episode}`;
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Only appears in Coordi',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
highlightType: 'none'
});
});
// Add KST duplicates (red highlighting)
mismatchDetails.kst_duplicates.forEach(item => {
allDifferences.push({
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: '',
reason: 'Duplicate entry in KST data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add Coordi duplicates (red highlighting)
mismatchDetails.coordi_duplicates.forEach(item => {
allDifferences.push({
kstData: '',
coordiData: `${item.title} - Episode ${item.episode}`,
reason: 'Duplicate entry in Coordi data',
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'red'
});
});
// Add mixed duplicates (yellow highlighting)
if (mismatchDetails.mixed_duplicates) {
mismatchDetails.mixed_duplicates.forEach(item => {
allDifferences.push({
// A mixed duplicate exists in both datasets, so show it in both columns
kstData: `${item.title} - Episode ${item.episode}`,
coordiData: `${item.title} - Episode ${item.episode}`,
reason: item.reason,
sortTitle: item.title,
sortEpisode: parseFloat(item.episode) || 0,
highlightType: 'yellow'
});
});
}
// Sort by Korean title + episode
allDifferences.sort((a, b) => {
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
@ -607,10 +632,13 @@ def create_templates_dir():
row.insertCell(1).textContent = diff.coordiData;
row.insertCell(2).textContent = diff.reason;
// Highlight row in yellow if it's also a duplicate
if (diff.isDuplicate) {
// Apply highlighting based on type
if (diff.highlightType === 'red') {
row.style.backgroundColor = '#f8d7da'; // Light red
row.title = 'Pure duplicate entry';
} else if (diff.highlightType === 'yellow') {
row.style.backgroundColor = '#fff3cd'; // Light yellow
row.title = 'This item also has duplicates in the dataset';
row.title = 'Item exists in both datasets but has duplicates on one side';
}
});
}
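For reference, the highlighting scheme that both copies of populateDifferentTable now apply, summarised as a small mapping (a sketch; the colour values are the ones used in the JavaScript above):

# Category -> (highlight, row background) as applied in the "Different" table
HIGHLIGHT_RULES = {
    'kst_only':          ('none',   None),
    'coordi_only':       ('none',   None),
    'kst_duplicates':    ('red',    '#f8d7da'),  # pure duplicate entry
    'coordi_duplicates': ('red',    '#f8d7da'),
    'mixed_duplicates':  ('yellow', '#fff3cd'),  # in both datasets, duplicated on one side
}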