fix logic
This commit is contained in:
parent
1f88db5fb9
commit
ed3655d1c9
@ -212,6 +212,59 @@ class KSTCoordiComparator:
|
||||
|
||||
return duplicates
|
||||
|
||||
def _find_sheet_specific_mixed_duplicates(self, sheet_filter: str) -> List[Dict]:
    """Find mixed duplicates within a specific sheet only.

    A "mixed duplicate" is a (title, episode) pair that occurs in both the
    KST items and the Coordi items of ``sheet_filter`` and is additionally
    reported as a duplicate on at least one of those two sides within the
    same sheet.

    Args:
        sheet_filter: Name of the sheet to inspect. A falsy value (``None``
            or ``""``) short-circuits to an empty result.

    Returns:
        A list with one dict per (pair, duplicated side). Each dict carries
        the keys ``title``, ``episode``, ``sheet``, ``row_index`` (always
        ``None``), ``reason``, ``mismatch_type`` and ``duplicate_side``.
        Entries are ordered by title, then episode (compared as strings);
        for the same pair the KST entry precedes the Coordi entry.
    """
    if not sheet_filter:
        return []

    # Restrict both data sets to the items that belong to this sheet.
    extract_results = self.extract_kst_coordi_items()
    kst_sheet_items = [
        item for item in extract_results['kst_all_items']
        if item.source_sheet == sheet_filter
    ]
    coordi_sheet_items = [
        item for item in extract_results['coordi_all_items']
        if item.source_sheet == sheet_filter
    ]

    # Keys flagged as duplicates on each side, within this sheet only.
    # NOTE(review): assumes _find_duplicates_in_list returns items exposing
    # .title and .episode -- confirm against that helper.
    kst_duplicate_keys = {
        (item.title, item.episode)
        for item in self._find_duplicates_in_list(kst_sheet_items)
    }
    coordi_duplicate_keys = {
        (item.title, item.episode)
        for item in self._find_duplicates_in_list(coordi_sheet_items)
    }

    # Keys present on both sides of this sheet.
    kst_sheet_set = {(item.title, item.episode) for item in kst_sheet_items}
    coordi_sheet_set = {(item.title, item.episode) for item in coordi_sheet_items}
    matched_in_sheet = kst_sheet_set & coordi_sheet_set

    # (label used in the reason text, tag used in the type fields, keys)
    sides = (
        ('KST', 'KST', kst_duplicate_keys),
        ('Coordi', 'COORDI', coordi_duplicate_keys),
    )

    # Set iteration order is arbitrary and changes between interpreter runs
    # for string keys, so sort to keep the output reproducible. Comparing
    # via str() avoids a TypeError if episodes are not all the same type.
    ordered_keys = sorted(
        matched_in_sheet,
        key=lambda key: (str(key[0]), str(key[1])),
    )

    mixed_duplicates = []
    for title, episode in ordered_keys:
        for label, tag, duplicate_keys in sides:
            if (title, episode) not in duplicate_keys:
                continue
            mixed_duplicates.append({
                'title': title,
                'episode': episode,
                'sheet': sheet_filter,
                'row_index': None,  # Could get from items if needed
                'reason': f'Item exists in both datasets but has duplicates in {label} within {sheet_filter}',
                'mismatch_type': f'MIXED_DUPLICATE_{tag}',
                'duplicate_side': tag,
            })

    return mixed_duplicates
|
||||
|
||||
def generate_mismatch_details(self) -> Dict[str, List[Dict]]:
|
||||
"""Generate detailed information about each type of mismatch with reasons"""
|
||||
categorization = self.categorize_mismatches()
|
||||
@ -220,7 +273,8 @@ class KSTCoordiComparator:
|
||||
'kst_only': [],
|
||||
'coordi_only': [],
|
||||
'kst_duplicates': [],
|
||||
'coordi_duplicates': []
|
||||
'coordi_duplicates': [],
|
||||
'mixed_duplicates': []
|
||||
}
|
||||
|
||||
# KST-only items
|
||||
@ -267,38 +321,40 @@ class KSTCoordiComparator:
|
||||
'mismatch_type': 'COORDI_DUPLICATE'
|
||||
})
|
||||
|
||||
# Mixed duplicates will be calculated per sheet in get_comparison_summary
|
||||
mismatch_details['mixed_duplicates'] = []
|
||||
|
||||
return mismatch_details
|
||||
|
||||
def get_comparison_summary(self, sheet_filter: str = None) -> Dict[str, Any]:
|
||||
"""Get a comprehensive summary of the comparison, optionally filtered by sheet"""
|
||||
"""Get a comprehensive summary of the comparison, filtered by a specific sheet"""
|
||||
# Get sheet names for filtering options
|
||||
sheet_names = list(self.data.keys()) if self.data else []
|
||||
|
||||
# If no sheet filter provided, default to first sheet
|
||||
if not sheet_filter:
|
||||
sheet_filter = sheet_names[0] if sheet_names else None
|
||||
|
||||
if not sheet_filter:
|
||||
raise ValueError("No sheets available or sheet filter not specified")
|
||||
|
||||
categorization = self.categorize_mismatches()
|
||||
mismatch_details = self.generate_mismatch_details()
|
||||
grouped_data = self.group_by_title()
|
||||
|
||||
# Get sheet names for filtering options
|
||||
sheet_names = list(self.data.keys()) if self.data else []
|
||||
|
||||
# Apply sheet filtering if specified
|
||||
if sheet_filter and sheet_filter != 'All Sheets':
|
||||
# Always apply sheet filtering (no more "All Sheets" option)
|
||||
mismatch_details = self.filter_by_sheet(mismatch_details, sheet_filter)
|
||||
grouped_data = self.filter_grouped_data_by_sheet(grouped_data, sheet_filter)
|
||||
|
||||
# Calculate mixed duplicates specific to this sheet
|
||||
mismatch_details['mixed_duplicates'] = self._find_sheet_specific_mixed_duplicates(sheet_filter)
|
||||
|
||||
# Recalculate counts for filtered data
|
||||
filtered_counts = self.calculate_filtered_counts(mismatch_details)
|
||||
else:
|
||||
filtered_counts = {
|
||||
'kst_total': categorization['counts']['total_kst'],
|
||||
'coordi_total': categorization['counts']['total_coordi'],
|
||||
'matched': categorization['counts']['matched'],
|
||||
'kst_only_count': categorization['counts']['kst_only'],
|
||||
'coordi_only_count': categorization['counts']['coordi_only'],
|
||||
'kst_duplicates_count': categorization['counts']['kst_duplicates_count'],
|
||||
'coordi_duplicates_count': categorization['counts']['coordi_duplicates_count']
|
||||
}
|
||||
|
||||
summary = {
|
||||
'sheet_names': sheet_names,
|
||||
'current_sheet_filter': sheet_filter or 'All Sheets',
|
||||
'current_sheet_filter': sheet_filter,
|
||||
'original_counts': {
|
||||
'kst_total': filtered_counts['kst_total'],
|
||||
'coordi_total': filtered_counts['coordi_total']
|
||||
@ -372,7 +428,8 @@ class KSTCoordiComparator:
|
||||
'kst_only_count': len(filtered_mismatch_details['kst_only']),
|
||||
'coordi_only_count': len(filtered_mismatch_details['coordi_only']),
|
||||
'kst_duplicates_count': len(filtered_mismatch_details['kst_duplicates']),
|
||||
'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates'])
|
||||
'coordi_duplicates_count': len(filtered_mismatch_details['coordi_duplicates']),
|
||||
'mixed_duplicates_count': len(filtered_mismatch_details.get('mixed_duplicates', []))
|
||||
}
|
||||
|
||||
def group_by_title(self) -> Dict[str, Any]:
|
||||
|
||||
@ -183,7 +183,7 @@
|
||||
<div class="file-input" style="margin-top: 10px;">
|
||||
<label for="sheetFilter">Sheet Filter:</label>
|
||||
<select id="sheetFilter" onchange="filterBySheet()" disabled>
|
||||
<option value="All Sheets">All Sheets</option>
|
||||
<!-- Options will be populated dynamically -->
|
||||
</select>
|
||||
</div>
|
||||
<div id="status"></div>
|
||||
@ -275,7 +275,7 @@
|
||||
},
|
||||
body: JSON.stringify({
|
||||
file_path: filePath,
|
||||
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
|
||||
sheet_filter: sheetFilter
|
||||
})
|
||||
})
|
||||
.then(response => response.json())
|
||||
@ -300,13 +300,14 @@
|
||||
|
||||
function updateSheetFilter(sheetNames, currentFilter) {
|
||||
const select = document.getElementById('sheetFilter');
|
||||
select.innerHTML = '<option value="All Sheets">All Sheets</option>';
|
||||
select.innerHTML = '';
|
||||
|
||||
sheetNames.forEach(sheetName => {
|
||||
sheetNames.forEach((sheetName, index) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = sheetName;
|
||||
option.textContent = sheetName;
|
||||
if (sheetName === currentFilter) {
|
||||
// Select the first sheet by default, or the current filter if specified
|
||||
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
|
||||
option.selected = true;
|
||||
}
|
||||
select.appendChild(option);
|
||||
@ -372,7 +373,7 @@
|
||||
},
|
||||
body: JSON.stringify({
|
||||
file_path: data.file_path,
|
||||
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
|
||||
sheet_filter: sheetFilter
|
||||
})
|
||||
});
|
||||
} else {
|
||||
@ -404,9 +405,10 @@
|
||||
// Update count displays
|
||||
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
||||
|
||||
// Count all different items including duplicates
|
||||
// Count all different items including duplicates and mixed duplicates
|
||||
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
||||
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
|
||||
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
|
||||
(results.mismatches.mixed_duplicates_count || 0);
|
||||
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
||||
|
||||
// Update Summary tab (matched items)
|
||||
@ -444,47 +446,70 @@
|
||||
const tbody = document.getElementById('different-table');
|
||||
tbody.innerHTML = '';
|
||||
|
||||
// Create sets of duplicate items for highlighting
|
||||
const kstDuplicateKeys = new Set();
|
||||
const coordiDuplicateKeys = new Set();
|
||||
|
||||
mismatchDetails.kst_duplicates.forEach(item => {
|
||||
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||
});
|
||||
|
||||
mismatchDetails.coordi_duplicates.forEach(item => {
|
||||
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||
});
|
||||
|
||||
// Combine only KST-only and Coordi-only items (like before)
|
||||
const allDifferences = [];
|
||||
|
||||
// Add KST-only items
|
||||
// Add KST-only items (no special highlighting)
|
||||
mismatchDetails.kst_only.forEach(item => {
|
||||
const key = `${item.title}_${item.episode}`;
|
||||
allDifferences.push({
|
||||
kstData: `${item.title} - Episode ${item.episode}`,
|
||||
coordiData: '',
|
||||
reason: 'Only appears in KST',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||
highlightType: 'none'
|
||||
});
|
||||
});
|
||||
|
||||
// Add Coordi-only items
|
||||
// Add Coordi-only items (no special highlighting)
|
||||
mismatchDetails.coordi_only.forEach(item => {
|
||||
const key = `${item.title}_${item.episode}`;
|
||||
allDifferences.push({
|
||||
kstData: '',
|
||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
||||
reason: 'Only appears in Coordi',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||
highlightType: 'none'
|
||||
});
|
||||
});
|
||||
|
||||
// Add KST duplicates (red highlighting)
|
||||
mismatchDetails.kst_duplicates.forEach(item => {
|
||||
allDifferences.push({
|
||||
kstData: `${item.title} - Episode ${item.episode}`,
|
||||
coordiData: '',
|
||||
reason: 'Duplicate entry in KST data',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
highlightType: 'red'
|
||||
});
|
||||
});
|
||||
|
||||
// Add Coordi duplicates (red highlighting)
|
||||
mismatchDetails.coordi_duplicates.forEach(item => {
|
||||
allDifferences.push({
|
||||
kstData: '',
|
||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
||||
reason: 'Duplicate entry in Coordi data',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
highlightType: 'red'
|
||||
});
|
||||
});
|
||||
|
||||
// Add mixed duplicates (yellow highlighting)
|
||||
if (mismatchDetails.mixed_duplicates) {
|
||||
mismatchDetails.mixed_duplicates.forEach(item => {
|
||||
allDifferences.push({
|
||||
kstData: item.duplicate_side === 'KST' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
|
||||
coordiData: item.duplicate_side === 'COORDI' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
|
||||
reason: item.reason,
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
highlightType: 'yellow'
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Sort by Korean title + episode
|
||||
allDifferences.sort((a, b) => {
|
||||
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
|
||||
@ -499,10 +524,13 @@
|
||||
row.insertCell(1).textContent = diff.coordiData;
|
||||
row.insertCell(2).textContent = diff.reason;
|
||||
|
||||
// Highlight row in yellow if it's also a duplicate
|
||||
if (diff.isDuplicate) {
|
||||
// Apply highlighting based on type
|
||||
if (diff.highlightType === 'red') {
|
||||
row.style.backgroundColor = '#f8d7da'; // Light red
|
||||
row.title = 'Pure duplicate entry';
|
||||
} else if (diff.highlightType === 'yellow') {
|
||||
row.style.backgroundColor = '#fff3cd'; // Light yellow
|
||||
row.title = 'This item also has duplicates in the dataset';
|
||||
row.title = 'Item exists in both datasets but has duplicates on one side';
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -1,64 +0,0 @@
|
||||
from data_comparator import KSTCoordiComparator
|
||||
|
||||
def test_duplicate_detection():
    """Print a duplicate-detection report for the sample workbook.

    Loads ``data/sample-data.xlsx`` through ``KSTCoordiComparator`` and
    prints item totals, duplicate counts with up to three samples per side,
    a lookup of the '백라이트' 53-1x example, and the total the web interface
    would display. Does nothing if the data cannot be loaded.
    """
    comparator = KSTCoordiComparator('data/sample-data.xlsx')
    if not comparator.load_data():
        return

    def _show_samples(header, duplicates):
        # Print the header and at most the first three duplicates, if any.
        if not duplicates:
            return
        print(header)
        for position, dup in enumerate(duplicates[:3], start=1):
            print(f" {position}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")

    def _is_backlight_example(entry):
        # Matches the specific example: 백라이트 - Episode 53-1x(휴재)
        return '백라이트' in entry['title'] and '53-1x' in entry['episode']

    print("=== DUPLICATE DETECTION TEST ===")

    # Totals straight from the extraction step
    extracted = comparator.extract_kst_coordi_items()
    print(f"Total KST items (unique): {len(extracted['kst_items'])}")
    print(f"Total KST items (all): {len(extracted['kst_all_items'])}")
    print(f"Total Coordi items (unique): {len(extracted['coordi_items'])}")
    print(f"Total Coordi items (all): {len(extracted['coordi_all_items'])}")

    # Duplicate counts, followed by a few samples per side
    categorization = comparator.categorize_mismatches()
    print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
    print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")
    _show_samples("\nSample KST duplicates:", categorization['kst_duplicates'])
    _show_samples("\nSample Coordi duplicates:", categorization['coordi_duplicates'])

    # Where does the specific example show up?
    mismatch_details = comparator.generate_mismatch_details()
    print(f"\nLooking for '백라이트 - Episode 53-1x(휴재)':")
    backlight_kst_only = [
        entry for entry in mismatch_details['kst_only'] if _is_backlight_example(entry)
    ]
    backlight_kst_dup = [
        entry for entry in mismatch_details['kst_duplicates'] if _is_backlight_example(entry)
    ]
    print(f" Found in KST-only: {len(backlight_kst_only)}")
    print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
    if backlight_kst_only:
        print(f" KST-only details: {backlight_kst_only[0]}")
    if backlight_kst_dup:
        print(f" KST duplicate details: {backlight_kst_dup[0]}")

    # Mirror the count the web interface derives from the summary
    print(f"\n=== Testing Web Interface Logic ===")
    summary = comparator.get_comparison_summary()
    print(f"Web interface will show:")
    total_different = (
        summary['mismatches']['kst_only_count']
        + summary['mismatches']['coordi_only_count']
        + summary['mismatches']['kst_duplicates_count']
        + summary['mismatches']['coordi_duplicates_count']
    )
    print(f" Total different items: {total_different}")

    print("\n✓ Duplicate detection test complete!")
    print("✓ Check the web interface at http://localhost:8080 to see combined reasons")


if __name__ == "__main__":
    test_duplicate_detection()
|
||||
@ -1,52 +0,0 @@
|
||||
import requests
|
||||
|
||||
def test_final_duplicate_fix():
    """Call the local /analyze endpoint and print the duplicate counts.

    Posts the sample workbook path to ``http://localhost:8081/analyze`` and
    prints the matched/mismatch counts, their total, and the '백라이트' 53-1x
    duplicate example when present. HTTP, analysis and request failures are
    reported with a printed message rather than raised.
    """
    print("=== FINAL DUPLICATE FIX TEST ===")

    try:
        # Test the analyze endpoint
        response = requests.post(
            'http://localhost:8081/analyze',
            json={'file_path': 'data/sample-data.xlsx'},
            timeout=30,
        )

        if response.status_code != 200:
            print(f"✗ Request failed: {response.status_code}")
            return

        payload = response.json()
        if not payload.get('success'):
            print(f"✗ Analysis failed: {payload.get('error')}")
            return

        results = payload['results']

        print("✓ Analysis successful!")
        print(f" Matched items: {results['matched_items_count']}")
        print(f" KST only: {results['mismatches']['kst_only_count']}")
        print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
        print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
        print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")

        counts = results['mismatches']
        total_different = (
            counts['kst_only_count']
            + counts['coordi_only_count']
            + counts['kst_duplicates_count']
            + counts['coordi_duplicates_count']
        )
        print(f" Total different items: {total_different}")

        # Check for the specific example
        backlight_duplicates = [
            entry for entry in results['mismatch_details']['kst_duplicates']
            if '백라이트' in entry['title'] and '53-1x' in entry['episode']
        ]
        if backlight_duplicates:
            first = backlight_duplicates[0]
            print(f"\n✓ Found 백라이트 duplicates: {len(backlight_duplicates)}")
            print(f" Example: {first['title']} - Episode {first['episode']}")

        print(f"\n✓ Web interface ready at http://localhost:8081")
        print("✓ The 'Different' tab will now show combined reasons like:")
        print(" 백라이트 - Episode 53-1x(휴재) | (empty) | Only appears in KST + Duplicate in KST")

    except requests.exceptions.RequestException as e:
        print(f"✗ Request failed: {e}")


if __name__ == "__main__":
    test_final_duplicate_fix()
|
||||
57
test_sheet_filtering.py
Normal file
57
test_sheet_filtering.py
Normal file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from data_comparator import KSTCoordiComparator
|
||||
|
||||
def test_sheet_filtering():
    """Test that sheet filtering works correctly and defaults to first sheet.

    Runs three printed checks against ``data/sample-data.xlsx``:

    1. Calling ``get_comparison_summary()`` without a filter must report the
       first sheet as ``current_sheet_filter``.
    2. Calling it with the second sheet (when there is one) must report that
       sheet as ``current_sheet_filter``.
    3. Per-sheet duplicate counts are printed for every sheet.

    Tests 1 and 2 report failures by printing instead of raising; test 3 is
    not guarded, so an exception there propagates to the caller.
    """
    print("Testing sheet filtering functionality...")

    # Create comparator and load data
    comparator = KSTCoordiComparator("data/sample-data.xlsx")
    if not comparator.load_data():
        print("Failed to load data!")
        return

    sheet_names = list(comparator.data.keys())
    print(f"Available sheets: {sheet_names}")

    # Test 1: No sheet filter provided (should default to first sheet)
    print("\n=== TEST 1: No sheet filter (should default to first sheet) ===")
    try:
        summary1 = comparator.get_comparison_summary()
        print(f"Default sheet selected: {summary1['current_sheet_filter']}")
        print(f"KST total: {summary1['original_counts']['kst_total']}")
        print(f"Coordi total: {summary1['original_counts']['coordi_total']}")
        print(f"Matched: {summary1['matched_items_count']}")
        # Actually verify the default instead of only printing it.
        expected_default = sheet_names[0] if sheet_names else None
        if summary1['current_sheet_filter'] != expected_default:
            raise AssertionError(
                f"expected default sheet {expected_default!r}, "
                f"got {summary1['current_sheet_filter']!r}"
            )
        print("✓ Test 1 passed")
    except Exception as e:  # report-and-continue is deliberate in this script
        print(f"✗ Test 1 failed: {e}")

    # Test 2: Specific sheet filter
    if len(sheet_names) > 1:
        second_sheet = sheet_names[1]
        print(f"\n=== TEST 2: Specific sheet filter ({second_sheet}) ===")
        try:
            summary2 = comparator.get_comparison_summary(second_sheet)
            print(f"Selected sheet: {summary2['current_sheet_filter']}")
            print(f"KST total: {summary2['original_counts']['kst_total']}")
            print(f"Coordi total: {summary2['original_counts']['coordi_total']}")
            print(f"Matched: {summary2['matched_items_count']}")
            # The summary must echo back the sheet that was requested.
            if summary2['current_sheet_filter'] != second_sheet:
                raise AssertionError(
                    f"expected sheet {second_sheet!r}, "
                    f"got {summary2['current_sheet_filter']!r}"
                )
            print("✓ Test 2 passed")
        except Exception as e:  # report-and-continue is deliberate in this script
            print(f"✗ Test 2 failed: {e}")
    else:
        print("\n=== TEST 2: Skipped (only one sheet available) ===")

    # Test 3: Verify no duplicates across sheets (this was the original problem)
    print(f"\n=== TEST 3: Verify duplicate detection within single sheets only ===")
    for sheet_name in sheet_names:
        summary = comparator.get_comparison_summary(sheet_name)
        print(f"Sheet '{sheet_name}':")
        print(f" KST duplicates: {summary['mismatches']['kst_duplicates_count']}")
        print(f" Coordi duplicates: {summary['mismatches']['coordi_duplicates_count']}")

    print("\n✓ All tests completed!")


if __name__ == "__main__":
    test_sheet_filtering()
|
||||
@ -1,68 +0,0 @@
|
||||
import requests
|
||||
|
||||
def test_simplified_duplicates():
    """Call the local /analyze endpoint and print the simplified display logic.

    Posts the sample workbook path to ``http://localhost:8081/analyze`` and
    prints the mismatch counts, the count-badge total versus the number of
    table rows, and whether the '백라이트' 53-1x example appears as both
    KST-only and KST duplicate. HTTP, analysis and request failures are
    reported with a printed message rather than raised.
    """
    print("=== SIMPLIFIED DUPLICATE DISPLAY TEST ===")

    def _is_backlight_example(entry):
        # Matches the 백라이트 53-1x example rows.
        return '백라이트' in entry['title'] and '53-1x' in entry['episode']

    try:
        # Test the analyze endpoint
        response = requests.post(
            'http://localhost:8081/analyze',
            json={'file_path': 'data/sample-data.xlsx'},
            timeout=30,
        )

        if response.status_code != 200:
            print(f"✗ Request failed: {response.status_code}")
            return

        payload = response.json()
        if not payload.get('success'):
            print(f"✗ Analysis failed: {payload.get('error')}")
            return

        results = payload['results']

        print("✓ Analysis successful!")
        print(f" Matched items: {results['matched_items_count']}")
        print(f" KST only: {results['mismatches']['kst_only_count']}")
        print(f" Coordi only: {results['mismatches']['coordi_only_count']}")
        print(f" KST duplicates: {results['mismatches']['kst_duplicates_count']}")
        print(f" Coordi duplicates: {results['mismatches']['coordi_duplicates_count']}")

        counts = results['mismatches']

        # What the count will show
        total_count = (
            counts['kst_only_count']
            + counts['coordi_only_count']
            + counts['kst_duplicates_count']
            + counts['coordi_duplicates_count']
        )

        # What the table will show
        table_rows = counts['kst_only_count'] + counts['coordi_only_count']

        print(f"\n📊 DISPLAY LOGIC:")
        print(f" Count badge shows: {total_count} items (all different items)")
        print(f" Table shows: {table_rows} rows (only KST-only + Coordi-only)")
        print(f" Yellow highlights: Items that are also duplicates")

        # Check for 백라이트 example
        kst_only = results['mismatch_details']['kst_only']
        kst_duplicates = results['mismatch_details']['kst_duplicates']
        backlight_kst_only = [entry for entry in kst_only if _is_backlight_example(entry)]
        backlight_kst_dup = [entry for entry in kst_duplicates if _is_backlight_example(entry)]

        if backlight_kst_only and backlight_kst_dup:
            print(f"\n✓ 백라이트 example works:")
            print(f" - Appears in table (KST-only): YES")
            print(f" - Will be highlighted yellow: YES (also duplicate)")
            print(f" - Contributes to count: 2 items (1 KST-only + 1 duplicate)")

        print(f"\n✓ Web interface ready at http://localhost:8081")
        print("✓ Check the 'Different' tab:")
        print(" - Count shows all different items")
        print(" - Table shows only KST-only + Coordi-only")
        print(" - Yellow rows = items that also have duplicates")

    except requests.exceptions.RequestException as e:
        print(f"✗ Request failed: {e}")


if __name__ == "__main__":
    test_simplified_duplicates()
|
||||
90
web_gui.py
90
web_gui.py
@ -42,7 +42,7 @@ def analyze_data():
|
||||
matched_items = list(categorization['matched_items'])
|
||||
|
||||
# Filter matched items by sheet if specified
|
||||
if sheet_filter and sheet_filter != 'All Sheets':
|
||||
if sheet_filter:
|
||||
matched_items = [item for item in matched_items if item.source_sheet == sheet_filter]
|
||||
|
||||
# Format matched items for JSON (limit to first 500 for performance)
|
||||
@ -291,7 +291,7 @@ def create_templates_dir():
|
||||
<div class="file-input" style="margin-top: 10px;">
|
||||
<label for="sheetFilter">Sheet Filter:</label>
|
||||
<select id="sheetFilter" onchange="filterBySheet()" disabled>
|
||||
<option value="All Sheets">All Sheets</option>
|
||||
<!-- Options will be populated dynamically -->
|
||||
</select>
|
||||
</div>
|
||||
<div id="status"></div>
|
||||
@ -383,7 +383,7 @@ def create_templates_dir():
|
||||
},
|
||||
body: JSON.stringify({
|
||||
file_path: filePath,
|
||||
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
|
||||
sheet_filter: sheetFilter
|
||||
})
|
||||
})
|
||||
.then(response => response.json())
|
||||
@ -408,13 +408,14 @@ def create_templates_dir():
|
||||
|
||||
function updateSheetFilter(sheetNames, currentFilter) {
|
||||
const select = document.getElementById('sheetFilter');
|
||||
select.innerHTML = '<option value="All Sheets">All Sheets</option>';
|
||||
select.innerHTML = '';
|
||||
|
||||
sheetNames.forEach(sheetName => {
|
||||
sheetNames.forEach((sheetName, index) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = sheetName;
|
||||
option.textContent = sheetName;
|
||||
if (sheetName === currentFilter) {
|
||||
// Select the first sheet by default, or the current filter if specified
|
||||
if (sheetName === currentFilter || (!currentFilter && index === 0)) {
|
||||
option.selected = true;
|
||||
}
|
||||
select.appendChild(option);
|
||||
@ -480,7 +481,7 @@ def create_templates_dir():
|
||||
},
|
||||
body: JSON.stringify({
|
||||
file_path: data.file_path,
|
||||
sheet_filter: sheetFilter === 'All Sheets' ? null : sheetFilter
|
||||
sheet_filter: sheetFilter
|
||||
})
|
||||
});
|
||||
} else {
|
||||
@ -512,9 +513,10 @@ def create_templates_dir():
|
||||
// Update count displays
|
||||
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
||||
|
||||
// Count all different items including duplicates
|
||||
// Count all different items including duplicates and mixed duplicates
|
||||
const totalDifferent = results.mismatches.kst_only_count + results.mismatches.coordi_only_count +
|
||||
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count;
|
||||
results.mismatches.kst_duplicates_count + results.mismatches.coordi_duplicates_count +
|
||||
(results.mismatches.mixed_duplicates_count || 0);
|
||||
document.getElementById('different-count-display').textContent = totalDifferent.toLocaleString();
|
||||
|
||||
// Update Summary tab (matched items)
|
||||
@ -552,47 +554,70 @@ def create_templates_dir():
|
||||
const tbody = document.getElementById('different-table');
|
||||
tbody.innerHTML = '';
|
||||
|
||||
// Create sets of duplicate items for highlighting
|
||||
const kstDuplicateKeys = new Set();
|
||||
const coordiDuplicateKeys = new Set();
|
||||
|
||||
mismatchDetails.kst_duplicates.forEach(item => {
|
||||
kstDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||
});
|
||||
|
||||
mismatchDetails.coordi_duplicates.forEach(item => {
|
||||
coordiDuplicateKeys.add(`${item.title}_${item.episode}`);
|
||||
});
|
||||
|
||||
// Combine only KST-only and Coordi-only items (like before)
|
||||
const allDifferences = [];
|
||||
|
||||
// Add KST-only items
|
||||
// Add KST-only items (no special highlighting)
|
||||
mismatchDetails.kst_only.forEach(item => {
|
||||
const key = `${item.title}_${item.episode}`;
|
||||
allDifferences.push({
|
||||
kstData: `${item.title} - Episode ${item.episode}`,
|
||||
coordiData: '',
|
||||
reason: 'Only appears in KST',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
isDuplicate: kstDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||
highlightType: 'none'
|
||||
});
|
||||
});
|
||||
|
||||
// Add Coordi-only items
|
||||
// Add Coordi-only items (no special highlighting)
|
||||
mismatchDetails.coordi_only.forEach(item => {
|
||||
const key = `${item.title}_${item.episode}`;
|
||||
allDifferences.push({
|
||||
kstData: '',
|
||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
||||
reason: 'Only appears in Coordi',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
isDuplicate: coordiDuplicateKeys.has(key) // Check if this item is also a duplicate
|
||||
highlightType: 'none'
|
||||
});
|
||||
});
|
||||
|
||||
// Add KST duplicates (red highlighting)
|
||||
mismatchDetails.kst_duplicates.forEach(item => {
|
||||
allDifferences.push({
|
||||
kstData: `${item.title} - Episode ${item.episode}`,
|
||||
coordiData: '',
|
||||
reason: 'Duplicate entry in KST data',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
highlightType: 'red'
|
||||
});
|
||||
});
|
||||
|
||||
// Add Coordi duplicates (red highlighting)
|
||||
mismatchDetails.coordi_duplicates.forEach(item => {
|
||||
allDifferences.push({
|
||||
kstData: '',
|
||||
coordiData: `${item.title} - Episode ${item.episode}`,
|
||||
reason: 'Duplicate entry in Coordi data',
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
highlightType: 'red'
|
||||
});
|
||||
});
|
||||
|
||||
// Add mixed duplicates (yellow highlighting)
|
||||
if (mismatchDetails.mixed_duplicates) {
|
||||
mismatchDetails.mixed_duplicates.forEach(item => {
|
||||
allDifferences.push({
|
||||
kstData: item.duplicate_side === 'KST' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
|
||||
coordiData: item.duplicate_side === 'COORDI' ? `${item.title} - Episode ${item.episode}` : `${item.title} - Episode ${item.episode}`,
|
||||
reason: item.reason,
|
||||
sortTitle: item.title,
|
||||
sortEpisode: parseFloat(item.episode) || 0,
|
||||
highlightType: 'yellow'
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Sort by Korean title + episode
|
||||
allDifferences.sort((a, b) => {
|
||||
const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
|
||||
@ -607,10 +632,13 @@ def create_templates_dir():
|
||||
row.insertCell(1).textContent = diff.coordiData;
|
||||
row.insertCell(2).textContent = diff.reason;
|
||||
|
||||
// Highlight row in yellow if it's also a duplicate
|
||||
if (diff.isDuplicate) {
|
||||
// Apply highlighting based on type
|
||||
if (diff.highlightType === 'red') {
|
||||
row.style.backgroundColor = '#f8d7da'; // Light red
|
||||
row.title = 'Pure duplicate entry';
|
||||
} else if (diff.highlightType === 'yellow') {
|
||||
row.style.backgroundColor = '#fff3cd'; // Light yellow
|
||||
row.title = 'This item also has duplicates in the dataset';
|
||||
row.title = 'Item exists in both datasets but has duplicates on one side';
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user