data-comparison/test_duplicates.py

64 lines
3.1 KiB
Python
Raw Normal View History

2025-08-20 07:58:30 +00:00
from data_comparator import KSTCoordiComparator
def _print_sample_duplicates(label, duplicates, limit=3):
    """Print up to ``limit`` entries of ``duplicates`` under a ``label`` heading.

    Nothing is printed when ``duplicates`` is empty. Each entry is read
    through its ``title``, ``episode``, ``source_sheet`` and ``row_index``
    attributes.
    """
    if not duplicates:
        return
    print(f"\nSample {label} duplicates:")
    for position, dup in enumerate(duplicates[:limit], start=1):
        # row_index is displayed with +1 — presumably turning a 0-based index
        # into a 1-based row number; confirm against the comparator.
        print(f" {position}. {dup.title} - Episode {dup.episode} (Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})")


def test_duplicate_detection():
    """Print a duplicate-detection report for the sample workbook.

    Loads ``data/sample-data.xlsx`` through ``KSTCoordiComparator`` and prints:

    * unique and total item counts for KST and Coordi,
    * the number of duplicates on each side plus up to three samples,
    * whether the '백라이트' / '53-1x' entry shows up among the KST-only
      items and/or the KST duplicates,
    * the combined mismatch total taken from the comparison summary.

    If the comparator reports that loading failed, a message is printed and
    the remaining checks are skipped. The function returns ``None`` and makes
    no assertions; it is a printed smoke check.
    """
    source_path = 'data/sample-data.xlsx'
    comparator = KSTCoordiComparator(source_path)
    if not comparator.load_data():
        # Say so explicitly: an empty run must not be mistaken for a pass.
        print(f"Could not load data from {source_path}; duplicate detection test skipped.")
        return

    print("=== DUPLICATE DETECTION TEST ===")

    # Get the data extraction results
    data = comparator.extract_kst_coordi_items()
    print(f"Total KST items (unique): {len(data['kst_items'])}")
    print(f"Total KST items (all): {len(data['kst_all_items'])}")
    print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
    print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")

    # Check for duplicates
    categorization = comparator.categorize_mismatches()
    kst_duplicates = categorization['kst_duplicates']
    coordi_duplicates = categorization['coordi_duplicates']
    print(f"\nKST duplicates found: {len(kst_duplicates)}")
    print(f"Coordi duplicates found: {len(coordi_duplicates)}")

    # Show sample duplicates
    _print_sample_duplicates("KST", kst_duplicates)
    _print_sample_duplicates("Coordi", coordi_duplicates)

    # Check for the specific example: 백라이트 - Episode 53-1x(휴재)
    mismatch_details = comparator.generate_mismatch_details()
    print("\nLooking for '백라이트 - Episode 53-1x(휴재)':")

    def is_target(item):
        # Substring match on both fields of a mismatch-detail entry.
        return '백라이트' in item['title'] and '53-1x' in item['episode']

    backlight_kst_only = [item for item in mismatch_details['kst_only'] if is_target(item)]
    backlight_kst_dup = [item for item in mismatch_details['kst_duplicates'] if is_target(item)]

    print(f" Found in KST-only: {len(backlight_kst_only)}")
    print(f" Found in KST duplicates: {len(backlight_kst_dup)}")
    if backlight_kst_only:
        print(f" KST-only details: {backlight_kst_only[0]}")
    if backlight_kst_dup:
        print(f" KST duplicate details: {backlight_kst_dup[0]}")

    # Test the web interface logic
    print("\n=== Testing Web Interface Logic ===")
    summary = comparator.get_comparison_summary()
    print("Web interface will show:")
    mismatch_counts = summary['mismatches']
    total_different = sum(
        mismatch_counts[key]
        for key in (
            'kst_only_count',
            'coordi_only_count',
            'kst_duplicates_count',
            'coordi_duplicates_count',
        )
    )
    print(f" Total different items: {total_different}")

    print("\n✓ Duplicate detection test complete!")
    print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
# Run the duplicate-detection report only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
    test_duplicate_detection()