from data_comparator import KSTCoordiComparator


def _print_sample_duplicates(label, duplicates, limit=3):
    """Print up to *limit* entries of *duplicates* under a "Sample <label> duplicates:" header.

    Prints nothing when *duplicates* is empty.
    """
    if not duplicates:
        return
    print(f"\nSample {label} duplicates:")
    for position, dup in enumerate(duplicates[:limit], start=1):
        # The row is displayed as the stored row_index plus one.
        print(
            f" {position}. {dup.title} - Episode {dup.episode} "
            f"(Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})"
        )


def _select_backlight_items(items):
    """Return the entries of *items* whose title contains '백라이트' and whose episode contains '53-1x'."""
    return [
        item
        for item in items
        if '백라이트' in item['title'] and '53-1x' in item['episode']
    ]


def test_duplicate_detection():
    """Exercise duplicate detection on the sample workbook and print the findings.

    Loads ``data/sample-data.xlsx`` through ``KSTCoordiComparator`` and prints
    item counts, duplicate counts with up to three samples per side, the
    lookup result for the '백라이트 - Episode 53-1x(휴재)' example, and the
    combined total taken from the comparison summary.

    If the data cannot be loaded, a message is printed and the function
    returns without running the remaining checks.
    """
    data_path = 'data/sample-data.xlsx'
    comparator = KSTCoordiComparator(data_path)

    # Guard clause: report the failure explicitly instead of silently doing nothing.
    if not comparator.load_data():
        print(f"Failed to load data from {data_path}; duplicate detection test not run.")
        return

    print("=== DUPLICATE DETECTION TEST ===")

    # Get the data extraction results
    data = comparator.extract_kst_coordi_items()
    print(f"Total KST items (unique): {len(data['kst_items'])}")
    print(f"Total KST items (all): {len(data['kst_all_items'])}")
    print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
    print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")

    # Check for duplicates
    categorization = comparator.categorize_mismatches()
    kst_duplicates = categorization['kst_duplicates']
    coordi_duplicates = categorization['coordi_duplicates']
    print(f"\nKST duplicates found: {len(kst_duplicates)}")
    print(f"Coordi duplicates found: {len(coordi_duplicates)}")

    # Show sample duplicates
    _print_sample_duplicates("KST", kst_duplicates)
    _print_sample_duplicates("Coordi", coordi_duplicates)

    # Check for the specific example: 백라이트 - Episode 53-1x(휴재)
    mismatch_details = comparator.generate_mismatch_details()
    print("\nLooking for '백라이트 - Episode 53-1x(휴재)':")

    backlight_kst_only = _select_backlight_items(mismatch_details['kst_only'])
    backlight_kst_dup = _select_backlight_items(mismatch_details['kst_duplicates'])

    print(f" Found in KST-only: {len(backlight_kst_only)}")
    print(f" Found in KST duplicates: {len(backlight_kst_dup)}")

    if backlight_kst_only:
        print(f" KST-only details: {backlight_kst_only[0]}")
    if backlight_kst_dup:
        print(f" KST duplicate details: {backlight_kst_dup[0]}")

    # Test the web interface logic
    print("\n=== Testing Web Interface Logic ===")
    summary = comparator.get_comparison_summary()
    mismatch_counts = summary['mismatches']
    total_different = sum(
        mismatch_counts[key]
        for key in (
            'kst_only_count',
            'coordi_only_count',
            'kst_duplicates_count',
            'coordi_duplicates_count',
        )
    )
    print("Web interface will show:")
    print(f" Total different items: {total_different}")

    print("\n✓ Duplicate detection test complete!")
    print("✓ Check the web interface at http://localhost:8080 to see combined reasons")


if __name__ == "__main__":
    test_duplicate_detection()