64 lines
3.1 KiB
Python
64 lines
3.1 KiB
Python
|
|
from data_comparator import KSTCoordiComparator
|
||
|
|
|
||
|
|
def test_duplicate_detection():
    """Print a duplicate-detection report for the sample workbook.

    Builds a ``KSTCoordiComparator`` for ``data/sample-data.xlsx`` and, if
    ``load_data()`` succeeds, prints in order:

    1. unique/total item counts from ``extract_kst_coordi_items()``;
    2. duplicate counts and up to three sample duplicates per side from
       ``categorize_mismatches()``;
    3. how often an entry whose title contains '백라이트' and whose episode
       contains '53-1x' appears in the ``kst_only`` and ``kst_duplicates``
       lists of ``generate_mismatch_details()``;
    4. the sum of the four mismatch counters from
       ``get_comparison_summary()``.

    The function only prints; it makes no assertions and returns ``None``.
    If the data cannot be loaded it reports that and returns early instead
    of finishing silently.
    """
    data_path = 'data/sample-data.xlsx'
    comparator = KSTCoordiComparator(data_path)

    if not comparator.load_data():
        # Without this message a failed load is indistinguishable from a run
        # that simply had nothing to report.
        print(f"Could not load data from '{data_path}'; duplicate detection test skipped.")
        return

    print("=== DUPLICATE DETECTION TEST ===")

    # Get the data extraction results
    data = comparator.extract_kst_coordi_items()

    print(f"Total KST items (unique): {len(data['kst_items'])}")
    print(f"Total KST items (all): {len(data['kst_all_items'])}")
    print(f"Total Coordi items (unique): {len(data['coordi_items'])}")
    print(f"Total Coordi items (all): {len(data['coordi_all_items'])}")

    # Check for duplicates
    categorization = comparator.categorize_mismatches()

    print(f"\nKST duplicates found: {len(categorization['kst_duplicates'])}")
    print(f"Coordi duplicates found: {len(categorization['coordi_duplicates'])}")

    # Show at most three sample duplicates per side; both sides share the
    # same output format, so one loop handles them.
    for label, key in (("KST", 'kst_duplicates'), ("Coordi", 'coordi_duplicates')):
        duplicates = categorization[key]
        if not duplicates:
            continue
        print(f"\nSample {label} duplicates:")
        for position, dup in enumerate(duplicates[:3], start=1):
            # row_index is shown as row_index + 1 (presumably 0-based ->
            # 1-based spreadsheet row; confirm against data_comparator).
            print(
                f" {position}. {dup.title} - Episode {dup.episode} "
                f"(Sheet: {dup.source_sheet}, Row: {dup.row_index + 1})"
            )

    # Look up one specific known entry: title containing '백라이트',
    # episode containing '53-1x'.
    mismatch_details = comparator.generate_mismatch_details()

    print("\nLooking for '백라이트 - Episode 53-1x(휴재)':")

    def is_target_entry(item):
        # Substring match on both fields, exactly as the report expects.
        return '백라이트' in item['title'] and '53-1x' in item['episode']

    # Check in KST-only
    backlight_kst_only = [
        item for item in mismatch_details['kst_only'] if is_target_entry(item)
    ]
    # Check in KST duplicates
    backlight_kst_dup = [
        item for item in mismatch_details['kst_duplicates'] if is_target_entry(item)
    ]

    print(f" Found in KST-only: {len(backlight_kst_only)}")
    print(f" Found in KST duplicates: {len(backlight_kst_dup)}")

    if backlight_kst_only:
        print(f" KST-only details: {backlight_kst_only[0]}")
    if backlight_kst_dup:
        print(f" KST duplicate details: {backlight_kst_dup[0]}")

    # Test the web interface logic
    print("\n=== Testing Web Interface Logic ===")
    summary = comparator.get_comparison_summary()
    print("Web interface will show:")

    mismatch_counts = summary['mismatches']
    total_different = (
        mismatch_counts['kst_only_count']
        + mismatch_counts['coordi_only_count']
        + mismatch_counts['kst_duplicates_count']
        + mismatch_counts['coordi_duplicates_count']
    )
    print(f" Total different items: {total_different}")

    print("\n✓ Duplicate detection test complete!")
    print("✓ Check the web interface at http://localhost:8080 to see combined reasons")
|
||
|
|
|
||
|
|
# Script entry point: produce the report only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    test_duplicate_detection()
|