from data_comparator import KSTCoordiComparator


def test_grouping():
    """Print a title-grouped comparison report for the sample workbook.

    Loads ``data/sample-data.xlsx`` through ``KSTCoordiComparator``, fetches
    the comparison summary, and prints:

    * the number of unique titles found under
      ``summary['grouped_by_title']['title_summaries']``;
    * the (up to) five titles with the lowest ``match_percentage``, each with
      its total episode count, matched count and percentage, and the
      KST-only / Coordi-only counts.

    If ``load_data()`` reports failure (returns a falsy value), a one-line
    failure message is printed and no report is produced.

    Returns:
        None. All results are written to stdout.
    """
    data_path = 'data/sample-data.xlsx'
    comparator = KSTCoordiComparator(data_path)

    if not comparator.load_data():
        # Without this message a failed load produced no output at all and
        # looked the same as a run that had nothing to report.
        print(f"Failed to load data from {data_path}")
        return

    summary = comparator.get_comparison_summary()
    # Mapping of title -> per-title stats dict; looked up once and reused.
    title_summaries = summary['grouped_by_title']['title_summaries']

    print("=== GROUPED BY TITLE TEST ===")
    print(f"Total unique titles: {len(title_summaries)}")
    print()

    # Show top 5 titles with worst match percentages.
    # Ascending sort puts the lowest match percentage first.
    sorted_titles = sorted(
        title_summaries.items(),
        key=lambda item: item[1]['match_percentage'],
    )

    print("Top 5 titles needing attention (worst match %):")
    for rank, (title, data) in enumerate(sorted_titles[:5], start=1):
        print(f"{rank}. {title}")
        print(f" Total Episodes: {data['total_episodes']}")
        print(f" Matched: {data['matched_count']} ({data['match_percentage']}%)")
        print(f" KST Only: {data['kst_only_count']}")
        print(f" Coordi Only: {data['coordi_only_count']}")
        print()


if __name__ == "__main__":
    test_grouping()