Compare commits

..

No commits in common. "5ab713f336440e197db3bba42b9c1087702f1d6b" and "5545c5343ab2177f92dc47053451646b01bc79cd" have entirely different histories.

10 changed files with 150 additions and 3622 deletions

BIN
.DS_Store vendored

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@ -24,26 +24,24 @@ def normalize_episode(episode: str) -> str:
class ComparisonItem: class ComparisonItem:
"""Represents a single item for comparison""" """Represents a single item for comparison"""
language: str
title: str title: str
episode: str episode: str
source_sheet: str source_sheet: str
row_index: int row_index: int
def __init__(self, language: str, title: str, episode: str, source_sheet: str, row_index: int): def __init__(self, title: str, episode: str, source_sheet: str, row_index: int):
self.language = language
self.title = title self.title = title
self.episode = normalize_episode(episode) # Normalize episode on creation self.episode = normalize_episode(episode) # Normalize episode on creation
self.source_sheet = source_sheet self.source_sheet = source_sheet
self.row_index = row_index self.row_index = row_index
def __hash__(self): def __hash__(self):
return hash((self.language, self.title, self.episode)) return hash((self.title, self.episode))
def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, ComparisonItem): if not isinstance(other, ComparisonItem):
return False return False
return self.language == other.language and self.title == other.title and self.episode == other.episode return self.title == other.title and self.episode == other.episode
class KSTCoordiComparator: class KSTCoordiComparator:
""" """
@ -83,129 +81,99 @@ class KSTCoordiComparator:
kst_all_items = [] # Keep all items including duplicates kst_all_items = [] # Keep all items including duplicates
coordi_all_items = [] # Keep all items including duplicates coordi_all_items = [] # Keep all items including duplicates
# Fixed column positions - NO header name search # Try fixed column positions first, then fall back to header names
# Coordi: B(1), C(2), D(3) = Language, Title, Chapter # KST columns: I (index 8) for title, J (index 9) for chapter
# KST: H(7), I(8), J(9) = Language, Title, Chapter # Coordi columns: C (index 2) for title, D (index 3) for chapter
coordi_language_col_idx = 1 # Column B kst_title_col_idx = 8 # Column I
coordi_title_col_idx = 2 # Column C kst_episode_col_idx = 9 # Column J
coordi_episode_col_idx = 3 # Column D coordi_title_col_idx = 2 # Column C
kst_language_col_idx = 7 # Column H coordi_episode_col_idx = 3 # Column D
kst_title_col_idx = 8 # Column I
kst_episode_col_idx = 9 # Column J
# Validate that all required columns exist # Get column names by index (if they exist)
required_columns = [ kst_title_col = columns[kst_title_col_idx] if len(columns) > kst_title_col_idx else None
(coordi_language_col_idx, 'Coordi Language (Column B)'), kst_episode_col = columns[kst_episode_col_idx] if len(columns) > kst_episode_col_idx else None
(coordi_title_col_idx, 'Coordi Title (Column C)'), coordi_title_col = columns[coordi_title_col_idx] if len(columns) > coordi_title_col_idx else None
(coordi_episode_col_idx, 'Coordi Episode (Column D)'), coordi_episode_col = columns[coordi_episode_col_idx] if len(columns) > coordi_episode_col_idx else None
(kst_language_col_idx, 'KST Language (Column H)'),
(kst_title_col_idx, 'KST Title (Column I)'),
(kst_episode_col_idx, 'KST Episode (Column J)')
]
missing_columns = [] # Fallback: search by header names if fixed positions don't work
for col_idx, col_name in required_columns: if not kst_title_col or not kst_episode_col:
if len(columns) <= col_idx: for i, col in enumerate(columns):
missing_columns.append(col_name) if col == 'Title KR':
kst_title_col = col
kst_title_col_idx = i
elif col == 'Epi.':
kst_episode_col = col
kst_episode_col_idx = i
if missing_columns: if not coordi_title_col or not coordi_episode_col:
error_msg = f"Missing required columns in sheet '{sheet_name}':\n" + "\n".join(f" - {col}" for col in missing_columns) for i, col in enumerate(columns):
raise ValueError(error_msg) if col == 'KR title':
coordi_title_col = col
# Get column names by fixed positions coordi_title_col_idx = i
coordi_language_col = columns[coordi_language_col_idx] elif col == 'Chap':
coordi_title_col = columns[coordi_title_col_idx] coordi_episode_col = col
coordi_episode_col = columns[coordi_episode_col_idx] coordi_episode_col_idx = i
kst_language_col = columns[kst_language_col_idx]
kst_title_col = columns[kst_title_col_idx]
kst_episode_col = columns[kst_episode_col_idx]
# Extract dynamic label from column A (index 0) for flexible naming
coordi_label = "Default" # Default fallback
if len(columns) > 0 and columns[0] and str(columns[0]).strip():
coordi_label = str(columns[0]).strip()
elif len(df) > 0:
# Try to get from first data row if header is empty
first_row_col_a = str(df.iloc[0, 0]).strip() if not pd.isna(df.iloc[0, 0]) else ""
if first_row_col_a and first_row_col_a.lower() not in ['nan', 'none', '']:
coordi_label = first_row_col_a
print(f"Sheet: {sheet_name}") print(f"Sheet: {sheet_name}")
print(f" KST columns - Language: Column {chr(65 + kst_language_col_idx)} ({kst_language_col}), Title: Column {chr(65 + kst_title_col_idx)} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx)} ({kst_episode_col})") print(f" KST columns - Title: Column {chr(65 + kst_title_col_idx) if kst_title_col else 'None'} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx) if kst_episode_col else 'None'} ({kst_episode_col})")
print(f" Coordi columns - Language: Column {chr(65 + coordi_language_col_idx)} ({coordi_language_col}), Title: Column {chr(65 + coordi_title_col_idx)} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx)} ({coordi_episode_col})") print(f" Coordi columns - Title: Column {chr(65 + coordi_title_col_idx) if coordi_title_col else 'None'} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx) if coordi_episode_col else 'None'} ({coordi_episode_col})")
# Extract items from each row # Extract items from each row
for idx, row in df.iterrows(): for idx, row in df.iterrows():
# Extract KST data from fixed positions H, I, J # Extract KST data
kst_language = str(row.get(kst_language_col, '')).strip() if kst_title_col and kst_episode_col:
kst_title = str(row.get(kst_title_col, '')).strip() kst_title = str(row.get(kst_title_col, '')).strip()
kst_episode = str(row.get(kst_episode_col, '')).strip() kst_episode = str(row.get(kst_episode_col, '')).strip()
# Check if this row has valid KST data
has_kst_data = (
kst_title and kst_title != 'nan' and
kst_episode and kst_episode != 'nan' and
pd.notna(row[kst_title_col]) and pd.notna(row[kst_episode_col])
)
if has_kst_data:
item = ComparisonItem(kst_title, kst_episode, sheet_name, idx)
kst_items.add(item)
kst_all_items.append(item) # Keep all items for duplicate detection
kst_details.append({
'title': kst_title,
'episode': kst_episode,
'sheet': sheet_name,
'row_index': idx,
'kst_data': {
kst_title_col: row[kst_title_col],
kst_episode_col: row[kst_episode_col]
}
})
# Check if this row has valid KST data # Extract Coordi data
has_kst_data = ( if coordi_title_col and coordi_episode_col:
kst_language and kst_language != 'nan' and coordi_title = str(row.get(coordi_title_col, '')).strip()
kst_title and kst_title != 'nan' and coordi_episode = str(row.get(coordi_episode_col, '')).strip()
kst_episode and kst_episode != 'nan' and
pd.notna(row[kst_language_col]) and pd.notna(row[kst_title_col]) and pd.notna(row[kst_episode_col]) # Check if this row has valid Coordi data
) has_coordi_data = (
coordi_title and coordi_title != 'nan' and
# Validate language is not empty - raise error if found coordi_episode and coordi_episode != 'nan' and
if pd.notna(row[kst_title_col]) and pd.notna(row[kst_episode_col]): # Only check if this is a real data row pd.notna(row[coordi_title_col]) and pd.notna(row[coordi_episode_col])
if not kst_language or kst_language == 'nan' or pd.isna(row[kst_language_col]): )
raise ValueError(f"Empty language value found in KST data at row {idx + 1} ('{kst_title}' - Episode {kst_episode}): All language fields must be populated")
if has_coordi_data:
if has_kst_data: item = ComparisonItem(coordi_title, coordi_episode, sheet_name, idx)
item = ComparisonItem(kst_language, kst_title, kst_episode, sheet_name, idx) coordi_items.add(item)
kst_items.add(item) coordi_all_items.append(item) # Keep all items for duplicate detection
kst_all_items.append(item) # Keep all items for duplicate detection coordi_details.append({
kst_details.append({ 'title': coordi_title,
'language': kst_language, 'episode': coordi_episode,
'title': kst_title, 'sheet': sheet_name,
'episode': kst_episode, 'row_index': idx,
'sheet': sheet_name, 'coordi_data': {
'row_index': idx, coordi_title_col: row[coordi_title_col],
'kst_data': { coordi_episode_col: row[coordi_episode_col]
kst_language_col: row[kst_language_col], }
kst_title_col: row[kst_title_col], })
kst_episode_col: row[kst_episode_col]
}
})
# Extract Coordi data from fixed positions B, C, D
coordi_language = str(row.get(coordi_language_col, '')).strip()
coordi_title = str(row.get(coordi_title_col, '')).strip()
coordi_episode = str(row.get(coordi_episode_col, '')).strip()
# Check if this row has valid Coordi data
has_coordi_data = (
coordi_language and coordi_language != 'nan' and
coordi_title and coordi_title != 'nan' and
coordi_episode and coordi_episode != 'nan' and
pd.notna(row[coordi_language_col]) and pd.notna(row[coordi_title_col]) and pd.notna(row[coordi_episode_col])
)
# Validate language is not empty - raise error if found
if pd.notna(row[coordi_title_col]) and pd.notna(row[coordi_episode_col]): # Only check if this is a real data row
if not coordi_language or coordi_language == 'nan' or pd.isna(row[coordi_language_col]):
raise ValueError(f"Empty language value found in Coordi data at row {idx + 1} ('{coordi_title}' - Episode {coordi_episode}): All language fields must be populated")
if has_coordi_data:
item = ComparisonItem(coordi_language, coordi_title, coordi_episode, sheet_name, idx)
coordi_items.add(item)
coordi_all_items.append(item) # Keep all items for duplicate detection
coordi_details.append({
'language': coordi_language,
'title': coordi_title,
'episode': coordi_episode,
'sheet': sheet_name,
'row_index': idx,
'coordi_data': {
coordi_language_col: row[coordi_language_col],
coordi_title_col: row[coordi_title_col],
coordi_episode_col: row[coordi_episode_col]
}
})
return { return {
'kst_items': kst_items, 'kst_items': kst_items,
@ -213,8 +181,7 @@ class KSTCoordiComparator:
'kst_details': kst_details, 'kst_details': kst_details,
'coordi_details': coordi_details, 'coordi_details': coordi_details,
'kst_all_items': kst_all_items, 'kst_all_items': kst_all_items,
'coordi_all_items': coordi_all_items, 'coordi_all_items': coordi_all_items
'coordi_label': coordi_label # Dynamic label from column A
} }
def categorize_mismatches_for_sheet(self, sheet_data: Dict[str, Any]) -> Dict[str, Any]: def categorize_mismatches_for_sheet(self, sheet_data: Dict[str, Any]) -> Dict[str, Any]:
@ -229,17 +196,17 @@ class KSTCoordiComparator:
coordi_duplicates = self._find_duplicates_in_list(coordi_all_items) coordi_duplicates = self._find_duplicates_in_list(coordi_all_items)
# Create sets of items that have duplicates (to exclude from "only" lists) # Create sets of items that have duplicates (to exclude from "only" lists)
kst_duplicate_keys = {(item.language, item.title, item.episode) for item in kst_duplicates} kst_duplicate_keys = {(item.title, item.episode) for item in kst_duplicates}
coordi_duplicate_keys = {(item.language, item.title, item.episode) for item in coordi_duplicates} coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_duplicates}
# Find overlaps and differences - exclude items that have duplicates # Find overlaps and differences - exclude items that have duplicates
matched_items = kst_items.intersection(coordi_items) matched_items = kst_items.intersection(coordi_items)
# For "only" items: exclude those that have duplicates within their own dataset # For "only" items: exclude those that have duplicates within their own dataset
kst_only_items = {item for item in kst_items - coordi_items kst_only_items = {item for item in kst_items - coordi_items
if (item.language, item.title, item.episode) not in kst_duplicate_keys} if (item.title, item.episode) not in kst_duplicate_keys}
coordi_only_items = {item for item in coordi_items - kst_items coordi_only_items = {item for item in coordi_items - kst_items
if (item.language, item.title, item.episode) not in coordi_duplicate_keys} if (item.title, item.episode) not in coordi_duplicate_keys}
categorization = { categorization = {
'matched_items': list(matched_items), 'matched_items': list(matched_items),
@ -278,13 +245,13 @@ class KSTCoordiComparator:
"""Find duplicate items within a dataset - FIXED to only return actual duplicates""" """Find duplicate items within a dataset - FIXED to only return actual duplicates"""
from collections import Counter from collections import Counter
# Count occurrences of each (language, title, episode) tuple # Count occurrences of each (title, episode) pair
key_counts = Counter((item.language, item.title, item.episode) for item in items_list) key_counts = Counter((item.title, item.episode) for item in items_list)
# Only return items that appear more than once # Only return items that appear more than once
duplicates = [] duplicates = []
for item in items_list: for item in items_list:
key = (item.language, item.title, item.episode) key = (item.title, item.episode)
if key_counts[key] > 1: if key_counts[key] > 1:
duplicates.append(item) duplicates.append(item)
@ -360,7 +327,6 @@ class KSTCoordiComparator:
# KST-only items # KST-only items
for item in categorization['kst_only_items']: for item in categorization['kst_only_items']:
mismatch_details['kst_only'].append({ mismatch_details['kst_only'].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -372,7 +338,6 @@ class KSTCoordiComparator:
# Coordi-only items # Coordi-only items
for item in categorization['coordi_only_items']: for item in categorization['coordi_only_items']:
mismatch_details['coordi_only'].append({ mismatch_details['coordi_only'].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -393,7 +358,6 @@ class KSTCoordiComparator:
key = (item.title, item.episode) key = (item.title, item.episode)
if key not in mixed_duplicate_keys: if key not in mixed_duplicate_keys:
mismatch_details['kst_duplicates'].append({ mismatch_details['kst_duplicates'].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -407,7 +371,6 @@ class KSTCoordiComparator:
key = (item.title, item.episode) key = (item.title, item.episode)
if key not in mixed_duplicate_keys: if key not in mixed_duplicate_keys:
mismatch_details['coordi_duplicates'].append({ mismatch_details['coordi_duplicates'].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -488,7 +451,6 @@ class KSTCoordiComparator:
for item in categorization['kst_only_items']: for item in categorization['kst_only_items']:
title = item.title title = item.title
grouped['kst_only_by_title'][title].append({ grouped['kst_only_by_title'][title].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -500,7 +462,6 @@ class KSTCoordiComparator:
for item in categorization['coordi_only_items']: for item in categorization['coordi_only_items']:
title = item.title title = item.title
grouped['coordi_only_by_title'][title].append({ grouped['coordi_only_by_title'][title].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -512,7 +473,6 @@ class KSTCoordiComparator:
for item in categorization['matched_items']: for item in categorization['matched_items']:
title = item.title title = item.title
grouped['matched_by_title'][title].append({ grouped['matched_by_title'][title].append({
'language': item.language,
'title': item.title, 'title': item.title,
'episode': item.episode, 'episode': item.episode,
'sheet': item.source_sheet, 'sheet': item.source_sheet,
@ -557,13 +517,11 @@ class KSTCoordiComparator:
visualize_rows = [] visualize_rows = []
# Helper function to create a row # Helper function to create a row
def create_row(coordi_language="", coordi_title="", coordi_chapter="", kst_language="", kst_title="", kst_chapter="", def create_row(coordi_title="", coordi_chapter="", kst_title="", kst_chapter="",
row_type="matched", reason="", title_for_sort=""): row_type="matched", reason="", title_for_sort=""):
return { return {
'coordi_language': coordi_language,
'coordi_title': coordi_title, 'coordi_title': coordi_title,
'coordi_chapter': coordi_chapter, 'coordi_chapter': coordi_chapter,
'kst_language': kst_language,
'kst_title': kst_title, 'kst_title': kst_title,
'kst_chapter': kst_chapter, 'kst_chapter': kst_chapter,
'row_type': row_type, 'row_type': row_type,
@ -575,7 +533,6 @@ class KSTCoordiComparator:
# 1. Handle Coordi-only items # 1. Handle Coordi-only items
for item in mismatch_details['coordi_only']: for item in mismatch_details['coordi_only']:
visualize_rows.append(create_row( visualize_rows.append(create_row(
coordi_language=item.get('language', ''),
coordi_title=item['title'], coordi_title=item['title'],
coordi_chapter=item['episode'], coordi_chapter=item['episode'],
row_type='coordi_only', row_type='coordi_only',
@ -585,7 +542,6 @@ class KSTCoordiComparator:
# 2. Handle KST-only items # 2. Handle KST-only items
for item in mismatch_details['kst_only']: for item in mismatch_details['kst_only']:
visualize_rows.append(create_row( visualize_rows.append(create_row(
kst_language=item.get('language', ''),
kst_title=item['title'], kst_title=item['title'],
kst_chapter=item['episode'], kst_chapter=item['episode'],
row_type='kst_only', row_type='kst_only',
@ -593,12 +549,11 @@ class KSTCoordiComparator:
)) ))
# 3. Handle Mixed duplicates (exists in both but duplicated on one side) # 3. Handle Mixed duplicates (exists in both but duplicated on one side)
mixed_items = {} # Group by language+title+episode mixed_items = {} # Group by title+episode
for item in mismatch_details['mixed_duplicates']: for item in mismatch_details['mixed_duplicates']:
key = f"{item.get('language', '')}_{item['title']}_{item['episode']}" key = f"{item['title']}_{item['episode']}"
if key not in mixed_items: if key not in mixed_items:
mixed_items[key] = { mixed_items[key] = {
'language': item.get('language', ''),
'title': item['title'], 'title': item['title'],
'episode': item['episode'], 'episode': item['episode'],
'kst_duplicate_count': 0, 'kst_duplicate_count': 0,
@ -614,10 +569,8 @@ class KSTCoordiComparator:
for key, item in mixed_items.items(): for key, item in mixed_items.items():
# First row: show it exists in both # First row: show it exists in both
visualize_rows.append(create_row( visualize_rows.append(create_row(
coordi_language=item['language'],
coordi_title=item['title'], coordi_title=item['title'],
coordi_chapter=item['episode'], coordi_chapter=item['episode'],
kst_language=item['language'],
kst_title=item['title'], kst_title=item['title'],
kst_chapter=item['episode'], kst_chapter=item['episode'],
row_type='mixed_duplicate', row_type='mixed_duplicate',
@ -625,9 +578,8 @@ class KSTCoordiComparator:
)) ))
# Additional rows for KST duplicates (count - 1 since first is already shown) # Additional rows for KST duplicates (count - 1 since first is already shown)
for _ in range(max(0, item['kst_duplicate_count'] - 1)): for i in range(max(0, item['kst_duplicate_count'] - 1)):
visualize_rows.append(create_row( visualize_rows.append(create_row(
kst_language=item['language'],
kst_title=item['title'], kst_title=item['title'],
kst_chapter=item['episode'], kst_chapter=item['episode'],
row_type='mixed_duplicate', row_type='mixed_duplicate',
@ -636,9 +588,8 @@ class KSTCoordiComparator:
)) ))
# Additional rows for Coordi duplicates (count - 1 since first is already shown) # Additional rows for Coordi duplicates (count - 1 since first is already shown)
for _ in range(max(0, item['coordi_duplicate_count'] - 1)): for i in range(max(0, item['coordi_duplicate_count'] - 1)):
visualize_rows.append(create_row( visualize_rows.append(create_row(
coordi_language=item['language'],
coordi_title=item['title'], coordi_title=item['title'],
coordi_chapter=item['episode'], coordi_chapter=item['episode'],
row_type='mixed_duplicate', row_type='mixed_duplicate',
@ -649,7 +600,6 @@ class KSTCoordiComparator:
# 4. Handle Pure duplicates # 4. Handle Pure duplicates
for item in mismatch_details['kst_duplicates']: for item in mismatch_details['kst_duplicates']:
visualize_rows.append(create_row( visualize_rows.append(create_row(
kst_language=item.get('language', ''),
kst_title=item['title'], kst_title=item['title'],
kst_chapter=item['episode'], kst_chapter=item['episode'],
row_type='pure_duplicate', row_type='pure_duplicate',
@ -658,7 +608,6 @@ class KSTCoordiComparator:
for item in mismatch_details['coordi_duplicates']: for item in mismatch_details['coordi_duplicates']:
visualize_rows.append(create_row( visualize_rows.append(create_row(
coordi_language=item.get('language', ''),
coordi_title=item['title'], coordi_title=item['title'],
coordi_chapter=item['episode'], coordi_chapter=item['episode'],
row_type='pure_duplicate', row_type='pure_duplicate',
@ -667,13 +616,11 @@ class KSTCoordiComparator:
# 5. Handle Matched items (perfect matches) # 5. Handle Matched items (perfect matches)
matched_by_title = summary['grouped_by_title']['matched_by_title'] matched_by_title = summary['grouped_by_title']['matched_by_title']
for _, items in matched_by_title.items(): for title, items in matched_by_title.items():
for item in items: for item in items:
visualize_rows.append(create_row( visualize_rows.append(create_row(
coordi_language=item.get('language', ''),
coordi_title=item['title'], coordi_title=item['title'],
coordi_chapter=item['episode'], coordi_chapter=item['episode'],
kst_language=item.get('language', ''),
kst_title=item['title'], kst_title=item['title'],
kst_chapter=item['episode'], kst_chapter=item['episode'],
row_type='matched', row_type='matched',
@ -698,21 +645,6 @@ class KSTCoordiComparator:
visualize_rows.sort(key=sort_key) visualize_rows.sort(key=sort_key)
return visualize_rows return visualize_rows
def get_coordi_label_for_sheet(self, sheet_filter: str | None = None) -> str:
"""Get the dynamic coordi label from column A for a specific sheet"""
if not self.data:
return "Default"
# Use first sheet if no filter specified
sheet_name = sheet_filter if sheet_filter else list(self.data.keys())[0]
if sheet_name not in self.data:
return "Default"
# Extract the sheet data to get the dynamic label
sheet_data = self.extract_kst_coordi_items_for_sheet(sheet_name)
return sheet_data.get('coordi_label', 'Default')
def generate_excel_export_data(self) -> Dict[str, List[Dict[str, Any]]]: def generate_excel_export_data(self) -> Dict[str, List[Dict[str, Any]]]:
"""Generate data for Excel export with all sheets in visualize format""" """Generate data for Excel export with all sheets in visualize format"""
@ -729,10 +661,8 @@ class KSTCoordiComparator:
excel_rows = [] excel_rows = []
for row in sheet_visualize_data: for row in sheet_visualize_data:
excel_rows.append({ excel_rows.append({
'Coordi Language': row.get('coordi_language', ''),
'Coordi Title': row.get('coordi_title', ''), 'Coordi Title': row.get('coordi_title', ''),
'Coordi Chapter': row.get('coordi_chapter', ''), 'Coordi Chapter': row.get('coordi_chapter', ''),
'KST Language': row.get('kst_language', ''),
'KST Title': row.get('kst_title', ''), 'KST Title': row.get('kst_title', ''),
'KST Chapter': row.get('kst_chapter', ''), 'KST Chapter': row.get('kst_chapter', ''),
'Status': row.get('reason', ''), 'Status': row.get('reason', ''),
@ -781,8 +711,7 @@ class KSTCoordiComparator:
if details: if details:
print(f"{mismatch_type.upper()} (showing first 3):") print(f"{mismatch_type.upper()} (showing first 3):")
for i, item in enumerate(details[:3]): for i, item in enumerate(details[:3]):
language = item.get('language', 'N/A') print(f" {i+1}. {item['title']} - Episode {item['episode']} ({item['reason']})")
print(f" {i+1}. [{language}] {item['title']} - Episode {item['episode']} ({item['reason']})")
if len(details) > 3: if len(details) > 3:
print(f" ... and {len(details) - 3} more") print(f" ... and {len(details) - 3} more")
print() print()

View File

@ -168,46 +168,36 @@
margin: 10px 0; margin: 10px 0;
} }
.table-container { .table-container {
max-height: 900px; max-height: 500px;
overflow-y: auto; overflow-y: auto;
border: 1px solid #ddd; border: 1px solid #ddd;
border-radius: 4px; border-radius: 4px;
} }
/* Professional UI Design System - Semantic Color Palette */ /* Vibrant color styles for Visualize tab */
.coordi-only-row { .coordi-only-row {
background-color: #f0f9ff !important; /* Sky blue 50 - Information state */ background-color: #ff4444 !important; /* Bright red */
color: #0c4a6e !important; /* Sky blue 900 - High contrast text */ color: white;
border-left: 4px solid #0ea5e9 !important; /* Sky blue 500 - Primary accent */
font-weight: 500;
} }
.kst-only-row { .kst-only-row {
background-color: #f7fee7 !important; /* Lime 50 - Success/Available state */ background-color: #4488ff !important; /* Bright blue */
color: #365314 !important; /* Lime 900 - High contrast text */ color: white;
border-left: 4px solid #65a30d !important; /* Lime 600 - Success accent */
font-weight: 500;
} }
.mixed-duplicate-row { .mixed-duplicate-row {
background-color: #fefbeb !important; /* Amber 50 - Warning state */ background-color: #ff8800 !important; /* Bright orange */
color: #92400e !important; /* Amber 800 - High contrast text */ color: white;
border-left: 4px solid #f59e0b !important; /* Amber 500 - Warning accent */
font-weight: 500;
} }
.pure-duplicate-row { .pure-duplicate-row {
background-color: #fef2f2 !important; /* Red 50 - Error/Critical state */ background-color: #8844ff !important; /* Bright purple */
color: #991b1b !important; /* Red 800 - High contrast text */ color: white;
border-left: 4px solid #ef4444 !important; /* Red 500 - Error accent */
font-weight: 500;
} }
.matched-row { .matched-row {
background-color: #ffffff !important; /* Pure white - Neutral/Default state */ background-color: white !important; /* White background */
color: #374151 !important; /* Gray 700 - Standard text */ color: black;
border-left: 4px solid #10b981 !important; /* Emerald 500 - Success indicator */
font-weight: 400;
} }
</style> </style>
</head> </head>
@ -302,10 +292,8 @@
<table id="visualize-table"> <table id="visualize-table">
<thead> <thead>
<tr> <tr>
<th id="coordi-lang-header">Coordi Lang</th> <th>Coordi Title</th>
<th id="coordi-title-header">Coordi Title</th> <th>Coordi Chapter</th>
<th id="coordi-chapter-header">Coordi Chapter</th>
<th>KST Lang</th>
<th>KST Title</th> <th>KST Title</th>
<th>KST Chapter</th> <th>KST Chapter</th>
<th>Status</th> <th>Status</th>
@ -503,12 +491,6 @@
} }
function updateResults(results) { function updateResults(results) {
// Update dynamic headers with coordi label
const coordiLabel = results.coordi_label || 'Coordi';
document.getElementById('coordi-lang-header').textContent = `${coordiLabel} Lang`;
document.getElementById('coordi-title-header').textContent = `${coordiLabel} Title`;
document.getElementById('coordi-chapter-header').textContent = `${coordiLabel} Chapter`;
// Update count displays // Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString(); document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
@ -664,13 +646,11 @@
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title) // Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
visualizeData.forEach(row => { visualizeData.forEach(row => {
const tr = tbody.insertRow(); const tr = tbody.insertRow();
tr.insertCell(0).textContent = row.coordi_language || ''; tr.insertCell(0).textContent = row.coordi_title || '';
tr.insertCell(1).textContent = row.coordi_title || ''; tr.insertCell(1).textContent = row.coordi_chapter || '';
tr.insertCell(2).textContent = row.coordi_chapter || ''; tr.insertCell(2).textContent = row.kst_title || '';
tr.insertCell(3).textContent = row.kst_language || ''; tr.insertCell(3).textContent = row.kst_chapter || '';
tr.insertCell(4).textContent = row.kst_title || ''; tr.insertCell(4).textContent = row.reason || '';
tr.insertCell(5).textContent = row.kst_chapter || '';
tr.insertCell(6).textContent = row.reason || '';
// Apply vibrant color highlighting based on row type // Apply vibrant color highlighting based on row type
switch (row.row_type) { switch (row.row_type) {

View File

@ -1,740 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>KST vs Coordi Data Comparison</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
background-color: #f5f5f5;
}
.container {
max-width: 1400px;
margin: 0 auto;
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
text-align: center;
color: #333;
margin-bottom: 30px;
}
.file-section {
background: #f8f9fa;
padding: 20px;
border-radius: 6px;
margin-bottom: 20px;
}
.file-input {
display: flex;
gap: 10px;
align-items: center;
margin-bottom: 10px;
}
input[type="text"], input[type="file"] {
flex: 1;
padding: 8px 12px;
border: 1px solid #ddd;
border-radius: 4px;
}
input[type="file"] {
padding: 6px 8px;
}
button {
padding: 8px 16px;
background: #007bff;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
button:hover {
background: #0056b3;
}
button:disabled {
background: #6c757d;
cursor: not-allowed;
}
.loading {
text-align: center;
color: #666;
font-style: italic;
}
.tabs {
border-bottom: 2px solid #ddd;
margin-bottom: 20px;
}
.tab {
display: inline-block;
padding: 10px 20px;
background: #f8f9fa;
border: 1px solid #ddd;
border-bottom: none;
cursor: pointer;
margin-right: 5px;
border-radius: 4px 4px 0 0;
}
.tab.active {
background: white;
border-bottom: 2px solid white;
margin-bottom: -2px;
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
.summary-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-bottom: 20px;
}
.summary-card {
background: #f8f9fa;
padding: 15px;
border-radius: 6px;
border-left: 4px solid #007bff;
}
.summary-card h3 {
margin-top: 0;
margin-bottom: 15px;
color: #333;
font-size: 1.1em;
}
.summary-card p {
margin: 8px 0;
color: #555;
}
.summary-card span {
font-weight: bold;
color: #007bff;
}
.count-badge {
display: inline-block;
background: #007bff;
color: white;
padding: 4px 8px;
border-radius: 12px;
font-size: 0.9em;
margin-left: 10px;
}
.reconciliation {
background: #d4edda;
border: 1px solid #c3e6cb;
padding: 15px;
border-radius: 6px;
margin-top: 15px;
}
.reconciliation.mismatch {
background: #f8d7da;
border-color: #f5c6cb;
}
table {
width: 100%;
border-collapse: collapse;
margin-top: 10px;
}
th, td {
padding: 10px;
text-align: left;
border-bottom: 1px solid #ddd;
}
th {
background-color: #f8f9fa;
font-weight: bold;
}
tr:hover {
background-color: #f5f5f5;
}
.error {
background: #f8d7da;
color: #721c24;
padding: 15px;
border-radius: 6px;
margin: 10px 0;
}
.success {
background: #d4edda;
color: #155724;
padding: 15px;
border-radius: 6px;
margin: 10px 0;
}
.table-container {
max-height: 900px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
/* Vibrant color styles for Visualize tab */
.coordi-only-row {
background-color: #ff4444 !important; /* Bright red */
color: white;
}
.kst-only-row {
background-color: #4488ff !important; /* Bright blue */
color: white;
}
.mixed-duplicate-row {
background-color: #ff8800 !important; /* Bright orange */
color: white;
}
.pure-duplicate-row {
background-color: #8844ff !important; /* Bright purple */
color: white;
}
.matched-row {
background-color: white !important; /* White background */
color: black;
}
</style>
</head>
<body>
<div class="container">
<h1>KST vs Coordi Data Comparison Tool</h1>
<div class="file-section">
<div class="file-input">
<label for="filePath">Excel File Path:</label>
<input type="text" id="filePath" value="data/sample-data.xlsx" placeholder="Enter file path">
<button onclick="analyzeData()" id="analyzeBtn">Analyze Data</button>
</div>
<div class="file-input" style="margin-top: 10px;">
<label>Or Upload File:</label>
<input type="file" id="fileUpload" accept=".xlsx,.xls" onchange="handleFileUpload()">
<button onclick="uploadAndAnalyze()" id="uploadBtn" disabled>Upload & Analyze</button>
</div>
<div class="file-input" style="margin-top: 10px;">
<label for="sheetFilter">Sheet Filter:</label>
<select id="sheetFilter" onchange="filterBySheet()" disabled>
<!-- Options will be populated dynamically -->
</select>
</div>
<div id="status"></div>
</div>
<div id="results" style="display: none;">
<div class="tabs">
<div class="tab active" onclick="showTab('summary')">Summary</div>
<div class="tab" onclick="showTab('different')">Different</div>
<div class="tab" onclick="showTab('visualize')">Visualize</div>
</div>
<div id="summary" class="tab-content active">
<!-- Summary Cards Section -->
<div class="summary-grid">
<div class="summary-card">
<h3>📊 Sheet Summary</h3>
<p><strong>Current Sheet:</strong> <span id="current-sheet-name">-</span></p>
<p><strong>Matched Items:</strong> <span id="summary-matched-count">0</span> (Same in both KST and Coordi)</p>
<p><strong>Different Items:</strong> <span id="summary-different-count">0</span> (Total tasks excluding matched items)</p>
</div>
<div class="summary-card">
<h3>🔍 Breakdown</h3>
<p><strong>KST Only:</strong> <span id="summary-kst-only">0</span></p>
<p><strong>Coordi Only:</strong> <span id="summary-coordi-only">0</span></p>
<p><strong>Duplicates:</strong> <span id="summary-duplicates">0</span></p>
</div>
</div>
<h3>Matched Items (Same in both KST and Coordi) <span id="matched-count-display" class="count-badge">0</span></h3>
<div class="table-container">
<table>
<thead>
<tr>
<th>Korean Title</th>
<th>Episode</th>
<th>Sheet</th>
<th>Row</th>
</tr>
</thead>
<tbody id="summary-table">
</tbody>
</table>
</div>
</div>
<div id="different" class="tab-content">
<h3>Different Items <span id="different-count-display" class="count-badge">0</span></h3>
<div class="table-container">
<table>
<thead>
<tr>
<th>KST Data</th>
<th>Coordi Data</th>
<th>Reason</th>
</tr>
</thead>
<tbody id="different-table">
</tbody>
</table>
</div>
</div>
<div id="visualize" class="tab-content">
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
<h3>Excel-like Visualization</h3>
<button onclick="downloadExcel()" id="downloadBtn" style="background: #28a745; padding: 8px 16px;">📥 Download All Sheets</button>
</div>
<div class="table-container">
<table id="visualize-table">
<thead>
<tr>
<th>Coordi Title</th>
<th>Coordi Chapter</th>
<th>KST Title</th>
<th>KST Chapter</th>
<th>Status</th>
</tr>
</thead>
<tbody id="visualize-table-body">
</tbody>
</table>
</div>
</div>
</div>
</div>
<script>
/**
 * Activate the tab panel whose element id is `tabName` and highlight the
 * matching tab button.
 *
 * The tab button is located from its inline `onclick="showTab('<name>')"`
 * attribute rather than from the implicit global `event`, which is deprecated
 * and undefined when this function is called programmatically. The triggering
 * event is only used as a fallback when no button carries such an attribute.
 *
 * @param {string} tabName - id of the `.tab-content` element to show.
 */
function showTab(tabName) {
    const panel = document.getElementById(tabName);
    if (!panel) {
        // Unknown tab id: leave the currently visible tab untouched.
        return;
    }

    // Hide every panel and clear the highlight from every tab button.
    document.querySelectorAll('.tab-content').forEach(content => {
        content.classList.remove('active');
    });
    const tabs = document.querySelectorAll('.tab');
    tabs.forEach(tab => {
        tab.classList.remove('active');
    });

    // Show the requested panel.
    panel.classList.add('active');

    // Highlight the button wired to this panel.
    const handlerText = `showTab('${tabName}')`;
    let highlighted = false;
    tabs.forEach(tab => {
        if ((tab.getAttribute('onclick') || '').includes(handlerText)) {
            tab.classList.add('active');
            highlighted = true;
        }
    });

    // Fallback for buttons wired up without an inline onclick attribute.
    if (!highlighted && window.event && window.event.target && window.event.target.closest) {
        const clickedTab = window.event.target.closest('.tab');
        if (clickedTab) {
            clickedTab.classList.add('active');
        }
    }
}
/**
 * Ask the backend to analyze the file named in the #filePath input, limited to
 * the sheet currently chosen in #sheetFilter (or the backend default when no
 * sheet is chosen), then render the results.
 *
 * Progress, success and error messages are written into #status. All messages
 * are inserted as text, never as HTML, because the error text comes from the
 * server response or from an exception and must not be interpreted as markup.
 */
function analyzeData() {
    const filePath = document.getElementById('filePath').value;
    const sheetFilterElement = document.getElementById('sheetFilter');
    const sheetFilter = sheetFilterElement.value || null; // Use null if empty
    const statusDiv = document.getElementById('status');
    const analyzeBtn = document.getElementById('analyzeBtn');

    // Replace the status area with a single <div class="cssClass"> holding plain text.
    const showStatus = (cssClass, message) => {
        const box = document.createElement('div');
        box.className = cssClass;
        box.textContent = message;
        statusDiv.innerHTML = '';
        statusDiv.appendChild(box);
    };

    if (!filePath.trim()) {
        showStatus('error', 'Please enter a file path');
        return;
    }

    // Show loading state
    showStatus('loading', 'Analyzing data...');
    analyzeBtn.disabled = true;
    analyzeBtn.textContent = 'Analyzing...';

    // Make API call
    fetch('/analyze', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            file_path: filePath,
            sheet_filter: sheetFilter
        })
    })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                showStatus('success', 'Analysis complete!');
                updateResults(data.results);
                updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
                document.getElementById('results').style.display = 'block';
            } else {
                showStatus('error', `Error: ${data.error}`);
            }
        })
        .catch(error => {
            showStatus('error', `Error: ${error.message}`);
        })
        .finally(() => {
            // Re-enable the button whether the request succeeded or failed.
            analyzeBtn.disabled = false;
            analyzeBtn.textContent = 'Analyze Data';
        });
}
/**
 * Rebuild the #sheetFilter dropdown from a list of sheet names.
 *
 * With no names, the dropdown shows a single disabled "Loading sheets..."
 * placeholder and is itself disabled. Otherwise one option is created per
 * sheet; the option equal to `currentFilter` is selected, or the first option
 * when `currentFilter` is empty.
 *
 * @param {string[]} sheetNames - sheet names to offer; may be empty or missing.
 * @param {?string} currentFilter - name of the sheet that should be selected.
 */
function updateSheetFilter(sheetNames, currentFilter) {
    const dropdown = document.getElementById('sheetFilter');
    dropdown.innerHTML = '';

    const haveSheets = Boolean(sheetNames) && sheetNames.length !== 0;
    if (!haveSheets) {
        // Nothing to offer yet: show a locked placeholder entry.
        const placeholder = Object.assign(document.createElement('option'), {
            value: '',
            textContent: 'Loading sheets...',
            disabled: true,
            selected: true
        });
        dropdown.appendChild(placeholder);
        dropdown.disabled = true;
        return;
    }

    sheetNames.forEach((name, position) => {
        const entry = Object.assign(document.createElement('option'), {
            value: name,
            textContent: name
        });
        // Prefer the active filter; fall back to the first sheet when none is set.
        const isFirstByDefault = !currentFilter && position === 0;
        if (name === currentFilter || isFirstByDefault) {
            entry.selected = true;
        }
        dropdown.appendChild(entry);
    });
    dropdown.disabled = false;
}
// Change handler for the #sheetFilter dropdown. It takes no arguments and
// simply delegates to analyzeData(), which reads the selected sheet from the
// dropdown itself.
function filterBySheet() {
// Re-analyze with the selected sheet filter
analyzeData();
}
/**
 * Sync the "Upload & Analyze" button with the file picker: the button is
 * enabled (and its label restored) only while a file is selected.
 */
function handleFileUpload() {
    const picker = document.getElementById('fileUpload');
    const button = document.getElementById('uploadBtn');
    const hasSelection = picker.files.length > 0;

    button.disabled = !hasSelection;
    if (hasSelection) {
        button.textContent = 'Upload & Analyze';
    }
}
/**
 * Upload the file chosen in #fileUpload to /upload, then ask /analyze to
 * process the uploaded copy and render the results.
 *
 * On a successful upload the returned server-side path is written into
 * #filePath and the analysis is requested with no sheet filter, so the backend
 * falls back to its default sheet. Status messages are written into #status as
 * plain text, never as HTML, because error text originates from the server or
 * from an exception and must not be interpreted as markup.
 */
function uploadAndAnalyze() {
    const fileInput = document.getElementById('fileUpload');
    const statusDiv = document.getElementById('status');
    const uploadBtn = document.getElementById('uploadBtn');

    // Replace the status area with a single <div class="cssClass"> holding plain text.
    const showStatus = (cssClass, message) => {
        const box = document.createElement('div');
        box.className = cssClass;
        box.textContent = message;
        statusDiv.innerHTML = '';
        statusDiv.appendChild(box);
    };

    if (fileInput.files.length === 0) {
        showStatus('error', 'Please select a file to upload');
        return;
    }

    const file = fileInput.files[0];
    const formData = new FormData();
    formData.append('file', file);

    // Show uploading state
    showStatus('loading', 'Uploading and analyzing file...');
    uploadBtn.disabled = true;
    uploadBtn.textContent = 'Uploading...';

    // Upload file
    fetch('/upload', {
        method: 'POST',
        body: formData
    })
        .then(response => response.json())
        .then(data => {
            if (!data.success) {
                // Routed to the catch handler below and shown as an upload error.
                throw new Error(data.error);
            }
            // File uploaded successfully, now analyze it
            document.getElementById('filePath').value = data.file_path;
            showStatus('loading', 'File uploaded! Analyzing data...');
            return fetch('/analyze', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    file_path: data.file_path,
                    // Always use the default (first) sheet for a newly uploaded file.
                    sheet_filter: null
                })
            });
        })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                showStatus('success', 'File uploaded and analyzed successfully!');
                updateResults(data.results);
                updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
                document.getElementById('results').style.display = 'block';
            } else {
                showStatus('error', `Analysis error: ${data.error}`);
            }
        })
        .catch(error => {
            showStatus('error', `Upload error: ${error.message}`);
        })
        .finally(() => {
            uploadBtn.disabled = false;
            uploadBtn.textContent = 'Upload & Analyze';
            handleFileUpload(); // Reset button state based on file selection
        });
}
/**
 * Push one analysis result object into the page: the count badges, the
 * summary cards, and the three result tables.
 *
 * @param {Object} results - the `results` payload returned by /analyze.
 */
function updateResults(results) {
    // Write a value into the element with the given id.
    const put = (id, value) => {
        document.getElementById(id).textContent = value;
    };
    const addUp = (values) => values.reduce((sum, value) => sum + value);

    // Count badges
    put('matched-count-display', results.matched_items_count.toLocaleString());

    // "Different" covers one-sided items plus every kind of duplicate.
    const differentTotal = addUp([
        results.mismatches.kst_only_count,
        results.mismatches.coordi_only_count,
        results.mismatches.kst_duplicates_count,
        results.mismatches.coordi_duplicates_count,
        results.mismatches.mixed_duplicates_count || 0
    ]);
    put('different-count-display', differentTotal.toLocaleString());

    // Summary cards
    put('current-sheet-name', results.current_sheet_filter);
    put('summary-matched-count', results.matched_items_count.toLocaleString());
    put('summary-different-count', differentTotal.toLocaleString());
    put('summary-kst-only', results.mismatches.kst_only_count.toLocaleString());
    put('summary-coordi-only', results.mismatches.coordi_only_count.toLocaleString());

    // Duplicates from KST, Coordi and mixed, reported as one figure.
    const duplicateTotal = addUp([
        results.mismatches.kst_duplicates_count,
        results.mismatches.coordi_duplicates_count,
        results.mismatches.mixed_duplicates_count || 0
    ]);
    put('summary-duplicates', duplicateTotal.toLocaleString());

    // Tables: Summary (matched items), Different, Visualize
    updateSummaryTable(results.matched_data);
    updateDifferentTable(results.mismatch_details);
    updateVisualizeTable(results.visualize_data);
}
/**
 * Fill the Summary tab table (#summary-table) with the matched items, sorted
 * by title using Korean collation and then by episode number.
 *
 * The Row column shows `row_index + 1` when the item carries a `row_index`,
 * otherwise it falls back to the item's `row` field. A `row_index` of 0 is a
 * valid index and is displayed as 1; only a missing index triggers the
 * fallback.
 *
 * @param {Array<Object>} matchedData - items with `title`, `episode`, `sheet`
 *     and either `row_index` or `row`; a missing list renders an empty table.
 */
function updateSummaryTable(matchedData) {
    const tbody = document.getElementById('summary-table');
    tbody.innerHTML = '';

    // Sort a copy by Korean title + episode so the caller's array is not mutated.
    const sortedData = [...(matchedData || [])].sort((a, b) => {
        const titleCompare = a.title.localeCompare(b.title, 'ko');
        if (titleCompare !== 0) return titleCompare;
        // Try to sort episodes numerically; non-numeric episodes count as 0.
        const aEp = parseFloat(a.episode) || 0;
        const bEp = parseFloat(b.episode) || 0;
        return aEp - bEp;
    });

    sortedData.forEach(item => {
        const row = tbody.insertRow();
        row.insertCell(0).textContent = item.title;
        row.insertCell(1).textContent = item.episode;
        row.insertCell(2).textContent = item.sheet;
        // Test for presence explicitly: a plain truthiness check would discard index 0.
        const hasRowIndex = item.row_index !== undefined && item.row_index !== null;
        row.insertCell(3).textContent = hasRowIndex ? item.row_index + 1 : item.row;
    });
}
/**
 * Fill the Different tab table (#different-table) with every mismatch:
 * KST-only items, Coordi-only items, duplicates on either side and, when
 * present, mixed duplicates. Rows are sorted by title (Korean collation) and
 * then by episode number; duplicate rows get a background tint and a tooltip.
 *
 * @param {Object} mismatchDetails - lists under `kst_only`, `coordi_only`,
 *     `kst_duplicates`, `coordi_duplicates` and optionally `mixed_duplicates`.
 */
function updateDifferentTable(mismatchDetails) {
    const tableBody = document.getElementById('different-table');
    tableBody.innerHTML = '';

    const describe = (entry) => `${entry.title} - Episode ${entry.episode}`;
    const records = [];

    // Append one record per entry; `inKst` / `inCoordi` choose which columns get text.
    const collect = (entries, inKst, inCoordi, reasonFor, highlight) => {
        entries.forEach(entry => {
            records.push({
                kstData: inKst ? describe(entry) : '',
                coordiData: inCoordi ? describe(entry) : '',
                reason: reasonFor(entry),
                sortTitle: entry.title,
                sortEpisode: parseFloat(entry.episode) || 0,
                highlightType: highlight
            });
        });
    };

    // One-sided items (no special highlighting)
    collect(mismatchDetails.kst_only, true, false, () => 'Only appears in KST', 'none');
    collect(mismatchDetails.coordi_only, false, true, () => 'Only appears in Coordi', 'none');
    // Pure duplicates (red highlighting)
    collect(mismatchDetails.kst_duplicates, true, false, () => 'Duplicate entry in KST data', 'red');
    collect(mismatchDetails.coordi_duplicates, false, true, () => 'Duplicate entry in Coordi data', 'red');
    // Mixed duplicates (yellow highlighting): shown in both columns, reason supplied by the item
    if (mismatchDetails.mixed_duplicates) {
        collect(mismatchDetails.mixed_duplicates, true, true, (entry) => entry.reason, 'yellow');
    }

    // Order by Korean title, then episode
    records.sort((left, right) => {
        const byTitle = left.sortTitle.localeCompare(right.sortTitle, 'ko');
        return byTitle !== 0 ? byTitle : left.sortEpisode - right.sortEpisode;
    });

    // Background colour and tooltip per highlight type; 'none' has no entry.
    const decorations = {
        red: { background: '#f8d7da', tooltip: 'Pure duplicate entry' },
        yellow: { background: '#fff3cd', tooltip: 'Item exists in both datasets but has duplicates on one side' }
    };

    records.forEach(record => {
        const tr = tableBody.insertRow();
        [record.kstData, record.coordiData, record.reason].forEach((text, column) => {
            tr.insertCell(column).textContent = text;
        });
        const decoration = decorations[record.highlightType];
        if (decoration) {
            tr.style.backgroundColor = decoration.background;
            tr.title = decoration.tooltip;
        }
    });
}
/**
 * Fill the Visualize tab table (#visualize-table-body), one table row per
 * entry of `visualizeData`, in the order received.
 *
 * Exactly five cells are written per row, matching the five header columns of
 * #visualize-table (Coordi Title, Coordi Chapter, KST Title, KST Chapter,
 * Status). Writing extra language cells would shift every value out from under
 * its heading. Each row is then given a colour class according to its
 * `row_type`; unknown types are left unstyled.
 *
 * @param {Array<Object>} visualizeData - rows with `coordi_title`,
 *     `coordi_chapter`, `kst_title`, `kst_chapter`, `reason` and `row_type`.
 */
function updateVisualizeTable(visualizeData) {
    const tbody = document.getElementById('visualize-table-body');
    tbody.innerHTML = '';

    // Row fields in header order; keep in step with the <thead> of #visualize-table.
    const columnFields = ['coordi_title', 'coordi_chapter', 'kst_title', 'kst_chapter', 'reason'];

    // CSS class applied for each known row type.
    const rowClasses = {
        coordi_only: 'coordi-only-row',
        kst_only: 'kst-only-row',
        mixed_duplicate: 'mixed-duplicate-row',
        pure_duplicate: 'pure-duplicate-row',
        matched: 'matched-row'
    };

    // Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
    visualizeData.forEach(row => {
        const tr = tbody.insertRow();
        columnFields.forEach((field, index) => {
            tr.insertCell(index).textContent = row[field] || '';
        });

        // Own-property check so an unexpected row_type never resolves to an inherited member.
        if (Object.prototype.hasOwnProperty.call(rowClasses, row.row_type)) {
            tr.className = rowClasses[row.row_type];
        }
    });
}
/**
 * Fetch the Excel export from /download_excel and hand it to the browser as
 * a file named data_comparison_export.xlsx.
 *
 * The download button is greyed out and relabelled while the request runs and
 * restored afterwards. A success or failure message is shown in #status and
 * cleared again after 3 or 5 seconds respectively.
 */
function downloadExcel() {
    const button = document.getElementById('downloadBtn');
    const idleLabel = button.textContent;

    // Show a message in #status and clear it again after `delayMs`.
    const flashStatus = (markup, delayMs) => {
        const statusBox = document.getElementById('status');
        statusBox.innerHTML = markup;
        setTimeout(() => {
            statusBox.innerHTML = '';
        }, delayMs);
    };

    // Trigger a browser download of the blob through a temporary hidden link.
    const saveBlob = (blob) => {
        const objectUrl = window.URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.style.display = 'none';
        link.href = objectUrl;
        link.download = 'data_comparison_export.xlsx';
        document.body.appendChild(link);
        link.click();
        window.URL.revokeObjectURL(objectUrl);
        document.body.removeChild(link);
    };

    // Busy state
    button.disabled = true;
    button.textContent = '⏳ Generating...';
    button.style.background = '#6c757d';

    fetch('/download_excel', {
        method: 'GET'
    })
        .then(response => {
            if (response.ok) {
                return response.blob();
            }
            throw new Error('Download failed');
        })
        .then(blob => {
            saveBlob(blob);
            flashStatus('<div class="success">Excel file downloaded successfully!</div>', 3000);
        })
        .catch(error => {
            console.error('Download error:', error);
            flashStatus('<div class="error">Download failed. Please try again.</div>', 5000);
        })
        .finally(() => {
            // Back to the idle look regardless of outcome.
            button.disabled = false;
            button.textContent = idleLabel;
            button.style.background = '#28a745';
        });
}
// On page load: put the sheet dropdown into its placeholder state, then
// analyze whatever path is pre-filled in the file path input.
window.onload = () => {
    updateSheetFilter([], null);
    analyzeData();
};
</script>
</body>
</html>

View File

@ -66,10 +66,6 @@ def analyze_data():
visualize_data = comparator_instance.generate_visualize_data(sheet_filter) visualize_data = comparator_instance.generate_visualize_data(sheet_filter)
comparison_results['visualize_data'] = visualize_data comparison_results['visualize_data'] = visualize_data
# Get dynamic coordi label for display
coordi_label = comparator_instance.get_coordi_label_for_sheet(sheet_filter)
comparison_results['coordi_label'] = coordi_label
return jsonify({ return jsonify({
'success': True, 'success': True,
'results': comparison_results 'results': comparison_results
@ -172,31 +168,16 @@ def download_excel():
workbook = writer.book workbook = writer.book
worksheet = writer.sheets[clean_sheet_name] worksheet = writer.sheets[clean_sheet_name]
# Apply professional color formatting based on Type column # Apply color formatting based on Type column
from openpyxl.styles import PatternFill, Font from openpyxl.styles import PatternFill
# Define professional colors matching the new web interface design system # Define colors matching the web interface
colors = { colors = {
'Coordi Only': { 'Coordi Only': PatternFill(start_color='FF4444', end_color='FF4444', fill_type='solid'),
'fill': PatternFill(start_color='F0F9FF', end_color='F0F9FF', fill_type='solid'), 'Kst Only': PatternFill(start_color='4488FF', end_color='4488FF', fill_type='solid'),
'font': Font(color='0C4A6E', bold=True) 'Mixed Duplicate': PatternFill(start_color='FF8800', end_color='FF8800', fill_type='solid'),
}, 'Pure Duplicate': PatternFill(start_color='8844FF', end_color='8844FF', fill_type='solid'),
'Kst Only': { 'Matched': PatternFill(start_color='FFFFFF', end_color='FFFFFF', fill_type='solid')
'fill': PatternFill(start_color='F7FEE7', end_color='F7FEE7', fill_type='solid'),
'font': Font(color='365314', bold=True)
},
'Mixed Duplicate': {
'fill': PatternFill(start_color='FEFBEB', end_color='FEFBEB', fill_type='solid'),
'font': Font(color='92400E', bold=True)
},
'Pure Duplicate': {
'fill': PatternFill(start_color='FEF2F2', end_color='FEF2F2', fill_type='solid'),
'font': Font(color='991B1B', bold=True)
},
'Matched': {
'fill': PatternFill(start_color='FFFFFF', end_color='FFFFFF', fill_type='solid'),
'font': Font(color='374151', bold=False)
}
} }
# Find the Type column (should be column F, index 5) # Find the Type column (should be column F, index 5)
@ -206,16 +187,15 @@ def download_excel():
type_col_idx = idx + 1 # Excel is 1-indexed type_col_idx = idx + 1 # Excel is 1-indexed
break break
# Apply professional formatting to data rows (skip header) # Apply formatting to data rows (skip header)
if type_col_idx: if type_col_idx:
for row_idx, row_data in enumerate(sheet_data, start=2): # Start from row 2 (after header) for row_idx, row_data in enumerate(sheet_data, start=2): # Start from row 2 (after header)
row_type = row_data.get('Type', '') row_type = row_data.get('Type', '')
style_config = colors.get(row_type) fill = colors.get(row_type)
if style_config: if fill:
for col_idx in range(1, len(df.columns) + 1): for col_idx in range(1, len(df.columns) + 1):
cell = worksheet.cell(row=row_idx, column=col_idx) cell = worksheet.cell(row=row_idx, column=col_idx)
cell.fill = style_config['fill'] cell.fill = fill
cell.font = style_config['font']
# Auto-adjust column widths # Auto-adjust column widths
for column in worksheet.columns: for column in worksheet.columns:
@ -250,12 +230,6 @@ def create_templates_dir():
templates_dir = Path('templates') templates_dir = Path('templates')
templates_dir.mkdir(exist_ok=True) templates_dir.mkdir(exist_ok=True)
# Only create the HTML file if it doesn't exist (don't overwrite existing customizations)
html_file = templates_dir / 'index.html'
if html_file.exists():
print("Template file already exists, skipping auto-generation to preserve customizations.")
return
html_content = '''<!DOCTYPE html> html_content = '''<!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
@ -432,40 +406,30 @@ def create_templates_dir():
border-radius: 4px; border-radius: 4px;
} }
/* Professional UI Design System - Semantic Color Palette */ /* Vibrant color styles for Visualize tab */
.coordi-only-row { .coordi-only-row {
background-color: #f0f9ff !important; /* Sky blue 50 - Information state */ background-color: #ff4444 !important; /* Bright red */
color: #0c4a6e !important; /* Sky blue 900 - High contrast text */ color: white;
border-left: 4px solid #0ea5e9 !important; /* Sky blue 500 - Primary accent */
font-weight: 500;
} }
.kst-only-row { .kst-only-row {
background-color: #f7fee7 !important; /* Lime 50 - Success/Available state */ background-color: #4488ff !important; /* Bright blue */
color: #365314 !important; /* Lime 900 - High contrast text */ color: white;
border-left: 4px solid #65a30d !important; /* Lime 600 - Success accent */
font-weight: 500;
} }
.mixed-duplicate-row { .mixed-duplicate-row {
background-color: #fefbeb !important; /* Amber 50 - Warning state */ background-color: #ff8800 !important; /* Bright orange */
color: #92400e !important; /* Amber 800 - High contrast text */ color: white;
border-left: 4px solid #f59e0b !important; /* Amber 500 - Warning accent */
font-weight: 500;
} }
.pure-duplicate-row { .pure-duplicate-row {
background-color: #fef2f2 !important; /* Red 50 - Error/Critical state */ background-color: #8844ff !important; /* Bright purple */
color: #991b1b !important; /* Red 800 - High contrast text */ color: white;
border-left: 4px solid #ef4444 !important; /* Red 500 - Error accent */
font-weight: 500;
} }
.matched-row { .matched-row {
background-color: #ffffff !important; /* Pure white - Neutral/Default state */ background-color: white !important; /* White background */
color: #374151 !important; /* Gray 700 - Standard text */ color: black;
border-left: 4px solid #10b981 !important; /* Emerald 500 - Success indicator */
font-weight: 400;
} }
</style> </style>
</head> </head>
@ -560,10 +524,8 @@ def create_templates_dir():
<table id="visualize-table"> <table id="visualize-table">
<thead> <thead>
<tr> <tr>
<th id="coordi-lang-header">Coordi Lang</th> <th>Coordi Title</th>
<th id="coordi-title-header">Coordi Title</th> <th>Coordi Chapter</th>
<th id="coordi-chapter-header">Coordi Chapter</th>
<th>KST Lang</th>
<th>KST Title</th> <th>KST Title</th>
<th>KST Chapter</th> <th>KST Chapter</th>
<th>Status</th> <th>Status</th>
@ -761,12 +723,6 @@ def create_templates_dir():
} }
function updateResults(results) { function updateResults(results) {
// Update dynamic headers with coordi label
const coordiLabel = results.coordi_label || 'Coordi';
document.getElementById('coordi-lang-header').textContent = `${coordiLabel} Lang`;
document.getElementById('coordi-title-header').textContent = `${coordiLabel} Title`;
document.getElementById('coordi-chapter-header').textContent = `${coordiLabel} Chapter`;
// Update count displays // Update count displays
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString(); document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
@ -922,13 +878,11 @@ def create_templates_dir():
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title) // Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
visualizeData.forEach(row => { visualizeData.forEach(row => {
const tr = tbody.insertRow(); const tr = tbody.insertRow();
tr.insertCell(0).textContent = row.coordi_language || ''; tr.insertCell(0).textContent = row.coordi_title || '';
tr.insertCell(1).textContent = row.coordi_title || ''; tr.insertCell(1).textContent = row.coordi_chapter || '';
tr.insertCell(2).textContent = row.coordi_chapter || ''; tr.insertCell(2).textContent = row.kst_title || '';
tr.insertCell(3).textContent = row.kst_language || ''; tr.insertCell(3).textContent = row.kst_chapter || '';
tr.insertCell(4).textContent = row.kst_title || ''; tr.insertCell(4).textContent = row.reason || '';
tr.insertCell(5).textContent = row.kst_chapter || '';
tr.insertCell(6).textContent = row.reason || '';
// Apply vibrant color highlighting based on row type // Apply vibrant color highlighting based on row type
switch (row.row_type) { switch (row.row_type) {