Merge pull request 'add 'lang' criteria for comparison' (#4) from dev/viettran into main
Reviewed-on: #4
This commit is contained in:
commit
5ab713f336
BIN
data/Kst_Task.xlsx
Normal file
BIN
data/Kst_Task.xlsx
Normal file
Binary file not shown.
BIN
data/data_comparison_export (2).xlsx
Normal file
BIN
data/data_comparison_export (2).xlsx
Normal file
Binary file not shown.
2595
data/raw.csv
Normal file
2595
data/raw.csv
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
BIN
data/test_language_fix.xlsx
Normal file
BIN
data/test_language_fix.xlsx
Normal file
Binary file not shown.
@ -24,24 +24,26 @@ def normalize_episode(episode: str) -> str:
|
||||
|
||||
class ComparisonItem:
|
||||
"""Represents a single item for comparison"""
|
||||
language: str
|
||||
title: str
|
||||
episode: str
|
||||
source_sheet: str
|
||||
row_index: int
|
||||
|
||||
def __init__(self, title: str, episode: str, source_sheet: str, row_index: int):
|
||||
def __init__(self, language: str, title: str, episode: str, source_sheet: str, row_index: int):
|
||||
self.language = language
|
||||
self.title = title
|
||||
self.episode = normalize_episode(episode) # Normalize episode on creation
|
||||
self.source_sheet = source_sheet
|
||||
self.row_index = row_index
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.title, self.episode))
|
||||
return hash((self.language, self.title, self.episode))
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, ComparisonItem):
|
||||
return False
|
||||
return self.title == other.title and self.episode == other.episode
|
||||
return self.language == other.language and self.title == other.title and self.episode == other.episode
|
||||
|
||||
class KSTCoordiComparator:
|
||||
"""
|
||||
@ -81,99 +83,129 @@ class KSTCoordiComparator:
|
||||
kst_all_items = [] # Keep all items including duplicates
|
||||
coordi_all_items = [] # Keep all items including duplicates
|
||||
|
||||
# Try fixed column positions first, then fall back to header names
|
||||
# KST columns: I (index 8) for title, J (index 9) for chapter
|
||||
# Coordi columns: C (index 2) for title, D (index 3) for chapter
|
||||
# Fixed column positions - NO header name search
|
||||
# Coordi: B(1), C(2), D(3) = Language, Title, Chapter
|
||||
# KST: H(7), I(8), J(9) = Language, Title, Chapter
|
||||
|
||||
kst_title_col_idx = 8 # Column I
|
||||
kst_episode_col_idx = 9 # Column J
|
||||
coordi_title_col_idx = 2 # Column C
|
||||
coordi_episode_col_idx = 3 # Column D
|
||||
coordi_language_col_idx = 1 # Column B
|
||||
coordi_title_col_idx = 2 # Column C
|
||||
coordi_episode_col_idx = 3 # Column D
|
||||
kst_language_col_idx = 7 # Column H
|
||||
kst_title_col_idx = 8 # Column I
|
||||
kst_episode_col_idx = 9 # Column J
|
||||
|
||||
# Get column names by index (if they exist)
|
||||
kst_title_col = columns[kst_title_col_idx] if len(columns) > kst_title_col_idx else None
|
||||
kst_episode_col = columns[kst_episode_col_idx] if len(columns) > kst_episode_col_idx else None
|
||||
coordi_title_col = columns[coordi_title_col_idx] if len(columns) > coordi_title_col_idx else None
|
||||
coordi_episode_col = columns[coordi_episode_col_idx] if len(columns) > coordi_episode_col_idx else None
|
||||
# Validate that all required columns exist
|
||||
required_columns = [
|
||||
(coordi_language_col_idx, 'Coordi Language (Column B)'),
|
||||
(coordi_title_col_idx, 'Coordi Title (Column C)'),
|
||||
(coordi_episode_col_idx, 'Coordi Episode (Column D)'),
|
||||
(kst_language_col_idx, 'KST Language (Column H)'),
|
||||
(kst_title_col_idx, 'KST Title (Column I)'),
|
||||
(kst_episode_col_idx, 'KST Episode (Column J)')
|
||||
]
|
||||
|
||||
# Fallback: search by header names if fixed positions don't work
|
||||
if not kst_title_col or not kst_episode_col:
|
||||
for i, col in enumerate(columns):
|
||||
if col == 'Title KR':
|
||||
kst_title_col = col
|
||||
kst_title_col_idx = i
|
||||
elif col == 'Epi.':
|
||||
kst_episode_col = col
|
||||
kst_episode_col_idx = i
|
||||
missing_columns = []
|
||||
for col_idx, col_name in required_columns:
|
||||
if len(columns) <= col_idx:
|
||||
missing_columns.append(col_name)
|
||||
|
||||
if not coordi_title_col or not coordi_episode_col:
|
||||
for i, col in enumerate(columns):
|
||||
if col == 'KR title':
|
||||
coordi_title_col = col
|
||||
coordi_title_col_idx = i
|
||||
elif col == 'Chap':
|
||||
coordi_episode_col = col
|
||||
coordi_episode_col_idx = i
|
||||
if missing_columns:
|
||||
error_msg = f"Missing required columns in sheet '{sheet_name}':\n" + "\n".join(f" - {col}" for col in missing_columns)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
# Get column names by fixed positions
|
||||
coordi_language_col = columns[coordi_language_col_idx]
|
||||
coordi_title_col = columns[coordi_title_col_idx]
|
||||
coordi_episode_col = columns[coordi_episode_col_idx]
|
||||
kst_language_col = columns[kst_language_col_idx]
|
||||
kst_title_col = columns[kst_title_col_idx]
|
||||
kst_episode_col = columns[kst_episode_col_idx]
|
||||
|
||||
# Extract dynamic label from column A (index 0) for flexible naming
|
||||
coordi_label = "Default" # Default fallback
|
||||
if len(columns) > 0 and columns[0] and str(columns[0]).strip():
|
||||
coordi_label = str(columns[0]).strip()
|
||||
elif len(df) > 0:
|
||||
# Try to get from first data row if header is empty
|
||||
first_row_col_a = str(df.iloc[0, 0]).strip() if not pd.isna(df.iloc[0, 0]) else ""
|
||||
if first_row_col_a and first_row_col_a.lower() not in ['nan', 'none', '']:
|
||||
coordi_label = first_row_col_a
|
||||
|
||||
print(f"Sheet: {sheet_name}")
|
||||
print(f" KST columns - Title: Column {chr(65 + kst_title_col_idx) if kst_title_col else 'None'} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx) if kst_episode_col else 'None'} ({kst_episode_col})")
|
||||
print(f" Coordi columns - Title: Column {chr(65 + coordi_title_col_idx) if coordi_title_col else 'None'} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx) if coordi_episode_col else 'None'} ({coordi_episode_col})")
|
||||
print(f" KST columns - Language: Column {chr(65 + kst_language_col_idx)} ({kst_language_col}), Title: Column {chr(65 + kst_title_col_idx)} ({kst_title_col}), Episode: Column {chr(65 + kst_episode_col_idx)} ({kst_episode_col})")
|
||||
print(f" Coordi columns - Language: Column {chr(65 + coordi_language_col_idx)} ({coordi_language_col}), Title: Column {chr(65 + coordi_title_col_idx)} ({coordi_title_col}), Episode: Column {chr(65 + coordi_episode_col_idx)} ({coordi_episode_col})")
|
||||
|
||||
# Extract items from each row
|
||||
for idx, row in df.iterrows():
|
||||
# Extract KST data
|
||||
if kst_title_col and kst_episode_col:
|
||||
kst_title = str(row.get(kst_title_col, '')).strip()
|
||||
kst_episode = str(row.get(kst_episode_col, '')).strip()
|
||||
|
||||
# Check if this row has valid KST data
|
||||
has_kst_data = (
|
||||
kst_title and kst_title != 'nan' and
|
||||
kst_episode and kst_episode != 'nan' and
|
||||
pd.notna(row[kst_title_col]) and pd.notna(row[kst_episode_col])
|
||||
)
|
||||
|
||||
if has_kst_data:
|
||||
item = ComparisonItem(kst_title, kst_episode, sheet_name, idx)
|
||||
kst_items.add(item)
|
||||
kst_all_items.append(item) # Keep all items for duplicate detection
|
||||
kst_details.append({
|
||||
'title': kst_title,
|
||||
'episode': kst_episode,
|
||||
'sheet': sheet_name,
|
||||
'row_index': idx,
|
||||
'kst_data': {
|
||||
kst_title_col: row[kst_title_col],
|
||||
kst_episode_col: row[kst_episode_col]
|
||||
}
|
||||
})
|
||||
# Extract KST data from fixed positions H, I, J
|
||||
kst_language = str(row.get(kst_language_col, '')).strip()
|
||||
kst_title = str(row.get(kst_title_col, '')).strip()
|
||||
kst_episode = str(row.get(kst_episode_col, '')).strip()
|
||||
|
||||
# Extract Coordi data
|
||||
if coordi_title_col and coordi_episode_col:
|
||||
coordi_title = str(row.get(coordi_title_col, '')).strip()
|
||||
coordi_episode = str(row.get(coordi_episode_col, '')).strip()
|
||||
|
||||
# Check if this row has valid Coordi data
|
||||
has_coordi_data = (
|
||||
coordi_title and coordi_title != 'nan' and
|
||||
coordi_episode and coordi_episode != 'nan' and
|
||||
pd.notna(row[coordi_title_col]) and pd.notna(row[coordi_episode_col])
|
||||
)
|
||||
|
||||
if has_coordi_data:
|
||||
item = ComparisonItem(coordi_title, coordi_episode, sheet_name, idx)
|
||||
coordi_items.add(item)
|
||||
coordi_all_items.append(item) # Keep all items for duplicate detection
|
||||
coordi_details.append({
|
||||
'title': coordi_title,
|
||||
'episode': coordi_episode,
|
||||
'sheet': sheet_name,
|
||||
'row_index': idx,
|
||||
'coordi_data': {
|
||||
coordi_title_col: row[coordi_title_col],
|
||||
coordi_episode_col: row[coordi_episode_col]
|
||||
}
|
||||
})
|
||||
# Check if this row has valid KST data
|
||||
has_kst_data = (
|
||||
kst_language and kst_language != 'nan' and
|
||||
kst_title and kst_title != 'nan' and
|
||||
kst_episode and kst_episode != 'nan' and
|
||||
pd.notna(row[kst_language_col]) and pd.notna(row[kst_title_col]) and pd.notna(row[kst_episode_col])
|
||||
)
|
||||
|
||||
# Validate language is not empty - raise error if found
|
||||
if pd.notna(row[kst_title_col]) and pd.notna(row[kst_episode_col]): # Only check if this is a real data row
|
||||
if not kst_language or kst_language == 'nan' or pd.isna(row[kst_language_col]):
|
||||
raise ValueError(f"Empty language value found in KST data at row {idx + 1} ('{kst_title}' - Episode {kst_episode}): All language fields must be populated")
|
||||
|
||||
if has_kst_data:
|
||||
item = ComparisonItem(kst_language, kst_title, kst_episode, sheet_name, idx)
|
||||
kst_items.add(item)
|
||||
kst_all_items.append(item) # Keep all items for duplicate detection
|
||||
kst_details.append({
|
||||
'language': kst_language,
|
||||
'title': kst_title,
|
||||
'episode': kst_episode,
|
||||
'sheet': sheet_name,
|
||||
'row_index': idx,
|
||||
'kst_data': {
|
||||
kst_language_col: row[kst_language_col],
|
||||
kst_title_col: row[kst_title_col],
|
||||
kst_episode_col: row[kst_episode_col]
|
||||
}
|
||||
})
|
||||
|
||||
# Extract Coordi data from fixed positions B, C, D
|
||||
coordi_language = str(row.get(coordi_language_col, '')).strip()
|
||||
coordi_title = str(row.get(coordi_title_col, '')).strip()
|
||||
coordi_episode = str(row.get(coordi_episode_col, '')).strip()
|
||||
|
||||
# Check if this row has valid Coordi data
|
||||
has_coordi_data = (
|
||||
coordi_language and coordi_language != 'nan' and
|
||||
coordi_title and coordi_title != 'nan' and
|
||||
coordi_episode and coordi_episode != 'nan' and
|
||||
pd.notna(row[coordi_language_col]) and pd.notna(row[coordi_title_col]) and pd.notna(row[coordi_episode_col])
|
||||
)
|
||||
|
||||
# Validate language is not empty - raise error if found
|
||||
if pd.notna(row[coordi_title_col]) and pd.notna(row[coordi_episode_col]): # Only check if this is a real data row
|
||||
if not coordi_language or coordi_language == 'nan' or pd.isna(row[coordi_language_col]):
|
||||
raise ValueError(f"Empty language value found in Coordi data at row {idx + 1} ('{coordi_title}' - Episode {coordi_episode}): All language fields must be populated")
|
||||
|
||||
if has_coordi_data:
|
||||
item = ComparisonItem(coordi_language, coordi_title, coordi_episode, sheet_name, idx)
|
||||
coordi_items.add(item)
|
||||
coordi_all_items.append(item) # Keep all items for duplicate detection
|
||||
coordi_details.append({
|
||||
'language': coordi_language,
|
||||
'title': coordi_title,
|
||||
'episode': coordi_episode,
|
||||
'sheet': sheet_name,
|
||||
'row_index': idx,
|
||||
'coordi_data': {
|
||||
coordi_language_col: row[coordi_language_col],
|
||||
coordi_title_col: row[coordi_title_col],
|
||||
coordi_episode_col: row[coordi_episode_col]
|
||||
}
|
||||
})
|
||||
|
||||
return {
|
||||
'kst_items': kst_items,
|
||||
@ -181,7 +213,8 @@ class KSTCoordiComparator:
|
||||
'kst_details': kst_details,
|
||||
'coordi_details': coordi_details,
|
||||
'kst_all_items': kst_all_items,
|
||||
'coordi_all_items': coordi_all_items
|
||||
'coordi_all_items': coordi_all_items,
|
||||
'coordi_label': coordi_label # Dynamic label from column A
|
||||
}
|
||||
|
||||
def categorize_mismatches_for_sheet(self, sheet_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@ -196,17 +229,17 @@ class KSTCoordiComparator:
|
||||
coordi_duplicates = self._find_duplicates_in_list(coordi_all_items)
|
||||
|
||||
# Create sets of items that have duplicates (to exclude from "only" lists)
|
||||
kst_duplicate_keys = {(item.title, item.episode) for item in kst_duplicates}
|
||||
coordi_duplicate_keys = {(item.title, item.episode) for item in coordi_duplicates}
|
||||
kst_duplicate_keys = {(item.language, item.title, item.episode) for item in kst_duplicates}
|
||||
coordi_duplicate_keys = {(item.language, item.title, item.episode) for item in coordi_duplicates}
|
||||
|
||||
# Find overlaps and differences - exclude items that have duplicates
|
||||
matched_items = kst_items.intersection(coordi_items)
|
||||
|
||||
# For "only" items: exclude those that have duplicates within their own dataset
|
||||
kst_only_items = {item for item in kst_items - coordi_items
|
||||
if (item.title, item.episode) not in kst_duplicate_keys}
|
||||
if (item.language, item.title, item.episode) not in kst_duplicate_keys}
|
||||
coordi_only_items = {item for item in coordi_items - kst_items
|
||||
if (item.title, item.episode) not in coordi_duplicate_keys}
|
||||
if (item.language, item.title, item.episode) not in coordi_duplicate_keys}
|
||||
|
||||
categorization = {
|
||||
'matched_items': list(matched_items),
|
||||
@ -245,13 +278,13 @@ class KSTCoordiComparator:
|
||||
"""Find duplicate items within a dataset - FIXED to only return actual duplicates"""
|
||||
from collections import Counter
|
||||
|
||||
# Count occurrences of each (title, episode) pair
|
||||
key_counts = Counter((item.title, item.episode) for item in items_list)
|
||||
# Count occurrences of each (language, title, episode) tuple
|
||||
key_counts = Counter((item.language, item.title, item.episode) for item in items_list)
|
||||
|
||||
# Only return items that appear more than once
|
||||
duplicates = []
|
||||
for item in items_list:
|
||||
key = (item.title, item.episode)
|
||||
key = (item.language, item.title, item.episode)
|
||||
if key_counts[key] > 1:
|
||||
duplicates.append(item)
|
||||
|
||||
@ -327,6 +360,7 @@ class KSTCoordiComparator:
|
||||
# KST-only items
|
||||
for item in categorization['kst_only_items']:
|
||||
mismatch_details['kst_only'].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -338,6 +372,7 @@ class KSTCoordiComparator:
|
||||
# Coordi-only items
|
||||
for item in categorization['coordi_only_items']:
|
||||
mismatch_details['coordi_only'].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -358,6 +393,7 @@ class KSTCoordiComparator:
|
||||
key = (item.title, item.episode)
|
||||
if key not in mixed_duplicate_keys:
|
||||
mismatch_details['kst_duplicates'].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -371,6 +407,7 @@ class KSTCoordiComparator:
|
||||
key = (item.title, item.episode)
|
||||
if key not in mixed_duplicate_keys:
|
||||
mismatch_details['coordi_duplicates'].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -451,6 +488,7 @@ class KSTCoordiComparator:
|
||||
for item in categorization['kst_only_items']:
|
||||
title = item.title
|
||||
grouped['kst_only_by_title'][title].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -462,6 +500,7 @@ class KSTCoordiComparator:
|
||||
for item in categorization['coordi_only_items']:
|
||||
title = item.title
|
||||
grouped['coordi_only_by_title'][title].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -473,6 +512,7 @@ class KSTCoordiComparator:
|
||||
for item in categorization['matched_items']:
|
||||
title = item.title
|
||||
grouped['matched_by_title'][title].append({
|
||||
'language': item.language,
|
||||
'title': item.title,
|
||||
'episode': item.episode,
|
||||
'sheet': item.source_sheet,
|
||||
@ -517,11 +557,13 @@ class KSTCoordiComparator:
|
||||
visualize_rows = []
|
||||
|
||||
# Helper function to create a row
|
||||
def create_row(coordi_title="", coordi_chapter="", kst_title="", kst_chapter="",
|
||||
def create_row(coordi_language="", coordi_title="", coordi_chapter="", kst_language="", kst_title="", kst_chapter="",
|
||||
row_type="matched", reason="", title_for_sort=""):
|
||||
return {
|
||||
'coordi_language': coordi_language,
|
||||
'coordi_title': coordi_title,
|
||||
'coordi_chapter': coordi_chapter,
|
||||
'kst_language': kst_language,
|
||||
'kst_title': kst_title,
|
||||
'kst_chapter': kst_chapter,
|
||||
'row_type': row_type,
|
||||
@ -533,6 +575,7 @@ class KSTCoordiComparator:
|
||||
# 1. Handle Coordi-only items
|
||||
for item in mismatch_details['coordi_only']:
|
||||
visualize_rows.append(create_row(
|
||||
coordi_language=item.get('language', ''),
|
||||
coordi_title=item['title'],
|
||||
coordi_chapter=item['episode'],
|
||||
row_type='coordi_only',
|
||||
@ -542,6 +585,7 @@ class KSTCoordiComparator:
|
||||
# 2. Handle KST-only items
|
||||
for item in mismatch_details['kst_only']:
|
||||
visualize_rows.append(create_row(
|
||||
kst_language=item.get('language', ''),
|
||||
kst_title=item['title'],
|
||||
kst_chapter=item['episode'],
|
||||
row_type='kst_only',
|
||||
@ -549,11 +593,12 @@ class KSTCoordiComparator:
|
||||
))
|
||||
|
||||
# 3. Handle Mixed duplicates (exists in both but duplicated on one side)
|
||||
mixed_items = {} # Group by title+episode
|
||||
mixed_items = {} # Group by language+title+episode
|
||||
for item in mismatch_details['mixed_duplicates']:
|
||||
key = f"{item['title']}_{item['episode']}"
|
||||
key = f"{item.get('language', '')}_{item['title']}_{item['episode']}"
|
||||
if key not in mixed_items:
|
||||
mixed_items[key] = {
|
||||
'language': item.get('language', ''),
|
||||
'title': item['title'],
|
||||
'episode': item['episode'],
|
||||
'kst_duplicate_count': 0,
|
||||
@ -569,8 +614,10 @@ class KSTCoordiComparator:
|
||||
for key, item in mixed_items.items():
|
||||
# First row: show it exists in both
|
||||
visualize_rows.append(create_row(
|
||||
coordi_language=item['language'],
|
||||
coordi_title=item['title'],
|
||||
coordi_chapter=item['episode'],
|
||||
kst_language=item['language'],
|
||||
kst_title=item['title'],
|
||||
kst_chapter=item['episode'],
|
||||
row_type='mixed_duplicate',
|
||||
@ -578,8 +625,9 @@ class KSTCoordiComparator:
|
||||
))
|
||||
|
||||
# Additional rows for KST duplicates (count - 1 since first is already shown)
|
||||
for i in range(max(0, item['kst_duplicate_count'] - 1)):
|
||||
for _ in range(max(0, item['kst_duplicate_count'] - 1)):
|
||||
visualize_rows.append(create_row(
|
||||
kst_language=item['language'],
|
||||
kst_title=item['title'],
|
||||
kst_chapter=item['episode'],
|
||||
row_type='mixed_duplicate',
|
||||
@ -588,8 +636,9 @@ class KSTCoordiComparator:
|
||||
))
|
||||
|
||||
# Additional rows for Coordi duplicates (count - 1 since first is already shown)
|
||||
for i in range(max(0, item['coordi_duplicate_count'] - 1)):
|
||||
for _ in range(max(0, item['coordi_duplicate_count'] - 1)):
|
||||
visualize_rows.append(create_row(
|
||||
coordi_language=item['language'],
|
||||
coordi_title=item['title'],
|
||||
coordi_chapter=item['episode'],
|
||||
row_type='mixed_duplicate',
|
||||
@ -600,6 +649,7 @@ class KSTCoordiComparator:
|
||||
# 4. Handle Pure duplicates
|
||||
for item in mismatch_details['kst_duplicates']:
|
||||
visualize_rows.append(create_row(
|
||||
kst_language=item.get('language', ''),
|
||||
kst_title=item['title'],
|
||||
kst_chapter=item['episode'],
|
||||
row_type='pure_duplicate',
|
||||
@ -608,6 +658,7 @@ class KSTCoordiComparator:
|
||||
|
||||
for item in mismatch_details['coordi_duplicates']:
|
||||
visualize_rows.append(create_row(
|
||||
coordi_language=item.get('language', ''),
|
||||
coordi_title=item['title'],
|
||||
coordi_chapter=item['episode'],
|
||||
row_type='pure_duplicate',
|
||||
@ -616,11 +667,13 @@ class KSTCoordiComparator:
|
||||
|
||||
# 5. Handle Matched items (perfect matches)
|
||||
matched_by_title = summary['grouped_by_title']['matched_by_title']
|
||||
for title, items in matched_by_title.items():
|
||||
for _, items in matched_by_title.items():
|
||||
for item in items:
|
||||
visualize_rows.append(create_row(
|
||||
coordi_language=item.get('language', ''),
|
||||
coordi_title=item['title'],
|
||||
coordi_chapter=item['episode'],
|
||||
kst_language=item.get('language', ''),
|
||||
kst_title=item['title'],
|
||||
kst_chapter=item['episode'],
|
||||
row_type='matched',
|
||||
@ -645,6 +698,21 @@ class KSTCoordiComparator:
|
||||
visualize_rows.sort(key=sort_key)
|
||||
|
||||
return visualize_rows
|
||||
|
||||
def get_coordi_label_for_sheet(self, sheet_filter: str | None = None) -> str:
|
||||
"""Get the dynamic coordi label from column A for a specific sheet"""
|
||||
if not self.data:
|
||||
return "Default"
|
||||
|
||||
# Use first sheet if no filter specified
|
||||
sheet_name = sheet_filter if sheet_filter else list(self.data.keys())[0]
|
||||
|
||||
if sheet_name not in self.data:
|
||||
return "Default"
|
||||
|
||||
# Extract the sheet data to get the dynamic label
|
||||
sheet_data = self.extract_kst_coordi_items_for_sheet(sheet_name)
|
||||
return sheet_data.get('coordi_label', 'Default')
|
||||
|
||||
def generate_excel_export_data(self) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""Generate data for Excel export with all sheets in visualize format"""
|
||||
@ -661,8 +729,10 @@ class KSTCoordiComparator:
|
||||
excel_rows = []
|
||||
for row in sheet_visualize_data:
|
||||
excel_rows.append({
|
||||
'Coordi Language': row.get('coordi_language', ''),
|
||||
'Coordi Title': row.get('coordi_title', ''),
|
||||
'Coordi Chapter': row.get('coordi_chapter', ''),
|
||||
'KST Language': row.get('kst_language', ''),
|
||||
'KST Title': row.get('kst_title', ''),
|
||||
'KST Chapter': row.get('kst_chapter', ''),
|
||||
'Status': row.get('reason', ''),
|
||||
@ -711,7 +781,8 @@ class KSTCoordiComparator:
|
||||
if details:
|
||||
print(f"{mismatch_type.upper()} (showing first 3):")
|
||||
for i, item in enumerate(details[:3]):
|
||||
print(f" {i+1}. {item['title']} - Episode {item['episode']} ({item['reason']})")
|
||||
language = item.get('language', 'N/A')
|
||||
print(f" {i+1}. [{language}] {item['title']} - Episode {item['episode']} ({item['reason']})")
|
||||
if len(details) > 3:
|
||||
print(f" ... and {len(details) - 3} more")
|
||||
print()
|
||||
|
||||
@ -168,36 +168,46 @@
|
||||
margin: 10px 0;
|
||||
}
|
||||
.table-container {
|
||||
max-height: 500px;
|
||||
max-height: 900px;
|
||||
overflow-y: auto;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
/* Vibrant color styles for Visualize tab */
|
||||
/* Professional UI Design System - Semantic Color Palette */
|
||||
.coordi-only-row {
|
||||
background-color: #ff4444 !important; /* Bright red */
|
||||
color: white;
|
||||
background-color: #f0f9ff !important; /* Sky blue 50 - Information state */
|
||||
color: #0c4a6e !important; /* Sky blue 900 - High contrast text */
|
||||
border-left: 4px solid #0ea5e9 !important; /* Sky blue 500 - Primary accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.kst-only-row {
|
||||
background-color: #4488ff !important; /* Bright blue */
|
||||
color: white;
|
||||
background-color: #f7fee7 !important; /* Lime 50 - Success/Available state */
|
||||
color: #365314 !important; /* Lime 900 - High contrast text */
|
||||
border-left: 4px solid #65a30d !important; /* Lime 600 - Success accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.mixed-duplicate-row {
|
||||
background-color: #ff8800 !important; /* Bright orange */
|
||||
color: white;
|
||||
background-color: #fefbeb !important; /* Amber 50 - Warning state */
|
||||
color: #92400e !important; /* Amber 800 - High contrast text */
|
||||
border-left: 4px solid #f59e0b !important; /* Amber 500 - Warning accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.pure-duplicate-row {
|
||||
background-color: #8844ff !important; /* Bright purple */
|
||||
color: white;
|
||||
background-color: #fef2f2 !important; /* Red 50 - Error/Critical state */
|
||||
color: #991b1b !important; /* Red 800 - High contrast text */
|
||||
border-left: 4px solid #ef4444 !important; /* Red 500 - Error accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.matched-row {
|
||||
background-color: white !important; /* White background */
|
||||
color: black;
|
||||
background-color: #ffffff !important; /* Pure white - Neutral/Default state */
|
||||
color: #374151 !important; /* Gray 700 - Standard text */
|
||||
border-left: 4px solid #10b981 !important; /* Emerald 500 - Success indicator */
|
||||
font-weight: 400;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
@ -292,8 +302,10 @@
|
||||
<table id="visualize-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Coordi Title</th>
|
||||
<th>Coordi Chapter</th>
|
||||
<th id="coordi-lang-header">Coordi Lang</th>
|
||||
<th id="coordi-title-header">Coordi Title</th>
|
||||
<th id="coordi-chapter-header">Coordi Chapter</th>
|
||||
<th>KST Lang</th>
|
||||
<th>KST Title</th>
|
||||
<th>KST Chapter</th>
|
||||
<th>Status</th>
|
||||
@ -491,6 +503,12 @@
|
||||
}
|
||||
|
||||
function updateResults(results) {
|
||||
// Update dynamic headers with coordi label
|
||||
const coordiLabel = results.coordi_label || 'Coordi';
|
||||
document.getElementById('coordi-lang-header').textContent = `${coordiLabel} Lang`;
|
||||
document.getElementById('coordi-title-header').textContent = `${coordiLabel} Title`;
|
||||
document.getElementById('coordi-chapter-header').textContent = `${coordiLabel} Chapter`;
|
||||
|
||||
// Update count displays
|
||||
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
||||
|
||||
@ -646,11 +664,13 @@
|
||||
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
|
||||
visualizeData.forEach(row => {
|
||||
const tr = tbody.insertRow();
|
||||
tr.insertCell(0).textContent = row.coordi_title || '';
|
||||
tr.insertCell(1).textContent = row.coordi_chapter || '';
|
||||
tr.insertCell(2).textContent = row.kst_title || '';
|
||||
tr.insertCell(3).textContent = row.kst_chapter || '';
|
||||
tr.insertCell(4).textContent = row.reason || '';
|
||||
tr.insertCell(0).textContent = row.coordi_language || '';
|
||||
tr.insertCell(1).textContent = row.coordi_title || '';
|
||||
tr.insertCell(2).textContent = row.coordi_chapter || '';
|
||||
tr.insertCell(3).textContent = row.kst_language || '';
|
||||
tr.insertCell(4).textContent = row.kst_title || '';
|
||||
tr.insertCell(5).textContent = row.kst_chapter || '';
|
||||
tr.insertCell(6).textContent = row.reason || '';
|
||||
|
||||
// Apply vibrant color highlighting based on row type
|
||||
switch (row.row_type) {
|
||||
|
||||
740
templates/index.html.bak
Normal file
740
templates/index.html.bak
Normal file
@ -0,0 +1,740 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>KST vs Coordi Data Comparison</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
background-color: #f5f5f5;
|
||||
}
|
||||
.container {
|
||||
max-width: 1400px;
|
||||
margin: 0 auto;
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: #333;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.file-section {
|
||||
background: #f8f9fa;
|
||||
padding: 20px;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.file-input {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
align-items: center;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
input[type="text"], input[type="file"] {
|
||||
flex: 1;
|
||||
padding: 8px 12px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
}
|
||||
input[type="file"] {
|
||||
padding: 6px 8px;
|
||||
}
|
||||
button {
|
||||
padding: 8px 16px;
|
||||
background: #007bff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
button:hover {
|
||||
background: #0056b3;
|
||||
}
|
||||
button:disabled {
|
||||
background: #6c757d;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
.loading {
|
||||
text-align: center;
|
||||
color: #666;
|
||||
font-style: italic;
|
||||
}
|
||||
.tabs {
|
||||
border-bottom: 2px solid #ddd;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.tab {
|
||||
display: inline-block;
|
||||
padding: 10px 20px;
|
||||
background: #f8f9fa;
|
||||
border: 1px solid #ddd;
|
||||
border-bottom: none;
|
||||
cursor: pointer;
|
||||
margin-right: 5px;
|
||||
border-radius: 4px 4px 0 0;
|
||||
}
|
||||
.tab.active {
|
||||
background: white;
|
||||
border-bottom: 2px solid white;
|
||||
margin-bottom: -2px;
|
||||
}
|
||||
.tab-content {
|
||||
display: none;
|
||||
}
|
||||
.tab-content.active {
|
||||
display: block;
|
||||
}
|
||||
.summary-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.summary-card {
|
||||
background: #f8f9fa;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
border-left: 4px solid #007bff;
|
||||
}
|
||||
.summary-card h3 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 15px;
|
||||
color: #333;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
.summary-card p {
|
||||
margin: 8px 0;
|
||||
color: #555;
|
||||
}
|
||||
.summary-card span {
|
||||
font-weight: bold;
|
||||
color: #007bff;
|
||||
}
|
||||
.count-badge {
|
||||
display: inline-block;
|
||||
background: #007bff;
|
||||
color: white;
|
||||
padding: 4px 8px;
|
||||
border-radius: 12px;
|
||||
font-size: 0.9em;
|
||||
margin-left: 10px;
|
||||
}
|
||||
.reconciliation {
|
||||
background: #d4edda;
|
||||
border: 1px solid #c3e6cb;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
margin-top: 15px;
|
||||
}
|
||||
.reconciliation.mismatch {
|
||||
background: #f8d7da;
|
||||
border-color: #f5c6cb;
|
||||
}
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-top: 10px;
|
||||
}
|
||||
th, td {
|
||||
padding: 10px;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid #ddd;
|
||||
}
|
||||
th {
|
||||
background-color: #f8f9fa;
|
||||
font-weight: bold;
|
||||
}
|
||||
tr:hover {
|
||||
background-color: #f5f5f5;
|
||||
}
|
||||
.error {
|
||||
background: #f8d7da;
|
||||
color: #721c24;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
.success {
|
||||
background: #d4edda;
|
||||
color: #155724;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
.table-container {
|
||||
max-height: 900px;
|
||||
overflow-y: auto;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
/* Vibrant color styles for Visualize tab */
|
||||
.coordi-only-row {
|
||||
background-color: #ff4444 !important; /* Bright red */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.kst-only-row {
|
||||
background-color: #4488ff !important; /* Bright blue */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.mixed-duplicate-row {
|
||||
background-color: #ff8800 !important; /* Bright orange */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.pure-duplicate-row {
|
||||
background-color: #8844ff !important; /* Bright purple */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.matched-row {
|
||||
background-color: white !important; /* White background */
|
||||
color: black;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>KST vs Coordi Data Comparison Tool</h1>
|
||||
|
||||
<div class="file-section">
|
||||
<div class="file-input">
|
||||
<label for="filePath">Excel File Path:</label>
|
||||
<input type="text" id="filePath" value="data/sample-data.xlsx" placeholder="Enter file path">
|
||||
<button onclick="analyzeData()" id="analyzeBtn">Analyze Data</button>
|
||||
</div>
|
||||
<div class="file-input" style="margin-top: 10px;">
|
||||
<label>Or Upload File:</label>
|
||||
<input type="file" id="fileUpload" accept=".xlsx,.xls" onchange="handleFileUpload()">
|
||||
<button onclick="uploadAndAnalyze()" id="uploadBtn" disabled>Upload & Analyze</button>
|
||||
</div>
|
||||
<div class="file-input" style="margin-top: 10px;">
|
||||
<label for="sheetFilter">Sheet Filter:</label>
|
||||
<select id="sheetFilter" onchange="filterBySheet()" disabled>
|
||||
<!-- Options will be populated dynamically -->
|
||||
</select>
|
||||
</div>
|
||||
<div id="status"></div>
|
||||
</div>
|
||||
|
||||
<div id="results" style="display: none;">
|
||||
<div class="tabs">
|
||||
<div class="tab active" onclick="showTab('summary')">Summary</div>
|
||||
<div class="tab" onclick="showTab('different')">Different</div>
|
||||
<div class="tab" onclick="showTab('visualize')">Visualize</div>
|
||||
</div>
|
||||
|
||||
<div id="summary" class="tab-content active">
|
||||
<!-- Summary Cards Section -->
|
||||
<div class="summary-grid">
|
||||
<div class="summary-card">
|
||||
<h3>📊 Sheet Summary</h3>
|
||||
<p><strong>Current Sheet:</strong> <span id="current-sheet-name">-</span></p>
|
||||
<p><strong>Matched Items:</strong> <span id="summary-matched-count">0</span> (Same in both KST and Coordi)</p>
|
||||
<p><strong>Different Items:</strong> <span id="summary-different-count">0</span> (Total tasks excluding matched items)</p>
|
||||
</div>
|
||||
<div class="summary-card">
|
||||
<h3>🔍 Breakdown</h3>
|
||||
<p><strong>KST Only:</strong> <span id="summary-kst-only">0</span></p>
|
||||
<p><strong>Coordi Only:</strong> <span id="summary-coordi-only">0</span></p>
|
||||
<p><strong>Duplicates:</strong> <span id="summary-duplicates">0</span></p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>Matched Items (Same in both KST and Coordi) <span id="matched-count-display" class="count-badge">0</span></h3>
|
||||
<div class="table-container">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Korean Title</th>
|
||||
<th>Episode</th>
|
||||
<th>Sheet</th>
|
||||
<th>Row</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="summary-table">
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="different" class="tab-content">
|
||||
<h3>Different Items <span id="different-count-display" class="count-badge">0</span></h3>
|
||||
<div class="table-container">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>KST Data</th>
|
||||
<th>Coordi Data</th>
|
||||
<th>Reason</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="different-table">
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="visualize" class="tab-content">
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
|
||||
<h3>Excel-like Visualization</h3>
|
||||
<button onclick="downloadExcel()" id="downloadBtn" style="background: #28a745; padding: 8px 16px;">📥 Download All Sheets</button>
|
||||
</div>
|
||||
<div class="table-container">
|
||||
<table id="visualize-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Coordi Lang</th>
|
||||
<th>Coordi Title</th>
|
||||
<th>Coordi Chapter</th>
|
||||
<th>KST Lang</th>
|
||||
<th>KST Title</th>
|
||||
<th>KST Chapter</th>
|
||||
<th>Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="visualize-table-body">
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
/**
 * Show one tab panel and mark its tab button as active.
 *
 * The tab button is located from `tabName` itself (by matching the inline
 * `onclick="showTab('<tabName>')"` handler) instead of the deprecated
 * implicit global `event`, so the function also works when it is called
 * programmatically rather than from a click.
 *
 * @param {string} tabName - id of the `.tab-content` element to display.
 */
function showTab(tabName) {
    const panel = document.getElementById(tabName);
    if (!panel) {
        // Unknown panel id: leave the current tab state untouched.
        return;
    }

    // Hide all tab contents, then show the selected one.
    document.querySelectorAll('.tab-content').forEach(content => {
        content.classList.remove('active');
    });
    panel.classList.add('active');

    // Activate only the tab whose inline handler targets this panel.
    const marker = `'${tabName}'`;
    let matched = false;
    document.querySelectorAll('.tab').forEach(tab => {
        const handler = tab.getAttribute('onclick') || '';
        const isTarget = handler.includes(marker);
        tab.classList.toggle('active', isTarget);
        matched = matched || isTarget;
    });

    // Fallback for tabs wired up without the inline handler pattern:
    // keep the previous behavior of highlighting the clicked element.
    const clickEvent = window.event;
    if (!matched && clickEvent && clickEvent.target && clickEvent.target.classList) {
        clickEvent.target.classList.add('active');
    }
}
|
||||
|
||||
/**
 * Send the current file path and sheet filter to the `/analyze` endpoint
 * and render the response.
 *
 * Status messages are written with `textContent`, never `innerHTML`, because
 * the error text comes from the server response or from a thrown exception
 * and must not be interpreted as markup.
 */
function analyzeData() {
    const filePath = document.getElementById('filePath').value;
    const sheetFilterElement = document.getElementById('sheetFilter');
    const sheetFilter = sheetFilterElement.value || null; // Use null if empty
    const statusDiv = document.getElementById('status');
    const analyzeBtn = document.getElementById('analyzeBtn');

    // Replace the status area with a single <div class="{kind}"> holding plain text.
    const showStatus = (kind, message) => {
        const box = document.createElement('div');
        box.className = kind;
        box.textContent = message;
        statusDiv.innerHTML = '';
        statusDiv.appendChild(box);
    };

    if (!filePath.trim()) {
        showStatus('error', 'Please enter a file path');
        return;
    }

    // Show loading state
    showStatus('loading', 'Analyzing data...');
    analyzeBtn.disabled = true;
    analyzeBtn.textContent = 'Analyzing...';

    // Make API call
    fetch('/analyze', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            file_path: filePath,
            sheet_filter: sheetFilter
        })
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            showStatus('success', 'Analysis complete!');
            updateResults(data.results);
            updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
            document.getElementById('results').style.display = 'block';
        } else {
            showStatus('error', `Error: ${data.error}`);
        }
    })
    .catch(error => {
        showStatus('error', `Error: ${error.message}`);
    })
    .finally(() => {
        // Always restore the button, whether the request succeeded or failed.
        analyzeBtn.disabled = false;
        analyzeBtn.textContent = 'Analyze Data';
    });
}
|
||||
|
||||
/**
 * Rebuild the sheet-filter dropdown.
 *
 * With no sheet names, the dropdown shows a single disabled
 * "Loading sheets..." placeholder and is itself disabled. Otherwise one
 * option per sheet is added and the dropdown is enabled.
 *
 * @param {string[]} sheetNames - sheet names to list (may be empty or missing).
 * @param {?string} currentFilter - sheet to pre-select; when falsy the first sheet is selected.
 */
function updateSheetFilter(sheetNames, currentFilter) {
    const dropdown = document.getElementById('sheetFilter');
    dropdown.innerHTML = '';

    // Small factory so both branches build options the same way.
    const makeOption = (value, label) => {
        const entry = document.createElement('option');
        entry.value = value;
        entry.textContent = label;
        return entry;
    };

    const nothingToList = !sheetNames || sheetNames.length === 0;
    if (nothingToList) {
        const placeholder = makeOption('', 'Loading sheets...');
        placeholder.disabled = true;
        placeholder.selected = true;
        dropdown.appendChild(placeholder);
        dropdown.disabled = true;
        return;
    }

    sheetNames.forEach((name, position) => {
        const entry = makeOption(name, name);
        // Pre-select the requested sheet, or the first one when none was requested.
        const useAsDefault = !currentFilter && position === 0;
        if (name === currentFilter || useAsDefault) {
            entry.selected = true;
        }
        dropdown.appendChild(entry);
    });

    dropdown.disabled = false;
}
|
||||
|
||||
/**
 * Change handler for the sheet-filter dropdown: runs the analysis again so
 * the currently selected sheet is applied.
 */
function filterBySheet() {
    analyzeData();
}
|
||||
|
||||
/**
 * Keep the upload button in sync with the file picker: disabled while no
 * file is chosen, enabled (with its default label) once one is.
 */
function handleFileUpload() {
    const picker = document.getElementById('fileUpload');
    const button = document.getElementById('uploadBtn');

    const hasSelection = picker.files.length > 0;
    if (!hasSelection) {
        button.disabled = true;
        return;
    }

    button.disabled = false;
    button.textContent = 'Upload & Analyze';
}
|
||||
|
||||
/**
 * Upload the selected file to `/upload`, then analyze the stored copy via
 * `/analyze` and render the results.
 *
 * Status messages are written with `textContent`, never `innerHTML`, because
 * error text originates from server responses or thrown exceptions and must
 * not be interpreted as markup.
 */
function uploadAndAnalyze() {
    const fileInput = document.getElementById('fileUpload');
    const statusDiv = document.getElementById('status');
    const uploadBtn = document.getElementById('uploadBtn');

    // Replace the status area with a single <div class="{kind}"> holding plain text.
    const showStatus = (kind, message) => {
        const box = document.createElement('div');
        box.className = kind;
        box.textContent = message;
        statusDiv.innerHTML = '';
        statusDiv.appendChild(box);
    };

    if (fileInput.files.length === 0) {
        showStatus('error', 'Please select a file to upload');
        return;
    }

    const formData = new FormData();
    formData.append('file', fileInput.files[0]);

    // Show uploading state
    showStatus('loading', 'Uploading and analyzing file...');
    uploadBtn.disabled = true;
    uploadBtn.textContent = 'Uploading...';

    // Upload file
    fetch('/upload', {
        method: 'POST',
        body: formData
    })
    .then(response => response.json())
    .then(data => {
        if (!data.success) {
            // Routed to the shared .catch below and reported as an upload error.
            throw new Error(data.error);
        }

        // File uploaded successfully, now analyze it
        document.getElementById('filePath').value = data.file_path;
        showStatus('loading', 'File uploaded! Analyzing data...');

        // A fresh upload always starts without a sheet filter, so a null
        // filter is sent instead of whatever the dropdown currently holds.
        return fetch('/analyze', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                file_path: data.file_path,
                sheet_filter: null
            })
        });
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            showStatus('success', 'File uploaded and analyzed successfully!');
            updateResults(data.results);
            updateSheetFilter(data.results.sheet_names, data.results.current_sheet_filter);
            document.getElementById('results').style.display = 'block';
        } else {
            showStatus('error', `Analysis error: ${data.error}`);
        }
    })
    .catch(error => {
        showStatus('error', `Upload error: ${error.message}`);
    })
    .finally(() => {
        uploadBtn.disabled = false;
        uploadBtn.textContent = 'Upload & Analyze';
        handleFileUpload(); // Reset button state based on file selection
    });
}
|
||||
|
||||
/**
 * Push one analysis result into the page: count badges, the summary cards,
 * and the three result tables.
 *
 * @param {Object} results - payload from `/analyze`; this function reads
 *   `matched_items_count`, `mismatches.*_count`, `current_sheet_filter`,
 *   `matched_data`, `mismatch_details` and `visualize_data`.
 */
function updateResults(results) {
    const counts = results.mismatches;
    const setText = (elementId, text) => {
        document.getElementById(elementId).textContent = text;
    };

    // Badge next to the matched-items heading.
    setText('matched-count-display', results.matched_items_count.toLocaleString());

    // "Different" covers one-sided items plus every kind of duplicate;
    // the mixed-duplicate count is optional in the payload.
    const differentTotal = counts.kst_only_count + counts.coordi_only_count +
        counts.kst_duplicates_count + counts.coordi_duplicates_count +
        (counts.mixed_duplicates_count || 0);
    setText('different-count-display', differentTotal.toLocaleString());

    // Summary cards.
    setText('current-sheet-name', results.current_sheet_filter);
    setText('summary-matched-count', results.matched_items_count.toLocaleString());
    setText('summary-different-count', differentTotal.toLocaleString());
    setText('summary-kst-only', counts.kst_only_count.toLocaleString());
    setText('summary-coordi-only', counts.coordi_only_count.toLocaleString());

    // Duplicates card: KST + Coordi + mixed.
    const duplicateTotal = counts.kst_duplicates_count + counts.coordi_duplicates_count +
        (counts.mixed_duplicates_count || 0);
    setText('summary-duplicates', duplicateTotal.toLocaleString());

    // Tables: Summary (matched items), Different, Visualize.
    updateSummaryTable(results.matched_data);
    updateDifferentTable(results.mismatch_details);
    updateVisualizeTable(results.visualize_data);
}
|
||||
|
||||
/**
 * Render the matched items into the Summary tab table, sorted by title
 * (Korean collation) and then by numeric episode.
 *
 * @param {Array<Object>} matchedData - items with `title`, `episode`, `sheet`
 *   and either a numeric `row_index` or a `row` value.
 */
function updateSummaryTable(matchedData) {
    const tbody = document.getElementById('summary-table');
    tbody.innerHTML = '';

    // Sort a copy so the caller's array is left untouched.
    const sortedData = [...matchedData].sort((a, b) => {
        const titleCompare = a.title.localeCompare(b.title, 'ko');
        if (titleCompare !== 0) return titleCompare;

        // Non-numeric episodes sort as 0.
        const aEp = parseFloat(a.episode) || 0;
        const bEp = parseFloat(b.episode) || 0;
        return aEp - bEp;
    });

    sortedData.forEach(item => {
        const row = tbody.insertRow();
        row.insertCell(0).textContent = item.title;
        row.insertCell(1).textContent = item.episode;
        row.insertCell(2).textContent = item.sheet;

        // A row_index of 0 is a valid index; a plain truthiness check would
        // wrongly treat it as missing and fall back to item.row.
        // NOTE(review): assumes row_index is a 0-based number when present — confirm against the backend payload.
        const hasRowIndex = Number.isFinite(item.row_index);
        row.insertCell(3).textContent = hasRowIndex ? item.row_index + 1 : item.row;
    });
}
|
||||
|
||||
/**
 * Render every mismatch category into the Different tab table.
 *
 * Rows are gathered from KST-only, Coordi-only, KST duplicates, Coordi
 * duplicates and (when present) mixed duplicates, sorted by title (Korean
 * collation) then numeric episode, and tinted by category.
 *
 * @param {Object} mismatchDetails - object with the arrays `kst_only`,
 *   `coordi_only`, `kst_duplicates`, `coordi_duplicates` and optionally
 *   `mixed_duplicates`; each item has `title` and `episode`
 *   (mixed duplicates also carry `reason`).
 */
function updateDifferentTable(mismatchDetails) {
    const tbody = document.getElementById('different-table');
    tbody.innerHTML = '';

    const allDifferences = [];
    const describe = item => `${item.title} - Episode ${item.episode}`;

    // Append one table entry per item; `inKst` / `inCoordi` choose which
    // data column(s) carry the item's label.
    const collect = (items, inKst, inCoordi, reasonFor, highlightType) => {
        items.forEach(item => {
            const label = describe(item);
            allDifferences.push({
                kstData: inKst ? label : '',
                coordiData: inCoordi ? label : '',
                reason: reasonFor(item),
                sortTitle: item.title,
                sortEpisode: parseFloat(item.episode) || 0,
                highlightType
            });
        });
    };

    // One-sided items (no special highlighting)
    collect(mismatchDetails.kst_only, true, false, () => 'Only appears in KST', 'none');
    collect(mismatchDetails.coordi_only, false, true, () => 'Only appears in Coordi', 'none');

    // Pure duplicates (red highlighting)
    collect(mismatchDetails.kst_duplicates, true, false, () => 'Duplicate entry in KST data', 'red');
    collect(mismatchDetails.coordi_duplicates, false, true, () => 'Duplicate entry in Coordi data', 'red');

    // Mixed duplicates (yellow highlighting). The label is shown in both
    // columns whichever side holds the duplicate, so `duplicate_side` does
    // not affect the output.
    if (mismatchDetails.mixed_duplicates) {
        collect(mismatchDetails.mixed_duplicates, true, true, item => item.reason, 'yellow');
    }

    // Sort by Korean title + episode
    allDifferences.sort((a, b) => {
        const titleCompare = a.sortTitle.localeCompare(b.sortTitle, 'ko');
        if (titleCompare !== 0) return titleCompare;
        return a.sortEpisode - b.sortEpisode;
    });

    // Populate table with highlighting
    allDifferences.forEach(diff => {
        const row = tbody.insertRow();
        row.insertCell(0).textContent = diff.kstData;
        row.insertCell(1).textContent = diff.coordiData;
        row.insertCell(2).textContent = diff.reason;

        if (diff.highlightType === 'red') {
            row.style.backgroundColor = '#f8d7da'; // Light red
            row.title = 'Pure duplicate entry';
        } else if (diff.highlightType === 'yellow') {
            row.style.backgroundColor = '#fff3cd'; // Light yellow
            row.title = 'Item exists in both datasets but has duplicates on one side';
        }
    });
}
|
||||
|
||||
/**
 * Render the Visualize tab table: seven text cells per row plus a CSS class
 * chosen from the row's `row_type`.
 *
 * Rows are emitted in the order received; no sorting happens here.
 *
 * @param {Array<Object>} visualizeData - rows with the `coordi_*` / `kst_*`
 *   language, title and chapter fields, `reason`, and `row_type`.
 */
function updateVisualizeTable(visualizeData) {
    const body = document.getElementById('visualize-table-body');
    body.innerHTML = '';

    // Cell order, left to right.
    const columnKeys = [
        'coordi_language',
        'coordi_title',
        'coordi_chapter',
        'kst_language',
        'kst_title',
        'kst_chapter',
        'reason'
    ];

    // row_type -> CSS class; unknown types leave the row unstyled.
    const rowClasses = new Map([
        ['coordi_only', 'coordi-only-row'],
        ['kst_only', 'kst-only-row'],
        ['mixed_duplicate', 'mixed-duplicate-row'],
        ['pure_duplicate', 'pure-duplicate-row'],
        ['matched', 'matched-row']
    ]);

    visualizeData.forEach(entry => {
        const tr = body.insertRow();
        columnKeys.forEach((key, position) => {
            // Missing or empty fields render as an empty cell.
            tr.insertCell(position).textContent = entry[key] || '';
        });

        const cssClass = rowClasses.get(entry.row_type);
        if (cssClass !== undefined) {
            tr.className = cssClass;
        }
    });
}
|
||||
|
||||
/**
 * Fetch the Excel export from `/download_excel` and hand it to the browser
 * as a file download, showing a temporary status message for the outcome.
 */
function downloadExcel() {
    const button = document.getElementById('downloadBtn');
    const savedLabel = button.textContent;

    // Show a status message and clear it again after `lifetimeMs`.
    const flashStatus = (markup, lifetimeMs) => {
        const statusDiv = document.getElementById('status');
        statusDiv.innerHTML = markup;
        setTimeout(() => {
            statusDiv.innerHTML = '';
        }, lifetimeMs);
    };

    // Trigger a download by clicking a temporary hidden link to the blob.
    const saveBlob = blob => {
        const url = window.URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.style.display = 'none';
        link.href = url;
        link.download = 'data_comparison_export.xlsx';
        document.body.appendChild(link);
        link.click();
        window.URL.revokeObjectURL(url);
        document.body.removeChild(link);
    };

    // Busy state while the export is generated.
    button.disabled = true;
    button.textContent = '⏳ Generating...';
    button.style.background = '#6c757d';

    fetch('/download_excel', {
        method: 'GET'
    })
    .then(response => {
        if (response.ok) {
            return response.blob();
        }
        throw new Error('Download failed');
    })
    .then(blob => {
        saveBlob(blob);
        flashStatus('<div class="success">Excel file downloaded successfully!</div>', 3000);
    })
    .catch(error => {
        console.error('Download error:', error);
        flashStatus('<div class="error">Download failed. Please try again.</div>', 5000);
    })
    .finally(() => {
        // Restore the button regardless of the outcome.
        button.disabled = false;
        button.textContent = savedLabel;
        button.style.background = '#28a745';
    });
}
|
||||
|
||||
// On page load: put the sheet filter into its placeholder state, then run
// an analysis with whatever file path the input currently holds.
window.onload = () => {
    updateSheetFilter([], null);
    analyzeData();
};
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
106
web_gui.py
106
web_gui.py
@ -66,6 +66,10 @@ def analyze_data():
|
||||
visualize_data = comparator_instance.generate_visualize_data(sheet_filter)
|
||||
comparison_results['visualize_data'] = visualize_data
|
||||
|
||||
# Get dynamic coordi label for display
|
||||
coordi_label = comparator_instance.get_coordi_label_for_sheet(sheet_filter)
|
||||
comparison_results['coordi_label'] = coordi_label
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'results': comparison_results
|
||||
@ -168,16 +172,31 @@ def download_excel():
|
||||
workbook = writer.book
|
||||
worksheet = writer.sheets[clean_sheet_name]
|
||||
|
||||
# Apply color formatting based on Type column
|
||||
from openpyxl.styles import PatternFill
|
||||
# Apply professional color formatting based on Type column
|
||||
from openpyxl.styles import PatternFill, Font
|
||||
|
||||
# Define colors matching the web interface
|
||||
# Define professional colors matching the new web interface design system
|
||||
colors = {
|
||||
'Coordi Only': PatternFill(start_color='FF4444', end_color='FF4444', fill_type='solid'),
|
||||
'Kst Only': PatternFill(start_color='4488FF', end_color='4488FF', fill_type='solid'),
|
||||
'Mixed Duplicate': PatternFill(start_color='FF8800', end_color='FF8800', fill_type='solid'),
|
||||
'Pure Duplicate': PatternFill(start_color='8844FF', end_color='8844FF', fill_type='solid'),
|
||||
'Matched': PatternFill(start_color='FFFFFF', end_color='FFFFFF', fill_type='solid')
|
||||
'Coordi Only': {
|
||||
'fill': PatternFill(start_color='F0F9FF', end_color='F0F9FF', fill_type='solid'),
|
||||
'font': Font(color='0C4A6E', bold=True)
|
||||
},
|
||||
'Kst Only': {
|
||||
'fill': PatternFill(start_color='F7FEE7', end_color='F7FEE7', fill_type='solid'),
|
||||
'font': Font(color='365314', bold=True)
|
||||
},
|
||||
'Mixed Duplicate': {
|
||||
'fill': PatternFill(start_color='FEFBEB', end_color='FEFBEB', fill_type='solid'),
|
||||
'font': Font(color='92400E', bold=True)
|
||||
},
|
||||
'Pure Duplicate': {
|
||||
'fill': PatternFill(start_color='FEF2F2', end_color='FEF2F2', fill_type='solid'),
|
||||
'font': Font(color='991B1B', bold=True)
|
||||
},
|
||||
'Matched': {
|
||||
'fill': PatternFill(start_color='FFFFFF', end_color='FFFFFF', fill_type='solid'),
|
||||
'font': Font(color='374151', bold=False)
|
||||
}
|
||||
}
|
||||
|
||||
# Find the Type column (should be column F, index 5)
|
||||
@ -187,15 +206,16 @@ def download_excel():
|
||||
type_col_idx = idx + 1 # Excel is 1-indexed
|
||||
break
|
||||
|
||||
# Apply formatting to data rows (skip header)
|
||||
# Apply professional formatting to data rows (skip header)
|
||||
if type_col_idx:
|
||||
for row_idx, row_data in enumerate(sheet_data, start=2): # Start from row 2 (after header)
|
||||
row_type = row_data.get('Type', '')
|
||||
fill = colors.get(row_type)
|
||||
if fill:
|
||||
style_config = colors.get(row_type)
|
||||
if style_config:
|
||||
for col_idx in range(1, len(df.columns) + 1):
|
||||
cell = worksheet.cell(row=row_idx, column=col_idx)
|
||||
cell.fill = fill
|
||||
cell.fill = style_config['fill']
|
||||
cell.font = style_config['font']
|
||||
|
||||
# Auto-adjust column widths
|
||||
for column in worksheet.columns:
|
||||
@ -230,6 +250,12 @@ def create_templates_dir():
|
||||
templates_dir = Path('templates')
|
||||
templates_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Only create the HTML file if it doesn't exist (don't overwrite existing customizations)
|
||||
html_file = templates_dir / 'index.html'
|
||||
if html_file.exists():
|
||||
print("Template file already exists, skipping auto-generation to preserve customizations.")
|
||||
return
|
||||
|
||||
html_content = '''<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
@ -406,30 +432,40 @@ def create_templates_dir():
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
/* Vibrant color styles for Visualize tab */
|
||||
/* Professional UI Design System - Semantic Color Palette */
|
||||
.coordi-only-row {
|
||||
background-color: #ff4444 !important; /* Bright red */
|
||||
color: white;
|
||||
background-color: #f0f9ff !important; /* Sky blue 50 - Information state */
|
||||
color: #0c4a6e !important; /* Sky blue 900 - High contrast text */
|
||||
border-left: 4px solid #0ea5e9 !important; /* Sky blue 500 - Primary accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.kst-only-row {
|
||||
background-color: #4488ff !important; /* Bright blue */
|
||||
color: white;
|
||||
background-color: #f7fee7 !important; /* Lime 50 - Success/Available state */
|
||||
color: #365314 !important; /* Lime 900 - High contrast text */
|
||||
border-left: 4px solid #65a30d !important; /* Lime 600 - Success accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.mixed-duplicate-row {
|
||||
background-color: #ff8800 !important; /* Bright orange */
|
||||
color: white;
|
||||
background-color: #fefbeb !important; /* Amber 50 - Warning state */
|
||||
color: #92400e !important; /* Amber 800 - High contrast text */
|
||||
border-left: 4px solid #f59e0b !important; /* Amber 500 - Warning accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.pure-duplicate-row {
|
||||
background-color: #8844ff !important; /* Bright purple */
|
||||
color: white;
|
||||
background-color: #fef2f2 !important; /* Red 50 - Error/Critical state */
|
||||
color: #991b1b !important; /* Red 800 - High contrast text */
|
||||
border-left: 4px solid #ef4444 !important; /* Red 500 - Error accent */
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.matched-row {
|
||||
background-color: white !important; /* White background */
|
||||
color: black;
|
||||
background-color: #ffffff !important; /* Pure white - Neutral/Default state */
|
||||
color: #374151 !important; /* Gray 700 - Standard text */
|
||||
border-left: 4px solid #10b981 !important; /* Emerald 500 - Success indicator */
|
||||
font-weight: 400;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
@ -524,8 +560,10 @@ def create_templates_dir():
|
||||
<table id="visualize-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Coordi Title</th>
|
||||
<th>Coordi Chapter</th>
|
||||
<th id="coordi-lang-header">Coordi Lang</th>
|
||||
<th id="coordi-title-header">Coordi Title</th>
|
||||
<th id="coordi-chapter-header">Coordi Chapter</th>
|
||||
<th>KST Lang</th>
|
||||
<th>KST Title</th>
|
||||
<th>KST Chapter</th>
|
||||
<th>Status</th>
|
||||
@ -723,6 +761,12 @@ def create_templates_dir():
|
||||
}
|
||||
|
||||
function updateResults(results) {
|
||||
// Update dynamic headers with coordi label
|
||||
const coordiLabel = results.coordi_label || 'Coordi';
|
||||
document.getElementById('coordi-lang-header').textContent = `${coordiLabel} Lang`;
|
||||
document.getElementById('coordi-title-header').textContent = `${coordiLabel} Title`;
|
||||
document.getElementById('coordi-chapter-header').textContent = `${coordiLabel} Chapter`;
|
||||
|
||||
// Update count displays
|
||||
document.getElementById('matched-count-display').textContent = results.matched_items_count.toLocaleString();
|
||||
|
||||
@ -878,11 +922,13 @@ def create_templates_dir():
|
||||
// Data is already sorted by the backend (mismatches first, then matches, all by Korean title)
|
||||
visualizeData.forEach(row => {
|
||||
const tr = tbody.insertRow();
|
||||
tr.insertCell(0).textContent = row.coordi_title || '';
|
||||
tr.insertCell(1).textContent = row.coordi_chapter || '';
|
||||
tr.insertCell(2).textContent = row.kst_title || '';
|
||||
tr.insertCell(3).textContent = row.kst_chapter || '';
|
||||
tr.insertCell(4).textContent = row.reason || '';
|
||||
tr.insertCell(0).textContent = row.coordi_language || '';
|
||||
tr.insertCell(1).textContent = row.coordi_title || '';
|
||||
tr.insertCell(2).textContent = row.coordi_chapter || '';
|
||||
tr.insertCell(3).textContent = row.kst_language || '';
|
||||
tr.insertCell(4).textContent = row.kst_title || '';
|
||||
tr.insertCell(5).textContent = row.kst_chapter || '';
|
||||
tr.insertCell(6).textContent = row.reason || '';
|
||||
|
||||
// Apply vibrant color highlighting based on row type
|
||||
switch (row.row_type) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user