|
@@ -94,6 +94,40 @@ def extract_markdown_from_conversation(conversation: BeautifulSoup) -> str:
|
|
|
code_text = get_text_with_formatting(element)
|
|
code_text = get_text_with_formatting(element)
|
|
|
md_lines.append(f"`{code_text}`")
|
|
md_lines.append(f"`{code_text}`")
|
|
|
|
|
|
|
|
|
|
+ # Tables
|
|
|
|
|
+ elif tag == "table":
|
|
|
|
|
+ def extract_rows(section):
|
|
|
|
|
+ return section.find_all("tr") if section else []
|
|
|
|
|
+
|
|
|
|
|
+ thead = element.find("thead")
|
|
|
|
|
+ tbody = element.find("tbody")
|
|
|
|
|
+ tfoot = element.find("tfoot")
|
|
|
|
|
+
|
|
|
|
|
+ rows = extract_rows(thead) + extract_rows(tbody) + extract_rows(element) + extract_rows(tfoot)
|
|
|
|
|
+ seen = set()
|
|
|
|
|
+ filtered_rows = []
|
|
|
|
|
+ for tr in rows:
|
|
|
|
|
+ if tr not in seen:
|
|
|
|
|
+ seen.add(tr)
|
|
|
|
|
+ filtered_rows.append(tr)
|
|
|
|
|
+
|
|
|
|
|
+ if not filtered_rows:
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ table_lines = []
|
|
|
|
|
+ for row_idx, tr in enumerate(filtered_rows):
|
|
|
|
|
+ cells = tr.find_all(["th", "td"])
|
|
|
|
|
+ row = [get_text_with_formatting(cell).strip() for cell in cells]
|
|
|
|
|
+ line = "| " + " | ".join(row) + " |"
|
|
|
|
|
+ table_lines.append(line)
|
|
|
|
|
+
|
|
|
|
|
+ # After the first row, add separator (assuming it's the header)
|
|
|
|
|
+ if row_idx == 0:
|
|
|
|
|
+ separator = "| " + " | ".join(["---"] * len(row)) + " |"
|
|
|
|
|
+ table_lines.insert(1, separator)
|
|
|
|
|
+
|
|
|
|
|
+ md_lines.append("\n".join(table_lines))
|
|
|
|
|
+
|
|
|
# Fallback
|
|
# Fallback
|
|
|
else:
|
|
else:
|
|
|
text = get_text_with_formatting(element)
|
|
text = get_text_with_formatting(element)
|