Sfoglia il codice sorgente

Add table support, fixes #3

Josh Rosario 6 mesi fa
parent
commit
45da9a41ff
1 file modificato con 34 aggiunte e 0 eliminazioni
  1. 34 0
      main.py

+ 34 - 0
main.py

@@ -94,6 +94,40 @@ def extract_markdown_from_conversation(conversation: BeautifulSoup) -> str:
             code_text = get_text_with_formatting(element)
             md_lines.append(f"`{code_text}`")
 
+        # Tables
+        elif tag == "table":
+            def extract_rows(section):
+                return section.find_all("tr") if section else []
+
+            thead = element.find("thead")
+            tbody = element.find("tbody")
+            tfoot = element.find("tfoot")
+
+            rows = extract_rows(thead) + extract_rows(tbody) + extract_rows(element) + extract_rows(tfoot)
+            seen = set()
+            filtered_rows = []
+            for tr in rows:
+                if tr not in seen:
+                    seen.add(tr)
+                    filtered_rows.append(tr)
+
+            if not filtered_rows:
+                continue
+
+            table_lines = []
+            for row_idx, tr in enumerate(filtered_rows):
+                cells = tr.find_all(["th", "td"])
+                row = [get_text_with_formatting(cell).strip() for cell in cells]
+                line = "| " + " | ".join(row) + " |"
+                table_lines.append(line)
+
+                # After the first row, add separator (assuming it's the header)
+                if row_idx == 0:
+                    separator = "| " + " | ".join(["---"] * len(row)) + " |"
+                    table_lines.insert(1, separator)
+
+            md_lines.append("\n".join(table_lines))
+
         # Fallback
         else:
             text = get_text_with_formatting(element)