10 月之前 · 41b2b962dc
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 
				+venv
			
 
				+input
			
--- a/main.py
+++ b/main.py
@@ -0,0 +1,53 @@
 
				+import sys
			
 
				+import os
			
 
				+from bs4 import BeautifulSoup
			
 
				+
			
 
				+def sanitize_filename(title: str) -> str:
			
 
				+    return "".join(c if c.isalnum() or c in (' ', '-', '_') else '_' for c in title).strip()[:100]
			
 
				+
			
 
				+def convert_chat_html_to_markdown(html_path: str) -> str:
			
 
				+    with open(html_path, "r", encoding="utf-8") as f:
			
 
				+        soup = BeautifulSoup(f, "html.parser")
			
 
				+
			
 
				+    title = soup.title.string.strip() if soup.title else "chatgpt_conversation"
			
 
				+    filename = sanitize_filename(title) + ".md"
			
 
				+
			
 
				+    # Find the main chat container
			
 
				+    main_content = soup.find("main")
			
 
				+    if not main_content:
			
 
				+        raise ValueError("Could not find <main> in HTML. Is this a valid saved ChatGPT conversation?")
			
 
				+
			
 
				+    # Each message: role in h3, content in a sibling div
			
 
				+    h3s = main_content.find_all("h3")
			
 
				+    prose_divs = main_content.find_all("div", class_="prose")
			
 
				+
			
 
				+    if len(h3s) != len(prose_divs):
			
 
				+        print("Warning: Number of roles and messages doesn't match. Continuing anyway...")
			
 
				+
			
 
				+    messages = []
			
 
				+    for role_elem, content_elem in zip(h3s, prose_divs):
			
 
				+        role = role_elem.get_text(strip=True)
			
 
				+        content = content_elem.get_text(separator="\n", strip=True)
			
 
				+        role_prefix = "**ChatGPT:**" if "chatgpt" in role.lower() else "**You:**"
			
 
				+        messages.append(f"{role_prefix}\n\n{content}")
			
 
				+
			
 
				+    markdown = f"# {title}\n\n" + "\n\n---\n\n".join(messages)
			
 
				+    return filename, markdown
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    if len(sys.argv) != 2:
			
 
				+        print("Usage: python html_to_markdown.py <path_to_saved_html>")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    input_html = sys.argv[1]
			
 
				+    if not os.path.isfile(input_html):
			
 
				+        print(f"File not found: {input_html}")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    output_name, markdown_text = convert_chat_html_to_markdown(input_html)
			
 
				+    output_path = os.path.join(os.path.dirname(input_html), output_name)
			
 
				+
			
 
				+    with open(output_path, "w", encoding="utf-8") as f:
			
 
				+        f.write(markdown_text)
			
 
				+
			
 
				+    print(f"Markdown saved to: {output_path}")
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
 
				+beautifulsoup4==4.13.4
			
 
				+soupsieve==2.7
			
 
				+typing_extensions==4.13.2