"""Translate a tree of Markdown files into Spanish under an ``es/`` subfolder.

Every ``*.md`` file below ``SOURCE_DIR`` (except the output folder itself) has
its site-absolute links re-pointed at ``/es/...``, is translated through the
OpenAI chat completions API, and is written to the mirrored path below
``TARGET_DIR``.
"""

import os
import re
import shutil
from pathlib import Path

from openai import OpenAI

# --- CONFIG ---
SOURCE_DIR = Path(".")
TARGET_DIR = SOURCE_DIR / "es"
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
TARGET_DIR.mkdir(exist_ok=True)

# Read the key from the environment; keep secrets out of the source file.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)  # or whichever LLM API you want

# Markdown inline link/image target that starts with "/": "](/path)".
_LINK_RE = re.compile(r"\]\((/[^)]+)\)")


def find_markdown_files(base_dir, exclude_dir=TARGET_DIR):
    """Recursively yield the path of every ``.md`` file below *base_dir*.

    Args:
        base_dir: Directory to walk.
        exclude_dir: Directory that is not descended into (the translation
            output folder by default). Pass ``None`` to exclude nothing.

    Yields:
        ``Path`` objects, one per Markdown file found.
    """
    excluded = None if exclude_dir is None else Path(exclude_dir).resolve()
    for root, dirs, files in os.walk(base_dir):
        root_path = Path(root)
        if excluded is not None:
            # Prune in place so os.walk does not descend into the output
            # folder. Only that exact directory is skipped; an unrelated
            # source folder that merely happens to be named "es" is kept.
            dirs[:] = [d for d in dirs if (root_path / d).resolve() != excluded]
        for name in files:
            if name.endswith(".md"):
                yield root_path / name


def adjust_urls(content):
    """Prefix site-absolute Markdown link targets in *content* with ``/es``.

    Left unchanged are targets that start with ``/assets``, targets that
    already point at ``/es``, and protocol-relative URLs (``//host/...``).

    Args:
        content: Markdown text.

    Returns:
        The text with the matching link targets rewritten.
    """

    def replacer(match):
        url = match.group(1)
        # "//host/path" is an external protocol-relative URL, not a site path.
        if url.startswith("//"):
            return match.group(0)
        if url.startswith("/assets"):
            return match.group(0)
        # Avoid double-prefixing links that are already localised, including
        # a bare "/es" and "/es" followed by a fragment or query string.
        if url == "/es" or url.startswith(("/es/", "/es#", "/es?")):
            return match.group(0)
        return f"](/es{url})"

    return _LINK_RE.sub(replacer, content)


def translate_text(text, target_lang="es"):
    """Translate Markdown *text* into *target_lang* with the chat API.

    Args:
        text: Markdown source text.
        target_lang: Language name or code inserted into the system prompt.

    Returns:
        The translated Markdown. Blank input is returned unchanged without
        calling the API.

    Raises:
        RuntimeError: If the API response carries no message content.
    """
    if not text.strip():
        return text

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a translator. Translate the following text "
                    f"into {target_lang}. \n"
                    "The input is in markdown and the output should also "
                    "be in markdown."
                ),
            },
            {"role": "user", "content": text},
        ],
    )
    translated = response.choices[0].message.content
    if translated is None:
        # Fail here with a clear message instead of a TypeError on write.
        raise RuntimeError("Translation API returned no message content")
    return translated


def process_file(src_path, base_dir=SOURCE_DIR, target_dir=TARGET_DIR):
    """Translate one Markdown file and write it below *target_dir*.

    The destination keeps the file's path relative to *base_dir*; missing
    parent directories are created.

    Args:
        src_path: Path of the Markdown file to translate.
        base_dir: Root that *src_path* is made relative to.
        target_dir: Root of the translated output tree.
    """
    src_path = Path(src_path)
    rel_path = src_path.relative_to(base_dir)
    dest_path = Path(target_dir) / rel_path
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    with open(src_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Adjust urls first, then translate the adjusted text.
    adjusted = adjust_urls(content)
    translated = translate_text(adjusted)

    with open(dest_path, "w", encoding="utf-8") as f:
        f.write(translated)

    print(f"Translated: {src_path} -> {dest_path}")


def main():
    """Translate every Markdown file found below ``SOURCE_DIR``."""
    for md_file in find_markdown_files(SOURCE_DIR):
        process_file(md_file)


if __name__ == "__main__":
    main()