import os
import re

FILES_LIST = "list.txt"
BSC_ROOT = "."
DOCS_OUTPUT = "docs"
RAW_BASE_URL = "https://raw.usercontent.amit.is-a.dev"

# Protected paths that should never be overwritten
PROTECTED_INDEX_FILES = {
    os.path.normpath(os.path.join(DOCS_OUTPUT, f"semester_{i}", "index.md"))
    for i in range(1, 9)
}
PROTECTED_INDEX_FILES.add(os.path.normpath(os.path.join(DOCS_OUTPUT, "index.md")))

ALGO_FOLDER_NAME = "algorithms"  # folder name that contains algorithm .md files

# Directories excluded from index generation
IGNORED_FOLDERS = {
    "stylesheets",
    "overrides",
    "assets",
    ".vitepress",
    "node_modules",
    "public",
}

# Language config mapped by file extension.
# NOTE(review): 'style' presumably selects parse_c_style vs parse_hash_style;
# the dispatch is not visible in this part of the file -- confirm.
SUPPORTED_LANGS = {
    '.c': {'label': 'C', 'fence': 'c', 'style': 'c'},
    '.cpp': {'label': 'C++', 'fence': 'cpp', 'style': 'c'},
    '.h': {'label': 'C Header', 'fence': 'c', 'style': 'c'},
    '.hpp': {'label': 'C++ Header', 'fence': 'cpp', 'style': 'c'},
    '.r': {'label': 'R', 'fence': 'r', 'style': 'hash'},
    '.py': {'label': 'Python', 'fence': 'python', 'style': 'hash'},
    '.java': {'label': 'Java', 'fence': 'java', 'style': 'c'},
    '.js': {'label': 'JavaScript', 'fence': 'javascript', 'style': 'c'},
    '.ts': {'label': 'TypeScript', 'fence': 'typescript', 'style': 'c'},
    '.sh': {'label': 'Bash', 'fence': 'bash', 'style': 'hash'},
}

# Metadata field names, in match-priority order (first substring hit wins).
_META_KEYS = ("author", "date", "repo", "license")


def _collect_meta(text, meta):
    """Update *meta* in place from the ``key: value`` fields found in *text*.

    *text* may hold one field or several separated by ``|``. A field is
    assigned to the first name in ``_META_KEYS`` that occurs as a substring
    of its lower-cased key; fields matching none of them are ignored.
    """
    for part in text.split('|'):
        if ':' not in part:
            continue
        key, _, val = part.partition(':')
        key = key.strip().lower()
        for name in _META_KEYS:
            if name in key:
                meta[name] = val.strip()
                break


def read_block_comment(lines, start):
    """Read the ``/* ... */`` comment that opens at ``lines[start]``.

    Args:
        lines: The file content as a list of lines.
        start: Index of the line that begins with ``/*``.

    Returns:
        ``(texts, next_index)``: the non-empty comment lines with surrounding
        whitespace and ``*`` decoration stripped, and the index of the first
        line after the comment. An unterminated comment runs to the end of
        *lines*.
    """
    n = len(lines)
    if start >= n:
        return [], start

    result = []
    i = start
    first = lines[i].strip()

    # Look for the terminator *after* the two-character opener so the '*' of
    # '/*' cannot be mistaken for the start of '*/' (as in '/*/').
    end = first.find('*/', 2) if first.startswith('/*') else -1
    if end != -1:
        inner = first[2:end].strip().strip('*').strip()
        if inner:
            result.append(inner)
        return result, i + 1

    inner = first[2:].strip().strip('*').strip()
    if inner:
        result.append(inner)
    i += 1

    while i < n:
        line = lines[i].strip()
        if '*/' in line:
            text = line[: line.index('*/')].strip().strip('*').strip()
            if text:
                result.append(text)
            return result, i + 1
        text = line.strip('*').strip()
        if text:
            result.append(text)
        i += 1
    return result, i


def esc_yaml(s: str) -> str:
    """Escape *s* for use inside a single-quoted YAML scalar."""
    return s.replace("'", "''")


def esc_html(s: str) -> str:
    """Escape characters that break Vue template compilation in VitePress."""
    # '&' must be replaced first so the entities produced below are not
    # escaped a second time.
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def format_author(author: str) -> str:
    """Return a mailto markdown link if the author string contains an email.

    ``Name <mail>`` becomes ``[Name](mailto:mail)``. When no name precedes
    the address, the address itself is used as the link label. Any other
    string is returned HTML-escaped.
    """
    m = re.search(r'(.*?)\s*<(.*?)>', author)
    if not m:
        return esc_html(author)
    name = esc_html(m.group(1).strip())
    email = esc_html(m.group(2).strip())
    return f"[{name or email}](mailto:{email})"


def parse_c_style(content):
    """Parse a source file whose header uses ``/* ... */`` comments.

    The first block comment is read as metadata (``key: value`` fields, one
    per line or pipe-separated). A second block comment, if present and not
    containing ``#include`` or ``import ``, is taken as the problem statement.

    Returns:
        ``(author, date, repo, license_str, problem_statement, code)``.
        *code* starts at the first line beginning with ``#include`` or
        ``import ``; if there is none, it is the whole stripped content.
    """
    lines = content.splitlines()
    n = len(lines)
    i = 0
    meta = dict.fromkeys(_META_KEYS, "")
    problem_statement = ""

    while i < n and not lines[i].strip():
        i += 1

    # Extract metadata block
    if i < n and lines[i].strip().startswith('/*'):
        block, i = read_block_comment(lines, i)
        for line in block:
            # Handles both newline-per-field and pipe-separated formats
            _collect_meta(line, meta)

    while i < n and not lines[i].strip():
        i += 1

    # Extract problem statement block (ensures it's not the actual code starting)
    if i < n and lines[i].strip().startswith('/*'):
        peek_block, peek_i = read_block_comment(lines, i)
        block_text = ' '.join(peek_block)
        if '#include' not in block_text and 'import ' not in block_text:
            problem_statement = ' '.join(p for p in peek_block if p).strip()
            i = peek_i

    # Locate the beginning of actual source code: search forward from the
    # current position first, then wrap around to the lines before it.
    code_start = None
    for j in [*range(i, n), *range(i)]:
        if lines[j].strip().startswith(('#include', 'import ')):
            code_start = j
            break

    if code_start is not None:
        code = '\n'.join(lines[code_start:]).strip()
    else:
        code = content.strip()
    return (meta["author"], meta["date"], meta["repo"], meta["license"],
            problem_statement, code)


def parse_hash_style(content):
    """Parse a source file whose header uses ``#`` line comments.

    The first run of ``#`` lines is read as metadata (``key: value`` fields,
    one per line or pipe-separated); the next run, after optional blank
    lines, is the problem statement. A leading ``#!`` shebang line is skipped
    so it is not mistaken for the metadata block.

    Returns:
        ``(author, date, repo, license_str, problem_statement, code)`` where
        *code* is the whole stripped content, header comments included.
    """
    lines = content.splitlines()
    n = len(lines)
    i = 0
    meta = dict.fromkeys(_META_KEYS, "")
    problem_statement = ""

    while i < n and not lines[i].strip():
        i += 1

    # A shebang is not metadata; without this skip, "shebang, blank line,
    # metadata" would report the metadata as the problem statement.
    if i < n and lines[i].strip().startswith('#!'):
        i += 1
        while i < n and not lines[i].strip():
            i += 1

    # Extract metadata block
    while i < n and lines[i].strip().startswith('#'):
        _collect_meta(lines[i].strip()[1:].strip(), meta)
        i += 1

    while i < n and not lines[i].strip():
        i += 1

    # Extract problem statement block
    ps_lines = []
    while i < n and lines[i].strip().startswith('#'):
        text = lines[i].strip()[1:].strip()
        if text:
            ps_lines.append(text)
        i += 1
    if ps_lines:
        problem_statement = ' '.join(ps_lines).strip()

    code = content.strip()
    return (meta["author"], meta["date"], meta["repo"], meta["license"],
            problem_statement, code)


# NOTE(review): the icon markup is empty in this copy of the file -- confirm
# whether an inline SVG is expected here.
ALGO_ICON_SVG = ''


def parse_algo_md(content):
    """Parse a GitHub-style algorithm .md file and convert to VitePress format.

    Returns:
        ``(title, problem_statement, body_lines)``. *title* is the text of
        the first ``# `` heading (empty if there is none). *problem_statement*
        is the blockquote under ``### Problem Statement`` joined with spaces.
        *body_lines* are the remaining lines after the title, with the
        problem section removed. Lines before the title are dropped; if no
        title exists, the whole document is treated as body.
    """
    lines = content.splitlines()
    title = ""
    start = 0

    # Extract title from first # heading
    for idx, line in enumerate(lines):
        if line.startswith("# "):
            title = line[2:].strip()
            start = idx + 1
            break

    # Parse rest: find problem statement in > blockquote under ### Problem Statement
    problem_statement = ""
    body_lines = []
    ps_lines = []
    in_problem_section = False
    for line in lines[start:]:
        stripped = line.strip()
        if stripped.lower().startswith("### problem statement"):
            in_problem_section = True
            continue
        if not in_problem_section:
            body_lines.append(line)
            continue
        # Allow blank lines between heading and blockquote
        if stripped == "":
            continue
        # Any '>'-prefixed line is blockquote text, with or without a space
        # after the marker; a bare '>' contributes nothing.
        if stripped.startswith(">"):
            text = stripped[1:].strip()
            if text:
                ps_lines.append(text)
            continue
        # Non-blockquote, non-blank line ends problem section
        in_problem_section = False
        problem_statement = " ".join(ps_lines).strip()
        body_lines.append(line)

    if in_problem_section and ps_lines:
        problem_statement = " ".join(ps_lines).strip()
    return title, problem_statement, body_lines


def build_algo_md(filename_base, title, problem_statement, body_lines, source_path=""):
    """Build VitePress .md from parsed algo content.

    Args:
        filename_base: Not used by this function; kept for the caller's
            signature.
        title: Page title, used in the front matter and the ``#`` heading.
        problem_statement: Text for the tip container; the section is omitted
            when empty.
        body_lines: Remaining document lines. Any ``### Problem Statement``
            section still present in them is removed.
        source_path: Value for the ``source`` front-matter key.

    Returns:
        The complete page as one newline-joined string.
    """
    desc = problem_statement if problem_statement else f"Algorithm — {title}"
    fm = [
        "---",
        f"title: '{ALGO_ICON_SVG} {esc_yaml(title)}'",
        f"description: '{esc_yaml(desc)}'",
        # Escaped like the other scalars: a quote in the path would otherwise
        # terminate the YAML string early.
        f"source: '{esc_yaml(source_path)}'",
        "---",
        "",
    ]
    body = [f"# {title}", ""]
    if problem_statement:
        body += [
            "### Problem Statement",
            "",
            "::: tip Problem Statement",
            esc_html(problem_statement),
            ":::",
            "",
        ]

    # Drop any problem-statement section left in the body (it is rendered
    # above) and keep everything else (Algorithm, Pseudocode, Complexity etc.).
    cleaned = []
    j = 0
    total = len(body_lines)
    while j < total:
        line = body_lines[j]
        if line.strip().lower().startswith("### problem statement"):
            # skip until blockquote ends
            j += 1
            while j < total:
                nxt = body_lines[j].strip()
                if nxt != "" and not nxt.startswith(">"):
                    break
                j += 1
            continue
        cleaned.append(line)
        j += 1

    # Remove leading blank lines from cleaned
    first_content = 0
    while first_content < len(cleaned) and not cleaned[first_content].strip():
        first_content += 1
    body += cleaned[first_content:]

    return "\n".join(fm + body)
"", "### Problem Statement", "", f"::: tip {filename}", esc_html(problem_statement), ":::", "", ] body += [ "## Source Code", "", f"```{fence_lang} [{filename}]", code, "```", "---", # Action links — minimal, right-aligned feel via HTML '