I'm going to bed -=-
This commit is contained in:
2026-02-12 02:28:23 +02:00
parent 0b92f6f239
commit a5d75e6bac
1972 changed files with 308880 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
# Public names re-exported by this package: the core state object plus the
# core rule functions imported below.
__all__ = (
    "StateCore",
    "block",
    "inline",
    "linkify",
    "normalize",
    "replace",
    "smartquotes",
    "text_join",
)
from .block import block
from .inline import inline
from .linkify import linkify
from .normalize import normalize
from .replacements import replace
from .smartquotes import smartquotes
from .state_core import StateCore
from .text_join import text_join

View File

@@ -0,0 +1,13 @@
from ..token import Token
from .state_core import StateCore
def block(state: StateCore) -> None:
    """Core rule: tokenize the source at block level.

    In inline mode the whole source is wrapped in a single ``inline`` token;
    otherwise the full block parser is run over it.
    """
    if not state.inlineMode:
        state.md.block.parse(state.src, state.md, state.env, state.tokens)
        return
    # Inline mode: emit one synthetic inline token covering the whole input.
    token = Token("inline", "", 0)
    token.content = state.src
    token.map = [0, 1]
    token.children = []
    state.tokens.append(token)

View File

@@ -0,0 +1,10 @@
from .state_core import StateCore
def inline(state: StateCore) -> None:
    """Core rule: run the inline parser over every ``inline`` token."""
    for tok in state.tokens:
        if tok.type != "inline":
            continue
        # Ensure a list exists for the inline parser to append children into.
        if tok.children is None:
            tok.children = []
        state.md.inline.parse(tok.content, state.md, state.env, tok.children)

View File

@@ -0,0 +1,149 @@
from __future__ import annotations
import re
from typing import Protocol
from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen
from ..token import Token
from .state_core import StateCore
# Used to strip the temporary "http://" prefix added for bare-domain matches.
HTTP_RE = re.compile(r"^http://")
# Used to strip the "mailto:" prefix from displayed link text.
MAILTO_RE = re.compile(r"^mailto:")
TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
def linkify(state: StateCore) -> None:
    """Rule for identifying plain-text links.

    Scans every ``inline`` token's children for text that the linkify
    engine recognises as a URL/e-mail, and replaces the matching text
    tokens with ``link_open``/``text``/``link_close`` triples.
    No-op unless the ``linkify`` option is enabled.
    """
    if not state.md.options.linkify:
        return
    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")
    for inline_token in state.tokens:
        # Cheap pretest before walking the children token list.
        if inline_token.type != "inline" or not state.md.linkify.pretest(
            inline_token.content
        ):
            continue
        tokens = inline_token.children
        htmlLinkLevel = 0
        # We scan from the end, to keep position when new tags added.
        # Use reversed logic in links start/end match
        assert tokens is not None
        i = len(tokens)
        while i >= 1:
            i -= 1
            assert isinstance(tokens, list)
            currentToken = tokens[i]
            # Skip content of markdown links
            if currentToken.type == "link_close":
                i -= 1
                # Walk back to the matching link_open at the same level.
                while (
                    tokens[i].level != currentToken.level
                    and tokens[i].type != "link_open"
                ):
                    i -= 1
                continue
            # Skip content of html tag links
            # (reverse scan: a closing </a> raises the depth, an opening <a> lowers it)
            if currentToken.type == "html_inline":
                if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
                    htmlLinkLevel -= 1
                if isLinkClose(currentToken.content):
                    htmlLinkLevel += 1
            if htmlLinkLevel > 0:
                continue
            if currentToken.type == "text" and state.md.linkify.test(
                currentToken.content
            ):
                text = currentToken.content
                links: list[_LinkType] = state.md.linkify.match(text) or []
                # Now split string to nodes
                nodes = []
                level = currentToken.level
                lastPos = 0
                # forbid escape sequence at the start of the string,
                # this avoids http\://example.com/ from being linkified as
                # http:<a href="//example.com/">//example.com/</a>
                if (
                    links
                    and links[0].index == 0
                    and i > 0
                    and tokens[i - 1].type == "text_special"
                ):
                    links = links[1:]
                for link in links:
                    url = link.url
                    fullUrl = state.md.normalizeLink(url)
                    if not state.md.validateLink(fullUrl):
                        continue
                    urlText = link.text
                    # Linkifier might send raw hostnames like "example.com", where url
                    # starts with domain name. So we prepend http:// in those cases,
                    # and remove it afterwards.
                    if not link.schema:
                        urlText = HTTP_RE.sub(
                            "", state.md.normalizeLinkText("http://" + urlText)
                        )
                    elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText):
                        urlText = MAILTO_RE.sub(
                            "", state.md.normalizeLinkText("mailto:" + urlText)
                        )
                    else:
                        urlText = state.md.normalizeLinkText(urlText)
                    pos = link.index
                    # Plain text between the previous match and this one.
                    if pos > lastPos:
                        token = Token("text", "", 0)
                        token.content = text[lastPos:pos]
                        token.level = level
                        nodes.append(token)
                    token = Token("link_open", "a", 1)
                    token.attrs = {"href": fullUrl}
                    token.level = level
                    level += 1
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)
                    token = Token("text", "", 0)
                    token.content = urlText
                    token.level = level
                    nodes.append(token)
                    token = Token("link_close", "a", -1)
                    level -= 1
                    token.level = level
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)
                    lastPos = link.last_index
                # Trailing text after the last match.
                if lastPos < len(text):
                    token = Token("text", "", 0)
                    token.content = text[lastPos:]
                    token.level = level
                    nodes.append(token)
                # Splice the new nodes in place of the matched text token.
                inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes)
class _LinkType(Protocol):
    """Structural type of one match object returned by ``linkify.match()``."""

    # Normalized URL of the match.
    url: str
    # Matched text as it appeared in the source.
    text: str
    # Start offset of the match within the scanned string.
    index: int
    # End offset (exclusive) of the match within the scanned string.
    last_index: int
    # URL schema such as "mailto:"; falsy for bare domains
    # (see the `if not link.schema` branch in `linkify`).
    schema: str | None

View File

@@ -0,0 +1,19 @@
"""Normalize input string."""
import re
from .state_core import StateCore
# https://spec.commonmark.org/0.29/#line-ending
NEWLINES_RE = re.compile(r"\r\n?|\n")
NULL_RE = re.compile(r"\0")
def normalize(state: StateCore) -> None:
    """Canonicalise the raw source before any parsing happens.

    Windows/Mac line endings are unified to "\\n" and NULL characters are
    replaced with the Unicode replacement character (U+FFFD).
    """
    unified = NEWLINES_RE.sub("\n", state.src)
    state.src = NULL_RE.sub("\ufffd", unified)

View File

@@ -0,0 +1,127 @@
"""Simple typographic replacements
* ``(c)``, ``(C)`` → ©
* ``(tm)``, ``(TM)`` → ™
* ``(r)``, ``(R)`` → ®
* ``+-`` → ±
* ``...`` → …
* ``?....`` → ?..
* ``!....`` → !..
* ``????????`` → ???
* ``!!!!!`` → !!!
* ``,,,`` → ,
* ``--`` → &ndash
* ``---`` → &mdash
"""
from __future__ import annotations
import logging
import re
from ..token import Token
from .state_core import StateCore
LOGGER = logging.getLogger(__name__)
# TODO:
# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
# - multiplication 2 x 4 -> 2 × 4
# Quick pretest: matches text that *might* need a rare replacement, so the
# individual substitutions below only run when there is a candidate.
RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")
# Workaround for phantomjs - need regex without /g flag,
# or root check will fail every second time
# SCOPED_ABBR_TEST_RE = r"\((c|tm|r)\)"
SCOPED_ABBR_RE = re.compile(r"\((c|tm|r)\)", flags=re.IGNORECASE)
PLUS_MINUS_RE = re.compile(r"\+-")
ELLIPSIS_RE = re.compile(r"\.{2,}")
ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")
QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")
COMMA_RE = re.compile(r",{2,}")
EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)
EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)
EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
# Replacement table for scoped abbreviations: (c) -> ©, (r) -> ®, (tm) -> ™.
# BUG FIX: "tm" previously mapped to the empty string, which silently
# *deleted* "(tm)" from the output instead of replacing it with ™
# (the module docstring documents (tm)/(TM) -> ™).
SCOPED_ABBR = {"c": "©", "r": "®", "tm": "™"}
def replaceFn(match: re.Match[str]) -> str:
    """Return the typographic symbol for a matched ``(c)``/``(r)``/``(tm)``."""
    return SCOPED_ABBR[match.group(1).lower()]
def replace_scoped(inlineTokens: list[Token]) -> None:
    """Substitute ``(c)``/``(r)``/``(tm)`` marks in text tokens.

    Text between an auto-link's open/close tokens is left untouched: the
    depth counter becomes non-zero (hence truthy) inside an auto-link.
    """
    autolink_depth = 0
    for tok in inlineTokens:
        if tok.type == "text" and not autolink_depth:
            tok.content = SCOPED_ABBR_RE.sub(replaceFn, tok.content)
        if tok.type == "link_open" and tok.info == "auto":
            autolink_depth -= 1
        if tok.type == "link_close" and tok.info == "auto":
            autolink_depth += 1
def replace_rare(inlineTokens: list[Token]) -> None:
    """Apply the "rare" typographic substitutions (±, ellipsis, comma runs,
    em/en dashes) to text tokens, skipping text inside auto-links.

    NOTE: ``inside_autolink`` goes to -1 on an auto ``link_open`` and back to
    0 on the matching ``link_close``; any non-zero value is truthy, so text
    between the two is skipped.  This mirrors the upstream implementation.
    """
    inside_autolink = 0
    for token in inlineTokens:
        if (
            token.type == "text"
            and (not inside_autolink)
            and RARE_RE.search(token.content)
        ):
            # +- -> ±
            token.content = PLUS_MINUS_RE.sub("±", token.content)
            # .., ..., ....... -> …
            # BUG FIX: previously substituted the empty string, which deleted
            # the dots entirely instead of collapsing them into an ellipsis.
            token.content = ELLIPSIS_RE.sub("…", token.content)
            # but ?..... & !..... -> ?.. & !..
            token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub("\\1..", token.content)
            token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content)
            # ,, ,,, ,,,, -> ,
            token.content = COMMA_RE.sub(",", token.content)
            # em-dash
            token.content = EM_DASH_RE.sub("\\1\u2014", token.content)
            # en-dash
            token.content = EN_DASH_RE.sub("\\1\u2013", token.content)
            token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content)
        if token.type == "link_open" and token.info == "auto":
            inside_autolink -= 1
        if token.type == "link_close" and token.info == "auto":
            inside_autolink += 1
def replace(state: StateCore) -> None:
    """Core rule: run typographic replacements over every inline token.

    No-op unless the ``typographer`` option is enabled; each family of
    substitutions is gated on a cheap pretest of the token's raw content.
    """
    if not state.md.options.typographer:
        return
    for tok in state.tokens:
        if tok.type != "inline" or tok.children is None:
            continue
        if SCOPED_ABBR_RE.search(tok.content):
            replace_scoped(tok.children)
        if RARE_RE.search(tok.content):
            replace_rare(tok.children)

View File

@@ -0,0 +1,202 @@
"""Convert straight quotation marks to typographic ones"""
from __future__ import annotations
import re
from typing import Any
from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..token import Token
from .state_core import StateCore
QUOTE_TEST_RE = re.compile(r"['\"]")
QUOTE_RE = re.compile(r"['\"]")
APOSTROPHE = "\u2019"  # RIGHT SINGLE QUOTATION MARK, used for mid-word apostrophes
def replaceAt(string: str, index: int, ch: str) -> str:
    """Return *string* with the single character at *index* replaced by *ch*
    (which may be longer than one character).

    When the index is negative, the behavior is different from the js version.
    But basically, the index will not be negative.
    """
    assert index >= 0
    head, tail = string[:index], string[index + 1 :]
    return head + ch + tail
def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes in a flat inline token stream with the
    typographic quotes configured in ``state.md.options.quotes``.

    Maintains a stack of candidate opening quotes across the token list.
    Ported from the JS implementation, including its index conventions.
    """
    stack: list[dict[str, Any]] = []
    for i, token in enumerate(tokens):
        thisLevel = token.level
        # Drop stack entries opened at a deeper nesting level than this token.
        j = 0
        for j in range(len(stack))[::-1]:
            if stack[j]["level"] <= thisLevel:
                break
        else:
            # When the loop is terminated without a "break".
            # Subtract 1 to get the same index as the js version.
            j -= 1
        stack = stack[: j + 1]
        if token.type != "text":
            continue
        text = token.content
        pos = 0
        maximum = len(text)
        while pos < maximum:
            goto_outer = False
            lastIndex = pos
            t = QUOTE_RE.search(text[lastIndex:])
            if not t:
                break
            canOpen = canClose = True
            pos = t.start(0) + lastIndex + 1
            isSingle = t.group(0) == "'"
            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar: None | int = 0x20
            if t.start(0) + lastIndex - 1 >= 0:
                lastChar = charCodeAt(text, t.start(0) + lastIndex - 1)
            else:
                # Quote starts this token: look back through earlier tokens
                # for the last visible character.
                for j in range(i)[::-1]:
                    if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue
                    lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
                    break
            # Find next character,
            # default to space if it's the end of the line
            nextChar: None | int = 0x20
            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                # Quote ends this token: look ahead through later tokens
                # for the next visible character.
                for j in range(i + 1, len(tokens)):
                    # nextChar defaults to 0x20
                    if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue
                    nextChar = charCodeAt(tokens[j].content, 0)
                    break
            isLastPunctChar = lastChar is not None and (
                isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            )
            isNextPunctChar = nextChar is not None and (
                isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
            )
            isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar)
            isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar)
            if isNextWhiteSpace:  # noqa: SIM114
                canOpen = False
            elif isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar):
                canOpen = False
            if isLastWhiteSpace:  # noqa: SIM114
                canClose = False
            elif isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar):
                canClose = False
            if nextChar == 0x22 and t.group(0) == '"':  # 0x22: "  # noqa: SIM102
                if (
                    lastChar is not None and lastChar >= 0x30 and lastChar <= 0x39
                ):  # 0x30: 0, 0x39: 9
                    # special case: 1"" - count first quote as an inch
                    canClose = canOpen = False
            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar
            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(
                        token.content, t.start(0) + lastIndex, APOSTROPHE
                    )
                continue
            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                for j in range(len(stack))[::-1]:
                    item = stack[j]
                    if stack[j]["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and stack[j]["level"] == thisLevel:
                        item = stack[j]
                        if isSingle:
                            openQuote = state.md.options.quotes[2]
                            closeQuote = state.md.options.quotes[3]
                        else:
                            openQuote = state.md.options.quotes[0]
                            closeQuote = state.md.options.quotes[1]
                        # replace token.content *before* tokens[item.token].content,
                        # because, if they are pointing at the same token, replaceAt
                        # could mess up indices when quote length != 1
                        token.content = replaceAt(
                            token.content, t.start(0) + lastIndex, closeQuote
                        )
                        tokens[item["token"]].content = replaceAt(
                            tokens[item["token"]].content, item["pos"], openQuote
                        )
                        # Multi-character quotes shift subsequent offsets.
                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1
                        text = token.content
                        maximum = len(text)
                        stack = stack[:j]
                        goto_outer = True
                        break
                if goto_outer:
                    goto_outer = False
                    continue
            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": t.start(0) + lastIndex,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                token.content = replaceAt(
                    token.content, t.start(0) + lastIndex, APOSTROPHE
                )
def smartquotes(state: StateCore) -> None:
    """Core rule: apply typographic quote replacement to inline tokens.

    No-op unless the ``typographer`` option is enabled; tokens whose raw
    content contains no straight quote are skipped via a cheap pretest.
    """
    if not state.md.options.typographer:
        return
    for tok in state.tokens:
        if tok.type != "inline" or not QUOTE_RE.search(tok.content):
            continue
        if tok.children is not None:
            process_inlines(tok.children, state)

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
class StateCore(StateBase):
    """Mutable state threaded through the chain of core rules."""

    def __init__(
        self,
        src: str,
        md: MarkdownIt,
        env: EnvType,
        tokens: list[Token] | None = None,
    ) -> None:
        self.src = src  # raw source string being parsed
        self.md = md  # link to parser instance
        self.env = env  # sandbox for rule cross-talk / plugin data
        # An empty or missing token list is replaced by a fresh one.
        self.tokens: list[Token] = tokens if tokens else []
        self.inlineMode = False  # when True, `block` emits one inline token

View File

@@ -0,0 +1,35 @@
"""Join raw text tokens with the rest of the text
This is set as a separate rule to provide an opportunity for plugins
to run text replacements after text join, but before escape join.
For example, `\\:)` shouldn't be replaced with an emoji.
"""
from __future__ import annotations
from ..token import Token
from .state_core import StateCore
def text_join(state: StateCore) -> None:
    """Merge ``text_special`` tokens into the surrounding plain text.

    Every ``text_special`` child becomes an ordinary ``text`` token, and
    runs of adjacent ``text`` tokens are concatenated into a single token.
    """
    for inline_tok in state.tokens[:]:
        if inline_tok.type != "inline":
            continue
        merged: list[Token] = []
        for child in inline_tok.children or []:
            if child.type == "text_special":
                child.type = "text"
            if child.type == "text" and merged and merged[-1].type == "text":
                # Fold into the preceding text token.
                merged[-1].content += child.content
            else:
                merged.append(child)
        inline_tok.children = merged