must go first:
text = self._strong_re.sub(r"\2", text)
text = self._em_re.sub(r"\2", text)
return text
# Verbose-mode (re.X) template matching a run of blockquote lines. The `%s`
# slot sits right after the leading '>' so a caller can splice in an extra
# sub-pattern (used below for the optional spoiler marker).
_block_quote_base = r'''
( # Wrap whole match in \1
(
^[ \t]*>%s[ \t]? # '>' at the start of a line
.+\n # rest of the first line
(.+\n)* # subsequent consecutive lines
)+
)
'''
# Plain blockquotes: nothing extra is spliced in after the '>'.
_block_quote_re = re.compile(_block_quote_base % '', re.M | re.X)
# Spoiler-aware variant: tolerates optional whitespace and an optional '!' after the '>'.
_block_quote_re_spoiler = re.compile(_block_quote_base % '[ \t]*?!?', re.M | re.X)
# Matches one level of '>' quoting at the start of each line (used to strip it).
_bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
# Same as above but also consumes the '!' spoiler marker.
_bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
# Matches only when every line of the text begins with '>' followed by '!'.
_bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
# NOTE(review): the literal below is split across two lines and looks truncated
# (presumably it once wrapped a <pre>...</pre> block) -- confirm against the upstream file.
_html_pre_block_re = re.compile(r'(\s*.+?
)', re.S)
def _dedent_two_spaces_sub(self, match):
return re.sub(r'(?m)^ ', '', match.group(1))
# Regex callback for `_do_block_quotes`: converts one matched run of quoted
# lines (group 1) into its rendered form and returns the replacement text.
def _block_quote_sub(self, match):
bq = match.group(1)
# Only treated as a spoiler when the 'spoiler' extra is enabled AND every
# line of the match carries the '>!' marker.
is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq)
# trim one level of quoting
if is_spoiler:
bq = self._bq_one_level_re_spoiler.sub('', bq)
else:
bq = self._bq_one_level_re.sub('', bq)
# trim whitespace-only lines
bq = self._ws_only_line_re.sub('', bq)
# Run the block gamut on the un-quoted text so nested block-level syntax is processed.
bq = self._run_block_gamut(bq) # recurse
# Prefix every line of the rendered content with an indent.
bq = re.sub('(?m)^', ' ', bq)
# These leading spaces screw with content, so we need to fix that:
bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
# NOTE(review): both return literals below are split across lines and look
# truncated (presumably the opening/closing blockquote tags were lost) -- confirm.
if is_spoiler:
return '\n%s\n
\n\n' % bq
else:
return '\n%s\n
\n\n' % bq
@mark_stage(Stage.BLOCK_QUOTES)
def _do_block_quotes(self, text):
    """Replace every blockquote run in `text` via `_block_quote_sub`.

    The spoiler-aware pattern is used when the 'spoiler' extra is enabled;
    otherwise the plain blockquote pattern is used.
    """
    # Cheap guard: without a '>' there cannot be a blockquote.
    if '>' not in text:
        return text
    if 'spoiler' in self.extras:
        quote_re = self._block_quote_re_spoiler
    else:
        quote_re = self._block_quote_re
    return quote_re.sub(self._block_quote_sub, text)
# Splits `text` on blank lines into chunks, swaps hashed HTML blocks back in,
# runs the span gamut on everything else, and joins the results with blank lines.
@mark_stage(Stage.PARAGRAPHS)
def _form_paragraphs(self, text):
# Strip leading and trailing lines:
text = text.strip('\n')
# Process each blank-line-separated chunk in turn.
grafs = []
for i, graf in enumerate(re.split(r"\n{2,}", text)):
if graf in self.html_blocks:
# Unhashify HTML blocks
grafs.append(self.html_blocks[graf])
else:
cuddled_list = None
if "cuddled-lists" in self.extras:
# Need to put back trailing '\n' for `_list_item_re`
# match at the end of the paragraph.
li = self._list_item_re.search(graf + '\n')
# Two of the same list marker in this paragraph: a likely
# candidate for a list cuddled to preceding paragraph
# text (issue 33). Note the `[-1]` is a quick way to
# consider numeric bullets (e.g. "1." and "2.") to be
# equal.
if (li and len(li.group(2)) <= 3
and (
(li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1])
or
li.group("next_marker") is None
)
):
# Split the chunk: text before the list stays a paragraph, the
# rest is rendered through `_do_lists` and appended afterwards.
start = li.start()
cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
assert re.match(r'^<(?:ul|ol).*?>', cuddled_list)
graf = graf[:start]
# NOTE(review): the comment and the string literals in the next few lines are
# split across lines and look truncated (presumably the paragraph open/close
# tags were lost) -- confirm against the upstream file.
# Wrap
tags.
graf = self._run_span_gamut(graf)
grafs.append("
" % self._html_class_str_from_tag('p') + graf.lstrip(" \t") + "
")
if cuddled_list:
grafs.append(cuddled_list)
return "\n\n".join(grafs)
# Appends the rendered footnote section to `text` when `self.footnotes` is
# non-empty; otherwise returns `text` unchanged.
def _add_footnotes(self, text):
if self.footnotes:
# NOTE(review): the body of the `footer` list is missing here (the list opens
# with '[' but is closed with ')'), so most of this branch appears to have been
# lost -- restore from the upstream file before relying on it.
footer = [
'')
return text + '\n\n' + '\n'.join(footer)
else:
return text
# A '<' that is NOT followed by a letter, '/', '?', '$' or '!' (case-insensitive).
_naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
# NOTE(review): this pattern is just '(?' which is not a valid regex; the
# remainder appears to have been lost -- restore from the upstream file.
_naked_gt_re = re.compile(r'''(?''', re.I)
def _encode_amps_and_angles(self, text):
    """Entity-encode ampersands and angle brackets that need encoding.

    Whatever `_AMPERSAND_RE` matches becomes ``&amp;``; whatever
    `_naked_lt_re` / `_naked_gt_re` match becomes ``&lt;`` / ``&gt;``.
    The previous replacement strings were the bare characters themselves,
    which made every substitution a no-op.
    """
    # Smart processing for ampersands and angle brackets that need
    # to be encoded.
    text = _AMPERSAND_RE.sub('&amp;', text)

    # Encode naked <'s
    text = self._naked_lt_re.sub('&lt;', text)

    # Encode naked >'s
    # Note: Other markdown implementations (e.g. Markdown.pl, PHP
    # Markdown) don't do this.
    text = self._naked_gt_re.sub('&gt;', text)
    return text
_incomplete_tags_re = re.compile(r"<(!--|/?\w+?(?!\w)\s*?.+?[\s/]+?)")
def _encode_incomplete_tags(self, text):
if self.safe_mode not in ("replace", "escape"):
return text
if text.endswith(">"):
return text # this is not an incomplete tag, this is a link in the form
def incomplete_tags_sub(match):
return match.group().replace('<', '<')
return self._incomplete_tags_re.sub(incomplete_tags_sub, text)
def _encode_backslash_escapes(self, text):
for ch, escape in list(self._escape_table.items()):
text = text.replace("\\"+ch, escape)
return text
_auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
def _auto_link_sub(self, match):
g1 = match.group(1)
return '%s' % (self._protect_url(g1), g1)
_auto_email_link_re = re.compile(r"""
<
(?:mailto:)?
(
[-.\w]+
\@
[-\w]+(\.[-\w]+)*\.[a-z]+
)
>
""", re.I | re.X | re.U)
def _auto_email_link_sub(self, match):
return self._encode_email_address(
self._unescape_special_chars(match.group(1)))
def _do_auto_links(self, text):
text = self._auto_link_re.sub(self._auto_link_sub, text)
text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
return text
def _encode_email_address(self, addr):
    """Return `addr` as an obfuscated ``mailto:`` anchor.

    Input: an email address, e.g. "foo@example.com"

    Output: the email address as a mailto link, with each character
    of the address encoded as either a decimal or hex entity, in
    the hopes of foiling most address harvesting spam bots.

    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    mailing list.

    The previous template was ``'%s'`` with two arguments, which raises
    ``TypeError: not all arguments converted``; the anchor markup is restored.
    """
    chars = [_xml_encode_email_char_at_random(ch)
             for ch in "mailto:" + addr]
    # Strip the mailto: from the visible part (its 7 encoded characters).
    return '<a href="%s">%s</a>' % (''.join(chars), ''.join(chars[7:]))
def _unescape_special_chars(self, text):
# Swap back in all the special characters we've hidden.
hashmap = tuple(self._escape_table.items()) + tuple(self._code_table.items())
# html_blocks table is in format {hash: item} compared to usual {item: hash}
hashmap += tuple(tuple(reversed(i)) for i in self.html_blocks.items())
while True:
orig_text = text
for ch, hash in hashmap:
text = text.replace(hash, ch)
if text == orig_text:
break
return text
def _outdent(self, text):
# Remove one level of line-leading tabs or spaces
return self._outdent_re.sub('', text)
@staticmethod
def _uniform_outdent(text, min_outdent=None, max_outdent=None):
'''
Removes the smallest common leading indentation from each (non empty)
line of `text` and returns said indent along with the outdented text.
Args:
min_outdent: make sure the smallest common whitespace is at least this size
max_outdent: the maximum amount a line can be outdented by
'''
# find the leading whitespace for every line
whitespace = [
re.findall(r'^[ \t]*', line)[0] if line else None
for line in text.splitlines()
]
whitespace_not_empty = [i for i in whitespace if i is not None]
# if no whitespace detected (ie: no lines in code block, issue #505)
if not whitespace_not_empty:
return '', text
# get minimum common whitespace
outdent = min(whitespace_not_empty)
# adjust min common ws to be within bounds
if min_outdent is not None:
outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent])
if max_outdent is not None:
outdent = min(outdent, max_outdent)
outdented = []
for line_ws, line in zip(whitespace, text.splitlines(True)):
if line.startswith(outdent):
# if line starts with smallest common ws, dedent it
outdented.append(line.replace(outdent, '', 1))
elif line_ws is not None and line_ws < outdent:
# if less indented than min common whitespace then outdent as much as possible
outdented.append(line.replace(line_ws, '', 1))
else:
outdented.append(line)
return outdent, ''.join(outdented)
@staticmethod
def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False):
'''
Uniformly indent a block of text by a fixed amount
Args:
text: the text to indent
indent: a string containing the indent to apply
include_empty_lines: don't remove whitespace only lines
indent_empty_lines: indent whitespace only lines with the rest of the text
'''
blocks = []
for line in text.splitlines(True):
if line.strip() or indent_empty_lines:
blocks.append(indent + line)
elif include_empty_lines:
blocks.append(line)
else:
blocks.append('')
return ''.join(blocks)
@staticmethod
def _match_overlaps_substr(text, match, substr):
'''
Checks if a regex match overlaps with a substring in the given text.
'''
for instance in re.finditer(re.escape(substr), text):
start, end = instance.span()
if start <= match.start() <= end:
return True
if start <= match.end() <= end:
return True
return False
class MarkdownWithExtras(Markdown):
    """A `Markdown` subclass with a default set of extras switched on.

    Enabled:
    - footnotes
    - fenced-code-blocks (only highlights code if 'pygments' Python module on path)

    Not enabled:
    - pyshell (specific to Python-related documenting)
    - code-friendly (because it *disables* part of the syntax)
    - link-patterns (because you need to specify some actual
      link-patterns anyway)
    """

    extras = [
        "footnotes",
        "fenced-code-blocks",
    ]
# ----------------------------------------------------------
# Extras
# ----------------------------------------------------------
# Base classes
# ----------------------------------------------------------
class Extra(ABC):
    """Base class for pluggable extras, with a shared registry and per-stage run order."""

    # name -> extra class, for every registered extra
    _registry = {}
    # stage -> (extras to run before it, extras to run after it)
    _exec_order: Dict[Stage, Tuple[List['Extra'], List['Extra']]] = {}

    name: str
    '''
    An identifiable name that users can use to invoke the extra
    in the Markdown class
    '''
    order: Tuple[Iterable[Union[Stage, 'Extra']], Iterable[Union[Stage, 'Extra']]]
    '''
    Tuple of two iterables containing the stages/extras this extra will run before and
    after, respectively
    '''

    def __init__(self, md: Markdown, options: Optional[dict]):
        '''
        Args:
            md: An instance of `Markdown`
            options: a dict of settings to alter the extra's behaviour
        '''
        self.md = md
        self.options = {} if options is None else options

    @classmethod
    def deregister(cls):
        '''
        Removes the class from the extras registry and unsets its execution order.
        '''
        cls._registry.pop(cls.name, None)
        for sections in Extra._exec_order.values():
            # strip every mention of this extra from both the before and after lists
            for section in sections:
                while cls in section:
                    section.remove(cls)

    @classmethod
    def register(cls):
        '''
        Registers the class for use with `Markdown` and calculates its execution order based on
        the `order` class attribute.
        '''
        cls._registry[cls.name] = cls

        # flatten `order` into (item, runs_before) pairs
        targets = [(item, True) for item in cls.order[0]]
        targets += [(item, False) for item in cls.order[1]]

        for item, before in targets:
            if isinstance(item, Stage) or not issubclass(item, Extra):
                # eg: Stage.PREPROCESS
                befores, afters = Extra._exec_order.setdefault(item, ([], []))
                if before:
                    if cls not in befores:
                        befores.insert(0, cls)
                elif cls not in afters:
                    # only append if not already running after this stage
                    afters.append(cls)
                continue

            # eg: FencedCodeBlocks
            # insert this extra everywhere the other one is mentioned
            for sections in Extra._exec_order.values():
                for section in sections:
                    if item not in section:
                        continue
                    position = section.index(item)
                    if not before:
                        position += 1
                    section.insert(position, cls)

    @abstractmethod
    def run(self, text: str) -> str:
        '''
        Run the extra against the given text.

        Returns:
            The new text after being modified by the extra
        '''
        ...

    def test(self, text: str) -> bool:
        '''
        Check a section of markdown to see if this extra should be run upon it.
        The default implementation will always return True but it's recommended to override
        this behaviour to improve performance.
        '''
        return True
class ItalicAndBoldProcessor(Extra):
    '''
    An ABC that provides hooks for dealing with italics and bold syntax.
    This class is set to trigger both before AND after the italics and bold stage.
    This allows any child classes to intercept instances of bold or italic syntax and
    change the output or hash it to prevent it from being processed.

    After the I&B stage any hashes in the `hash_table` instance variable are replaced.
    '''

    name = 'italic-and-bold-processor'
    order = (Stage.ITALIC_AND_BOLD,), (Stage.ITALIC_AND_BOLD,)

    strong_re = Markdown._strong_re
    em_re = Markdown._em_re

    def __init__(self, md: Markdown, options: dict):
        super().__init__(md, options)
        # hash key -> original text that was hidden behind it
        self.hash_table = {}

    def run(self, text):
        if self.md.order < Stage.ITALIC_AND_BOLD:
            # before the I&B stage: let `sub` intercept strong first, then em
            text = self.strong_re.sub(self.sub, text)
            return self.em_re.sub(self.sub, text)
        # after the I&B stage: put any hashed values back
        for key, hidden in self.hash_table.items():
            text = text.replace(key, hidden)
        return text

    @abstractmethod
    def sub(self, match):
        # do nothing. Let `Markdown._do_italics_and_bold` do its thing later
        return match.group(0)

    def sub_hash(self, match):
        # hide the matched text behind a hash so later stages leave it alone
        hidden = match.group(0)
        key = _hash_text(hidden)
        self.hash_table[key] = hidden
        return key

    def test(self, text):
        if self.md.order < Stage.ITALIC_AND_BOLD:
            return '*' in text or '_' in text
        return self.hash_table and re.search(r'md5-[0-9a-z]{32}', text)
# User facing extras
# ----------------------------------------------------------
class Admonitions(Extra):
    '''
    Enable parsing of RST admonitions
    '''

    name = 'admonitions'
    order = (Stage.BLOCK_GAMUT, Stage.LINK_DEFS), ()

    admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'

    admonitions_re = re.compile(r'''
        ^(\ *)\.\.\ (%s)::\ * # $1 leading indent, $2 the admonition
        (.*)? # $3 admonition title
        ((?:\s*\n\1\ {3,}.*)+?) # $4 admonition body (required)
        (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S)) # until EOF, 3 blank lines or something less indented
        ''' % admonitions,
        re.IGNORECASE | re.MULTILINE | re.VERBOSE
    )

    def test(self, text):
        return self.admonitions_re.search(text) is not None

    def sub(self, match):
        '''
        Render one matched admonition as an `<aside>` block.

        The previous version formatted `'' % (cls, body)`, which raises
        `TypeError: not all arguments converted`, and its label/title
        wrappers were bare `'%s'` no-ops. The markup is restored here.
        '''
        lead_indent, admonition_name, title, body = match.groups()

        admonition_type = '<strong>%s</strong>' % admonition_name

        # figure out the class names to assign the block
        if admonition_name.lower() == 'admonition':
            admonition_class = 'admonition'
        else:
            admonition_class = 'admonition %s' % admonition_name.lower()

        # titles are generally optional
        if title:
            title = '<em>%s</em>' % title

        # process the admonition body like regular markdown
        body = self.md._run_block_gamut("\n%s\n" % self.md._uniform_outdent(body)[1])

        # indent the body before placing inside the aside block
        admonition = self.md._uniform_indent(
            '%s\n%s\n\n%s\n' % (admonition_type, title, body),
            self.md.tab, False
        )
        # wrap it in an aside
        admonition = '<aside class="%s">\n%s</aside>\n\n' % (admonition_class, admonition)
        # now indent the whole admonition back to where it started
        return self.md._uniform_indent(admonition, lead_indent, False)

    def run(self, text):
        return self.admonitions_re.sub(self.sub, text)
# Extra that controls which line endings become hard line breaks, driven by
# the 'on_backslash' and 'on_newline' options (both default to False).
class Breaks(Extra):
name = 'breaks'
order = (), (Stage.ITALIC_AND_BOLD,)
def run(self, text):
on_backslash = self.options.get('on_backslash', False)
on_newline = self.options.get('on_newline', False)
# `pattern` describes what must precede the newline for it to count as a break.
if on_backslash and on_newline:
# optional spaces and an optional backslash
pattern = r' *\\?'
elif on_backslash:
# a backslash (after optional spaces), or two-or-more spaces
pattern = r'(?: *\\| {2,})'
elif on_newline:
# any number of spaces (including none)
pattern = r' *'
else:
# default: two or more trailing spaces
pattern = r' {2,}'
# NOTE(review): the next two lines are truncated -- the break-tag literal and
# the start of the substitution call that uses `pattern` are missing. Restore
# from the upstream file.
break_tag = "
)", break_tag, text)
return text
class CodeFriendly(ItalicAndBoldProcessor):
    '''
    Disable _ and __ for em and strong.
    '''

    name = 'code-friendly'

    def sub(self, match):
        marker = match.group(1)
        if '_' in marker:
            # underscore syntax: hide it behind a hash so it is left alone
            hidden = match.group(0)
            key = _hash_text(hidden)
            self.hash_table[key] = hidden
            return key
        # asterisk syntax is handled as usual
        return super().sub(match)
class FencedCodeBlocks(Extra):
    '''
    Allows a code block to not have to be indented
    by fencing it with '```' on a line before and after,
    with support for syntax highlighting.
    '''

    name = 'fenced-code-blocks'
    order = (Stage.LINK_DEFS, Stage.BLOCK_GAMUT), (Stage.PREPROCESS,)

    fenced_code_block_re = re.compile(r'''
        (?:\n+|\A\n?|(?<=\n))
        (^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n # $1 = opening fence (captured for back-referencing), $2 = optional lang
        (.*?) # $3 = code block content
        \1[ \t]*\n # closing fence
        ''', re.M | re.X | re.S)

    def test(self, text):
        if '```' not in text:
            return False
        if self.md.stage == Stage.PREPROCESS and not self.md.safe_mode:
            return True
        if self.md.stage == Stage.LINK_DEFS and self.md.safe_mode:
            return True
        return self.md.stage == Stage.BLOCK_GAMUT

    def _code_block_with_lexer_sub(self, codeblock, leading_indent, lexer):
        '''
        Highlight `codeblock` with `lexer` and return it re-indented by
        `leading_indent`, surrounded by newlines.
        '''
        formatter_opts = self.md.extras['fenced-code-blocks'] or {}

        def unhash_code(codeblock):
            # put hashed HTML spans back
            for key, sanitized in list(self.md.html_spans.items()):
                codeblock = codeblock.replace(key, sanitized)
            # Undo entity-encoding before highlighting. The previous table
            # mapped each character to itself (a no-op). '&amp;' goes last so
            # that text like '&amp;lt;' is not unescaped twice.
            replacements = [
                ("&lt;", "<"),
                ("&gt;", ">"),
                ("&amp;", "&"),
            ]
            for old, new in replacements:
                codeblock = codeblock.replace(old, new)
            return codeblock

        # remove leading indent from code block
        _, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)

        codeblock = unhash_code(codeblock)
        colored = self.md._color_with_pygments(codeblock, lexer,
                                               **formatter_opts)

        # add back the indent to all lines
        return "\n%s\n" % self.md._uniform_indent(colored, leading_indent, True)

    def tags(self, lexer_name) -> tuple:
        '''
        Returns the tags that the encoded code block will be wrapped in, based
        upon the lexer name.

        This function can be overridden by subclasses to piggy-back off of the
        fenced code blocks syntax (see `Mermaid` extra).

        Returns:
            The opening and closing tags, as strings within a tuple
        '''
        pre_class = self.md._html_class_str_from_tag('pre')
        if "highlightjs-lang" in self.md.extras and lexer_name:
            code_class = ' class="%s language-%s"' % (lexer_name, lexer_name)
        else:
            code_class = self.md._html_class_str_from_tag('code')
        # The previous opening literal was '' with two arguments (TypeError)
        # and the closing literal was broken across lines.
        return ('<pre%s><code%s>' % (pre_class, code_class), '</code></pre>')

    def sub(self, match):
        '''
        Regex callback: render one fenced block, highlighted when a usable
        lexer is available and plainly encoded otherwise.
        '''
        fence = match.group(1)
        lexer_name = match.group(2)
        codeblock = match.group(3)
        codeblock = codeblock[:-1]  # drop one trailing newline

        # indentation of the opening fence, re-applied to the output
        leading_indent = ' ' * (len(fence) - len(fence.lstrip()))

        # Use pygments only if not using the highlightjs-lang extra
        if lexer_name and "highlightjs-lang" not in self.md.extras:
            lexer = self.md._get_pygments_lexer(lexer_name)
            if lexer:
                return self._code_block_with_lexer_sub(codeblock, leading_indent, lexer)

        # Fenced code blocks need to be outdented before encoding, and then reapplied
        if codeblock:
            # only run the codeblock through the outdenter if not empty
            leading_indent, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)

            codeblock = self.md._encode_code(codeblock)

        tags = self.tags(lexer_name)

        return "\n%s%s%s\n%s\n" % (leading_indent, tags[0], codeblock, tags[1])

    def run(self, text):
        return self.fenced_code_block_re.sub(self.sub, text)
class Latex(Extra):
'''
Convert $ and $$ to tags for inline and block math.
'''
name = 'latex'
order = (Stage.HASH_HTML,), ()
# NOTE(review): the literal below is truncated and split across two lines;
# `_double_dollar_re` and `_pre_code_block_re`, which `run` uses, are not
# defined anywhere in view -- restore from the upstream file.
_single_dollar_re = re.compile(r'(?(.*?)
", re.DOTALL)
# Set to the `latex2mathml.converter` module on the first call to `run`.
converter = None
# NOTE(review): class-level dict, so placeholders are shared by every instance
# and never cleared -- confirm this is intended.
code_blocks = {}
# Regex callback: convert group 1 of an inline match.
def _convert_single_match(self, match):
return self.converter.convert(match.group(1))
# Regex callback: convert group 1 of a block match with display="block".
# NOTE(review): r"\n" is a backslash followed by 'n', not a newline character -- confirm.
def _convert_double_match(self, match):
return self.converter.convert(match.group(1).replace(r"\n", ''), display="block")
# Regex callback: store the matched text under a placeholder and return the placeholder.
def code_placeholder(self, match):
#print("found match:", match, match.group(0), match.group(1))
# NOTE(review): the placeholder is an empty f-string, so every match shares the
# key '' and overwrites the previous entry -- the literal looks truncated.
placeholder = f""
self.code_blocks[placeholder] = match.group(0)
return placeholder
def run(self, text):
# Imported lazily so the dependency is only needed when this extra runs.
try:
import latex2mathml.converter
self.converter = latex2mathml.converter
except ImportError:
raise ImportError('The "latex" extra requires the "latex2mathml" package to be installed.')
# Hide whatever `_pre_code_block_re` matches behind placeholders first.
text = self._pre_code_block_re.sub(self.code_placeholder, text)
#print("Temp Text", text)
text = self._single_dollar_re.sub(self._convert_single_match, text)
text = self._double_dollar_re.sub(self._convert_double_match, text)
# Swap the hidden text back in.
for placeholder, code_block in self.code_blocks.items():
text = text.replace(placeholder, code_block)
return text
class LinkPatterns(Extra):
    '''
    Auto-link given regex patterns in text (e.g. bug number
    references, revision number references).
    '''

    name = 'link-patterns'
    order = (Stage.LINKS,), ()

    # anything that looks like `[text](target)` or `![alt](target)`
    _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')

    def run(self, text):
        '''
        Replace every match of the configured `(regex, repl)` pairs in
        `self.options` with an anchor, skipping matches that already sit
        inside a link.

        Each generated link is first hidden behind a hash so later patterns
        cannot match inside it; the hashes are expanded at the end.

        Fixes over the previous version: the quote escape replaced '"' with
        itself (a no-op) and the link template was `'%s'` with two arguments
        (TypeError). Both are restored, and the builtin `hash` is no longer
        shadowed.
        '''
        link_from_hash = {}
        for regex, repl in self.options:
            replacements = []
            for match in regex.finditer(text):
                if any(self.md._match_overlaps_substr(text, match, h) for h in link_from_hash):
                    continue

                if callable(repl):
                    href = repl(match)
                else:
                    href = match.expand(repl)
                replacements.append((match.span(), href))

            # work backwards so earlier spans stay valid while text is edited
            for (start, end), href in reversed(replacements):
                # Do not match against links inside brackets.
                if text[start - 1:start] == '[' and text[end:end + 1] == ']':
                    continue

                # Do not match against links in the standard markdown syntax.
                if text[start - 2:start] == '](' or text[end:end + 2] == '")':
                    continue

                # Do not match against links which are escaped.
                if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
                    text = text[:start - 3] + text[start:end] + text[end + 3:]
                    continue

                # search the text for anything that looks like a link; if the
                # pattern's span lies within it, don't process it
                is_inside_link = any(
                    reg[0] <= start and end <= reg[1]
                    for link_re in (self.md._auto_link_re, self._basic_link_re)
                    for link_match in link_re.finditer(text)
                    for reg in link_match.regs
                )
                if is_inside_link:
                    continue

                escaped_href = (
                    href.replace('"', '&quot;')  # b/c of attr quote
                        # To avoid markdown emphasis processing inside the href:
                        .replace('*', self.md._escape_table['*'])
                        .replace('_', self.md._escape_table['_']))
                link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
                link_hash = _hash_text(link)
                link_from_hash[link_hash] = link
                text = text[:start] + link_hash + text[end:]

        for link_hash, link in list(link_from_hash.items()):
            text = text.replace(link_hash, link)
        return text

    def test(self, text):
        return True
class MarkdownInHTML(Extra):
    '''
    Allow the use of `markdown="1"` in a block HTML tag to
    have markdown processing be done on its contents, with
    some limitations.
    '''

    name = 'markdown-in-html'
    order = (), (Stage.HASH_HTML,)

    def run(self, text):
        md = self.md

        def callback(block):
            # outdent, hash the block, then restore the original indent
            indent, outdented = md._uniform_outdent(block)
            hashed = md._hash_html_block_sub(outdented)
            return md._uniform_indent(
                hashed, indent, include_empty_lines=True, indent_empty_lines=False
            )

        return md._strict_tag_block_sub(text, md._block_tags_a, callback, True)

    def test(self, text):
        return True
# Piggy-backs on the fenced-code-blocks syntax: blocks whose language is
# 'mermaid' get their own wrapping tags, everything else defers to the parent.
class Mermaid(FencedCodeBlocks):
name = 'mermaid'
order = (FencedCodeBlocks,), ()
def tags(self, lexer_name):
if lexer_name == 'mermaid':
# NOTE(review): the tuple below is split across lines and its tag literals
# look truncated (the opening tag is an empty string) -- restore from the
# upstream file.
return ('', '
')
return super().tags(lexer_name)
class MiddleWordEm(ItalicAndBoldProcessor):
    '''
    Allows or disallows emphasis syntax in the middle of words,
    defaulting to allow. Disabling this means that `this_text_here` will not
    have its middle word turned into emphasis.
    '''

    name = 'middle-word-em'
    order = (CodeFriendly,), (Stage.ITALIC_AND_BOLD,)

    def __init__(self, md: Markdown, options: Union[dict, bool, None]):
        '''
        Args:
            md: the markdown instance
            options: can be bool for backwards compatibility but will be converted to a dict
                in the constructor. `None` is treated as an empty dict (previously it
                crashed on `.setdefault`). All options are:
                - allowed (bool): whether to allow emphasis in the middle of a word.
                If `options` is a bool it will be placed under this key.
        '''
        if isinstance(options, bool):
            options = {'allowed': options}
        elif options is None:
            options = {}
        options.setdefault('allowed', True)
        super().__init__(md, options)

        # keep the permissive pattern around; `run` uses it to hash leftovers
        self.liberal_em_re = self.em_re
        if not options['allowed']:
            # strict variant: emphasis must start and end on a word boundary
            self.em_re = re.compile(r'(?<=\b)%s(?=\b)' % self.liberal_em_re.pattern, self.liberal_em_re.flags)

    def run(self, text):
        # run strong and whatnot first
        # this also will process all strict ems
        text = super().run(text)
        if self.md.order < self.md.stage:
            # hash all non-valid ems
            text = self.liberal_em_re.sub(self.sub_hash, text)
        return text

    def sub(self, match):
        syntax = match.group(1)
        if len(syntax) != 1:
            # strong syntax
            return super().sub(match)
        # Single-character marker: emit the emphasis element. The previous
        # template was a bare '%s', which dropped the markers without
        # producing any emphasis.
        return '<em>%s</em>' % match.group(2)

    def sub_hash(self, match):
        # hide the matched text behind a hash; `run` restores it after the I&B stage
        text = match.string[match.start(): match.end()]
        key = _hash_text(text)
        self.hash_table[key] = text
        return key
class Numbering(Extra):
'''
Support of generic counters. Non standard extension to
allow sequential numbering of figures, tables, equations, exhibits etc.
'''
name = 'numbering'
order = (Stage.LINK_DEFS,), ()
def test(self, text):
return True
# Two passes: first number every `[#counter ... @id ...]` definition, then
# replace every `[@id]` reference with the number assigned to that id.
def run(self, text):
# First pass to define all the references
regex_defns = re.compile(r'''
\[\#(\w+) # the counter. Open square plus hash plus a word \1
([^@]*) # Some optional characters, that aren't an @. \2
@(\w+) # the id. Should this be normed? \3
([^\]]*)\] # The rest of the text up to the terminating ] \4
''', re.VERBOSE)
regex_subs = re.compile(r"\[@(\w+)\s*\]") # [@ref_id]
# counter name -> next number to hand out
counters = {}
# ref id -> (number, counter name)
references = {}
# (start, replacement text, end) for each definition found
replacements = []
# NOTE(review): both templates have fewer '{}' slots than the arguments passed
# to `.format` below (5 and 3 respectively); str.format silently ignores the
# extras, so output is incomplete. The surrounding markup looks to have been
# lost -- restore from the upstream file.
definition_html = '{}{}{}'
reference_html = '{}'
for match in regex_defns.finditer(text):
# We must have four match groups otherwise this isn't a numbering reference
if len(match.groups()) != 4:
continue
counter = match.group(1)
text_before = match.group(2).strip()
ref_id = match.group(3)
text_after = match.group(4)
# counters start at 1 and advance per definition
number = counters.get(counter, 1)
references[ref_id] = (number, counter)
replacements.append((match.start(0),
definition_html.format(counter,
ref_id,
text_before,
number,
text_after),
match.end(0)))
counters[counter] = number + 1
# apply back-to-front so earlier offsets stay valid
for repl in reversed(replacements):
text = text[:repl[0]] + repl[1] + text[repl[2]:]
# Second pass to replace the references with the right
# value of the counter
# Fwiw, it's vaguely annoying to have to turn the iterator into
# a list and then reverse it but I can't think of a better thing to do.
for match in reversed(list(regex_subs.finditer(text))):
number, counter = references.get(match.group(1), (None, None))
if number is not None:
repl = reference_html.format(counter,
match.group(1),
number)
else:
# unknown id: emit an error marker instead of a number
repl = reference_html.format(match.group(1),
'countererror',
'?' + match.group(1) + '?')
if "smarty-pants" in self.md.extras:
# swap double quotes for their escape-table entry when smarty-pants is enabled
repl = repl.replace('"', self.md._escape_table['"'])
text = text[:match.start()] + repl + text[match.end():]
return text
class PyShell(Extra):
    '''
    Treats unindented Python interactive shell sessions as
    code blocks.
    '''

    name = 'pyshell'
    order = (), (Stage.LISTS,)

    def test(self, text):
        return ">>>" in text

    def sub(self, match):
        session = match.group(0)
        if "fenced-code-blocks" in self.md.extras:
            # hand the dedented session to the fenced-code-blocks extra as a pycon fence
            fenced = "```pycon\n" + _dedent(session) + "```\n"
            return self.md.extra_classes['fenced-code-blocks'].run(fenced)

        # otherwise indent every line by one tab width
        lines = session.splitlines(0)
        _dedentlines(lines)
        indent = ' ' * self.md.tab_width
        body = ('\n' + indent).join(lines)
        # leading '\n' separates from possible cuddled paragraph
        return '\n' + indent + body + '\n'

    def run(self, text):
        max_lead = self.md.tab_width - 1
        session_re = re.compile(r"""
^([ ]{0,%d})>>>[ ].*\n # first line
^(\1[^\S\n]*\S.*\n)* # any number of subsequent lines with at least one character
(?=^\1?\n|\Z) # ends with a blank line or end of document
""" % max_lead, re.M | re.X)
        return session_re.sub(self.sub, text)
class SmartyPants(Extra):
'''
Replaces ' and " with curly quotation marks or curly
apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes,
and ellipses.
'''
name = 'smarty-pants'
order = (), (Stage.SPAN_GAMUT,)
# NOTE(review): everything between the regex literal opened on the next line
# and the closing triple quote below is truncated. The other quote regexes,
# `contractions`, and the `def` line of the method whose body follows are all
# missing -- restore from the upstream file.
_opening_single_quote_re = re.compile(r"(?
See "test/tm-cases/smarty_pants.text" for a full discussion of the
support here and
for a
discussion of some diversion from the original SmartyPants.
"""
# Single quotes: contractions first, then opening and closing quotes.
if "'" in text: # guard for perf
text = self.contractions(text)
text = self._opening_single_quote_re.sub("‘", text)
text = self._closing_single_quote_re.sub("’", text)
# Double quotes: opening, then closing.
if '"' in text: # guard for perf
text = self._opening_double_quote_re.sub("“", text)
text = self._closing_double_quote_re.sub("”", text)
# Dashes: the longer run is replaced first so '---' is not consumed as '--' plus '-'.
text = text.replace("---", "—")
text = text.replace("--", "–")
# Ellipses, in dotted and spaced-out forms.
text = text.replace("...", "…")
text = text.replace(" . . . ", "…")
text = text.replace(". . .", "…")
# TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
if "footnotes" in self.md.extras and "footnote-ref" in text:
# Quotes in the footnote back ref get converted to "smart" quotes
# Change them back here to ensure they work.
text = text.replace('class="footnote-ref”', 'class="footnote-ref"')
return text
# Only worth running when the text contains a quote character.
def test(self, text):
return "'" in text or '"' in text
class Strike(Extra):
    '''
    Text inside of double tilde is ~~strikethrough~~
    '''

    name = 'strike'
    order = (Stage.ITALIC_AND_BOLD,), ()

    # `~~text~~` where the text neither starts nor ends with whitespace
    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)

    def run(self, text):
        # Wrap the struck text in an <s> element. The previous replacement
        # was a bare r"\1", which removed the tildes without marking the
        # text as struck through.
        return self._strike_re.sub(r"<s>\1</s>", text)

    def test(self, text):
        return '~~' in text
class Tables(Extra):
'''
Tables using the same format as GFM
and
PHP-Markdown Extra .
'''
name = 'tables'
order = (), (Stage.LISTS,)
def run(self, text):
    """Find tables written in the PHP-Markdown / GFM table syntax and
    replace each one via `self.sub`. Some regex borrowed from
    https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
    """
    # a row may be indented by at most one column less than a tab stop
    max_lead = self.md.tab_width - 1
    pattern = r'''
(?:(?<=\n)|\A\n?) # leading blank line
^[ ]{0,%d} # allowed whitespace
(.*[|].*)[ ]*\n # $1: header row (at least one pipe)
^[ ]{0,%d} # allowed whitespace
( # $2: underline row
# underline row with leading bar
(?: \|\ *:?-+:?\ * )+ \|? \s?[ ]*\n
|
# or, underline row without leading bar
(?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? \s?[ ]*\n
)
( # $3: data rows
(?:
^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces
.*\|.*[ ]*\n
)+
)
''' % ((max_lead,) * 3)
    table_re = re.compile(pattern, re.M | re.X)
    return table_re.sub(self.sub, text)
def sub(self, match):
trim_space_re = '^[ \t\n]+|[ \t\n]+$'
trim_bar_re = r'^\||\|$'
split_bar_re = r'^\||(?' % self.md._html_class_str_from_tag('table'), '' % self.md._html_class_str_from_tag('thead'), '']
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
for col_idx, col in enumerate(cols):
hlines.append(' %s | ' % (
align_from_col_idx.get(col_idx, ''),
self.md._run_span_gamut(col)
))
hlines.append('
')
hlines.append('')
# tbody
hlines.append('')
for line in body.strip('\n').split('\n'):
hlines.append('')
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
for col_idx, col in enumerate(cols):
hlines.append(' %s | ' % (
align_from_col_idx.get(col_idx, ''),
self.md._run_span_gamut(col)
))
hlines.append('
')
hlines.append('')
hlines.append('