RetroSearch Browse

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Showing content from https://sourceforge.net/p/pqmarkup/code/ci/default/tree/pqmarkup.py below:

Website Navigation

pqmarkup / Code

import sys
from typing import List, IO, Callable, Dict
Char = str
import syntax_highlighter_for_pqmarkup
# Error raised for malformed markup; it carries the message together with the location of the problem.
# NOTE: the class keeps its historical name, which shadows the builtin `Exception` from here on in this module.
class Exception(Exception):
    message : str # description of the problem
    line : int    # line of the error location
    column : int  # column of the error location
    pos : int     # character offset of the error location in the input
    def __init__(self, message, line, column, pos):
        # Hand all four values to the base class explicitly, so that `args` is complete
        super().__init__(message, line, column, pos)
        self.message = message
        self.line = line
        self.column = column
        self.pos = pos
    def __str__(self):
        # Without this override str() (and therefore a traceback) shows the raw `args` tuple
        return self.message + " at line " + str(self.line) + ", column " + str(self.column)
class Converter:
    to_html_called_inside_to_html_outer_pos_list : List[int]
    habr_html : bool
    ohd : bool
    highlight_style_was_added = False
    instr : str
    def __init__(self, habr_html, ohd):
        self.to_html_called_inside_to_html_outer_pos_list = []
        #self.newline_chars = []
        self.habr_html = habr_html
        self.ohd = ohd
    def to_html(self, instr : str, outfilef : IO[str] = None, *, outer_pos = 0) -> str:
        self.to_html_called_inside_to_html_outer_pos_list.append(outer_pos)
        result : List[str] = [] # this should be faster than using regular string
        # Minimal output sink: the instance created below gets its `write` callable assigned
        # right after construction (it either appends to `result` or forwards to `outfilef`).
        class Writer:
            write : Callable[[str], None]
        outfile = Writer()
        if outfilef is None:
            outfile.write = lambda s: result.append(s)
        else:
            outfile.write = lambda s: outfilef.write(s)
        # Save instr so that a line number can be determined from a character position
        if len(self.to_html_called_inside_to_html_outer_pos_list) == 1:
            self.instr = instr
        # Raises Exception for a problem found at `pos` (an index into the `instr` of the current to_html call).
        # `pos` is first shifted by the sum of the recorded outer_pos values, so that the reported
        # line, column and pos refer to the text saved in self.instr.
        def exit_with_error(message, pos):
            pos += sum(self.to_html_called_inside_to_html_outer_pos_list)
            # The line number is 1 + the number of newlines located before pos
            line = self.instr.count("\n", 0, pos) + 1
            # Index of the last newline before pos (-1 when there is none), which makes the column 1-based
            line_start = self.instr.rfind("\n", 0, pos)
            raise Exception(message, line, pos - line_start, pos)
        i = 0
        # Returns the character located `offset` positions after the current one,
        # or "\0" when that position is past the end of `instr`.
        def next_char(offset = 1):
            target = i + offset
            if target < len(instr):
                return instr[target]
            return Char("\0")
        # Tells whether the text right after the current character is `str` (i_ stands for if_/is_).
        def i_next_str(str):
            first = i + 1
            last = first + len(str)
            # Slicing never raises in Python, so no explicit bounds check is needed
            return str == instr[first:last]
        # Returns the character located `offset` positions before the current one,
        # or "\0" when that position is before the start of `instr`.
        def prev_char(offset = 1):
            target = i - offset
            if target >= 0:
                return instr[target]
            return Char("\0")
        # Escapes `&` and `<` for output as HTML text; in habr_html mode `"` is escaped as well.
        def html_escape(str):
            escaped = str.replace('&', '&amp;')
            escaped = escaped.replace('<', '&lt;')
            if not self.habr_html:
                return escaped
            # needed for quotes to be displayed correctly in <a href="http://address">, because Habr automatically converts "" into «»
            return escaped.replace('"', '&quot;')
        # Escapes a string for use inside a double-quoted HTML attribute value.
        # In habr_html mode only `"` is rewritten (to two apostrophes); otherwise `&` and `"` become entities.
        def html_escapeq(str):
            if not self.habr_html:
                without_amp = str.replace('&', '&amp;')
                return without_amp.replace('"', '&quot;')
            return str.replace('"', "''")
        writepos = 0
        def write_to_pos(pos, npos):
            nonlocal writepos
            outfile.write(html_escape(instr[writepos:pos]))
            writepos = npos
        # Flushes the pending input up to the current position `i`, skips `skip_chars` characters
        # of the input starting at `i`, and then writes `add_str` verbatim (no escaping).
        def write_to_i(add_str, skip_chars = 1):
            resume_at = i + skip_chars
            write_to_pos(i, resume_at)
            outfile.write(add_str)
        # Finds the end of a ‘string’: returns the index of the right quote that matches
        # the left quote at `instr[i]`, taking nested quote pairs into account.
        # Reports an error (positioned at the opening quote) when the input ends first.
        def find_ending_pair_quote(i):
            assert(instr[i] == "‘") # ’
            depth = 0
            for pos in range(i, len(instr)):
                c = instr[pos]
                if c == "‘":
                    depth += 1
                elif c == "’":
                    depth -= 1
                    if depth == 0:
                        return pos
            # The whole rest of the input was scanned without finding the matching quote
            exit_with_error('Unpaired left single quotation mark', i)
        # Returns the index in `str` of the `]` that matches the `[` at `str[i]`, honouring nested brackets.
        # `start` is the offset of `str` inside `instr`; it is only used to position the error
        # that is reported when `str` ends before the matching bracket is found.
        def find_ending_sq_bracket(str, i, start = 0):
            assert(str[i] == "[") # ]
            depth = 0
            for pos in range(i, len(str)):
                c = str[pos]
                if c == "[":
                    depth += 1
                elif c == "]":
                    depth -= 1
                    if depth == 0:
                        return pos
            exit_with_error('Unended comment started', start + i)
        # Returns `s` with bracketed comments removed. A comment starts with `level` opening
        # square brackets and extends to the bracket matching the first of them.
        # `start` is the offset of `s` inside `instr` (needed only for error positions).
        # With `incode` set, a bracket group is removed only when its closing bracket
        # is directly preceded by `]|`; other groups are left in place.
        def remove_comments(s : str, start, level = 3, incode = False):
            opener = "[" * level
            j = s.find(opener)
            while j != -1:
                k = find_ending_sq_bracket(s, j, start) + 1
                if incode and s[k-3:k-1] != ']|':
                    # Not a comment: step over this opener and keep searching
                    j += 3
                else:
                    # Keep `start + index` pointing at the original text after `s` gets shorter
                    start += k - j
                    s = s[:j] + s[k:]
                j = s.find(opener, j)
            return s
        nonunique_links : Dict[int, str] = {}
        link = ''
        def write_http_link(startpos, endpos : int, q_offset = 1, text = ''):
            nonlocal i, link
            # Ищем окончание ссылки
            nesting_level = 0
            i += 2
            while True:
                if i == len(instr):
                    exit_with_error('Unended link', endpos+q_offset)
                ch = instr[i]
                if ch == "[":
                    nesting_level += 1
                elif ch == "]":
                    if nesting_level == 0:
                        break
                    nesting_level -= 1
                elif ch == " ":
                    break
                i += 1
            link = html_escapeq(instr[endpos+1+q_offset:i])
            tag = '<a href="' + link + '"'
            if link.startswith('./'):
                tag += ' target="_self"'
            # Ищем альтернативный текст при такой записи: ссылка[http://... ‘альтернативный текст’]
            if instr[i] == " ":
                tag += ' title="'
                if next_char() == "‘": # [
                    endqpos2 = find_ending_pair_quote(i+1)
                    if instr[endqpos2+1] != ']': # [
                        exit_with_error('Expected `]` after `’`', endqpos2+1)
                    tag += html_escapeq(remove_comments(instr[i+2:endqpos2], i+2))
                    i = endqpos2 + 1
                else:
                    endb = find_ending_sq_bracket(instr, endpos+q_offset)
                    tag += html_escapeq(remove_comments(instr[i+1:endb], i+1))
                    i = endb
                tag += '"'
            if next_char() == '[' and next_char(2) == '-':
                j = i + 3
                while j < len(instr):
                    if instr[j] == ']':
                        nonunique_links[int(instr[i+3:j])] = link
                        i = j
                        break
                    if not instr[j].isdigit():
                        break
                    j += 1
            if text == '':
                write_to_pos(startpos, i+1)
                text = self.to_html(instr[startpos+q_offset:endpos], outer_pos = startpos+q_offset)
            outfile.write(tag + '>' + (text if text != '' else link) + '</a>')
        def write_abbr(startpos, endpos, q_offset = 1):
            nonlocal i
            i += q_offset
            endqpos2 = find_ending_pair_quote(i+1) # [[‘
            if instr[endqpos2+1] != ']':
                exit_with_error("Bracket ] should follow after ’", endqpos2+1)
            write_to_pos(startpos, endqpos2+2)
            outfile.write('<abbr title="'
                + html_escapeq(remove_comments(instr[i+2:endqpos2], i+2)) + '">'
                + self.to_html(instr[startpos+q_offset:endpos], outer_pos = startpos+q_offset) + '</abbr>')
            i = endqpos2 + 1
        endi = 0
        def numbered_link(offset = 1):
            if next_char(offset) == '-' and next_char(offset+1).isdigit():
                j = i + offset + 1
                while j < len(instr): # [
                    if instr[j] == ']':
                        nonlocal link
                        try:
                            link = nonunique_links[int(instr[i+offset+1:j])]
                        except KeyError:
                            exit_with_error("Link with such index was not declared previously", i+offset+1)
                        nonlocal endi
                        endi = j
                        return True
                    if not instr[j].isdigit():
                        break
                    j += 1
            return False
        ordered_list_current_number = -1
        def close_ordered_list():
            nonlocal ordered_list_current_number
            if ordered_list_current_number != -1:
                write_to_i("</li>\n</ol>\n", 0)
                ordered_list_current_number = -1
        in_unordered_list = False
        def close_unordered_list():
            nonlocal in_unordered_list
            if in_unordered_list:
                write_to_i("</li>\n</ul>\n", 0)
                in_unordered_list = False
        # Converts a color written in the markup into a CSS color string.
        #  * `#...`: Cyrillic look-alikes of the hex letters A-F are replaced by Latin ones (case is preserved);
        #  * 1, 3 or 4 digits: every digit becomes one two-digit hex component
        #    (a single digit is used for all three components);
        #  * anything else is returned unchanged.
        def translate_color(color):
            if color[0:1] == '#':
                latin_for_cyrillic = {'а':'A','б':'B','с':'C','д':'D','е':'E','ф':'F'}
                out = []
                for c in color:
                    mapped = latin_for_cyrillic.get(c.lower(), c)[0]
                    out.append(mapped.lower() if c.islower() else mapped)
                return ''.join(out)
            if len(color) in (1, 3, 4) and color.isdigit():
                digits = color * 3 if len(color) == 1 else color
                parts = ["#"]
                for d in digits:
                    # 8 - FF, 0 - 00, 4 - 80 (`+ 4` rather than `+ 3`, which would give 7F, is the author's stated preference)
                    component = (int(d) * 0xFF + 4) // 8
                    parts.append(format(component, '02X'))
                return ''.join(parts)
            return color
        ending_tags : List[str] = []
        new_line_tag = "\0"
        while i < len(instr):
            ch = instr[i]
            if (i == 0 or prev_char() == "\n" # if beginning of line
                       or (i == writepos and len(ending_tags) != 0 and ending_tags[-1] in ('</blockquote>', '</div>') and instr[i-2:i] in ('>‘', '<‘', '!‘', ':‘'))): # ’’’’ # or beginning of blockquote or note
                if ch == '.' and (next_char() in ' ‘'): # ’ this is unordered list
                    close_ordered_list()
                    s = ''
                    if not in_unordered_list:
                        s = "<ul>\n<li>"
                        in_unordered_list = True
                    else:
                        s = "</li>\n<li>"
                    write_to_i(s)
                    new_line_tag = '' # используем тот факт, что разрыва строк в списках вида `. элемент списка` быть не может, и следующий символ \n будет либо закрывать список, либо обозначать начало следующего элемента списка
                    if next_char() == ' ':
                        i += 1
                    else:
                        endqpos = find_ending_pair_quote(i + 1)
                        outfile.write(self.to_html(instr[i+2:endqpos], outer_pos = i+2))
                        i = endqpos
                    writepos = i + 1
                else:
                    close_unordered_list()
                    if ch.isdigit():
                        j = i + 1
                        while j < len(instr):
                            if not instr[j].isdigit():
                                break
                            j += 1
                        if instr[j:j+1] == '.' and instr[j+1:j+2] in (' ', '‘'): # ’ this is ordered list
                            value = int(instr[i:j])
                            s = ''
                            if ordered_list_current_number == -1:
                                s = ('<ol>' if value == 1 else '<ol start="' + str(value) + '">') + "\n<li>"
                                ordered_list_current_number = value
                            else:
                                s = "</li>\n" + ("<li>" if value == ordered_list_current_number + 1 else '<li value="' + str(value) + '">')
                                ordered_list_current_number = value
                            write_to_i(s)
                            new_line_tag = '' # используем тот факт, что разрыва строк в списках вида `1. элемент списка` быть не может
                            if instr[j+1] == ' ':
                                i = j + 1
                            else:
                                endqpos = find_ending_pair_quote(j + 1)
                                outfile.write(self.to_html(instr[j+2:endqpos], outer_pos = j+2))
                                i = endqpos
                            writepos = i + 1
                        else:
                            close_ordered_list()
                    else:
                        close_ordered_list()
                if ch == ' ':
                    write_to_i('&emsp;')
                elif ch == '-': # horizontal rule
                    if i_next_str('--'):
                        j = i + 3
                        while True:
                            if j == len(instr) or instr[j] == "\n":
                                write_to_i("<hr />\n")
                                if j == len(instr):
                                    j -= 1
                                i = j
                                writepos = j + 1
                                break
                            if instr[j] != '-':
                                break
                            j += 1
                elif ch in ('>', '<') and (next_char() in ' ‘['): # this is blockquote # ]’
                    write_to_pos(i, i + 2)
                    outfile.write('<blockquote'+(ch=='<')*' class="re"'+'>')
                    if next_char() == ' ': # > Quoted text.
                        new_line_tag = '</blockquote>'
                    else:
                        if next_char() == '[': # ]
                            if numbered_link(2): # >[-1]:‘Quoted text.’
                                linkstr = link
                                if len(linkstr) > 57:
                                    linkstr = linkstr[:linkstr.rfind('/', 0, 47)+1] + '...'
                                outfile.write('<a href="' + link + '">[' + instr[i+3:endi] + ']<i>' + linkstr + '</i></a>')
                                i = endi + 1
                            else: # >[http...]:‘Quoted text.’ or >[http...][-1]:‘Quoted text.’
                                i += 1
                                endb = find_ending_sq_bracket(instr, i)
                                linkn = ''
                                if instr[endb+1:endb+3] == '[-': # ]
                                    linkn = '['+instr[endb+3:find_ending_sq_bracket(instr, endb+1)]+']'
                                link = instr[i + 1:endb]
                                spacepos = link.find(' ')
                                if spacepos != -1:
                                    link = link[:spacepos]
                                if len(link) > 57:
                                    link = link[:link.rfind('/', 0, 47)+1] + '...'
                                write_http_link(i, i, 0, linkn+'<i>'+link+'</i>') # this function changes `link` :o, but I left[‘I mean didn't rename it to `link_`’] it as is [at least for a while] because it still works correctly
                                i += 1
                            if instr[i:i+2] != ':‘': # ’
                                exit_with_error("Quotation with url should always has :‘...’ after ["+link[:link.find(':')]+"://url]", i)
                            outfile.write(":<br />\n")
                            writepos = i + 2
                        else:
                            endqpos = find_ending_pair_quote(i + 1)
                            if instr[endqpos+1:endqpos+2] == "[": # >‘Author's name’[http...]:‘Quoted text.’ # ]
                                startqpos = i + 1
                                i = endqpos
                                if numbered_link(2): # >‘Author's name’[-1]:‘Quoted text.’
                                    outfile.write('<i><a href="' + link + '">' + html_escape(instr[startqpos+1:endqpos]) + '</a></i>')
                                    i = endi + 1
                                else:
                                    outfile.write('<i>')
                                    assert(writepos == startqpos + 1)
                                    writepos = startqpos
                                    write_http_link(startqpos, endqpos)
                                    outfile.write('</i>')
                                    i += 1
                                if instr[i:i+2] != ':‘': # ’
                                    exit_with_error("Quotation with url should always has :‘...’ after [...]", i)
                                outfile.write(":<br />\n")
                                writepos = i + 2
                            elif instr[endqpos+1:endqpos+2] == ":": # >‘Author's name’:‘Quoted text.’
                                outfile.write("<i>"+instr[i+2:endqpos]+"</i>:<br />\n")
                                i = endqpos + 1
                                if instr[i:i+2] != ':‘': # ’
                                    exit_with_error("Quotation with author's name should be in the form >‘Author's name’:‘Quoted text.’", i)
                                writepos = i + 2
                            # else this is just >‘Quoted text.’
                        ending_tags.append('</blockquote>')
                    i += 2
                    continue
            if ch == "‘":
                prevci = i - 1
                prevc = instr[prevci] if prevci >= 0 else Char("\0")
                #assert(prevc == prev_char())
                startqpos = i
                i = find_ending_pair_quote(i)
                endqpos = i
                str_in_p = '' # (
                if prevc == ')':
                    openp = instr.rfind('(', 0, prevci - 1) # )
                    if openp != -1 and openp > 0:
                        str_in_p = instr[openp+1:startqpos-1]
                        prevci = openp - 1
                        prevc = instr[prevci]
                if prevc in 'PР': # Рисунок обрабатывается по-особенному
                    write_to_pos(prevci, endqpos + 1)
                    title = ''
                    endqpos2 : int
                    if i_next_str('[‘'): # альтернативный текст
                        endqpos2 = find_ending_pair_quote(i+2)
                        if instr[endqpos2+1] != ']': # [
                            exit_with_error('Expected `]` after `’`', endqpos2+1)
                        title = ' title="'+html_escapeq(remove_comments(instr[i+3:endqpos2], i+3))+'"'
                    imgtag = '<img'
                    if str_in_p != '':
                        wh = str_in_p.replace(',', ' ').split(' ')
                        assert(len(wh) in (1, 2))
                        imgtag += ' width="' + wh[0] + '" height="' + wh[-1] + '"'
                    imgtag += ' src="'+instr[startqpos+1:endqpos]+'"'+title+' />'
                    if i_next_str('[http') or i_next_str('[./'): # ]]
                        write_http_link(startqpos, endqpos, 1, imgtag)
                        writepos = i + 1
                    elif i_next_str('[‘'): # ’]
                        outfile.write(imgtag)
                        writepos = endqpos2 + 2
                        i = endqpos2 + 1
                    else:
                        outfile.write(imgtag)
                        i = endqpos
                elif i_next_str('[http') or i_next_str('[./'): # ]]
                    write_http_link(startqpos, endqpos)
                elif next_char() == '[' and numbered_link(2): # ]
                    i = endi
                    write_to_pos(startqpos, i+1)
                    outfile.write('<a href="' + link + '">' + html_escape(instr[startqpos+1:endqpos]) + '</a>')
                elif i_next_str('[‘'): # ’] сноска/альтернативный текст/текст всплывающей подсказки
                    write_abbr(startqpos, endqpos)
                elif next_char() == '{' and (self.habr_html or self.ohd):
                    # Find the end of the spoiler }
                    nesting_level = 0
                    i += 2
                    while True:
                        if i == len(instr):
                            exit_with_error('Unended spoiler', endqpos+1)
                        ch = instr[i]
                        if ch == "{":
                            nesting_level += 1
                        elif ch == "}":
                            if nesting_level == 0:
                                break
                            nesting_level -= 1
                        i += 1
                    write_to_pos(prevci + 1, i + 1)
                    outer_p = endqpos+(3 if instr[endqpos+2] == "\n" else 2) # проверка на == "\n" нужна, чтобы переход на новую строку/перевод строки после `{` игнорировался
                    if self.habr_html:
                        outfile.write('<spoiler title="' + remove_comments(instr[startqpos+1:endqpos], startqpos+1).replace('"', "''") + '">\n' + self.to_html(instr[outer_p:i], outer_pos = outer_p) + "</spoiler>\n")
                    else:
                        outfile.write('<span class="spoiler_title" onclick="return spoiler2(this, event)">' + remove_comments(instr[startqpos+1:endqpos], startqpos+1) + '<br /></span>' # используется span, так как с div подчёркивание будет на весь экран
                            + '<div class="spoiler_text" style="display: none">\n' + self.to_html(instr[outer_p:i], outer_pos = outer_p) + "</div>\n")
                    if (next_char() == "\n" # чтобы переход на новую строку/перевод строки после `}` игнорировался
                            and not in_unordered_list and ordered_list_current_number == -1): # если находимся внутри списка, то пропуска новой строки делать не нужно
                        i += 1
                        writepos = i + 1
                elif prevc == "'": # raw [html] output
                    t = startqpos - 1
                    while t >= 0:
                        if instr[t] != "'":
                            break
                        t -= 1
                    eat_left = startqpos - 1 - t # количество кавычек, которые нужно съесть слева
                    t = endqpos + 1
                    while t < len(instr):
                        if instr[t] != "'":
                            break
                        t += 1
                    eat_right = t - (endqpos + 1) # количество кавычек, которые нужно съесть справа
                    write_to_pos(startqpos - eat_left, t)
                    outfile.write(instr[startqpos + eat_left:endqpos - eat_right + 1])
                elif prevc in '0OО':
                    write_to_pos(prevci, endqpos+1)
                    outfile.write(html_escape(instr[startqpos+1:endqpos]).replace("\n", "<br />\n"))
                elif prevc == "#":
                    ins = remove_comments(instr[startqpos+1:endqpos], startqpos+1, incode = True)
                    write_to_pos(prevci, endqpos+1)
                    if self.habr_html:
                        str_in_p = {'C++':'cpp', 'C#':'cs'}.get(str_in_p, str_in_p)
                        not_inline = "\n" in ins or (prevci == 0 and endqpos == len(instr) - 1)
                        outfile.write((('<source lang="' + str_in_p + '">' if str_in_p != '' else '<source>') if not_inline else '<code>') + ins.replace('&', '&amp;').replace('<', '&lt;') + ("</source>" if not_inline else "</code>")) # так как <source> в Habr — блочный элемент, а не встроенный\inline
                    else:
                        pre = '<pre ' + ('class="code_block"' if ins[0] == "\n" else 'style="display: inline"') + '>' # can not do `outfile.write('<pre ' + ...)` here because `outfile.write(syntax_highlighter_for_pqmarkup.css)` should be outside of <pre> block
                        if self.ohd and syntax_highlighter_for_pqmarkup.is_lang_supported(str_in_p):
                            if not self.highlight_style_was_added:
                                outfile.write(syntax_highlighter_for_pqmarkup.css)
                                self.highlight_style_was_added = True
                            try:
                                outfile.write(pre + syntax_highlighter_for_pqmarkup.highlight(str_in_p, ins) + '</pre>')
                            except syntax_highlighter_for_pqmarkup.Error as e:
                                exit_with_error('Syntax highlighter: ' + e.message, startqpos+1+e.pos)
                        else:
                            outfile.write(pre + html_escape(ins) + '</pre>') # в habr_html тег pre не стоит задействовать, так как в Habr для тега pre используется шрифт monospace, в котором символы ‘ и ’ выглядят непонятно (не так как в Courier New)
                    if ins[0] == "\n" and (instr[i+1:i+2] == "\n" or instr[i+1:i+4] == '[[['): # ]]]
                        new_line_tag = "\n"
                elif prevc in 'TТ':
                    write_to_pos(prevci, endqpos+1)
                    header_row = False
                    hor_row_align = ''
                    ver_row_align = ''
                    row_style = ''
                    # Fill/prepare 2d-array `table`
                    # One cell of the table being parsed.
                    # `text` is the already converted content of the cell; `attrs` is the tag name with optional
                    # attributes (e.g. `td style="..."`), a span marker ('-' or '|') until spans are processed,
                    # or '' for a cell that has been merged into another one.
                    class TableCell:
                        text  : str
                        attrs : str
                        def __init__(self, text : str, attrs : str): # type hints are needed to avoid this error in MSVC 2017: ‘error C2892: local class shall not have member templates’
                            self.text  = text
                            self.attrs = attrs
                    # One row of the table being parsed: its cells plus the row-level CSS
                    # (`style` is written into the `style` attribute of <tr> when it is not empty).
                    class TableRow:
                        style: str
                        def __init__(self, style: str):
                            self.cells: List[TableCell] = [] # filled while the row is being read
                            self.style = style
                    table: List[TableRow] = []
                    j = startqpos + 1
                    def parse_color():
                        nonlocal j
                        j += 1
                        if instr[j] != '(':
                            exit_with_error('Expected `(` after `C`', j) # )
                        j += 1
                        if instr[j] != '-':
                            exit_with_error('Expected `-` after `C(`', j) # )
                        s = j + 1
                        j = instr.find(')', s)
                        return 'background-color: ' + translate_color(instr[s:j])
                    while j < endqpos:
                        ch = instr[j]
                        if ch == "‘": # ’
                            table.append(TableRow(row_style))
                            endrow = find_ending_pair_quote(j)
                            hor_col_align = ''
                            ver_col_align = ''
                            cell_style = ''
                            # Read table row
                            j += 1
                            while j < endrow:
                                ch = instr[j]
                                if ch == "‘": # ’
                                    end_of_column = find_ending_pair_quote(j)
                                    style = ""
                                    if hor_row_align != '' or hor_col_align != '':
                                        style += "text-align: " + (hor_col_align if hor_col_align != '' else hor_row_align)
                                    if ver_row_align != '' or ver_col_align != '':
                                        if style != "":
                                            style += "; "
                                        style += "vertical-align: " + (ver_col_align if ver_col_align != '' else ver_row_align)
                                    if cell_style != '':
                                        style += '; ' * (style != '') + cell_style
                                    hor_col_align = ''
                                    ver_col_align = ''
                                    cell_style = ''
                                    table[-1].cells.append(TableCell(self.to_html(instr[j+1:end_of_column], outer_pos = j+1), ("th" if header_row else "td") + (' style="'+style+'"' if style != '' else '')))
                                    j = end_of_column
                                elif ch in 'CС':
                                    cell_style = parse_color()
                                elif ch in '<>' and instr[j+1:j+2] in ('<', '>'):
                                    hor_col_align = {'<<':'left', '>>':'right', '><':'center', '<>':'justify'}[instr[j:j+2]]
                                    j += 1
                                elif instr[j:j+2] in ("/\\", "\\/"):
                                    ver_col_align = "top" if instr[j:j+2] == "/\\" else "bottom"
                                    j += 1
                                elif ch == "-":
                                    if len(table[-1].cells) == 0:
                                        exit_with_error('Wrong table column span marker "-"', j)
                                    table[-1].cells.append(TableCell('', '-'))
                                elif ch == "|":
                                    if len(table) == 1:
                                        exit_with_error('Wrong table row span marker "|"', j)
                                    table[-1].cells.append(TableCell('', '|'))
                                elif instr[j:j+3] == "[[[": # ]]]
                                    j = find_ending_sq_bracket(instr, j)
                                elif ch not in "  \t\n":
                                    exit_with_error('Unknown formatting character inside table row', j)
                                j += 1
                            header_row = False
                            hor_row_align = ''
                            ver_row_align = ''
                            row_style = ''
                        elif ch in 'HН':
                            header_row = True
                        elif ch in 'CС':
                            row_style = parse_color()
                        elif ch in '<>' and instr[j+1:j+2] in ('<', '>'):
                            hor_row_align = {'<<':'left', '>>':'right', '><':'center', '<>':'justify'}[instr[j:j+2]]
                            j += 1
                        elif instr[j:j+2] in ("/\\", "\\/"):
                            ver_row_align = "top" if instr[j:j+2] == "/\\" else "bottom"
                            j += 1
                        elif instr[j:j+3] == "[[[": # ]]]
                            j = find_ending_sq_bracket(instr, j)
                        elif ch not in "  \t\n":
                            exit_with_error('Unknown formatting character inside table', j)
                        j += 1
                    # Process column and row spans (walk in the reverse order — from bottom right corner of the table)
                    for y in range(len(table)-1, -1, -1):
                        for x in range(len(table[y].cells)-1, -1, -1):
                            if table[y].cells[x].attrs in ('-', '|'):
                                xx = x
                                yy = y
                                while True:
                                    if table[yy].cells[xx].attrs == '-':
                                        xx -= 1
                                    elif table[yy].cells[xx].attrs == '|':
                                        yy -= 1
                                    else:
                                        break
                                if xx < x:
                                    table[yy].cells[xx].attrs += ' colspan="'+str(x-xx+1)+'"'
                                if yy < y:
                                    table[yy].cells[xx].attrs += ' rowspan="'+str(y-yy+1)+'"'
                                for xxx in range(xx, x+1): # mark a whole rect of this merged cell as processed to avoid its further processing (in this loop) and to skip it at output table loop
                                    for yyy in range(yy, y+1):
                                        if (xxx, yyy) != (xx, yy):
                                            table[yyy].cells[xxx].attrs = ''
                    # Output table
                    is_inline = True
                    if (prevci == 0 or
                        instr[prevci-1] == "\n" or # [[[
                       (prevci-3 >= 0 and instr[prevci-3:prevci] == ']]]' and instr[0:3] == '[[[' and find_ending_sq_bracket(instr, 0) == prevci-1)): # ]]]
                        is_inline = False
                    outfile.write("<table"+' style="display: inline"'*is_inline+">\n")
                    for row in table:
                        outfile.write('<tr' + (' style="'+row.style+'"' if row.style != '' else '') + '>')
                        for cell in row.cells:
                            if cell.attrs != '': # if this is a merged cell (cell.attrs == '') — skip it
                                outfile.write('<' + cell.attrs + '>' + cell.text + '</' + cell.attrs[:2] + '>')
                        outfile.write("</tr>\n")
                    outfile.write("</table>\n")
                    if not is_inline:
                        new_line_tag = ''
                elif prevc in '<>' and instr[prevci-1] in '<>': # выравнивание текста \ text alignment
                    write_to_pos(prevci-1, endqpos+1)
                    outfile.write('<div align="' + {'<<':'left', '>>':'right', '><':'center', '<>':'justify'}[instr[prevci-1]+prevc] + '">'
                                 + self.to_html(instr[startqpos+1:endqpos], outer_pos = startqpos+1) + "</div>\n")
                    new_line_tag = ''
                elif i_next_str(":‘") and instr[find_ending_pair_quote(i+2)+1:find_ending_pair_quote(i+2)+2] == '<': # this is reversed quote ‘Quoted text.’:‘Author's name’< # ’
                    endrq = find_ending_pair_quote(i+2)
                    i = endrq + 1
                    write_to_pos(prevci + 1, i + 1)
                    outfile.write('<blockquote>' + self.to_html(instr[startqpos+1:endqpos], outer_pos = startqpos+1) + "<br />\n<div align='right'><i>" + instr[endqpos+3:endrq] + "</i></div></blockquote>")
                    new_line_tag = ''
                else:
                    i = startqpos # откатываем позицию обратно
                    if prevc in '*_-~':
                        write_to_pos(i - 1, i + 1)
                        tag = {'*':'b', '_':'u', '-':'s', '~':'i'}[prevc]
                        outfile.write('<' + tag + '>')
                        ending_tags.append('</' + tag + '>')
                    elif prevc in 'HН':
                        write_to_pos(prevci, i + 1)
                        tag = 'h' + str(min(max(3 - (0 if str_in_p == '' else int(str_in_p)), 1), 6))
                        outfile.write('<' + tag + '>')
                        ending_tags.append('</' + tag + '>')
                    elif prevc in 'CС':
                        write_to_pos(prevci, i + 1)
                        if self.habr_html:
                            if '-' in str_in_p:
                                #exit_with_error('`-` is not supported in color with --habr-html', prevci + 2 + str_in_p.index('-'))
                                exit_with_error('background color is not supported with --habr-html', prevci + 2 + str_in_p.index('-'))
                            outfile.write('<font color="' + translate_color(str_in_p) + '">')
                            ending_tags.append('</font>')
                        else: # The <font> tag is not supported in HTML5.
                            style = ''
                            if str_in_p[0:1] == '-':
                                style = 'background-color: ' + translate_color(str_in_p[1:])
                            elif '-' in str_in_p:
                                color, bgcolor = str_in_p.split('-')
                                style = 'color: ' + translate_color(color) + '; background-color: ' + translate_color(bgcolor)
                            else:
                                style = 'color: ' + translate_color(str_in_p)
                            outfile.write('<span style="' + style + '">')
                            ending_tags.append('</span>')
                    elif (instr[prevci-1:prevci], prevc) in (('/', "\\"), ("\\", '/')):
                        write_to_pos(prevci-1, i + 1)
                        tag = 'sup' if (instr[prevci-1], prevc) == ('/', "\\") else 'sub'
                        outfile.write('<' + tag + '>')
                        ending_tags.append('</' + tag + '>')
                    elif prevc == '!':
                        write_to_pos(prevci, i + 1)
                        outfile.write('<blockquote>' if self.habr_html else '<div class="note">')
                        ending_tags.append('</blockquote>' if self.habr_html else '</div>')
                    else: # ‘
                        ending_tags.append('’')
            elif ch == "’":
                write_to_pos(i, i + 1)
                if len(ending_tags) == 0:
                    exit_with_error('Unpaired right single quotation mark', i)
                last = ending_tags.pop()
                if next_char() == "\n" and (last.startswith('</h') or last in ('</blockquote>', '</div>')): # так как <h.> - блоковый элемент, то он автоматически завершает строку, поэтому лишний тег <br> в этом случае добавлять не нужно (иначе получится лишняя пустая строка после заголовка)
                    # Поскольку мы пропускаем "\n", необходимо добавить new_line_tag, если он есть
                    if new_line_tag not in ('', "\0"):
                        outfile.write(new_line_tag)
                    new_line_tag = "\0"
                    i += 1 # эта строка была перенесена сюда из-за особенности работы функций close_*ordered_list()
                    # Также закрываем список внутри цитаты перед тегом </blockquote>
                    close_ordered_list()
                    close_unordered_list()
                    outfile.write(last)
                    outfile.write("\n")
                    writepos += 1
                else:
                    outfile.write(last)
            elif ch == '`':
                # Сначала считаем количество символов ` — это определит границу, где находится окончание span of code
                start = i
                i += 1
                while i < len(instr):
                    if instr[i] != '`':
                        break
                    i += 1
                end = instr.find((i - start)*'`', i)
                if end == -1:
                    exit_with_error('Unended ` started', start)
                write_to_pos(start, end + i - start)
                ins = instr[i:end]
                delta = ins.count("‘") - ins.count("’") # в `backticks` могут быть ‘кавычки’ и в [[[комментариях]]] (выглядит это, например, так: [[[‘]]]`Don’t`), для этого и нужны
                if delta > 0: # эти строки кода[:backticks]
                    for ii in range(delta): # ‘‘
                        ending_tags.append('’')
                else:
                    for ii in range(-delta):
                        if ending_tags.pop() != '’':
                            exit_with_error('Unpaired single quotation mark found inside code block/span beginning', start)
                ins = html_escape(ins)
                if not "\n" in ins: # this is a single-line code -‘block’span
                    outfile.write('<code>' + ins + '</code>' if self.habr_html else '<pre class="inline_code">' + ins + '</pre>')
                else:
                    outfile.write('<pre>' + ins + '</pre>' + "\n"*(not self.habr_html))
                    new_line_tag = ''
                i = end + i - start - 1
            elif ch == '[': # ]
                if i_next_str('http') or i_next_str('./') or (i_next_str('‘') and prev_char() not in "\r\n\t \0") or numbered_link(): # ’
                    s = i - 1
                    while s >= writepos and instr[s] not in "\r\n\t (‘“«„": # »”’)
                        s -= 1
                    if i_next_str('‘'): # ’ сноска/альтернативный текст/текст всплывающей подсказки
                        write_abbr(s + 1, i, 0)
                    elif i_next_str('http') or i_next_str('./'):
                        write_http_link(s + 1, i, 0)
                    else:
                        write_to_pos(s + 1, endi+1)
                        outfile.write('<a href="' + link + '">' + html_escape(instr[s+1:i]) + '</a>')
                        i = endi
                elif i_next_str('[['): # ]] comment
                    comment_start = i
                    nesting_level = 0
                    while True:
                        ch = instr[i]
                        if ch == "[":
                            nesting_level += 1
                        elif ch == "]":
                            nesting_level -= 1
                            if nesting_level == 0:
                                break
                        elif ch == "‘": # [backticks:]а также эти строки кода
                            ending_tags.append('’') # ‘‘
                        elif ch == "’":
                            assert(ending_tags.pop() == '’')
                        i += 1
                        if i == len(instr):
                            exit_with_error('Unended comment started', comment_start)
                    write_to_pos(comment_start, i+1)
                    if instr[comment_start+3:comment_start+4] != '[': # это [[[такой]]] комментарий, а не [[[[такой]]]] или [[[[[такой и [[[[[[так далее]]]]]]]]]]], а [[[такие]]] комментарии следует транслировать в HTML: <!--[[[комментарий]...]...]-->
                        outfile.write('<!--')
                        outfile.write(remove_comments(instr[comment_start:i+1], comment_start, 4)) # берётся вся строка вместе со [[[скобочками]]] для [[[таких] ситуаций]]
                        outfile.write('-->')
                else:
                    write_to_i('<span class="sq"><span class="sq_brackets">'*self.ohd + '<font color="#BFBFBF">'*self.habr_html + '[' + '</font><font color="gray">'*self.habr_html + self.ohd*'</span>')
            elif ch == "]":
                write_to_i('<span class="sq_brackets">'*self.ohd + '</font><font color="#BFBFBF">'*self.habr_html + ']' + '</font>'*self.habr_html + self.ohd*'</span></span>')
            elif ch == "{":
                write_to_i('<span class="cu_brackets" onclick="return spoiler(this, event)"><span class="cu_brackets_b">'*self.ohd + '{' + self.ohd*'</span><span>…</span><span class="cu" style="display: none">')
            elif ch == "}":
                write_to_i('</span><span class="cu_brackets_b">'*self.ohd + '}' + self.ohd*'</span></span>')
            elif ch == "\n":
                write_to_i((new_line_tag if new_line_tag != "\0" else "<br />") + ("\n" if new_line_tag not in ('', "\n") else "")) # код `"\n" if new_line_tag != ''` нужен только для списков (unordered/ordered list)
                new_line_tag = "\0"
            i += 1
        close_ordered_list()
        close_unordered_list()
        write_to_pos(len(instr), 0)
        if len(ending_tags) != 0: # ‘слишком много открывающих одинарных кавычек’/‘где-то есть незакрытая открывающая кавычка’\‘there is an unclosed opening/left quote somewhere’
            exit_with_error('Unclosed left single quotation mark somewhere', len(instr))
        assert(self.to_html_called_inside_to_html_outer_pos_list.pop() == outer_pos)
        if outfilef is None:
            r = "".join(result)
            if self.habr_html:                                    # // dirty hack
                r = r.replace("</blockquote>\n", '</blockquote>') # \\ (just left it as is)
            return r
        return ''
def to_html(instr, outfilef : IO[str] = None, ohd = False, *, habr_html = False):
    """Convert the pqmarkup text `instr` to HTML with a one-off `Converter`.

    instr     -- pqmarkup source text.
    outfilef  -- optional text stream; when given, the HTML is written to it
                 and an empty string is returned, otherwise the HTML is
                 returned as a string.
    ohd       -- forwarded to `Converter` as its `ohd` flag (the command line
                 driver passes its `--output-html-document` switch here).
    habr_html -- forwarded to `Converter` as its `habr_html` flag.

    Markup errors are reported by the converter through this module's own
    `Exception` class (carrying `message`, `line`, `column` and `pos`).
    """
    converter = Converter(habr_html, ohd)
    return converter.to_html(instr, outfilef)
if __name__ == '__main__':
    # Support running module as a command line command.
    #
    # Flow: parse `sys.argv` by hand -> open the input (file or STDIN) ->
    # validate options -> open the output (file or STDOUT) -> read the whole
    # input -> optionally emit an HTML document header -> convert -> optionally
    # emit the HTML document footer.
    if '-h' in sys.argv or '--help' in sys.argv:
        print(R'''A Python implementation of pqmarkup to HTML converter.
Usage: pqmarkup [options] [INPUT_FILE]
Positional arguments:
  INPUT_FILE            input file (STDIN is assumed if no INPUT_FILE is
                        given)
Options:
  -h, --help            show this help message and exit
  --habr-html           for publishing articles on habr.com
  --output-html-document
                        add some html header for rough testing preview of your
                        converted documents
  -f OUTPUT_FILE, --file OUTPUT_FILE
                        write output to OUTPUT_FILE (defaults to STDOUT)''')
        sys.exit(0)
    args_habr_html            = '--habr-html'            in sys.argv
    args_output_html_document = '--output-html-document' in sys.argv
    # The first argument that is neither an option nor the value of
    # `-f`/`--file` is taken as INPUT_FILE; without one, STDIN is read.
    args_infile = sys.stdin
    i = 1
    while i < len(sys.argv):
        if sys.argv[i] in ('-f', '--file'):
            i += 2 # skip the option together with its OUTPUT_FILE value
            continue
        if not sys.argv[i].startswith('-'):
            try:
                args_infile = open(sys.argv[i], 'r', encoding = 'utf-8-sig')
            except OSError: # only I/O failures; do not swallow KeyboardInterrupt/SystemExit
                sys.exit("Can't open file '" + sys.argv[i] + "'")
            break
        i += 1
    # Validate the option combination BEFORE opening the output file: opening
    # with mode 'w' truncates it, which must not happen for a rejected command line.
    if args_output_html_document and args_habr_html:
        sys.exit("Options --output-html-document and --habr-html are mutually exclusive")
    args_outfile = sys.stdout
    for file_option in ('-f', '--file'): # `-f` takes precedence over `--file` when both are given
        if file_option in sys.argv:
            outfile_name_index = sys.argv.index(file_option) + 1
            if outfile_name_index >= len(sys.argv):
                # The option is the last argument, so there is no file name to use
                sys.exit("Option " + file_option + " requires an OUTPUT_FILE argument")
            outfile_name = sys.argv[outfile_name_index]
            try:
                args_outfile = open(outfile_name, 'w', encoding = 'utf-8', newline = "\n")
            except OSError:
                sys.exit("Can't open file '" + outfile_name + "' for writing")
            break
    infile_str : str
    try:
        infile_str = args_infile.read()
    except UnicodeDecodeError:
        sys.exit('Input is not a valid UTF-8!')
    finally:
        if args_infile is not sys.stdin:
            args_infile.close()
    # If the document starts with a commented-out header ([[[H‘...’]]], with
    # either Latin `H` or Cyrillic `Н`), use its text as the HTML <title>.
    title = ''
    if infile_str.startswith(('[[[H‘', '[[[Н‘')): # ’]]]’]]]
        i = 5
        nesting_level = 1
        # Scan to the right quote that balances the opening one
        while i < len(infile_str):
            ch = infile_str[i]
            if ch == "‘":
                nesting_level += 1
            elif ch == "’":
                nesting_level -= 1
                if nesting_level == 0:
                    break
            i += 1
        title = infile_str[5:i]
    if args_output_html_document:
        args_outfile.write(
R'''<html>
<head>
<meta charset="utf-8" />
''' + ('<title>' + title + "</title>\n" if title != '' else '') + R'''<base target="_blank">
<script type="text/javascript">
function spoiler(element, event)
{
    if (event.target.nodeName == 'A' || event.target.parentNode.nodeName == 'A' || event.target.onclick)//чтобы работали ссылки в спойлерах и спойлеры2 в спойлерах
        return;
    var e = element.firstChild.nextSibling.nextSibling;//element.getElementsByTagName('span')[0]
    e.previousSibling.style.display = e.style.display;//<span>…</span> must have inverted display style
    e.style.display = (e.style.display == "none" ? "" : "none");
    element.firstChild.style.fontWeight =
    element. lastChild.style.fontWeight = (e.style.display == "" ? "normal" : "bold");
    event.stopPropagation();
    //[-Чтобы была возможность выделять текст внутри раскрытого ‘скрытого текста’/спойлера, необходимо [скрытие/]закрытие спойлера делать не просто по нажатию левой кнопки мыши, а по отпусканию левой кнопки мыши при условии отсутствия движения курсора [мыши] после того, как была нажата левая кнопка мыши.-]
}
function spoiler2(element, event)
{
    element.nextSibling.style.display = (element.nextSibling.style.display == "none" ? "" : "none");
}
</script>
<style type="text/css">
div#main, td {
    font-size: 14px;
    font-family: Verdana, sans-serif;
    line-height: 160%;
    text-align: justify;
}
span.cu_brackets_b {
    font-size: initial;
    font-family: initial;
    font-weight: bold;
}
a {
    text-decoration: none;
    color: #6da3bd;
}
a:hover {
    text-decoration: underline;
    color: #4d7285;
}
h1, h2, h3, h4, h5, h6 {
    margin: 0;
    font-weight: 400;
}
h1 {font-size: 200%; line-height: 130%;}
h2 {font-size: 180%; line-height: 135%;}
h3 {font-size: 160%; line-height: 140%;}
h4 {font-size: 145%; line-height: 145%;}
h5 {font-size: 130%; line-height: 140%;}
h6 {font-size: 120%; line-height: 140%;}
span.sq {color: gray; font-size: 0.8rem; font-weight: normal; /*pointer-events: none;*/}
span.sq_brackets {color: #BFBFBF;}
span.cu_brackets {cursor: pointer;}
span.cu {background-color: #F7F7FF;}
abbr {text-decoration: none; border-bottom: 1px dotted;}
pre {margin: 0;}''' + # when <pre style="display: inline"> was used everywhere, the margin of such blocks was not taken into account, so even without this `pre {margin: 0}` line the code looked the same as `<pre>` without `style="display: inline"` looks with this line; but, generally speaking, I added this line to match Habr's formatting
'''
pre, code {font-family: 'Courier New'; line-height: normal}
ul, ol {margin: 11px 0 7px 0;}
ul li, ol li {padding: 7px 0;}
ul li:first-child, ol li:first-child {padding-top   : 0;}
ul  li:last-child, ol  li:last-child {padding-bottom: 0;}
table {margin: 9px 0; border-collapse: collapse;}
table th, table td {padding: 6px 13px; border: 1px solid #BFBFBF;}
span.spoiler_title {
    color: #548eaa;
    cursor: pointer;
    border-bottom: 1px dotted;
}
div.spoiler_text {
    /*border: 1px dotted;*/
    margin: 5px;
    padding: 3px;
}
blockquote {
    margin: 0 0 7px 0;
    padding: 7px 12px;
}
blockquote:not(.re) {border-left:  0.2em solid #90ddaa; background-color: #fbfffb;}
blockquote.re       {border-right: 0.2em solid #90ddaa; background-color: #f4fff8;}
div.note {
    padding: 18px 20px;
    background: #ffffd7;
}
pre.code_block {padding: 6px 0;}
pre.inline_code {
    display: inline;
    padding: 0px 3px;
    border: 1px solid #E5E5E5;
    background-color: #FAFAFA;
    border-radius: 3px;
}
img {vertical-align: middle;}
div#main {width: 100%;}
@media screen and (min-width: 1050px) {
    div#main {width: 1024px;}
}
</style>
</head>
<body>
<div id="main" style="margin: 0 auto">
''')
    try:
        to_html(infile_str, args_outfile, args_output_html_document, habr_html = args_habr_html)
    except Exception as e: # NOTE: `Exception` here is this module's own class (with message/line/column), which shadows the builtin
        sys.stderr.write(e.message + " at line " + str(e.line) + ", column " + str(e.column) + "\n")
        sys.exit(-1)
    if args_output_html_document:
        args_outfile.write(
'''</div>
</body>
</html>''')
    if args_outfile is not sys.stdout:
        args_outfile.close() # flush and release the output file explicitly
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4