From Wiktionary, the free dictionary
import hashlib
import re

from pywikibot import Site, Page

# The pages to read from / write to.
get_page_title = "Wiktionary:Language flags list"
set_page_title = "MediaWiki:Gadget-WiktCountryFlags.css"
# because this modifies global CSS, interface admin rights are required

# The L2 order. The following languages are always placed first, in this order.
always_first_l2 = ["Translingual", "English"]

# The CSS header.
# NOTE(review): the line breaks inside this template were restored from the
# " * " comment-continuation markers -- confirm against the live gadget page.
css_header_template = """/*
 * flags.css - adds flags to language headers.
 * originally created by [[User:Prince Kassad]]
 * this version is auto-generated from the list at [[{get_page_title}]]
 * ([[Special:Permalink/{get_page_revision_id}]])
 */
"""

# The CSS rules to create for every language.
selector_template = ".ns-0 h2 #{language}::before, .ns-0 h2#{language}::before"
rule_template = """{selectors}{{padding-right:5px; content:url({url});}}"""

# Regex to iterate over the language names and filenames
# in the CSS code produced by this script.
# Group 1 captures a language name from a selector; group 2 captures the
# file name from the thumbnail URL of the rule body that follows.
css_language_filename_regex = (
    r"""h2 \#([^:]+)::before|\{[^;]+;\s*content:url\(['"]"""
    + re.escape("//upload.wikimedia.org/wikipedia/commons/thumb/")
    + r"[0-9a-f]/[0-9a-f]{2}/([^/]+)")

# Other settings.
default_size = "45px"
edit_summary = "(bot) update CSS based on flag list from [[" + get_page_title + "]]"
debug_mode = False

# Format for the input list.
list_regex = r"<!-- start list -->(.+?)<!-- end list -->"
line_regex = r"(.+?): (.+?)(?: (\d+px))?$"

# Code follows

size_dict = {}  # to be defined later


def convert_line_to_item(match):
    """Turn a ``line_regex`` match into a ``(language, filename, size)`` tuple.

    Underscores in the file name are replaced by spaces, and the size falls
    back to ``default_size`` when the line does not specify one.
    Returns ``None`` when *match* is falsy (the line did not match).
    """
    if not match:
        return None
    return (match.group(1),
            match.group(2).replace("_", " "),
            match.group(3) or default_size)


def get_url_from_file(file):
    """Return the protocol-relative Commons thumbnail URL for *file*.

    The thumbnail size is looked up in ``size_dict`` (keyed by the file name
    as given), falling back to ``default_size``.
    """
    size = size_dict.get(file, default_size)
    file = file.replace(" ", "_")
    thumbfile = file
    # SVG files do not have thumbnails, get as PNG
    if thumbfile.endswith(".svg"):
        thumbfile += ".png"
    # right now the thumbnail path has parts of the MD5 hash of the file name
    # this may change one day
    md5 = hashlib.md5(file.encode('utf-8')).hexdigest().lower()
    # make protocol-relative URL
    return ("//upload.wikimedia.org/wikipedia/commons/thumb/"
            "{l1}/{l2}/{file}/{size}-{thumbfile}").format(
                size=size, file=file, thumbfile=thumbfile,
                l1=md5[:1], l2=md5[:2])


def quote_url_for_css(url):
    """Wrap *url* in quotes suitable for a CSS ``url(...)`` value."""
    # use double quotes if URL has single quotes
    quote_character = '"' if "'" in url else "'"
    # escape double quotes anyway
    return "{0}{1}{0}".format(quote_character, url.replace('"', '%22'))


def make_css_rule(l2s, file):
    """Build one CSS rule that shows *file* before each language header in *l2s*.

    Language names are turned into selector ids by replacing spaces with
    underscores and backslash-escaping single quotes.
    """
    selectors = [
        selector_template.format(
            language=l2.replace(" ", "_").replace("'", r"\'"))
        for l2 in l2s
    ]
    return rule_template.format(
        selectors=', '.join(selectors),
        url=quote_url_for_css(get_url_from_file(file)))


def deduplicate(array):
    """Remove duplicates from a list but keeps its order of elements.

    All but the first copy of any element is removed. Elements must be
    hashable; any iterable is accepted.
    """
    # dict keys keep first-insertion order, which is exactly the contract.
    return list(dict.fromkeys(array))


def reverse_lookup(dict, value):
    """Return all keys that correspond to a specific value on a dictionary."""
    return [key for key, candidate in dict.items() if candidate == value]


def search_matching(array, predicate, start=0, stop=2147483647):
    """Find the first index whose item satisfies *predicate*.

    *predicate* is called as ``predicate(index, item)`` for the items in
    ``array[start:stop]``, with *index* being the position in *array*.
    Returns the first index for which it returns a truthy value, or -1 if
    none do.
    """
    for index, value in enumerate(array[start:stop], start=start):
        if predicate(index, value):
            return index
    return -1


def convert_flag_dict_to_css(flags):
    """Convert a ``{language: filename}`` dict into the CSS rules text.

    Rules are ordered by language name, with the languages listed in
    ``always_first_l2`` moved to the front; languages sharing a file are
    merged into a single rule placed where the file first appears.
    """
    # convert flag dictionary to list of tuples (lang, file) sorted by lang
    sorted_flags = sorted(flags.items(), key=lambda t: t[0])
    # bring configured L2s to front; iterating in reverse leaves them in
    # the configured order once all have been moved
    for l2 in reversed(always_first_l2):
        # try to find matching index
        actual_index = search_matching(sorted_flags, lambda i, v: v[0] == l2)
        # -1 means not found, 0 means already at the front
        if actual_index > 0:
            # move index to front of list
            sorted_flags = ([sorted_flags[actual_index]]
                            + sorted_flags[:actual_index]
                            + sorted_flags[actual_index + 1:])
    # remove duplicate files from the file list...
    file_list = deduplicate(file for _, file in sorted_flags)
    # ...and group the languages by file in one pass; languages within a
    # group keep the iteration order of ``flags``
    langs_by_file = {}
    for lang, file in flags.items():
        langs_by_file.setdefault(file, []).append(lang)
    # convert the groups into rules, in file_list order, and return them
    return "\n\n".join(make_css_rule(langs_by_file[file], file)
                       for file in file_list)


def get_flag_dict_from_css(css_text):
    """Read CSS page to get dict of language to filename."""
    flag_dict = {}
    # languages seen in selectors since the last rule body
    language_names = []
    for match in re.finditer(css_language_filename_regex, css_text):
        if match[1]:
            language_names.append(
                match[1].replace("_", " ").replace(r"\'", "'"))
        elif match[2]:
            # reverse filename changes
            filename = match[2].replace("_", " ").replace("%22", '"')
            for name in language_names:
                flag_dict[name] = filename
            language_names = []
    return flag_dict


def get_flag_dict_diffs(old_flag_dict, new_flag_dict):
    """Return ``(added, changed, removed)`` dicts between two flag dicts.

    ``added`` maps new languages to their new file name; ``changed`` and
    ``removed`` map languages to their *old* file name.
    """
    added = {lang: filename
             for lang, filename in new_flag_dict.items()
             if lang not in old_flag_dict}
    changed = {lang: filename
               for lang, filename in old_flag_dict.items()
               if lang in new_flag_dict and new_flag_dict[lang] != filename}
    removed = {lang: filename
               for lang, filename in old_flag_dict.items()
               if lang not in new_flag_dict}
    return added, changed, removed


def utf8_len(s):
    """Return the length of *s* in bytes when encoded as UTF-8."""
    return len(s.encode('utf-8'))


def add_flag_change_summary(edit_summary, css_text, new_flag_dict):
    """Append a description of the flag changes to *edit_summary*.

    The current flags are parsed out of *css_text* and compared against
    *new_flag_dict*. If the resulting summary would exceed 800 UTF-8 bytes,
    a generic "too many changes" note is used instead.
    """
    old_flag_dict = get_flag_dict_from_css(css_text)
    # compare against the argument, not the module-level flag_dict
    added, changed, removed = get_flag_dict_diffs(old_flag_dict, new_flag_dict)

    messages = []

    def add_lang_message(verb, langs):
        if langs:
            messages.append(verb + " " + ", ".join(langs))

    add_lang_message("add", added)
    add_lang_message("change", changed)
    add_lang_message("remove", removed)

    edit_summary_addition = "; ".join(messages)
    if not edit_summary_addition:
        return edit_summary + ": no changes identified"

    new_summary = edit_summary + ": " + edit_summary_addition
    if utf8_len(new_summary) <= 800:
        return new_summary
    return edit_summary + ": too many changes to list"


enwikt = Site("en", fam="wiktionary")
enwikt.login()

get_page = Page(enwikt, get_page_title)
get_page_revision_id = get_page.latest_revision_id

list_match = re.search(list_regex, get_page.text, re.DOTALL)
if list_match is None:
    raise ValueError(
        "list markers not found on [[" + get_page_title + "]]")
# only the text between the markers, not the marker comments themselves
list_of_langs = list_match.group(1)
flag_list = [convert_line_to_item(re.match(line_regex, line))
             for line in list_of_langs.splitlines()]
# drop lines that did not match the expected format
flag_list = [x for x in flag_list if x]
size_dict = {filename: size for language, filename, size in flag_list}
flag_dict = {language: filename for language, filename, size in flag_list}
css_header = css_header_template.format(
    get_page_title=get_page_title,
    get_page_revision_id=get_page_revision_id)
css = css_header + convert_flag_dict_to_css(flag_dict)

set_page = Page(enwikt, set_page_title)
edit_summary = add_flag_change_summary(edit_summary, set_page.text, flag_dict)

if debug_mode:
    print(css + "\n\n----\n\nsummary:\n" + edit_summary)
else:
    set_page.text = css
    set_page.save(summary=edit_summary, minor=False)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4