stow/oh-my-zsh/.oh-my-zsh/plugins/emoji/update_emoji.py

   1 """
   2 Update Emoji.py
   3 Refreshes OMZ emoji database based on the latest Unicode spec
   4 """
   5 import re
   6 import json
   7
   8 spec = open("emoji-data.txt", "r")
   9
# Regexes
# regex_emoji captures, in this order: the code point(s), the status
# (qualification type), the actual emoji, and the text following it (its
# official name). Lines starting with "#" do not match.
regex_emoji = r"^([\w ].*?\S)\s*;\s*([\w-]+)\s*#\s*(.*?)\s(\S.*).*$"
# regex_group captures whether a comment line opens a "group" or a
# "subgroup", followed by the name of that group or subgroup.
regex_group = r"^#\s*(group|subgroup):\s*(.*)$"
  16
# Preamble written verbatim at the top of the generated
# emoji-char-definitions.zsh file: an explanatory comment block followed by
# the `typeset` declarations of the four arrays this script populates.
headers = """
# emoji-char-definitions.zsh - Emoji definitions for oh-my-zsh emoji plugin
#
# This file is auto-generated by update_emoji.py. Do not edit it manually.
#
# This contains the definition for:
#   $emoji         - which maps character names to Unicode characters
#   $emoji_flags   - maps country names to Unicode flag characters using region
#                    indicators
#   $emoji_mod     - maps modifier components to Unicode characters
#   $emoji_groups  - a single associative array to avoid cluttering up the
#                    global namespace, and to allow adding additional group
#                    definitions at run time. The keys are the group names, and
#                    the values are whitespace-separated lists of emoji
#                    character names.

# Main emoji
typeset -gAH emoji
# National flags
typeset -gAH emoji_flags
# Combining modifiers
typeset -gAH emoji_mod
# Emoji groups
typeset -gAH emoji_groups
"""
  42
  43 #######
  44 # Adding country codes
  45 #######
  46 # This is the only part of this script that relies on an external library
  47 # (country_converter), and is hence commented out by default.
  48 # You can uncomment it to have country codes added as aliases for flag
  49 # emojis. (By default, when you install this extension, country codes are
  50 # included as aliases, but not if you re-run this script without uncommenting.)
  51 # Warning: country_converter is very verbose, and will print warnings all over
  52 # your terminal.
  53
  54 # import country_converter as coco # pylint: disable=wrong-import-position
  55 # cc = coco.CountryConverter()
  56
  57 # def country_iso(_all_names, _omz_name):
  58 #     """ Using the external library country_converter,
  59 #         this function can detect the ISO2 and ISO3 codes
  60 #         of the country. It takes as argument the array
  61 #         with all the names of the emoji, and returns that array."""
  62 #     omz_no_underscore = re.sub(r'_', r' ', _omz_name)
  63 #     iso2 = cc.convert(names=[omz_no_underscore], to='ISO2')
  64 #     if iso2 != 'not found':
  65 #         _all_names.append(iso2)
  66 #         iso3 = cc.convert(names=[omz_no_underscore], to='ISO3')
  67 #         _all_names.append(iso3)
  68 #     return _all_names
  69
  70
  71 #######
  72 # Helper functions
  73 #######
  74
  75 def code_to_omz(_code_points):
  76     """ Returns a ZSH-compatible Unicode string from the code point(s) """
  77     return r'\U' + r'\U'.join(_code_points.split(' '))
  78
  79 def name_to_omz(_name, _group, _subgroup, _status):
  80     """ Returns a reasonable snake_case name for the emoji. """
  81     def snake_case(_string):
  82         """ Does the regex work of snake_case """
  83         remove_dots = re.sub(r'\.\(\)', r'', _string)
  84         replace_ands = re.sub(r'\&', r'and', remove_dots)
  85         remove_whitespace = re.sub(r'[^\#\*\w]', r'_', replace_ands)
  86         return re.sub(r'__', r'_', remove_whitespace)
  87
  88     shortname = ""
  89     split_at_colon = lambda s: s.split(": ")
  90     # Special treatment by group and subgroup
  91     # If the emoji is a flag, we strip "flag" from its name
  92     if _group == "Flags" and len(split_at_colon(_name)) > 1:
  93         shortname = snake_case(split_at_colon(_name)[1])
  94     else:
  95         shortname = snake_case(_name)
  96     # Special treatment by status
  97     # Enables us to have every emoji combination,
  98     # even the one that are not officially sanctionned
  99     # and are implemented by, say, only one vendor
 100     if _status == "unqualified":
 101         shortname += "_unqualified"
 102     elif _status == "minimally-qualified":
 103         shortname += "_minimally"
 104     return shortname
 105
 106 def increment_name(_shortname):
 107     """ Increment the short name by 1. If you get, say,
 108     'woman_detective_unqualified', it returns
 109     'woman_detective_unqualified_1', and then
 110     'woman_detective_unqualified_2', etc. """
 111     last_char = _shortname[-1]
 112     if last_char.isdigit():
 113         num = int(last_char)
 114         return _shortname[:-1] + str(num + 1)
 115     return _shortname + "_1"
 116
 117 ########
 118 # Going through every line
 119 ########
 120
 121 group, subgroup, short_name_buffer = "", "", ""
 122 emoji_database = []
 123 for line in spec:
 124     # First, test if this line opens a group or subgroup
 125     group_match = re.findall(regex_group, line)
 126     if group_match != []:
 127         gr_or_sub, name = group_match[0]
 128         if gr_or_sub == "group":
 129             group = name
 130         elif gr_or_sub == "subgroup":
 131             subgroup = name
 132         continue # Moving on...
 133     # Second, test if this line references one emoji
 134     emoji_match = re.findall(regex_emoji, line)
 135     if emoji_match != []:
 136         code_points, status, emoji, name = emoji_match[0]
 137         omz_codes = code_to_omz(code_points)
 138         omz_name = name_to_omz(name, group, subgroup, status)
 139         # If this emoji has the same shortname as the preceding one
 140         if omz_name in short_name_buffer:
 141             omz_name = increment_name(short_name_buffer)
 142         short_name_buffer = omz_name
 143         emoji_database.append(
 144             [omz_codes, status, emoji, omz_name, group, subgroup])
 145 spec.close()
 146
 147 ########
 148 # Write to emoji-char-definitions.zsh
 149 ########
 150
 151 # Aliases for emojis are retrieved through the DB of Gemoji
 152 # Retrieved on Aug 9 2019 from the following URL:
 153 # https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json
 154
 155 gemoji_db = open("gemoji_db.json")
 156 j = json.load(gemoji_db)
 157 aliases_map = {entry['emoji']: entry['aliases'] for entry in j}
 158 all_omz_names = [emoji_data[3] for emoji_data in emoji_database]
 159
 160 # Let's begin writing to this file
 161 output = open("emoji-char-definitions.zsh", "w")
 162 output.write(headers)
 163
 164 emoji_groups = {"fruits": "\n", "vehicles": "\n", "hands": "\n",
 165                 "people": "\n", "animals": "\n", "faces": "\n",
 166                 "flags": "\n"}
 167
 168 # First, write every emoji down
 169 for _omz_codes, _status, _emoji, _omz_name, _group, _subgroup in emoji_database:
 170
 171     # One emoji can be mapped to multiple names (aliases or country codes)
 172     names_for_this_emoji = [_omz_name]
 173
 174     # Variable that indicates in which map the emoji will be located
 175     emoji_map = "emoji"
 176     if _status == "component":
 177         emoji_map = "emoji_mod"
 178     if _group == "Flags":
 179         emoji_map = "emoji_flags"
 180         # Adding country codes (Optional, see above)
 181         # names_for_this_emoji = country_iso(names_for_this_emoji, _omz_name)
 182
 183     # Check if there is an alias available in the Gemoji DB
 184     if _emoji in aliases_map.keys():
 185         for alias in aliases_map[_emoji]:
 186             if alias not in all_omz_names:
 187                 names_for_this_emoji.append(alias)
 188
 189     # And now we write to the definitions file
 190     for one_name in names_for_this_emoji:
 191         output.write(f"{emoji_map}[{one_name}]=$'{_omz_codes}'\n")
 192
 193     # Storing the emoji in defined subgroups for the next step
 194     if _status == "fully-qualified":
 195         if _subgroup == "food-fruit":
 196             emoji_groups["fruits"] += f"  {_omz_name}\n"
 197         elif "transport-" in _subgroup:
 198             emoji_groups["vehicles"] += f"  {_omz_name}\n"
 199         elif "hand-" in _subgroup:
 200             emoji_groups["hands"] += f"  {_omz_name}\n"
 201         elif "person-" in _subgroup or _subgroup == "family":
 202             emoji_groups["people"] += f"  {_omz_name}\n"
 203         elif "animal-" in _subgroup:
 204             emoji_groups["animals"] += f"  {_omz_name}\n"
 205         elif "face-" in _subgroup:
 206             emoji_groups["faces"] += f"  {_omz_name}\n"
 207         elif _group == "Flags":
 208             emoji_groups["flags"] += f"  {_omz_name}\n"
 209
 210 # Second, write the subgroups to the end of the file
 211 for name, string in emoji_groups.items():
 212     output.write(f'\nemoji_groups[{name}]="{string}"\n')
 213 output.close()