python/samba/ms_forest_updates_markdown.py

   1 # Create forest updates ldif from Github markdown
   2 #
   3 # Each update is converted to an ldif then gets written to a corresponding
   4 # .LDF output file or stored in a dictionary.
   5 #
# In general, only the 'add' updates can be applied.
   7 #
   8 # Copyright (C) Andrew Bartlett <abartlet@samba.org> 2017
   9 #
  10 # This program is free software; you can redistribute it and/or modify
  11 # it under the terms of the GNU General Public License as published by
  12 # the Free Software Foundation; either version 3 of the License, or
  13 # (at your option) any later version.
  14 #
  15 # This program is distributed in the hope that it will be useful,
  16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 # GNU General Public License for more details.
  19 #
  20 # You should have received a copy of the GNU General Public License
  21 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  22
  23 """Generate LDIF from Github documentation."""
  24
  25 import re
  26 import os
  27 import markdown
  28 import xml.etree.ElementTree as ET
  29 from samba.common import get_string
  30
  31
  32 # Display specifier updates or otherwise (ignored in forest_update.py)
  33 def noop(description, attributes, sd):
  34     return (None, None, [], None)
  35
  36
  37 # ACE addition updates (ignored in forest_update.py)
  38 def parse_grant(description, attributes, sd):
  39     # Granting the "CN=Send-As,CN=Extended-Rights" to gMSA accounts.
  40     if (description.startswith("Granting the ") and
  41         description.endswith("to gMSA accounts.") and
  42         (attributes and attributes.lower() == 'n/a') and
  43         (sd and sd.lower() == 'n/a')):
  44         return ('modify', extract_dn_or_none(description),
  45                 ['add: appliesTo', 'appliesTo: 7b8b558a-93a5-4af7-adca-c017e67f1057'],
  46                 None)
  47
  48     return ('modify', None, [], sd if sd.lower() != 'n/a' else None)
  49
  50
  51 # Addition of new objects to the directory (most are applied in forest_update.py)
  52 def parse_add(description, attributes, sd):
  53     dn = extract_dn(description)
  54     return ('add', dn, extract_attrib(dn, attributes), sd if sd.lower() != 'n/a' else None)
  55
  56
  57 # Set of a particular attribute (ignored in forest_update.py)
  58 def parse_set(description, attributes, sd):
  59     return ('modify', extract_dn_or_none(description),
  60             extract_replace_attrib(attributes),
  61             sd if sd.lower() != 'n/a' else None)
  62
  63
  64 # Set of a particular ACE (ignored in forest_update.py)
  65 # The general issue is that the list of DNs must be generated dynamically
  66 def parse_ace(description, attributes, sd):
  67
  68     def extract_dn_ace(text):
  69         if 'Sam-Domain' in text:
  70             return ('${DOMAIN_DN}', 'CN=Sam-Domain,${SCHEMA_DN}')
  71         elif 'Domain-DNS' in text:
  72             return ('${...}', 'CN=Domain-DNS,${SCHEMA_DN}')
  73
  74         return None
  75
  76     return [('modify', extract_dn_ace(description)[0],
  77              ['replace: nTSecurityDescriptor',
  78               'nTSecurityDescriptor: ${DOMAIN_SCHEMA_SD}%s' % sd], None),
  79             ('modify', extract_dn_ace(description)[1],
  80              ['replace: defaultSecurityDescriptor',
  81               'defaultSecurityDescriptor: ${OLD_SAMBA_SD}%s' % sd], None)]
  82
  83
# Dispatch table used by read_ms_markdown(): each key is a prefix that is
# compared against the start of an update's description, and the value is the
# parser called with (description, attributes, sd) for that row.
# The table is scanned in the order written here and the first matching
# prefix is used.  The trailing comment on each entry names the LDIF
# changetype the parser emits.
# We are really only interested in 'Created' items
operation_map = {
    # modify
    'Granting': parse_grant,
    # add
    'Created': parse_add,
    # modify
    'Set': parse_set,
    # modify
    # (returns a list of two tuples rather than a single tuple)
    'Added ACE': parse_ace,
    # modify
    'Updated': parse_set,
    # unknown
    'Call': noop
}
  99
 100
 101 def extract_dn(text):
 102     """
 103     Extract a DN from the textual description
 104     :param text:
 105     :return: DN in string form
 106     """
 107     text = text.replace(' in the Schema partition.', ',${SCHEMA_DN}')
 108     text = text.replace(' in the Configuration partition.', ',${CONFIG_DN}')
 109     dn = re.search('([CDO][NCU]=.*?,)*([CDO][NCU]=.*)', text).group(0)
 110
 111     # This should probably be also fixed upstream
 112     if dn == 'CN=ad://ext/AuthenticationSilo,CN=Claim Types,CN=Claims Configuration,CN=Services':
 113         return 'CN=ad://ext/AuthenticationSilo,CN=Claim Types,CN=Claims Configuration,CN=Services,${CONFIG_DN}'
 114
 115     # Granting the "CN=Send-As,CN=Extended-Rights" to gMSA accounts.
 116     if dn.endswith(',CN=Extended-Rights" to gMSA accounts.'):
 117         dn = dn.replace('" to gMSA accounts.', '')
 118         return dn + ",${CONFIG_DN}"
 119
 120     return dn
 121
 122
 123 def extract_dn_or_none(text):
 124     """
 125     Same as above, but returns None if it doesn't work
 126     :param text:
 127     :return: DN or None
 128     """
 129     try:
 130         return extract_dn(text)
 131     except:
 132         return None
 133
 134
 135 def save_ldif(filename, answers, out_folder):
 136     """
 137     Save ldif to disk for each updates
 138     :param filename: filename use ([OPERATION NUM]-{GUID}.ldif)
 139     :param answers: array of tuples generated with earlier functions
 140     :param out_folder: folder to prepend
 141     """
 142     path = os.path.join(out_folder, filename)
 143     with open(path, 'w') as ldif:
 144         for answer in answers:
 145             change, dn, attrib, sd = answer
 146             ldif.write('dn: %s\n' % dn)
 147             ldif.write('changetype: %s\n' % change)
 148             if len(attrib) > 0:
 149                 ldif.write('\n'.join(attrib) + '\n')
 150             if sd is not None:
 151                 ldif.write('nTSecurityDescriptor: D:%s\n' % sd)
 152             ldif.write('-\n\n')
 153
 154
 155 def save_array(guid, answers, out_dict):
 156     """
 157     Save ldif to an output dictionary
 158     :param guid: GUID to store
 159     :param answers: array of tuples generated with earlier functions
 160     :param out_dict: output dictionary
 161     """
 162     ldif = ''
 163     for answer in answers:
 164         change, dn, attrib, sd = answer
 165         ldif += 'dn: %s\n' % dn
 166         ldif += 'changetype: %s\n' % change
 167         if len(attrib) > 0:
 168             ldif += '\n'.join(attrib) + '\n'
 169         if sd is not None:
 170             ldif += 'nTSecurityDescriptor: D:%s\n' % sd
 171         ldif += '-\n\n'
 172
 173     out_dict[guid] = ldif
 174
 175
 176 def extract_attrib(dn, attributes):
 177     """
 178     Extract the attributes as an array from the attributes column
 179     :param dn: parsed from markdown
 180     :param attributes: from markdown
 181     :return: attribute array (ldif-type format)
 182     """
 183     attrib = [x.lstrip('- ') for x in attributes.split('-   ') if x.lower() != 'n/a' and x != '']
 184     attrib = [x.replace(': True', ': TRUE') if x.endswith(': True') else x for x in attrib]
 185     attrib = [x.replace(': False', ': FALSE') if x.endswith(': False') else x for x in attrib]
 186     # We only have one such value, we may as well skip them all consistently
 187     attrib = [x for x in attrib if not x.lower().startswith('msds-claimpossiblevalues')]
 188
 189     return attrib
 190
 191
 192 def extract_replace_attrib(attributes):
 193     """
 194     Extract the attributes as an array from the attributes column
 195     (for replace)
 196     :param attributes: from markdown
 197     :return: attribute array (ldif-type format)
 198     """
 199     lines = [x.lstrip('- ') for x in attributes.split('-   ') if x.lower() != 'n/a' and x != '']
 200     lines = [('replace: %s' % line.split(':')[0], line) for line in lines]
 201     lines = [line for pair in lines for line in pair]
 202     return lines
 203
 204
 205 def innertext(tag):
 206     return (tag.text or '') + \
 207         ''.join(innertext(e) for e in tag) + \
 208         (tag.tail or '')
 209
 210
def read_ms_markdown(in_file, out_folder=None, out_dict=None):
    """
    Read Github documentation to produce forest wide updates

    The markdown is rendered to XHTML, the operation tables are located in
    the resulting tree and every table row is turned into LDIF, which is
    either written to a file or stored in a dictionary.
    :param in_file: Forest-Wide-Updates.md
    :param out_folder: output folder; when it is not None each update is
                       written there through save_ldif()
    :param out_dict: output dictionary; used through save_array() when
                     out_folder is None, keyed by the operation GUID
    :raises Exception: carrying the row, when a row's description matches
                       none of the prefixes in operation_map
    """

    with open(in_file) as update_file:
        # There is a hidden ClaimPossibleValues in this md file
        content = update_file.read()

        # Replace every <p> with <br />.
        # NOTE(review): presumably this keeps the rendered markup well-formed
        # for the XML parse further down - confirm.
        content = re.sub(r'<p>',
                         '<br />',
                         content)
        # Drop the backslash escaping the '<' of the forest root domain
        # placeholder so the next substitution can match it.
        content = re.sub(r'CN=\\<forest root domain',
                         'CN=<forest root domain',
                         content)

        # Replace the whole 'CN=<forest root domain ...>' placeholder with a
        # substitution variable.
        content = re.sub(r'CN=<forest root domain.*?>',
                         '${FOREST_ROOT_DOMAIN}',
                         content)

        html = markdown.markdown(content,
                                 output_format='xhtml')

    html = html.replace('CN=Schema,%ws', '${SCHEMA_DN}')

    # Wrap the rendered fragment in a single <root> element before parsing.
    tree = ET.fromstring('<root>' + html + '</root>')

    for node in tree:
        if not node.text:
            continue
        updates = None
        # Two table layouts are recognised: cells separated by a bare '|'
        # and cells separated by ' | '.  Each text line of the node becomes
        # one list of cells.
        if node.text.startswith('|Operation'):
            # Strip first and last |
            updates = [x[1:len(x) - 1].split('|') for x in
                       get_string(ET.tostring(node, method='text')).splitlines()]
        elif node.text.startswith('| Operation'):
            # Strip first and last |
            updates = [x[2:len(x) - 2].split(' | ') for x in
                       get_string(ET.tostring(node, method='text')).splitlines()]
        if updates:
            # The first two lines are skipped (presumably the table header
            # and its separator row - confirm).  Cells are used as:
            # [0] operation id, [1] description, [2] attributes, [3] sd.
            for update in updates[2:]:
                output = re.match(r'Operation (\d+): {(.*)}', update[0])
                if output:
                    # print output.group(1), output.group(2)
                    guid = output.group(2)
                    filename = "%s-{%s}.ldif" % (output.group(1).zfill(4), guid)
                # When the first cell does not match, guid and filename keep
                # the values set by an earlier row.
                # NOTE(review): they are unbound if no earlier row matched.

                found = False

                # These rows are handled by parse_ace() directly and never
                # reach the operation_map lookup below.
                if update[3].startswith('Created') or update[1].startswith('Added ACE'):
                    # Trigger the security descriptor code
                    # Reduce info to just the security descriptor
                    # (keeps only the text after the last ':')
                    update[3] = update[3].split(':')[-1]

                    result = parse_ace(update[1], update[2], update[3])

                    # parse_ace() already returns a list of tuples, so the
                    # result is passed on without wrapping.
                    # NOTE(review): with out_folder None, save_array() is
                    # used and needs out_dict to be a real dictionary.
                    if filename and out_folder is not None:
                        save_ldif(filename, result, out_folder)
                    else:
                        save_array(guid, result, out_dict)

                    continue

                # The first prefix in operation_map that the description
                # starts with selects the parser.
                for operation in operation_map:
                    if update[1].startswith(operation):
                        found = True

                        result = operation_map[operation](update[1], update[2], update[3])

                        # A single tuple is wrapped in a list for the savers.
                        if filename and out_folder is not None:
                            save_ldif(filename, [result], out_folder)
                        else:
                            save_array(guid, [result], out_dict)

                        break

                # An unrecognised description is an error; the row itself is
                # the exception argument.
                if not found:
                    raise Exception(update)

            # print ET.tostring(node, method='text')
 292
 293             # print ET.tostring(node, method='text')
 294
 295
 296 if __name__ == '__main__':
 297     import sys
 298
 299     out_folder = ''
 300
 301     if len(sys.argv) == 0:
 302         print("Usage: %s <Forest-Wide-Updates.md> [<output folder>]" % (sys.argv[0]), file=sys.stderr)
 303         sys.exit(1)
 304
 305     in_file = sys.argv[1]
 306     if len(sys.argv) > 2:
 307         out_folder = sys.argv[2]
 308
 309     read_ms_markdown(in_file, out_folder)