tdf#90839: follow-up work
[LibreOffice.git] / bin / extract-tooltip.py
blob5397c718ff2be13411c70e144b1cad2139f587c0
1 #!/usr/bin/env python
2 import sys
3 import os
4 import re
5 import urlparse
def usage():
    """Print the command-line synopsis of this script to stdout.

    The program name shown is the base name of ``sys.argv[0]``.
    """
    script_name = os.path.basename(sys.argv[0])
    template = """ usage: {program} inDir outDir
inDir: directory containing .ht files
outDir: target for the new files"""
    print(template.format(program=script_name))
def parseFile(filename):
    """Parse one .ht file into a dict mapping unquoted key -> text.

    Every non-empty line must consist of three whitespace-free fields, each
    followed by exactly one whitespace character, and then the remaining
    text (which may be empty).  The second field is URL-unquoted and used as
    the key; the remaining text becomes the value.

    A line whose text is empty while its third field is not "0" is treated
    as incomplete: it is held back and glued (with no separator) in front of
    the next non-empty line, and the combined line is parsed instead.  The
    file name and the lines involved are printed as diagnostics.

    Raises:
        ValueError: if a (possibly combined) line does not match the
            expected field layout.
    """
    # Resolved locally so this function runs on both Python 3 and Python 2.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urlparse import unquote

    # The context manager closes the handle even if reading fails.
    with open(filename, "r") as handle:
        data = [raw.rstrip('\n') for raw in handle.readlines()]

    pairs = {}
    regEx = re.compile(r"^(\S+)\s(\S+)\s(\S+)\s((?:\s*\S*)+)$")
    old_line = None
    for line in data:
        if not line:
            continue
        if old_line is not None:
            # Complete the held-back line with the current one.
            print(filename)
            line = old_line + line
            print(line)
            old_line = None
        split_line = regEx.split(line)
        # A full match splits into ['', f1, f2, f3, text, '']; any other
        # length means the pattern did not match.  Check before indexing so
        # a malformed line is reported clearly instead of as an IndexError.
        if len(split_line) != 6:
            raise ValueError("%s: cannot parse line %r" % (filename, line))
        if split_line[4] == "" and split_line[3] != "0":
            # Text is missing although the third field is not "0":
            # hold the line back and wait for its continuation.
            print(line)
            print(split_line)
            old_line = line
        else:
            pairs[unquote(split_line[2])] = split_line[4]
    # NOTE: a line still held back when the file ends is not stored.
    return pairs
def parseFiles(dir):
    """Parse every ``.ht`` file located directly inside *dir*.

    Returns a list of ``[file_name, mapping]`` pairs, one per directory
    entry whose name ends in ".ht", in the order ``os.listdir`` yields
    them.  Each file name is printed after its file has been parsed.
    """
    parsed = []
    for entry in os.listdir(dir):
        if not entry.endswith(".ht"):
            continue
        mapping = parseFile(os.path.join(dir, entry))
        print(entry)
        parsed.append([entry, mapping])
    return parsed
def extractSharedEntries(strings):
    """Move the entries common to all files into a new "shared.ht" entry.

    *strings* is a list of ``[file_name, mapping]`` pairs.  An entry counts
    as shared when its key occurs in every mapping with an equal value.
    Shared entries are removed from each mapping in place, then
    ``["shared.ht", shared_mapping]`` is appended to *strings*.

    An empty *strings* list yields just the (empty) "shared.ht" entry.

    Returns:
        The same *strings* list that was passed in, after modification.
    """
    shared_dict = {}
    if strings:
        # Only keys of the first mapping can possibly be in all mappings.
        first_dict = strings[0][1]
        for key, value in first_dict.items():
            is_in_all_dicts = True
            for dict_file_pair in strings:
                entries = dict_file_pair[1]
                if key not in entries:
                    is_in_all_dicts = False
                elif entries[key] != value:
                    print("Element with different values")
                    print(key)
                    is_in_all_dicts = False
            if is_in_all_dicts:
                shared_dict[key] = value

    # Strip the shared entries from the per-file mappings.
    for dict_file_pair in strings:
        for key in shared_dict:
            dict_file_pair[1].pop(key)

    strings.append(["shared.ht", shared_dict])
    return strings
def writeOutFiles(dir, strings):
    """Write each mapping in *strings* to a ``.properties`` file in *dir*.

    *strings* is a list of ``[file_name, mapping]`` pairs.  A trailing
    ".ht" in the file name is replaced by ".properties"; other names are
    used unchanged.  Every entry is written as one ``key=value`` line.

    An entry that cannot be written because of a Unicode error is skipped;
    its key and value are printed instead.
    """
    for string in strings:
        file_name_base = string[0]
        # Replace only the extension, not every ".ht" inside the name.
        if file_name_base.endswith(".ht"):
            file_name_base = file_name_base[:-len(".ht")] + ".properties"
        file_name = os.path.join(dir, file_name_base)
        # The context manager closes the file even if a write fails.
        with open(file_name, "w") as out:
            for key, value in string[1].items():
                try:
                    # Build the line first so a failing entry never leaves
                    # a partial "key=" fragment in the output.
                    out.write(key + "=" + value + "\n")
                except UnicodeError:
                    print(key)
                    print(value)
def main (args):
    """Run the conversion for a full argument vector (program name first).

    *args* must hold exactly three items: the program name, the input
    directory and the output directory.  With any other count the usage
    text is printed and the process exits with status 1.
    """
    expected_count = 3
    if len(args) != expected_count:
        usage()
        sys.exit(1)

    in_dir = args[1]
    out_dir = args[2]
    # Parse all inputs, factor out the common entries, then write results.
    writeOutFiles(out_dir, extractSharedEntries(parseFiles(in_dir)))
# Run the conversion only when this file is executed as a script; the raw
# command line (program name included) is handed to main().
if __name__ == "__main__":
    main(sys.argv)