8 message
= """ usage: {program} inDir outDir
9 inDir: directory containing .ht files
10 outDir: target for the new files"""
11 print(message
.format(program
= os
.path
.basename(sys
.argv
[0])))
13 def parseFile(filename
):
14 file = open(filename
, "r")
15 data
= file.readlines()
16 data
= [line
.rstrip('\n') for line
in data
]
19 regEx
= re
.compile("^(\S+)\s(\S+)\s(\S+)\s((?:\s*\S*)+)$")
25 #print("failed to parse line")
27 line
= old_line
+ line
30 split_line
= regEx
.split(line
)
32 #print(urlparse.unquote(split_line[2]))
34 if(old_line
== None and split_line
[4] == "" and split_line
[3] != "0"):
39 pairs
[urlparse
.unquote(split_line
[2])] = split_line
[4]
40 assert(len(split_line
) == 6)
47 for files
in os
.listdir(dir):
48 if files
.endswith(".ht"):
49 string
= parseFile(os
.path
.join(dir,files
))
52 strings
.append([files
, string
])
55 def extractSharedEntries(strings
):
56 first_dict
= strings
[0][1]
59 for key
, value
in first_dict
.iteritems():
60 # check that the entry in the same in all dics
61 is_in_all_dicts
= True
62 for dict_file_pair
in strings
:
63 dict = dict_file_pair
[1]
64 if not dict.has_key(key
):
65 is_in_all_dicts
= False
66 elif not dict[key
] == value
:
67 print("Element with different values")
69 is_in_all_dicts
= False
71 shared_dict
[key
] = value
73 for dict_file_pair
in strings
:
74 for key
in shared_dict
.iterkeys():
75 dict_file_pair
[1].pop(key
)
77 strings
.append(["shared.ht", shared_dict
])
80 def writeOutFiles(dir, strings
):
81 for string
in strings
:
82 file_name_base
= string
[0]
83 file_name_base
= file_name_base
.replace(".ht", ".properties")
84 file_name
= os
.path
.join(dir, file_name_base
)
85 file = open(file_name
, "w")
86 for key
, value
in string
[1].iteritems():
92 except UnicodeDecodeError:
102 strings
= parseFiles(args
[1])
103 new_strings
= extractSharedEntries(strings
)
104 writeOutFiles(args
[2], new_strings
)
106 if __name__
== "__main__":