5 def __init__(self
, wording
):
9 def update(self
, new_wording
):
11 self
.wording
= new_wording
17 return "%s (%d+)" % (self
.wording
, self
.num
)
20 def numbers_are_irrelevant(txt
):
21 ## When do we replace numbers with NN?
22 ## By default, always; but
23 ## if/when some categories of reasons need to keep their numbers,
24 ## this function shall return False for those categories.
28 def aggregate_reason_fields(reasons_list
):
29 # Each reason in the list may itself be a combination
30 # of several '|'-separated reasons.
32 reasons_txt
= '|'.join(reasons_list
)
33 reasons
= reasons_txt
.split('|')
35 for reason
in reasons
:
36 reason_reduced
= reason
.strip()
38 reason_reduced
= re
.sub(r
"\s+"," ", reason_reduced
)
40 if reason_reduced
== '':
41 continue # ignore empty reasons
43 if numbers_are_irrelevant(reason_reduced
):
44 # Generalize the numbers contained in the reason descriptor
45 # by replacing each of them with a generic NN.
46 reason_reduced
= re
.sub(r
"\d+","NN", reason_reduced
)
48 if not reason_reduced
in reason_htable
:
49 reason_htable
[reason_reduced
] = reason_counter(reason
)
51 ## reason_counter keeps the original (non-reduced)
52 ## reason if it occurred only once;
53 ## if the reason occurred more than once, reason_counter
54 ## will keep it in its reduced/generalized form.
55 reason_htable
[reason_reduced
].update(reason_reduced
)
57 generic_reasons
= reason_htable
.keys()
58 generic_reasons
.sort(key
= (lambda k
: reason_htable
[k
].num
),
60 return map(lambda generic_reason
: reason_htable
[generic_reason
].html(),