2015-01-14 05:56:44 -06:00
|
|
|
#!/usr/bin/env python
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import urlparse
|
|
|
|
|
|
|
|
def usage():
|
|
|
|
message = """ usage: {program} inDir outDir
|
|
|
|
inDir: directory containing .ht files
|
|
|
|
outDir: target for the new files"""
|
|
|
|
print(message.format(program = os.path.basename(sys.argv[0])))
|
|
|
|
|
|
|
|
def parseFile(filename):
|
|
|
|
file = open(filename, "r")
|
|
|
|
data = file.readlines()
|
|
|
|
data = [line.rstrip('\n') for line in data]
|
|
|
|
|
|
|
|
pairs = {}
|
2020-07-08 03:51:48 -05:00
|
|
|
regEx = re.compile(r"^(\S+)\s(\S+)\s(\S+)\s((?:\s*\S*)+)$")
|
2015-01-14 05:56:44 -06:00
|
|
|
old_line = None
|
|
|
|
for line in data:
|
|
|
|
if len(line) > 0:
|
2024-03-28 08:34:04 -05:00
|
|
|
if old_line is not None:
|
|
|
|
print(filename)
|
|
|
|
# print("failed to parse line")
|
|
|
|
# print(old_line)
|
2015-01-14 05:56:44 -06:00
|
|
|
line = old_line + line
|
2024-03-28 08:34:04 -05:00
|
|
|
print(line)
|
2015-01-14 05:56:44 -06:00
|
|
|
old_line = None
|
|
|
|
split_line = regEx.split(line)
|
2024-03-28 08:34:04 -05:00
|
|
|
# print(split_line)
|
|
|
|
# print(urlparse.unquote(split_line[2]))
|
|
|
|
# print(split_line[4])
|
|
|
|
if (old_line is None and split_line[4] == "" and split_line[3] != "0"):
|
2015-01-14 05:56:44 -06:00
|
|
|
print(line)
|
|
|
|
print(split_line)
|
|
|
|
old_line = line
|
|
|
|
else:
|
|
|
|
pairs[urlparse.unquote(split_line[2])] = split_line[4]
|
|
|
|
assert(len(split_line) == 6)
|
2024-03-28 08:34:04 -05:00
|
|
|
# print data
|
|
|
|
# print(pairs)
|
2015-01-14 05:56:44 -06:00
|
|
|
return pairs
|
|
|
|
|
|
|
|
def parseFiles(dir):
|
|
|
|
strings = []
|
|
|
|
for files in os.listdir(dir):
|
|
|
|
if files.endswith(".ht"):
|
|
|
|
string = parseFile(os.path.join(dir,files))
|
|
|
|
print(files)
|
|
|
|
#print string
|
|
|
|
strings.append([files, string])
|
|
|
|
return strings
|
|
|
|
|
|
|
|
def extractSharedEntries(strings):
|
|
|
|
first_dict = strings[0][1]
|
|
|
|
shared_dict = {}
|
|
|
|
#print(first_dict)
|
|
|
|
for key, value in first_dict.iteritems():
|
|
|
|
# check that the entry in the same in all dics
|
|
|
|
is_in_all_dicts = True
|
|
|
|
for dict_file_pair in strings:
|
|
|
|
dict = dict_file_pair[1]
|
|
|
|
if not dict.has_key(key):
|
|
|
|
is_in_all_dicts = False
|
|
|
|
elif not dict[key] == value:
|
|
|
|
print("Element with different values")
|
|
|
|
print(key)
|
|
|
|
is_in_all_dicts = False
|
|
|
|
if is_in_all_dicts:
|
|
|
|
shared_dict[key] = value
|
|
|
|
#print(shared_dict)
|
|
|
|
for dict_file_pair in strings:
|
|
|
|
for key in shared_dict.iterkeys():
|
|
|
|
dict_file_pair[1].pop(key)
|
|
|
|
|
|
|
|
strings.append(["shared.ht", shared_dict])
|
|
|
|
return strings
|
|
|
|
|
|
|
|
def writeOutFiles(dir, strings):
|
|
|
|
for string in strings:
|
|
|
|
file_name_base = string[0]
|
|
|
|
file_name_base = file_name_base.replace(".ht", ".properties")
|
|
|
|
file_name = os.path.join(dir, file_name_base)
|
|
|
|
file = open(file_name, "w")
|
|
|
|
for key, value in string[1].iteritems():
|
|
|
|
try:
|
|
|
|
file.write(key)
|
|
|
|
file.write("=")
|
|
|
|
file.write(value)
|
|
|
|
file.write("\n")
|
|
|
|
except UnicodeDecodeError:
|
2024-03-28 08:34:04 -05:00
|
|
|
print(key)
|
|
|
|
print(value)
|
2015-01-14 05:56:44 -06:00
|
|
|
file.close()
|
|
|
|
|
|
|
|
def main (args):
|
|
|
|
if(len(args) != 3):
|
|
|
|
usage()
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
strings = parseFiles(args[1])
|
|
|
|
new_strings = extractSharedEntries(strings)
|
|
|
|
writeOutFiles(args[2], new_strings)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
main(sys.argv)
|