#!/usr/bin/env python
"""Convert a directory of ``.ht`` files into ``.properties`` files.

Every ``*.ht`` file in the input directory is parsed into key/value pairs.
Pairs that occur with the same value in every input file are moved into a
common ``shared.properties`` file; what remains is written to one
``.properties`` file per input file.

The script runs on both Python 2 and Python 3.
"""

import sys
import os
import re

try:
    import urlparse  # Python 2
except ImportError:
    # Python 3 moved the module; ``unquote`` is available under the same name.
    import urllib.parse as urlparse


# Three whitespace-free fields, each followed by a single whitespace
# character, and then the remainder of the line (which may be empty).
_HT_LINE = re.compile(r"^(\S+)\s(\S+)\s(\S+)\s((?:\s*\S*)+)$")

_HT_SUFFIX = ".ht"
_PROPERTIES_SUFFIX = ".properties"
_SHARED_NAME = "shared.ht"


def usage():
    """Print the command line synopsis to standard output."""
    message = """ usage: {program} inDir outDir
inDir: directory containing .ht files
outDir: target for the new files"""
    print(message.format(program=os.path.basename(sys.argv[0])))


def parseFile(filename):
    """Parse one ``.ht`` file into a dictionary.

    Empty lines are skipped.  For every other line the second field,
    URL-unquoted, becomes the key and the remainder of the line (everything
    after the third field) becomes the value.

    A line whose remainder is empty while its third field is not ``"0"`` is
    treated as incomplete: it is joined with the next non-empty line and the
    result is parsed again.  An incomplete line that is never completed (end
    of file) is dropped.

    Args:
        filename: path of the file to read.

    Returns:
        dict mapping unquoted keys to their values.

    Raises:
        ValueError: if a non-empty line does not have the expected layout.
    """
    pairs = {}
    pending = None  # incomplete line waiting for its continuation
    with open(filename, "r") as handle:
        for raw_line in handle:
            line = raw_line.rstrip("\n")
            if not line:
                continue

            if pending is not None:
                # Diagnostic output: show which file needed a line join and
                # what the joined line looks like.
                print(filename)
                line = pending + line
                print(line)
                pending = None

            match = _HT_LINE.match(line)
            if match is None:
                raise ValueError(
                    "{0}: cannot parse line {1!r}".format(filename, line))

            key, third_field, value = match.group(2, 3, 4)
            if value == "" and third_field != "0":
                # The value seems to continue on the next line.
                print(line)
                print(list(match.groups()))
                pending = line
            else:
                pairs[urlparse.unquote(key)] = value
    return pairs


def parseFiles(dir):
    """Parse every ``*.ht`` file found directly inside ``dir``.

    Files are processed in sorted name order so that the result and the
    progress output are reproducible.

    Args:
        dir: directory to scan (not recursive).

    Returns:
        list of ``[file_name, pairs]`` entries, one per parsed file, where
        ``pairs`` is the dictionary returned by ``parseFile``.
    """
    strings = []
    for name in sorted(os.listdir(dir)):
        if name.endswith(_HT_SUFFIX):
            pairs = parseFile(os.path.join(dir, name))
            print(name)
            strings.append([name, pairs])
    return strings


def extractSharedEntries(strings):
    """Move the entries common to all files into a ``shared.ht`` entry.

    An entry is shared when its key is present in every dictionary with the
    same value.  Shared entries are removed from the individual dictionaries
    and collected in a new ``["shared.ht", shared]`` entry appended to the
    list.  A key that is present with a different value prints a diagnostic
    message and is left untouched.

    The list and the dictionaries in it are modified in place.

    Args:
        strings: list of ``[file_name, pairs]`` entries; may be empty.

    Returns:
        the same list object, with the ``shared.ht`` entry appended.
    """
    shared = {}
    if strings:
        # Anything shared by all files is necessarily in the first one, so
        # its keys are the only candidates.
        for key, value in strings[0][1].items():
            in_all_files = True
            for pair in strings:
                entries = pair[1]
                if key not in entries:
                    in_all_files = False
                elif entries[key] != value:
                    print("Element with different values")
                    print(key)
                    in_all_files = False
            if in_all_files:
                shared[key] = value

    for pair in strings:
        for key in shared:
            # The default keeps this safe if the same dictionary object is
            # listed more than once.
            pair[1].pop(key, None)

    strings.append([_SHARED_NAME, shared])
    return strings


def writeOutFiles(dir, strings):
    """Write each entry of ``strings`` as a ``.properties`` file in ``dir``.

    The output name is the entry's file name with a trailing ``.ht`` replaced
    by ``.properties``.  Each pair is written as one ``key=value`` line, in
    sorted key order.  Pairs that cannot be written because of a Unicode
    error are printed to standard output and skipped.

    Args:
        dir: existing directory that receives the files.
        strings: list of ``[file_name, pairs]`` entries.
    """
    for entry in strings:
        name = entry[0]
        pairs = entry[1]
        # Only swap the extension; an inner ".ht" (as in "a.html.ht") must
        # stay untouched.
        if name.endswith(_HT_SUFFIX):
            name = name[:-len(_HT_SUFFIX)] + _PROPERTIES_SUFFIX
        path = os.path.join(dir, name)
        with open(path, "w") as out:
            for key in sorted(pairs):
                value = pairs[key]
                try:
                    # One write per line: a failure must not leave a
                    # truncated "key=" fragment in the file.
                    out.write(key + "=" + value + "\n")
                except UnicodeError:
                    print(key)
                    print(value)


def main(args):
    """Run the conversion.

    Args:
        args: full argument vector: program name, input directory and
            output directory.  Any other length prints the usage text and
            exits with status 1.
    """
    if len(args) != 3:
        usage()
        sys.exit(1)

    strings = parseFiles(args[1])
    new_strings = extractSharedEntries(strings)
    writeOutFiles(args[2], new_strings)


if __name__ == "__main__":
    main(sys.argv)