#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Convert a UTF-8 text file to ISO-8859-1, written to standard output.

Typographic characters that have no ISO-8859-1 equivalent (curly apostrophe,
dashes, ellipsis, the "oe" ligature, the euro sign) and French guillemets are
first replaced by plain ASCII approximations.  Any other character that cannot
be represented in ISO-8859-1 makes the conversion fail with
``UnicodeEncodeError`` (strict encoding).

Usage: script.py <input_file>
"""
import sys
import codecs
import io
import re

USAGE = "usage: %s <input_file>" % sys.argv[0]

# Replacement table, written with escapes so that it does not depend on the
# encoding this source file is saved in.
REPLACEMENTS = {
    u'\u2019': u"'",       # right single quotation mark
    u'\u00ab': u'"',       # left-pointing guillemet
    u'\u00bb': u'"',       # right-pointing guillemet
    u'\u2014': u'-',       # em dash
    u'\u2013': u'-',       # en dash
    u'\u0153': u'oe',      # small ligature oe
    u'\u2026': u'...',     # horizontal ellipsis
    u'\u20ac': u' euros',  # euro sign
}

# Every key is a single character, so one translate() pass replaces the
# chain of str.replace() calls.
_TRANSLATION_TABLE = dict((ord(src), dst) for src, dst in REPLACEMENTS.items())


def transliterate(text):
    """Return *text* with every character of REPLACEMENTS substituted."""
    return text.translate(_TRANSLATION_TABLE)


def convert_line(line):
    """Return one decoded input line as ISO-8859-1 bytes, without its newline.

    Only a trailing ``\\n`` is removed; a final line that has no newline keeps
    all of its characters.

    Raises:
        UnicodeEncodeError: if the line still contains a character that
            ISO-8859-1 cannot represent after transliteration.
    """
    if line.endswith(u'\n'):
        line = line[:-1]
    return transliterate(line).encode('iso-8859-1', 'strict')


def convert_file(input_file, out):
    """Convert the UTF-8 file *input_file*, writing bytes to *out*.

    *out* must be a binary writable stream.  Each converted line is written
    followed by a single ``\\n``.
    """
    # newline='\n' splits on "\n" only and performs no newline translation,
    # so any "\r" in the input is passed through unchanged.
    with io.open(input_file, 'r', encoding='utf-8', newline='\n') as f:
        for line in f:
            out.write(convert_line(line) + b'\n')


def main(argv=None):
    """Command-line entry point; return the process exit status."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write(USAGE + "\n")
        return 2
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts bytes.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    convert_file(argv[1], out)
    return 0


if __name__ == "__main__":
    sys.exit(main())