#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
"""woc: count the occurrences of the words in a text file.

Run as ``woc <file>`` to print the file statistics followed by every word
ranked by frequency, or as ``woc <word> <file>`` to print the statistics
followed by the count of a single word.

The code sticks to constructs that are valid on both Python 2.6+ and
Python 3 (``print(x)`` with one argument, ``in`` instead of ``has_key``,
``str`` methods instead of the ``string`` module functions).
"""

__version__ = "$Revision: 0.4$"
__author__ = "Nicolas Seriot"
__date__ = "2004-11-28"

import string
import sys

USAGE = """woc: word occurences count
counts the occurences of the words in a file

Usage: woc [<word>] <file>
    -h, --help      Show this help text and exit
    -v, --version   Show file version and exit

lines  : number of lines
swords : number of single words
twords : number of total words
chars  : number of characters
"""

VERSION = "0.4"

# Order in which the statistics are printed.
STATS_KEYS = ('lines', 'swords', 'twords', 'chars')


def count_words(text):
    """Compute the statistics and the word table for *text*.

    Punctuation characters (``string.punctuation``) are removed and words
    are lower-cased before counting, so "Hello," and "hello" are the same
    word. Words are separated by any run of whitespace.

    Returns a ``(stats, words)`` tuple:

    * ``stats`` maps each name in ``STATS_KEYS`` to an integer:
      ``lines`` (number of lines), ``swords`` (number of distinct words),
      ``twords`` (total number of words) and ``chars`` (number of
      characters, newline characters excluded).
    * ``words`` maps each lower-cased word to its number of occurrences.
    """
    lines = text.split('\n')
    # A final newline terminates the last line; it does not open a new,
    # empty one. This also makes an empty text count as zero lines.
    if lines[-1] == '':
        lines.pop()

    stats = dict.fromkeys(STATS_KEYS, 0)
    words = {}
    stats['lines'] = len(lines)
    for line in lines:
        stats['chars'] += len(line)
        bare = ''.join([ch for ch in line if ch not in string.punctuation])
        # split() without argument never yields empty tokens, so blank
        # lines, repeated spaces and punctuation-only tokens are not
        # counted as words.
        tokens = bare.lower().split()
        stats['twords'] += len(tokens)
        for token in tokens:
            words[token] = words.get(token, 0) + 1
    stats['swords'] = len(words)
    return stats, words


def format_stats(stats):
    """Return one "name    value" line per statistic, in STATS_KEYS order."""
    return ["%s%d" % (key.ljust(8), stats[key]) for key in STATS_KEYS]


def format_search(words, term):
    """Return the "term : count" line for search mode.

    The lookup is case-insensitive because the keys of *words* are
    lower-cased; *term* is echoed exactly as it was given.
    """
    return "%s : %d" % (term, words.get(term.lower(), 0))


def format_ranking(words):
    """Return "count : word" lines, most frequent word first.

    Sorting is done on the numeric count (ties: reverse alphabetical
    order of the word), not on the formatted text, so counts wider than
    the four-character column still rank correctly.
    """
    ranked = sorted([(count, word) for word, count in words.items()],
                    reverse=True)
    return ["%s : %s" % (str(count).rjust(4), word) for count, word in ranked]


def main(argv=None):
    """Run the command line described by USAGE and return the exit status.

    *argv* is the full argument vector including the program name; it
    defaults to ``sys.argv``. The file to read is always the last
    argument. Returns 0 on success and 1 when no argument is given or the
    file cannot be read.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print(USAGE)
        return 1
    if argv[1] in ('-h', '--help'):
        print(USAGE)
        return 0
    if argv[1] in ('-v', '--version'):
        print(VERSION)
        return 0

    path = argv[-1]
    try:
        # The context manager closes the file even if read() fails.
        with open(path) as handle:
            text = handle.read()
    except (IOError, OSError, UnicodeDecodeError):
        sys.stderr.write('give me a good file !\n')
        print(USAGE)
        return 1

    stats, words = count_words(text)
    for line in format_stats(stats):
        print(line)

    if len(argv) == 3:
        # search mode: woc <word> <file>
        print(format_search(words, argv[1]))
    elif len(argv) == 2:
        # full mode: woc <file>
        for line in format_ranking(words):
            print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))