#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
"""pdfmi: display the meta-information stored in a PDF document.

The file named on the command line is scanned for the entries listed in
KEYLIST (``/Title (...)``, ``/Author (...)``, ...) and one line is printed
per entry, in KEYLIST order.  Entries that are not found print as empty.

Only constructs valid on both Python 2.6+ and Python 3 are used.

Changelog:
0.2 search in paragraphs instead of lines (faster, more accurate)
0.1 first release
"""

__version__ = "$Revision: 0.2 $"
__author__ = "Nicolas Seriot"
__date__ = "2004-11-27"

import re
import string  # no longer used below; kept from the original import list
import sys

USAGE = """pdfmi: display pdf document meta-informations
Usage: pdfmi <file.pdf>
    -h, --help      Show this help text and exit
    -v, --version   Show file version and exit
"""

# NOTE(review): this differs from __version__ (0.2) above -- confirm which
# one is current before changing what ``-v`` prints.
VERSION = "0.15"

# Keys to look for; the tuple also fixes the order in which they are printed.
KEYLIST = ('Title', 'Author', 'Subject', 'Keywords', 'Creator', 'Producer',
           'CreationDate', 'ModDate', 'Trapped')

# One compiled pattern per key: "/Key", an optional whitespace character,
# then a parenthesised value captured as group 1.  Inside the value a
# backslash always consumes the character that follows it, so "\)" does not
# end the value while ")" after an escaped backslash ("\\") does.  The value
# may be empty and never spans a line break.
PATTERNS = {}
for _key in KEYLIST:
    PATTERNS[_key] = re.compile(
        r'/' + re.escape(_key) + r'\s?\(((?:\\.|[^\\)\n])*)\)')

# A backslash followed by any single character.
_ESCAPE_RE = re.compile(r'\\(.)')

# Escaped letters that are shown as a blank so each value stays on one line.
_SPACE_ESCAPES = 'rnt'


def unescape(value):
    """Return *value* with its backslash escapes resolved.

    ``\\r``, ``\\n`` and ``\\t`` become a single space; for any other
    escaped character the backslash is dropped and the character kept
    (so ``\\(`` gives ``(`` and an escaped backslash gives one backslash).
    """
    def _replace(match):
        char = match.group(1)
        return ' ' if char in _SPACE_ESCAPES else char

    return _ESCAPE_RE.sub(_replace, value)


def extract_metadata(text):
    """Return a dict mapping every key of KEYLIST to the value found in *text*.

    *text* is split on blank lines ("\\n\\n").  Within a paragraph the first
    occurrence of a key is used; when several paragraphs contain the same
    key the last paragraph wins.  Keys that never occur map to ''.
    """
    values = {}
    for key in KEYLIST:
        values[key] = ''

    for paragraph in text.split('\n\n'):
        for key in KEYLIST:
            match = PATTERNS[key].search(paragraph)
            if match:
                values[key] = unescape(match.group(1))
    return values


def format_metadata(values):
    """Return the report text: one "Key<padding>: value" line per KEYLIST key.

    Key names are left-justified in a 13 character column.
    """
    return '\n'.join("%-13s: %s" % (key, values[key]) for key in KEYLIST)


def main(argv=None):
    """Run the command line tool and return its exit status.

    *argv* defaults to ``sys.argv``.  Returns 1 when no argument is given or
    the file cannot be read, 0 otherwise.
    """
    if argv is None:
        argv = sys.argv

    # deal with the user input
    if len(argv) == 1:
        print(USAGE)
        return 1

    if argv[1] in ('-h', '--help'):
        print(USAGE)
        return 0

    if argv[1] in ('-v', '--version'):
        print(VERSION)
        return 0

    # Read the file as bytes (no newline translation, no decoding errors)
    # and map each byte to one character with latin-1, which cannot fail.
    try:
        with open(argv[1], 'rb') as pdf_file:
            text = pdf_file.read().decode('latin-1')
    except (IOError, OSError):
        sys.stderr.write('give me a good file !\n')
        print(USAGE)
        return 1

    # display the dictionary
    print(format_metadata(extract_metadata(text)))
    return 0


if __name__ == '__main__':
    sys.exit(main())