#!/usr/bin/python
# shelve.py: Look up ISBN numbers in various Web databases
# This program is in the public domain.
# Written by Andrew Plotkin (erkyrath@eblong.com)
# http://www.eblong.com/zarf/bookscan/
import sys
import types
import time
import string
import fileinput
import re
import urllib
def percentthunk(match):
    """Decode one percent-escape into the character it stands for.

    Meant to be passed as the replacement callback to a regex ``sub`` call
    whose pattern captures the two hex digits of a ``%XX`` escape as
    group 1.

    match -- a regex match object; ``match.group(1)`` is the hex digits.
    Returns the single character with that code.
    """
    # int(s, 16) replaces the long-deprecated string.atoi(s, 16).
    return chr(int(match.group(1), 16))
class GetDummy:
    """Placeholder lookup source: builds no URL and always reports failure."""

    name = 'Dummy'

    def makeurl(dummy, isbn):
        """Return an empty URL string; *isbn* is ignored."""
        return ''

    def extract(dummy, buf):
        """Return a fixed failure message; the page text *buf* is ignored."""
        return 'dummy says no'
# Shared scraping helpers.
# killtags matches any '<...>' run (lazily, across newlines) so tags can be
# deleted; sanewhite matches runs of space, LF (\012), CR (\015) and tab
# (\011) so they can be collapsed to a single space.
killtags = re.compile('<.*?>', re.DOTALL)
sanewhite = re.compile('[ \012\015\011]+')
# Patterns used by GetChapters.extract.
# NOTE(review): valtabchap, boldexchap and authexchap look truncated --
# presumably HTML tag text was lost from these literals. As written,
# boldexchap ('.*?') matches the empty string at the start of its input, so
# the extracted title is always empty. TODO confirm against the upstream
# source before relying on them.
#valtabchap = re.compile('')
valtabchap = re.compile(']*SIZE=3>')
#boldexchap = re.compile('.*?', re.DOTALL)
boldexchap = re.compile('.*?', re.DOTALL)
authexchap = re.compile(']*AuthorRedirect[^>]*>')
# Captures the two name parts carried in an author link's query string.
namexchap = re.compile('q1=([^&]*)&q2=([^&"]*)')
# URL-decoding helpers: a literal '+' (replaced by a space by the callers)
# and a '%XX' hex escape (decoded through percentthunk by the callers).
killplus = re.compile('\\+')
killpercent = re.compile('%([0-9a-fA-F][0-9a-fA-F])')
# Used to detect names that contain no lower-case ASCII letter at all.
anylowercase = re.compile('[a-z]')
class GetChapters:
    """Lookup source that scrapes a chapters.ca book-detail page."""

    name = 'Chapters'

    def makeurl(dummy, isbn):
        """Return the detail-page URL for the given ISBN string."""
        return ('http://www.chapters.ca/books/details/default.asp?ISBN='
                + isbn)

    def extract(dummy, buf):
        """Extract author and title from the fetched page text *buf*.

        Returns the string 'Not found' when the page lacks the expected
        start marker or title; otherwise returns an (author, title) tuple
        of whitespace-normalised strings. Multiple authors are joined with
        '; '. The caller treats any string return as a failed lookup.
        """
        fres = valtabchap.search(buf)
        if fres is None:
            return 'Not found'
        buf2 = buf[fres.end():]

        titleres = boldexchap.search(buf2)
        if titleres is None:
            # No title markup: report a clean miss instead of raising
            # AttributeError on None.
            return 'Not found'
        title = killtags.sub('', titleres.group()).strip()

        authors = []
        for link in authexchap.findall(buf2):
            names = namexchap.search(link)
            if names is None:
                # An author link without q1/q2 parameters is skipped rather
                # than aborting the whole lookup.
                continue
            # Order matters: turn '+' into spaces first, then decode %XX,
            # so an escaped '%2B' survives as a literal plus sign.
            part2 = killpercent.sub(percentthunk,
                                    killplus.sub(' ', names.group(2)))
            part1 = killpercent.sub(percentthunk,
                                    killplus.sub(' ', names.group(1)))
            # Rendered as "q2, q1" -- presumably "Surname, Forename".
            fullname = part2.strip() + ', ' + part1.strip()
            if anylowercase.search(fullname) is None:
                # No lower-case letters at all: normalise capitalisation.
                fullname = string.capwords(fullname)
            authors.append(fullname)

        title = sanewhite.sub(' ', title)
        author = sanewhite.sub(' ', '; '.join(authors))
        return (author, title)
# Patterns used by GetAmazon.extract.
# NOTE(review): valform and strongex are both '.*?', which matches the empty
# string at the start of its input -- presumably HTML tag text was lost from
# these literals; TODO confirm against the upstream source. As written, the
# title taken from strongex is always empty.
# NOTE(review): GetAmazon.extract also uses a pattern named valformend,
# which is not defined anywhere in the visible part of this file.
valform = re.compile('.*?', re.DOTALL)
#authorex = re.compile('/Author=([^/"]*)')
strongex = re.compile('.*?')
# Captures the value of each '&field-author=' parameter, up to '/' or '"'.
authorex = re.compile('&field-author=([^/"]*)')
class GetAmazon:
    """Lookup source that scrapes an amazon.com product page."""

    name = 'Amazon'

    def makeurl(dummy, isbn):
        """Return the product-page URL for the given ISBN string."""
        return 'http://www.amazon.com/exec/obidos/ISBN=' + isbn + '/'

    def extract(dummy, buf):
        """Extract author and title from the fetched page text *buf*.

        Returns the string 'Not found' when the start marker, end marker or
        title cannot be located; otherwise returns an (author, title) tuple
        of whitespace-normalised strings. Multiple authors are joined with
        '; '. The caller treats any string return as a failed lookup.
        """
        startres = valform.search(buf)
        if startres is None:
            return 'Not found'
        buf2 = buf[startres.start():]

        # NOTE(review): valformend is not defined in the visible part of
        # this file; it must be supplied at module level or this line
        # raises NameError.
        endres = valformend.search(buf2)
        if endres is None:
            # Missing end marker: report a clean miss instead of raising
            # AttributeError on None.
            return 'Not found'
        buf3 = buf2[:endres.start()]

        titleres = strongex.search(buf3)
        if titleres is None:
            return 'Not found'
        title = killtags.sub('', titleres.group())

        # Each captured author value is still URL-escaped; decode the %XX
        # sequences before joining.
        authors = [killpercent.sub(percentthunk, raw)
                   for raw in authorex.findall(buf3)]

        title = sanewhite.sub(' ', title)
        author = sanewhite.sub(' ', '; '.join(authors))
        return (author, title)
# Patterns used by GetAmazonUK.extract.
# NOTE(review): valsegauk, valendauk and boldexauk look truncated --
# presumably HTML tag text was lost from these literals; TODO confirm
# against the upstream source.
#authorexuk = re.compile('/Author=([^/"]*)')
# Captures the value of each '&field-author=' parameter, up to '/' or '"'.
authorexuk = re.compile('&field-author=([^/"]*)')
#valsegauk = re.compile('')
valsegauk = re.compile('')
valendauk = re.compile('.*?', re.DOTALL)
# The literal previously ran across two physical lines inside single quotes,
# which is a SyntaxError. The embedded line break is now written as an
# explicit '\n' escape so the pattern text is unchanged but the file parses.
boldexauk = re.compile('.*?\n', re.DOTALL)
class GetAmazonUK:
    """Lookup source that scrapes an amazon.co.uk product page."""

    name = 'AmazonUK'

    def makeurl(dummy, isbn):
        """Return the product-page URL for the given ISBN string."""
        return 'http://www.amazon.co.uk/exec/obidos/ASIN/' + isbn + '/'

    def extract(dummy, buf):
        """Extract author and title from the fetched page text *buf*.

        Returns the string 'Not found' when the page is the site's error
        page, or when the start marker, end marker or title cannot be
        located; otherwise returns an (author, title) tuple of
        whitespace-normalised strings. Multiple authors are joined with
        '; '. The caller treats any string return as a failed lookup.
        """
        if 'Amazon.co.uk Error Page' in buf:
            return 'Not found'

        startres = valsegauk.search(buf)
        if startres is None:
            return 'Not found'
        buf2 = buf[startres.start():]

        endres = valendauk.search(buf2)
        if endres is None:
            # Missing end marker: report a clean miss instead of raising
            # AttributeError on None.
            return 'Not found'
        buf3 = buf2[:endres.start()]

        titleres = boldexauk.search(buf3)
        if titleres is None:
            return 'Not found'
        title = killtags.sub('', titleres.group())

        # Each captured author value is still URL-escaped; decode the %XX
        # sequences before joining.
        authors = [killpercent.sub(percentthunk, raw)
                   for raw in authorexuk.findall(buf3)]

        title = sanewhite.sub(' ', title)
        author = sanewhite.sub(' ', '; '.join(authors))
        return (author, title)
# Lookup sources, tried in this order until one yields a result.
searchlist = [ GetAmazon, GetChapters, GetAmazonUK ]

# Main driver: read one ISBN per input line (files named on the command
# line, or stdin) and write one output line per non-blank input line.
#   found     -> "author<TAB>title"  ('-' stands in for an empty field)
#   not found -> "# <isbn> # <last failure message><comment>"
#   bad input -> "# <text> # Not an ISBN<comment>"
# A '#' starts a trailing comment, which is carried through to the output;
# a line holding only a comment is echoed unchanged.
for line in fileinput.input():
    line = line.strip()
    if not line:
        continue

    comment = ''
    commentpos = line.find('#')
    if commentpos >= 0:
        comment = line[commentpos:]
        line = line[:commentpos].strip()
    if not line:
        sys.stdout.write(comment + '\n')
        continue
    if len(line) != 10:
        sys.stdout.write('# ' + line + ' # Not an ISBN' + comment + '\n')
        continue

    # Pause before going to the network. Lines that trigger no query
    # (blank, comment-only, invalid) no longer pay this delay.
    time.sleep(1)

    # Only used if searchlist is empty; otherwise overwritten below.
    res = 'Not found'
    for cla in searchlist:
        target = cla()
        sys.stderr.write('Querying ' + target.name + ' for ISBN ' +
                         line + '...\n')
        url = target.makeurl(line)
        try:
            infl = urllib.urlopen(url)
            try:
                html = infl.read()
            finally:
                # Close the connection even when read() fails.
                infl.close()
            res = target.extract(html)
        except Exception:
            # Best effort: log the failure and fall through to the next
            # source. Catching Exception rather than using a bare except
            # lets KeyboardInterrupt and SystemExit stop the program.
            res = str(sys.exc_info()[1])
            sys.stderr.write('...' + res + '\n')
            continue

        # A string result is a failure message; anything else is an
        # (author, title) pair.
        if not isinstance(res, str):
            sys.stdout.write((res[0] or '-') + '\t' +
                             (res[1] or '-') + '\n')
            break
    else:
        # No source produced a result; report the last failure message.
        sys.stdout.write('# ' + line + ' # ' + res + comment + '\n')