#! /usr/bin/python
""" moviesHandler.py
moviesHandler is a module that provides three classes: Movie, Theatre
and Theatres.
Author: Patrick Curtain <patrick@swdev.com>
http://www.swdev.com
$Id: moviesHandler.py,v 1.41 2002/02/10 07:47:41 patrick Exp $
"""
import sys, os, time, urllib, string, re, shelve, pickle, rfc822
# ---------------------------------------------------------------------
# Host-specific path configuration.
#
# NOTE(review): the indentation of this section is not visible here, so
# the exact nesting of the if / if / else / if chain below (and whether
# "import sendEmail" is conditional) cannot be confirmed from this text.
# Confirm against the original layout before re-indenting.
# ---------------------------------------------------------------------
# Second element of os.uname() is the network node (host) name.
hostname = os.uname()[1]
# Default locations, all rooted under homeDir.
homeDir = '/home/swdev'
pydir = homeDir + os.sep + 'pyscripts'
webLogDir = homeDir + os.sep + 'public_html' + os.sep + 'WeBlog'
themesDir = homeDir + os.sep + 'public_html/themes/templates'
kMoviesDir = homeDir + os.sep + 'moviesData'
# Alternative locations used when sys.platform is one of the listed
# Darwin / NeXT identifiers.
if sys.platform in ['Darwin1.2', 'next', 'darwin', 'darwin1', 'darwin5']:
homeDir = '/Users/patrick'
pydir = homeDir + os.sep + 'src/pyscripts'
themesDir = "/Volumes/vol2/html/themes/templates"
kMoviesDir = "/usr/local/var" + os.sep + 'moviesData'
# Alternative locations used on the host named 'swdevteam.net'.
if hostname == 'swdevteam.net':
kMoviesDir = "/var/local/moviesData"
themesDir = "/vol/htdocs/swdev.com/themes/templates"
else:
# Put pydir at the front of the module search path (once).
if not pydir in sys.path:
sys.path.insert(0, pydir)
# Project-local mail helper; sendEmail.sendMsg() is what this file calls.
import sendEmail
# Make sure the movies cache directory and its "db" subdirectory exist.
# The directory functions live in os, not os.path: os.makedirs() creates
# the directory plus any missing parents, os.mkdir() a single directory.
if not os.path.exists(kMoviesDir):
    try:
        os.makedirs(kMoviesDir)  # try to create the path
        os.mkdir(kMoviesDir + os.sep + "db")
    except OSError:
        # Best effort only: report the problem and carry on importing.
        print("ERROR: We don't have a directory to cache the movies data")
        print(" -- The directory %s doesn't exist." % kMoviesDir)
        print(" You need to a) make that dir or b) change the ")
        print(" variable in moviesHandler.py")
# Module-level truth constants; TRUE and True are used as default
# argument values further down in this file.
TRUE = 1
True = 1
# Debug switch: the "if kdbg: print ..." traces in this file only fire
# when this is set to a true value.
kdbg = None
# kdbg = True
# Shorthand for the platform line separator and a tab character.
endl = os.linesep
tab = '\t'
# Define some strings that Hollywood.com uses in its movies listing page
#
kTitleLine = """<hr SIZE="1" NOSHADE>"""
kTimesLine = """ShowTimes"""
# Define some regular expressions to use in matching the strings
#
reTitleLine = re.compile(r'<hr SIZE="1" NOSHADE>', re.IGNORECASE)
reTitle = re.compile(r'<b>(.*?)</b>', re.IGNORECASE)
reTimesLine = re.compile(r'ShowTimes')
# Matches a tag that mentions movie_detail.asp, any number of further tags,
# then captures the text up to the next tag as group 2 (the movie title).
reMovieName = re.compile('<[^>]+movie_detail\\.asp[^>]*>\s*(<[^>]*>\s*)*(.+?)\s*<[^>]*>')
# Matches digits:digits, optionally wrapped in parentheses, e.g. "7:15"
# or "(1:45)".
reMovieTime = re.compile(r'\(?\d+:\d+\)?')
# this is the url hollywood.com expects in order to retrieve the listing for
# a theatre. VOLATILE!
# theatreURL = """http://www.hollywood.com/showtimes/theatre_detail.asp?searchtheatre=%d"""
# The %d placeholder is filled with a theatre's numeric id (Theatre.setURL).
theatreURL = """http://www.hollywood.com/showtimes/theatre_detail.asp?searchtheatre=%d"""
leftNavHTML = "../leftNav.html"
moviesListingFilename = "movies.html"
# HTML table row template with %(title)s and %(times)s placeholders.
movieRow = """<TR><TD VALIGN="TOP">%(title)s</TD>
<TD VALIGN="TOP">%(times)s</TD></TR>"""
def getMatches(pattern, text, start, end):
    """ Find every non-overlapping occurrence of pattern in text.

        pattern -- a compiled regular expression object
        text    -- the string to search
        start   -- index at which the first search begins
        end     -- index at which searching stops (passed as endpos)

        Returns the Match objects, in order of appearance, as a list.
    """
    matches = []
    while 1:
        match = pattern.search(text, start, end)
        if match is None:
            break
        matches.append(match)
        if match.end() > start:
            start = match.end()
        else:
            # Zero-width match at the current position: step forward one
            # character, otherwise the same match is found forever.
            start = start + 1
            if start > len(text):
                break
    return matches
def filterMatchOutsideAttribute(match, text):
    """ Tell whether a match sits outside of an HTML tag.

        Looks at the text between the end of the match and the next '>'
        (or the end of text when there is no further '>').  If a '<'
        opens in that stretch, the match is taken to be ordinary page
        text and a true value is returned; otherwise the match is taken
        to be inside a tag (e.g. an attribute value) and the result is
        false.
    """
    tail = match.end()
    closeAt = text.find('>', tail)
    if closeAt < 0:
        closeAt = len(text)
    return text.find('<', tail, closeAt) != -1
class Movie:
    """ Represent a single movie title.

        The movie's 'title', 'times', 'rating' and 'length' are kept as
        values in the ``dict`` attribute; keys that were not supplied
        default to the empty string.  Instances support item access
        (movie['title']) so they can be used with %(name)s formatting.
    """

    def __init__(self, dict=None):
        """ Create a movie instance from its dict arg, if supplied.

            dict -- optional mapping; only the four known movie keys are
                    copied out of it, so the caller's mapping is never
                    shared with the instance.
        """
        if dict is None:
            dict = {}
        movieKeys = ['title', 'times', 'rating', 'length']
        self.dict = {}
        for k in movieKeys:
            self.dict[k] = dict.get(k, '')

    def __getitem__(self, key):
        """ Retrieve a dict item safely: unknown keys give None. """
        return self.dict.get(key, None)

    def __setitem__(self, key, value):
        """ Set an item in dict. """
        self.dict[key] = value

    def asString(self):
        """ Print a string representation of this movie and return it.

            The text is written to standard output, as before, and is now
            also returned so that callers which build a larger string
            from it (Theatre.asString) receive the movie text instead of
            None.
        """
        text = (tab + self.dict['title'] + ' '
                + ' <length %s>' % self.dict['length'] + '\n'
                + tab * 2 + self.dict['times'] + endl * 2)
        print(text)
        # print appends a newline; include it in the returned text too.
        return text + '\n'

    def row_html(self):
        """ The html table row representation of a Movie. """
        movieRow = """<TR><TD VALIGN="TOP"><b>%(title)s</b> - [%(length)s]</TD>
<TD VALIGN="TOP">%(times)s</TD></TR>"""
        return movieRow % self.dict

    def xml(self):
        """ Return an XML representation of this movie.

            The values are inserted as-is; no XML escaping is applied.
        """
        xmlTemplate = """
<MovieItem>
<MovieTitle>%(title)s</MovieTitle>
<MovieRating>%(rating)s</MovieRating>
<MovieLength>%(length)s</MovieLength>
<MovieTimes>%(times)s</MovieTimes>
</MovieItem>
"""
        return xmlTemplate % self.dict
class Theatre:
    """ Theatre represents all of the information about one physical Theatre.

        Holds things like the name of the theatre, its short name (key),
        the list of Movie instances playing at that theatre and so on.
        The movie list is cached on disk as a pickle named after the
        theatre key inside kMoviesDir.
    """

    def __init__(self, row=None, dict=None):
        """ Create a theatre, optionally filled from a dict and/or a row.

            row  -- optional sequence accepted by fromRow()
            dict -- optional mapping accepted by fromDict()
        """
        self.data = {}
        self.movies = []
        self.key = ''
        self.name = ''
        self.id = 0
        self.city = ''
        self.locale = ''
        self.url = ''
        self.picklefilename = ''
        if dict:
            self.fromDict(dict)
        if row:
            self.fromRow(row)
        # Make index() reachable under the attribute name 'index.html'
        # (presumably for a web object publisher -- TODO confirm).
        setattr(self, 'index.html', self.index)

    # NOTE: no __repr__ is defined.  An earlier version returned
    # self.index() from __repr__, but that definition was disabled.

    def fromDict(self, data=None):
        """ Set the theatre fields from a mapping.

            data must provide 'theatreKey', 'theatreName', 'theatreID',
            'city' and 'locale'; a missing key raises KeyError.  The
            mapping itself (not a copy) becomes self.data.
        """
        if data is None:
            data = {}
        self.data = data
        self.key = data['theatreKey']
        self.name = data['theatreName']
        self.id = data['theatreID']
        self.city = data['city']
        self.locale = data['locale']
        self.setURL()
        self.setPickleFilename()

    def fromRow(self, row=None, newLocale=''):
        """ Set the local data from a row, such as would come in from either
            a string.split on a line from a tdf or from a record in from sql.

            row       -- [key, name, id, city] or [key, name, id, city, locale]
            newLocale -- locale to use for a 4-field row, unless this
                         theatre's locale is already 'all'

            A row of any other length only produces a printed warning;
            a row shorter than four fields then raises IndexError.
        """
        if row is None:
            row = []
        fieldcount = len(row)
        if fieldcount not in (4, 5):
            print("%d is the wrong field count! sb 4 or 5" % fieldcount)
        self.key = row[0]
        self.name = row[1]
        self.id = row[2]
        if isinstance(self.id, type('')):
            # ids read from a text file arrive as strings
            self.id = int(self.id)
        self.city = row[3].strip()
        if fieldcount == 5:
            self.locale = row[4].strip()
        elif self.locale != 'all':
            self.locale = newLocale
        self.setURL()
        self.setPickleFilename()
        self.data = self.asDict()

    def fromStr(self, str=''):
        """ Set the local data from a string, tab delimited. """
        row = str.strip().split('\t')
        self.fromRow(row)

    def setURL(self):
        """ Define the URL that gets this theatre's information. """
        if self.id:
            self.url = theatreURL % self.id

    def setPickleFilename(self):
        """ Define the filename to use when pickling this object's movies. """
        if self.key:
            self.picklefilename = kMoviesDir + os.sep + self.key

    def asRow(self):
        """ Represent this Theatre as a list of five strings:
            key, name, id, city and locale.
        """
        return [self.key, self.name, repr(self.id), self.city, self.locale]

    def asDict(self, withMovies=0):
        """ Return the Theatre as a dictionary (also stored in self.data).

            withMovies -- when true, the movie list is refreshed through
                          getMovies() and added under the 'movies' key as
                          a list of the movies' own dicts.
        """
        data = {
            'theatreKey': self.key,
            'theatreName': self.name,
            'theatreID': self.id,
            'city': self.city,
            'locale': self.locale,
        }
        self.data = data
        if withMovies:
            self.getMovies()
            data['movies'] = [m.dict for m in self.movies]
        return data

    def __getitem__(self, key):
        """ Get the instance attribute named key, or None if there is no
            such attribute.
        """
        return vars(self).get(key, None)

    def getHTMLPage(self):
        """ Use the urllib library to open and retrieve the contents for
            this theatre's URL.  Returns the page text; download errors
            propagate to the caller.
        """
        try:
            fn, h = urllib.urlretrieve(self.url)
            fp = open(fn, 'r')
            try:
                page = fp.read()
            finally:
                fp.close()
        finally:
            # always drop urllib's temporary download files
            urllib.urlcleanup()
        return page

    def parseTheatreForMovies(self, page=""):
        """ THIS is the key parsing function.  It combs through the html
            provided looking for movie titles and their associated show
            times.

            The complexity of the function comes from needing to keep
            track of which show times go with which titles.  This will
            be true of any parser where there will be a list of responses.
            VOLATILE!

            page -- html text to parse; fetched with getHTMLPage() when
                    empty.

            Replaces self.movies with the Movie instances found and
            returns that list (empty when the theatre name does not
            appear in the page).
        """
        self.movies = []
        if not page:
            page = self.getHTMLPage()
        # Find the end location of the theatre name and start the search
        # from there.  The name is looked up as literal text: names with
        # characters such as '(' or '+' are not valid regular expressions.
        nameAt = page.find(self.name)
        if nameAt == -1:
            return self.movies
        endOfTheatreName = nameAt + len(self.name)
        endOfPage = len(page)
        # get all movie names
        matches = getMatches(reMovieName, page, endOfTheatreName, endOfPage)
        numMatches = len(matches)
        for i in range(numMatches):
            movieNameMatch = matches[i]
            movieData = {'title': movieNameMatch.group(2)}
            # get all times between end of this movie and start of next
            endOfThisMovie = movieNameMatch.end()
            if i + 1 < numMatches:
                startOfNextMovie = matches[i + 1].start()
            else:
                startOfNextMovie = endOfPage
            timeMatches = getMatches(reMovieTime, page,
                                     endOfThisMovie, startOfNextMovie)
            # keep all times that are outside a tag attribute.
            # there were some times in an <input type="hidden"> tag
            timeMatches = [tm for tm in timeMatches
                           if filterMatchOutsideAttribute(tm, page)]
            if timeMatches:
                # the first time found is the running length, the rest
                # are show times
                movieData['length'] = timeMatches[0].group()
                showtimes = [tm.group() for tm in timeMatches[1:]]
                movieData['times'] = ", ".join(showtimes)
            # NOTE(review): a movie without any times is still listed
            # (moviesListingPage shows "missing" for it) -- confirm this
            # matches the intended nesting of the original code.
            self.movies.append(Movie(movieData))
        return self.movies

    def getDataTime(self):
        """ Modification time of the pickle cache file, or 0 if there is
            no such file.
        """
        if os.path.exists(self.picklefilename):
            return os.path.getmtime(self.picklefilename)
        return 0

    def getMovies(self):
        """ Retrieve the information for the most recent movies.  Try to
            get it from a local file, rather than hitting the web site, if
            possible: the cache is used when it is newer than the time
            given by getLastFriTime().
        """
        self.movies = []
        lastFriTime = getLastFriTime()
        movieDataTime = self.getDataTime()
        if movieDataTime and movieDataTime > lastFriTime:
            self.movies = self.readMovies()
        else:
            self.updateMovies()

    def updateMovies(self):
        """ Do everything necessary to update the current list of movies. """
        self.movies = self.parseTheatreForMovies()
        self.writeMovies()

    def writeMovies(self):
        """ Pickle the movie list to this Theatre's cache file and return
            the file name.
        """
        pfile = open(self.picklefilename, 'w')
        try:
            pickle.dump(self.movies, pfile)
        finally:
            pfile.close()
        return self.picklefilename

    def readMovies(self):
        """ Read the movies from this Theatre's pickle object store. """
        # NOTE: unpickling executes whatever the file describes; the cache
        # file must only ever be written by writeMovies().
        pfile = open(self.picklefilename)
        try:
            self.movies = pickle.load(pfile)
        finally:
            pfile.close()
        return self.movies

    def simpleStr(self):
        """ Display the theatre key and name as a simple padded string. """
        template = """%(theatreKey)-20s %(theatreName)-25s"""
        return template % self.data

    def sendmail(self, requestor, individually=True, source='movies'):
        """ Mail the movie listing to requestor.

            individually -- true: one message per movie (sendEachMovie);
                            false: one message for all (sendAllMovies)
            source       -- name of the request channel; an empty value
                            is replaced by requestor
        """
        if not source:
            source = requestor
        if not self.movies:
            self.getMovies()
        if individually:
            self.sendEachMovie(requestor, source)
        else:
            self.sendAllMovies(requestor, source)

    def sendEachMovie(self, requestor, source='movies'):
        """ Send one message per movie, then a completion message. """
        reqHandler = source + "@swdev.com"
        for m in self.movies:
            mailsubj = m['title']
            thisFrom = '"%s:" <movies@swdev.com>' % mailsubj
            mailtext = m['times'] + " \n @ " + self.name
            sendEmail.sendMsg(requestor, mailsubj, mailtext, thisFrom)
        mailsubj = "%s request completed!" % self.name
        mailtext = (completionMsg % self.name
                    + requestReflector % (reqHandler, source)
                    + thanksText)
        sendEmail.sendMsg(requestor, mailsubj, mailtext, reqHandler)

    def sendAllMovies(self, requestor, source='allmovies'):
        """ Send a single message that lists every movie. """
        reqHandler = "allmovies@swdev.com"
        movieStrings = ["%(title)s\n %(times)s\n\n" % m for m in self.movies]
        # the entries are joined with a single space, as string.join did
        mailtext = (' '.join(movieStrings)
                    + completionMsg % self.name
                    + requestReflector % (reqHandler, source)
                    + thanksText)
        mailsubj = "Now playing at %s!" % self.name
        sendEmail.sendMsg(requestor, mailsubj, mailtext, reqHandler)

    def xml(self):
        """ Present the current theatre with a listing of all the movies. """
        if not self.movies:
            self.getMovies()
        xmlDict = {'name': self.name, 'city': self.city, 'url': self.url}
        xmlDict['movies_xml'] = ''.join([m.xml() for m in self.movies])
        return xmlTemplate % xmlDict

    def moviesListingPage(self, infoString='', themeDir=None):
        """ This is the representation to use for a web page.

            infoString -- text substituted for the template's 'info' key
            themeDir   -- directory holding the movies.html template;
                          defaults to the module's themesDir

            Movies without show times are given the times "missing".
        """
        if themeDir is None:
            themeDir = themesDir
        elif not themeDir:
            themeDir = ''
        # leftNavHTML = "../leftNav.html"
        leftNavHTML = "/html/swdev.com/leftNav.html"
        moviesListingFilename = "movies.html"
        templateFile = open(themeDir + os.sep + moviesListingFilename)
        try:
            moviesTemplate = templateFile.read()
        finally:
            templateFile.close()
        if not self.movies:
            self.getMovies()
        rows = []
        for movie in self.movies:
            movie['times'] = movie['times'] or "missing"
            rows.append(movie.row_html())
        pageDict = {
            'theatreName': self.name,
            'locale': self.locale,
            'theatreKey': self.key,
            'rows': os.linesep.join(rows),
            'info': infoString,
        }
        # pageDict['datepage'] = datepage.flipStyleCalendar(timeTuple)
        # pageDict['newsListSidebar'] = newsList.sidebar()
        if os.path.exists(leftNavHTML):
            navFile = open(leftNavHTML)
            try:
                pageDict['leftNav'] = navFile.read()
            finally:
                navFile.close()
        else:
            pageDict['leftNav'] = 'missing'
        return moviesTemplate % pageDict

    def index(self):
        """ This is the representation to use for a web page. """
        return self.moviesListingPage()

    def asString(self):
        """ This is the representation to use for a simple string: the
            theatre name followed by whatever each movie's asString()
            returns.
        """
        parts = [self.name + endl]
        for movie in self.movies:
            parts.append(movie.asString() or '')
        return ''.join(parts)

    def GetMovie(self, index=0, title=''):
        """ Given an index or title, show that movie as an html row.

            title -- when given, the first movie with exactly this title
                     is shown; an unknown title gives the row of a blank
                     Movie
            index -- position in self.movies, used when no title is
                     given; an unusable index gives 'no movie to show'
        """
        if title:
            movie = None
            for m in self.movies:
                if title == m['title']:
                    movie = m
                    break
            if movie is None:
                movie = Movie()
        else:
            try:
                movie = self.movies[int(index)]
            except (IndexError, ValueError, TypeError):
                return 'no movie to show'
        try:
            outhtml = movie.row_html()
        except Exception:
            outhtml = 'no movie to show'
        return outhtml
# global functions
#
def getLastFriTime():
    """ Return the time (seconds since the epoch) of the most recent
        Friday 10am, local time, that is not in the future.

        Cached movie data older than this moment is considered stale.
    """
    comparisonHour = 10  # 10am. A good point on fri to check movie listings
    friday = 4           # weekday value of Friday (Monday is 0)
    nowtuple = time.localtime(time.time())
    year, month, day, hour = nowtuple[0], nowtuple[1], nowtuple[2], nowtuple[3]
    weekday = nowtuple[6]
    daysSinceFriday = (weekday - friday) % 7
    if daysSinceFriday == 0 and hour < comparisonHour:
        # Friday morning before the comparison hour: the last checkpoint
        # was a week ago.
        daysSinceFriday = 7
    # mktime() works from the year/month/day fields (the weekday and
    # yearday fields are ignored) and normalizes a day number that has
    # run below 1 into the previous month.  -1 lets it work out DST.
    lastfri = (year, month, day - daysSinceFriday,
               comparisonHour, 0, 0, 0, 0, -1)
    return time.mktime(lastfri)
# Locale names that sendMovies() accepts as a request and lists in its
# help reply.
localeList = ( 'portland', 'atlanta', 'bend' )
class Theatres:
    """ Abstraction to represent a dictionary of theatres.

        self.dict maps theatre key -> theatre object.  theatrekeys (the
        sorted keys), names and IDs are parallel lists rebuilt from the
        dict by syncValues().
    """

    def __init__(self, locale='all', populateFlag=1):
        """ Create the collection.

            locale       -- locale name; selects the <locale>Theatres.tdf
                            data file
            populateFlag -- when true, load the theatres right away
        """
        self.dict = {}
        self.theatrekeys = []
        self.names = []
        self.IDs = []
        self.locale = ''
        self.filename = ''
        self.shelvename = ''
        # Make index() reachable under the attribute name 'index.html'
        # (presumably for a web object publisher -- TODO confirm).
        setattr(self, 'index.html', self.index)
        if locale:
            self.locale = locale
            self.setFilename()
            # skip the shelve for the time being.
            # self.setShelvename()
        if populateFlag:
            if self.shelvename:
                self.read()
            elif self.filename:
                self.readFile()

    def __repr__(self):
        """ Default representation: the index page. """
        return self.index()

    def setShelvename(self):
        """ Point shelvename at the 'theatres' shelve under kMoviesDir/db. """
        basename = "theatres"
        self.shelvename = kMoviesDir + os.sep + "db" + os.sep + basename

    def setFilename(self):
        """ Point filename at the tab-delimited file for self.locale. """
        if self.locale:
            basename = "%sTheatres.tdf" % self.locale.lower()
            self.filename = kMoviesDir + os.sep + basename

    def read(self):
        """ Get the Theatres data from a shelve database.

            Entries that cannot be read are skipped.  Unless the locale
            is 'all', only theatres of self.locale are kept.
        """
        if kdbg: print("attempting to open the shelve db.")
        shelf = shelve.open(self.shelvename)
        try:
            self.dict = {}
            for k in shelf.keys():
                try:
                    theatre = shelf[k]
                except Exception:
                    # unreadable entry: skip it rather than reuse the
                    # theatre from the previous iteration
                    continue
                if self.locale == 'all' or theatre.locale == self.locale:
                    self.dict[k] = theatre
        finally:
            shelf.close()
        self.syncValues()

    def readFile(self):
        """ Get the theatres data, stored in a tab-delimited text file
            based on which locale we're looking at.  Used as a bootstrap.

            Blank lines are ignored.  Returns self.dict, which is empty
            when the file does not exist.
        """
        self.dict = {}
        if os.path.exists(self.filename):
            datafile = open(self.filename)
            try:
                lines = datafile.readlines()
            finally:
                datafile.close()
            for line in lines:
                if not line.strip():
                    continue
                theatre = Theatre()
                theatre.fromRow(line.split('\t'), self.locale)
                self.dict[theatre.key] = theatre
        self.syncValues()
        return self.dict

    def write(self):
        """ Store every theatre in the shelve database. """
        shelf = shelve.open(self.shelvename)
        try:
            for k in self.dict.keys():
                shelf[k] = self.dict[k]
        finally:
            shelf.close()

    def writeFile(self):
        """ Write the theatres, one tab-delimited row per line, to
            self.filename and return that file name.
        """
        datafile = open(self.filename, 'w')
        try:
            for theatreKey in self.theatrekeys:
                row = self.dict[theatreKey].asRow()
                datafile.write('\t'.join(row) + os.linesep)
        finally:
            datafile.close()
        return self.filename

    def syncValues(self):
        """ Rebuild theatrekeys, names and IDs from self.dict. """
        keys = list(self.dict.keys())
        keys.sort()
        self.theatrekeys = keys
        self.names = []
        self.IDs = []
        for k in keys:
            theatre = self.dict[k]
            self.names.append(theatre.name)
            self.IDs.append(theatre.id)

    def __len__(self):
        return len(self.theatrekeys)

    def theatreWithID(self, ID=0):
        """ Return the theatre whose id equals ID, or None. """
        if ID in self.IDs:
            for theatreKey in self.theatrekeys:
                candidate = self.dict[theatreKey]
                if candidate.id == ID:
                    return candidate
        return None

    def theatreWithKey(self, key):
        """ Return the theatre stored under key, or None. """
        return self.dict.get(key, None)

    def __getitem__(self, key):
        """ Retrieve a theatre either by position in the sorted key list
            (integer key) or by theatre key (anything else).

            Raises IndexError for a position out of range and KeyError
            for an unknown theatre key.
        """
        if isinstance(key, type(4)):
            return self.dict[self.theatrekeys[key]]
        return self.dict[key]

    def contains(self, key):
        """ Return the theatre that has key as its theatre key or as its
            id, or None when there is no such theatre.
        """
        if key in self.theatrekeys:
            return self.dict[key]
        if key in self.IDs:
            return self.theatreWithID(key)
        return None

    def theatreKeys(self, locale='portland'):
        """ Provide the keys of the theatres in locale ('all' for every
            theatre) as a tuple.
        """
        if locale == 'all':
            return tuple(self.theatrekeys)
        return tuple([thkey for thkey in self.theatrekeys
                      if self.dict[thkey].locale == locale])

    def theatresDict(self, locale='portland'):
        """ Map theatre key -> theatre dictionary (including movies) for
            the theatres whose lower-cased locale equals locale.

            A theatre whose data cannot be produced is reported with a
            printed message and left out.
        """
        thisDict = {}
        for k in self.dict.keys():
            theatre = self.dict[k]
            if theatre.locale.lower() != locale:
                continue
            try:
                thisDict[theatre.key] = theatre.asDict(withMovies=1)
            except Exception:
                print("trouble with theatre: %s" % k)
        return thisDict

    def theatreList(self, locale='portland'):
        """ Provide the data dictionaries of all theatres as a tuple.

            The locale argument is accepted but not used for filtering.
        """
        return tuple([self.dict[k].data for k in self.dict.keys()])

    def index_page(self, themeDir=None):
        """ This is the representation to use for a web page.

            themeDir -- directory holding the theatres.html template;
                        defaults to the module's themesDir
        """
        if themeDir is None:
            themeDir = themesDir
        theatresListingFilename = "theatres.html"
        templateFile = open(themeDir + os.sep + theatresListingFilename)
        try:
            theatresTemplate = templateFile.read()
        finally:
            templateFile.close()
        theatreRow = """
<TR>
<TD VALIGN="TOP">
<!-- for CGI --><!--
<A HREF="movies.py?tk=%(theatreKey)s">%(theatreName)s</A> </TD>
-->
<!-- for Zope -->
<A HREF="/movies/theatres/%(theatreKey)s">%(theatreName)s</A> </TD>
<TD VALIGN="TOP">
<A HREF="mailto:allmovies@swdev.com?subject=%(theatreKey)s">%(theatreKey)s</A> </TD>
</TR>
"""
        rows = [theatreRow % self.dict[th].data for th in self.theatrekeys]
        pageDict = {
            'rows': os.linesep.join(rows),
            'locale': self.locale,
            # pageDict['leftNav'] = open(leftNavHTML).read()
            'leftNav': '',
            # pageDict['datepage'] = datepage.flipStyleCalendar(timeTuple)
            # pageDict['newsListSidebar'] = newsList.sidebar()
            'datepage': '',
            'newsListSidebar': '',
        }
        return theatresTemplate % pageDict

    def index(self):
        """ This is the representation to use for a web page. """
        return self.index_page()
def sendMovies(requestor, request, individually=TRUE, source='movies'):
    """ Answer one mail request.

        requestor    -- address the reply is sent to
        request      -- the request text; compared in lower case
        individually -- passed on to Theatre.sendmail for theatre requests
        source       -- name of the request channel, used to build the
                        <source>@swdev.com handler address; an empty
                        value means "movies"

        A request naming a theatre key gets that theatre's movies, one
        naming a locale from localeList gets the list of theatre keys in
        that locale, and anything else gets a help message listing the
        locales.  Any error is noted on stderr and re-raised.
    """
    if not source:
        source = "movies"
    try:
        theatres = Theatres(locale='all')
        request = request.lower()
        reqHandler = "%s@swdev.com" % source
        if request in theatres.theatrekeys:
            if kdbg: print("-- sendMovies: it's a theatre req")
            theatre = theatres[request]
            theatre.sendmail(requestor, individually, source)
        elif request in localeList:
            # send a list of theatres in the locale
            if kdbg: print("-- sendMovies: it's a locale req")
            theatres = Theatres(locale=request)
            mailsubj = 're: %s locale' % request
            mailtext = "Movie times for %s area theatres can be " % request
            # trailing space keeps the address apart from the next phrase
            mailtext = mailtext + "retrieved by sending a message to %s@swdev.com " % source
            mailtext = mailtext + "with one of the following in the subject: \n\n"
            for th in theatres.theatrekeys:
                mailtext = mailtext + theatreKeyTemplate % th
            mailtext = mailtext + thanksText
            sendEmail.sendMsg(requestor, mailsubj, mailtext, reqHandler)
        else:
            # tell the user what locales are available
            if kdbg: print("-- sendMovies: it's a help req")
            mailsubj = 're: help request'
            mailtext = "To get a list of theatres in your area, just send "
            mailtext = mailtext + "a message to %s@swdev.com " % source
            mailtext = mailtext + "with one of the following in the subject: \n\n"
            for locale in localeList:
                mailtext = mailtext + theatreKeyTemplate % locale
            mailtext = mailtext + "\nTo have your area added, send your "
            mailtext = mailtext + "request to information@swdev.com.\n"
            mailtext = mailtext + thanksText
            sendEmail.sendMsg(requestor, mailsubj, mailtext, reqHandler)
    except:
        # leave a trace for the mail system, then let the error propagate
        sys.stderr.write("sendMovies just wouldn't work!\n")
        raise
# XML wrapper for one theatre; Theatre.xml() fills in name, city, url and
# movies_xml.
xmlTemplate = """
<TheatreMoviesListing>
<TheatreName>%(name)s</TheatreName>
<TheatreCity>%(city)s</TheatreCity>
<TheatreURL>%(url)s</TheatreURL>
<CurrentMoviesList>
%(movies_xml)s
</CurrentMoviesList>
</TheatreMoviesListing>
"""
# One line of the key list in sendMovies() replies; %s is a theatre key
# or a locale name.
theatreKeyTemplate = " %s \n"
# Mail text: %s is the theatre name.
completionMsg = """Thank You!
Your request for movies listings from %s has been
processed by Movies at SWDev (movies@swdev.com). We hope you
have a great time at the movies!
"""
# Mail text: filled with (request handler address, source).
requestReflector = """Note:
You are receiving this email because a request was submitted
to %s via %s. If this message was in any way
unwelcome, PLEASE send an email to admin@swdev.com and we
will ensure that no other messages get sent to your address.
"""
# Closing text appended to every outgoing message.
thanksText = """
--
And t h a n k y o u for using SWDev's movies service. :-)
http://www.swdev.com/movies/
To see more of what we can do, come to http://www.swdev.com/
"""
# Script entry point / import-time setup.
#
# NOTE(review): indentation is not visible here.  Going by the comment in
# the else branch, the "else" presumably pairs with the __name__ test, and
# the lines after it (including index_html) presumably belong to that
# branch -- confirm before re-indenting.
if __name__ == '__main__':
""" the message handler. """
# Run as a script with at least one command-line argument: hand standard
# input to the mail request handler.
# NOTE(review): handleMailRequest is not defined anywhere in the visible
# part of this file -- confirm where it comes from.
if len(sys.argv) > 1:
handleMailRequest(sys.stdin)
else:
# If we get to here, we're NOT being called as "main", so I'll initiate
# a variable to use with Zope.
if kdbg: print "trying to instantiate theatres..."
# Module-level collection; constructing it loads the theatre data file.
theatres = Theatres(locale='Portland')
def index_html():
""" Define an index method for the module. """
return theatres.index()