#!/usr/bin/env python
#This is a simple cgi program that does not accept any user input.
#When accessed from a web browser,
#it reads calendar files at http://www.ou.edu/calendar/,
#modifies the text slightly, and sends it to the browser.
#The modifications consist of html tags that colorize certain
#text matched in regular expressions.
#A good reference for python regular expressions is:
#http://www.amk.ca/python/howto/regex/regex.html
#Students are required to change some of those regular expressions,
#see TASK below.
import re, urllib2, sys, time
# Lower-case month names, indexed by (month number - 1). They are used to
# build the names of the monthly calendar pages, e.g. calurl+'october.htm'.
months = ['january', 'february', 'march', 'april', 'may', 'june',
          'july', 'august', 'september', 'october', 'november', 'december']
calurl = "http://www.ou.edu/calendar/"
# this filter is "demo-ed" with this practice file:
practice = 'http://it.metr.ou.edu/regex/filt/practice.htm'
# following needed to construct file names of the current files
month = time.localtime()[1]  # current month number, 1..12
# Next month number, also 1..12: December (12) wraps around to January (1).
# (The former (month+1)%12 produced 0 in November and only worked because
# months[-1] happens to be 'december'.)
nextmonth = month % 12 + 1
files = [practice]  # use this practice file for development
# use these files for your useful app
#files=[calurl+months[month-1]+'.htm', calurl+months[nextmonth-1]+'.htm']
# This MUST be the first thing spit out by a cgi script: the Content-type
# header. The explicit "\n" plus the newline that print adds produce the
# blank line that ends the headers.
# (print is called with one parenthesized string throughout; that prints
# the same text as the plain print statement.)
print("Content-type: text/html\n")
# Next write out some html to send to a browser.
# NOTE(review): the tags in the strings below were reconstructed (page
# title, a link to each original file, a rule before the content) - confirm
# the exact markup that is wanted.
print("<html><head><title>My OU events</title></head><body>")
print("my enhancement of:<br>")
pages = []  # text of each input file, in the same order as "files"
for page_url in files:
    # print link to original file
    print('<a href="' + page_url + '">' + page_url + '</a><br>')
    # Read the text at the URL; close the connection even if the read fails.
    response = urllib2.urlopen(page_url)
    try:
        pages.append(response.read())
    finally:
        response.close()
# this single string holds all the text from the input file(s)
content = "".join(pages)
print("<hr>")
# Easy replacement using string methods: every 'Free' is wrapped in html
# colorizing tags.
# NOTE(review): the color is a placeholder choice - pick your own.
#------------------------------------------------------------
content = content.replace('Free', '<font color="green">Free</font>')
# Comment the above, and uncomment the following for the same effect, but using re:
#content=re.sub('Free','<font color="green">Free</font>',content)
#------------------------------------------------------------
# A bit more fancy: colorize 'ticket' or 'tickets'. Also compile the
# regular expression for the match into a pattern object. Why? Well,
# two reasons: First, it allows a compilation flag to be set. In this
# case it is re.I, which will ignore distinctions between uppercase
# and lowercase in the match. Second, it is a bit more readable...
tickets = re.compile(r'tickets?', re.I)
# In the following, \g<0> is the text of the entire match. Thus, html
# colorizing tags are being inserted before and after the match.
# The replacement is a raw string so that \g reaches re.sub untouched.
# NOTE(review): the color is a placeholder choice - pick your own.
content = tickets.sub(r'<font color="blue">\g<0></font>', content)
#------------------------------------------------------------
# Even more fancy, look for calls to a phone number. Use parentheses
# to make subgroups 1 and 2. Subgroup 1 matches the end of a sentence,
# subgroup 2 matches from the end of that sentence through a phone
# number. Subgroup 2 is colorized.
# \s always includes \n (newline), so the match may span several lines.
# No re.S flag is needed: it only changes what the "." metacharacter
# matches, and this pattern does not use that metacharacter.
# Note that with all these escape \ characters we use a "raw" string: r'...'
# Note \. means a real "period",
# and not the metacharacter.
# The [^.] means "anything but a 'period'".
calls = re.compile(r'(\.\s+)([^.]*?\s+\d\d\d-\d\d\d\d)')
# Group 1 is put back unchanged; only group 2 gets the colorizing tags.
# NOTE(review): the color is a placeholder choice - pick your own.
content = calls.sub(r'\g<1><font color="orange">\g<2></font>', content)
#------------------------------------------------------------
# TASK:
# 1) highlight in color ff00ff all instances of "woman" and "women",
# using one regex
# hint: this is easy
# [ae] is a character class that matches one 'a' or one 'e'. Do not put a
# '|' inside the brackets: there it is a literal character, so [a|e] would
# also match "wom|n".
woman = re.compile(r'wom[ae]n', re.I)
content = woman.sub(r'<font color="#ff00ff">\g<0></font>', content)
# 2) highlight in color red all "Lecture" and subsequent title in quotes.
# look at the source html: see quotes are the strings &#147; and &#148;
# see http://www.natural-innovations.com/wa/doc-charset.html
# hint: compile with re.S so that the "." in .*? also matches a newline and
# a title that is broken over two lines is still found
# (\s matches a newline with or without that flag).
# NOTE(review): the pattern had a raw line break after "Lecture"; it is
# rebuilt here as an optional <br> tag, and the quotes are assumed to be
# written as &#147; / &#148; in the page source - confirm against the page.
lect = re.compile(r'Lecture(?:\s*<br\s*/?>)?\s+&#147;.*?&#148;', re.I | re.S)
content = lect.sub(r'<font color="red">\g<0></font>', content)
# 3) Find ALL things that look like URLs. e.g. www.snomnh.ou.edu,
# and make an href, e.g. <a href="http://www.snomnh.ou.edu">www.snomnh.ou.edu</a>
# hint: \w and \. may be useful in your regex
# The pattern reads:
#   (?<![\w./@])       do not start in the middle of a word, nor right after
#                      '.', '/' or '@' (so text that is already part of an
#                      http://... address or an e-mail address is skipped)
#   (?:\w[\w-]*\.){2,} two or more labels, each followed by a period
#   [a-zA-Z]{2,}\b     a final label made of letters only, so that dotted
#                      numbers such as 1.2.3 are not taken for a host name
# NOTE(review): host names that are already the visible text of a link in
# the page would be linked a second time - check the output on real pages.
url = re.compile(r'(?<![\w./@])(?:\w[\w-]*\.){2,}[a-zA-Z]{2,}\b')
content = url.sub(r'<a href="http://\g<0>">\g<0></a>', content)
print(content)