#!/usr/bin/env python
"""CGI script that re-serves OU calendar pages with selected text highlighted.

The script takes no user input.  When requested by a browser it downloads
the page(s) listed in ``files``, wraps text matched by a handful of regular
expressions in colouring / link markup, and sends the result back.

A good reference for Python regular expressions is:
http://www.amk.ca/python/howto/regex/regex.html

Students are required to change some of the regular expressions; see the
TASK notes next to the patterns below.

The code runs unchanged on Python 2 and Python 3.  Page text is handled
"byte transparently": on Python 3 the downloaded bytes are decoded as
latin-1 (which maps every byte to one character) and encoded the same way
on output, so whatever encoding the source page uses passes through intact.
"""

import re
import sys
import time

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen(); keep the familiar module name.
    import urllib.request as urllib2

months = ['january', 'february', 'march', 'april', 'may', 'june',
          'july', 'august', 'september', 'october', 'november', 'december']

calurl = "http://www.ou.edu/calendar/"

# This filter is "demo-ed" with this practice file:
practice = 'http://it.metr.ou.edu/regex/filt/practice.htm'

# Needed to construct the names of the current calendar files.
month = time.localtime()[1]   # current month number, 1..12
nextmonth = month % 12 + 1    # following month, 1..12 (December wraps to 1)

# Use the practice file for development.  For the useful app, use instead:
#   files = [calurl + months[month - 1] + '.htm',
#            calurl + months[nextmonth - 1] + '.htm']
files = [practice]

# Highlight colours.  The TASK text fixes ff00ff for woman/women and red for
# lectures.  NOTE(review): the colours for "Free", tickets and phone calls
# could not be recovered from this copy of the script; these are stand-ins.
FREE_COLOR = 'green'
TICKET_COLOR = 'orange'
CALL_COLOR = 'blue'
WOMAN_COLOR = '#ff00ff'
LECTURE_COLOR = 'red'

# 'ticket' or 'tickets'.  Compiling gives a reusable pattern object and lets
# a flag be set: re.I ignores upper/lower case distinctions.
tickets = re.compile(r'ticket[s]?', re.I)

# Calls to a phone number.  Group 1 matches the end of a sentence; group 2
# runs from there through a ddd-dddd phone number and is the part that gets
# coloured.  \. is a real period and [^\.] is "anything but a period".
# re.S is kept from the original but has no effect on this pattern: it only
# changes '.', and \s and [^\.] already match newlines.
calls = re.compile(r'(\.\s+)([^\.]*?\s+\d\d\d-\d\d\d\d)', re.S)

# TASK 1: "woman" and "women" with one regex.  Inside [...] a '|' is a
# literal character, not alternation, so the class is just [ae].
woman = re.compile(r'wom[ae]n', re.I)

# TASK 2: "Lecture" and the subsequent title in curly quotes.  The title
# follows after white space (including a newline) and/or a <br> tag.
# NOTE(review): the quote strings in the page source appear to be HTML
# entities (see http://www.natural-innovations.com/wa/doc-charset.html);
# both entity spellings and the raw UTF-8 bytes of the curly quotes are
# accepted -- confirm against the actual page source.
# re.S lets '.' cross newlines inside a multi-line title.
lect = re.compile(
    r'Lecture(?:\s|<br\s*/?>)+'
    r'(?:&#8220;|&ldquo;|\xe2\x80\x9c)'
    r'.*?'
    r'(?:&#8221;|&rdquo;|\xe2\x80\x9d)',
    re.I | re.S)

# TASK 3: things that look like host names, e.g. www.snomnh.ou.edu: a word
# followed by two or three ".word" parts.
# NOTE(review): this also matches host names that already sit inside an
# existing tag attribute in the page.
url = re.compile(r'\w+(\.\w+){2,3}')


def _paint(color, inner):
    """Return ``inner`` wrapped in a span that colours it ``color``.

    The markup deliberately contains no '.' so that the later ``calls`` and
    ``url`` patterns cannot match inside it.
    """
    return '<span style="color:%s">%s</span>' % (color, inner)


def highlight(content):
    """Return ``content`` with all highlighting and links applied.

    ``content`` is the page text as a native ``str``.  The substitutions run
    in a fixed order; in the replacement templates ``\\g<0>`` stands for the
    whole match and ``\\g<1>``/``\\g<2>`` for the numbered groups.
    """
    # Easy case first: a plain, case-sensitive string replacement.
    content = content.replace('Free', _paint(FREE_COLOR, 'Free'))
    content = tickets.sub(_paint(TICKET_COLOR, r'\g<0>'), content)
    # Keep the sentence end (group 1) as is; colour only group 2.
    content = calls.sub(r'\g<1>' + _paint(CALL_COLOR, r'\g<2>'), content)
    content = woman.sub(_paint(WOMAN_COLOR, r'\g<0>'), content)
    content = lect.sub(_paint(LECTURE_COLOR, r'\g<0>'), content)
    # Last, so the links it creates are not processed by the steps above.
    content = url.sub(r'<a href="http://\g<0>">\g<0></a>', content)
    return content


def fetch(address):
    """Download ``address`` and return its body as a native ``str``."""
    response = urllib2.urlopen(address)
    try:
        raw = response.read()
    finally:
        response.close()  # do not leak the connection
    if not isinstance(raw, str):
        # Python 3 returns bytes; latin-1 maps each byte to one character.
        raw = raw.decode('latin-1')
    return raw


def _write(text):
    """Write ``text`` and a newline to standard output as raw bytes."""
    line = text + '\n'
    binary_out = getattr(sys.stdout, 'buffer', None)
    if binary_out is None:
        # Python 2: str is already a byte string.
        sys.stdout.write(line)
    else:
        # Anything outside latin-1 becomes a numeric character reference,
        # which is still valid HTML.
        binary_out.write(line.encode('latin-1', 'xmlcharrefreplace'))


def main():
    """Send the CGI response: header, list of source links, enhanced page."""
    # This MUST be the first thing a CGI script sends: the header line
    # followed by a blank line.
    _write("Content-type: text/html\n")

    # Next some html for the browser.
    _write('<html><head><title>My OU events</title></head><body>')
    _write('my enhancement of:<br>')

    pages = []  # text of every input page, in order
    for address in files:
        # Link to the original file.
        _write('<a href="' + address + '">' + address + '</a><br>')
        pages.append(fetch(address))
    _write('<br><br><br>')

    _write(highlight(''.join(pages)))


if __name__ == '__main__':
    main()