"""Pull status updates out of a saved Facebook page.

The page is scanned for ``<span class="UIStory_Message">`` elements (the
update text) and ``<span class="UIIntentionalStory_Time">`` elements (the
timestamp text).  Each timestamp is prefixed to the most recently collected
update.  Run as a script, the updates are printed in reverse document order.
"""
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2


class MyHTMLParser(HTMLParser):
    """Collect status updates, each prefixed with its timestamp text.

    Attributes:
        story: True while inside a story-message span.
        time: True when the next text chunk should be read as a timestamp.
        text: Text accumulated so far for the story being read.
        updates: Collected updates in document order.
        times: Unused by the parser; kept for backward compatibility.
    """

    STORY_CLASS = 'UIStory_Message'
    TIME_CLASS = 'UIIntentionalStory_Time'

    def __init__(self, *args, **kwargs):
        # HTMLParser is an old-style class on Python 2, so call the base
        # initializer explicitly instead of through super().
        HTMLParser.__init__(self, *args, **kwargs)
        # All state is per instance: class-level lists would be shared by
        # every parser and accumulate results across unrelated documents.
        self.story = False
        # Starts False so text that appears before any timestamp span is
        # never mistaken for a timestamp.
        self.time = False
        self.text = ''
        self.updates = []
        self.times = []

    def handle_starttag(self, tag, attrs):
        """Flag the start of a story-message span or a timestamp span."""
        if tag == 'span' and attrs:
            for name, value in attrs:
                if name != 'class':
                    continue
                # Exact match only: a multi-valued class attribute such as
                # "UIStory_Message foo" is not recognised.
                if value == self.STORY_CLASS:
                    self.story = True
                elif value == self.TIME_CLASS:
                    self.time = True

    def handle_endtag(self, tag):
        """Store the accumulated story text when a tag closes.

        NOTE: *any* end tag seen while inside a story closes it, so text
        that follows a nested element (e.g. a link) is not captured.
        """
        if self.story:
            self.updates.append(self.text)
            self.text = ''
            self.story = False

    def handle_data(self, data):
        """Accumulate story text, or attach a timestamp to the last update."""
        if self.story:
            self.text += data.strip().replace("\n", "")
        if self.time:
            # A timestamp with no preceding update has nothing to attach to
            # and is dropped.
            if self.updates:
                self.updates[-1] = data + " " + self.updates[-1]
            self.time = False


def parse_updates(html_source):
    """Return the status updates found in *html_source*, in document order."""
    parser = MyHTMLParser()
    parser.feed(html_source)
    return parser.updates


def main(path='facebook log.htm'):
    """Print the updates found in the saved page at *path*, last one first."""
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as source_file:
        html_source = source_file.read()
    for update in reversed(parse_updates(html_source)):
        # Single-argument call form prints identically on Python 2 and 3.
        print(update)


if __name__ == '__main__':
    main()
Sunday, June 6, 2010
Parsing Facebook using Python
This is an example of parsing Facebook to pull status updates out: