#!/bin/ruby
#
# Scrapes the FOSS4G 2006 conference timetable and writes it out as an
# hCalendar-annotated HTML page (foss4g.html in the current directory).
#
# Flow: download the timetable page, extract days -> sessions with scrapi,
# split each session's "info" and "author" text into fields with regexes,
# and append one event entry per session to the output document.
#
# NOTE(review): the HTML tags inside the three heredoc templates below were
# not recoverable from the copy of this script under review (only their text
# content survived). The markup here is a minimal hCalendar reconstruction
# that keeps the surviving text in the same order -- confirm it against the
# intended page layout.

require 'rubygems'
require 'scrapi'
require 'open-uri'
require 'date'

TIMETABLE_URL = "http://www.foss4g2006.org/conferenceTimeTable.py?confId=1&showDate=all&showSession=all&detailLevel=contribution&viewMode=parallel"
OUTPUT_PATH = 'foss4g.html'

# One timetable entry: link text/href plus the two <small> text fragments.
session_scraper = Scraper.define do
  process "a", :title=>:text, :url=>"@href"
  process "small:first-of-type", :author=>:text
  process "small:not(:empty)", :info=>:text
  result :title, :url, :info, :author
end

# One day table: its date label and every entry cell inside it.
day_scraper = Scraper.define do
  array :sessions
  process "td#entry", :sessions => session_scraper
  process "td[bgcolor=white]>b", :date => :text
  result :date, :sessions
end

# The whole page: one day per matching table.
calendar = Scraper.define do
  array :days
  process "table[bgcolor='#E6E6E6']", :days => day_scraper
  result :days
end

# URI#read comes from open-uri and works on old and new Rubies alike;
# passing a URL to Kernel#open stopped working in Ruby 3.0.
page = URI.parse(TIMETABLE_URL).read
days = calendar.scrape(page)
puts days.size

html = <<-END
<html>
<head>
<title>FOSS4G 2006 hCalendar</title>
</head>
<body>
END

days.each do |day|
  day.sessions.each do |session|
    # String#split with capture groups returns the text before the match
    # followed by each captured group, hence the throwaway first element.
    # to_s keeps an entry with a missing field from aborting the whole run.
    _lead, location, room, starttime, endtime =
      session.info.to_s.chomp.split(/\((.*) \((.*)\): (.*) - (.*)\)/)
    dtstart = DateTime.parse("#{day.date} #{starttime}")
    dtend = DateTime.parse("#{day.date} #{endtime}")
    _lead, author, organization =
      session.author.to_s.chomp.split(/by (.*) \((.*)\)/)

    # NOTE(review): scraped text is interpolated into the page as-is, exactly
    # as before; whether it needs HTML-escaping depends on what scrapi's
    # :text extractor returns -- verify before publishing.
    html << <<-END
<div class="vevent">
<span class="summary">#{session.title}</span> by #{author}<span class="organization">#{organization}</span>
#{day.date} from <abbr class="dtstart" title="#{dtstart}">#{starttime}</abbr>- <abbr class="dtend" title="#{dtend}">#{endtime}</abbr>, at the <span class="location">#{location} #{room}</span>
</div>
    END
  end
end

html << <<-END
</body>
</html>
END

# Block form closes the file even if the write raises.
File.open(OUTPUT_PATH, 'w') do |output_html|
  output_html.write(html)
end