予定表がテーブルで出力されているとき、iCalやRSSに出来たら便利だなと思っていたので。
ical形式にしてみた。
#!/usr/bin/env ruby
# Fetches the IPSJ event calendar page (ipsj.or.jp) and prints its event table
# as iCalendar data on standard output.
require 'rubygems'
require 'mechanize'
require 'kconv'

# $KCODE only has an effect on Ruby 1.8; later interpreters ignore it and warn.
$KCODE = 'u' if RUBY_VERSION < '1.9'

## Sample data
#b = mech.page.search("//tr[@tmpl='LOOP EVENT_LIST']")
#b[13].search('td[1]').text #=> "1月12日(木)"
#b[13].search('td[2]').text #=> "第165回知能システム研究発表会"
#b[13].search('td[3]').text #=> "11月18日(金)"
#b[13].search('td[4]').text #=> "当日のみ"
#b[13].search('td[5]').text #=> "〔関東〕国立情報学研究所"

# Enumerable collection of the events listed on the IPSJ calendar page.
#
# Each yielded element is a Hash with the keys
# "開催日" (event date), "名称" (name), "url",
# "論文応募締め切り日" (paper submission deadline),
# "参加締切日" (registration deadline) and "開催地" (venue).
class IPSJEvents
  include Enumerable

  CALENDAR_URL = "http://www.ipsj.or.jp/cgi-bin/ipsj_calendar.cgi"

  # The page prints dates without a year, so one has to be supplied.
  DEFAULT_YEAR = 2012

  # One "M月D日" date. A period such as "1月12日(木)〜1月13日(金)" contains two.
  DATE_PATTERN = /(\d+)月(\d+)日/

  # Downloads the calendar page; parsing is deferred until first iteration.
  def initialize
    @mech = Mechanize.new
    @mech.get CALENDAR_URL
    #puts @mech.page.to_s.toutf8
    ##pp @mech.page.search "//table"
  end

  # Converts every event row of the fetched page into a Hash and caches the
  # result in @events.
  #
  # @return [Array<Hash>] the parsed events
  def parse_event
    rows = @mech.page.search("//tr[@tmpl='LOOP EVENT_LIST']")
    @events = rows.map do |tr|
      # "開催日" is kept as the raw text here because it may be a period;
      # to_ical turns it into Date objects.
      {
        "開催日" => tr.search('td[1]').text.strip,
        "名称" => tr.search('td[2]').text.strip.toutf8,
        "url" => tr.search('td[2]/a').attr('href'),
        "論文応募締め切り日" => tr.search('td[3]').text.strip,
        "参加締切日" => tr.search('td[4]').text.strip,
        "開催地" => tr.search('td[5]').text.strip
      }
    end
  end

  # Yields each event Hash, parsing the page on first use.
  def each(&block)
    parse_event unless @events
    @events.each(&block)
  end

  # Renders all events with a parseable start date as an iCalendar document.
  #
  # As a side effect every rendered event Hash gains a "開催期間" (period)
  # entry holding [start_date, end_date]; end_date is nil for one-day events.
  # Events whose "開催日" is empty or does not contain a valid date are skipped.
  #
  # @param year [Integer] year applied to every date, since the page omits it
  # @return [String] the iCalendar text
  def to_ical(year = DEFAULT_YEAR)
    require 'icalendar'
    require 'date'
    cal = Icalendar::Calendar.new
    each do |e|
      start_date, end_date = parse_period(e["開催日"], year)
      next unless start_date
      e["開催期間"] = [start_date, end_date]
      # NOTE(review): RFC 5545 treats a DATE-valued DTEND as exclusive, so
      # multi-day events may need end_date + 1 here. Confirm before changing.
      cal.event do
        dtstart start_date
        dtend end_date if end_date
        summary "#{e['開催地']}-#{e['名称']}"
        url "#{e['url']}"
        description "#{e['url']}\r参加締切日:#{e['参加締切日']}\r論文応募締め切り日:#{e['論文応募締め切り日']}\r"
      end
    end
    cal.to_ical
  end

  private

  # Extracts the first two "M月D日" dates from +text+ as start and end.
  #
  # A regex is used instead of scanf because scanf's "%s" consumes every
  # following non-whitespace character, which swallowed the end date.
  #
  # @return [Array<Date, nil>] zero, one or two entries; an entry is nil when
  #   the month/day pair is not a valid calendar date in +year+
  def parse_period(text, year)
    text.scan(DATE_PATTERN).first(2).map do |month, day|
      m = month.to_i
      d = day.to_i
      Date.new(year, m, d) if Date.valid_date?(year, m, d)
    end
  end
end

# Run the scrape only when executed as a script, not when loaded as a library.
if __FILE__ == $PROGRAM_NAME
  ipsj = IPSJEvents.new
  #a= ipsj.map{|e| e}
  puts ipsj.to_ical
end
2012-01-07追加
ついでに電子情報通信学会もやってみた。
#!/usr/bin/env ruby
#
# Walks the IEICE programme listing (ieice.org), following the "次" (next)
# links page by page, and prints the collected events as iCalendar data on
# standard output.
require 'rubygems'
require 'mechanize'
require 'kconv'

# $KCODE only has an effect on Ruby 1.8; later interpreters ignore it and warn.
$KCODE = 'u' if RUBY_VERSION < '1.9'

# Enumerable collection of the events listed on the IEICE programme pages.
#
# Each yielded element is a Hash with the keys
# "開催日" (event date), "開催地" (venue), "名称" (name), "研究会" (study group),
# "論文応募締め切り日" (paper submission deadline) and, when the row has a
# link whose text contains "プログラム", "url".
class IEICEEvents
  include Enumerable

  START_URL = "http://www.ieice.org/ken/program/index.php"

  # Year used when the date text carries no explicit "N年".
  DEFAULT_YEAR = 2012

  # One side of a period: optional "N年", optional "N月", then "N日".
  DATE_PART = /(?:(\d+)年)?(?:(\d+)月)?(\d+)日/

  # Prepares the crawler; nothing is fetched until the events are iterated.
  def initialize
    @mech = Mechanize.new
    @url = START_URL
    @event_list = []
  end

  # Appends one Hash per data row of the current page's event table to
  # @event_list. Rows whose first cell reads "開催日" are header rows and
  # are skipped.
  def parse_event
    table = @mech.page.search('//table[@cellpadding=2 and @width="100%"]').last
    table.search('tr').each do |tr|
      next if cell_text(tr, 1) == "開催日"
      event = {
        "開催日" => cell_text(tr, 1),
        "開催地" => cell_text(tr, 2),
        "名称" => cell_text(tr, 3),
        "研究会" => cell_text(tr, 4),
        "論文応募締め切り日" => cell_text(tr, 5),
      }
      program_link = tr.search('td[6]//a').find { |a| a.text =~ /プログラム/ }
      event["url"] = program_link.attr('href') if program_link
      @event_list.push event
    end
  end

  # @return [String, nil] href of the first link on the current page whose
  #   text contains "次", or nil when there is no such link
  def next_url
    link = @mech.page.links.find { |a| a.text =~ /次/ }
    link && link.href
  end

  # Fetches and parses every remaining page, then returns all events.
  #
  # @url becomes nil once the last page has been read, so later calls return
  # the cached list instead of fetching again (the original do-while loop
  # called @mech.get with nil on a second pass).
  #
  # @return [Array<Hash>] all events collected so far
  def get_all
    while @url
      @mech.get @url
      parse_event
      @url = next_url
      #puts @url
    end
    @event_list
  end

  # Yields each event Hash, crawling the site on first use.
  def each(&block)
    get_all.each(&block)
  end

  # Renders all events with a parseable start date as an iCalendar document.
  # Events without one are skipped rather than emitted without DTSTART.
  #
  # @param year [Integer] year used for dates that do not state one
  # @return [String] the iCalendar text
  def to_ical(year = DEFAULT_YEAR)
    require 'icalendar'
    require 'date'
    cal = Icalendar::Calendar.new
    each do |f|
      start_date, end_date = parse_period(f["開催日"], year)
      next unless start_date
      # NOTE(review): "参加締切日" is never set by parse_event, so that part of
      # the description is always blank. Kept to leave the output unchanged.
      cal.event do
        dtstart start_date
        dtend end_date if end_date
        summary "#{f['開催地']}-#{f['名称']}"
        url "#{f['url']}"
        description "#{f['名称']}--#{f['url']} 参加締切日:#{f['参加締切日']}:論文応募締め切り日:#{f['論文応募締め切り日']}".gsub(/\n/, "")
      end
    end
    cal.to_ical
  end

  private

  # Text of the +index+-th <td> of +tr+ with all whitespace removed.
  def cell_text(tr, index)
    tr.search("td[#{index}]").text.gsub(/\s+/, "").toutf8.strip
  end

  # Splits +text+ on "-" and parses the first two parts as start and end date.
  # A part that omits the year or month inherits it from the preceding part,
  # falling back to +default_year+ for the year.
  #
  # @return [Array<Date, nil>] zero, one or two entries; an entry is nil when
  #   the part cannot be read as a valid calendar date
  def parse_period(text, default_year)
    year = default_year
    month = nil
    text.split('-').first(2).map do |part|
      m = DATE_PART.match(part)
      next nil unless m
      year = m[1].to_i if m[1]
      month = m[2].to_i if m[2]
      day = m[3].to_i
      Date.new(year, month, day) if month && Date.valid_date?(year, month, day)
    end
  end
end

# Run the crawl only when executed as a script, not when loaded as a library.
if __FILE__ == $PROGRAM_NAME
  ieice = IEICEEvents.new
  puts ieice.to_ical
end
簡単
mechanizeは簡単なのでもっと普及して良いと思うよ。