予定表がテーブルで出力されているとき、iCalやRSSにできたら便利だなと思っていたので、
iCal形式にしてみた。
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Scrapes the IPSJ (Information Processing Society of Japan) event calendar
# page and prints the events to stdout in iCalendar format.

require 'rubygems'
require 'mechanize'
require 'kconv'
require 'date'
require 'icalendar'

# Only meaningful on Ruby 1.8; later interpreters ignore it with a warning.
$KCODE = 'u'

## Data sample (one table row of the calendar page)
#b = mech.page.search("//tr[@tmpl='LOOP EVENT_LIST']")
#b[13].search('td[1]').text #=> "1月12日(木)"                  (event date)
#b[13].search('td[2]').text #=> "第165回知能システム研究発表会"  (event name)
#b[13].search('td[3]').text #=> "11月18日(金)"                 (paper deadline)
#b[13].search('td[4]').text #=> "当日のみ"                      (registration deadline)
#b[13].search('td[5]').text #=> "〔関東〕国立情報学研究所"       (venue)

# Enumerable collection of the events listed on the IPSJ calendar page.
# Each event is a Hash keyed by the (Japanese) column names plus "url".
class IPSJEvents
  include Enumerable

  CALENDAR_URL = "http://www.ipsj.or.jp/cgi-bin/ipsj_calendar.cgi"

  # "M月D日(曜)" optionally followed by "〜M月D日(曜)" or "〜D日(曜)".
  # Captures: 1 = start month, 2 = start day, 3 = end month (optional),
  # 4 = end day (only when a range is given).
  DATE_RANGE = /\A\s*(\d+)月(\d+)日(?:[^〜～]*[〜～]\D*(?:(\d+)月)?(\d+)日)?/

  # Fetches the calendar page immediately; parsing is deferred until the
  # events are first enumerated.
  def initialize
    @mech = Mechanize.new
    @mech.get CALENDAR_URL
  end

  # Converts every event row of the fetched page into a Hash and stores the
  # list in @events.
  #
  # Returns the Array of event Hashes.
  def parse_event
    rows = @mech.page.search("//tr[@tmpl='LOOP EVENT_LIST']")
    @events = rows.map do |tr|
      {
        "開催日" => tr.search('td[1]').text.strip,             # event date (may be a range)
        "名称" => tr.search('td[2]').text.strip.toutf8,        # event name
        "url" => tr.search('td[2]/a').attr('href'),            # link behind the name, nil if none
        "論文応募締め切り日" => tr.search('td[3]').text.strip, # paper submission deadline
        "参加締切日" => tr.search('td[4]').text.strip,         # registration deadline
        "開催地" => tr.search('td[5]').text.strip              # venue
      }
    end
  end

  # Yields each event Hash, parsing the page on first use.
  def each(&block)
    parse_event unless @events
    @events.each(&block)
  end

  # Renders all events as an iCalendar document.
  #
  # year - the calendar page prints dates without a year, so the caller
  #        supplies it (defaults to 2012, the value previously hard-coded).
  #
  # Events with an empty date are skipped silently; events whose date cannot
  # be parsed are skipped with a warning on stderr instead of raising.
  # As a side effect each exported event Hash gains a "開催期間" entry holding
  # [start_date, end_date] (end_date is nil for single-day events).
  #
  # Returns the iCalendar text as a String.
  def to_ical(year = 2012)
    cal = Icalendar::Calendar.new
    each do |e|
      date_text = e["開催日"].strip
      next if date_text.empty?

      start_date, end_date = parse_period(date_text, year)
      unless start_date
        warn "skipping event with unparsable date: #{date_text}"
        next
      end
      e["開催期間"] = [start_date, end_date]

      # icalendar 1.x evaluates this block in the context of the new event,
      # so the bare calls below are property setters on that event.
      cal.event do
        dtstart start_date
        dtend end_date if end_date
        summary "#{e['開催地']}-#{e['名称']}"
        url "#{e['url']}"
        description "#{e['url']}\r参加締切日:#{e['参加締切日']}\r論文応募締め切り日:#{e['論文応募締め切り日']}\r"
      end
    end
    cal.to_ical
  end

  private

  # Parses "M月D日(曜)" or "M月D日(曜)〜M月D日(曜)" into [start, end] Dates.
  # A regex is used rather than scanf because scanf's "%s" consumes every
  # non-blank character, which swallowed the end date of a range.
  # Returns [nil, nil] when the text does not start with a valid date.
  def parse_period(text, year)
    m = DATE_RANGE.match(text.to_s)
    return [nil, nil] unless m

    start_date = build_date(year, m[1], m[2])
    return [nil, nil] unless start_date

    end_date = nil
    if m[4]
      end_month = m[3] || m[1] # "〜13日" without a month means the same month
      end_date = build_date(year, end_month, m[4])
      # A range such as 12月〜1月 ends in the following year.
      end_date = build_date(year + 1, end_month, m[4]) if end_date && end_date < start_date
    end
    [start_date, end_date]
  end

  # Builds a Date, returning nil for impossible dates such as 2月30日.
  def build_date(year, month, day)
    Date.new(year, month.to_i, day.to_i)
  rescue ArgumentError
    nil
  end
end

ipsj = IPSJEvents.new
puts ipsj.to_ical
2012-01-07追加
ついでに電子情報通信学会もやってみた。
#!/usr/bin/env ruby
#
require 'rubygems'
require 'mechanize'
require 'kconv'
$KCODE='u'
# Enumerable collection of the meetings listed on the IEICE programme pages.
# Follows the "next" links from the index page and turns every table row into
# a Hash keyed by the (Japanese) column names plus "url".
class IEICEEvents
  include Enumerable

  # "[YYYY年]M月D日": captures 1 = year (optional), 2 = month, 3 = day.
  DATE_PATTERN = /(?:(\d+)年)?(\d+)月(\d+)日/

  # Prepares the crawler; nothing is fetched until the events are requested.
  def initialize()
    @mech = Mechanize.new
    @url = "http://www.ieice.org/ken/program/index.php"
    @event_list = []
    @fetched = false
  end

  # Appends one Hash per data row of the current page's schedule table to
  # @event_list. Does nothing when the page has no matching table.
  def parse_event
    table = @mech.page.search('//table[@cellpadding=2 and @width="100%"]').last
    return unless table

    table.search('tr').each do |tr|
      date = cell_text(tr, 1)
      next if date == "開催日" # header row repeats the column title

      event = {
        "開催日" => date,                          # event date (may be a range)
        "開催地" => cell_text(tr, 2),              # venue
        "名称" => cell_text(tr, 3),                # event name
        "研究会" => cell_text(tr, 4),              # study group
        "論文応募締め切り日" => cell_text(tr, 5)   # paper submission deadline
      }
      program_link = tr.search('td[6]//a').select { |a| a.text =~ /プログラム/ }.first
      event["url"] = program_link.attr('href') if program_link
      @event_list.push event
    end
  end

  # Returns the href of the first link on the current page whose text
  # contains "次" (next), or nil when there is no such link.
  def next_url
    link = @mech.page.links.select { |a| a.text =~ /次/ }.first
    link && link.href
  end

  # Crawls every page once, starting at @url and following next_url until it
  # returns nil. The result is cached, so repeated calls (and repeated
  # enumeration) neither re-fetch pages nor duplicate events.
  #
  # Returns the Array of event Hashes.
  def get_all
    return @event_list if @fetched

    url = @url
    while url
      @mech.get url
      parse_event
      url = next_url
    end
    @fetched = true
    @event_list
  end

  # Yields each event Hash, crawling the site on first use.
  def each(&block)
    get_all.each(&block)
  end

  # Renders all events as an iCalendar document.
  #
  # year - fallback year for dates printed without one (defaults to 2012, the
  #        value previously hard-coded). A year written in the date text
  #        itself takes precedence.
  #
  # Events whose date cannot be parsed are still exported, without
  # DTSTART/DTEND.
  #
  # Returns the iCalendar text as a String.
  def to_ical(year = 2012)
    require 'icalendar'
    require 'date'
    cal = Icalendar::Calendar.new
    each do |f|
      start_date, end_date = parse_period(f["開催日"], year)
      # icalendar 1.x evaluates this block in the context of the new event,
      # so the bare calls below are property setters on that event.
      cal.event do
        dtstart start_date if start_date
        dtend end_date if end_date
        summary "#{f['開催地']}-#{f['名称']}"
        url "#{f['url']}"
        description "#{f['名称']}--#{f['url']} 参加締切日:#{f['参加締切日']}:論文応募締め切り日:#{f['論文応募締め切り日']}".gsub(/\n/, "")
      end
    end
    cal.to_ical
  end

  private

  # Text of the n-th cell of a row with all whitespace removed.
  def cell_text(tr, n)
    tr.search("td[#{n}]").text.gsub(/\s+/, "").toutf8.strip
  end

  # Splits "start-end" on "-" and parses each side as "[YYYY年]M月D日".
  # A side without a year inherits the year of the preceding side, or
  # default_year when none has been seen. Unparsable or impossible dates
  # become nil.
  #
  # Returns [start_date, end_date].
  def parse_period(text, default_year)
    year = default_year
    dates = text.to_s.split('-').map do |part|
      m = DATE_PATTERN.match(part)
      if m
        year = m[1].to_i if m[1]
        begin
          Date.new(year, m[2].to_i, m[3].to_i)
        rescue ArgumentError
          nil
        end
      end
    end
    [dates[0], dates[1]]
  end
end
# Crawl the IEICE programme pages and print the resulting iCalendar feed.
puts IEICEEvents.new.to_ical
簡単
mechanizeは簡単なのでもっと普及して良いと思うよ。