データ解析のお供に

# Fetch the result pages ye01.htm .. ye47.htm with wget, sleeping 3 seconds
# after each request. Two loops are used because the single-digit page
# numbers need a leading zero in the URL.
perl -e 'for($i=1;$i<10;$i++){print `wget http://www.yomiuri.co.jp/election/sangiin/2010/kaihyou/ye0$i.htm`;sleep 3}'
perl -e 'for($i=10;$i<48;$i++){print `wget http://www.yomiuri.co.jp/election/sangiin/2010/kaihyou/ye$i.htm`;sleep 3}'
#!/usr/bin/ruby -Ku

require 'rubygems'
require 'hpricot'
require 'nkf'
require 'kconv'

# Build h from voters.txt: the first tab-separated field of every line is the
# key and the second field, converted with to_i, is the value.
h = {}

File.foreach("voters.txt") do |line|
  key, count = line.chomp.split(/\t/)
  h[key] = count.to_i
end

# Parse the saved result pages ye01.htm .. ye47.htm and collect, per page,
#   [name, voters, [rows, ranked]]
# into s, where
#   name   - text of the span with class "f120"
#   voters - the count looked up in h (nil when no key matches)
#   rows   - one [votes, party] pair per candidate row that has a party label
#   ranked - per-party vote totals as [votes, party] pairs, largest first
# Raises RuntimeError when a page has no table with id "candidates".
s = (1..47).map { |page_no|
  path = format("ye%02d.htm", page_no)
  doc = Hpricot(File.read(path).toutf8)

  name = doc.search("span[@class='f120']").inner_text
  # The key in voters.txt may be shorter than the heading on the page, so the
  # lookup is retried with up to three trailing chops.
  voters = h[name] || h[name.chop] || h[name.chop.chop] || h[name.chop.chop.chop]

  # Only the first candidates table of a page is used; fail here with the file
  # name instead of letting a nil result blow up later in the script.
  table = doc.search("table[@id='candidates']").first
  raise "#{path}: no table with id 'candidates'" unless table

  seq = 0                  # running suffix that keeps unaffiliated candidates apart
  totals = Hash.new(0)     # party label => summed votes
  rows = []

  # The first row is skipped (presumably the header row).
  (table.search("tr")[1..-1] || []).each { |tr|
    vote_cell = tr.search("td[@class=vote]")
    vote_cell.search("div").remove        # drop nested divs before reading the number
    votes = vote_cell.inner_text.gsub(/[^0-9]/, "").to_i

    party = tr.search("td[@class=party]").inner_text.toutf8
    party = party.gsub(/\(.*\)/, "")      # drop a parenthesised remark
    # Strip every kind of whitespace; on Ruby 1.9+ [[:space:]] also matches a
    # full-width space, so padded labels collapse to their space-free form.
    party = party.gsub(/[[:space:]]/, "")
    next if party == ""                   # rows without a party label are ignored

    # The label has no whitespace left at this point, so compare against the
    # space-free spellings. Each such candidate gets a unique label and is
    # therefore tallied separately instead of being pooled.
    if party == "諸派" || party == "無所属"
      party += seq.to_s
      seq += 1
    end

    totals[party] += votes
    rows << [votes, party]
  }

  ranked = totals.map { |party, votes| [votes, party] }.
    sort { |p1, p2| p2[0] <=> p1[0] }

  [name, voters, [rows, ranked]]
}
# For every entry of s, divide each per-party total by 1, 2 and 3 (integer
# division), take the three largest quotients and print one
# "name<TAB>voters<TAB>party" line for each of them.
s.each do |name, voters, r|
  party_totals = r[1]

  quotients = []
  [1, 2, 3].each do |divisor|
    party_totals.each { |votes, party| quotients << [votes / divisor, party] }
  end

  winners = quotients.sort { |lhs, rhs| rhs[0] <=> lhs[0] }[0..2]
  winners.each do |_quotient, party|
    print "#{name}\t#{voters}\t#{party}\n"
  end
end

=begin
print "["+s.map{|name,voters,r|
  "(\"" + name + "\", " + voters.to_s + ",["+r.map{|a,b|"("+a.to_s+",\""+b+"\""+")"}.join(",")+"])"
}.join(",")+"]"
=end