グローバルな並列リクエスト数と、ホスト毎の並列リクエスト数を考慮した試作版です。
もう少し改善出来そうな気がします。
# Prototype crawler: fetches every URL listed in urls.txt (located next to
# this script) through em-http, capping both the total number of requests in
# flight and the number of requests in flight per host.
require 'rubygems'
require 'em-http'
require 'addressable/uri'

# Upper bound on requests in flight across all hosts.
PARALLEL_REQUEST_NUM = 50
# Upper bound on requests in flight against any single host.
PARALLEL_REQUEST_PER_HOST = 2

# Build the work queue: one normalized URI per usable line of urls.txt.
uris = []
File.readlines(File.join(File.dirname(__FILE__), 'urls.txt')).each do |line|
  text = line.strip
  next if text.empty? # blank lines carry nothing to fetch

  begin
    uri = Addressable::URI.parse(text).normalize
  rescue StandardError => e
    # Best effort: one bad line must not abort the whole run, but say so.
    warn "Skipping unparsable URL #{ text.inspect }: #{ e.message }"
    next
  end

  # Without a host there is nothing to connect to, and the per-host counter
  # would otherwise be keyed on nil.
  if uri.host.nil? || uri.host.empty?
    warn "Skipping URL without a host: #{ text.inspect }"
    next
  end

  uris << uri
end

# In-flight counters: the global total and a per-host tally (defaulting to 0).
pending = { :http => 0, :http_per_host => Hash.new(0) }

EM.run do
  # Releases the slots held by a finished request (successful or not) and
  # stops the reactor as soon as nothing is queued or in flight.
  finish = lambda do |host|
    pending[:http] -= 1
    pending[:http_per_host][host] -= 1
    EM.stop if pending[:http] < 1 && uris.empty?
  end

  # Every 100 ms, try to fill the free global slots from the queue.
  EM.add_periodic_timer(0.1) do
    # The completion handlers are the only other place that stops the reactor,
    # so an empty URL list would otherwise leave this timer running forever.
    if pending[:http] < 1 && uris.empty?
      EM.stop
      next
    end

    (PARALLEL_REQUEST_NUM - pending[:http]).times do
      uri = uris.shift
      break unless uri # queue drained; nothing more to schedule this tick

      if pending[:http_per_host][uri.host] < PARALLEL_REQUEST_PER_HOST
        puts "Getting: #{ uri }"
        pending[:http] += 1
        pending[:http_per_host][uri.host] += 1

        http = EM::HttpRequest.new(uri.to_s).get(:timeout => 30)

        http.callback do
          puts "Got: #{ uri } (status: #{ http.response_header.status })"
          finish.call(uri.host)
        end

        http.errback do
          # NOTE(review): some em-http-request releases expose this as
          # `http.error` rather than `http.errors` - confirm against the
          # installed gem version.
          puts "Error: #{ uri } (#{ http.errors })"
          finish.call(uri.host)
        end
      else
        # Host is at its limit: requeue at the back and retry on a later pass.
        uris.push uri
      end
    end
  end
end