#!/usr/bin/env ruby
require 'thread'
require 'socket'
begin ; require 'openssl' ; rescue LoadError ; end
#
# A ruby http file downloader using multiple parallel connections
#
# supports proxies, basic authentication, ssl
#
# Usage:
#   ruby httppget.rb -n 12 http://kik:oo@myhost.com/bigfile.lol
#
# Licence:: Redistributes under the terms of the GPL version 2, with specific permission to be linked to OpenSSL without invoking GPL clause 2 (b)
#

# Minimal HTTP/1.1 client bound to a single URL and a single connection.
#
# Accepted url shape (every part except host and path is optional):
#   [http-proxy://[user:pass@]proxyhost[:port]/][http[s]://][user:pass@][vhost@]host[:port]/path
class Http
  # Largest response header accepted before giving up.
  MAX_HEADER_SIZE = 64 * 1024

  # Parses +url+ and immediately opens the connection (see #connect).
  #
  # Raises RuntimeError ('Unparsed url') when +url+ does not match the
  # accepted shape.
  def initialize(url)
    raise 'Unparsed url' unless md = %r{(?:http-proxy://(\w+:\w+@)?([\w.-]+)(:\d+)?/)?(?:http(s)?://)?(\w+:\w+@)?([\w.-]+@)?([\w.-]+)(:\d+)?(/.*)}.match(url)

    @proxylp, @proxyh, @proxyp, @use_ssl, @loginpass, @vhost, @host, @port, @path = md.captures
    # the port captures include the leading ':'
    @proxyp = @proxyp ? @proxyp[1..-1].to_i : 3128
    @port = @port ? @port[1..-1].to_i : (@use_ssl ? 443 : 80)
    # the credential captures include the trailing '@'; they are turned into
    # ready-to-send header lines (or '' when absent)
    @proxylp = @proxylp ? "Proxy-Authorization: Basic #{basic_credentials(@proxylp)}\r\n" : ''
    @loginpass = @loginpass ? "Authorization: Basic #{basic_credentials(@loginpass)}\r\n" : ''
    @vhost = @vhost ? @vhost.chop : @host
    # Percent-encode every byte of every character outside the safe set.
    # NOTE(review): '?', '=', '&' and '%' are outside that set, so query
    # strings and already-escaped urls get re-escaped -- confirm intended.
    @path = @path.gsub(/[^a-zA-Z0-9_.\/-]/) { |c| c.unpack('H2' * c.bytesize).map { |hex| "%#{hex}" }.join }
    connect
  end

  # Opens the TCP connection, either directly to the host or to the proxy.
  # For https through a proxy a CONNECT tunnel is negotiated first; for https
  # the socket is then wrapped in an SSL socket.
  #
  # Raises RuntimeError on a bad proxy answer or when https is requested but
  # the openssl library could not be loaded.
  def connect
    raise 'https requested but openssl is not available' if @use_ssl && !defined?(OpenSSL::SSL)

    if @proxyh
      @socket = TCPSocket.new(@proxyh, @proxyp)
      open_proxy_tunnel if @use_ssl
    else
      @socket = TCPSocket.new(@host, @port)
    end

    if @use_ssl
      # NOTE(review): a bare SSLContext is used, so no verify_mode or CA
      # store is configured here.
      @socket = OpenSSL::SSL::SSLSocket.new(@socket, OpenSSL::SSL::SSLContext.new)
      @socket.sync_close = true
      @socket.connect
    end
  rescue StandardError
    close # do not leak a half-initialized socket
    raise
  end

  # Closes the connection if one is open. Safe to call more than once;
  # errors raised while closing are ignored (best-effort cleanup).
  def close
    @socket.close if @socket && !@socket.closed?
  rescue StandardError
    nil
  ensure
    @socket = nil
  end

  # Sends one request for the configured path and reads the answer.
  #
  # method:: HTTP verb, 'GET' or 'HEAD'
  # range::  optional byte range 'first-last' (both inclusive), sent as a
  #          Range header
  #
  # Returns
  # * for 'HEAD': the Content-Length as an Integer, or nil when the server
  #   sent none
  # * otherwise: the response body as a String
  # * '' for a 301/302 (the target is printed) and for a 416
  #
  # Raises RuntimeError for any other status and EOFError when the peer
  # closes the connection before a complete header was received.
  def request(method='GET', range=nil)
    range = "Range: bytes=#{range}\r\n" if range
    via_proxy = @proxyh && !@use_ssl
    # a plain-http proxy wants the absolute uri; @path already starts with '/'
    target = via_proxy ? "http://#{@host}#{port_suffix}#{@path}" : @path
    # proxy credentials are only for the proxy itself, never for the origin
    # server reached through a CONNECT tunnel
    proxy_auth = via_proxy ? @proxylp : ''
    req = "#{method} #{target} HTTP/1.1\r\n" \
          "Host: #{@vhost}#{port_suffix}\r\n" \
          "#{proxy_auth}#{@loginpass}#{range}\r\n"
    puts req if $DEBUG
    @socket.write req

    # read until the blank line ending the header has been seen
    chunk = @socket.readpartial(4096)
    buf = chunk.dup
    until (headend = buf.index("\r\n\r\n"))
      raise 'http header too large' if buf.length > MAX_HEADER_SIZE
      chunk = @socket.readpartial(4096)
      buf << chunk
    end
    headlen = headend + 4
    head = buf[0, headlen]

    case head[/\AHTTP\/1\.\d (\d+) /, 1].to_i
    when 200, 206
      # ok
    when 301, 302
      puts "302 redirect to #{head[/^Location: (.*?)\r?$/i, 1].inspect}"
      return ''
    when 416
      # bad range : beyond end of file
      return ''
    else
      raise 'unhandled http response:' + buf
    end

    len = head[/^Content-length: (\d+)/i, 1]
    len = len.to_i if len
    puts head if $DEBUG
    return len if method == 'HEAD'

    data = buf[headlen..-1]
    if len
      data << @socket.read(len - data.length).to_s if len > data.length
    elsif chunk.length >= 4096
      # no length announced and the last read filled the buffer: assume more
      # data follows and read until the peer closes the connection
      data << @socket.read.to_s
    end
    data
  end

  # Downloads one file using up to +nthreads+ parallel connections, each
  # fetching +chunksize+ bytes with a Range request and writing them at the
  # right offset of +filename+.
  #
  # urls::      mirror urls of the same file; the first one is asked for the
  #             size, chunks are then spread over all of them round-robin
  # nthreads::  maximum number of parallel connections (default: urls.length)
  # filename::  output file (default: last path component of the first url)
  # chunksize:: bytes per request
  #
  # Raises ArgumentError on an empty url list or a chunksize < 1,
  # RuntimeError when the file size cannot be determined, and re-raises any
  # exception raised by a worker thread.
  def self.download_file(urls, nthreads=nil, filename=nil, chunksize=1024*1024)
    raise ArgumentError, 'no url given' if urls.nil? || urls.empty?
    raise ArgumentError, 'chunksize must be positive' if chunksize < 1

    nthreads ||= urls.length
    nthreads = 1 if nthreads < 1
    filename ||= urls.first[/[^\/]*$/]
    File.open(filename, 'wb') {} if not File.exist? filename

    head = new(urls.first)
    begin
      len = head.request('HEAD')
    ensure
      head.close
    end
    raise "cannot determine the size of #{urls.first}" unless len.kind_of?(Integer)

    chunks = []
    0.step(len - 1, chunksize) { |i| chunks << i }

    m = Mutex.new
    tstart = Time.now
    threads = []
    idx = 0
    until chunks.empty?
      $stderr.print "#{chunks.length} to go \r" if $stderr.isatty
      # reap finished workers; join re-raises whatever killed a worker so a
      # failed chunk cannot go unnoticed
      threads.delete_if { |t| !t.alive? && t.join }
      if threads.length < nthreads
        # chunk and mirror are picked here, in the main thread, so that
        # workers never race on the shared list or index
        offset = chunks.shift
        idx = (idx + 1) % urls.length
        threads << Thread.new(offset, urls[idx]) { |off, url|
          last = [off + chunksize, len].min - 1 # http ranges are inclusive
          http = new(url)
          begin
            data = http.request('GET', "#{off}-#{last}")
          ensure
            http.close
          end
          m.synchronize {
            File.open(filename, 'r+b') { |fd|
              fd.pos = off
              fd.write data
            }
          }
        }
      else
        sleep 0.1
      end
    end
    threads.each { |t| t.join }

    puts 'got %.02fM at %.02f ko/s' % [len/1024.0/1024, (len/(Time.now-tstart)/1024)]
  end

  private

  # Base64 form of a 'user:pass@' capture (the trailing '@' is dropped).
  # 'm0' keeps the output on one line whatever its length.
  def basic_credentials(userpass)
    [userpass.chop].pack('m0')
  end

  # ':port' unless the port is the default one for the scheme in use.
  def port_suffix
    @port == (@use_ssl ? 443 : 80) ? '' : ":#{@port}"
  end

  # Asks the proxy for a tunnel to host:port and swallows its answer header.
  def open_proxy_tunnel
    @socket.write "CONNECT #{@host}:#{@port} HTTP/1.1\r\n#{@proxylp}\r\n"
    buf = @socket.gets
    raise 'eof' unless buf
    raise "non http answer #{buf[1..100].inspect}" if buf !~ /^HTTP\/1.. (\d+) /
    raise "CONNECT bad response: #{buf.inspect}" if $1.to_i != 200

    while (line = @socket.gets)
      break if line.chomp.empty?
    end
  end
end

if $0 == __FILE__
  require 'optparse'

  n = nil
  ofile = nil
  chunksize = 1024*1024
  parser = OptionParser.new { |opts|
    opts.banner = "Usage: httppget.rb [options] url [mirror url ...]"
    opts.on('-n N', '--nthreads N', 'maximum number of parallel connections') { |o| n = o.to_i }
    opts.on('-O F', '--outfile F', 'file to write') { |o| ofile = o }
    opts.on('-c N', '--chunksize N', 'chunk size') { |o| chunksize = o.to_i }
    opts.on('-h', '--help', 'this message') { puts opts ; exit }
  }
  parser.parse!

  if ARGV.empty?
    # nothing to download: show the usage instead of crashing
    $stderr.puts parser
  else
    Http.download_file(ARGV, n, ofile, chunksize)
  end
end