So you need to pull a list of diseases from the CDC website? Oh, you don’t… well, I did.

require 'set'

require 'nokogiri'
require 'rest-client'
# Collects the distinct disease/condition names listed on the CDC
# DiseasesConditions A-Z index pages (a.html .. z.html) and prints them to
# stdout, one per line.

# Delimiters that separate the fragments of one list entry: square brackets,
# parentheses, "— see" / "- see", and "see also".
DISEASE_DELIMITERS = /[\[\]\(\)]|[—-] see|see also/.freeze

# Breaks the text of one index entry into its individual fragments.
#
# @param entry_text [String] text content of a single <li> element
# @return [Array<String>] whitespace-stripped, non-empty fragments
def disease_names(entry_text)
  entry_text.split(DISEASE_DELIMITERS).map(&:strip).reject(&:empty?)
end

master = Set.new # a Set so a name appearing in several entries/pages is kept once

('a'..'z').each do |letter|
  page = RestClient.get("http://www.cdc.gov/DiseasesConditions/az/#{letter}.html")
  # Only the first .span16 element on the page is searched for list items.
  listing = Nokogiri::HTML(page).css('.span16').first
  if listing.nil?
    # Previously a page without this element raised NoMethodError on nil and
    # aborted the whole run; report it on stderr and move on to the next letter.
    warn "No .span16 element found on page '#{letter}'; skipping"
    next
  end

  listing.css('li').each do |disease|
    master.merge(disease_names(disease.text))
  end
end

master.each { |item| puts item }
view raw cdc.rb hosted with ❤ by GitHub