Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions app/models/searchgov_domain.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ class SearchgovDomain < ActiveRecord::Base

attr_readonly :domain

# Returns the crawl delay that Robotex (queried as the 'usasearch' user
# agent) reports for this record's root URL, http://<domain>/.
# Falls back to 1 when Robotex reports no delay. The result is memoized in
# @delay, so Robotex is consulted at most once per instance.
def delay
  return @delay if @delay

  robots_client = Robotex.new('usasearch')
  reported_delay = robots_client.delay("http://#{domain}/")
  @delay = reported_delay || 1
end

private

def valid_domain?
Expand Down
32 changes: 32 additions & 0 deletions spec/models/searchgov_domain_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,36 @@
end
end
end

describe '#delay' do
  # Value under test: whatever the model's #delay returns.
  subject(:delay) do
    searchgov_domain.delay
  end

  # Stub the GET for the domain's robots.txt, answering with the
  # per-context `robots` body.
  before do
    stub_request(:get, "http://#{domain}/robots.txt")
      .to_return(status: [200, 'OK'], headers: { content_type: 'text/plain' }, body: robots)
  end

  context 'when a delay is specified in robots.txt' do
    let(:robots) do
      "User-agent: *\nCrawl-delay: 10"
    end

    it { is_expected.to eq 10 }
  end

  context 'when no delay is specified' do
    let(:robots) do
      "User-agent: *\nDisallow: /somedir/"
    end

    it('defaults to 1') { expect(delay).to eq 1 }
  end

  context 'when a delay is specified for the "usasearch" user agent' do
    let(:robots) do
      "User-agent: *\nCrawl-delay: 10\nUser-agent: usasearch\nCrawl-delay: 2"
    end

    # Skipped until the agent-specific delay is fixed in the robotex gem:
    # https://github.com/chriskite/robotex/issues/9
    # https://www.pivotaltracker.com/story/show/157329443
    xit { is_expected.to eq 2 }
  end
end
end