|
|
|
|
|
by kmike84
4380 days ago
|
|
Here is a Scrapy spider that does exactly the same thing::
# It requires Scrapy from GitHub.
# Save it to tickets.py and execute
# "scrapy runspider tickets.py" from the command line
from urlparse import urljoin
import scrapy
class TicketSpider(scrapy.Spider):
    """Print Craigslist listings for Firefly tickets priced in (100, 250].

    Matching listings are written to standard output; nothing is yielded
    as items or follow-up requests.
    """

    name = 'tickets'
    start_urls = ['http://philadelphia.craigslist.org/search/sss?sort=date&query=firefly%20tickets']

    def parse(self, response):
        """Scan every ``p.row`` listing in ``response`` and print the matches.

        A listing matches when the first run of digits found in its
        ``span.price`` element satisfies ``100 < price <= 250``. For each
        match the listing text, its absolute URL and a blank separator line
        are printed. Listings with no price or no link are skipped.
        """
        for listing in response.css('p.row'):
            # Raw string so ``\d`` is unambiguously a regex escape.
            price_txt = listing.css('span.price').re(r'(\d+)')
            if not price_txt:
                continue

            price = int(price_txt[0])
            if not 100 < price <= 250:
                continue

            hrefs = listing.css('a::attr(href)').extract()
            if not hrefs:
                # Without a link there is nothing useful to report; skipping
                # avoids an IndexError that would end the loop early and
                # drop the remaining listings.
                continue
            url = urljoin(response.url, hrefs[0])

            # Single-argument parenthesised prints produce the same output
            # under Python 2's print statement and Python 3's print function.
            print(' '.join(listing.css('::text').extract()))
            print(url)
            print('')
There is no reason to prefer Scrapy for extracting information from a single web page, but on the other hand it is no harder than using BeautifulSoup + pyquery + requests. |
|