l=ItemLoader(item=xxxItem(),response=response)
l.add_xpath('title','//xxx',MapCompose(str.strip,str.title))
MapCompose(float) # convert to float
l.add_value('title',response.url)
l.load_item()
start_URL=[i.strip() for i in open('xxx').readlines()]
1、scrapy startproject loginscrapy
cd loginscrapy
scrapy genspider -t basic loginspider example
2、设置settings.py:添加
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.54 Safari/536.5'
3、vi loginscrapy/spiders/loginspider.py
import scrapy from scrapy import Request,FormRequest
class BasicloginSpider(scrapy.Spider):
    """Spider that logs in to example.webscraping.com through its login form.

    Flow: fetch the login page, submit the form found on that page with the
    credentials filled in, then handle the post-login response in ``parse``.
    """

    name = 'basiclogin'
    allowed_domains = ['example.webscraping.com']

    def start_requests(self):
        """Return the initial request for the login page.

        The ``cookiejar`` meta key tags the request with cookie session 1 so
        the follow-up form submission can reuse the same session.
        """
        return [
            Request(
                "http://example.webscraping.com/places/default/user/login",
                callback=self.login,
                meta={"cookiejar": 1},
            )
        ]

    def login(self, response):
        """Submit the login form found in ``response`` with the credentials.

        ``FormRequest.from_response`` pre-populates the request from the
        page's form fields; ``formdata`` supplies the email and password.
        Without it the form would be posted with no credentials.
        """
        data = {"email": "liushuo@webscraping.com", "password": "12345678"}
        return [
            FormRequest.from_response(
                response,
                formdata=data,
                # Carry the same cookiejar id forward so the login cookies
                # stay attached to this session.
                meta={"cookiejar": response.meta["cookiejar"]},
                callback=self.parse,
            )
        ]

    def parse(self, response):
        """Print a marker and the URL of the response that followed login."""
        print("logined")
        print(response.url)