1 import urllib.request 2 import urllib.parse 3 import requests 4 from urllib.parse import urlencode 5 from pyquery import PyQuery as pq 6 from pymongo import MongoClient 7 import json 8 9 10 url = 'http://aibee.com/cn/joinus.aspx?action=jobinfo'11 12 headers = {13 'Host': 'aibee.com',14 'Referer': 'http://aibee.com/cn/joinus.aspx',15 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',16 'X-Requested-With': 'XMLHttpRequest',17 } 18 19 client = MongoClient()20 db = client['aibee']21 collection = db['aibee']22 max_id = 5023 24 def get_page(id): 25 26 formData = { 27 'id': id,28 } 29 30 #将str类型转换为bytes类型 31 data = urllib.parse.urlencode(formData).encode("utf-8") 32 request = urllib.request.Request(url, data=data, headers=headers) 33 response = urllib.request.urlopen(request)34 #print(response.read().decode('utf-8'))35 result = response.read().decode('utf-8')36 #print(result)37 #print(len(result))38 #print(id)39 40 if len(result)!=12:41 42 # print(id)43 content=result.replace(",",":")44 45 id=content.split(':')[2].strip()46 #print(id)47 title=content.split(':')[4].strip()48 #print(title)49 zhize=content.split(':')[6].strip().replace("\t","").replace("
- ","").replace("
- ","").replace(" ","").replace("
- ","").replace("
- ","").replace(" ","").replace("