[西瓜]不会用python的看这里http://www.lukou.com/userfeed/19864317
=======================================================
昨天到中午的时候才知道有1688考试,刷路口抄功课脑子已经懵了
今天研究 191827 的路况才发现几个找商品的页面, 只能等明年320了[鹿捂脸]
爬的是伙拼1元秒杀
https://huopin.1688.com/page/temai.html?biz_type=tm&price_range=1&status=onSale
也可以自行改commit_url爬5元/10元和明日预告。
爬出来的原始数据
自己改改Excel可以搞一个按起批件数/起批价格排序的
=====================Python 3.6的分割线=====================
# -*- coding: UTF-8 -*-
# aali
from bs4 import BeautifulSoup
import requests
import json
import csv,codecs
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver import Remote
from selenium.webdriver.chrome import options
from selenium.common.exceptions import InvalidArgumentException
#http://www.lukou.com/aali
def get_commit(browser, commit_url, fname):
    """Scrape the 1688 Huopin flash-sale listing and save offers to ``<fname>.csv``.

    Drives an already-open Selenium *browser* to *commit_url*, scrolls to
    trigger lazy loading, parses the offer cards with BeautifulSoup, and
    appends one comma-joined row per offer (link, price, min order, title).
    Quits the browser when done.

    Args:
        browser: a live selenium WebDriver instance.
        commit_url: listing page URL (e.g. the 1-yuan flash-sale page).
        fname: output file name without extension; ``.csv`` is appended.
    """
    # Write the header first; utf_8_sig adds a BOM so Excel detects UTF-8.
    with codecs.open(fname + '.csv', 'w', encoding='utf_8_sig') as f:
        f.write('链接,单价,起批,标题' + '\n' + '\n')

    browser.get(commit_url)
    SCROLL_PAUSE_TIME = 0.5
    # Scroll repeatedly so the page's lazy loader fetches more offer cards.
    for _ in range(50):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        ActionChains(browser).key_down(Keys.END).key_up(Keys.END).perform()
        time.sleep(SCROLL_PAUSE_TIME)

    soup = BeautifulSoup(browser.page_source, 'lxml')
    feeds = soup.find_all('div', class_="ts-sales-offer")
    print(len(feeds))

    # Open the output once in append mode instead of re-opening it per row
    # (the original re-opened and re-closed the file inside the loop).
    with open(fname + '.csv', 'a', encoding='utf_8_sig') as f:
        for feed in feeds:
            try:
                desc_tag = feed.find('dd', class_=['of-msg'])
                if desc_tag:
                    desc = ', ' + desc_tag.find('span', class_=['txt']).get_text(', ') + ', '
                else:
                    desc = ', '
                price_div = feed.find('div', class_="of-price")
                price = ', ' + price_div.find('span', class_="price-y").get_text(', ')
                price = price + price_div.find('span', class_="price-f").get_text(', ')
                qty = feed.find('div', class_="of-min-order-quaitity").get_text(', ')
                # Drop the trailing 3 characters — presumably the "件起批"
                # unit suffix; TODO confirm against the live page markup.
                qty = ', ' + qty[:-3]
                link = feed.get_attribute_list if False else feed.find('a', class_="of-link").get_attribute_list('href')[0]
                link = link.split("?", 1)[0]  # strip tracking query string
                f.write(link + price + qty + desc + '\n')
            except AttributeError:
                # Card is missing an expected sub-element (find() returned
                # None); skip it instead of silently swallowing every error
                # as the original bare `except: pass` did.
                continue
    browser.quit()
if __name__ == '__main__':
    # Output file name (without extension) — ".csv" is added by get_commit.
    out_name = input('save as: ')
    # Other list pages — swap one of these into target_url to scrape it:
    #   price_range=1  -> 1-yuan flash sale (default below)
    #   price_range=5  -> 5-yuan flash sale
    #   price_range=10 -> 10-yuan flash sale
    #   price_range=0&status=annotion&tmStatus=annotion -> tomorrow's preview
    target_url = 'https://huopin.1688.com/page/temai.html?biz_type=tm&price_range=1&status=onSale'
    # Launch a local Chrome session via the chromedriver binary.
    chrome = webdriver.Chrome(executable_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
    get_commit(chrome, target_url, out_name)