Python爬虫教程,利用Python采集QQ群成员信息( 二 )
# Complete code: Python crawler tutorial - collecting QQ group member info with Python
# Author:smart_num_1
# Blog:
# WeChat:Be_a_lucky_dog
import os
import random
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def _prompt_for_choice(prompt, choices):
    """Ask on stdin until the reply is a key of *choices*; return its value.

    Args:
        prompt: Text shown by ``input()`` on every attempt.
        choices: Mapping from the accepted replies to the objects they select.

    Raises:
        LookupError: If *choices* is empty, since no reply could ever match.
    """
    if not choices:
        raise LookupError('nothing to choose from')
    while True:
        reply = input(prompt)
        if reply in choices:
            return choices[reply]


def get_group_member(driver=None):
    """Append ``<number>@qq.com`` lines for the listed members to ./record.txt.

    Refreshes the current page, waits up to 100 seconds for a member row to be
    present, scrolls ten times, then reads every ``<tr>`` whose class contains
    "mb". For each row, the first space-separated token on the third text line
    is kept when it consists only of digits.

    Args:
        driver: Selenium WebDriver already showing the member page.
    """
    driver.refresh()
    # Only the wait matters here; the located element itself is not used.
    WebDriverWait(driver=driver, timeout=100).until(
        EC.presence_of_element_located(
            (By.XPATH, '//td[@class="td-user-nick"]/img')))

    # NOTE(review): presumably the page loads more rows as it is scrolled,
    # hence the repeated scroll-and-pause - confirm against the live page.
    for _ in range(10):
        time.sleep(0.5)
        driver.execute_script(
            "var action=document.documentElement.scrollTop=10000")
        print('加载中······')

    rows = driver.find_elements(By.XPATH, '//tr[contains(@class,"mb")]')
    addresses = []
    for row in rows:
        try:
            candidate = row.text.split('\n')[2].split(' ')[0]
        except (IndexError, WebDriverException):
            # Row has fewer than three text lines or can no longer be read.
            continue
        if candidate.isdigit():
            addresses.append(candidate + '@qq.com\n')

    # Open the file once, and only when there is something to record.
    if addresses:
        with open('./record.txt', 'a', encoding='utf-8') as record:
            record.writelines(addresses)
    print('Loaded')


def get_group_number(driver=None):
    """List the groups found on the page and click the one the user picks.

    Waits up to 100 seconds for the ``my-group-list`` items, prints each with
    a running number, its ``title`` and its ``data-id``, then asks for a
    number on stdin (re-asking until a listed number is entered).

    Args:
        driver: Selenium WebDriver showing the group list.

    Returns:
        The same *driver* that was passed in.
    """
    group_items = WebDriverWait(driver=driver, timeout=100).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//ul[@class="my-group-list"]/li')))
    print('在以下群中选择:')

    groups_by_number = {}
    for item in group_items:
        number = str(len(groups_by_number) + 1)
        try:
            label = ('第 %s 个--- ' % number
                     + item.get_attribute('title')
                     + ' '
                     + item.get_attribute('data-id'))
        except (TypeError, WebDriverException):
            # A missing attribute (None) or an unreadable element: skip it
            # without consuming a number.
            continue
        # Register the entry only once its label is known, so every
        # selectable number has actually been shown to the user.
        groups_by_number[number] = item
        print(label)

    _prompt_for_choice('获取群编号 : ', groups_by_number).click()
    return driver


def login(driver=None):
    """Open the login frame and click the account the user names.

    Clicks the login link, loads the document referenced by the login
    iframe's ``src`` directly, prints the ``innerText`` of every element whose
    class contains "nick", then asks on stdin which one to click (re-asking
    until a listed name is entered).

    Args:
        driver: Selenium WebDriver showing the start page.
    """
    login_link = WebDriverWait(driver=driver, timeout=100).until(
        EC.presence_of_element_located(
            (By.XPATH, '//p[@class="user-info"]/a')))
    login_link.click()

    login_frame = WebDriverWait(driver=driver, timeout=100).until(
        EC.presence_of_element_located(
            (By.XPATH, '//div[@id="loginWin"]/iframe')))
    # Navigate to the iframe's own URL rather than switching into the frame.
    driver.get(url=login_frame.get_attribute('src'))

    nick_elements = WebDriverWait(driver=driver, timeout=100).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//span[contains(@class,"nick")]')))
    print('在以下账号中选择所需账号')

    accounts_by_nick = {}
    for element in nick_elements:
        nick = element.get_attribute('innerText')
        accounts_by_nick[nick] = element
        print(nick)

    _prompt_for_choice('需要登陆: ', accounts_by_nick).click()
    time.sleep(1)


def start(driver=None, url=None):
    """Run one scrape: load *url*, let the user pick a group, record members.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        url: Address of the page that lists the user's groups.
    """
    print('Please wait for loading\n')
    driver.get(url=url)
    driver = get_group_number(driver=driver)
    print('Please wait for loading\n')
    get_group_member(driver=driver)


def main():
    """Script entry point: log in, then scrape groups until the user stops."""
    print('Please wait for loading')
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    try:
        random.seed(time.time())
        # NOTE(review): the URL literals below look truncated (a bare
        # fragment and an empty string) - presumably the scheme/host part was
        # lost when this listing was published. TODO restore the real
        # addresses before running.
        QQ_number = '738334209'
        start_url = '#click'
        member_url = '#gid=%s' % QQ_number  # not used by the loop below
        member_url_test = ''
        # NOTE(review): `executable_path` is rejected by recent Selenium 4
        # releases, which expect a Service object instead - confirm the
        # Selenium version this script is meant to run against.
        driver = webdriver.Chrome(executable_path='./chromedriver.exe',
                                  options=chrome_options)
        try:
            driver.get(url=start_url)
            login(driver=driver)
            while True:
                start(driver=driver, url=member_url_test)
                flag = input('是否继续爬取? yes or no : ')
                if flag == 'no':
                    break
                os.system('cls')  # Windows console clear between rounds
        except Exception:
            print('Something wrong')
        finally:
            # Always release the browser, including on Ctrl-C.
            driver.quit()
    except Exception:
        print('Something wrong!!!!!!')
    os.system('pause')  # Windows: keep the console window open


if __name__ == '__main__':
    main()
- 缩小|调整电脑屏幕文本文字显示大小,系统设置放大缩小DPI图文教程
- 告诉|阿里大佬告诉你如何一分钟利用Python在家告别会员看电影
- Python源码阅读-基础1
- Python调用时使用*和**
- 如何基于Python实现自动化控制鼠标和键盘操作
- 解决多版本的python冲突问题
- 学习python第二弹
- 更改计算机待机睡眠状态时间方法,电脑设置关闭显示器时间教程
- 随身携带「Windows」Windows To Go制作教程
- Python中文速查表-Pandas 基础
