Python selenium 加載并保存QQ群成員,去除其群主、管理員信息的示例代碼
一位伙計自己開了個游戲室,想在群里拉點人,就用所學(xué)知識幫幫忙,于是就有了這篇文章,今天小編特此通過實例代碼給大家介紹下Python selenium 加載并保存QQ群成員去除其群主、管理員信息的示例代碼
模擬登陸頁面
頁面分析
思路:
點擊登陸按鈕
選擇要登陸的賬號
代碼實現(xiàn)
# Author:smart_num_1 # Blog:https://blog.csdn.net/smart_num_1 # WeChat:Be_a_lucky_dog from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait def login(driver = None): already_dic = {} # 創(chuàng)建一個字典,保存電腦登陸的QQ login_button = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//p[@class="user-info"]/a'))) login_button.click() # 點擊登錄,獲取電腦登陸的QQ already_login_number = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//div[@id="loginWin"]/iframe'))) driver.get(url = already_login_number.get_attribute('src')) # 此步驟目的,是因為登錄框是一個子頁面,在上一級頁面中獲得到的這個子頁面 already_login_numbers = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//span[contains(@class,"nick")]'))) # 獲取電腦登陸的QQ print('在以下賬號中選擇所需賬號') for already_login_number in already_login_numbers: already_dic[already_login_number.get_attribute('innerText')] = already_login_number print(already_login_number.get_attribute('innerText')) QQ_NeedToLogin = input('需要登陸: ') # 通過獲取鍵名,在 already_dic 獲得相應(yīng)的鍵值 already_dic[QQ_NeedToLogin].click() # 模擬點擊要登陸的QQ,達到登陸的效果 time.sleep(1) if __name__ == '__main__': start_url = 'https://qun.qq.com/index.html#click' # 群首頁,用來登陸賬號 driver = webdriver.Chrome(executable_path = './chromedriver.exe') # 因為selenium 需要用到瀏覽器、瀏覽器驅(qū)動,但是還要配置環(huán)境變量,很麻煩,如果這樣指定 webdriver 路徑的話,就可以省去那一步 driver.get(url=start_url) login(driver=driver)
選擇所需加載群
頁面分析
打開群管理界面,會看到這樣的信息,我們的目的是爬取已加入群的成員信息
代碼實現(xiàn)
# Author:smart_num_1 # Blog:https://blog.csdn.net/smart_num_1 # WeChat:Be_a_lucky_dog def get_group_number(driver = None): group_number_dic = {} # 同樣的,利用字典儲存信息 my_group_list = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//ul[@class="my-group-list"]/li'))) # 獲取每個已加入群的節(jié)點信息 print('在以下群中選擇:') i = 1 for my_group in my_group_list: try: group_number_dic[str(i)] = my_group print('第 %s 個--- '%str(i) + my_group.get_attribute('title') + ' ' + my_group.get_attribute('data-id')) i += 1 except: continue # 打印出獲得的群信息,獲取所有的目標群 group = input('獲取群編號 : ') # 通過鍵名獲取鍵值,得到要點擊的目標 group_number_dic[group].click() return driver if __name__ == '__main__': member_url_test = 'https://qun.qq.com/member.html' driver.get(url = member_url_test) driver = get_group_number(driver=driver)
保存所需信息
頁面分析
可以看到,是個動態(tài)加載的頁面,因為用的是selenium,所以就沒必要分析到底是通過請求那個url得到的信息,直接模擬滾動獲取就可以了
代碼實現(xiàn)
# Author:smart_num_1 # Blog:https://blog.csdn.net/smart_num_1 # WeChat:Be_a_lucky_dog def get_group_member(driver = None): driver.refresh() # 刷新一下界面,防止上一步點擊過后,頁面不更新的情況 elem_end = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//td[@class="td-user-nick"]/img'))) # 添加了等待,這個定位可以隨便的選擇,確保頁面加載完畢的 for i in range(10): time.sleep(0.5) driver.execute_script("var action=document.documentElement.scrollTop=10000") print('加載中······') # 這個滾動范圍可以任選,因為每次會加載21個信息,我看過我加的群,在10次過后的成員基本屬于潛水的人了,要不要的就無所謂了 group_members = driver.find_elements_by_xpath('//tr[contains(@class,"mb")]') for group_member in group_members: try: data = group_member.text.split('\n')[2].split(' ')[0] # 這一步,得到一個列表,從第一位開始分別是成員、群昵稱、QQ號、性別、Q齡、入群時間、等級(積分)、最后發(fā)言,在這里我是只需要QQ號碼 #對于其他信息,根據(jù)自己需要,添加代碼即可 if data.isdigit() == True: with open('./record.txt','a',encoding = 'utf-8') as record: record.write(data + '@qq.com') record.write('\n') except: continue print('Loaded')
完整代碼
# Author:smart_num_1 # Blog:https://blog.csdn.net/smart_num_1 # WeChat:Be_a_lucky_dog from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.chrome.options import Options import time import random import os def get_group_member(driver = None): driver.refresh() elem_end = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//td[@class="td-user-nick"]/img'))) for i in range(10): time.sleep(0.5) driver.execute_script("var action=document.documentElement.scrollTop=10000") print('加載中······') group_members = driver.find_elements_by_xpath('//tr[contains(@class,"mb")]') for group_member in group_members: try: data = group_member.text.split('\n')[2].split(' ')[0] if data.isdigit() == True: with open('./record.txt','a',encoding = 'utf-8') as record: record.write(data + '@qq.com') record.write('\n') except: continue print('Loaded') def get_group_number(driver = None): group_number_dic = {} my_group_list = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//ul[@class="my-group-list"]/li'))) print('在以下群中選擇:') i = 1 for my_group in my_group_list: try: group_number_dic[str(i)] = my_group print('第 %s 個--- '%str(i) + my_group.get_attribute('title') + ' ' + my_group.get_attribute('data-id')) i += 1 except: continue group = input('獲取群編號 : ') group_number_dic[group].click() return driver def login(driver = None): already_dic = {} login_button = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//p[@class="user-info"]/a'))) login_button.click() already_login_number = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//div[@id="loginWin"]/iframe'))) driver.get(url = already_login_number.get_attribute('src')) already_login_numbers = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//span[contains(@class,"nick")]'))) print('在以下賬號中選擇所需賬號') for already_login_number in already_login_numbers: already_dic[already_login_number.get_attribute('innerText')] = already_login_number print(already_login_number.get_attribute('innerText')) QQ_NeedToLogin = input('需要登陸: ') already_dic[QQ_NeedToLogin].click() time.sleep(1) def start(driver = None,url = None): print('Please wait for loading\n') driver.get(url = url) driver = get_group_number(driver=driver) print('Please wait for loading\n') get_group_member(driver=driver) if __name__ == '__main__': print('Please wait for loading') chrome_options=Options() chrome_options.add_argument('--headless') try: random.seed(time.time()) QQ_number = '738334209' start_url = 'https://qun.qq.com/index.html#click' member_url = 'https://qun.qq.com/member.html#gid=%s'%QQ_number member_url_test = 'https://qun.qq.com/member.html' driver = webdriver.Chrome(executable_path = './chromedriver.exe',chrome_options=chrome_options) try: driver.get(url=start_url) login(driver=driver) while True: start(driver = driver,url = member_url_test) flag = input('是否繼續(xù)爬??? yes or no : ') if flag == 'no': break os.system('cls') driver.quit() except: print('Something wrong') driver.quit() except: print('Something wrong!!!!!!') os.system('pause')
轉(zhuǎn)載請標明出處:https://blog.csdn.net/smart_num_1/article/details/106326488
總結(jié)
到此這篇關(guān)于Python selenium 加載并保存QQ群成員 去除其群主、管理員信息的示例代碼的文章就介紹到這了,更多相關(guān)Python selenium 加載并保存QQ群成員內(nèi)容請搜索腳本之家以前的文章或繼續(xù)瀏覽下面的相關(guān)文章希望大家以后多多支持腳本之家!
相關(guān)文章
python 實現(xiàn)非極大值抑制算法(Non-maximum suppression, NMS)
這篇文章主要介紹了python 如何實現(xiàn)非極大值抑制算法(Non-maximum suppression, NMS),幫助大家更好的進行機器學(xué)習(xí),感興趣的朋友可以了解下2020-10-10Python中的 sort 和 sorted的用法與區(qū)別
這篇文章主要介紹了Python中的 sort 和 sorted的用法與區(qū)別,文中通過示例代碼介紹的非常詳細,對大家的學(xué)習(xí)或者工作具有一定的參考學(xué)習(xí)價值,需要的朋友可以參考下2019-08-08解決Pycharm 中遇到Unresolved reference ''sklearn''的問題
這篇文章主要介紹了解決Pycharm 中遇到Unresolved reference 'sklearn'的問題,具有很好的參考價值,希望對大家有所幫助。一起跟隨小編過來看看吧2020-07-07結(jié)合OpenCV與TensorFlow進行人臉識別的實現(xiàn)
這篇文章主要介紹了結(jié)合OpenCV與TensorFlow進行人臉識別的實現(xiàn),文中通過示例代碼介紹的非常詳細,對大家的學(xué)習(xí)或者工作具有一定的參考學(xué)習(xí)價值,需要的朋友們下面隨著小編來一起學(xué)習(xí)學(xué)習(xí)吧2019-10-10