我对Python相当陌生,我不知道selenium是什么,但是如果您能够找到某种模式,您应该能够执行您描述的内容。关键是找到一个模式。下面是几个示例脚本,它们可能会让您了解如何开始。
import urllib2
from bs4 import BeautifulSoup
# Download the roster page at `link`, locate the first <table>, and write one
# "player<TAB>position" line per matching row to a text file.
import io

link = "http://espn.go.com/nba/team/roster/_/name/bkn/brooklyn-nets"
page = urllib2.urlopen(link)
try:
    soup = BeautifulSoup(page, "html.parser")
finally:
    # Release the HTTP connection once parsing has finished (or failed).
    page.close()

table = soup.find("table")
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("no <table> element found at " + link)

# io.open with an explicit encoding accepts unicode text on both Python 2 and
# Python 3, so non-ASCII names do not raise UnicodeEncodeError; the with-block
# closes the file even if a row fails part-way through.
with io.open('C:/Users/rshuell001/Desktop/test.txt', 'w', encoding='utf-8') as f:
    for row in table.findAll("tr", {"class": ["oddrow", "evenrow"]}):
        col = row.findAll('td')
        if len(col) < 3:
            # Rows without both the second and third cell carry nothing to write.
            continue
        # get_text() always returns text, whereas .string is None for a cell
        # holding more than one child node.
        player = col[1].get_text()
        position = col[2].get_text()
        f.write(player + u'\t' + position + u'\n')
******** ******** ******** ******** ******** ******** ******** ******** ******** ********
from bs4 import BeautifulSoup
from urllib2 import urlopen
def make_soup(url):
    """Fetch *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to download.

    Returns:
        A BeautifulSoup object built from the response body.
    """
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Close the connection explicitly rather than waiting for garbage
        # collection.
        response.close()
    # Name the parser so the result does not depend on which optional parsers
    # happen to be installed, and so bs4 does not warn about the omission.
    return BeautifulSoup(html, "html.parser")
def get_teams():
    """Collect the team links found on the ESPN NBA teams page.

    Every <ul class="medium-logos"> is scanned; for each <li> inside it the
    href of the anchor nested in its <h5> is taken.

    Returns:
        A list of the hrefs, each encoded as UTF-8, in document order.
    """
    url = "http://espn.go.com/nba/teams"
    soup = make_soup(url)
    teams = []
    for team_chunk in soup.find_all('ul', class_='medium-logos'):
        for li in team_chunk.find_all('li'):
            heading = li.h5
            team = heading.a if heading is not None else None
            if team is None or not team.has_attr('href'):
                # Skip list items that lack the <h5><a href=...> structure
                # instead of failing on a missing tag or attribute.
                continue
            teams.append(team['href'].encode('utf-8'))
    return teams
if __name__ == "__main__":
    # Fetch once and reuse the result: calling get_teams() twice would download
    # and parse the page twice, and the two outputs could disagree.
    teams = get_teams()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(teams)
    print(len(teams))
import requests
from bs4 import BeautifulSoup
# Download the page and print every <a> element whose class is
# "category-selected".
# A timeout keeps the script from hanging indefinitely on an unresponsive host.
r = requests.get("http://www.kijiji.ca/h-gander/1700255", timeout=30)
# Stop on an HTTP error status rather than parsing an error page.
r.raise_for_status()
# Name the parser so the result does not depend on which optional parsers are
# installed.
soup = BeautifulSoup(r.content, "html.parser")
print(soup.find_all("a", {"class": "category-selected"}))
import requests
from bs4 import BeautifulSoup
# Download the search results at `url` and print the stripped text of every
# <h2 class="jobtitle"> element.
url = "http://www.indeed.com/jobs?q=hardware+engineer&l=San+Francisco%2C+CA"
# A timeout keeps the script from hanging indefinitely on an unresponsive host.
r = requests.get(url, timeout=30)
# Stop on an HTTP error status rather than parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, "html.parser")
# The attribute filter must be a dict; the original {"class", "jobtitle"} was a
# set literal (comma instead of colon).
job_titles = soup.find_all("h2", {"class": "jobtitle"})
for job in job_titles:
    print(job.text.strip())