-
StackOverflow 文件
-
Python Language 教程
-
使用 Python 進行 Web 抓取
-
使用 BeautifulSoup4 刮痧
from bs4 import BeautifulSoup
import requests
# Use the requests module to obtain a page
res = requests.get('https://www.codechef.com/problems/easy')
# Create a BeautifulSoup object
page = BeautifulSoup(res.text, 'lxml') # the text field contains the source of the page
# Now use a CSS selector in order to get the table containing the list of problems
datatable_tags = page.select('table.dataTable') # The problems are in the <table> tag,
# with class "dataTable"
# We extract the first tag from the list, since that's what we desire
datatable = datatable_tags[0]
# Now since we want problem names, they are contained in <b> tags, which are
# directly nested under <a> tags
prob_tags = datatable.select('a > b')
prob_names = [tag.getText().strip() for tag in prob_tags]
print prob_names