-
StackOverflow 文档
-
Python Language 教程
-
使用 Python 进行 Web 抓取
-
使用请求和 lxml 来抓取一些数据的基本示例
# For Python 2 compatibility.
from __future__ import print_function
import lxml.html
import requests
def main():
r = requests.get("https://httpbin.org")
html_source = r.text
root_element = lxml.html.fromstring(html_source)
# Note root_element.xpath() gives a *list* of results.
# XPath specifies a path to the element we want.
page_title = root_element.xpath('/html/head/title/text()')[0]
print(page_title)
if __name__ == '__main__':
main()