第11章:Python 网络编程
11.1 网络编程基础
网络编程是指编写程序来实现计算机之间的通信。Python 提供了丰富的库来支持网络编程。
基本概念
- IP 地址:标识网络中的设备
- 端口:标识设备上的服务
- 协议:通信规则,如 TCP、UDP、HTTP 等
- Socket:网络通信的端点
网络协议
| 协议 | 描述 | 端口 |
|---|---|---|
| HTTP | 超文本传输协议 | 80 |
| HTTPS | 安全的超文本传输协议 | 443 |
| FTP | 文件传输协议 | 21 |
| SMTP | 简单邮件传输协议 | 25 |
| POP3 | 邮局协议版本 3 | 110 |
| IMAP | 互联网消息访问协议 | 143 |
| SSH | 安全外壳协议 | 22 |
| Telnet | 远程登录协议 | 23 |
11.2 Socket 编程
Socket 是网络通信的基础,它提供了一种在不同主机之间传输数据的方式。
TCP Socket
TCP(传输控制协议)是一种可靠的、面向连接的协议。
服务器端
import socket
# Address and port the echo-style demo server listens on
host = '127.0.0.1'  # localhost
port = 12345  # port number

# Create a TCP/IPv4 socket. The context manager guarantees the listening
# socket is closed even when the accept loop is interrupted (e.g. Ctrl+C).
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    # Allow an immediate restart without "Address already in use" errors.
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    # Bind to the address and port
    s.bind((host, port))
    # Start listening; at most 5 connections may wait in the backlog
    s.listen(5)
    print(f"Server listening on {host}:{port}")
    while True:
        # Accept a connection
        client_socket, addr = s.accept()
        # Close the per-client socket even if receiving or decoding fails.
        with client_socket:
            print(f"Connected by {addr}")
            # Receive data
            data = client_socket.recv(1024)
            print(f"Received: {data.decode('utf-8')}")
            # Send the reply; sendall() retries until every byte is written,
            # whereas send() may transmit only part of the buffer.
            response = "Hello, client!"
            client_socket.sendall(response.encode('utf-8'))
客户端
import socket
# Server to connect to
host = '127.0.0.1'  # server address
port = 12345  # server port

# Create a TCP/IPv4 socket; the context manager closes it on every exit path,
# including a failed connect or a decode error.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    # Connect to the server
    s.connect((host, port))
    # Send data; sendall() keeps writing until the whole message is sent
    message = "Hello, server!"
    s.sendall(message.encode('utf-8'))
    # Receive the reply
    data = s.recv(1024)
    print(f"Received: {data.decode('utf-8')}")
UDP Socket
UDP(用户数据报协议)是一种不可靠的、无连接的协议。
服务器端
import socket
# Address and port the UDP demo server listens on
host = '127.0.0.1'  # localhost
port = 12345  # port number

# Create a UDP/IPv4 socket; the context manager releases it when the
# receive loop is interrupted.
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
    # Bind to the address and port
    s.bind((host, port))
    print(f"Server listening on {host}:{port}")
    while True:
        # Receive one datagram together with the sender's address
        data, addr = s.recvfrom(1024)
        print(f"Received from {addr}: {data.decode('utf-8')}")
        # Reply to the sender
        response = "Hello, client!"
        s.sendto(response.encode('utf-8'), addr)
客户端
import socket
# Server address and port
host = '127.0.0.1'
port = 12345

# Create a UDP/IPv4 socket; the context manager closes it on every exit path.
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
    # UDP gives no delivery guarantee, so never wait forever for a reply.
    s.settimeout(5)
    # Send data
    message = "Hello, server!"
    s.sendto(message.encode('utf-8'), (host, port))
    # Receive the reply (or give up after the timeout)
    try:
        data, addr = s.recvfrom(1024)
        print(f"Received from {addr}: {data.decode('utf-8')}")
    except socket.timeout:
        print("No response from server (timed out)")
11.3 HTTP 客户端
HTTP 客户端用于发送 HTTP 请求并接收响应。Python 标准库提供了 urllib 模块,第三方库 requests 则提供了更简洁的接口,二者都可以实现 HTTP 客户端功能。
使用 urllib
import urllib.request
import urllib.parse
# Send a GET request
url = 'https://www.example.com'
try:
    # The context manager closes the HTTP response (and its socket);
    # the timeout keeps an unresponsive server from blocking forever.
    with urllib.request.urlopen(url, timeout=10) as response:
        data = response.read()
        print(f"Status code: {response.status}")
    print(f"Data: {data.decode('utf-8')[:500]}...")
# URLError, HTTPError and socket timeouts are all OSError subclasses.
except (OSError, UnicodeDecodeError) as e:
    print(f"Error: {e}")

# Send a POST request
url = 'https://httpbin.org/post'
data = {'name': 'John', 'age': 30}
# Form-encode the payload; urlopen() requires the body as bytes
data = urllib.parse.urlencode(data).encode('utf-8')
try:
    req = urllib.request.Request(url, data=data, method='POST')
    with urllib.request.urlopen(req, timeout=10) as response:
        data = response.read()
        print(f"Status code: {response.status}")
    print(f"Data: {data.decode('utf-8')}")
except (OSError, UnicodeDecodeError) as e:
    print(f"Error: {e}")
使用 requests
requests 是一个第三方库,提供了更简洁、更强大的 HTTP 客户端功能。
import requests
# Install requests:
# pip install requests

# requests has no default timeout: without one, a call can hang indefinitely
# on an unresponsive server, so every request below passes `timeout`.

# Send a GET request
url = 'https://www.example.com'
try:
    response = requests.get(url, timeout=10)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.text[:500]}...")
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# Send a POST request with a JSON body
url = 'https://httpbin.org/post'
data = {'name': 'John', 'age': 30}
try:
    response = requests.post(url, json=data, timeout=10)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.json()}")
# ValueError covers a non-JSON response body on older requests versions.
except (requests.exceptions.RequestException, ValueError) as e:
    print(f"Error: {e}")

# GET request with query-string parameters
url = 'https://httpbin.org/get'
params = {'name': 'John', 'age': 30}
try:
    response = requests.get(url, params=params, timeout=10)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.json()}")
except (requests.exceptions.RequestException, ValueError) as e:
    print(f"Error: {e}")

# Request with custom headers
url = 'https://httpbin.org/headers'
headers = {'User-Agent': 'Mozilla/5.0'}
try:
    response = requests.get(url, headers=headers, timeout=10)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.json()}")
except (requests.exceptions.RequestException, ValueError) as e:
    print(f"Error: {e}")
11.4 HTTP 服务器
Python 标准库提供了 http.server 模块来创建简单的 HTTP 服务器(仅适用于开发和测试,不建议用于生产环境)。
基本 HTTP 服务器
import http.server
import socketserver
# Port the static file server listens on
PORT = 8000
# Request handler that serves files from the current working directory
Handler = http.server.SimpleHTTPRequestHandler

# An empty host string binds the server to every available interface.
server_address = ("", PORT)
httpd = socketserver.TCPServer(server_address, Handler)
# Using the server as a context manager closes its socket on exit.
with httpd:
    print(f"Server running at http://localhost:{PORT}")
    httpd.serve_forever()
自定义 HTTP 服务器
import html
from http.server import BaseHTTPRequestHandler, HTTPServer


class MyHandler(BaseHTTPRequestHandler):
    """Minimal request handler answering GET and POST with small HTML pages."""

    def do_GET(self):
        """Reply to any GET request with a fixed greeting page."""
        # Send the status line and response headers
        self.send_response(200)
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.end_headers()
        # Send the response body
        message = "<h1>Hello, World!</h1>"
        self.wfile.write(message.encode('utf-8'))

    def do_POST(self):
        """Echo the request body back inside an HTML page.

        Responds with 400 when the Content-Length header is missing,
        non-numeric or negative, because the body size is then unknown.
        """
        # Determine the request body length; a missing header yields '' and
        # therefore a ValueError, handled the same way as a malformed value.
        try:
            content_length = int(self.headers.get('Content-Length', ''))
        except ValueError:
            content_length = -1
        if content_length < 0:
            # read(-1) would block until the client closes the connection.
            self.send_error(400, "Missing or invalid Content-Length header")
            return
        # Read the request body
        post_data = self.rfile.read(content_length)
        # The body is untrusted input: tolerate invalid UTF-8 and escape it so
        # it cannot inject markup or scripts into the response page.
        body_text = html.escape(post_data.decode('utf-8', errors='replace'))
        # Send the status line and response headers
        self.send_response(200)
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.end_headers()
        # Send the response body
        message = f"<h1>POST Request Received</h1><p>Data: {body_text}</p>"
        self.wfile.write(message.encode('utf-8'))
def run(server_class=HTTPServer, handler_class=MyHandler, port=8000):
    """Start an HTTP server on all interfaces and serve until interrupted.

    Args:
        server_class: Server type to instantiate (defaults to HTTPServer).
        handler_class: Request handler class passed to the server.
        port: TCP port to listen on.
    """
    # An empty host string binds to every available interface.
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print(f"Server running at http://localhost:{port}")
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket even when serving stops with an
        # exception such as KeyboardInterrupt.
        httpd.server_close()


if __name__ == "__main__":
    run()
使用 Flask 创建 HTTP 服务器
Flask 是一个轻量级的 Web 框架,用于创建更复杂的 HTTP 服务器。
from flask import Flask, request, jsonify
# Install Flask:
# pip install flask
app = Flask(__name__)


@app.route('/')
def home():
    """Return the HTML landing page."""
    return "<h1>Hello, World!</h1>"


@app.route('/api', methods=['GET'])
def api_get():
    """Greet the name given in the `name` query parameter (default: World)."""
    name = request.args.get('name', 'World')
    return jsonify({"message": f"Hello, {name}!"})


@app.route('/api', methods=['POST'])
def api_post():
    """Greet the name/age supplied in a JSON object body.

    Returns 400 when the body is missing, is not valid JSON, or is not a
    JSON object, instead of failing with an AttributeError.
    """
    # silent=True yields None for a missing or malformed JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    name = data.get('name', 'World')
    age = data.get('age', 0)
    return jsonify({"message": f"Hello, {name}! You are {age} years old."})


if __name__ == "__main__":
    # NOTE: debug=True enables the interactive debugger and auto-reloader;
    # use it for local development only, never on a reachable server.
    app.run(debug=True, port=8000)
11.5 网络爬虫
网络爬虫是一种自动获取网页内容的程序。可以使用标准库 urllib,或第三方库 requests、BeautifulSoup 等来实现网络爬虫功能。
基本爬虫
import requests
from bs4 import BeautifulSoup
# Install BeautifulSoup:
# pip install beautifulsoup4
url = 'https://www.example.com'
try:
    # Send the request; the timeout keeps a stalled server from hanging us
    response = requests.get(url, timeout=10)
    # Parse the HTML
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract the heading; find() returns None when the page has no <h1>,
    # which must not prevent the links from being listed below.
    heading = soup.find('h1')
    if heading is not None:
        print(f"Title: {heading.text}")
    else:
        print("Title: (no <h1> found)")
    # Extract the links
    links = soup.find_all('a')
    print("Links:")
    for link in links:
        href = link.get('href')
        text = link.text
        print(f"{text}: {href}")
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")
爬取多个页面
import requests
from bs4 import BeautifulSoup
import time
# Crawl a single page
def crawl_page(url):
    """Fetch `url`, print its title and return its absolute links.

    Args:
        url: Address of the page to download.

    Returns:
        A list of href values that start with "http"; an empty list when
        the request fails.
    """
    try:
        # Send the request; the timeout keeps one slow host from stalling
        # the whole crawl
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Error crawling {url}: {e}")
        return []
    # Parse the HTML
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract the title; find() returns None for pages without <title>,
    # which must not discard the page's links.
    title_tag = soup.find('title')
    title = title_tag.text if title_tag is not None else ''
    print(f"Title: {title}")
    # Extract the links, keeping only absolute http(s) URLs
    hrefs = (link.get('href') for link in soup.find_all('a'))
    return [href for href in hrefs if href and href.startswith('http')]
# Entry point
def main():
    """Breadth-first crawl from the start URL, visiting at most 10 pages."""
    # Local import keeps this snippet self-contained.
    from collections import deque

    start_url = 'https://www.example.com'
    max_pages = 10
    # `seen` holds every URL already crawled or queued, giving O(1)
    # duplicate checks; the deque gives O(1) pops from the front.
    seen = {start_url}
    to_visit = deque([start_url])
    crawled = 0
    while to_visit and crawled < max_pages:
        url = to_visit.popleft()
        print(f"Crawling: {url}")
        crawled += 1
        # Queue links that have not been seen before
        for link in crawl_page(url):
            if link not in seen:
                seen.add(link)
                to_visit.append(link)
        # Avoid sending requests too quickly
        time.sleep(1)


if __name__ == "__main__":
    main()
处理动态内容
对于使用 JavaScript 动态生成内容的网站,可以使用 Selenium 来模拟浏览器行为。
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Install Selenium:
# pip install selenium
# Download the driver matching your browser and add it to PATH

# Start a Chrome browser session
driver = webdriver.Chrome()
try:
    # Open the page
    driver.get('https://www.example.com')
    # Give the page time to load
    time.sleep(2)
    # Read the page title
    print(f"Title: {driver.title}")
    # Collect every anchor element, then print each one's text and target
    anchors = driver.find_elements(By.TAG_NAME, 'a')
    print("Links:")
    for anchor in anchors:
        target = anchor.get_attribute('href')
        label = anchor.text
        print(f"{label}: {target}")
finally:
    # Always shut the browser down
    driver.quit()
11.6 综合示例
示例1:聊天服务器和客户端
服务器端
import socket
import threading
class ChatServer:
    """Threaded TCP chat server that relays each message to all other clients."""

    def __init__(self, host='127.0.0.1', port=12345):
        """Create the listening socket (not yet bound) and the client registry.

        Args:
            host: Interface address to bind to.
            port: TCP port to listen on.
        """
        self.host = host
        self.port = port
        self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Connected client sockets; always accessed while holding self.lock
        self.clients = []
        self.lock = threading.Lock()

    def start(self):
        """Bind, listen and accept clients forever, one thread per client."""
        # Allow an immediate restart without "Address already in use" errors.
        self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Bind to the address and port
        self.server_socket.bind((self.host, self.port))
        # Start listening
        self.server_socket.listen(5)
        print(f"Chat server started on {self.host}:{self.port}")
        try:
            # Accept connections
            while True:
                client_socket, addr = self.server_socket.accept()
                print(f"Client connected: {addr}")
                # Register the client
                with self.lock:
                    self.clients.append(client_socket)
                # Serve each client on its own daemon thread
                thread = threading.Thread(
                    target=self.handle_client, args=(client_socket, addr)
                )
                thread.daemon = True
                thread.start()
        finally:
            # Release the listening socket when the accept loop ends
            # (e.g. on KeyboardInterrupt).
            self.server_socket.close()

    def handle_client(self, client_socket, addr):
        """Receive messages from one client and relay them until it disconnects.

        Args:
            client_socket: Connected socket for this client.
            addr: Client address, used for log output only.
        """
        try:
            while True:
                # Receive data; an empty result means the peer closed
                data = client_socket.recv(1024)
                if not data:
                    break
                message = data.decode('utf-8')
                print(f"Received from {addr}: {message}")
                # Relay the message to every other client
                self.broadcast(message, client_socket)
        except Exception as e:
            # Thread boundary: report and fall through to cleanup.
            print(f"Error handling client {addr}: {e}")
        finally:
            # Unregister the client (broadcast may already have removed it)
            with self.lock:
                if client_socket in self.clients:
                    self.clients.remove(client_socket)
            client_socket.close()
            print(f"Client disconnected: {addr}")

    def broadcast(self, message, sender_socket):
        """Send `message` to every registered client except the sender.

        Clients whose socket fails are closed and unregistered.

        Args:
            message: Text to relay.
            sender_socket: Socket of the originating client (skipped).
        """
        # Encode once instead of once per recipient.
        payload = message.encode('utf-8')
        with self.lock:
            # Iterate over a snapshot: removing from the list being iterated
            # would silently skip the client following each removed one.
            for client in list(self.clients):
                if client == sender_socket:
                    continue
                try:
                    # sendall() retries until the whole payload is written.
                    client.sendall(payload)
                except OSError as e:
                    print(f"Error broadcasting to client: {e}")
                    client.close()
                    self.clients.remove(client)


if __name__ == "__main__":
    server = ChatServer()
    server.start()
客户端
import socket
import threading
class ChatClient:
    """Console chat client: sends typed lines and prints incoming messages."""

    def __init__(self, host='127.0.0.1', port=12345):
        """Create the (not yet connected) client socket.

        Args:
            host: Chat server address.
            port: Chat server TCP port.
        """
        self.host = host
        self.port = port
        self.client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def connect(self):
        """Connect, then send each typed line until the user enters 'exit'."""
        try:
            # Connect to the server
            self.client_socket.connect((self.host, self.port))
            print(f"Connected to chat server at {self.host}:{self.port}")
            # Print incoming messages on a background daemon thread
            receive_thread = threading.Thread(target=self.receive_messages)
            receive_thread.daemon = True
            receive_thread.start()
            # Send messages typed by the user
            while True:
                try:
                    message = input()
                except EOFError:
                    # stdin was closed: treat it like an explicit 'exit'.
                    break
                if message.lower() == 'exit':
                    break
                # sendall() retries until the whole message is written.
                self.client_socket.sendall(message.encode('utf-8'))
        finally:
            # Close the socket on every exit path, including a failed
            # connect or an interrupted input().
            self.client_socket.close()

    def receive_messages(self):
        """Print every message received until the connection ends or fails."""
        while True:
            try:
                # Receive data; an empty result means the server closed
                data = self.client_socket.recv(1024)
                if not data:
                    break
                message = data.decode('utf-8')
                print(f"{message}")
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error receiving message: {e}")
                break


if __name__ == "__main__":
    client = ChatClient()
    client.connect()
示例2:简单的 REST API
from flask import Flask, request, jsonify
app = Flask(__name__)

# In-memory stand-in for a database
users = [
    {"id": 1, "name": "John", "age": 30},
    {"id": 2, "name": "Alice", "age": 25},
    {"id": 3, "name": "Bob", "age": 35},
]


def _find_user(user_id):
    """Return the user dict whose id equals `user_id`, or None."""
    return next((user for user in users if user['id'] == user_id), None)


# GET /api/users
@app.route('/api/users', methods=['GET'])
def get_users():
    """Return the full user list."""
    return jsonify(users)


# GET /api/users/<id>
@app.route('/api/users/<int:user_id>', methods=['GET'])
def get_user(user_id):
    """Return one user, or 404 when the id is unknown."""
    user = _find_user(user_id)
    if user is None:
        return jsonify({"error": "User not found"}), 404
    return jsonify(user)


# POST /api/users
@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a user from a JSON object containing `name` and `age`."""
    # silent=True yields None for a missing or malformed JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'name' not in data or 'age' not in data:
        return jsonify({"error": "Missing required fields"}), 400
    new_user = {
        # len(users) + 1 would reuse an existing id after a deletion,
        # so derive the next id from the highest one in use.
        "id": max((user['id'] for user in users), default=0) + 1,
        "name": data['name'],
        "age": data['age'],
    }
    users.append(new_user)
    return jsonify(new_user), 201


# PUT /api/users/<id>
@app.route('/api/users/<int:user_id>', methods=['PUT'])
def update_user(user_id):
    """Update name and/or age of an existing user; 404 when the id is unknown."""
    user = _find_user(user_id)
    if user is None:
        return jsonify({"error": "User not found"}), 404
    data = request.get_json(silent=True)
    # Bodies that are not JSON objects are ignored rather than crashing.
    if isinstance(data, dict):
        user['name'] = data.get('name', user['name'])
        user['age'] = data.get('age', user['age'])
    return jsonify(user)


# DELETE /api/users/<id>
@app.route('/api/users/<int:user_id>', methods=['DELETE'])
def delete_user(user_id):
    """Delete a user; 404 when the id is unknown."""
    user = _find_user(user_id)
    if user is None:
        # Do not report success for a user that never existed.
        return jsonify({"error": "User not found"}), 404
    # Mutate the list in place so no `global` rebinding is needed.
    users.remove(user)
    return jsonify({"message": "User deleted"})


if __name__ == "__main__":
    # NOTE: debug=True enables the interactive debugger and auto-reloader;
    # use it for local development only.
    app.run(debug=True, port=8000)
示例3:天气查询工具
import requests
class WeatherApp:
    """Small client for the OpenWeatherMap current-weather endpoint."""

    def __init__(self, api_key):
        """Store the API key and the endpoint URL.

        Args:
            api_key: OpenWeatherMap API key sent with every request.
        """
        self.api_key = api_key
        self.base_url = 'https://api.openweathermap.org/data/2.5/weather'

    def get_weather(self, city):
        """Fetch and format the current weather for `city`.

        Args:
            city: City name passed as the `q` query parameter.

        Returns:
            The formatted report, or a string starting with "Error: " when
            the request fails or the response lacks an expected field.
        """
        params = {
            'q': city,
            'appid': self.api_key,
            'units': 'metric',  # report temperatures in Celsius
        }
        try:
            # Without a timeout the call could block forever on a dead host.
            response = requests.get(self.base_url, params=params, timeout=10)
            response.raise_for_status()  # turn HTTP error statuses into exceptions
            data = response.json()
            return self.format_weather(data)
        # RequestException: network/HTTP failures; the rest: malformed payload.
        except (requests.exceptions.RequestException,
                KeyError, IndexError, TypeError, ValueError) as e:
            return f"Error: {e}"

    def format_weather(self, data):
        """Render a decoded weather response as a multi-line report.

        Args:
            data: Decoded JSON dict; this method reads the keys `name`,
                `sys.country`, `main.temp`, `main.feels_like`,
                `main.humidity`, `weather[0].description` and `wind.speed`.

        Returns:
            A five-line human-readable summary.

        Raises:
            KeyError, IndexError: If an expected field is missing.
        """
        main = data['main']
        lines = [
            f"Weather in {data['name']}, {data['sys']['country']}:",
            f"Temperature: {main['temp']}°C (feels like {main['feels_like']}°C)",
            f"Humidity: {main['humidity']}%",
            f"Weather: {data['weather'][0]['description']}",
            f"Wind speed: {data['wind']['speed']} m/s",
        ]
        return "\n".join(lines)


# Usage example
if __name__ == "__main__":
    # Replace with your own OpenWeatherMap API key
    api_key = 'YOUR_API_KEY'
    app = WeatherApp(api_key)
    city = input("Enter city name: ")
    weather = app.get_weather(city)
    print(weather)
11.7 常见问题和解决方案
问题1:连接超时
# Bad example (kept on purpose): the 0.1-second timeout below is too short,
# and the call is not wrapped in any exception handling.
import requests
response = requests.get('https://www.example.com', timeout=0.1) # timeout is too short
# Solution: use a sensible timeout and handle the timeout case explicitly
import requests

try:
    # Set a reasonable timeout (in seconds)
    response = requests.get('https://www.example.com', timeout=10)
    status = response.status_code
    print(status)
except requests.exceptions.Timeout:
    # Raised when the request exceeds the timeout
    print("Request timed out")
except Exception as e:
    # Any other failure is reported with its message
    print(f"Error: {e}")
问题2:代理设置
# Solution: route requests through a proxy
import requests

# The dict key selects which target URLs use the proxy; the value is the
# proxy's own URL. An ordinary HTTP proxy is reached with the http:// scheme
# even for HTTPS targets (the proxy tunnels them via CONNECT). Writing
# https:// here would make requests attempt TLS to the proxy itself, which
# fails on proxies that do not speak TLS.
proxies = {
    'http': 'http://proxy.example.com:8080',
    'https': 'http://proxy.example.com:8080',
}
try:
    # The timeout keeps an unreachable proxy from blocking forever.
    response = requests.get('https://www.example.com', proxies=proxies, timeout=10)
    print(response.status_code)
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")
问题3:SSL 证书验证
# Solution: control how the server's TLS certificate is verified
import requests

# Option 1: disable certificate verification (not recommended in production:
# the connection becomes vulnerable to man-in-the-middle attacks).
try:
    response = requests.get('https://www.example.com', verify=False, timeout=10)
    print(response.status_code)
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# Option 2: verify against a specific CA bundle / certificate file.
# `verify` takes the path of the trusted certificates; `cert` is a different
# option that supplies a *client-side* certificate and does not change how
# the server is verified.
try:
    response = requests.get(
        'https://www.example.com', verify='/path/to/cert.pem', timeout=10
    )
    print(response.status_code)
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")
问题4:并发请求
# 解决方案:使用 threading 或 concurrent.futures
import requests
import concurrent.futures
# Pages to request in parallel
urls = [
    'https://www.example.com',
    'https://www.google.com',
    'https://www.github.com'
]


def fetch_url(url):
    """Request `url` and return (url, status code), or (url, error text) on failure."""
    try:
        status = requests.get(url, timeout=5).status_code
    except Exception as e:
        return url, str(e)
    return url, status


# Fetch with three worker threads; map() yields results in input order
executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
with executor:
    results = [*executor.map(fetch_url, urls)]

for url, result in results:
    print(f"{url}: {result}")
11.8 练习
- Socket 编程练习:创建一个 TCP 服务器和客户端,实现简单的消息传递
- HTTP 客户端练习:使用 requests 库发送 GET 和 POST 请求
- HTTP 服务器练习:使用 Flask 创建一个简单的 Web 服务器
- 网络爬虫练习:爬取一个网站的内容,提取链接和标题
- 综合练习:创建一个天气查询工具,使用第三方 API
- 挑战练习:创建一个完整的聊天应用,包括服务器和多个客户端
11.9 小结
本章我们学习了:
- 网络编程的基本概念
- Socket 编程(TCP 和 UDP)
- HTTP 客户端(使用 urllib 和 requests)
- HTTP 服务器(使用 http.server 和 Flask)
- 网络爬虫(使用 requests 和 BeautifulSoup)
- 综合示例
现在你已经掌握了 Python 的网络编程,可以开始学习 Python 的数据分析了!