跳到主要内容

第11章:Python 网络编程

11.1 网络编程基础

网络编程是指编写程序来实现计算机之间的通信。Python 提供了丰富的库来支持网络编程。

基本概念

  • IP 地址:标识网络中的设备
  • 端口:标识设备上的服务
  • 协议:通信规则,如 TCP、UDP、HTTP 等
  • Socket:网络通信的端点

网络协议

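| 协议 | 描述 | 端口 |
| --- | --- | --- |
| HTTP | 超文本传输协议 | 80 |
| HTTPS | 安全的超文本传输协议 | 443 |
| FTP | 文件传输协议 | 21 |
| SMTP | 简单邮件传输协议 | 25 |
| POP3 | 邮局协议版本 3 | 110 |
| IMAP | 互联网消息访问协议 | 143 |
| SSH | 安全外壳协议 | 22 |
| Telnet | 远程登录协议 | 23 |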

11.2 Socket 编程

Socket 是网络通信的基础,它提供了一种在不同主机之间传输数据的方式。

TCP Socket

TCP(传输控制协议)是一种可靠的、面向连接的协议。

服务器端

import socket

# Minimal TCP server: accepts one client at a time, prints what it receives
# and answers with a fixed greeting.

host = '127.0.0.1'  # Loopback address: only reachable from this machine
port = 12345  # Port number

# The context manager closes the listening socket even when the loop is
# interrupted (for example with Ctrl+C).
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    # Allow the port to be rebound immediately after a restart instead of
    # failing with "Address already in use".
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    # Bind the address and port
    s.bind((host, port))

    # Start listening; up to 5 connections may wait in the backlog
    s.listen(5)

    print(f"Server listening on {host}:{port}")

    while True:
        # Accept a connection
        client_socket, addr = s.accept()

        # "with" guarantees the client socket is closed even if receiving,
        # decoding or sending raises.
        with client_socket:
            print(f"Connected by {addr}")

            # Receive data; undecodable bytes are replaced so one bad client
            # cannot crash the server.
            data = client_socket.recv(1024)
            print(f"Received: {data.decode('utf-8', errors='replace')}")

            # Send the reply; sendall() keeps writing until every byte has
            # been sent, whereas send() may transmit only part of the buffer.
            response = "Hello, client!"
            client_socket.sendall(response.encode('utf-8'))

客户端

import socket

# Minimal TCP client: connects, sends one greeting and prints the reply.

host = '127.0.0.1'  # Server address
port = 12345  # Server port

# The context manager closes the socket on every exit path, including a
# failed connect() or an error while sending/receiving.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    # Connect to the server
    s.connect((host, port))

    # Send data; sendall() retries until the whole message has been written
    message = "Hello, server!"
    s.sendall(message.encode('utf-8'))

    # Receive data
    data = s.recv(1024)
    print(f"Received: {data.decode('utf-8')}")

UDP Socket

UDP(用户数据报协议)是一种不可靠的、无连接的协议。

服务器端

import socket

# Minimal UDP server: prints every datagram it receives and answers the
# sender with a fixed greeting.

host = '127.0.0.1'  # Loopback address: only reachable from this machine
port = 12345  # Port number

# The context manager releases the socket when the loop is interrupted.
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
    # Bind the address and port
    s.bind((host, port))

    print(f"Server listening on {host}:{port}")

    while True:
        # Receive one datagram together with the sender's address
        data, addr = s.recvfrom(1024)
        # Replace undecodable bytes so a malformed datagram cannot stop the
        # server with a UnicodeDecodeError.
        print(f"Received from {addr}: {data.decode('utf-8', errors='replace')}")

        # Reply to the sender
        response = "Hello, client!"
        s.sendto(response.encode('utf-8'), addr)

客户端

import socket

# Minimal UDP client: sends one datagram and waits briefly for the reply.

# Server address and port
host = '127.0.0.1'
port = 12345

# The context manager closes the socket on every exit path.
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
    # UDP gives no delivery guarantee: without a timeout a lost request or
    # reply would leave recvfrom() blocked forever.
    s.settimeout(5)

    # Send data
    message = "Hello, server!"
    s.sendto(message.encode('utf-8'), (host, port))

    # Receive data
    try:
        data, addr = s.recvfrom(1024)
    except socket.timeout:
        print("No response from server (timed out)")
    else:
        print(f"Received from {addr}: {data.decode('utf-8')}")

11.3 HTTP 客户端

HTTP 客户端用于发送 HTTP 请求并接收响应。Python 可以使用标准库 urllib 或第三方库 requests 来实现 HTTP 客户端功能。

使用 urllib

import urllib.request
import urllib.parse

# Upper bound in seconds for each request, so a stalled server cannot hang
# the script.
TIMEOUT = 10

# Send a GET request
url = 'https://www.example.com'
try:
    # "with" closes the HTTP response (and its connection) when done.
    with urllib.request.urlopen(url, timeout=TIMEOUT) as response:
        status = response.status
        body = response.read()
    print(f"Status code: {status}")
    print(f"Data: {body.decode('utf-8')[:500]}...")
# URLError, HTTPError and socket timeouts derive from OSError; an invalid URL
# or undecodable body raises ValueError.
except (OSError, ValueError) as e:
    print(f"Error: {e}")

# Send a POST request
url = 'https://httpbin.org/post'
form = {'name': 'John', 'age': 30}
# Form fields must be URL-encoded and converted to bytes before sending.
payload = urllib.parse.urlencode(form).encode('utf-8')
try:
    req = urllib.request.Request(url, data=payload, method='POST')
    with urllib.request.urlopen(req, timeout=TIMEOUT) as response:
        status = response.status
        body = response.read()
    print(f"Status code: {status}")
    print(f"Data: {body.decode('utf-8')}")
except (OSError, ValueError) as e:
    print(f"Error: {e}")

使用 requests

requests 是一个第三方库,提供了更简洁、更强大的 HTTP 客户端功能。

import requests

# Install requests first:
# pip install requests

# requests does not time out unless told to, so every call below passes an
# explicit limit (in seconds).
TIMEOUT = 10

# Send a GET request
url = 'https://www.example.com'
try:
    response = requests.get(url, timeout=TIMEOUT)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.text[:500]}...")
except requests.RequestException as e:
    print(f"Error: {e}")

# Send a POST request with a JSON body
url = 'https://httpbin.org/post'
data = {'name': 'John', 'age': 30}
try:
    response = requests.post(url, json=data, timeout=TIMEOUT)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.json()}")
# ValueError covers a response body that is not valid JSON.
except (requests.RequestException, ValueError) as e:
    print(f"Error: {e}")

# GET request with query parameters
url = 'https://httpbin.org/get'
params = {'name': 'John', 'age': 30}
try:
    response = requests.get(url, params=params, timeout=TIMEOUT)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.json()}")
except (requests.RequestException, ValueError) as e:
    print(f"Error: {e}")

# Request with custom headers
url = 'https://httpbin.org/headers'
headers = {'User-Agent': 'Mozilla/5.0'}
try:
    response = requests.get(url, headers=headers, timeout=TIMEOUT)
    print(f"Status code: {response.status_code}")
    print(f"Data: {response.json()}")
except (requests.RequestException, ValueError) as e:
    print(f"Error: {e}")

11.4 HTTP 服务器

Python 提供了 http.server 模块来创建简单的 HTTP 服务器。

基本 HTTP 服务器

import http.server
import socketserver

# Serves the files of the current working directory over HTTP.
PORT = 8000

Handler = http.server.SimpleHTTPRequestHandler


class ReusableTCPServer(socketserver.TCPServer):
    """TCPServer that may rebind its port immediately after a restart."""

    # Without this, restarting the script shortly after stopping it can fail
    # with "Address already in use".
    allow_reuse_address = True


# NOTE: the empty host string binds to all interfaces, so the directory is
# visible to other machines on the network.
with ReusableTCPServer(("", PORT), Handler) as httpd:
    print(f"Server running at http://localhost:{PORT}")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server; exit without a traceback.
        print("\nServer stopped")

自定义 HTTP 服务器

import html
from http.server import BaseHTTPRequestHandler, HTTPServer


class MyHandler(BaseHTTPRequestHandler):
    """Request handler that answers GET and POST with a small HTML page."""

    def _send_html(self, status, markup):
        """Send *markup* as a complete UTF-8 HTML response with *status*."""
        body = markup.encode('utf-8')
        # Response headers
        self.send_response(status)
        # Declare the charset so browsers decode the body as UTF-8.
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        # Response body
        self.wfile.write(body)

    def do_GET(self):
        """Answer every GET request with a fixed greeting page."""
        self._send_html(200, "<h1>Hello, World!</h1>")

    def do_POST(self):
        """Echo the request body back inside an HTML page."""
        # A request without Content-Length is treated as having an empty body
        # instead of crashing on int(None).
        try:
            content_length = int(self.headers.get('Content-Length', 0))
        except ValueError:
            self.send_error(400, "Invalid Content-Length")
            return

        # Read the request body
        post_data = self.rfile.read(max(content_length, 0))

        # The body is client-controlled: escape it before embedding it in
        # HTML, otherwise submitted markup/scripts would run in the browser.
        text = html.escape(post_data.decode('utf-8', errors='replace'))
        self._send_html(200, f"<h1>POST Request Received</h1><p>Data: {text}</p>")

def run(server_class=HTTPServer, handler_class=MyHandler, port=8000):
    """Start an HTTP server on *port* and serve requests until interrupted.

    Args:
        server_class: Server type to instantiate with (address, handler).
        handler_class: Request handler class used for each request.
        port: TCP port to listen on; the server binds to all interfaces.
    """
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print(f"Server running at http://localhost:{port}")
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket however serve_forever() ends
        # (Ctrl+C, an error, ...).
        httpd.server_close()


if __name__ == "__main__":
    run()

使用 Flask 创建 HTTP 服务器

Flask 是一个轻量级的 Web 框架,用于创建更复杂的 HTTP 服务器。

from flask import Flask, request, jsonify

# Install Flask first:
# pip install flask

app = Flask(__name__)


@app.route('/')
def home():
    """Serve the landing page."""
    return "<h1>Hello, World!</h1>"


@app.route('/api', methods=['GET'])
def api_get():
    """Greet the name given in the ``name`` query parameter."""
    name = request.args.get('name', 'World')
    return jsonify({"message": f"Hello, {name}!"})


@app.route('/api', methods=['POST'])
def api_post():
    """Greet the person described by the JSON object in the request body."""
    # silent=True yields None instead of raising when the body is missing or
    # not valid JSON; anything that is not a JSON object is rejected before
    # .get() is called on it.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    name = data.get('name', 'World')
    age = data.get('age', 0)
    return jsonify({"message": f"Hello, {name}! You are {age} years old."})


if __name__ == "__main__":
    # NOTE: debug=True enables the interactive debugger and must only be
    # used during local development.
    app.run(debug=True, port=8000)

11.5 网络爬虫

网络爬虫是一种自动获取网页内容的程序。Python 可以使用 urllib、requests、BeautifulSoup 等库来实现网络爬虫功能。

基本爬虫

import requests
from bs4 import BeautifulSoup

# Install BeautifulSoup first:
# pip install beautifulsoup4

url = 'https://www.example.com'
try:
    # Send the request; the timeout keeps a stalled server from hanging the
    # script, and raise_for_status() stops error pages from being parsed.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Parse the HTML
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the heading. find() returns None when the page has no <h1>,
    # so check before reading .text; the links below are still listed.
    heading = soup.find('h1')
    if heading is not None:
        print(f"Title: {heading.text}")
    else:
        print("Title: (no <h1> found)")

    # Extract the links
    links = soup.find_all('a')
    print("Links:")
    for link in links:
        href = link.get('href')
        text = link.text
        print(f"{text}: {href}")

except requests.RequestException as e:
    print(f"Error: {e}")

爬取多个页面

import requests
from bs4 import BeautifulSoup
import time
from collections import deque


def crawl_page(url):
    """Fetch *url*, print its title and return the absolute links it contains.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``href`` values that start with ``http``. The list is empty
        when the request fails.
    """
    try:
        # Send the request (bounded by a timeout so one slow host cannot
        # stall the whole crawl)
        response = requests.get(url, timeout=10)
        # Parse the HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the title. Pages without <title> used to raise here and
        # lose all of their links; print an empty title instead.
        title_tag = soup.find('title')
        title = title_tag.text if title_tag is not None else ''
        print(f"Title: {title}")

        # Extract the links, keeping only absolute http(s) URLs
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        return [href for href in hrefs if href and href.startswith('http')]

    except requests.RequestException as e:
        print(f"Error crawling {url}: {e}")
        return []


def main(start_url='https://www.example.com', max_pages=10):
    """Crawl breadth-first from *start_url*, visiting at most *max_pages* pages.

    Args:
        start_url: First page to visit.
        max_pages: Upper bound on the number of pages fetched.
    """
    # deque gives O(1) pops from the left; "seen" holds every URL that was
    # ever queued so membership tests are O(1) and nothing is queued twice.
    to_visit = deque([start_url])
    seen = {start_url}
    visited_count = 0

    while to_visit and visited_count < max_pages:
        url = to_visit.popleft()
        print(f"Crawling: {url}")
        visited_count += 1

        # Queue links that have not been seen before
        for link in crawl_page(url):
            if link not in seen:
                seen.add(link)
                to_visit.append(link)

        # Pause between requests to avoid hammering the server
        time.sleep(1)


if __name__ == "__main__":
    main()

处理动态内容

对于使用 JavaScript 动态生成内容的网站,可以使用 Selenium 来模拟浏览器行为。

from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# 安装 Selenium
# pip install selenium
# 下载对应浏览器的驱动并添加到 PATH

# Start the browser
driver = webdriver.Chrome()

try:
    # Open the page
    driver.get('https://www.example.com')

    # Give the page time to finish loading
    time.sleep(2)

    # Read the page title
    print(f"Title: {driver.title}")

    # List every anchor element with its text and target
    print("Links:")
    for anchor in driver.find_elements(By.TAG_NAME, 'a'):
        target = anchor.get_attribute('href')
        label = anchor.text
        print(f"{label}: {target}")

finally:
    # Always shut the browser down, even after an error
    driver.quit()

11.6 综合示例

示例1:聊天服务器和客户端

服务器端

import socket
import threading

class ChatServer:
    """Threaded TCP chat server that relays each message to all other clients."""

    def __init__(self, host='127.0.0.1', port=12345):
        """Create the listening socket; nothing is bound until start().

        Args:
            host: Address to bind to.
            port: TCP port to listen on.
        """
        self.host = host
        self.port = port
        self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Connected client sockets; every access is guarded by self.lock.
        self.clients = []
        self.lock = threading.Lock()

    def start(self):
        """Bind, listen and accept clients forever, one thread per client."""
        # Allow the port to be rebound immediately after a restart.
        self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Bind the address and port
        self.server_socket.bind((self.host, self.port))
        # Start listening
        self.server_socket.listen(5)
        print(f"Chat server started on {self.host}:{self.port}")

        try:
            # Accept connections
            while True:
                client_socket, addr = self.server_socket.accept()
                print(f"Client connected: {addr}")

                # Register the client
                with self.lock:
                    self.clients.append(client_socket)

                # Serve each client on its own daemon thread
                thread = threading.Thread(
                    target=self.handle_client,
                    args=(client_socket, addr),
                    daemon=True,
                )
                thread.start()
        finally:
            # Release the listening socket when the accept loop ends
            # (Ctrl+C or an error).
            self.server_socket.close()

    def handle_client(self, client_socket, addr):
        """Receive messages from one client and relay them until it disconnects.

        Args:
            client_socket: Connected socket of the client.
            addr: Address of the client, used for log output only.
        """
        try:
            while True:
                # Receive data; an empty result means the peer closed
                data = client_socket.recv(1024)
                if not data:
                    break

                # Replace undecodable bytes instead of dropping the client.
                message = data.decode('utf-8', errors='replace')
                print(f"Received from {addr}: {message}")

                # Relay the message to everyone else
                self.broadcast(message, client_socket)
        except OSError as e:
            print(f"Error handling client {addr}: {e}")
        finally:
            # Unregister the client (broadcast() may already have done so)
            with self.lock:
                if client_socket in self.clients:
                    self.clients.remove(client_socket)
            client_socket.close()
            print(f"Client disconnected: {addr}")

    def broadcast(self, message, sender_socket):
        """Send *message* to every connected client except *sender_socket*.

        Clients whose socket fails are closed and unregistered.

        Args:
            message: Text to relay.
            sender_socket: Socket the message came from; it is skipped.
        """
        # Encode once instead of once per recipient.
        payload = message.encode('utf-8')
        with self.lock:
            failed = []
            for client in self.clients:
                if client is sender_socket:
                    continue
                try:
                    # sendall() writes the whole payload; send() may not.
                    client.sendall(payload)
                except OSError as e:
                    print(f"Error broadcasting to client: {e}")
                    failed.append(client)

            # Remove failed clients only after the loop: deleting from the
            # list while iterating it would skip the following client.
            for client in failed:
                client.close()
                self.clients.remove(client)


if __name__ == "__main__":
    server = ChatServer()
    server.start()

客户端

import socket
import threading

class ChatClient:
    """Console chat client: sends typed lines and prints incoming messages."""

    def __init__(self, host='127.0.0.1', port=12345):
        """Create the client socket; no connection is made until connect().

        Args:
            host: Address of the chat server.
            port: TCP port of the chat server.
        """
        self.host = host
        self.port = port
        self.client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def connect(self):
        """Connect to the server and forward typed lines until 'exit' or EOF."""
        try:
            # Connect to the server
            self.client_socket.connect((self.host, self.port))
            print(f"Connected to chat server at {self.host}:{self.port}")

            # Receive messages on a background daemon thread
            receive_thread = threading.Thread(
                target=self.receive_messages, daemon=True
            )
            receive_thread.start()

            # Send messages typed by the user
            while True:
                try:
                    message = input()
                except EOFError:
                    # Input stream closed (Ctrl+D / end of piped input)
                    break
                if message.lower() == 'exit':
                    break
                # sendall() writes the whole message; send() may not.
                self.client_socket.sendall(message.encode('utf-8'))
        finally:
            # Close the socket on every exit path, including a failed
            # connect() or Ctrl+C.
            self.client_socket.close()

    def receive_messages(self):
        """Print messages from the server until the connection ends."""
        while True:
            try:
                # Receive a message; an empty result means the server closed
                data = self.client_socket.recv(1024)
                if not data:
                    break
                # Replace undecodable bytes so one bad chunk does not end
                # the receive loop.
                message = data.decode('utf-8', errors='replace')
                print(f"{message}")
            except OSError as e:
                print(f"Error receiving message: {e}")
                break


if __name__ == "__main__":
    client = ChatClient()
    client.connect()

示例2:简单的 REST API

import itertools

from flask import Flask, request, jsonify

app = Flask(__name__)

# In-memory stand-in for a database
users = [
    {"id": 1, "name": "John", "age": 30},
    {"id": 2, "name": "Alice", "age": 25},
    {"id": 3, "name": "Bob", "age": 35},
]

# IDs come from a counter. Deriving them from len(users) + 1 would hand out
# an ID that is still in use as soon as any earlier user has been deleted.
_user_ids = itertools.count(max((user["id"] for user in users), default=0) + 1)


def _find_user(user_id):
    """Return the user dict with the given id, or None when there is none."""
    return next((user for user in users if user['id'] == user_id), None)


# GET /api/users
@app.route('/api/users', methods=['GET'])
def get_users():
    """Return all users."""
    return jsonify(users)


# GET /api/users/<id>
@app.route('/api/users/<int:user_id>', methods=['GET'])
def get_user(user_id):
    """Return one user, or 404 when the id is unknown."""
    user = _find_user(user_id)
    if user is None:
        return jsonify({"error": "User not found"}), 404
    return jsonify(user)


# POST /api/users
@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a user from a JSON object with ``name`` and ``age``."""
    # silent=True yields None for a missing/invalid JSON body; non-object
    # JSON (lists, strings, ...) is rejected as well.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'name' not in data or 'age' not in data:
        return jsonify({"error": "Missing required fields"}), 400

    new_user = {
        "id": next(_user_ids),
        "name": data['name'],
        "age": data['age'],
    }
    users.append(new_user)
    return jsonify(new_user), 201


# PUT /api/users/<id>
@app.route('/api/users/<int:user_id>', methods=['PUT'])
def update_user(user_id):
    """Update ``name`` and/or ``age`` of a user; 404 when the id is unknown."""
    user = _find_user(user_id)
    if user is None:
        return jsonify({"error": "User not found"}), 404

    data = request.get_json(silent=True)
    if isinstance(data, dict):
        # Fields missing from the request keep their current value.
        user['name'] = data.get('name', user['name'])
        user['age'] = data.get('age', user['age'])

    return jsonify(user)


# DELETE /api/users/<id>
@app.route('/api/users/<int:user_id>', methods=['DELETE'])
def delete_user(user_id):
    """Delete a user; 404 when the id is unknown, like GET and PUT."""
    user = _find_user(user_id)
    if user is None:
        return jsonify({"error": "User not found"}), 404

    # Mutate the shared list in place instead of rebinding a global.
    users.remove(user)
    return jsonify({"message": "User deleted"})


if __name__ == "__main__":
    # NOTE: debug=True enables the interactive debugger and must only be
    # used during local development.
    app.run(debug=True, port=8000)

示例3:天气查询工具

import requests

class WeatherApp:
    """Small client for the OpenWeatherMap current-weather endpoint."""

    def __init__(self, api_key, timeout=10):
        """Store the credentials and request settings.

        Args:
            api_key: OpenWeatherMap API key sent with every request.
            timeout: Seconds to wait for the API before giving up.
        """
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = 'https://api.openweathermap.org/data/2.5/weather'

    def get_weather(self, city):
        """Return a formatted weather report for *city*.

        Args:
            city: City name passed to the API as the ``q`` parameter.

        Returns:
            The report text, or a string starting with ``Error:`` when the
            request fails or the response lacks the expected fields.
        """
        params = {
            'q': city,
            'appid': self.api_key,
            'units': 'metric',  # Temperatures in degrees Celsius
        }

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                self.base_url, params=params, timeout=self.timeout
            )
            response.raise_for_status()  # Turn HTTP error codes into exceptions
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            return f"Error: {e}"

        try:
            return self.format_weather(data)
        except (KeyError, IndexError, TypeError) as e:
            # Name the actual problem instead of printing a bare key.
            return f"Error: unexpected response format ({e!r})"

    def format_weather(self, data):
        """Build the multi-line report from a decoded API response.

        Args:
            data: Decoded JSON object; the keys read are ``name``,
                ``sys.country``, ``main.temp``, ``main.feels_like``,
                ``main.humidity``, ``weather[0].description`` and
                ``wind.speed``.

        Returns:
            The report as a single string with newline-separated lines.

        Raises:
            KeyError, IndexError: If an expected field is missing.
        """
        main = data['main']
        lines = [
            f"Weather in {data['name']}, {data['sys']['country']}:",
            f"Temperature: {main['temp']}°C (feels like {main['feels_like']}°C)",
            f"Humidity: {main['humidity']}%",
            f"Weather: {data['weather'][0]['description']}",
            f"Wind speed: {data['wind']['speed']} m/s",
        ]
        return "\n".join(lines)


# Usage example
if __name__ == "__main__":
    # Replace with your own OpenWeatherMap API key
    api_key = 'YOUR_API_KEY'
    app = WeatherApp(api_key)

    # Surrounding whitespace is not part of the city name.
    city = input("Enter city name: ").strip()
    weather = app.get_weather(city)
    print(weather)

11.7 常见问题和解决方案

问题1:连接超时

# Bad example
import requests

# A timeout this short expires before most servers can answer
response = requests.get('https://www.example.com', timeout=0.1)

# Solution
import requests

try:
    # Use a reasonable timeout
    response = requests.get('https://www.example.com', timeout=10)
    print(response.status_code)
except requests.exceptions.Timeout:
    # Raised when the server does not answer within the limit
    print("Request timed out")
except Exception as e:
    print(f"Error: {e}")

问题2:代理设置

# Solution
import requests

# The keys name the scheme of the *target* URL; the values are the proxy's
# own URL. An ordinary HTTP proxy is reached over plain HTTP even for HTTPS
# targets (the traffic is tunnelled with CONNECT), so both entries use an
# http:// proxy URL. An https:// proxy URL makes requests speak TLS to the
# proxy itself, which fails unless the proxy really supports that.
proxies = {
    'http': 'http://proxy.example.com:8080',
    'https': 'http://proxy.example.com:8080',
}

try:
    response = requests.get(
        'https://www.example.com', proxies=proxies, timeout=10
    )
    print(response.status_code)
except requests.RequestException as e:
    print(f"Error: {e}")

问题3:SSL 证书验证

# Solution
import requests

# Disable SSL certificate verification (not recommended in production)
try:
    response = requests.get('https://www.example.com', verify=False)
    print(response.status_code)
except requests.RequestException as e:
    print(f"Error: {e}")

# Or verify the server against a specific CA bundle / certificate file.
# `verify=` selects the certificates used to validate the server, whereas
# `cert=` supplies a client-side certificate and does not affect how the
# server is verified.
try:
    response = requests.get('https://www.example.com', verify='/path/to/cert.pem')
    print(response.status_code)
# OSError covers a certificate path that does not exist.
except (requests.RequestException, OSError) as e:
    print(f"Error: {e}")

问题4:并发请求

# Solution: use threading or concurrent.futures
import requests
import concurrent.futures

urls = [
    'https://www.example.com',
    'https://www.google.com',
    'https://www.github.com'
]


def fetch_url(url):
    """Fetch *url* and return ``(url, status_code)``, or ``(url, error_text)`` on failure."""
    try:
        outcome = requests.get(url, timeout=5).status_code
    except Exception as e:
        outcome = str(e)
    return url, outcome


# Run up to three downloads at once; map() yields results in input order
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    results = list(executor.map(fetch_url, urls))

for url, result in results:
    print(f"{url}: {result}")

11.8 练习

  1. Socket 编程练习:创建一个 TCP 服务器和客户端,实现简单的消息传递
  2. HTTP 客户端练习:使用 requests 库发送 GET 和 POST 请求
  3. HTTP 服务器练习:使用 Flask 创建一个简单的 Web 服务器
  4. 网络爬虫练习:爬取一个网站的内容,提取链接和标题
  5. 综合练习:创建一个天气查询工具,使用第三方 API
  6. 挑战练习:创建一个完整的聊天应用,包括服务器和多个客户端

11.9 小结

本章我们学习了:

  • 网络编程的基本概念
  • Socket 编程(TCP 和 UDP)
  • HTTP 客户端(使用 urllib 和 requests)
  • HTTP 服务器(使用 http.server 和 Flask)
  • 网络爬虫(使用 requests 和 BeautifulSoup)
  • 综合示例

现在你已经掌握了 Python 的网络编程,可以开始学习 Python 的数据分析了!