在现代Web应用中,处理耗时任务(如大型Excel文件处理)是一个常见需求。本文将详细介绍如何构建一个完整的文件处理服务,包含以下核心技术点:

| 组件 | 技术 | 作用 |
|---|---|---|
| Web框架 | Flask | 提供RESTful接口和页面渲染 |
| 异步任务 | Celery+Redis | 后台任务调度和状态跟踪 |
| 邮件服务 | smtplib | 发送处理结果通知 |
| 文件处理 | openpyxl | Excel文件读写操作 |
# app.py
import os
import smtplib
from datetime import datetime
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from html import escape
from uuid import uuid4

from celery import Celery
from flask import Flask, jsonify, request
from werkzeug.utils import secure_filename
app = Flask(__name__)
app.config.update(
UPLOAD_FOLDER='uploads',
ALLOWED_EXTENSIONS={'xlsx', 'xls'},
CELERY_BROKER_URL='redis://localhost:6379/0'
)
# Celery配置
celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])
celery.conf.update(app.config)@app.route('/upload', methods=['POST'])
def upload_file():
file = request.files['file']
if not allowed_file(file.filename):
return jsonify(error="仅支持Excel文件"), 400
filename = secure_filename(file.filename)
input_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(input_path)
task = process_file.delay(input_path, request.form['email'])
return jsonify(task_id=task.id), 202@celery.task(bind=True)
def process_file(self, input_path, email):
try:
# 处理进度回调
def progress_callback(pct):
self.update_state(
state='PROGRESS',
meta={'progress': pct}
)
# 文件处理
output_path = process_excel(input_path, progress_callback)
# 发送邮件
send_email_with_attachment(
filepath=output_path,
receiver_email=email
)
return {'result': output_path}
except Exception as e:
self.retry(exc=e, countdown=60)def secure_processing(filepath):
# 路径安全检查
if not os.path.abspath(filepath).startswith(os.getcwd()):
raise SecurityError("非法文件路径")
# 文件类型验证
with open(filepath, 'rb') as f:
header = f.read(4)
if not header.startswith(b'PK\x03\x04'): # Excel文件魔数
raise InvalidFileType()def send_email_with_attachment(filepath, receiver_email):
msg = MIMEMultipart()
msg['Subject'] = '文件处理完成通知'
# 带样式的HTML正文
html = f"""
<html>
<body>
<p>您的文件已处理完成:</p>
<table border="1">
<tr><td>文件名</td><td>{os.path.basename(filepath)}</td></tr>
<tr><td>处理时间</td><td>{datetime.now()}</td></tr>
</table>
</body>
</html>
"""
msg.attach(MIMEText(html, 'html'))
# 加密附件
with open(filepath, 'rb') as f:
part = MIMEApplication(
f.read(),
_encoder=lambda x: x.decode('latin1')
)
part.add_header(
'Content-Disposition',
'attachment',
filename=os.path.basename(filepath)
msg.attach(part)
# TLS安全连接
with smtplib.SMTP_SSL('smtp.qq.com', 465) as server:
server.login(os.getenv('SMTP_USER'), os.getenv('SMTP_PASS'))
server.send_message(msg)// progress.js
class ProgressManager {
constructor(taskId) {
this.taskId = taskId
this.progressBar = document.getElementById('progress-bar')
this.statusEl = document.getElementById('status-message')
}
startPolling() {
this.interval = setInterval(() => {
fetch(`/api/tasks/${this.taskId}/status`)
.then(res => res.json())
.then(data => {
this.updateProgress(data)
if (data.state === 'SUCCESS') this.onComplete(data)
})
}, 1000)
}
updateProgress(data) {
const pct = data.progress || 0
this.progressBar.style.width = `${pct}%`
this.statusEl.textContent = this.getStatusText(data.state)
}
}<!-- upload.html -->
<div class="upload-area" id="dropZone">
  <!-- The /upload handler reads the "email" form field, so it has to be sent along with the file. -->
  <input type="email" id="emailInput" placeholder="接收结果的邮箱" required>
  <input type="file" id="fileInput" accept=".xlsx,.xls">
  <div class="progress-container">
    <div class="progress-bar" id="uploadProgress"></div>
  </div>
  <!-- Status line: ProgressManager looks this element up by id. -->
  <div id="status-message"></div>
</div>
<!-- ProgressManager (progress.js) must be loaded on the page for the script below to work. -->
<script>
document.getElementById('fileInput').addEventListener('change', async (e) => {
  const input = e.target
  const file = input.files[0]
  // The change event also fires when the dialog is cancelled.
  if (!file) return

  const statusEl = document.getElementById('status-message')
  const email = document.getElementById('emailInput').value.trim()
  if (!email) {
    statusEl.textContent = '请先填写邮箱地址'
    input.value = ''
    return
  }

  const formData = new FormData()
  formData.append('file', file)
  formData.append('email', email)

  try {
    const response = await fetch('/upload', {
      method: 'POST',
      body: formData
    })
    if (!response.ok) {
      // Error responses carry {"error": ...}; fall back to the status code.
      const body = await response.json().catch(() => ({}))
      statusEl.textContent = body.error || `上传失败 (${response.status})`
      return
    }
    const { task_id } = await response.json()
    new ProgressManager(task_id).startPolling()
  } catch (err) {
    statusEl.textContent = '上传失败,请稍后重试'
  } finally {
    // Reset so that picking the same file again triggers another change event.
    input.value = ''
  }
})
</script>

# nginx.conf
server {
    listen 80;

    # Request bodies above this size are rejected by nginx.
    client_max_body_size 20M;
    proxy_read_timeout 300s;

    location / {
        proxy_pass http://flask_app:5000;
        # Forwarding Upgrade only takes effect together with HTTP/1.1 and a
        # Connection header; nginx otherwise proxies with HTTP/1.0.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }

    # NOTE(review): the /celery prefix is passed through unchanged, so the
    # upstream presumably has to be started with a matching URL prefix - confirm.
    location /celery {
        proxy_pass http://celery_flower:5555;
    }
}

敏感信息保护:
# .env 示例
SMTP_USER=your_smtp_account
SMTP_PASS=your_encrypted_password
SECRET_KEY=your_flask_secret

定期清理策略:
# cleanup.py
def delete_old_files(dir_path, days=7):
cutoff = time.time() - days * 86400
for f in os.listdir(dir_path):
path = os.path.join(dir_path, f)
if os.stat(path).st_mtime < cutoff:
os.remove(path)本文详细介绍了从零构建高可靠文件处理服务的完整方案。关键点总结:
实际部署时建议:
完整项目代码已开源在GitHub:[示例项目链接]
附录:扩展阅读