XSS防护
概览
通过输入清理、输出编码、CSP响应头和安全编码实践,实施全面的跨站脚本攻击(XSS)防护。适用于在Web应用程序中处理用户生成内容的场景。
何时使用
- 用户生成内容展示
- 富文本编辑器
- 评论系统
- 搜索功能
- 动态HTML生成
- 模板渲染
实施示例
1. Node.js XSS防护
// xss-prevention.js
const createDOMPurify = require('dompurify');
const { JSDOM } = require('jsdom');
const he = require('he');
const window = new JSDOM('').window;
const DOMPurify = createDOMPurify(window);
/**
 * Context-specific encoders and sanitizers for defending against XSS.
 *
 * Every member is static. Pick the method that matches the output context
 * (HTML text, HTML attribute, JavaScript string, URL component, rich HTML).
 */
class XSSPrevention {
  /**
   * HTML-entity encode a string for use as text content.
   *
   * @param {string} str - Untrusted text.
   * @returns {string} Text encoded by `he.encode` using named references.
   */
  static encodeHTML(str) {
    return he.encode(str, {
      useNamedReferences: true,
      encodeEverything: false
    });
  }

  /**
   * Sanitize rich HTML against an allowlist of tags, attributes and URI schemes.
   *
   * @param {string} dirty - Untrusted HTML.
   * @returns {string} Sanitized HTML string.
   */
  static sanitizeHTML(dirty) {
    const config = {
      ALLOWED_TAGS: [
        'p', 'br', 'strong', 'em', 'u', 'h1', 'h2', 'h3',
        'ul', 'ol', 'li', 'a', 'img', 'blockquote', 'code'
      ],
      ALLOWED_ATTR: [
        'href', 'src', 'alt', 'title', 'class'
      ],
      // Only http(s) and mailto URIs survive in href/src.
      ALLOWED_URI_REGEXP: /^(?:https?|mailto):/i,
      KEEP_CONTENT: true,
      RETURN_DOM: false,
      RETURN_DOM_FRAGMENT: false
    };
    return DOMPurify.sanitize(dirty, config);
  }

  /**
   * Strict sanitization: only basic inline formatting tags, no attributes.
   *
   * @param {string} dirty - Untrusted HTML.
   * @returns {string} Sanitized HTML string.
   */
  static sanitizeStrict(dirty) {
    return DOMPurify.sanitize(dirty, {
      ALLOWED_TAGS: ['b', 'i', 'em', 'strong'],
      ALLOWED_ATTR: [],
      KEEP_CONTENT: true
    });
  }

  /**
   * Escape a string for embedding inside a JavaScript string literal.
   *
   * Besides the HTML-significant characters, this escapes the backslash
   * (otherwise a trailing `\` would escape the closing quote), line
   * terminators (which end a quoted string literal), the backtick and `/`
   * (so `</script>` cannot appear in the output).
   *
   * @param {string} str - Untrusted text.
   * @returns {string} Text with risky characters replaced by `\xHH` / `\uHHHH`.
   */
  static encodeForJS(str) {
    return str.replace(/[<>"'&\\\/`\n\r\u2028\u2029]/g, (char) => {
      const code = char.charCodeAt(0);
      const hex = code.toString(16).toUpperCase();
      return code < 0x100
        ? `\\x${hex.padStart(2, '0')}`
        : `\\u${hex.padStart(4, '0')}`;
    });
  }

  /**
   * Percent-encode a value for use as a URL component.
   *
   * @param {string} str - Untrusted text.
   * @returns {string} Result of `encodeURIComponent`.
   */
  static encodeURL(str) {
    return encodeURIComponent(str);
  }

  /**
   * Encode a string for use inside a quoted HTML attribute value.
   *
   * @param {string} str - Untrusted text.
   * @returns {string} Text with `& < > " ' /` replaced by HTML entities.
   */
  static encodeAttribute(str) {
    const entities = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#x27;',
      '/': '&#x2F;'
    };
    // Single pass, so an ampersand produced by one replacement is never re-encoded.
    return str.replace(/[&<>"'\/]/g, (char) => entities[char]);
  }

  /**
   * Validate a URL and return its normalized form.
   *
   * @param {string} url - Untrusted URL.
   * @returns {string} `URL#href` when the protocol is http, https or mailto;
   *   an empty string for any other protocol or an unparsable URL.
   */
  static sanitizeURL(url) {
    try {
      const parsed = new URL(url);
      // Only allow safe protocols.
      if (!['http:', 'https:', 'mailto:'].includes(parsed.protocol)) {
        return '';
      }
      return parsed.href;
    } catch {
      // new URL() throws on relative or malformed input; treat it as unsafe.
      return '';
    }
  }

  /**
   * Remove everything that looks like a complete `<...>` tag.
   *
   * NOTE: the pattern only matches tags that have a closing `>`; an
   * unterminated `<tag ...` is left in place, so the result must still be
   * encoded for the context it is written into.
   *
   * @param {string} str - Untrusted text.
   * @returns {string} Text with `<...>` sequences removed.
   */
  static stripHTML(str) {
    return str.replace(/<[^>]*>/g, '');
  }

  /**
   * Build the object shape expected by React's `dangerouslySetInnerHTML`.
   *
   * @param {string} str - Untrusted HTML.
   * @returns {{__html: string}} DOMPurify-sanitized markup (default config).
   */
  static escapeForReact(str) {
    return {
      __html: DOMPurify.sanitize(str)
    };
  }
}
// Express middleware
/**
 * Replace `req.body` and `req.query` with the result of `sanitizeObject`
 * whenever they are present, then hand control to the next middleware.
 *
 * @param {object} req - Request whose `body` / `query` are rewritten in place.
 * @param {object} res - Response (unused).
 * @param {Function} next - Continuation callback.
 */
function xssProtection(req, res, next) {
  // Request body first, then query parameters.
  for (const source of ['body', 'query']) {
    if (req[source]) {
      req[source] = sanitizeObject(req[source]);
    }
  }
  next();
}
/**
 * Recursively strip HTML tags from every string inside a parsed request value.
 *
 * Arrays stay arrays (they are not rebuilt as index-keyed objects), and an own
 * `__proto__` key is dropped, because assigning it on a plain object would
 * replace the result's prototype with caller-controlled data.
 *
 * @param {object|Array} obj - Parsed body or query value.
 * @returns {object|Array} A sanitized copy; the input is not mutated.
 */
function sanitizeObject(obj) {
  const sanitizeValue = (value) => {
    if (typeof value === 'string') {
      return XSSPrevention.stripHTML(value);
    }
    // typeof null === 'object', so exclude it explicitly.
    if (typeof value === 'object' && value !== null) {
      return sanitizeObject(value);
    }
    return value;
  };

  if (Array.isArray(obj)) {
    return obj.map(sanitizeValue);
  }

  const sanitized = {};
  for (const [key, value] of Object.entries(obj)) {
    // JSON.parse creates "__proto__" as an own key; never copy it across.
    if (key === '__proto__') {
      continue;
    }
    sanitized[key] = sanitizeValue(value);
  }
  return sanitized;
}
// Express example: JSON parsing, then the tag-stripping middleware, then routes.
const express = require('express');
const app = express();
app.use(express.json());
app.use(xssProtection);

/**
 * POST /api/comments - sanitize the submitted comment and echo it back.
 */
function handleCreateComment(req, res) {
  // Extra rich-content sanitization on top of the middleware.
  const safeComment = XSSPrevention.sanitizeHTML(req.body.comment);
  // Store in the database
  // db.comments.insert({ content: safeComment });
  res.json({ comment: safeComment });
}

app.post('/api/comments', handleCreateComment);

module.exports = XSSPrevention;
2. Python XSS防护
# xss_prevention.py
import html
import bleach
from urllib.parse import urlparse, quote
import re
class XSSPrevention:
    """Context-specific encoders and sanitizers for defending against XSS."""

    # HTML tags permitted in rich content. 'img' is listed so that the
    # 'img' entry in ALLOWED_ATTRIBUTES below actually takes effect.
    ALLOWED_TAGS = [
        'p', 'br', 'strong', 'em', 'u', 'h1', 'h2', 'h3',
        'ul', 'ol', 'li', 'a', 'img', 'blockquote', 'code'
    ]

    # Attributes permitted per tag.
    ALLOWED_ATTRIBUTES = {
        'a': ['href', 'title'],
        'img': ['src', 'alt']
    }

    @staticmethod
    def encode_html(text: str) -> str:
        """HTML-entity encode text (quotes included) for use as text content."""
        return html.escape(text, quote=True)

    @staticmethod
    def sanitize_html(dirty_html: str) -> str:
        """Sanitize rich HTML with bleach using the class allowlists.

        Disallowed markup is stripped rather than escaped (strip=True).
        """
        return bleach.clean(
            dirty_html,
            tags=XSSPrevention.ALLOWED_TAGS,
            attributes=XSSPrevention.ALLOWED_ATTRIBUTES,
            strip=True
        )

    @staticmethod
    def sanitize_strict(dirty_html: str) -> str:
        """Strict sanitization: run bleach with no tags and no attributes allowed."""
        return bleach.clean(
            dirty_html,
            tags=[],
            attributes={},
            strip=True
        )

    @staticmethod
    def strip_html(text: str) -> str:
        """Remove every complete ``<...>`` sequence from text.

        Only tags that have a closing ``>`` are matched, so the result must
        still be encoded for the context it is written into.
        """
        return re.sub(r'<[^>]*>', '', text)

    @staticmethod
    def sanitize_url(url: str) -> str:
        """Return url unchanged if its scheme is http, https or mailto, else ''.

        Unparsable or non-string input also yields ''.
        """
        try:
            parsed = urlparse(url)
        except (ValueError, TypeError, AttributeError):
            # Malformed or non-string input: treat as unsafe.
            return ''
        # Only allow safe schemes.
        if parsed.scheme not in ['http', 'https', 'mailto']:
            return ''
        return url

    @staticmethod
    def encode_for_javascript(text: str) -> str:
        """Escape text for embedding inside a JavaScript string literal.

        The backslash is escaped so a trailing one cannot escape the closing
        quote, and line terminators are escaped because they end a quoted
        string literal.
        """
        escape_map = {
            '\\': '\\x5C',
            '<': '\\x3C',
            '>': '\\x3E',
            '"': '\\x22',
            "'": '\\x27',
            '&': '\\x26',
            '/': '\\x2F',
            '`': '\\x60',
            '\n': '\\x0A',
            '\r': '\\x0D',
            '\u2028': '\\u2028',
            '\u2029': '\\u2029'
        }
        return ''.join(escape_map.get(char, char) for char in text)

    @staticmethod
    def encode_url_param(text: str) -> str:
        """Percent-encode text for use as a URL parameter (no safe characters)."""
        return quote(text, safe='')
# Flask集成
from flask import Flask, request, jsonify
from functools import wraps
app = Flask(__name__)
def sanitize_input(f):
    """Decorator that sanitizes the JSON request body before the view runs.

    The sanitized payload is written into the request's JSON cache so that a
    later ``request.get_json()`` inside the view returns the cleaned data.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if request.is_json:
            data = request.get_json()
            # Wrap the payload so non-dict JSON (a list or a bare string) goes
            # through sanitize_dict too, instead of failing on .items().
            cleaned = sanitize_dict({'payload': data})['payload']
            # Werkzeug indexes this cache as a (normal, silent) pair, so it
            # has to be a 2-tuple rather than the bare payload.
            # NOTE(review): _cached_json is a private Werkzeug attribute -
            # confirm against the installed version.
            request._cached_json = (cleaned, cleaned)
        return f(*args, **kwargs)
    return decorated_function
def sanitize_dict(data: dict) -> dict:
    """Return a copy of data with HTML tags stripped from every string value.

    Recurses through nested dicts and through lists at any depth, so strings
    inside a list of lists are sanitized as well. Other value types are
    copied across unchanged.
    """
    def _clean(value):
        if isinstance(value, str):
            return XSSPrevention.strip_html(value)
        if isinstance(value, dict):
            return sanitize_dict(value)
        if isinstance(value, list):
            return [_clean(item) for item in value]
        return value

    return {key: _clean(value) for key, value in data.items()}
@app.route('/api/comments', methods=['POST'])
@sanitize_input
def create_comment():
    """Sanitize the posted comment as rich content and return it as JSON."""
    payload = request.get_json()
    # Extra rich-content sanitization on top of the decorator's tag stripping.
    safe_comment = XSSPrevention.sanitize_html(payload.get('comment', ''))
    response_body = {'comment': safe_comment}
    return jsonify(response_body)
# Django模板过滤器
from django import template
from django.utils.safestring import mark_safe
register = template.Library()
@register.filter(name='sanitize_html')
def sanitize_html_filter(value):
    """Django template filter: sanitize HTML, then mark the result as safe."""
    # mark_safe is applied only to the output of the sanitizer.
    return mark_safe(XSSPrevention.sanitize_html(value))
# 使用模板:
# {{ user_content|sanitize_html }}
3. React XSS防护
// XSSSafeComponent.jsx
import React from 'react';
import DOMPurify from 'dompurify';
// 安全文本渲染(React自动转义)
function SafeText({ text }) {
return <div>{text}</div>;
}
// 清理后的HTML渲染
function SafeHTML({ html }) {
const sanitized = DOMPurify.sanitize(html, {
ALLOWED_TAGS: ['p', 'br', 'strong', 'em', 'u', 'a'],
ALLOWED_ATTR: ['href']
});
return (
<div
dangerouslySetInnerHTML={{ __html: sanitized }}
/>
);
}
// 安全URL属性
function SafeLink({ href, children }) {
const safeHref = sanitizeURL(href);
return (
<a
href={safeHref}
rel="noopener noreferrer"
target="_blank"
>
{children}
</a>
);
}
/**
 * Return the normalized href for http/https URLs, or '' for anything else.
 *
 * @param {string} url - Untrusted URL.
 * @returns {string} `URL#href`, or an empty string when the URL cannot be
 *   parsed or uses another protocol.
 */
function sanitizeURL(url) {
  let candidate;
  try {
    candidate = new URL(url);
  } catch {
    // Relative or malformed input cannot be validated.
    return '';
  }
  const isWebProtocol =
    candidate.protocol === 'http:' || candidate.protocol === 'https:';
  return isWebProtocol ? candidate.href : '';
}
// Input-sanitizing hook
/**
 * State hook whose change handler runs the new value through DOMPurify with
 * no tags allowed (content kept) before storing it.
 *
 * @param {string} [initialValue=''] - Initial state value.
 * @returns {[string, Function]} Current value and an `onChange` handler.
 */
function useSanitizedInput(initialValue = '') {
  const [value, setValue] = React.useState(initialValue);

  function handleChange(event) {
    const purifyOptions = {
      ALLOWED_TAGS: [],
      KEEP_CONTENT: true
    };
    setValue(DOMPurify.sanitize(event.target.value, purifyOptions));
  }

  return [value, handleChange];
}
// 使用
function CommentForm() {
const [comment, handleCommentChange] = useSanitizedInput();
const handleSubmit = async (e) => {
e.preventDefault();
await fetch('/api/comments', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ comment })
});
};
return (
<form onSubmit={handleSubmit}>
<textarea
value={comment}
onChange={handleCommentChange}
placeholder="Enter comment"
/>
<button type="submit">Submit</button>
</form>
);
}
export { SafeText, SafeHTML, SafeLink, useSanitizedInput };
4. 内容安全策略
// csp-config.js
const helmet = require('helmet');
/**
 * Install a Content-Security-Policy header and a violation-report endpoint.
 *
 * Script and style sources use a per-response nonce read from
 * `res.locals.nonce` instead of a fixed placeholder string; a fixed nonce is
 * guessable and never matches the value rendered into templates.
 *
 * @param {object} app - Express application to register middleware on.
 */
function setupCSP(app) {
  // Make sure a nonce exists before the CSP header is computed. A nonce set
  // by earlier middleware is reused so the template and header always agree.
  app.use((req, res, next) => {
    if (!res.locals.nonce) {
      res.locals.nonce = require('crypto').randomBytes(16).toString('base64');
    }
    next();
  });

  // Directive values may be functions; this one is evaluated for each response.
  const nonceSource = (req, res) => `'nonce-${res.locals.nonce}'`;

  app.use(helmet.contentSecurityPolicy({
    directives: {
      defaultSrc: ["'self'"],
      // Scripts: same origin, nonce-tagged inline scripts, and one trusted CDN.
      scriptSrc: [
        "'self'",
        nonceSource,
        "https://cdn.example.com"
      ],
      // Styles: same origin, nonce-tagged inline styles, and Google Fonts.
      styleSrc: [
        "'self'",
        nonceSource,
        "https://fonts.googleapis.com"
      ],
      // No plugins; <base> is restricted to the same origin.
      objectSrc: ["'none'"],
      baseUri: ["'self'"],
      // Report violations
      reportUri: ['/api/csp-violations']
    }
  }));

  // CSP violation reporter.
  // NOTE(review): browsers typically send reports as application/csp-report;
  // req.body is only populated if a body parser accepting that type is
  // registered - confirm.
  app.post('/api/csp-violations', (req, res) => {
    console.error('CSP Violation:', req.body);
    res.status(204).end();
  });
}
// Generate a nonce for inline scripts
/**
 * @returns {string} 16 random bytes from Node's crypto module, base64-encoded.
 */
function generateNonce() {
  const { randomBytes } = require('crypto');
  const NONCE_BYTES = 16;
  return randomBytes(NONCE_BYTES).toString('base64');
}
// Express middleware that attaches a per-response nonce for templates.
app.use((req, res, next) => {
  // Reuse a nonce that earlier middleware already attached, so the value
  // rendered in templates always matches the one placed in the CSP header.
  if (!res.locals.nonce) {
    res.locals.nonce = generateNonce();
  }
  next();
});
// 在模板中:<script nonce="<%= nonce %>">
最佳实践
✅ 要做
- 默认情况下编码输出
- 使用模板引擎
- 实施CSP头部
- 清理富内容
- 验证URL
- 使用HttpOnly Cookie
- 定期安全测试
- 使用安全框架
❌ 不要做
- 信任用户输入
- 直接使用innerHTML
- 跳过输出编码
- 允许内联脚本
- 使用eval()
- 混合上下文(HTML/JS)
XSS类型
- 反射型:恶意输入随请求提交,并在响应中被立即回显
- 存储型:恶意内容被存储在数据库中,之后展示给其他用户
- 基于DOM:由客户端脚本对DOM的不安全操作触发
- 基于变异(mXSS):利用解析器差异,使清理后的HTML在重新解析时变为可执行内容
上下文特定编码
- HTML内容:HTML实体编码
- HTML属性:属性编码
- JavaScript:JavaScript转义
- URL:URL编码
- CSS:CSS转义