AIGC 内容生成技术
图片、视频、文案、UI、代码生成全栈技术
目录
核心技术概念
| 概念 |
定义 |
核心价值 |
| Diffusion Model |
扩散模型 |
高质量图像生成 |
| Stable Diffusion |
开源扩散模型 |
免费、可定制、效果优秀 |
| ControlNet |
条件控制网络 |
精确控制生成图像 |
| LoRA |
低秩适应 |
小样本风格定制 |
| Text-to-Image |
文本生成图像 |
创意设计 |
| Image-to-Image |
图像生成图像 |
图像编辑、风格迁移 |
| Inpainting |
图像修复 |
局部编辑、去除水印 |
| Text-to-Video |
文本生成视频 |
视频创作 |
| Text-to-Code |
文本生成代码 |
开发提效 |
图像生成
1.1 主流模型对比
| 模型 |
类型 |
优势 |
劣势 |
适用场景 |
| DALL-E 3 |
商业闭源 |
效果好、理解强 |
费用高 |
商业应用 |
| Midjourney |
商业闭源 |
艺术效果强 |
API限制 |
艺术创作 |
| Stable Diffusion |
开源 |
免费、可定制 |
需要GPU |
私有化部署 |
| Stable Diffusion XL |
开源 |
高分辨率、质量高 |
显存需求大 |
高质量生成 |
1.2 Stable Diffusion 核心组件
# aigc/image_generator.py
"""
AIGC 图像生成核心实现
包含:SD基础推理、ControlNet、LoRA加载
"""
from typing import Optional, List, Dict
from diffusers import (
StableDiffusionPipeline,
StableDiffusionXLPipeline,
ControlNetModel,
StableDiffusionControlNetPipeline
)
from diffusers.utils import load_image
import torch
from PIL import Image
class ImageGenerator:
    """Base text-to-image generator wrapping a Stable Diffusion pipeline.

    Args:
        model_name: Hugging Face hub id of the base checkpoint.
        device: device used for seeding generators (weight placement is
            handled by CPU offload).
        torch_dtype: weight precision (fp16 by default).
    """

    def __init__(
        self,
        model_name: str = "runwayml/stable-diffusion-v1-5",
        device: str = "cuda",
        torch_dtype: torch.dtype = torch.float16
    ):
        self.device = device
        self.torch_dtype = torch_dtype
        self.model_name = model_name
        self.pipeline = self._load_pipeline()

    def _load_pipeline(self):
        """Load the base pipeline with memory optimizations enabled."""
        pipeline = StableDiffusionPipeline.from_pretrained(
            self.model_name,
            torch_dtype=self.torch_dtype,
            safety_checker=None  # disabled here; callers run their own checks
        )
        # Memory optimizations. NOTE: do NOT call .to(device) before
        # enable_model_cpu_offload() -- offloading manages device placement
        # itself, and moving the whole pipeline to GPU first defeats it
        # (and raises on recent diffusers versions). The original moved the
        # pipeline to `device` first; that call was removed.
        pipeline.enable_attention_slicing()
        pipeline.enable_model_cpu_offload()
        return pipeline

    def generate(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        num_images: int = 1,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 50,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> List[Image.Image]:
        """Generate images for `prompt`.

        Args:
            prompt: positive text prompt.
            negative_prompt: concepts to steer away from.
            num_images: images generated per prompt.
            width: output width in pixels.
            height: output height in pixels.
            num_inference_steps: denoising steps (quality vs. speed).
            guidance_scale: classifier-free guidance strength.
            seed: optional seed for reproducible outputs.

        Returns:
            List of generated PIL images.
        """
        # Seed a generator only when reproducibility was requested.
        generator = None
        if seed is not None:
            generator = torch.Generator(device=self.device).manual_seed(seed)
        # inference_mode disables autograd bookkeeping for the whole run.
        with torch.inference_mode():
            result = self.pipeline(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_images_per_prompt=num_images,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator
            )
        return result.images
class SDXLGenerator(ImageGenerator):
    """Stable Diffusion XL generator (1024px-class base model)."""

    def __init__(self, device: str = "cuda"):
        super().__init__(
            model_name="stabilityai/stable-diffusion-xl-base-1.0",
            device=device
        )

    def _load_pipeline(self):
        """Load the SDXL base pipeline (fp16 weights) with memory optimizations."""
        pipeline = StableDiffusionXLPipeline.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16"
        )
        # NOTE: enable_model_cpu_offload() manages device placement itself;
        # the original's .to(device) call before it defeats the offload
        # (and errors on recent diffusers), so it was removed.
        pipeline.enable_attention_slicing()
        pipeline.enable_model_cpu_offload()
        return pipeline
class ControlNetGenerator:
    """Structure-controlled generation via ControlNet conditioning."""

    def __init__(
        self,
        base_model: str = "runwayml/stable-diffusion-v1-5",
        controlnet_model: str = "lllyasviel/sd-controlnet-canny",
        device: str = "cuda"
    ):
        self.device = device
        # Load the ControlNet conditioning model.
        controlnet = ControlNetModel.from_pretrained(
            controlnet_model,
            torch_dtype=torch.float16
        )
        # Load the base pipeline with the ControlNet attached.
        self.pipeline = StableDiffusionControlNetPipeline.from_pretrained(
            base_model,
            controlnet=controlnet,
            torch_dtype=torch.float16,
            safety_checker=None  # callers run their own safety checks
        )
        # CPU offload manages device placement; do not .to(device) first
        # (the original moved both models to GPU before enabling offload).
        self.pipeline.enable_model_cpu_offload()

    def generate_with_control(
        self,
        prompt: str,
        control_image: Image.Image,
        control_conditioning_scale: float = 1.0,
        **kwargs
    ) -> List[Image.Image]:
        """Generate images conditioned on a preprocessed control image.

        Args:
            prompt: text prompt.
            control_image: conditioning image (e.g. a Canny edge map).
            control_conditioning_scale: strength of the ControlNet guidance.

        Returns:
            List of generated PIL images.
        """
        with torch.inference_mode():
            result = self.pipeline(
                prompt=prompt,
                # The ControlNet pipeline takes the conditioning image via
                # `image=`; the original's `control_image=` is not an
                # accepted keyword and raised a TypeError.
                image=control_image,
                controlnet_conditioning_scale=control_conditioning_scale,
                **kwargs
            )
        return result.images
class LoRAGenerator:
    """Stable Diffusion generator with an optional LoRA style adapter.

    Note: the original class name was misspelled `LoRAGGenerator` while the
    usage example referenced `LoRAGenerator`; the corrected name is primary
    and a backward-compatible alias is kept below.
    """

    def __init__(
        self,
        base_model: str = "runwayml/stable-diffusion-v1-5",
        lora_path: str = None,
        device: str = "cuda"
    ):
        self.device = device
        # Load the base model.
        self.pipeline = StableDiffusionPipeline.from_pretrained(
            base_model,
            torch_dtype=torch.float16
        )
        # Load the LoRA weights, registering them under a known adapter
        # name so set_adapters() can address them later.
        self._has_lora = False
        if lora_path:
            self.pipeline.load_lora_weights(lora_path, adapter_name="default")
            self._has_lora = True
        # Offload manages device placement; do not .to(device) first.
        self.pipeline.enable_model_cpu_offload()

    def generate(
        self,
        prompt: str,
        lora_scale: float = 1.0,
        **kwargs
    ) -> List[Image.Image]:
        """Generate images with the LoRA adapter applied at `lora_scale`."""
        if self._has_lora:
            # set_adapters() takes the weights directly; there is no
            # `set_adaptor_weights` method in diffusers (the original call
            # raised AttributeError).
            self.pipeline.set_adapters(["default"], adapter_weights=[lora_scale])
        with torch.inference_mode():
            result = self.pipeline(prompt=prompt, **kwargs)
        return result.images


# Backward-compatible alias for the original (misspelled) class name.
LoRAGGenerator = LoRAGenerator
# ============== 图像预处理 ==============
class ImagePreprocessor:
    """Build ControlNet conditioning images from a reference picture."""

    @staticmethod
    def canny_edge_detection(
        image: Image.Image,
        low_threshold: int = 100,
        high_threshold: int = 200
    ) -> Image.Image:
        """Return a Canny edge map of `image` (ControlNet "canny" input)."""
        import cv2
        import numpy as np

        grayscale = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
        edge_map = cv2.Canny(grayscale, low_threshold, high_threshold)
        return Image.fromarray(edge_map)

    @staticmethod
    def pose_detection(image: Image.Image) -> Image.Image:
        """Return an OpenPose skeleton map (ControlNet "pose" input)."""
        from controlnet_aux import OpenposeDetector
        return OpenposeDetector.from_pretrained("lllyasviel/Annotators")(image)

    @staticmethod
    def depth_estimation(image: Image.Image) -> Image.Image:
        """Return a MiDaS depth map (ControlNet "depth" input)."""
        from controlnet_aux import MidasDetector
        return MidasDetector.from_pretrained("valhalla/t2iadapter-depth-midas")(image)

    @staticmethod
    def segmentation(image: Image.Image) -> Image.Image:
        """Return a SAM segmentation map (ControlNet "seg" input)."""
        from controlnet_aux import SamDetector
        return SamDetector.from_pretrained("ybelkada/segment-anything")(image)
# ============== 图像后处理 ==============
class ImagePostprocessor:
    """Post-generation utilities: upscaling and inpainting."""

    @staticmethod
    def upscale(image: Image.Image, scale: int = 2) -> Image.Image:
        """Upscale `image` with the SD x4 latent upscaler.

        NOTE(review): `scale` is accepted but never passed to the pipeline --
        the model always upscales 4x; confirm whether callers rely on it.
        """
        from diffusers import StableDiffusionUpscalePipeline

        upscaler = StableDiffusionUpscalePipeline.from_pretrained(
            "stabilityai/stable-diffusion-x4-upscaler",
            torch_dtype=torch.float16
        ).to("cuda")
        output = upscaler(
            image=image,
            prompt="high quality, detailed",
            num_inference_steps=20
        )
        return output.images[0]

    @staticmethod
    def inpaint(
        image: Image.Image,
        mask: Image.Image,
        prompt: str
    ) -> Image.Image:
        """Regenerate the masked region of `image` according to `prompt`."""
        from diffusers import StableDiffusionInpaintPipeline

        inpainter = StableDiffusionInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-inpainting",
            torch_dtype=torch.float16
        ).to("cuda")
        output = inpainter(
            prompt=prompt,
            image=image,
            mask_image=mask,
            num_inference_steps=50
        )
        return output.images[0]
# ============== 使用示例 ==============
if __name__ == "__main__":
    # --- Example 1: plain text-to-image generation ---
    generator = ImageGenerator(model_name="runwayml/stable-diffusion-v1-5")
    images = generator.generate(
        prompt="一只可爱的猫咪坐在窗台上,阳光明媚",
        num_images=2,
        seed=42
    )
    images[0].save("cat.png")

    # --- Example 2: structure-controlled generation with ControlNet ---
    preprocessor = ImagePreprocessor()
    control_gen = ControlNetGenerator()
    # Turn the reference picture into a Canny conditioning map.
    reference_image = load_image("reference.png")
    control_image = preprocessor.canny_edge_detection(reference_image)
    result = control_gen.generate_with_control(
        prompt="一只猫咪坐在窗台上",
        control_image=control_image,
        control_conditioning_scale=1.0
    )
    result[0].save("controlled_cat.png")

    # --- Example 3: LoRA-styled generation ---
    lora_gen = LoRAGenerator(lora_path="path/to/lora.safetensors")
    styled_images = lora_gen.generate(
        prompt="a beautiful landscape",
        lora_scale=0.8
    )
    styled_images[0].save("styled.png")
1.3 常见 ControlNet 类型
| ControlNet |
功能 |
应用场景 |
| Canny |
边缘检测 |
保持轮廓、结构 |
| Pose |
人体姿态 |
人物姿态控制 |
| Depth |
深度估计 |
空间结构控制 |
| Seg |
语义分割 |
区域编辑 |
| Scribble |
简笔画 |
草图生成 |
| Hed |
边缘检测 |
柔和轮廓 |
| MLSD |
直线检测 |
建筑结构 |
视频生成
2.1 主流视频生成模型
| 模型 |
类型 |
特点 |
| Sora |
OpenAI 闭源 |
超长视频、高质量 |
| Runway Gen-2 |
商业 |
文字生成视频、图生视频 |
| Pika Labs |
商业 |
图片转视频、风格迁移 |
| Stable Video Diffusion |
开源 |
图片转短视频 |
2.2 视频生成实现
# aigc/video_generator.py
"""
AIGC 视频生成实现
包含:图生视频、文字生视频
"""
from typing import List, Optional

import numpy as np
import torch
from PIL import Image

from diffusers import StableVideoDiffusionPipeline
class VideoGenerator:
    """Image-to-video generator based on Stable Video Diffusion (SVD)."""

    def __init__(self, device: str = "cuda"):
        self.device = device
        self.pipeline = self._load_pipeline()

    def _load_pipeline(self):
        """Load the SVD img2vid-xt pipeline with fp16 weights."""
        pipeline = StableVideoDiffusionPipeline.from_pretrained(
            "stabilityai/stable-video-diffusion-img2vid-xt",
            torch_dtype=torch.float16,
            variant="fp16"
        )
        # Offload manages device placement; the original's .to(device)
        # before enable_model_cpu_offload() conflicts with the offload
        # and was removed.
        pipeline.enable_model_cpu_offload()
        return pipeline

    def image_to_video(
        self,
        image: Image.Image,
        motion_bucket_id: int = 127,
        fps: int = 7,
        num_frames: int = 25,
        num_inference_steps: int = 25
    ) -> List[Image.Image]:
        """
        Generate a short clip from a single still image.

        Args:
            image: input image.
            motion_bucket_id: motion strength (0-255).
            fps: frame rate conditioning value.
            num_frames: number of frames to generate.
            num_inference_steps: denoising steps.

        Returns:
            List of PIL frames (first video of the batch).
        """
        with torch.inference_mode():
            frames = self.pipeline(
                image=image,
                motion_bucket_id=motion_bucket_id,
                fps=fps,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps
            ).frames[0]
        return frames

    def save_video(
        self,
        frames: List[Image.Image],
        output_path: str,
        fps: int = 7
    ):
        """Write `frames` to a video file with imageio."""
        import imageio
        import numpy as np  # local import: np was undefined in the original

        # Convert PIL frames to numpy arrays for the writer.
        frames_np = [np.array(frame) for frame in frames]
        imageio.mimwrite(output_path, frames_np, fps=fps)
# ============== 文字生视频(调用商业API)==============
class TextToVideoAPI:
    """Text-to-video generation via commercial provider HTTP APIs."""

    def __init__(self, api_key: str, provider: str = "runway"):
        self.api_key = api_key
        self.provider = provider

    def generate(
        self,
        prompt: str,
        duration: int = 4,
        width: int = 1024,
        height: int = 576
    ) -> str:
        """
        Generate a video from text.

        Args:
            prompt: scene description.
            duration: clip length in seconds.
            width: output width in pixels.
            height: output height in pixels.

        Returns:
            video_url: download URL for the generated clip.

        Raises:
            ValueError: if the configured provider is not supported
                (the original silently returned None here).
        """
        if self.provider == "runway":
            return self._runway_generate(prompt, duration, width, height)
        if self.provider == "pika":
            return self._pika_generate(prompt, duration, width, height)
        raise ValueError(f"Unsupported provider: {self.provider}")

    def _runway_generate(self, prompt: str, duration: int, width: int, height: int) -> str:
        """Call the Runway Gen-2 HTTP API and return the output URL."""
        import requests
        url = "https://api.runwayml.com/v1/generate"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "prompt": prompt,
            "duration": duration,
            "width": width,
            "height": height
        }
        # Always bound outbound HTTP calls and surface HTTP errors.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        return response.json().get("output_url")

    def _pika_generate(self, prompt: str, duration: int, width: int, height: int) -> str:
        """Pika provider stub.

        The original dispatched to this method without defining it
        (AttributeError at runtime); fail explicitly until implemented.
        """
        raise NotImplementedError("Pika API integration is not implemented yet")
# ============== 使用示例 ==============
if __name__ == "__main__":
    # Example 1: animate a still image with SVD.
    video_gen = VideoGenerator()
    input_image = Image.open("input.png")
    frames = video_gen.image_to_video(
        image=input_image,
        motion_bucket_id=127,  # medium amount of motion
        fps=7,
        num_frames=25
    )
    video_gen.save_video(frames, "output.mp4")

    # Example 2: text-to-video through a commercial API (disabled by default).
    # text_to_video = TextToVideoAPI(api_key="your-api-key", provider="runway")
    # video_url = text_to_video.generate(
    #     prompt="一只猫在阳光明媚的窗台上打盹",
    #     duration=4
    # )
文案生成
3.1 文案生成类型
| 类型 |
技术点 |
应用 |
| 营销文案 |
情感驱动、用户画像 |
广告投放、商品推广 |
| SEO文案 |
关键词优化、长尾词 |
搜索排名 |
| 产品描述 |
特点提炼、价值主张 |
商品详情 |
| 社交媒体 |
内容形式、互动性 |
微博、小红书 |
3.2 文案生成实现
# aigc/copywriting_generator.py
"""
AIGC 文案生成实现
包含:营销文案、SEO文案、社交媒体文案
"""
from typing import List, Optional, Dict
from pydantic import BaseModel, Field
import openai
class CopywritingRequest(BaseModel):
    """Input payload describing the product and context for copy generation.

    Note: the Field(description=...) strings are runtime metadata (they
    surface in the generated JSON schema) and are intentionally kept in
    Chinese; the inline comments translate them.
    """

    product_name: str = Field(description="产品名称")  # product name
    product_features: List[str] = Field(description="产品特点")  # selling points
    target_audience: str = Field(description="目标受众")  # target audience
    tone: str = Field(description="语气", default="professional")  # writing tone
    platform: str = Field(description="发布平台")  # publishing platform
class CopywritingGenerator:
    """Copy generator backed by an OpenAI chat model.

    Produces marketing copy, SEO product descriptions and platform-specific
    social-media posts. All prompts are runtime strings (Chinese) and are
    kept verbatim.
    """

    def __init__(self, api_key: str, model: str = "gpt-4"):
        # One client per generator; `model` is used for every completion call.
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def generate_marketing_copy(
        self,
        request: CopywritingRequest,
        num_variants: int = 3
    ) -> List[Dict]:
        """Generate `num_variants` marketing-copy variants for `request`.

        Returns:
            A list of {"variant": 1-based index, "content": text} dicts.
        """
        prompt = self._build_marketing_prompt(request)
        # n=num_variants requests several completions in a single API call.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            n=num_variants
        )
        return [
            {
                "variant": i + 1,
                "content": choice.message.content
            }
            for i, choice in enumerate(response.choices)
        ]

    def generate_seo_copy(
        self,
        product_name: str,
        keywords: List[str],
        target_length: int = 300
    ) -> str:
        """Generate an SEO-optimized product description.

        Args:
            product_name: name of the product.
            keywords: keywords to weave into the text.
            target_length: approximate length in characters.
        """
        prompt = f"""
为以下产品生成SEO优化的产品描述:
产品名称:{product_name}
关键词:{', '.join(keywords)}
目标长度:约 {target_length} 字
要求:
1. 自然融入关键词
2. 突出产品优势
3. 适合搜索引擎抓取
4. 语言流畅有吸引力
"""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    def generate_social_media_copy(
        self,
        content: str,
        platform: str = "xiaohongshu",
        style: str = "casual"
    ) -> Dict:
        """Adapt `content` into a platform-specific social-media post.

        Unknown platforms fall back to the xiaohongshu template.
        """
        # Per-platform formatting constraints injected into the prompt.
        platform_templates = {
            "xiaohongshu": {
                "format": "标题 + emoji + 正文 + 标签",
                "max_length": 1000,
                "emoji_style": "丰富的emoji"
            },
            "weibo": {
                "format": "正文 + 话题标签",
                "max_length": 140,
                "emoji_style": "适度使用"
            },
            "wechat": {
                "format": "标题 + 正文",
                "max_length": 2000,
                "emoji_style": "官方风格"
            }
        }
        template = platform_templates.get(platform, platform_templates["xiaohongshu"])
        prompt = f"""
为以下内容生成 {platform} 平台的文案:
原始内容:{content}
风格:{style}
平台要求:
- 格式:{template['format']}
- 字数限制:{template['max_length']}
- Emoji:{template['emoji_style']}
请生成完整的文案,包含标题和正文。
"""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )
        return {
            "platform": platform,
            "content": response.choices[0].message.content,
            "style": style
        }

    def _build_marketing_prompt(self, request: CopywritingRequest) -> str:
        """Build the marketing prompt from the request fields."""
        prompt = f"""
为以下产品生成营销文案:
产品名称:{request.product_name}
产品特点:
{self._format_list(request.product_features)}
目标受众:{request.target_audience}
语气:{request.tone}
发布平台:{request.platform}
要求:
1. 突出产品核心价值
2. 引发目标受众共鸣
3. 包含行动号召(CTA)
4. 适合 {request.platform} 平台
请生成3个不同角度的文案变体,每个变体包含标题和正文。
"""
        return prompt

    @staticmethod
    def _format_list(items: List[str]) -> str:
        # Render items as a bullet list for inclusion in prompts.
        return "\n".join([f"- {item}" for item in items])
# ============== A/B 测试文案生成 ==============
class CopywritingABTester:
    """Generate control/treatment copy variants for A/B testing."""

    def __init__(self, generator: CopywritingGenerator):
        self.generator = generator

    def generate_ab_test_variants(
        self,
        request: CopywritingRequest,
        variants: int = 4
    ) -> Dict:
        """
        Generate one control variant plus (variants - 1) treatment variants.

        Each treatment is generated under a different persuasion strategy,
        which is recorded on the variant under the "strategy" key.

        Returns:
            {
                "control": control-group variant,
                "treatment": list of treatment-group variants
            }
        """
        # Control group: plain marketing copy with the request as-is.
        control = self.generator.generate_marketing_copy(request, num_variants=1)

        # Treatment strategies, cycled if more variants are requested.
        strategies = [
            "情感共鸣导向",
            "功能特性导向",
            "社会证明导向",
            "稀缺紧迫导向"
        ]

        treatment_variants = []
        for i in range(variants - 1):
            strategy = strategies[i % len(strategies)]
            # Bake the strategy into the request tone so it actually reaches
            # the prompt builder. The original computed a strategy prompt
            # string and then discarded it, so every "treatment" was
            # generated identically to the control.
            request_copy = request.model_copy(
                update={"tone": f"{request.tone},{strategy}"}
            )
            variant = self.generator.generate_marketing_copy(
                request_copy,
                num_variants=1
            )[0]
            variant["strategy"] = strategy
            treatment_variants.append(variant)

        return {
            "control": control[0],
            "treatment": treatment_variants
        }
# ============== 使用示例 ==============
if __name__ == "__main__":
    generator = CopywritingGenerator(api_key="your-api-key")

    # Example 1: marketing copy variants.
    request = CopywritingRequest(
        product_name="AI智能写作助手",
        product_features=["一键生成", "多平台适配", "SEO优化"],
        target_audience="内容创作者",
        tone="professional",
        platform="xiaohongshu"
    )
    # Loop variable renamed from `copy` to avoid shadowing the builtin.
    for variant in generator.generate_marketing_copy(request, num_variants=3):
        print(f"变体 {variant['variant']}:")
        print(variant['content'])
        print("---")

    # Example 2: SEO product description.
    seo_copy = generator.generate_seo_copy(
        product_name="AI写作工具",
        keywords=["AI", "写作助手", "内容生成"],
        target_length=300
    )
    print("SEO文案:", seo_copy)

    # Example 3: social-media post.
    social_copy = generator.generate_social_media_copy(
        content="推出全新AI写作功能,让创作更轻松",
        platform="xiaohongshu",
        style="casual"
    )
    print("小红书文案:", social_copy["content"])
UI与代码生成
4.1 UI生成技术栈
| 技术 |
描述 |
| GPT-4V |
多模态理解截图转代码 |
| v0.dev |
Vercel UI生成工具 |
| Screenshot-to-Code |
截图转代码开源方案 |
4.2 代码生成实现
# aigc/code_generator.py
"""
AIGC 代码生成实现
包含:UI生成、代码补全、代码重构
"""
from typing import Dict, List, Optional

import openai
class CodeGenerator:
    """LLM-backed code generation: UI components, functions, refactors, tests.

    All prompts are runtime strings (Chinese) and are kept verbatim.
    """

    def __init__(self, api_key: str, model: str = "gpt-4"):
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def generate_ui_from_description(
        self,
        description: str,
        framework: str = "React",
        style: str = "modern"
    ) -> str:
        """Generate a UI component from a natural-language description.

        Returns the raw model output (expected to be code only).
        """
        prompt = f"""
根据以下描述生成 {framework} 组件代码:
描述:{description}
风格:{style}
要求:
1. 使用 {framework} 框架
2. 代码完整可直接运行
3. 包含必要的样式
4. 遵循最佳实践
5. 包含TypeScript类型定义
只返回代码,不要解释。
"""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    def generate_ui_from_screenshot(
        self,
        image_path: str,
        framework: str = "React"
    ) -> str:
        """Generate UI code from a screenshot via a vision-capable model."""
        import base64
        # Inline the screenshot as a base64 data URL for the vision API.
        with open(image_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode()
        prompt = f"""
分析这个UI截图并生成 {framework} 组件代码。
要求:
1. 精确还原UI布局
2. 使用合适的组件库(如 Tailwind CSS)
3. 包含响应式设计
4. 代码可直接使用
只返回代码。
"""
        # NOTE(review): "gpt-4-vision-preview" is a legacy model id --
        # confirm the target API deployment still serves it.
        response = self.client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_data}"
                        }
                    }
                ]
            }]
        )
        return response.choices[0].message.content

    def generate_function(
        self,
        description: str,
        language: str = "Python",
        context: Optional[str] = None
    ) -> str:
        """Generate a single function; `context` optionally supplies
        surrounding code the function must fit into.
        """
        prompt = f"""
用 {language} 编写以下功能的代码:
{description}
"""
        if context:
            prompt += f"\n\n上下文代码:\n{context}"
        prompt += "\n\n只返回代码,不要解释。"
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    def refactor_code(
        self,
        code: str,
        goal: str = "improve readability"
    ) -> str:
        """Refactor `code` toward `goal` while preserving behavior."""
        prompt = f"""
重构以下代码以{goal}:
原始代码:
{code}
要求:
1. 保持功能不变
2. 提高代码质量
3. 添加必要的注释
只返回重构后的代码。
"""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    def generate_tests(
        self,
        code: str,
        test_framework: str = "pytest"
    ) -> str:
        """Generate unit tests for `code` using `test_framework`."""
        prompt = f"""
为以下代码生成 {test_framework} 测试:
代码:
{code}
要求:
1. 覆盖主要功能
2. 包含边界条件测试
3. 使用合适的断言
只返回测试代码。
"""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
# ============== 完整App生成 ==============
class AppGenerator:
    """Scaffold a complete application (frontend + backend + deploy config).

    NOTE(review): the _generate_* helpers build prompts but return empty
    dicts -- the LLM calls are placeholders ("实现省略"), so the prompt
    variables are unused until those calls are implemented.
    """

    def __init__(self, code_generator: CodeGenerator):
        self.code_gen = code_generator

    def generate_full_app(
        self,
        app_description: str,
        tech_stack: Dict[str, str]
    ) -> Dict:
        """
        Generate code for a complete application.

        Args:
            app_description: natural-language description of the app.
            tech_stack: mapping with "frontend", "backend" and "database"
                keys, e.g.
                {
                    "frontend": "React",
                    "backend": "FastAPI",
                    "database": "PostgreSQL"
                }

        Returns:
            {"frontend": ..., "backend": ..., "deployment": ...}
            (all empty dicts until the helpers are implemented -- see
            class-level note).
        """
        # Frontend code.
        frontend = self._generate_frontend(
            app_description,
            tech_stack["frontend"]
        )
        # Backend code.
        backend = self._generate_backend(
            app_description,
            tech_stack["backend"],
            tech_stack["database"]
        )
        # Deployment configuration.
        deploy = self._generate_deployment(tech_stack)
        return {
            "frontend": frontend,
            "backend": backend,
            "deployment": deploy
        }

    def _generate_frontend(self, description: str, framework: str) -> Dict:
        """Build the frontend-generation prompt (LLM call not yet implemented)."""
        prompt = f"""
生成前端应用代码,技术栈:{framework}
应用描述:{description}
生成以下文件:
1. 主组件
2. 路由配置
3. API调用层
4. 样式文件
返回JSON格式,键为文件路径,值为文件内容。
"""
        # Implementation omitted.
        return {}

    def _generate_backend(self, description: str, framework: str, db: str) -> Dict:
        """Build the backend-generation prompt (LLM call not yet implemented)."""
        prompt = f"""
生成后端API代码,技术栈:{framework}, 数据库:{db}
应用描述:{description}
生成以下文件:
1. 主应用入口
2. API路由
3. 数据模型
4. 数据库连接
返回JSON格式,键为文件路径,值为文件内容。
"""
        # Implementation omitted.
        return {}

    def _generate_deployment(self, tech_stack: Dict) -> Dict:
        """Build the deployment-config prompt (LLM call not yet implemented)."""
        prompt = f"""
生成Docker部署配置
技术栈:{tech_stack}
生成:
1. Dockerfile
2. docker-compose.yml
3. nginx配置
返回JSON格式。
"""
        # Implementation omitted.
        return {}
# ============== 使用示例 ==============
if __name__ == "__main__":
    generator = CodeGenerator(api_key="your-api-key")

    # Example 1: UI component from a text description.
    ui_code = generator.generate_ui_from_description(
        description="一个现代化的登录页面,包含用户名、密码输入框和登录按钮",
        framework="React",
        style="modern"
    )
    print("UI代码:", ui_code)

    # Example 2: UI from a screenshot (needs a vision model; disabled).
    # ui_from_image = generator.generate_ui_from_screenshot(
    #     image_path="screenshot.png",
    #     framework="React"
    # )

    # Example 3: single-function generation.
    func_code = generator.generate_function(
        description="快速排序算法,参数为列表,返回排序后的列表",
        language="Python"
    )
    print("函数代码:", func_code)

    # Example 4: refactoring an existing snippet.
    refactored = generator.refactor_code(
        code="def add(a,b):return a+b",
        goal="improve readability and add type hints"
    )
    print("重构后:", refactored)

    # Example 5: tests for the generated function.
    test_code = generator.generate_tests(
        code=func_code,
        test_framework="pytest"
    )
    print("测试代码:", test_code)
内容质量控制
5.1 质量评估维度
| 维度 |
评估方法 |
| 一致性 |
与Prompt的一致性 |
| 质量 |
图像清晰度、文案流畅度 |
| 安全性 |
NSFW检测、敏感内容过滤 |
| 版权 |
相似度检测、水印检测 |
5.2 质量控制实现
# aigc/quality_control.py
"""
AIGC 内容质量控制
包含:NSFW检测、相似度检测、合规检查
"""
import hashlib
from typing import Any, Dict, List, Optional

import numpy as np
import torch
from PIL import Image
class ContentSafetyChecker:
    """NSFW/sensitive-content checks for generated images and text."""

    def __init__(self):
        self.feature_extractor = None
        self.nsfw_detector = self._load_nsfw_detector()

    def _load_nsfw_detector(self):
        """Load the safety checker (and its CLIP feature extractor) bundled
        with the SD 1.5 pipeline.
        """
        from diffusers import StableDiffusionPipeline
        # NOTE: the original passed safety_checker=True, which is not a valid
        # value -- the kwarg expects a checker module or None. Omitting it
        # loads the default bundled checker.
        pipeline = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5"
        )
        # The checker needs CLIP-preprocessed input, so keep the extractor.
        self.feature_extractor = pipeline.feature_extractor
        return pipeline.safety_checker

    def check_image_safety(self, image: Image.Image) -> Dict:
        """Run the SD safety checker on one image.

        Returns a dict with "safe", "confidence" and "risk_level" keys.
        NOTE(review): the checker returns boolean flags, not a probability,
        so "confidence" is 0.0/1.0 -- confirm downstream consumers accept
        that (the original indexed the returned image array as if it were a
        score, which crashed at runtime).
        """
        if not self.nsfw_detector:
            return {"safe": True, "confidence": 0}
        # StableDiffusionSafetyChecker expects CLIP features plus the raw
        # images and returns (images, has_nsfw_concepts).
        clip_input = self.feature_extractor(image, return_tensors="pt").pixel_values
        _, has_nsfw = self.nsfw_detector(
            images=np.array(image)[None, ...],
            clip_input=clip_input
        )
        flagged = bool(has_nsfw[0])
        confidence = 1.0 if flagged else 0.0
        return {
            "safe": not flagged,
            "confidence": confidence,
            "risk_level": "high" if flagged else "low"
        }

    def check_text_safety(self, text: str) -> Dict:
        """Reject text matching any blacklisted pattern.

        Returns {"safe": True} or {"safe": False, "reason": ...}.
        """
        import re
        # Sensitive-word regexes, matched anywhere in the text.
        sensitive_patterns = [
            r'暴力',
            r'非法',
            r'赌博'
        ]
        for pattern in sensitive_patterns:
            if re.search(pattern, text):
                return {
                    "safe": False,
                    "reason": f"检测到敏感内容: {pattern}"
                }
        return {"safe": True}
class SimilarityDetector:
    """Text and image similarity scoring (embeddings + perceptual hash)."""

    def __init__(self):
        from sentence_transformers import SentenceTransformer
        self.model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

    def text_similarity(self, text1: str, text2: str) -> float:
        """Cosine similarity of the two texts' sentence embeddings."""
        from sklearn.metrics.pairwise import cosine_similarity
        vec_a, vec_b = self.model.encode([text1, text2])
        return float(cosine_similarity([vec_a], [vec_b])[0][0])

    def image_similarity(self, img1: Image.Image, img2: Image.Image) -> float:
        """1 - normalized Hamming distance between perceptual hashes."""
        first = self._image_hash(img1)
        second = self._image_hash(img2)
        mismatches = self._hamming_distance(first, second)
        return 1 - (mismatches / len(first))

    @staticmethod
    def _image_hash(image: Image.Image) -> str:
        """Perceptual hash (pHash) of the image, as a string."""
        import imagehash
        return str(imagehash.phash(image))

    @staticmethod
    def _hamming_distance(hash1: str, hash2: str) -> int:
        """Number of positions at which the two hash strings differ."""
        return sum(a != b for a, b in zip(hash1, hash2))
class QualityGate:
    """Combine safety and similarity checks into one pass/fail evaluation."""

    def __init__(
        self,
        safety_checker: ContentSafetyChecker,
        similarity_detector: SimilarityDetector
    ):
        self.safety_checker = safety_checker
        self.similarity_detector = similarity_detector

    def evaluate_generated_content(
        self,
        content: Any,
        reference: Optional[Any] = None
    ) -> Dict:
        """
        Evaluate a generated artifact (PIL image or text).

        Args:
            content: generated artifact; images get NSFW checks, strings get
                sensitive-word checks, other types are not safety-checked.
            reference: optional original to compare against (same type as
                `content`).

        Returns:
            {
                "passed": bool,          # False iff a safety check failed
                "safety": dict | None,
                "similarity": float | None,
                "overall_score": float   # weighted safety + similarity
            }
        """
        # `any` (the builtin) was used as the annotation in the original;
        # corrected to typing.Any.
        result = {
            "passed": True,
            "safety": None,
            "similarity": None,
            "overall_score": 0
        }

        # --- safety check, dispatched on content type ---
        if isinstance(content, Image.Image):
            safety = self.safety_checker.check_image_safety(content)
        elif isinstance(content, str):
            safety = self.safety_checker.check_text_safety(content)
        else:
            safety = None
        if safety is not None:
            result["safety"] = safety
            if not safety["safe"]:
                result["passed"] = False

        # --- similarity check, only when types match ---
        if reference is not None:
            if isinstance(content, str) and isinstance(reference, str):
                result["similarity"] = self.similarity_detector.text_similarity(
                    content, reference
                )
            elif isinstance(content, Image.Image) and isinstance(reference, Image.Image):
                result["similarity"] = self.similarity_detector.image_similarity(
                    content, reference
                )

        # --- weighted overall score ---
        score = 0.0
        weight = 0.0
        if result["safety"] is not None:
            # Count the safety component whenever a check ran, so unsafe
            # content scores 0 on it. The original only added the weight
            # when the check passed, which inflated overall_score for
            # unsafe content (similarity alone filled the whole weight).
            weight += 0.5
            if result["safety"]["safe"]:
                score += 0.5
        if result["similarity"] is not None:
            score += result["similarity"] * 0.5
            weight += 0.5
        result["overall_score"] = score / weight if weight > 0 else 0
        return result
# ============== 使用示例 ==============
if __name__ == "__main__":
    # Wire the individual checkers into a quality gate.
    safety_checker = ContentSafetyChecker()
    similarity_detector = SimilarityDetector()
    quality_gate = QualityGate(safety_checker, similarity_detector)

    # Example 1: image safety check.
    test_image = Image.open("test.png")
    safety_result = safety_checker.check_image_safety(test_image)
    print("安全检查:", safety_result)

    # Example 2: text similarity.
    sim = similarity_detector.text_similarity(
        "这是一只可爱的猫",
        "这是一只很可爱的猫"
    )
    print("相似度:", sim)

    # Example 3: end-to-end quality evaluation.
    evaluation = quality_gate.evaluate_generated_content(
        "这是一只猫",
        reference="这是一只很可爱的猫"
    )
    print("质量评估:", evaluation)
应用场景
6.1 营销投放场景
| 场景 | 技术 | 实现要点 |
|------|------|----------|
| 广告素材 | 图像生成 + 文案 | A/B测试、多平台适配 |
| 动态创意 | 模板填充 | 批量生成、个性化 |
| 落地页 | UI生成 | 响应式、SEO优化 |
6.2 内容创作场景
| 场景 |
技术 |
实现要点 |
| 社交媒体 |
多模态生成 |
风格一致、批量生产 |
| 短视频 |
视频生成 |
节奏控制、音乐同步 |
| 图文创作 |
图文配合 |
内容连贯、风格统一 |
6.3 开发提效场景
| 场景 |
技术 |
实现要点 |
| 原型设计 |
UI生成 |
快速迭代 |
| 代码补全 |
代码生成 |
上下文理解 |
| 测试生成 |
自动测试 |
覆盖率保证 |
最佳实践
1. 性能优化
- 使用显存优化:
enable_model_cpu_offload, enable_attention_slicing
- 批量生成:减少推理次数
- 模型量化:使用 FP16/INT8
- 缓存机制:相同提示词缓存结果
2. 成本控制
- 小模型优先:简单任务用小模型
- 提示词优化:减少token消耗
- 混合部署:高频任务本地部署
- 结果缓存:避免重复生成
3. 质量保证
- 多轮生成:选择最佳结果
- 质量评估:自动筛选
- 人工审核:关键内容人工确认
- A/B测试:持续优化
文档版本: 1.0
最后更新: 2026-01-22