【AI Agent 知识库】27-AIGC内容生成技术

内容纲要

AIGC 内容生成技术

图片、视频、文案、UI、代码生成全栈技术


目录


核心技术概念

概念 定义 核心价值
Diffusion Model 扩散模型 高质量图像生成
Stable Diffusion 开源扩散模型 免费、可定制、效果优秀
ControlNet 条件控制网络 精确控制生成图像
LoRA 低秩适应 小样本风格定制
Text-to-Image 文本生成图像 创意设计
Image-to-Image 图像生成图像 图像编辑、风格迁移
Inpainting 图像修复 局部编辑、去除水印
Text-to-Video 文本生成视频 视频创作
Text-to-Code 文本生成代码 开发提效

图像生成

1.1 主流模型对比

模型 类型 优势 劣势 适用场景
DALL-E 3 商业闭源 效果好、理解强 费用高 商业应用
Midjourney 商业闭源 艺术效果强 API限制 艺术创作
Stable Diffusion 开源 免费、可定制 需要GPU 私有化部署
Stable Diffusion XL 开源 高分辨率、质量高 显存需求大 高质量生成

1.2 Stable Diffusion 核心组件

# aigc/image_generator.py
"""
AIGC 图像生成核心实现
包含:SD基础推理、ControlNet、LoRA加载
"""

from typing import Optional, List, Dict
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionXLPipeline,
    ControlNetModel,
    StableDiffusionControlNetPipeline
)
from diffusers.utils import load_image
import torch
from PIL import Image

class ImageGenerator:
    """Base text-to-image generator built on Stable Diffusion.

    Wraps a `StableDiffusionPipeline` and exposes a simple `generate()`
    API with seeding, negative prompts and batched output.
    """

    def __init__(
        self,
        model_name: str = "runwayml/stable-diffusion-v1-5",
        device: str = "cuda",
        torch_dtype: torch.dtype = torch.float16
    ):
        # model_name: HuggingFace model id of the base SD checkpoint.
        # device: device used when seeding the RNG ("cuda" / "cpu").
        # torch_dtype: inference precision; fp16 roughly halves VRAM use.
        self.device = device
        self.torch_dtype = torch_dtype
        self.model_name = model_name
        self.pipeline = self._load_pipeline()

    def _load_pipeline(self):
        """Load the base pipeline with memory optimizations enabled."""
        pipeline = StableDiffusionPipeline.from_pretrained(
            self.model_name,
            torch_dtype=self.torch_dtype,
            safety_checker=None  # NOTE: disables NSFW filtering entirely
        )

        # Memory optimizations. `enable_model_cpu_offload` manages device
        # placement itself, so the pipeline must NOT be moved to CUDA
        # first (the original `.to(self.device)` defeated the offloading).
        pipeline.enable_attention_slicing()
        pipeline.enable_model_cpu_offload()

        return pipeline

    def generate(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        num_images: int = 1,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 50,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> List[Image.Image]:
        """Generate images from a text prompt.

        Args:
            prompt: text description of the desired image.
            negative_prompt: concepts to steer the sampler away from.
            num_images: images generated per prompt.
            width: output width in pixels (SD expects multiples of 8).
            height: output height in pixels.
            num_inference_steps: denoising steps; more = slower, finer.
            guidance_scale: classifier-free guidance strength.
            seed: optional seed for reproducible results.

        Returns:
            List of generated PIL images.
        """
        # Seed a dedicated generator so results are reproducible.
        generator = None
        if seed is not None:
            generator = torch.Generator(device=self.device).manual_seed(seed)

        with torch.inference_mode():
            result = self.pipeline(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_images_per_prompt=num_images,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator
            )

        return result.images

class SDXLGenerator(ImageGenerator):
    """Stable Diffusion XL generator (1024x1024-class outputs, fp16)."""

    def __init__(self, device: str = "cuda"):
        super().__init__(
            model_name="stabilityai/stable-diffusion-xl-base-1.0",
            device=device
        )

    def _load_pipeline(self):
        """Load the SDXL base pipeline with memory optimizations."""
        pipeline = StableDiffusionXLPipeline.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16"
        )

        # `enable_model_cpu_offload` handles device placement itself;
        # moving the pipeline to CUDA beforehand (as the original did
        # with `.to(self.device)`) defeats the offloading.
        pipeline.enable_attention_slicing()
        pipeline.enable_model_cpu_offload()

        return pipeline

class ControlNetGenerator:
    """Conditioned generation with ControlNet (edges, pose, depth, ...)."""

    def __init__(
        self,
        base_model: str = "runwayml/stable-diffusion-v1-5",
        controlnet_model: str = "lllyasviel/sd-controlnet-canny",
        device: str = "cuda"
    ):
        # base_model: SD checkpoint used for generation.
        # controlnet_model: conditioning network (Canny by default).
        self.device = device

        # Load the ControlNet conditioning network.
        controlnet = ControlNetModel.from_pretrained(
            controlnet_model,
            torch_dtype=torch.float16
        )

        # Load the base pipeline with the ControlNet attached.
        self.pipeline = StableDiffusionControlNetPipeline.from_pretrained(
            base_model,
            controlnet=controlnet,
            torch_dtype=torch.float16,
            safety_checker=None
        )

        # Offloading manages device placement; do not `.to(device)` first.
        self.pipeline.enable_model_cpu_offload()

    def generate_with_control(
        self,
        prompt: str,
        control_image: Image.Image,
        control_conditioning_scale: float = 1.0,
        **kwargs
    ) -> List[Image.Image]:
        """Generate images guided by a preprocessed control image.

        Args:
            prompt: text prompt.
            control_image: conditioning image (e.g. a Canny edge map).
            control_conditioning_scale: strength of the control signal.

        Returns:
            List of generated PIL images.
        """
        with torch.inference_mode():
            # BUG FIX: StableDiffusionControlNetPipeline takes the
            # conditioning image via `image=`, not `control_image=`
            # (the original raised an unexpected-keyword error).
            result = self.pipeline(
                prompt=prompt,
                image=control_image,
                controlnet_conditioning_scale=control_conditioning_scale,
                **kwargs
            )
        return result.images

class LoRAGenerator:
    """LoRA-styled generation on top of a base SD checkpoint.

    BUG FIX: renamed from the misspelled `LoRAGGenerator` — callers in
    this file already use `LoRAGenerator`.
    """

    def __init__(
        self,
        base_model: str = "runwayml/stable-diffusion-v1-5",
        lora_path: Optional[str] = None,
        device: str = "cuda"
    ):
        # lora_path: optional path to a LoRA checkpoint (.safetensors).
        self.device = device

        # Load the base model; offloading manages device placement, so
        # the pipeline is not moved to CUDA up front.
        self.pipeline = StableDiffusionPipeline.from_pretrained(
            base_model,
            torch_dtype=torch.float16
        )

        # Attach LoRA weights under a known adapter name so the scale
        # can be adjusted later in `generate`.
        if lora_path:
            self.pipeline.load_lora_weights(lora_path, adapter_name="default")

        self.pipeline.enable_model_cpu_offload()

    def generate(
        self,
        prompt: str,
        lora_scale: float = 1.0,
        **kwargs
    ) -> List[Image.Image]:
        """Generate with the loaded LoRA applied at the given strength.

        Args:
            prompt: text prompt.
            lora_scale: LoRA blending weight (0 disables the style).
        """
        # BUG FIX: `set_adaptor_weights` does not exist in diffusers;
        # adapter weights are passed to `set_adapters` directly.
        self.pipeline.set_adapters(["default"], adapter_weights=[lora_scale])

        with torch.inference_mode():
            result = self.pipeline(prompt=prompt, **kwargs)

        return result.images

# ============== 图像预处理 ==============

class ImagePreprocessor:
    """Preprocessing utilities that turn a photo into ControlNet hints."""

    @staticmethod
    def canny_edge_detection(
        image: Image.Image,
        low_threshold: int = 100,
        high_threshold: int = 200
    ) -> Image.Image:
        """Extract a Canny edge map for edge-conditioned generation."""
        import cv2
        import numpy as np

        # PIL -> ndarray -> grayscale, then run the Canny detector.
        grayscale = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
        edge_map = cv2.Canny(grayscale, low_threshold, high_threshold)

        # Back to PIL for downstream pipelines.
        return Image.fromarray(edge_map)

    @staticmethod
    def pose_detection(image: Image.Image) -> Image.Image:
        """Detect a human-pose skeleton map with OpenPose."""
        from controlnet_aux import OpenposeDetector

        openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
        return openpose(image)

    @staticmethod
    def depth_estimation(image: Image.Image) -> Image.Image:
        """Estimate a per-pixel depth map with MiDaS."""
        from controlnet_aux import MidasDetector

        midas = MidasDetector.from_pretrained("valhalla/t2iadapter-depth-midas")
        return midas(image)

    @staticmethod
    def segmentation(image: Image.Image) -> Image.Image:
        """Produce a semantic segmentation map with Segment Anything."""
        from controlnet_aux import SamDetector

        sam = SamDetector.from_pretrained("ybelkada/segment-anything")
        return sam(image)

# ============== 图像后处理 ==============

class ImagePostprocessor:
    """Post-processing utilities: super-resolution upscaling and inpainting."""

    @staticmethod
    def upscale(image: Image.Image, scale: int = 2) -> Image.Image:
        """Upscale an image with the SD x4 upscaler pipeline.

        NOTE(review): `scale` is accepted but not forwarded — the x4
        pipeline always upscales by its fixed factor; confirm intent.
        """
        from diffusers import StableDiffusionUpscalePipeline

        upscaler = StableDiffusionUpscalePipeline.from_pretrained(
            "stabilityai/stable-diffusion-x4-upscaler",
            torch_dtype=torch.float16
        ).to("cuda")

        output = upscaler(
            image=image,
            prompt="high quality, detailed",
            num_inference_steps=20
        )
        return output.images[0]

    @staticmethod
    def inpaint(
        image: Image.Image,
        mask: Image.Image,
        prompt: str
    ) -> Image.Image:
        """Repaint the masked region of an image according to the prompt."""
        from diffusers import StableDiffusionInpaintPipeline

        inpainter = StableDiffusionInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-inpainting",
            torch_dtype=torch.float16
        ).to("cuda")

        output = inpainter(
            prompt=prompt,
            image=image,
            mask_image=mask,
            num_inference_steps=50
        )
        return output.images[0]

# ============== 使用示例 ==============

if __name__ == "__main__":
    # Demo usage. Every step below downloads/loads large models and
    # requires a CUDA GPU; prompts are intentionally left in Chinese.

    # 1. Basic text-to-image generation
    generator = ImageGenerator(model_name="runwayml/stable-diffusion-v1-5")
    images = generator.generate(
        prompt="一只可爱的猫咪坐在窗台上,阳光明媚",
        num_images=2,
        seed=42  # fixed seed for reproducible output
    )
    images[0].save("cat.png")

    # 2. ControlNet-guided generation
    preprocessor = ImagePreprocessor()
    control_gen = ControlNetGenerator()

    # Load the reference image and turn it into a Canny edge map
    reference_image = load_image("reference.png")
    control_image = preprocessor.canny_edge_detection(reference_image)

    # Generate while constrained by the edge map
    result = control_gen.generate_with_control(
        prompt="一只猫咪坐在窗台上",
        control_image=control_image,
        control_conditioning_scale=1.0
    )
    result[0].save("controlled_cat.png")

    # 3. LoRA-styled generation
    lora_gen = LoRAGenerator(lora_path="path/to/lora.safetensors")
    styled_images = lora_gen.generate(
        prompt="a beautiful landscape",
        lora_scale=0.8
    )
    styled_images[0].save("styled.png")

1.3 常见 ControlNet 类型

ControlNet 功能 应用场景
Canny 边缘检测 保持轮廓、结构
Pose 人体姿态 人物姿态控制
Depth 深度估计 空间结构控制
Seg 语义分割 区域编辑
Scribble 简笔画 草图生成
Hed 边缘检测 柔和轮廓
MLSD 直线检测 建筑结构

视频生成

2.1 主流视频生成模型

模型 类型 特点
Sora OpenAI 闭源 超长视频、高质量
Runway Gen-2 商业 文字生成视频、图生视频
Pika Labs 商业 图片转视频、风格迁移
Stable Video Diffusion 开源 图片转短视频

2.2 视频生成实现

# aigc/video_generator.py
"""
AIGC 视频生成实现
包含:图生视频、文字生视频
"""

from typing import Optional
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import torch

class VideoGenerator:
    """Image-to-video generator based on Stable Video Diffusion (SVD)."""

    def __init__(self, device: str = "cuda"):
        self.device = device
        self.pipeline = self._load_pipeline()

    def _load_pipeline(self):
        """Load the SVD img2vid pipeline with CPU offloading enabled."""
        pipeline = StableVideoDiffusionPipeline.from_pretrained(
            "stabilityai/stable-video-diffusion-img2vid-xt",
            torch_dtype=torch.float16,
            variant="fp16"
        )

        # Offloading manages device placement; moving the pipeline to
        # CUDA first (as the original did) defeated the offloading.
        pipeline.enable_model_cpu_offload()
        return pipeline

    def image_to_video(
        self,
        image: Image.Image,
        motion_bucket_id: int = 127,
        fps: int = 7,
        num_frames: int = 25,
        num_inference_steps: int = 25
    ) -> list[Image.Image]:
        """
        Animate a still image into a short clip.

        BUG FIX: the original annotated `List[...]` but this module only
        imports `Optional`, so defining the class raised NameError; the
        builtin generic `list[...]` is used instead.

        Args:
            image: input image to animate.
            motion_bucket_id: motion strength (0-255, higher = more motion).
            fps: frames-per-second conditioning value.
            num_frames: number of output frames.
            num_inference_steps: denoising steps.

        Returns:
            List of PIL frames.
        """
        with torch.inference_mode():
            frames = self.pipeline(
                image=image,
                motion_bucket_id=motion_bucket_id,
                fps=fps,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps
            ).frames[0]

        return frames

    def save_video(
        self,
        frames: list[Image.Image],
        output_path: str,
        fps: int = 7
    ):
        """Write a list of PIL frames to a video file via imageio."""
        import imageio
        import numpy as np  # BUG FIX: `np` was used here but never imported

        # Convert PIL frames to numpy arrays for the writer.
        frames_np = [np.array(frame) for frame in frames]

        imageio.mimwrite(output_path, frames_np, fps=fps)

# ============== 文字生视频(调用商业API)==============

class TextToVideoAPI:
    """Text-to-video generation through commercial HTTP APIs."""

    def __init__(self, api_key: str, provider: str = "runway"):
        # provider: "runway" or "pika".
        self.api_key = api_key
        self.provider = provider

    def generate(
        self,
        prompt: str,
        duration: int = 4,
        width: int = 1024,
        height: int = 576
    ) -> str:
        """
        Generate a video from text.

        Args:
            prompt: text description of the video.
            duration: clip length in seconds.
            width: output width in pixels.
            height: output height in pixels.

        Returns:
            video_url: download link for the generated video.

        Raises:
            ValueError: if the configured provider is not supported.
        """
        if self.provider == "runway":
            return self._runway_generate(prompt, duration, width, height)
        elif self.provider == "pika":
            return self._pika_generate(prompt, duration, width, height)
        # BUG FIX: the original silently returned None for unknown providers.
        raise ValueError(f"Unsupported provider: {self.provider}")

    def _runway_generate(self, prompt: str, duration: int, width: int, height: int) -> str:
        """Call the Runway Gen-2 HTTP API and return the output URL."""
        import requests

        url = "https://api.runwayml.com/v1/generate"

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "prompt": prompt,
            "duration": duration,
            "width": width,
            "height": height
        }

        # Fail loudly on HTTP errors and do not hang forever.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()

        return data.get("output_url")

    def _pika_generate(self, prompt: str, duration: int, width: int, height: int) -> str:
        """Pika provider — referenced by `generate` but never implemented
        in the original; made explicit instead of raising AttributeError."""
        raise NotImplementedError("Pika provider is not implemented yet")

# ============== 使用示例 ==============

if __name__ == "__main__":
    # Demo usage. Loads the SVD model (GPU required) and reads/writes
    # local files; the commercial-API path is left commented out.

    # 1. Image-to-video generation
    video_gen = VideoGenerator()
    input_image = Image.open("input.png")

    frames = video_gen.image_to_video(
        image=input_image,
        motion_bucket_id=127,  # medium motion strength
        fps=7,
        num_frames=25
    )

    video_gen.save_video(frames, "output.mp4")

    # 2. Text-to-video via a commercial API (requires an API key)
    # text_to_video = TextToVideoAPI(api_key="your-api-key", provider="runway")
    # video_url = text_to_video.generate(
    #     prompt="一只猫在阳光明媚的窗台上打盹",
    #     duration=4
    # )

文案生成

3.1 文案生成类型

类型 技术点 应用
营销文案 情感驱动、用户画像 广告投放、商品推广
SEO文案 关键词优化、长尾词 搜索排名
产品描述 特点提炼、价值主张 商品详情
社交媒体 内容形式、互动性 微博、小红书

3.2 文案生成实现

# aigc/copywriting_generator.py
"""
AIGC 文案生成实现
包含:营销文案、SEO文案、社交媒体文案
"""

from typing import List, Optional, Dict
from pydantic import BaseModel, Field
import openai

class CopywritingRequest(BaseModel):
    """Request payload describing the product and target for copy generation.

    Note: the `Field(description=...)` strings are part of the runtime
    pydantic schema and are therefore left untouched.
    """
    product_name: str = Field(description="产品名称")  # product name
    product_features: List[str] = Field(description="产品特点")  # selling points
    target_audience: str = Field(description="目标受众")  # intended audience
    tone: str = Field(description="语气", default="professional")  # writing tone
    platform: str = Field(description="发布平台")  # publishing platform

class CopywritingGenerator:
    """LLM-backed copywriting generator (marketing / SEO / social media).

    Each method builds a single user prompt (intentionally in Chinese —
    these strings are sent to the model at runtime) and returns the
    chat completion's text.
    """

    def __init__(self, api_key: str, model: str = "gpt-4"):
        # model: chat model name passed straight to the OpenAI client.
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def generate_marketing_copy(
        self,
        request: CopywritingRequest,
        num_variants: int = 3
    ) -> List[Dict]:
        """Generate marketing copy variants.

        Args:
            request: structured product/audience/platform description.
            num_variants: number of completions requested (OpenAI `n`).

        Returns:
            List of {"variant": int, "content": str} dicts, one per choice.
        """
        prompt = self._build_marketing_prompt(request)

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            n=num_variants
        )

        return [
            {
                "variant": i + 1,
                "content": choice.message.content
            }
            for i, choice in enumerate(response.choices)
        ]

    def generate_seo_copy(
        self,
        product_name: str,
        keywords: List[str],
        target_length: int = 300
    ) -> str:
        """Generate an SEO-optimized product description.

        Args:
            product_name: product to describe.
            keywords: keywords to weave naturally into the text.
            target_length: approximate target length in characters.
        """
        prompt = f"""
        为以下产品生成SEO优化的产品描述:

        产品名称:{product_name}
        关键词:{', '.join(keywords)}
        目标长度:约 {target_length} 字

        要求:
        1. 自然融入关键词
        2. 突出产品优势
        3. 适合搜索引擎抓取
        4. 语言流畅有吸引力
        """

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content

    def generate_social_media_copy(
        self,
        content: str,
        platform: str = "xiaohongshu",
        style: str = "casual"
    ) -> Dict:
        """Adapt content into a platform-specific social-media post.

        Args:
            content: source content to adapt.
            platform: "xiaohongshu", "weibo" or "wechat".
            style: desired writing style.

        Returns:
            {"platform": ..., "content": ..., "style": ...}
        """
        # Per-platform formatting constraints that get folded into the prompt.
        platform_templates = {
            "xiaohongshu": {
                "format": "标题 + emoji + 正文 + 标签",
                "max_length": 1000,
                "emoji_style": "丰富的emoji"
            },
            "weibo": {
                "format": "正文 + 话题标签",
                "max_length": 140,
                "emoji_style": "适度使用"
            },
            "wechat": {
                "format": "标题 + 正文",
                "max_length": 2000,
                "emoji_style": "官方风格"
            }
        }

        # Unknown platforms fall back to the xiaohongshu template.
        template = platform_templates.get(platform, platform_templates["xiaohongshu"])

        prompt = f"""
        为以下内容生成 {platform} 平台的文案:

        原始内容:{content}
        风格:{style}

        平台要求:
        - 格式:{template['format']}
        - 字数限制:{template['max_length']}
        - Emoji:{template['emoji_style']}

        请生成完整的文案,包含标题和正文。
        """

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return {
            "platform": platform,
            "content": response.choices[0].message.content,
            "style": style
        }

    def _build_marketing_prompt(self, request: CopywritingRequest) -> str:
        """Assemble the marketing-copy prompt from the request fields."""
        prompt = f"""
        为以下产品生成营销文案:

        产品名称:{request.product_name}
        产品特点:
        {self._format_list(request.product_features)}

        目标受众:{request.target_audience}
        语气:{request.tone}
        发布平台:{request.platform}

        要求:
        1. 突出产品核心价值
        2. 引发目标受众共鸣
        3. 包含行动号召(CTA)
        4. 适合 {request.platform} 平台

        请生成3个不同角度的文案变体,每个变体包含标题和正文。
        """

        return prompt

    @staticmethod
    def _format_list(items: List[str]) -> str:
        # Render items as a Markdown-style bullet list.
        return "\n".join([f"- {item}" for item in items])

# ============== A/B 测试文案生成 ==============

class CopywritingABTester:
    """Generate control/treatment copy variants for A/B testing."""

    def __init__(self, generator: "CopywritingGenerator"):
        self.generator = generator

    def generate_ab_test_variants(
        self,
        request: "CopywritingRequest",
        variants: int = 4
    ) -> Dict:
        """
        Generate A/B-test copy variants.

        Args:
            request: base copywriting request.
            variants: total variant count (1 control + variants-1 treatments).

        Returns:
            {
                "control": control-group copy,
                "treatment": list of treatment-group copies (each tagged
                    with its "strategy")
            }
        """
        # Control group: the unmodified request.
        control = self.generator.generate_marketing_copy(request, num_variants=1)

        # Treatment angles cycled across the requested variant count.
        strategies = [
            "情感共鸣导向",
            "功能特性导向",
            "社会证明导向",
            "稀缺紧迫导向"
        ]

        treatment_variants = []
        for i in range(variants - 1):
            strategy = strategies[i % len(strategies)]

            # BUG FIX: the original built a strategy prompt string but
            # never used it, so every treatment was identical to the
            # control. Fold the strategy into the request's tone so each
            # treatment variant actually differs.
            request_copy = request.model_copy()
            request_copy.tone = f"{request.tone},{strategy}"

            variant = self.generator.generate_marketing_copy(
                request_copy,
                num_variants=1
            )[0]
            variant["strategy"] = strategy
            treatment_variants.append(variant)

        return {
            "control": control[0],
            "treatment": treatment_variants
        }

# ============== 使用示例 ==============

if __name__ == "__main__":
    # Demo usage; requires a valid OpenAI API key. The Chinese string
    # arguments are runtime inputs and are kept as-is.
    generator = CopywritingGenerator(api_key="your-api-key")

    # 1. Marketing copy
    request = CopywritingRequest(
        product_name="AI智能写作助手",
        product_features=["一键生成", "多平台适配", "SEO优化"],
        target_audience="内容创作者",
        tone="professional",
        platform="xiaohongshu"
    )

    copies = generator.generate_marketing_copy(request, num_variants=3)
    for copy in copies:
        print(f"变体 {copy['variant']}:")
        print(copy['content'])
        print("---")

    # 2. SEO copy
    seo_copy = generator.generate_seo_copy(
        product_name="AI写作工具",
        keywords=["AI", "写作助手", "内容生成"],
        target_length=300
    )
    print("SEO文案:", seo_copy)

    # 3. Social-media copy
    social_copy = generator.generate_social_media_copy(
        content="推出全新AI写作功能,让创作更轻松",
        platform="xiaohongshu",
        style="casual"
    )
    print("小红书文案:", social_copy["content"])

UI与代码生成

4.1 UI生成技术栈

技术 描述
GPT-4V 多模态理解,支持截图转代码
v0.dev Vercel UI生成工具
Screenshot-to-Code 截图转代码开源方案

4.2 代码生成实现

# aigc/code_generator.py
"""
AIGC 代码生成实现
包含:UI生成、代码补全、代码重构
"""

from typing import Optional, List
import openai

class CodeGenerator:
    """LLM-backed code generator: UI, functions, refactors, tests.

    Each method builds one user prompt (intentionally in Chinese — these
    strings are sent to the model at runtime) and returns the model's
    raw text response.
    """

    def __init__(self, api_key: str, model: str = "gpt-4"):
        # model: chat model used for text-only generation; the
        # screenshot path below overrides it with a vision model.
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def generate_ui_from_description(
        self,
        description: str,
        framework: str = "React",
        style: str = "modern"
    ) -> str:
        """Generate UI component code from a natural-language description.

        Args:
            description: what the component should look like / do.
            framework: target UI framework (default React).
            style: visual style hint.

        Returns:
            Raw model output (expected to be code only, per the prompt).
        """
        prompt = f"""
        根据以下描述生成 {framework} 组件代码:

        描述:{description}
        风格:{style}

        要求:
        1. 使用 {framework} 框架
        2. 代码完整可直接运行
        3. 包含必要的样式
        4. 遵循最佳实践
        5. 包含TypeScript类型定义

        只返回代码,不要解释。
        """

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content

    def generate_ui_from_screenshot(
        self,
        image_path: str,
        framework: str = "React"
    ) -> str:
        """Generate UI code from a screenshot via a vision model.

        Args:
            image_path: path to a PNG screenshot on disk.
            framework: target UI framework.
        """
        import base64

        # Read and base64-encode the screenshot for the data-URL payload.
        with open(image_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode()

        prompt = f"""
        分析这个UI截图并生成 {framework} 组件代码。

        要求:
        1. 精确还原UI布局
        2. 使用合适的组件库(如 Tailwind CSS)
        3. 包含响应式设计
        4. 代码可直接使用

        只返回代码。
        """

        # NOTE: uses a fixed vision-capable model rather than self.model.
        response = self.client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_data}"
                        }
                    }
                ]
            }]
        )

        return response.choices[0].message.content

    def generate_function(
        self,
        description: str,
        language: str = "Python",
        context: Optional[str] = None
    ) -> str:
        """Generate a single function from a description.

        Args:
            description: what the function should do.
            language: target programming language.
            context: optional surrounding code appended to the prompt.
        """
        prompt = f"""
        用 {language} 编写以下功能的代码:

        {description}
        """

        if context:
            prompt += f"\n\n上下文代码:\n{context}"

        prompt += "\n\n只返回代码,不要解释。"

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content

    def refactor_code(
        self,
        code: str,
        goal: str = "improve readability"
    ) -> str:
        """Refactor code toward the stated goal, preserving behavior.

        Args:
            code: source code to refactor.
            goal: refactoring objective interpolated into the prompt.
        """
        prompt = f"""
        重构以下代码以{goal}:

        原始代码:
        {code}

        要求:
        1. 保持功能不变
        2. 提高代码质量
        3. 添加必要的注释

        只返回重构后的代码。
        """

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content

    def generate_tests(
        self,
        code: str,
        test_framework: str = "pytest"
    ) -> str:
        """Generate test code for the given source.

        Args:
            code: source code under test.
            test_framework: test framework to target (default pytest).
        """
        prompt = f"""
        为以下代码生成 {test_framework} 测试:

        代码:
        {code}

        要求:
        1. 覆盖主要功能
        2. 包含边界条件测试
        3. 使用合适的断言

        只返回测试代码。
        """

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content

# ============== 完整App生成 ==============

class AppGenerator:
    """Generate a full application skeleton (frontend + backend + deploy).

    BUG FIX: the original annotated parameters/returns with `Dict`, but
    this module never imports `Dict` from typing, so defining the class
    raised NameError. Builtin `dict` generics are used instead.
    """

    def __init__(self, code_generator: "CodeGenerator"):
        self.code_gen = code_generator

    def generate_full_app(
        self,
        app_description: str,
        tech_stack: dict
    ) -> dict:
        """
        Generate code for a complete application.

        Args:
            app_description: natural-language description of the app.
            tech_stack: mapping such as
                {
                    "frontend": "React",
                    "backend": "FastAPI",
                    "database": "PostgreSQL"
                }

        Returns:
            {"frontend": ..., "backend": ..., "deployment": ...}
        """
        # Frontend code
        frontend = self._generate_frontend(
            app_description,
            tech_stack["frontend"]
        )

        # Backend code
        backend = self._generate_backend(
            app_description,
            tech_stack["backend"],
            tech_stack["database"]
        )

        # Deployment configuration
        deploy = self._generate_deployment(tech_stack)

        return {
            "frontend": frontend,
            "backend": backend,
            "deployment": deploy
        }

    def _generate_frontend(self, description: str, framework: str) -> dict:
        """Generate frontend files. Placeholder — prompt built but unused."""
        prompt = f"""
        生成前端应用代码,技术栈:{framework}

        应用描述:{description}

        生成以下文件:
        1. 主组件
        2. 路由配置
        3. API调用层
        4. 样式文件

        返回JSON格式,键为文件路径,值为文件内容。
        """
        # TODO: send `prompt` through self.code_gen; implementation omitted.
        return {}

    def _generate_backend(self, description: str, framework: str, db: str) -> dict:
        """Generate backend files. Placeholder — prompt built but unused."""
        prompt = f"""
        生成后端API代码,技术栈:{framework}, 数据库:{db}

        应用描述:{description}

        生成以下文件:
        1. 主应用入口
        2. API路由
        3. 数据模型
        4. 数据库连接

        返回JSON格式,键为文件路径,值为文件内容。
        """
        # TODO: send `prompt` through self.code_gen; implementation omitted.
        return {}

    def _generate_deployment(self, tech_stack: dict) -> dict:
        """Generate deployment config. Placeholder — prompt built but unused."""
        prompt = f"""
        生成Docker部署配置

        技术栈:{tech_stack}

        生成:
        1. Dockerfile
        2. docker-compose.yml
        3. nginx配置

        返回JSON格式。
        """
        # TODO: send `prompt` through self.code_gen; implementation omitted.
        return {}

# ============== 使用示例 ==============

if __name__ == "__main__":
    # Demo usage; requires a valid OpenAI API key. Chinese description
    # strings are runtime model inputs and are kept as-is.
    generator = CodeGenerator(api_key="your-api-key")

    # 1. UI generation from a text description
    ui_code = generator.generate_ui_from_description(
        description="一个现代化的登录页面,包含用户名、密码输入框和登录按钮",
        framework="React",
        style="modern"
    )
    print("UI代码:", ui_code)

    # 2. UI generation from a screenshot (needs a local PNG)
    # ui_from_image = generator.generate_ui_from_screenshot(
    #     image_path="screenshot.png",
    #     framework="React"
    # )

    # 3. Function generation
    func_code = generator.generate_function(
        description="快速排序算法,参数为列表,返回排序后的列表",
        language="Python"
    )
    print("函数代码:", func_code)

    # 4. Code refactoring
    refactored = generator.refactor_code(
        code="def add(a,b):return a+b",
        goal="improve readability and add type hints"
    )
    print("重构后:", refactored)

    # 5. Test generation (reuses the function generated in step 3)
    test_code = generator.generate_tests(
        code=func_code,
        test_framework="pytest"
    )
    print("测试代码:", test_code)

内容质量控制

5.1 质量评估维度

维度 评估方法
一致性 与Prompt的一致性
质量 图像清晰度、文案流畅度
安全性 NSFW检测、敏感内容过滤
版权 相似度检测、水印检测

5.2 质量控制实现

# aigc/quality_control.py
"""
AIGC 内容质量控制
包含:NSFW检测、相似度检测、合规检查
"""

from typing import List, Optional
import torch
from PIL import Image
import hashlib

class ContentSafetyChecker:
    """Safety checks for generated images and text."""

    def __init__(self):
        self.nsfw_detector = self._load_nsfw_detector()

    def _load_nsfw_detector(self):
        """Load the NSFW safety checker that ships with Stable Diffusion."""
        from diffusers import StableDiffusionPipeline

        # BUG FIX: `safety_checker=True` is not a valid value — the
        # pipeline expects a checker model instance or None. Omitting
        # the argument loads the default safety checker.
        pipeline = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5"
        )
        return pipeline.safety_checker

    def check_image_safety(self, image: "Image.Image") -> dict:
        """Classify an image as safe/unsafe with the SD safety checker.

        BUG FIX: the original annotated `-> Dict` without importing Dict
        (NameError at class definition) and used `np` without importing
        numpy.

        Returns:
            {"safe": bool, "confidence": float, "risk_level": str}
        """
        import numpy as np

        if not self.nsfw_detector:
            return {"safe": True, "confidence": 0}

        # HWC uint8 image -> NCHW tensor expected by the checker.
        image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).unsqueeze(0)
        safety_output, _ = self.nsfw_detector(image_tensor, clip_input=None)

        is_safe = safety_output[0][0] < 0.5
        confidence = float(safety_output[0][0])

        return {
            "safe": is_safe,
            "confidence": confidence,
            "risk_level": "high" if confidence > 0.8 else "medium" if confidence > 0.5 else "low"
        }

    def check_text_safety(self, text: str) -> dict:
        """Screen text against a minimal demo blocklist.

        Returns:
            {"safe": True} or {"safe": False, "reason": ...}
        """
        import re

        # Demo-only blocklist; production systems should use a proper
        # moderation model or service.
        sensitive_patterns = [
            r'暴力',
            r'非法',
            r'赌博'
        ]

        for pattern in sensitive_patterns:
            if re.search(pattern, text):
                return {
                    "safe": False,
                    "reason": f"检测到敏感内容: {pattern}"
                }

        return {"safe": True}

class SimilarityDetector:
    """Near-duplicate detection for generated text and images."""

    def __init__(self):
        from sentence_transformers import SentenceTransformer
        self.model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

    def text_similarity(self, text1: str, text2: str) -> float:
        """Cosine similarity between the two texts' sentence embeddings."""
        from sklearn.metrics.pairwise import cosine_similarity

        vec_a, vec_b = self.model.encode([text1, text2])
        return float(cosine_similarity([vec_a], [vec_b])[0][0])

    def image_similarity(self, img1: Image.Image, img2: Image.Image) -> float:
        """Perceptual-hash similarity in [0, 1] (1 = visually identical)."""
        first_hash = self._image_hash(img1)
        second_hash = self._image_hash(img2)

        # Normalized Hamming distance, inverted into a similarity score.
        distance = self._hamming_distance(first_hash, second_hash)
        return 1 - (distance / len(first_hash))

    @staticmethod
    def _image_hash(image: Image.Image) -> str:
        """Perceptual hash (pHash) of an image, as a hex string."""
        import imagehash
        return str(imagehash.phash(image))

    @staticmethod
    def _hamming_distance(hash1: str, hash2: str) -> int:
        """Count of positions at which the two hash strings differ."""
        return sum(left != right for left, right in zip(hash1, hash2))

class QualityGate:
    """Combined safety + similarity gate for generated content.

    BUG FIX: the original annotated `content: any` (the builtin function,
    not a type) and `-> Dict` although this module never imports `Dict`,
    which raised NameError when the class was defined.
    """

    def __init__(
        self,
        safety_checker: "ContentSafetyChecker",
        similarity_detector: "SimilarityDetector"
    ):
        self.safety_checker = safety_checker
        self.similarity_detector = similarity_detector

    def evaluate_generated_content(
        self,
        content: object,
        reference: Optional[object] = None
    ) -> dict:
        """
        Evaluate a piece of generated content (text or image).

        Args:
            content: generated text (str) or image (PIL.Image.Image).
            reference: optional reference of the same type for similarity.

        Returns:
            {
                "passed": bool,
                "safety": {...} | None,
                "similarity": float | None,
                "overall_score": float
            }
        """
        result = {
            "passed": True,
            "safety": None,
            "similarity": None,
            "overall_score": 0
        }

        # Safety check — text first, so plain strings never touch PIL.
        if isinstance(content, str):
            safety = self.safety_checker.check_text_safety(content)
            result["safety"] = safety
            if not safety["safe"]:
                result["passed"] = False

        elif isinstance(content, Image.Image):
            safety = self.safety_checker.check_image_safety(content)
            result["safety"] = safety
            if not safety["safe"]:
                result["passed"] = False

        # Similarity against the reference, when one is supplied and the
        # types match.
        if reference is not None:
            if isinstance(content, str) and isinstance(reference, str):
                result["similarity"] = self.similarity_detector.text_similarity(content, reference)
            elif isinstance(content, Image.Image) and isinstance(reference, Image.Image):
                result["similarity"] = self.similarity_detector.image_similarity(content, reference)

        # Weighted overall score: 0.5 for passing safety, 0.5 scaled by
        # similarity (only when a similarity was computed).
        score = 0
        weight = 0

        if result["safety"] and result["safety"]["safe"]:
            score += 0.5
            weight += 0.5

        if result["similarity"] is not None:
            score += result["similarity"] * 0.5
            weight += 0.5

        result["overall_score"] = score / weight if weight > 0 else 0

        return result

# ============== 使用示例 ==============

if __name__ == "__main__":
    # Demo usage. Constructing these checkers downloads large models
    # (Stable Diffusion safety checker, sentence-transformers).
    safety_checker = ContentSafetyChecker()
    similarity_detector = SimilarityDetector()
    quality_gate = QualityGate(safety_checker, similarity_detector)

    # 1. Image safety check (reads a local file)
    test_image = Image.open("test.png")
    safety_result = safety_checker.check_image_safety(test_image)
    print("安全检查:", safety_result)

    # 2. Text similarity between two near-identical sentences
    sim = similarity_detector.text_similarity(
        "这是一只可爱的猫",
        "这是一只很可爱的猫"
    )
    print("相似度:", sim)

    # 3. Full quality-gate evaluation of a text against a reference
    evaluation = quality_gate.evaluate_generated_content(
        "这是一只猫",
        reference="这是一只很可爱的猫"
    )
    print("质量评估:", evaluation)

应用场景

6.1 营销投放场景

| 场景 | 技术 | 实现要点 |
|------|------|----------|
| 广告素材 | 图像生成 + 文案 | A/B测试、多平台适配 |
| 动态创意 | 模板填充 | 批量生成、个性化 |
| 落地页 | UI生成 | 响应式、SEO优化 |

6.2 内容创作场景

场景 技术 实现要点
社交媒体 多模态生成 风格一致、批量生产
短视频 视频生成 节奏控制、音乐同步
图文创作 图文配合 内容连贯、风格统一

6.3 开发提效场景

场景 技术 实现要点
原型设计 UI生成 快速迭代
代码补全 代码生成 上下文理解
测试生成 自动测试 覆盖率保证

最佳实践

1. 性能优化

  • 使用显存优化:enable_model_cpu_offload, enable_attention_slicing
  • 批量生成:减少推理次数
  • 模型量化:使用 FP16/INT8
  • 缓存机制:相同提示词缓存结果

2. 成本控制

  • 小模型优先:简单任务用小模型
  • 提示词优化:减少token消耗
  • 混合部署:高频任务本地部署
  • 结果缓存:避免重复生成

3. 质量保证

  • 多轮生成:选择最佳结果
  • 质量评估:自动筛选
  • 人工审核:关键内容人工确认
  • A/B测试:持续优化

文档版本: 1.0
最后更新: 2026-01-22

close
arrow_upward