ai_learn_node/backend/prisma/seed.ts

import { PrismaClient } from '@prisma/client';
const prisma = new PrismaClient();
async function main() {
console.log('Seeding database...');
// Clear existing data
await prisma.pathItem.deleteMany();
await prisma.learningPath.deleteMany();
await prisma.userProgress.deleteMany();
await prisma.chapter.deleteMany();
await prisma.course.deleteMany();
// Create machine learning basics courses
const mlBasics1 = await prisma.course.create({
data: {
title: 'Linear Regression',
description: 'Learn the fundamentals of linear regression, including core concepts such as least squares and gradient descent.',
category: 'ML_BASICS',
difficulty: 'BEGINNER',
estimatedHours: 3,
chapters: {
create: [
{
title: 'What Is Linear Regression',
order: 1,
content: `# What Is Linear Regression
Linear regression is one of the simplest and most widely used supervised learning algorithms.
## The Model
Linear regression assumes a linear relationship between the input features and the target and fits a straight line to the data.
### Model Form
For a single feature, the model is a straight line:
\`\`\`python
y = wx + b
\`\`\`
- \`y\` is the predicted value (target variable)
- \`x\` is the input feature
- \`w\` is the weight (slope)
- \`b\` is the bias (intercept)
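To make this concrete, here is a minimal sketch (using NumPy, as the later chapters do) that fits \`w\` and \`b\` to noisy data; the data is made up for illustration:
\`\`\`python
import numpy as np

# Made-up data: y ≈ 2x + 1 plus noise
x = np.linspace(0, 10, 50)
y = 2 * x + 1 + np.random.randn(50)

# np.polyfit with degree 1 fits a straight line y = wx + b
w, b = np.polyfit(x, y, 1)
print(f'w = {w:.2f}, b = {b:.2f}')  # close to 2 and 1
\`\`\`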
### Typical Applications
- House price prediction
- Sales forecasting
- Trend analysis
- Cost estimation
## Summary
Linear regression models the target as a linear function of the inputs and is the natural starting point for machine learning.`,
},
{
title: 'The Least Squares Method',
order: 2,
content: `# The Least Squares Method
Least squares is the classic method for fitting a linear regression model.
## Objective
Find \`w\` and \`b\` that minimize the squared error between predictions and true values:
\`\`\`python
L = Σ(y_i - (wx_i + b))²
\`\`\`
## Solution Methods
### 1. Closed-Form Solution (Single Feature)
For simple linear regression the optimal parameters have a closed form:
\`\`\`python
w = Σ(x_i - x̄)(y_i - ȳ) / Σ(x_i - x̄)²
b = ȳ - w * x̄
\`\`\`
### 2. Normal Equation (Matrix Form)
With multiple features, the solution is written in matrix form:
\`\`\`python
θ = (X^T * X)^(-1) * X^T * y
\`\`\`
## Implementation
\`\`\`python
import numpy as np

def linear_regression(X, y):
    # Prepend a column of ones so the bias is learned as theta[0]
    X = np.column_stack([np.ones(len(X)), X])
    # Solve the normal equation: theta = (X^T X)^(-1) X^T y
    theta = np.linalg.inv(X.T @ X) @ X.T @ y
    return theta
\`\`\`
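A quick usage sketch with made-up numbers (the column of ones is added inside the function, so \`X\` holds only the raw feature):
\`\`\`python
X = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([3.1, 4.9, 7.2, 8.8])  # roughly y = 2x + 1

theta = linear_regression(X, y)
print(theta)  # [b, w], approximately [1, 2]
\`\`\`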
## Summary
Least squares gives linear regression an exact, analytic solution; the next chapter covers gradient descent, which scales better to large problems.`,
},
{
title: 'Gradient Descent',
order: 3,
content: `# Gradient Descent
## What Is Gradient Descent?
Gradient descent iteratively moves the parameters along the negative gradient of the loss until it reaches a minimum.
## Algorithm Steps
1. Initialize \`w\` and \`b\` (e.g. to zeros)
2. Compute the gradient of the loss
3. Update the parameters: \`θ = θ - α * ∇θ\`
4. Repeat steps 2-3 until convergence
\`α\` is the learning rate.
## Implementation
\`\`\`python
import numpy as np

def gradient_descent(X, y, learning_rate=0.01, iterations=1000):
    m = len(y)
    theta = np.zeros(X.shape[1])
    for i in range(iterations):
        # Current predictions
        predictions = X @ theta
        # Gradient of the mean squared error
        gradient = (1/m) * X.T @ (predictions - y)
        # Parameter update step
        theta = theta - learning_rate * gradient
        # Log the loss every 100 iterations
        if i % 100 == 0:
            loss = np.mean((predictions - y) ** 2)
            print(f'Iteration {i}, Loss: {loss}')
    return theta
\`\`\`
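The function expects \`X\` to already contain a bias column; a minimal usage sketch with made-up data:
\`\`\`python
X_raw = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([3.0, 5.0, 7.0, 9.0])  # y = 2x + 1

# Add the bias column, matching the least squares chapter
X = np.column_stack([np.ones(len(X_raw)), X_raw])
theta = gradient_descent(X, y, learning_rate=0.05, iterations=1000)
print(theta)  # approximately [1, 2]
\`\`\`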
## Choosing the Learning Rate
- Too large: the loss oscillates or diverges
- Too small: convergence is very slow
- A common starting point is 0.01
## Summary
Gradient descent is a general optimization method; it matters most on large datasets where the closed-form solution is too expensive.
`,
},
],
},
},
});
const mlBasics2 = await prisma.course.create({
data: {
title: 'Logistic Regression',
description: 'Learn how logistic regression solves classification problems, including the sigmoid function and the loss function.',
category: 'ML_BASICS',
difficulty: 'BEGINNER',
estimatedHours: 4,
chapters: {
create: [
{
title: 'Classification Problems',
order: 1,
content: `# Classification Problems
## Classification vs. Regression
- **Regression**: predicts a continuous value
- **Classification**: predicts a discrete category (e.g. spam/not spam, cat/dog)
## Binary Classification
In binary classification the label takes one of two values, written 0 and 1.
### Examples
- Spam detection: 1 = spam, 0 = not spam
- Disease diagnosis: 1 = positive, 0 = negative
- Churn prediction: 1 = will churn, 0 = will stay
## Why Not Linear Regression?
Linear regression produces unbounded outputs, but a class probability must lie in [0,1], so its predictions cannot be used directly for classification.
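As a concrete sketch, scikit-learn's \`LogisticRegression\` (assumed available here; the method itself is developed in the next chapters) handles exactly this kind of 0/1 problem, with the made-up data below:
\`\`\`python
from sklearn.linear_model import LogisticRegression
import numpy as np

# Made-up 1-feature data: the label is 1 once the feature is large enough
X = np.array([[0.5], [1.0], [1.5], [3.0], [3.5], [4.0]])
y = np.array([0, 0, 0, 1, 1, 1])

model = LogisticRegression()
model.fit(X, y)
print(model.predict([[2.0]]))        # predicted class, 0 or 1
print(model.predict_proba([[2.0]]))  # class probabilities in [0, 1]
\`\`\`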
## Summary
Classification predicts discrete labels; the next chapter introduces the sigmoid function, which turns a linear score into a probability.`,
},
{
title: 'The Sigmoid Function',
order: 2,
content: `# The Sigmoid Function
The sigmoid function maps any real number into the interval (0,1).
## Formula
\`\`\`python
σ(z) = 1 / (1 + e^(-z))
\`\`\`
## Properties
- Output always lies in (0, 1)
- Monotonically increasing
- S-shaped curve
- At z=0, σ(z)=0.5
## Implementation
\`\`\`python
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Plot the curve over [-10, 10]
z = np.linspace(-10, 10, 100)
y = sigmoid(z)
plt.plot(z, y)
plt.xlabel('z')
plt.ylabel('σ(z)')
plt.title('Sigmoid Function')
plt.grid(True)
plt.show()
\`\`\`
## Use in Logistic Regression
Logistic regression passes the linear score through the sigmoid:
\`\`\`python
h(x) = σ(wx + b) = 1 / (1 + e^(-(wx + b)))
\`\`\`
The output is interpreted as the probability P(y=1|x).
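A minimal sketch of that hypothesis in code, reusing the \`sigmoid\` defined above (the parameter values are made up for illustration):
\`\`\`python
w, b = 1.5, -3.0  # illustrative learned parameters

def predict_proba(x):
    # Probability that y = 1 given input x
    return sigmoid(w * x + b)

print(predict_proba(2.0))  # σ(1.5*2.0 - 3.0) = σ(0) = 0.5
\`\`\`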
## Summary
The sigmoid function is the foundation of logistic regression.`,
},
],
},
},
});
// Create deep learning courses
const dl1 = await prisma.course.create({
data: {
title: 'Neural Network Fundamentals',
description: 'From the perceptron to multi-layer networks: learn the basic principles and structure of neural networks.',
category: 'DEEP_LEARNING',
difficulty: 'INTERMEDIATE',
estimatedHours: 5,
chapters: {
create: [
{
title: 'The Perceptron',
order: 1,
content: `# The Perceptron
## What Is a Perceptron?
The perceptron is the earliest neural network model: a single neuron acting as a linear classifier.
## Structure
A perceptron multiplies each input by a weight, sums the results with a bias, and passes the sum through an activation function.
## Formula
\`\`\`python
y = f(Σ(w_i * x_i) + b)
\`\`\`
where f is the activation function (the classic perceptron uses a step function).
## Limitations
A single perceptron can only draw a linear decision boundary, so it cannot solve nonlinear problems such as XOR.
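To make the limitation concrete, here is a minimal sketch of the classic perceptron learning rule on the AND gate (data and hyperparameters are illustrative):
\`\`\`python
import numpy as np

# AND gate: output is 1 only when both inputs are 1
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 0, 0, 1])

w, b, lr = np.zeros(2), 0.0, 0.1

for epoch in range(20):
    for xi, yi in zip(X, y):
        # Step activation: fire if the weighted sum is positive
        pred = 1 if xi @ w + b > 0 else 0
        # Perceptron rule: nudge weights in the direction of the error
        w += lr * (yi - pred) * xi
        b += lr * (yi - pred)

print([1 if xi @ w + b > 0 else 0 for xi in X])  # [0, 0, 0, 1]
\`\`\`
Swapping in the XOR labels \`[0, 1, 1, 0]\` makes the same loop fail to converge, which is exactly the limitation described above.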
## Summary
The perceptron is the building block of neural networks; the multi-layer perceptron (MLP), covered next, overcomes its linear limitation.`,
},
{
title: 'Multi-Layer Neural Networks',
order: 2,
content: `# Multi-Layer Neural Networks
The multi-layer perceptron (MLP) stacks neurons in layers and can solve nonlinear problems that a single perceptron cannot.
## Structure
- **Input layer**: receives the raw features
- **Hidden layers**: learn intermediate representations
- **Output layer**: produces the prediction
## Forward Propagation
\`\`\`python
# Hidden layer: linear transform followed by a nonlinearity
z1 = W1 @ x + b1
a1 = activation(z1)
# Output layer: another linear transform
z2 = W2 @ a1 + b2
# For classification, softmax turns the scores into probabilities
output = softmax(z2)
\`\`\`
## Common Activation Functions
- ReLU: \`f(x) = max(0, x)\`
- Sigmoid: \`f(x) = 1/(1+e^(-x))\`
- Tanh: \`f(x) = tanh(x)\`
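A runnable NumPy sketch of that forward pass (layer sizes and random weights are made up for illustration):
\`\`\`python
import numpy as np

def relu(z):
    return np.maximum(0, z)

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

rng = np.random.default_rng(0)
x = rng.normal(size=3)                          # 3 input features
W1, b1 = rng.normal(size=(4, 3)), np.zeros(4)   # input -> 4 hidden units
W2, b2 = rng.normal(size=(2, 4)), np.zeros(2)   # hidden -> 2 classes

a1 = relu(W1 @ x + b1)          # hidden layer
output = softmax(W2 @ a1 + b2)  # class probabilities
print(output, output.sum())     # two probabilities summing to 1
\`\`\`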
## Why Nonlinearity?
Without nonlinear activations, a stack of linear layers collapses into one linear transform, so the network could still only fit linear functions.
## Summary
Multi-layer networks with nonlinear activations can approximate complex functions and are the basis of models such as CNNs and RNNs.`,
},
],
},
},
});
const dl2 = await prisma.course.create({
data: {
title: 'Convolutional Neural Networks (CNN)',
description: 'Learn how CNNs are applied to image processing, including core concepts such as convolutional and pooling layers.',
category: 'DEEP_LEARNING',
difficulty: 'INTERMEDIATE',
estimatedHours: 6,
chapters: {
create: [
{
title: 'The Convolution Operation',
order: 1,
content: `# The Convolution Operation
Convolution is the core operation of a CNN.
## What Is Convolution?
A small kernel (filter) slides across the image and computes a weighted sum at each position, producing a feature map.
## Steps
1. Place the kernel over a patch of the image
2. Multiply element-wise and sum the results
3. Slide the kernel to the next position
4. Repeat steps 2-3 until the whole image is covered
## Implementation
\`\`\`python
import numpy as np
from scipy import signal

# 2D convolution of an image with a kernel
def convolve2d(image, kernel):
    return signal.convolve2d(image, kernel, mode='valid')
\`\`\`
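A usage sketch with a classic vertical-edge kernel (the image is a made-up array; a real one would be loaded from a file):
\`\`\`python
# Responds where pixel values change from left to right
kernel = np.array([[1, 0, -1],
                   [1, 0, -1],
                   [1, 0, -1]])

image = np.zeros((5, 5))
image[:, 2:] = 1.0  # dark left half, bright right half

feature_map = convolve2d(image, kernel)
print(feature_map)  # strong response along the vertical edge
\`\`\`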
## What Convolution Extracts
- Edges and contours
- Textures
- Corners and local patterns
- Higher-level features as layers are stacked
## Summary
Convolution is the key first step of a CNN: it turns raw pixels into feature maps.`,
},
],
},
},
});
// Create NLP courses
const nlp1 = await prisma.course.create({
data: {
title: 'Text Preprocessing',
description: 'Learn text preprocessing techniques for NLP, including tokenization, stop-word removal, and stemming.',
category: 'NLP',
difficulty: 'BEGINNER',
estimatedHours: 3,
chapters: {
create: [
{
title: 'Tokenization',
order: 1,
content: `# Tokenization
Tokenization, splitting text into words, is a fundamental first step in NLP.
## Chinese Tokenization
Chinese text has no spaces between words, so a dedicated segmenter such as jieba is used:
\`\`\`python
import jieba

# "Natural language processing is an important branch of AI"
text = "自然语言处理是人工智能的重要分支"
words = jieba.cut(text)
print(list(words))
# ['自然语言', '处理', '是', '人工智能', '的', '重要', '分支']
\`\`\`
## English Tokenization
For English, splitting on whitespace is a simple baseline:
\`\`\`python
text = "Natural language processing is important"
words = text.split()
print(words)
# ['Natural', 'language', 'processing', 'is', 'important']
\`\`\`
## Common Tools
- Chinese: jieba, HanLP
- English: NLTK, spaCy
## Summary
Tokenization is the foundation of almost every NLP task.`,
},
],
},
},
});
const nlp2 = await prisma.course.create({
data: {
title: 'Word Vectors',
description: 'Learn word embeddings and vector representations of words, including Word2Vec and GloVe.',
category: 'NLP',
difficulty: 'INTERMEDIATE',
estimatedHours: 5,
chapters: {
create: [
{
title: 'Word Embedding Basics',
order: 1,
content: `# Word Embedding Basics
## The Problem with One-Hot Encoding
One-hot encoding represents each word as a vector with a single 1:
- The vectors are extremely high-dimensional and sparse
- All words are equally distant, so no semantic similarity is captured
- The dimension grows with the vocabulary size
## What Word Embeddings Provide
- Dense, low-dimensional vectors
- Semantically similar words end up close together
- Vector arithmetic captures analogies: king - man + woman ≈ queen
## Word2Vec
Word2Vec is the classic method for training word vectors, with two variants:
- Skip-gram: predict the context words from the center word
- CBOW: predict the center word from its context
## Usage
\`\`\`python
from gensim.models import Word2Vec

# Load a previously trained model from disk
model = Word2Vec.load('word2vec.model')
vector = model.wv['人工智能']  # the vector for "artificial intelligence"
\`\`\`
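A sketch of the analogy query against the same (assumed pre-trained) model; the neighbors returned depend entirely on the training corpus:
\`\`\`python
# king - man + woman -> words closest to the resulting vector
results = model.wv.most_similar(positive=['king', 'woman'], negative=['man'])
print(results[:3])  # often includes 'queen' for an English corpus
\`\`\`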
## Summary
Word embeddings are the foundation of modern NLP and essential background for understanding Transformers and LLMs.`,
},
],
},
},
});
// Create LLM courses
const llm1 = await prisma.course.create({
data: {
title: 'How GPT Works',
description: 'Gain a deep understanding of the architecture and working principles of GPT (Generative Pre-trained Transformer).',
category: 'LLM',
difficulty: 'ADVANCED',
estimatedHours: 8,
chapters: {
create: [
{
title: 'The Transformer Architecture',
order: 1,
content: `# The Transformer Architecture
GPT is built on the Transformer, so understanding the Transformer is the foundation for understanding GPT.
## Core Transformer Components
1. **Self-Attention**: lets every token attend to every other token
2. **Positional Encoding**: injects word-order information
3. **Feed-Forward**: a per-token two-layer network
4. **Layer Normalization**: stabilizes training
## Scaled Dot-Product Attention
\`\`\`python
Attention(Q, K, V) = softmax(QK^T / √d_k) V
\`\`\`
- Q: Query
- K: Key
- V: Value
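A minimal single-head NumPy sketch of that formula (shapes are made up for illustration):
\`\`\`python
import numpy as np

def attention(Q, K, V):
    d_k = Q.shape[-1]
    # Similarity of every query with every key, scaled by sqrt(d_k)
    scores = Q @ K.T / np.sqrt(d_k)
    # Row-wise softmax turns scores into attention weights
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)
    # Each output is a weighted average of the value vectors
    return weights @ V

rng = np.random.default_rng(0)
Q, K, V = (rng.normal(size=(4, 8)) for _ in range(3))  # 4 tokens, dim 8
print(attention(Q, K, V).shape)  # (4, 8)
\`\`\`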
## GPT's Modifications
GPT uses:
- A decoder-only stack
- Causal masking, so each token attends only to earlier tokens
- Autoregressive generation, one token at a time
## Summary
The Transformer is the foundation of GPT.`,
},
{
title: 'Pretraining and Fine-Tuning',
order: 2,
content: `# Pretraining and Fine-Tuning
GPT is trained in two stages: large-scale pretraining followed by task-specific fine-tuning.
## Pretraining
The model learns language modeling on massive unlabeled text, minimizing the negative log-likelihood of each next token:
\`\`\`python
# Language modeling loss at position t
L_t = -log P(w_t | w_1, ..., w_{t-1})
\`\`\`
## Why Fine-Tune?
- Adapts the general model to a specific task or domain
- Needs far less labeled data than training from scratch
- Converges quickly because the model already understands language
## Fine-Tuning Sketch
\`\`\`python
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the pretrained model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# Fine-tuning loop: the model returns the LM loss when labels are provided
for batch in training_data:
    outputs = model(input_ids=batch.input_ids, labels=batch.labels)
    optimizer.zero_grad()
    outputs.loss.backward()
    optimizer.step()
\`\`\`
## Summary
Pretraining plus fine-tuning is the key to GPT's success and is now applied widely across NLP.`,
},
],
},
},
});
const llm2 = await prisma.course.create({
data: {
title: 'Prompt Engineering',
description: 'Learn how to write effective prompts that guide large language models toward the desired output.',
category: 'LLM',
difficulty: 'INTERMEDIATE',
estimatedHours: 4,
chapters: {
create: [
{
title: 'Prompt Basics',
order: 1,
content: `# Prompt Basics
A prompt is the instruction or question a user gives to a large language model.
## What Is a Prompt?
A prompt is the text input that steers the model toward a particular output.
## Features of a Good Prompt
1. **Clear**: states the task unambiguously
2. **Specific**: includes the necessary context and constraints
3. **Structured**: separates the instruction from the input data
4. **Exemplified**: provides few-shot examples when helpful
## Example
### Vague Prompt
\`\`\`
Translate this.
\`\`\`
### Better Prompt
\`\`\`
Translate the following English sentence into Chinese:
"Hello, how are you?"
\`\`\`
## Prompt Techniques
- **Assign a role**: e.g. "You are an experienced translator"
- **Ask for steps**: request step-by-step reasoning
- **Specify the output format**: e.g. JSON or Markdown
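An illustrative few-shot prompt combining these techniques (the content is made up):
\`\`\`
You are a sentiment classifier. Reply with exactly one word: positive or negative.

Review: "The food was amazing." -> positive
Review: "Terrible service, never again." -> negative
Review: "A delightful surprise from start to finish." ->
\`\`\`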
## Summary
Prompt engineering is a key skill for using large language models effectively.`,
},
],
},
},
});
// Create AI tools courses
const tools1 = await prisma.course.create({
data: {
title: 'Getting Started with LangChain',
description: 'Learn to build AI applications with LangChain, including core concepts such as chains, memory, and tools.',
category: 'AI_TOOLS',
difficulty: 'INTERMEDIATE',
estimatedHours: 6,
chapters: {
create: [
{
title: 'Introduction to LangChain',
order: 1,
content: `# Introduction to LangChain
LangChain is a framework for building applications on top of LLMs.
## Why LangChain?
Calling an LLM API directly leaves several problems to solve by hand:
- Prompts are assembled and managed manually
- There is no built-in conversation memory
- Connecting external tools and data sources is tedious
LangChain provides:
- Prompt templates
- Composable chains
- Conversation memory
- Tool and data-source integrations
## Core Concepts
1. **LLM / Chat models**: wrappers around model providers
2. **Prompt templates**: reusable, parameterized prompts
3. **Chain**: composes prompts, models, and parsers into a pipeline
4. **Agent**: lets the model decide which tools to use
5. **Memory**: carries state across conversation turns
## Example
\`\`\`python
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

llm = OpenAI(temperature=0.9)
prompt = PromptTemplate(
    input_variables=["topic"],
    template="Write a short essay about {topic}."
)
# The | operator pipes the formatted prompt into the model
chain = prompt | llm
result = chain.invoke({"topic": "artificial intelligence"})
print(result)
\`\`\`
## Summary
LangChain greatly simplifies LLM application development and is an important tool for building AI applications.`,
},
],
},
},
});
const tools2 = await prisma.course.create({
data: {
title: 'Using Hugging Face',
description: 'Learn to load and use pretrained models with the Hugging Face Transformers library.',
category: 'AI_TOOLS',
difficulty: 'BEGINNER',
estimatedHours: 4,
chapters: {
create: [
{
title: 'The Transformers Library',
order: 1,
content: `# The Transformers Library
Hugging Face Transformers provides easy access to a large collection of pretrained models.
## Installation
\`\`\`bash
pip install transformers torch
\`\`\`
## Basic Usage
\`\`\`python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model by name from the Hub
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Tokenize a prompt into PyTorch tensors
inputs = tokenizer("Artificial intelligence is", return_tensors="pt")

# Generate a continuation of up to 50 tokens
outputs = model.generate(**inputs, max_length=50)
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text)
\`\`\`
## The Model Hub
The Hugging Face Hub hosts tens of thousands of pretrained models, for example:
- Text generation: GPT-2, GPT-Neo
- Language understanding: BERT, RoBERTa
- Translation and seq2seq: mBART, T5
## Summary
The Transformers library makes using pretrained models very simple and is an important tool for AI development.`,
},
],
},
},
});
console.log('Database seeding complete!');
console.log(`Created ${await prisma.course.count()} courses`);
console.log(`Created ${await prisma.chapter.count()} chapters`);
}
main()
.catch((e) => {
console.error(e);
process.exit(1);
})
.finally(async () => {
await prisma.$disconnect();
});