From 631ff29e43e6b0b73961d45434f021dfd1ee63b8 Mon Sep 17 00:00:00 2001 From: Meng Sen Date: Thu, 22 May 2025 17:11:52 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=91=E5=B8=83=20BiliFetcher?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Meng Sen --- .../latest/config/categories.json | 144 +++++++++++++ apps/bili-fetcher/latest/config/config.yaml | 194 ++++++++++++++++++ .../latest/config/scheduler_config.yaml | 80 ++++++++ .../latest/config/sql_statements_mysql.py | 67 ++++++ .../latest/config/sql_statements_sqlite.py | 103 ++++++++++ apps/bili-fetcher/latest/config/template.html | 28 +++ apps/bili-fetcher/latest/data.yml | 10 +- apps/bili-fetcher/latest/docker-compose.yml | 2 + apps/bili-fetcher/latest/scripts/init.sh | 5 + 9 files changed, 632 insertions(+), 1 deletion(-) create mode 100644 apps/bili-fetcher/latest/config/categories.json create mode 100644 apps/bili-fetcher/latest/config/config.yaml create mode 100644 apps/bili-fetcher/latest/config/scheduler_config.yaml create mode 100644 apps/bili-fetcher/latest/config/sql_statements_mysql.py create mode 100644 apps/bili-fetcher/latest/config/sql_statements_sqlite.py create mode 100644 apps/bili-fetcher/latest/config/template.html diff --git a/apps/bili-fetcher/latest/config/categories.json b/apps/bili-fetcher/latest/config/categories.json new file mode 100644 index 000000000..d35f020a7 --- /dev/null +++ b/apps/bili-fetcher/latest/config/categories.json @@ -0,0 +1,144 @@ +{ + "duplicated_tags": [ + "资讯", + "综合" + ], + "unique_tag_to_main": { + "动画": "动画", + "MAD·AMV": "动画", + "MMD·3D": "动画", + "短片·手书": "动画", + "配音": "动画", + "手办·模玩": "动画", + "特摄": "动画", + "动漫杂谈": "动画", + "番剧": "番剧", + "官方延伸": "番剧", + "完结动画": "番剧", + "连载动画": "番剧", + "国创": "国创", + "国产动画": "国创", + "国产原创相关": "国创", + "布袋戏": "国创", + "动态漫·广播剧": "国创", + "音乐": "音乐", + "原创音乐": "音乐", + "翻唱": "音乐", + "VOCALOID·UTAU": "音乐", + "演奏": "音乐", + "MV": "音乐", + "音乐现场": "音乐", + "音乐综合": "音乐", + "乐评盘点": "音乐", + 
"音乐教学": "音乐", + "舞蹈": "舞蹈", + "宅舞": "舞蹈", + "舞蹈综合": "舞蹈", + "舞蹈教程": "舞蹈", + "街舞": "舞蹈", + "明星舞蹈": "舞蹈", + "国风舞蹈": "舞蹈", + "手势·网红舞": "舞蹈", + "游戏": "游戏", + "单机游戏": "游戏", + "电子竞技": "游戏", + "手机游戏": "游戏", + "网络游戏": "游戏", + "桌游棋牌": "游戏", + "GMV": "游戏", + "音游": "游戏", + "Mugen": "游戏", + "知识": "知识", + "科学科普": "知识", + "社科·法律·心理": "知识", + "人文历史": "知识", + "财经商业": "知识", + "校园学习": "知识", + "职业职场": "知识", + "设计·创意": "知识", + "野生技术协会": "知识", + "科技": "科技", + "数码": "科技", + "软件应用": "科技", + "计算机技术": "科技", + "科工机械": "科技", + "极客DIY": "科技", + "运动": "运动", + "篮球": "运动", + "足球": "运动", + "健身": "运动", + "竞技体育": "运动", + "运动文化": "运动", + "运动综合": "运动", + "汽车": "汽车", + "汽车知识科普": "汽车", + "赛车": "汽车", + "改装玩车": "汽车", + "新能源车": "汽车", + "房车": "汽车", + "摩托车": "汽车", + "购车攻略": "汽车", + "汽车生活": "汽车", + "生活": "生活", + "搞笑": "生活", + "出行": "生活", + "三农": "生活", + "家居房产": "生活", + "手工": "生活", + "绘画": "生活", + "日常": "生活", + "亲子": "生活", + "美食": "美食", + "美食制作": "美食", + "美食侦探": "美食", + "美食测评": "美食", + "田园美食": "美食", + "美食记录": "美食", + "动物圈": "动物圈", + "喵星人": "动物圈", + "汪星人": "动物圈", + "动物二创": "动物圈", + "野生动物": "动物圈", + "小宠异宠": "动物圈", + "动物综合": "动物圈", + "鬼畜": "鬼畜", + "鬼畜调教": "鬼畜", + "音MAD": "鬼畜", + "人力VOCALOID": "鬼畜", + "鬼畜剧场": "鬼畜", + "教程演示": "鬼畜", + "时尚": "时尚", + "美妆护肤": "时尚", + "仿妆cos": "时尚", + "穿搭": "时尚", + "时尚潮流": "时尚", + "热点": "资讯", + "环球": "资讯", + "社会": "资讯", + "multiple": "资讯", + "娱乐": "娱乐", + "综艺": "娱乐", + "娱乐杂谈": "娱乐", + "粉丝创作": "娱乐", + "明星综合": "娱乐", + "影视": "影视", + "影视杂谈": "影视", + "影视剪辑": "影视", + "小剧场": "影视", + "预告·资讯": "影视", + "短片": "影视", + "纪录片": "纪录片", + "人文·历史": "纪录片", + "科学·探索·自然": "纪录片", + "军事": "纪录片", + "社会·美食·旅行": "纪录片", + "电影": "电影", + "华语电影": "电影", + "欧美电影": "电影", + "日本电影": "电影", + "其他国家": "电影", + "电视剧": "电视剧", + "国产剧": "电视剧", + "海外剧": "电视剧" + } +} diff --git a/apps/bili-fetcher/latest/config/config.yaml b/apps/bili-fetcher/latest/config/config.yaml new file mode 100644 index 000000000..6c98d87c1 --- /dev/null +++ b/apps/bili-fetcher/latest/config/config.yaml @@ -0,0 +1,194 @@ +# B站用户的SESSDATA,用于API认证 
+SESSDATA: "Cookie里的SESSDATA字段值" + +# 视频摘要配置 +# CACHE_EMPTY_SUMMARY: 控制是否缓存无摘要内容的视频结果 +# 官方状态码含义: +# - result_type=0: 没有摘要 +# - result_type=1: 仅存在摘要总结 +# - result_type=2: 存在摘要以及提纲 +# +# - 设置为true时:缓存所有视频摘要结果,包括无摘要的视频(result_type=0) +# 优点:减少对B站API的重复请求,提高响应速度,降低被限流风险 +# 缺点:占用更多数据库空间 +# - 设置为false时:只缓存有实际摘要内容的视频结果(result_type=1或2) +# 优点:节省数据库空间 +# 缺点:对于无摘要的视频,每次都会重新请求B站API +CACHE_EMPTY_SUMMARY: true + +# 原始历史记录数据的输入文件夹 +input_folder: "history_by_date" + +# 清理后的历史记录数据的输出文件夹 +output_folder: "output" + +# SQLite数据库文件名 +db_file: "bilibili_history.db" + +# 导入日志文件名,用于记录上次导入的位置 +log_file: "last_import_log.json" + +# 分类配置文件名 +categories_file: "categories.json" + +# 每日观看数量统计文件夹 +daily_count_folder: "daily_count" + +# 热力图模板文件名 +heatmap_template: "template.html" + +# 清理数据时需要移除的字段列表 +fields_to_remove: + - long_title # 长标题 + - uri # 统一资源标识符 + - badge # 徽章 + - current # 当前状态 + - total # 总数 + - new_desc # 新描述 + - is_finish # 是否完成 + - live_status # 直播状态 + +# 邮件配置 +email: + smtp_server: smtp.qq.com + smtp_port: 587 + sender: "xxxx@qq.com" + password: "xxxx" # QQ邮箱授权码 + receiver: "xxxx@qq.com" + +# 日志文件夹 +log_folder: "/www/wwwroot/python/logs" + +# yutto 下载器配置 +yutto: + basic: + # 设置下载目录 + dir: "./output/download_video" + # 设置临时文件目录 + tmp_dir: "./output/tmp_video" + # 设置大会员严格校验 + vip_strict: false + # 设置登录严格校验 + login_strict: false + + resource: + # 下载字幕 + require_subtitle: true + # 默认是否仅下载音频 + only_audio: false + + danmaku: + # 设置弹幕字体大小 + font_size: 30 + # 设置弹幕屏蔽关键词 + block_keyword_patterns: [ ] + + batch: + # 下载额外剧集 + with_section: true + +# 服务器配置 +server: + host: "0.0.0.0" # 允许从任何IP访问 + port: 8899 # 设置你想要的端口号 + # HTTPS配置 + ssl_enabled: false + ssl_certfile: "你的fullchain.pem" + ssl_keyfile: "你的privkey.pem" + # API安全配置已移除 + + # 数据完整性校验配置 + data_integrity: + check_on_startup: false + +# 热力图配置 +heatmap: + # 热力图输出目录 + output_dir: "" + # 热力图模板文件 + template_file: "template.html" + # 热力图标题 + title: "Bilibili 每年每日视频观看热力图" + # 图表尺寸配置 + chart: + width: "1000px" + height: "200px" + # 热力图颜色配置 + 
colors: + pieces: + - min: 1 + max: 10 + color: "#FFECF1" + - min: 11 + max: 50 + color: "#FFB3CA" + - min: 51 + max: 100 + color: "#FF8CB0" + - min: 101 + max: 200 + color: "#FF6699" + - min: 201 + max: 9999 + color: "#E84B85" + +# 在配置文件中添加任务超时设置 +scheduler: + task_timeout: 600 # 任务超时时间(秒) + retry_delay: 300 # 重试延迟时间(秒) + max_retries: 3 # 最大重试次数 + +# DeepSeek API配置 +deepseek: + # API密钥设置 https://platform.deepseek.com/api_keys + api_key: "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + # API基础URL + api_base: "https://api.deepseek.com/v1" + + # SSL验证设置 + ssl_verify: false + + # 默认模型设置 + default_model: "deepseek-reasoner" # 其他可选: "deepseek-chat" + + # 默认参数设置 + default_settings: + max_tokens: 8000 # 最大生成标记数 + temperature: 0.7 # 温度设置(0-2.0) + top_p: 1.0 # 核采样(0-1.0) + frequency_penalty: 0.0 # 频率惩罚(0-2.0) + presence_penalty: 0.0 # 存在惩罚(0-2.0) + + # 不同场景的推荐温度设置 + recommended_temperatures: + 通用对话: 1.3 + 创意写作: 1.5 + 代码生成: 0.0 + 数据抽取: 1.0 + 数学推理: 0.0 + + # DeepSeek价格信息(单位:元/百万tokens) + pricing: + deepseek-chat: + input: + standard: 1.0 # 标准时段(08:30-00:30) + discount: 0.5 # 优惠时段(00:30-08:30) + output: + standard: 8.0 # 标准时段 + discount: 4.0 # 优惠时段 + deepseek-reasoner: + input: + standard: 1.0 + discount: 0.25 + output: + standard: 16.0 + discount: 4.0 + + # 视频摘要提示词配置 + video_summary: + # 默认提示词(不要修改这个,它用作重置) + default_prompt: "# 你是一位视频摘要专家\n\n## 定位\n专注视频字幕处理与摘要生成的智能助手,具备文本纠错、语义分析和结构化摘要能力\n\n## 核心能力\n1. 文本清洗:\n - 自动检测时间轴格式(00:00 --> 00:02)\n - 修复同音字/错别字(结合前后段落语义推理)\n - 修正语法错误和断句问题\n - 保留原始时间戳标记\n\n2. 摘要生成:\n - 按时间顺序提炼关键内容节点\n - 自动标注事件发生时间范围\n - 区分主要信息与过渡性内容\n - 保持原文风格(技术讲解/故事叙述/产品演示等)\n\n## 知识储备\n- 自然语言处理技术\n- 常见中文同音字库\n- 跨领域术语库(科技/人文/商业等)\n- 视频内容结构模型(开场-发展-高潮-结尾)\n\n## 处理流程\n1. 接收原始字幕文件\n2. 执行文本预处理:\n a. 验证时间戳连续性\n b. 上下文关联修复(例:\"温馳包碗\"→\"奔驰宝马奥迪\")\n c. 合并碎片化短句\n3. 此要求很重要,这关乎生死:总结的时间戳要分布均匀,不要前面分布的很细,后面由于限制就让时间戳跨度很大,这很不好!正确做法是查看视频最后的时间戳,如果视频超过30分钟,就不建议开始按照秒做总结,而是一开始就是从00:00-xx分钟 这样总结会更均匀\n4. 
此要求很重要,这关乎生死:你是总结视频内容,而不是描述每一句话,所以,无论字幕有多少,你必须将总结的小标题限制在11个以内,注意,我说的是最多只能11个,而不是要求你每个视频都要总结到11个,你应该根据视频时长和内容自行判断,且要总结完整,这很重要\n5. 请不要输出清洗后的字幕,用户只需要摘要内容,用户不关心字幕内容\n6. 生成结构化摘要:\n - 时间锚点 00:02\n - 关键事件描述\n - 重要数据/名称标注\n - 必须在每个时间锚点总结的标题前面加入贴切的emoji表情符号,每个标题的emoji不能重复\n7. 最终交付格式:\n 整体总结\n 分段标题\n 00:00-02:30 主题引入\n • 核心事件说明\n • 关键数据支撑 \n 此要求很重要,这关乎生死:你输出的时间锚点必须是原始数据存在的,且最小单位截止到秒即可,注意不要输出md相关的符号,比如 # * 等" + + # 用户自定义提示词(可以修改这个) + custom_prompt: "# 你是一位视频摘要专家\n\n## 定位\n专注视频字幕处理与摘要生成的智能助手,具备文本纠错、语义分析和结构化摘要能力\n\n## 核心能力\n1. 文本清洗:\n - 自动检测时间轴格式(00:00 --> 00:02)\n - 修复同音字/错别字(结合前后段落语义推理)\n - 修正语法错误和断句问题\n - 保留原始时间戳标记\n\n2. 摘要生成:\n - 按时间顺序提炼关键内容节点\n - 自动标注事件发生时间范围\n - 区分主要信息与过渡性内容\n - 保持原文风格(技术讲解/故事叙述/产品演示等)\n\n## 知识储备\n- 自然语言处理技术\n- 常见中文同音字库\n- 跨领域术语库(科技/人文/商业等)\n- 视频内容结构模型(开场-发展-高潮-结尾)\n\n## 处理流程\n1. 接收原始字幕文件\n2. 执行文本预处理:\n a. 验证时间戳连续性\n b. 上下文关联修复(例:\"温馳包碗\"→\"奔驰宝马奥迪\")\n c. 合并碎片化短句\n3. 此要求很重要,这关乎生死:总结的时间戳要分布均匀,不要前面分布的很细,后面由于限制就让时间戳跨度很大,这很不好!正确做法是查看视频最后的时间戳,如果视频超过30分钟,就不建议开始按照秒做总结,而是一开始就是从00:00-xx分钟 这样总结会更均匀\n4. 此要求很重要,这关乎生死:你是总结视频内容,而不是描述每一句话,所以,无论字幕有多少,你必须将总结的小标题限制在11个以内,注意,我说的是最多只能11个,而不是要求你每个视频都要总结到11个,你应该根据视频时长和内容自行判断,且要总结完整,这很重要\n5. 请不要输出清洗后的字幕,用户只需要摘要内容,用户不关心字幕内容\n6. 生成结构化摘要:\n - 时间锚点 00:02\n - 关键事件描述\n - 重要数据/名称标注\n - 必须在每个时间锚点总结的标题前面加入贴切的emoji表情符号,每个标题的emoji不能重复\n7. 
最终交付格式:\n 整体总结\n 分段标题\n 00:00-02:30 主题引入\n • 核心事件说明\n • 关键数据支撑 \n 此要求很重要,这关乎生死:你输出的时间锚点必须是原始数据存在的,且最小单位截止到秒即可,注意不要输出md相关的符号,比如 # * 等" diff --git a/apps/bili-fetcher/latest/config/scheduler_config.yaml b/apps/bili-fetcher/latest/config/scheduler_config.yaml new file mode 100644 index 000000000..c25ae195c --- /dev/null +++ b/apps/bili-fetcher/latest/config/scheduler_config.yaml @@ -0,0 +1,80 @@ +base_url: http://localhost:8899 +error_handling: + notify_on_failure: true + stop_on_failure: true +scheduler: + log_level: INFO + retry: + delay: 60 + max_attempts: 3 +tasks: + # 每10分钟抓取B站热门视频 + fetch_popular_videos: + endpoint: /bilibili/popular/all + method: GET + name: 获取B站热门视频 + params: + size: 20 + max_pages: 100 + save_to_db: true + include_videos: false + requires: [ ] + schedule: + type: interval + interval_value: 10 + interval_unit: minutes + + # 1. 无依赖的基础任务 + fetch_history: + endpoint: /fetch/bili-history + method: GET + name: 获取B站历史记录 + params: { } + requires: [ ] + schedule: + time: 00:00 + type: daily + + # 2. 依赖 fetch_history + import_data: + endpoint: /importSqlite/import_data_sqlite + method: POST + name: 导入数据到数据库 + requires: + - fetch_history + schedule: + type: chain + + # 3. 依赖 import_data + analyze_data: + endpoint: /analysis/analyze + method: POST + name: 分析历史数据 + requires: + - import_data + schedule: + type: chain + + # 4. 依赖 analyze_data + generate_heatmap: + endpoint: /heatmap/generate_heatmap + method: POST + name: 生成观看热力图 + requires: + - analyze_data + schedule: + type: chain + + # 5. 
依赖 generate_heatmap + send_daily_report: + endpoint: /log/send-email + method: POST + name: 发送每日报告 + params: + content: null + mode: simple + subject: B站历史记录日报 - {current_time} + requires: + - generate_heatmap + schedule: + type: chain diff --git a/apps/bili-fetcher/latest/config/sql_statements_mysql.py b/apps/bili-fetcher/latest/config/sql_statements_mysql.py new file mode 100644 index 000000000..7da3b9ee4 --- /dev/null +++ b/apps/bili-fetcher/latest/config/sql_statements_mysql.py @@ -0,0 +1,67 @@ +SHOW_DATABASES = "SHOW DATABASES LIKE %s" +CREATE_DATABASE = "CREATE DATABASE `{db_name}` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" +SHOW_TABLES = """ + SELECT COUNT(*) + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s +""" +CREATE_TABLE_DEFAULT = """ + CREATE TABLE {table} ( + id BIGINT PRIMARY KEY COMMENT '主键,使用雪花算法等库生成的唯一ID', + title VARCHAR(255) NOT NULL COMMENT '条目标题,字符串,最大255字符', + long_title VARCHAR(255) COMMENT '条目副标题(有时为空),最大255字符', + cover VARCHAR(255) COMMENT '条目封面图url,用于专栏以外的条目', + covers JSON COMMENT '条目封面图组,有效时array无效时null,仅用于专栏', + uri VARCHAR(255) COMMENT '重定向url仅用于剧集和直播', + oid BIGINT NOT NULL COMMENT '目标id稿件视频&剧集(当business=archive或business=pgc时):稿件avid直播(当business=live时):直播间id文章(当business=article时):文章cvid文集(当business=article-list时):文集rlid', + epid BIGINT DEFAULT 0 COMMENT '剧集epid 仅用于剧集', + bvid VARCHAR(50) NOT NULL COMMENT '稿件bvid 仅用于稿件视频', + page INT DEFAULT 1 COMMENT '观看到的视频分P数 仅用于稿件视频', + cid BIGINT COMMENT '观看到的对象id 稿件视频&剧集(当business=archive或business=pgc时):视频cid文集(当business=article-list时):文章cvid', + part VARCHAR(255) COMMENT '观看到的视频分 P 标题 仅用于稿件视频', + business VARCHAR(50) COMMENT '视频业务类型(如archive代表普通视频),最大50字符', + dt INT NOT NULL COMMENT '记录查看的平台代码 1 3 5 7 手机端,2 web端,4 6 pad端,33TV端,0其他', + videos INT DEFAULT 1 COMMENT '视频分 P 数目 仅用于稿件视频,整数型,默认为1', + author_name VARCHAR(100) NOT NULL COMMENT 'UP 主昵称', + author_face VARCHAR(255) COMMENT 'UP 主头像 url', + author_mid BIGINT NOT NULL COMMENT 'UP 主 mid', + view_at 
BIGINT NOT NULL COMMENT '查看时间 时间戳', + progress INT DEFAULT 0 COMMENT '视频观看进度,单位为秒,用于稿件视频或剧集', + badge VARCHAR(50) COMMENT '角标文案 稿件视频 / 剧集 / 笔记', + show_title VARCHAR(255) COMMENT '分 P 标题 用于稿件视频或剧集', + duration INT NOT NULL COMMENT '视频总时长 用于稿件视频或剧集', + current VARCHAR(255) COMMENT '未知字段', + total INT DEFAULT 0 COMMENT '总计分集数 仅用于剧集', + new_desc VARCHAR(255) COMMENT '最新一话 / 最新一 P 标识 用于稿件视频或剧集', + is_finish TINYINT(1) DEFAULT 0 COMMENT '是否观看完,布尔值,0为否,1为是', + is_fav TINYINT(1) DEFAULT 0 COMMENT '是否收藏,布尔值,0为否,1为是', + kid BIGINT COMMENT '条目目标 id', + tag_name VARCHAR(100) COMMENT '子分区名 用于稿件视频和直播', + live_status TINYINT(1) DEFAULT 0 COMMENT '直播状态 仅用于直播0未开播1已开播', + main_category VARCHAR(100) COMMENT '主分区名称', + remark TEXT COMMENT '用户添加的备注信息', + remark_time BIGINT DEFAULT 0 COMMENT '备注添加时间的时间戳', + INDEX (author_mid) COMMENT '建立作者MID的索引,用于快速查询', + INDEX (view_at) COMMENT '建立观看时间的索引', + INDEX (remark_time) COMMENT '建立备注时间的索引' + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +""" +CREATE_TABLE_LIKE = "CREATE TABLE {new_table} LIKE {reference_table};" +SELECT_DATABASE = "SELECT DATABASE() AS current_db;" + +INSERT_DATA = """ + INSERT INTO {table} ( + id, title, long_title, cover, covers, uri, oid, epid, bvid, page, cid, part, + business, dt, videos, author_name, author_face, author_mid, view_at, progress, + badge, show_title, duration, current, total, new_desc, is_finish, is_fav, kid, + tag_name, live_status, main_category, remark, remark_time + ) VALUES ( + %(id)s, %(title)s, %(long_title)s, %(cover)s, %(covers)s, %(uri)s, %(oid)s, + %(epid)s, %(bvid)s, %(page)s, %(cid)s, %(part)s, %(business)s, %(dt)s, + %(videos)s, %(author_name)s, %(author_face)s, %(author_mid)s, %(view_at)s, + %(progress)s, %(badge)s, %(show_title)s, %(duration)s, %(current)s, %(total)s, + %(new_desc)s, %(is_finish)s, %(is_fav)s, %(kid)s, %(tag_name)s, %(live_status)s, + %(main_category)s, %(remark)s, %(remark_time)s + ) +""" diff --git a/apps/bili-fetcher/latest/config/sql_statements_sqlite.py 
b/apps/bili-fetcher/latest/config/sql_statements_sqlite.py new file mode 100644 index 000000000..6e48fac3a --- /dev/null +++ b/apps/bili-fetcher/latest/config/sql_statements_sqlite.py @@ -0,0 +1,103 @@ +CREATE_TABLE_DEFAULT = """ +CREATE TABLE IF NOT EXISTS {table} ( + id INTEGER PRIMARY KEY, + title TEXT NOT NULL, + long_title TEXT, + cover TEXT, + covers JSON, + uri TEXT, + oid INTEGER NOT NULL, + epid INTEGER DEFAULT 0, + bvid TEXT NOT NULL, + page INTEGER DEFAULT 1, + cid INTEGER, + part TEXT, + business TEXT, + dt INTEGER NOT NULL, + videos INTEGER DEFAULT 1, + author_name TEXT NOT NULL, + author_face TEXT, + author_mid INTEGER NOT NULL, + view_at INTEGER NOT NULL, + progress INTEGER DEFAULT 0, + badge TEXT, + show_title TEXT, + duration INTEGER NOT NULL, + current TEXT, + total INTEGER DEFAULT 0, + new_desc TEXT, + is_finish INTEGER DEFAULT 0, + is_fav INTEGER DEFAULT 0, + kid INTEGER, + tag_name TEXT, + live_status INTEGER DEFAULT 0, + main_category TEXT, + remark TEXT DEFAULT '', + remark_time INTEGER DEFAULT 0 +); +""" + +# 删除记录表创建语句 +CREATE_TABLE_DELETED_HISTORY = """ +CREATE TABLE IF NOT EXISTS deleted_history ( + id INTEGER PRIMARY KEY, + bvid TEXT NOT NULL, + view_at INTEGER NOT NULL, + delete_time INTEGER NOT NULL, + UNIQUE(bvid, view_at) +); +""" + +# 视频摘要表创建语句 +CREATE_TABLE_VIDEO_SUMMARY = """ +CREATE TABLE IF NOT EXISTS video_summary ( + id INTEGER PRIMARY KEY, + bvid TEXT NOT NULL, + cid INTEGER NOT NULL, + up_mid INTEGER NOT NULL, + stid TEXT, + summary TEXT, + outline JSON, + result_type INTEGER DEFAULT 0, + fetch_time INTEGER NOT NULL, + update_time INTEGER DEFAULT 0 +); +""" + +CREATE_INDEXES = [ + "CREATE INDEX IF NOT EXISTS idx_{table}_author_mid ON {table} (author_mid);", + "CREATE INDEX IF NOT EXISTS idx_{table}_view_at ON {table} (view_at);", + "CREATE INDEX IF NOT EXISTS idx_{table}_remark_time ON {table} (remark_time);", + "CREATE INDEX IF NOT EXISTS idx_{table}_covers ON {table} (json_valid(covers));" +] + +# 视频摘要表索引 
+CREATE_INDEXES_VIDEO_SUMMARY = [ + "CREATE UNIQUE INDEX IF NOT EXISTS idx_video_summary_bvid_cid ON video_summary (bvid, cid);", + "CREATE INDEX IF NOT EXISTS idx_video_summary_up_mid ON video_summary (up_mid);", + "CREATE INDEX IF NOT EXISTS idx_video_summary_fetch_time ON video_summary (fetch_time);", + "CREATE INDEX IF NOT EXISTS idx_video_summary_update_time ON video_summary (update_time);" +] + +INSERT_DATA = """ +INSERT INTO {table} ( + id, title, long_title, cover, covers, uri, oid, epid, bvid, page, cid, part, + business, dt, videos, author_name, author_face, author_mid, view_at, progress, + badge, show_title, duration, current, total, new_desc, is_finish, is_fav, kid, + tag_name, live_status, main_category, remark, remark_time +) VALUES ({placeholders}) +""" + +# 视频摘要表插入语句 +INSERT_VIDEO_SUMMARY = """ +INSERT INTO video_summary ( + id, bvid, cid, up_mid, stid, summary, outline, result_type, fetch_time, update_time +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +""" + +# 视频摘要表更新语句 +UPDATE_VIDEO_SUMMARY = """ +UPDATE video_summary SET + stid = ?, summary = ?, outline = ?, result_type = ?, update_time = ? +WHERE bvid = ? AND cid = ? +""" diff --git a/apps/bili-fetcher/latest/config/template.html b/apps/bili-fetcher/latest/config/template.html new file mode 100644 index 000000000..97548a55a --- /dev/null +++ b/apps/bili-fetcher/latest/config/template.html @@ -0,0 +1,28 @@ + + + + + {{ title }} + + + + + +

{{ title }}

+{% for chart in charts %} +
+

{{ chart.year }} 年

+
+ {{ chart.chart_html | safe }} +
+
+{% endfor %} + + diff --git a/apps/bili-fetcher/latest/data.yml b/apps/bili-fetcher/latest/data.yml index 3233ebf07..5b2281b56 100644 --- a/apps/bili-fetcher/latest/data.yml +++ b/apps/bili-fetcher/latest/data.yml @@ -7,7 +7,7 @@ additionalProperties: labelEn: Data persistence path required: true type: text - - default: 8080 + - default: 5173 edit: true envKey: PANEL_APP_PORT_HTTP labelZh: WebUI 端口 @@ -15,3 +15,11 @@ additionalProperties: required: true rule: paramPort type: number + - default: 8899 + edit: true + envKey: PANEL_APP_PORT_API + labelZh: API 端口 + labelEn: API port + required: true + rule: paramPort + type: number diff --git a/apps/bili-fetcher/latest/docker-compose.yml b/apps/bili-fetcher/latest/docker-compose.yml index 296b322a1..74e3a485f 100644 --- a/apps/bili-fetcher/latest/docker-compose.yml +++ b/apps/bili-fetcher/latest/docker-compose.yml @@ -9,6 +9,8 @@ services: restart: always networks: - 1panel-network + ports: + - ${PANEL_APP_PORT_API}:8899 env_file: - ${GLOBAL_ENV_FILE:-/etc/1panel/envs/global.env} - ${ENV_FILE:-/etc/1panel/envs/default.env} diff --git a/apps/bili-fetcher/latest/scripts/init.sh b/apps/bili-fetcher/latest/scripts/init.sh index 07fb8c3fe..eb4ce4cc8 100644 --- a/apps/bili-fetcher/latest/scripts/init.sh +++ b/apps/bili-fetcher/latest/scripts/init.sh @@ -10,6 +10,11 @@ if [ -f .env ]; then echo "ENV_FILE=${CURRENT_DIR}/.env" >> .env echo "GLOBAL_ENV_FILE=${CURRENT_DIR}/envs/global.env" >> .env + if [ ! -d $BILI_FETCHER_ROOT_PATH/config ]; then + mkdir -p $BILI_FETCHER_ROOT_PATH/config + fi + cp -rn ./config/* $BILI_FETCHER_ROOT_PATH/config/ + echo "Check Finish." else