blog-hexo/public/posts/21037/index.html
2024-04-05 11:25:02 +08:00

368 lines
28 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
RAG知识库召回率
</title>
<meta name="description" content="">
<meta name="keywords" content="">
<meta name="author" content="Mozzie">
<link rel="canonical" href="https://maxshader.com/posts/21037/">
<link rel="icon" type="image/svg" href='data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 7h1a2 2 0 0 1 2 2v.5a.5.5 0 0 0 .5.5a.5.5 0 0 1 .5.5v3a.5.5 0 0 1-.5.5a.5.5 0 0 0-.5.5v.5a2 2 0 0 1-2 2h-2"></path><path d="M8 7H6a2 2 0 0 0-2 2v6a2 2 0 0 0 2 2h1"></path><path d="M12 8l-2 4h3l-2 4"></path></g></svg>'>
<link rel="stylesheet" href="/css/b4c95347.css">
<script>window.i18n = {"tip-status-done":"完成","tip-status-default":"全部","tip-status-todo":"计划","tip-status-doing":"进行","tip-status-other":"其他","text-select":"选择","text-move":"移动","text-esc":"退出","January":"一月","February":"二月","March":"三月","April":"四月","May":"五月","June":"六月","July":"七月","August":"八月","September":"九月","October":"十月","November":"十一月","December":"十二月"}</script>
<meta name="generator" content="Hexo 7.0.0"></head>
<body id="app">
<aside id="aside-box" class="left-aside">
<div class="header">
<link rel="stylesheet" href="/css/61875ce9.css">
<div class="profile">
<a class="badge" href="/">
<span>Hi</span>
<span>Mozzie</span>
</a>
<cosy-tooltip id="left-aside-button" placement="right">
<span slot="content">
<span>显示 / 隐藏 左侧导航</span>
<cosy-short-key>[</cosy-short-key>
</span>
<cosy-icon>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M16 4c1.104-.019 2 .896 2 2v8a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h12zm1 2a1 1 0 0 0-1-1h-2.995v10H16a1 1 0 0 0 1-1V6zm-4.995 9V5H4.001a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8.004z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
</div>
<script src="/js/69622cae.js"></script>
<cosy-search id="post-search" placeholder="搜索">
<div slot="short-key">
<cosy-short-key></cosy-short-key>
<cosy-short-key>K</cosy-short-key>
</div>
</cosy-search>
<script>
window.algolia = {
appId: "5DTW808BZ8",
SearchOnlyAPIKey: "27845b245efc8a2853cc0bdc7366ea26"
}
</script>
<script src="/js/0a6f193b.js"></script>
</div>
<div class="aside-category">
<link rel="stylesheet" href="/css/db04a759.css">
<nav class="category-nav cosy-scrollbar">
<ul><li data-path="archives">
<a href="/archives">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M3.5 3A1.5 1.5 0 0 0 2 4.5v4A1.5 1.5 0 0 0 3.5 10h9A1.5 1.5 0 0 0 14 8.5v-4A1.5 1.5 0 0 0 12.5 3h-9zM3 4.5a.5.5 0 0 1 .5-.5h9a.5.5 0 0 1 .5.5v4a.5.5 0 0 1-.5.5h-9a.5.5 0 0 1-.5-.5v-4zm.5 6.5A1.5 1.5 0 0 0 2 12.5v4A1.5 1.5 0 0 0 3.5 18h9a1.5 1.5 0 0 0 1.5-1.5v-4a1.5 1.5 0 0 0-1.5-1.5h-9zM3 12.5a.5.5 0 0 1 .5-.5h9a.5.5 0 0 1 .5.5v4a.5.5 0 0 1-.5.5h-9a.5.5 0 0 1-.5-.5v-4zm14-.063a2.003 2.003 0 0 1-2.5-1.937A2 2 0 0 1 16 8.563a2.005 2.005 0 0 1 1 0a2 2 0 0 1 0 3.874zM16.5 3a.5.5 0 0 1 .5.5v4.041a3.02 3.02 0 0 0-1 0V3.5a.5.5 0 0 1 .5-.5zm0 10.5c-.17 0-.337-.014-.5-.041V17.5a.5.5 0 0 0 1 0v-4.041c-.163.027-.33.041-.5.041z" fill="currentColor"></path></g></svg>
<div class="ellipsis">归档</div>
</a>
</li><li data-path="cosy-roadmap">
<a href="/cosy-roadmap">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M9.384 2a1 1 0 0 0-.966.742L4.616 17H2.5a.5.5 0 0 0 0 1h15a.5.5 0 0 0 0-1h-2.116L11.582 2.742A1 1 0 0 0 10.616 2H9.384zM5.651 17l.8-3H11.5a.5.5 0 0 0 0-1H6.717l.534-2H10.5a.5.5 0 0 0 0-1H7.517l1.867-7h1.232l3.733 14H5.651z" fill="currentColor"></path></g></svg>
<div class="ellipsis">路线图</div>
</a>
</li><li data-path="cosy-resume">
<a href="/cosy-resume">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M8.5 4.498a1.5 1.5 0 1 1 3 0a1.5 1.5 0 0 1-3 0zm1.5-2.5a2.5 2.5 0 0 0-2.43 3.086L5.471 4.15a1.761 1.761 0 0 0-2.317.88c-.4.882-.008 1.917.877 2.31L7 8.662v2.287l-1.877 4.645a1.75 1.75 0 0 0 3.245 1.311l1.556-3.849a.073.073 0 0 1 .028-.038a.086.086 0 0 1 .046-.012c.02 0 .035.005.046.012a.074.074 0 0 1 .028.038l1.555 3.849a1.75 1.75 0 0 0 3.245-1.311L13 10.96V8.662l2.968-1.322a1.74 1.74 0 0 0 .877-2.31a1.761 1.761 0 0 0-2.317-.88l-2.097.934a2.5 2.5 0 0 0-2.43-3.086zM4.065 5.444a.761.761 0 0 1 1-.38l3.918 1.744a2.5 2.5 0 0 0 2.034 0l3.918-1.744a.761.761 0 0 1 1 .38a.739.739 0 0 1-.373.983l-2.969 1.321a1 1 0 0 0-.593.914v2.298a1 1 0 0 0 .073.375l1.872 4.633a.75.75 0 0 1-1.39.562l-1.556-3.849c-.364-.9-1.639-.9-2.003 0l-1.555 3.85a.75.75 0 1 1-1.39-.562l1.876-4.646A1 1 0 0 0 8 10.95V8.662a1 1 0 0 0-.593-.914L4.438 6.427a.739.739 0 0 1-.373-.983z" fill="currentColor"></path></g></svg>
<div class="ellipsis">简历</div>
</a>
</li></ul>
<ul><li class="active">
<a href="/categories/AI/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M6.13 2.793A3.91 3.91 0 0 1 8.5 2a1.757 1.757 0 0 1 1.5.78A1.757 1.757 0 0 1 11.5 2a3.91 3.91 0 0 1 2.37.793c.525.408.93.973 1.073 1.656c.328.025.628.161.88.366c.382.31.66.775.835 1.267c.274.765.348 1.74.064 2.57c.072.034.143.074.212.12c.275.183.484.445.638.754c.303.605.428 1.449.428 2.474c0 1.141-.435 1.907-.987 2.38a2.68 2.68 0 0 1-1.054.555c-.1.558-.38 1.204-.819 1.752C14.57 17.402 13.686 18 12.5 18c-.94 0-1.688-.52-2.174-1.03a4.252 4.252 0 0 1-.326-.385a4.245 4.245 0 0 1-.326.385C9.188 17.48 8.441 18 7.5 18c-1.186 0-2.069-.598-2.64-1.313a4.057 4.057 0 0 1-.819-1.752a2.68 2.68 0 0 1-1.054-.555C2.435 13.907 2 13.14 2 12c0-1.025.126-1.87.428-2.474c.154-.309.363-.57.638-.755a1.58 1.58 0 0 1 .212-.118c-.284-.832-.21-1.806.064-2.571c.175-.492.453-.957.835-1.267c.252-.205.552-.34.88-.366c.144-.683.549-1.248 1.074-1.656zM9.5 4.5V4.49l-.002-.05a2.744 2.744 0 0 0-.154-.764a1.222 1.222 0 0 0-.309-.49A.76.76 0 0 0 8.5 3a2.91 2.91 0 0 0-1.756.582C6.28 3.943 6 4.432 6 5a.5.5 0 0 1-.658.474c-.188-.062-.356-.027-.535.117c-.196.16-.387.444-.524.827c-.279.782-.25 1.729.133 2.305A.5.5 0 0 1 4.5 9h.75a2.25 2.25 0 0 1 2.25 2.25v.335a1.5 1.5 0 1 1-1 0v-.335c0-.69-.56-1.25-1.25-1.25H3.5a.499.499 0 0 1-.175-.032l-.003.006C3.124 10.369 3 11.025 3 12c0 .859.315 1.343.638 1.62c.347.298.732.38.862.38a.5.5 0 0 1 .5.5c0 .368.2 1.011.64 1.563c.429.535 1.046.937 1.86.937c.56 0 1.062-.313 1.45-.72c.191-.2.34-.407.437-.577a1.573 1.573 0 0 0 .113-.236V7.5H8.415a1.5 1.5 0 1 1 0-1H9.5v-2zm1 9.999v.967a1.575 1.575 0 0 0 .113.236c.098.17.246.377.436.577c.389.407.892.72 1.451.72c.814 0 1.431-.402 1.86-.937c.44-.552.64-1.195.64-1.563a.5.5 0 0 1 .5-.5c.13 0 .515-.082.862-.38c.323-.277.638-.761.638-1.62c0-.975-.125-1.63-.322-2.026a.923.923 0 0 0-.3-.37A.657.657 0 0 0 16 9.5a.5.5 0 0 1-.416-.777c.384-.576.412-1.523.133-2.305c-.137-.383-.328-.668-.524-.827c-.179-.144-.347-.18-.535-.117A.5.5 0 0 1 14 5c0-.568-.28-1.057-.745-1.418A2.91 2.91 0 0 0 11.5 3a.76.76 0 0 0-.535.186a1.22 1.22 0 0 0-.31.49a2.579 2.579 0 0 0-.155.814v9.01h.75c.69 0 1.25-.56 1.25-1.25v-1.835a1.5 1.5 0 1 1 1 0v1.835a2.25 2.25 0 0 1-2.25 2.25h-.75zM6.5 7a.5.5 0 1 0 1 0a.5.5 0 0 0-1 0zM13 9.5a.5.5 0 1 0 0-1a.5.5 0 0 0 0 1zm-6 3a.5.5 0 1 0 0 1a.5.5 0 0 0 0-1z" fill="currentColor"></path></g></svg>
<div class="ellipsis">
<span>AI</span>
</div>
</a>
</li><li class="">
<a href="/categories/CS/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 4l-2 14.5l-6 2l-6-2L4 4z"></path><path d="M7.5 8h3v8l-2-1"></path><path d="M16.5 8H14a.5.5 0 0 0-.5.5v3a.5.5 0 0 0 .5.5h1.423a.5.5 0 0 1 .495.57L15.5 15.5l-2 .5"></path></g></svg>
<div class="ellipsis">
<span>CS</span>
</div>
</a>
</li><li class="">
<a href="/categories/EQ/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5.636 5.636a9 9 0 0 1 13.397.747L13.414 12l5.619 5.617A9 9 0 1 1 5.636 5.636z"></path><circle cx="11.5" cy="7.5" r="1" fill="currentColor"></circle></g></svg>
<div class="ellipsis">
<span>EQ</span>
</div>
</a>
</li><li class="">
<a href="/categories/Hexo/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M4 17v1a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-1"></path><path d="M8 16h8"></path><path d="M8.322 12.582l7.956.836"></path><path d="M8.787 9.168l7.826 1.664"></path><path d="M10.096 5.764l7.608 2.472"></path></g></svg>
<div class="ellipsis">
<span>Hexo</span>
</div>
</a>
</li><li class="">
<a href="/categories/%E8%87%AA%E5%AA%92%E4%BD%93/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><path d="M9 12a4 4 0 1 0 4 4V4a5 5 0 0 0 5 5" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path></svg>
<div class="ellipsis">
<span>自媒体</span>
</div>
</a>
</li><li class="">
<a href="/categories/%E8%AF%BB%E4%B9%A6/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M4 16V4a2 2 0 0 1 2-2h8a2 2 0 0 1 2 2v11a1 1 0 0 1-1 1H5a1 1 0 0 0 1 1h9.5a.5.5 0 0 1 0 1H6a2 2 0 0 1-2-2zM15 4a1 1 0 0 0-1-1H6a1 1 0 0 0-1 1v11h10V4zM7.041 8h.973c.045-.773.192-1.485.42-2.059A3.002 3.002 0 0 0 7.04 8zM6 8.5a4 4 0 1 1 8 0a4 4 0 0 1-8 0zm6.959-.5a3.002 3.002 0 0 0-1.392-2.059c.227.574.374 1.286.419 2.059h.973zm-.973 1c-.045.773-.192 1.486-.42 2.059A3.002 3.002 0 0 0 12.96 9h-.973zm-1.002-1c-.046-.707-.189-1.324-.383-1.778c-.12-.28-.25-.474-.368-.591c-.117-.115-.195-.131-.233-.131c-.038 0-.116.016-.233.13c-.118.118-.248.312-.368.592c-.194.454-.337 1.07-.383 1.778h1.968zM9.016 9c.046.707.189 1.324.383 1.778c.12.28.25.474.368.591c.117.115.195.131.233.131c.038 0 .116-.016.233-.13c.118-.118.248-.313.368-.592c.194-.454.336-1.07.383-1.778H9.016zM8.014 9h-.973c.147.87.668 1.614 1.392 2.059c-.227-.573-.374-1.286-.419-2.059z" fill="currentColor"></path></g></svg>
<div class="ellipsis">
<span>读书</span>
</div>
</a>
</li><li class="">
<a href="/categories/%E8%B4%A2%E7%BB%8F/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="9"></circle><path d="M14.8 9A2 2 0 0 0 13 8h-2a2 2 0 0 0 0 4h2a2 2 0 0 1 0 4h-2a2 2 0 0 1-1.8-1"></path><path d="M12 6v2m0 8v2"></path></g></svg>
<div class="ellipsis">
<span>财经</span>
</div>
</a>
</li></ul>
</nav>
<script src="/js/066f8989.js"></script>
</div>
<div class="bottom">
<cosy-tooltip id="button-preference" placement="right">
<span slot="content">
<span>偏好</span>
<cosy-short-key></cosy-short-key>
<cosy-short-key>p</cosy-short-key>
</span>
<cosy-icon bordered id="button-about-cosy-theme">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 16 16">
<g fill="none">
<path d="M8 6a2 2 0 1 0 0 4a2 2 0 0 0 0-4zM7 8a1 1 0 1 1 2 0a1 1 0 0 1-2 0zm3.618-3.602a.708.708 0 0 1-.824-.567l-.26-1.416a.354.354 0 0 0-.275-.282a6.072 6.072 0 0 0-2.519 0a.354.354 0 0 0-.275.282l-.259 1.416a.71.71 0 0 1-.936.538l-1.359-.484a.355.355 0 0 0-.382.095c-.569.627-1 1.367-1.262 2.173a.352.352 0 0 0 .108.378l1.102.931a.704.704 0 0 1 0 1.076l-1.102.931a.352.352 0 0 0-.108.378A5.986 5.986 0 0 0 3.53 12.02a.355.355 0 0 0 .382.095l1.36-.484a.708.708 0 0 1 .936.538l.258 1.416c.026.14.135.252.275.281a6.075 6.075 0 0 0 2.52 0a.353.353 0 0 0 .274-.281l.26-1.416a.71.71 0 0 1 .936-.538l1.359.484c.135.048.286.01.382-.095c.569-.627 1-1.367 1.262-2.173a.352.352 0 0 0-.108-.378l-1.102-.931a.703.703 0 0 1 0-1.076l1.102-.931a.352.352 0 0 0 .108-.378A5.985 5.985 0 0 0 12.47 3.98a.355.355 0 0 0-.382-.095l-1.36.484a.71.71 0 0 1-.111.03zm-6.62.58l.937.333a1.71 1.71 0 0 0 2.255-1.3l.177-.97a5.105 5.105 0 0 1 1.265 0l.178.97a1.708 1.708 0 0 0 2.255 1.3L12 4.977c.255.334.467.698.63 1.084l-.754.637a1.704 1.704 0 0 0 0 2.604l.755.637a4.99 4.99 0 0 1-.63 1.084l-.937-.334a1.71 1.71 0 0 0-2.255 1.3l-.178.97a5.099 5.099 0 0 1-1.265 0l-.177-.97a1.708 1.708 0 0 0-2.255-1.3L4 11.023a4.987 4.987 0 0 1-.63-1.084l.754-.638a1.704 1.704 0 0 0 0-2.603l-.755-.637c.164-.386.376-.75.63-1.084z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
</div>
</aside>
<main>
<link rel="stylesheet" href="/css/4dbc8f91.css">
<div class="post-container">
<header>
<div class="left">
<link rel="stylesheet" href="/css/7d333f9e.css">
<nav class="breadcrumb">
<section>
<cosy-tooltip placement="bottom-left">
<span slot="content"><span>首页</span>
<cosy-short-key></cosy-short-key>
<cosy-short-key>H</cosy-short-key>
</span>
<cosy-icon href="/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M8.998 2.388a1.5 1.5 0 0 1 2.005 0l5.5 4.942A1.5 1.5 0 0 1 17 8.445V15.5a1.5 1.5 0 0 1-1.5 1.5H13a1.5 1.5 0 0 1-1.5-1.5V12a.5.5 0 0 0-.5-.5H9a.5.5 0 0 0-.5.5v3.5A1.5 1.5 0 0 1 7 17H4.5A1.5 1.5 0 0 1 3 15.5V8.445c0-.425.18-.83.498-1.115l5.5-4.942zm1.336.744a.5.5 0 0 0-.668 0l-5.5 4.942A.5.5 0 0 0 4 8.445V15.5a.5.5 0 0 0 .5.5H7a.5.5 0 0 0 .5-.5V12A1.5 1.5 0 0 1 9 10.5h2a1.5 1.5 0 0 1 1.5 1.5v3.5a.5.5 0 0 0 .5.5h2.5a.5.5 0 0 0 .5-.5V8.445a.5.5 0 0 0-.166-.371l-5.5-4.942z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
<svg class="arrow" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M7.733 4.207a.75.75 0 0 1 1.06.026l5.001 5.25a.75.75 0 0 1 0 1.035l-5 5.25a.75.75 0 1 1-1.087-1.034L12.216 10l-4.51-4.734a.75.75 0 0 1 .027-1.06z" fill="currentColor"></path>
</g>
</svg>
<a class="ellipsis" href="/categories/AI/">
AI
</a>
<svg class="arrow" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M7.733 4.207a.75.75 0 0 1 1.06.026l5.001 5.25a.75.75 0 0 1 0 1.035l-5 5.25a.75.75 0 1 1-1.087-1.034L12.216 10l-4.51-4.734a.75.75 0 0 1 .027-1.06z" fill="currentColor"></path>
</g>
</svg>
<span class="ellipsis">
RAG知识库召回率
</span>
</section>
</nav>
<script src="/js/31d6cfe0.js"></script>
</div>
<div class="right">
<cosy-tooltip id="toc-show-button" placement="left">
<span slot="content">
<span>显示 / 隐藏 文章目录</span>
<cosy-short-key>]</cosy-short-key>
</span>
<cosy-icon>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path
d="M4 4c-1.104-.019-2 .896-2 2v8a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2H4zM3 6a1 1 0 0 1 1-1h2.995v10H4a1 1 0 0 1-1-1V6zm4.995 9V5h8.004a1 1 0 0 1 1 1v8a1 1 0 0 1-1 1H7.995z"
fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
</div>
</header>
<div class="content">
<main class="cosy-scrollbar">
<div class="article-container">
<!-- 渲染文章内容 -->
<article>
<!-- 文章标题 -->
<h1 class="post-title">RAG知识库召回率</h1>
<div class="last-updated">
上次更新: 2024-03-31 08:57:01
</div>
<!-- 文章 -->
<h1 id="通用的RAG方案的召回率"><a href="#通用的RAG方案的召回率" class="headerlink" title="通用的RAG方案的召回率"></a>通用的RAG方案的召回率</h1><p>不一定召回率越高越好,对于准确性也需要考虑,通用的召回率大概在<code>70%</code>,不论是<code>dify</code>还是<code>fastgpt</code>,对于生产场景,要求起码到<code>90%</code>的水平</p>
<h1 id="如何提升召回率"><a href="#如何提升召回率" class="headerlink" title="如何提升召回率"></a>如何提升召回率</h1><p>首先是针对搜索query来说通过文本框的<code>打字输入</code>的角度,人类一定是倾向于偷懒,只输入关键词,这就是给表达真实意图带来的难度。</p>
<p>语音交互,才有可能让大家使用起来更舒服。</p>
<h2 id="数据预处理"><a href="#数据预处理" class="headerlink" title="数据预处理"></a>数据预处理</h2><p>针对<code>LLM</code>模型来说,数据治理变的很重要,在数据切分的过程中,需要符合人的逻辑去<code>分词</code>。按照人的逻辑,例如:表格、章节、目录……</p>
<h3 id="分词-chunk"><a href="#分词-chunk" class="headerlink" title="分词 chunk"></a>分词 chunk</h3><p><code>chunk</code><code>size</code>越大,召回越少。但是分的越精细,同样会损失上下文连贯性</p>
<h1 id="意图分类"><a href="#意图分类" class="headerlink" title="意图分类"></a>意图分类</h1><p>用户提问的内容,很短、缩写的情况,如何命中用户的<code>真实意图</code></p>
<h2 id="缓存库"><a href="#缓存库" class="headerlink" title="缓存库"></a>缓存库</h2><p>可以做一个缓存库,直接命中返回结构就行,不需要走<code>LLM</code></p>
<h2 id="QA"><a href="#QA" class="headerlink" title="QA"></a>QA</h2><p>处理成<code>QA</code>的格式,类似 <code>xxx企业的董事长是谁</code>,答案是必须正确的,上一代的客服系统必备。直接数据级别的匹配就行,召回一般都是比较准确的。</p>
<h2 id="关键词-词典-迭代"><a href="#关键词-词典-迭代" class="headerlink" title="关键词+词典+迭代"></a>关键词+词典+迭代</h2><p>成本非常低,可控。思路是帮用户模糊,先提取关键词,再结合词典的词条解释,让表达更加的领域话、专业化,再扔给大语言模型。</p>
<p>例如:某个提问<code>什么是RAG</code>,首先<code>RAG</code>对应的解释是:<code>增强向量检索的知识库</code>,那对应又引入的新的<code>关键词</code>,继续递归对新的<code>关键词</code>给出解释,通常迭代个<code>2-5轮</code>,就会有非常好的召回效果。字典里面通常会定义:同义词、类别、上下关系。私用飞书的话,内部标准自带一个<code>词典</code>应用,统一的业务领域的知识、语言体系。并且提供<code>api</code>从词典中提取关键字。直接通过关键字,把词条的内容读取出来。文本提取<code>关键词</code>也有很多开源模型能够提取,但是针对某些专业领域,一些开源不一定具备这样的提取能力。</p>
<p>针对某些场景例如<code>2021年的xxxx</code>,同样的<code>2022年的xxxx</code>也发生了,使用<code>embedding</code>容易给错误召回的,而<code>ES</code>的效果会更好。相当于通过从用户的问题中,捕捉关键词,然后通过这些<code>关键词</code>去库里召回。这样的<code>关键词</code>,可以不断的积累<code>字典</code>,针对关键词给解释。</p>
<p>但是针对有日期、数字类的一般效果会非常不好,可以考虑采用<code>nl2sql</code>的方案,使用<code>fine-tuning</code>,能够做到召回率<code>90%</code>,有专门的模型针对这块,例如微软的<code>RAT-SQL</code>模型。</p>
<p>针对<code>excel</code>类型的文件,直接把数据存到一张<code>宽表</code>中,不要跨表,降低复杂度。针对<code>nl2sql</code>的开源模型的能力基本都是<code>单表</code>,基本可以达到预期</p>
<h2 id="知识图谱"><a href="#知识图谱" class="headerlink" title="知识图谱"></a>知识图谱</h2><p>难点在于构建知识图谱,例如<code>dify</code>在针对某些回答不满意的情况,可以对回复进行修正。这样的话会让下次回答更加可控</p>
<h1 id="embedding哪个算法好"><a href="#embedding哪个算法好" class="headerlink" title="embedding哪个算法好"></a>embedding哪个算法好</h1><p>目前openai的最好能够输入1536个输入输出结果能切分到700多个维度。针对模型来说重要性并不是很高。可以考虑替代方案。差距不会非常大从实际角度来说并不是很重要。</p>
<h1 id="国内知识库方案哪些底座比较好"><a href="#国内知识库方案哪些底座比较好" class="headerlink" title="国内知识库方案哪些底座比较好"></a>国内知识库方案哪些底座比较好</h1><p><code>qWen</code> 和 gpt 做过对齐,然后 <code>14B</code><code>72B int4</code> 效果就比较好,虽然比较慢,针对中文语料还是比较好的。</p>
<p>另一个就是 <code>yi-34b</code> 也还行。</p>
<h1 id="如何设计大模型测试样本"><a href="#如何设计大模型测试样本" class="headerlink" title="如何设计大模型测试样本"></a>如何设计大模型测试样本</h1><p>从用户的提问当中选取50-100个场景作为<code>大模型</code>选择、<code>知识库</code>效果的测评的数据集,例如需要测<code>text2sql</code>的场景的模型效果,首先就是要找到哪些查询的频次最高。</p>
<h1 id="RAG的发展方向"><a href="#RAG的发展方向" class="headerlink" title="RAG的发展方向"></a>RAG的发展方向</h1><ol>
<li>知识库作为专业领域的基座,大模型一定是无法覆盖的</li>
<li>从用户的角度来说大厂商可能会说服开发者去接入自家agent贡献领域知识</li>
<li>逐渐变成 Agent 场景下的底层设施,有清晰地 SOP目标明确</li>
</ol>
<div class="post-tags">
<!-- 文章tags -->
</div>
<p class="motto">重拾纯粹的写作</p>
</article>
<!-- 评论 -->
<div id="vcomments"></div>
<div id="tcomment"></div>
</div>
</main>
<!-- toc -->
<cosy-drag-box id="toc-drag-box" trigger="left" min-width="220" uid="toc-box">
<div class="meta-container">
<div class="toc-wrapper cosy-scrollbar">
<cosy-tooltip placement="right">
<p class="catalog">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24">
<g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M4 6h16"></path>
<path d="M4 12h16"></path>
<path d="M4 18h12"></path>
</g>
</svg>
<span>目录</span>
</p>
<span slot="content">
<span>隐藏目录</span>
<cosy-short-key>]</cosy-short-key>
</span>
</cosy-tooltip>
<!-- 文章toc -->
<ol class="toc"><li class="toc-item toc-level-1"><a class="toc-link" href="#%E9%80%9A%E7%94%A8%E7%9A%84RAG%E6%96%B9%E6%A1%88%E7%9A%84%E5%8F%AC%E5%9B%9E%E7%8E%87"><span class="toc-number">1.</span> <span class="toc-text">通用的RAG方案的召回率</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%A6%82%E4%BD%95%E6%8F%90%E5%8D%87%E5%8F%AC%E5%9B%9E%E7%8E%87"><span class="toc-number">2.</span> <span class="toc-text">如何提升召回率</span></a><ol class="toc-child"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E6%95%B0%E6%8D%AE%E9%A2%84%E5%A4%84%E7%90%86"><span class="toc-number">2.1.</span> <span class="toc-text">数据预处理</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%88%86%E8%AF%8D-chunk"><span class="toc-number">2.1.1.</span> <span class="toc-text">分词 chunk</span></a></li></ol></li></ol></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E6%84%8F%E5%9B%BE%E5%88%86%E7%B1%BB"><span class="toc-number">3.</span> <span class="toc-text">意图分类</span></a><ol class="toc-child"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E7%BC%93%E5%AD%98%E5%BA%93"><span class="toc-number">3.1.</span> <span class="toc-text">缓存库</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#QA"><span class="toc-number">3.2.</span> <span class="toc-text">QA</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%85%B3%E9%94%AE%E8%AF%8D-%E8%AF%8D%E5%85%B8-%E8%BF%AD%E4%BB%A3"><span class="toc-number">3.3.</span> <span class="toc-text">关键词+词典+迭代</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E7%9F%A5%E8%AF%86%E5%9B%BE%E8%B0%B1"><span class="toc-number">3.4.</span> <span class="toc-text">知识图谱</span></a></li></ol></li><li class="toc-item toc-level-1"><a class="toc-link" href="#embedding%E5%93%AA%E4%B8%AA%E7%AE%97%E6%B3%95%E5%A5%BD"><span class="toc-number">4.</span> <span class="toc-text">embedding哪个算法好</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%9B%BD%E5%86%85%E7%9F%A5%E8%AF%86%E5%BA%93%E6%96%B9%E6%A1%88%E5%93%AA%E4%BA%9B%E5%BA%95%E5%BA%A7%E6%AF%94%E8%BE%83%E5%A5%BD"><span class="toc-number">5.</span> <span class="toc-text">国内知识库方案哪些底座比较好</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%A6%82%E4%BD%95%E8%AE%BE%E8%AE%A1%E5%A4%A7%E6%A8%A1%E5%9E%8B%E6%B5%8B%E8%AF%95%E6%A0%B7%E6%9C%AC"><span class="toc-number">6.</span> <span class="toc-text">如何设计大模型测试样本</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#RAG%E7%9A%84%E5%8F%91%E5%B1%95%E6%96%B9%E5%90%91"><span class="toc-number">7.</span> <span class="toc-text">RAG的发展方向</span></a></li></ol>
</div>
</div>
</cosy-drag-box>
</div>
</div>
<script>
window.page = {
use: ''
}
window.katex = {
enable: "",
jsCdn: "//cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.js",
cssCdn: "//cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css"
}
window.mermaid = {
enable: "",
theme: "",
cdn: "//cdn.jsdelivr.net/npm/mermaid@10.4.0/dist/mermaid.min.js",
}
window.valine = {
enable: "",
appId: 'TisMit6uhflounFqAN3ZGjgq-MdYXbMMI',
appKey: 'CdjirjYdz07U5i62ElsJvXUh',
avatar: 'monsterid',
cdn: '//unpkg.com/valine@latest/dist/Valine.min.js',
serverURLs: '//tismit6u.api.lncldglobal.com'
};
window.twikoo = {
enable: "",
envId: "https://twikoo.maxshader.com",
cdn: 'https://cdn.staticfile.org/twikoo/1.6.32/twikoo.all.min.js',
lang: 'zh-CN',
}
</script>
<script src="/js/8f5fa89f.js"></script>
</main>
</body>
<script>
window.theme = {
color: 'hsl(238,50%,56%)'
}
</script>
<script src="/js/e5a59732.js"></script>
</html>