blog-hexo/public/posts/21037/index.html

353 lines
27 KiB
HTML
Raw Normal View History

2024-03-28 11:14:49 +08:00
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
RAG知识库召回率
</title>
<meta name="description" content="">
<meta name="keywords" content="">
<meta name="author" content="Mozzie">
<link rel="canonical" href="https://maxshader.com/posts/21037/">
<link rel="icon" type="image/svg" href='data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 7h1a2 2 0 0 1 2 2v.5a.5.5 0 0 0 .5.5a.5.5 0 0 1 .5.5v3a.5.5 0 0 1-.5.5a.5.5 0 0 0-.5.5v.5a2 2 0 0 1-2 2h-2"></path><path d="M8 7H6a2 2 0 0 0-2 2v6a2 2 0 0 0 2 2h1"></path><path d="M12 8l-2 4h3l-2 4"></path></g></svg>'>
<link rel="stylesheet" href="/css/b4c95347.css">
<script>window.i18n = {"tip-status-done":"完成","tip-status-default":"全部","tip-status-todo":"计划","tip-status-doing":"进行","tip-status-other":"其他","text-select":"选择","text-move":"移动","text-esc":"退出","January":"一月","February":"二月","March":"三月","April":"四月","May":"五月","June":"六月","July":"七月","August":"八月","September":"九月","October":"十月","November":"十一月","December":"十二月"}</script>
<meta name="generator" content="Hexo 7.0.0"></head>
<body id="app">
<aside id="aside-box" class="left-aside">
<div class="header">
<link rel="stylesheet" href="/css/61875ce9.css">
<div class="profile">
<a class="badge" href="/">
<span>Hi</span>
<span>Mozzie</span>
</a>
<cosy-tooltip id="left-aside-button" placement="right">
<span slot="content">
<span>显示 / 隐藏 左侧导航</span>
<cosy-short-key>[</cosy-short-key>
</span>
<cosy-icon>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M16 4c1.104-.019 2 .896 2 2v8a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h12zm1 2a1 1 0 0 0-1-1h-2.995v10H16a1 1 0 0 0 1-1V6zm-4.995 9V5H4.001a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8.004z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
</div>
<script src="/js/e0a67917.js"></script>
<cosy-search id="post-search" placeholder="搜索">
<div slot="short-key">
<cosy-short-key></cosy-short-key>
<cosy-short-key>K</cosy-short-key>
</div>
</cosy-search>
<script>
window.algolia = {
appId: "5DTW808BZ8",
SearchOnlyAPIKey: "27845b245efc8a2853cc0bdc7366ea26"
}
</script>
<script src="/js/62d6af47.js"></script>
</div>
<div class="aside-category">
<link rel="stylesheet" href="/css/db04a759.css">
<nav class="category-nav cosy-scrollbar">
<ul><li data-path="archives">
<a href="/archives">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M3.5 3A1.5 1.5 0 0 0 2 4.5v4A1.5 1.5 0 0 0 3.5 10h9A1.5 1.5 0 0 0 14 8.5v-4A1.5 1.5 0 0 0 12.5 3h-9zM3 4.5a.5.5 0 0 1 .5-.5h9a.5.5 0 0 1 .5.5v4a.5.5 0 0 1-.5.5h-9a.5.5 0 0 1-.5-.5v-4zm.5 6.5A1.5 1.5 0 0 0 2 12.5v4A1.5 1.5 0 0 0 3.5 18h9a1.5 1.5 0 0 0 1.5-1.5v-4a1.5 1.5 0 0 0-1.5-1.5h-9zM3 12.5a.5.5 0 0 1 .5-.5h9a.5.5 0 0 1 .5.5v4a.5.5 0 0 1-.5.5h-9a.5.5 0 0 1-.5-.5v-4zm14-.063a2.003 2.003 0 0 1-2.5-1.937A2 2 0 0 1 16 8.563a2.005 2.005 0 0 1 1 0a2 2 0 0 1 0 3.874zM16.5 3a.5.5 0 0 1 .5.5v4.041a3.02 3.02 0 0 0-1 0V3.5a.5.5 0 0 1 .5-.5zm0 10.5c-.17 0-.337-.014-.5-.041V17.5a.5.5 0 0 0 1 0v-4.041c-.163.027-.33.041-.5.041z" fill="currentColor"></path></g></svg>
<div class="ellipsis">归档</div>
</a>
</li><li data-path="cosy-roadmap">
<a href="/cosy-roadmap">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M9.384 2a1 1 0 0 0-.966.742L4.616 17H2.5a.5.5 0 0 0 0 1h15a.5.5 0 0 0 0-1h-2.116L11.582 2.742A1 1 0 0 0 10.616 2H9.384zM5.651 17l.8-3H11.5a.5.5 0 0 0 0-1H6.717l.534-2H10.5a.5.5 0 0 0 0-1H7.517l1.867-7h1.232l3.733 14H5.651z" fill="currentColor"></path></g></svg>
<div class="ellipsis">路线图</div>
</a>
</li><li data-path="cosy-resume">
<a href="/cosy-resume">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M8.5 4.498a1.5 1.5 0 1 1 3 0a1.5 1.5 0 0 1-3 0zm1.5-2.5a2.5 2.5 0 0 0-2.43 3.086L5.471 4.15a1.761 1.761 0 0 0-2.317.88c-.4.882-.008 1.917.877 2.31L7 8.662v2.287l-1.877 4.645a1.75 1.75 0 0 0 3.245 1.311l1.556-3.849a.073.073 0 0 1 .028-.038a.086.086 0 0 1 .046-.012c.02 0 .035.005.046.012a.074.074 0 0 1 .028.038l1.555 3.849a1.75 1.75 0 0 0 3.245-1.311L13 10.96V8.662l2.968-1.322a1.74 1.74 0 0 0 .877-2.31a1.761 1.761 0 0 0-2.317-.88l-2.097.934a2.5 2.5 0 0 0-2.43-3.086zM4.065 5.444a.761.761 0 0 1 1-.38l3.918 1.744a2.5 2.5 0 0 0 2.034 0l3.918-1.744a.761.761 0 0 1 1 .38a.739.739 0 0 1-.373.983l-2.969 1.321a1 1 0 0 0-.593.914v2.298a1 1 0 0 0 .073.375l1.872 4.633a.75.75 0 0 1-1.39.562l-1.556-3.849c-.364-.9-1.639-.9-2.003 0l-1.555 3.85a.75.75 0 1 1-1.39-.562l1.876-4.646A1 1 0 0 0 8 10.95V8.662a1 1 0 0 0-.593-.914L4.438 6.427a.739.739 0 0 1-.373-.983z" fill="currentColor"></path></g></svg>
<div class="ellipsis">简历</div>
</a>
</li></ul>
<ul><li class="active">
<a href="/categories/AI/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M6.13 2.793A3.91 3.91 0 0 1 8.5 2a1.757 1.757 0 0 1 1.5.78A1.757 1.757 0 0 1 11.5 2a3.91 3.91 0 0 1 2.37.793c.525.408.93.973 1.073 1.656c.328.025.628.161.88.366c.382.31.66.775.835 1.267c.274.765.348 1.74.064 2.57c.072.034.143.074.212.12c.275.183.484.445.638.754c.303.605.428 1.449.428 2.474c0 1.141-.435 1.907-.987 2.38a2.68 2.68 0 0 1-1.054.555c-.1.558-.38 1.204-.819 1.752C14.57 17.402 13.686 18 12.5 18c-.94 0-1.688-.52-2.174-1.03a4.252 4.252 0 0 1-.326-.385a4.245 4.245 0 0 1-.326.385C9.188 17.48 8.441 18 7.5 18c-1.186 0-2.069-.598-2.64-1.313a4.057 4.057 0 0 1-.819-1.752a2.68 2.68 0 0 1-1.054-.555C2.435 13.907 2 13.14 2 12c0-1.025.126-1.87.428-2.474c.154-.309.363-.57.638-.755a1.58 1.58 0 0 1 .212-.118c-.284-.832-.21-1.806.064-2.571c.175-.492.453-.957.835-1.267c.252-.205.552-.34.88-.366c.144-.683.549-1.248 1.074-1.656zM9.5 4.5V4.49l-.002-.05a2.744 2.744 0 0 0-.154-.764a1.222 1.222 0 0 0-.309-.49A.76.76 0 0 0 8.5 3a2.91 2.91 0 0 0-1.756.582C6.28 3.943 6 4.432 6 5a.5.5 0 0 1-.658.474c-.188-.062-.356-.027-.535.117c-.196.16-.387.444-.524.827c-.279.782-.25 1.729.133 2.305A.5.5 0 0 1 4.5 9h.75a2.25 2.25 0 0 1 2.25 2.25v.335a1.5 1.5 0 1 1-1 0v-.335c0-.69-.56-1.25-1.25-1.25H3.5a.499.499 0 0 1-.175-.032l-.003.006C3.124 10.369 3 11.025 3 12c0 .859.315 1.343.638 1.62c.347.298.732.38.862.38a.5.5 0 0 1 .5.5c0 .368.2 1.011.64 1.563c.429.535 1.046.937 1.86.937c.56 0 1.062-.313 1.45-.72c.191-.2.34-.407.437-.577a1.573 1.573 0 0 0 .113-.236V7.5H8.415a1.5 1.5 0 1 1 0-1H9.5v-2zm1 9.999v.967a1.575 1.575 0 0 0 .113.236c.098.17.246.377.436.577c.389.407.892.72 1.451.72c.814 0 1.431-.402 1.86-.937c.44-.552.64-1.195.64-1.563a.5.5 0 0 1 .5-.5c.13 0 .515-.082.862-.38c.323-.277.638-.761.638-1.62c0-.975-.125-1.63-.322-2.026a.923.923 0 0 0-.3-.37A.657.657 0 0 0 16 9.5a.5.5 0 0 1-.416-.777c.384-.576.412-1.523.133-2.305c-.137-.383-.328-.668-.524-.827c-.179-.144-.347-.18-.535-.117A.5.5 0 0 1 14 5c0-.568-.28-1.057-.745-1.418A2.91 2.91 0 0 0 11.5 3a.76.76 0 0 0-.535.186a1.22 1.22 0 0 0-.31.49a2.579 2.579 0 0 0-.155.814v9.01h.75c.69 0 1.25-.56 1.25-1.25v-1.835a1.5 1.5 0 1 1 1 0v1.835a2.25 2.25 0 0 1-2.25 2.25h-.75zM6.5 7a.5.5 0 1 0 1 0a.5.5 0 0 0-1 0zM13 9.5a.5.5 0 1 0 0-1a.5.5 0 0 0 0 1zm-6 3a.5.5 0 1 0 0 1a.5.5 0 0 0 0-1z" fill="currentColor"></path></g></svg>
<div class="ellipsis">
<span>AI</span>
</div>
</a>
</li><li class="">
<a href="/categories/CS/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 4l-2 14.5l-6 2l-6-2L4 4z"></path><path d="M7.5 8h3v8l-2-1"></path><path d="M16.5 8H14a.5.5 0 0 0-.5.5v3a.5.5 0 0 0 .5.5h1.423a.5.5 0 0 1 .495.57L15.5 15.5l-2 .5"></path></g></svg>
<div class="ellipsis">
<span>CS</span>
</div>
</a>
</li><li class="">
<a href="/categories/EQ/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5.636 5.636a9 9 0 0 1 13.397.747L13.414 12l5.619 5.617A9 9 0 1 1 5.636 5.636z"></path><circle cx="11.5" cy="7.5" r="1" fill="currentColor"></circle></g></svg>
<div class="ellipsis">
<span>EQ</span>
</div>
</a>
</li><li class="">
<a href="/categories/Hexo/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M4 17v1a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-1"></path><path d="M8 16h8"></path><path d="M8.322 12.582l7.956.836"></path><path d="M8.787 9.168l7.826 1.664"></path><path d="M10.096 5.764l7.608 2.472"></path></g></svg>
<div class="ellipsis">
<span>Hexo</span>
</div>
</a>
</li><li class="">
<a href="/categories/%E8%87%AA%E5%AA%92%E4%BD%93/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><path d="M9 12a4 4 0 1 0 4 4V4a5 5 0 0 0 5 5" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path></svg>
<div class="ellipsis">
<span>自媒体</span>
</div>
</a>
</li><li class="">
<a href="/categories/%E8%AF%BB%E4%B9%A6/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20"><g fill="none"><path d="M4 16V4a2 2 0 0 1 2-2h8a2 2 0 0 1 2 2v11a1 1 0 0 1-1 1H5a1 1 0 0 0 1 1h9.5a.5.5 0 0 1 0 1H6a2 2 0 0 1-2-2zM15 4a1 1 0 0 0-1-1H6a1 1 0 0 0-1 1v11h10V4zM7.041 8h.973c.045-.773.192-1.485.42-2.059A3.002 3.002 0 0 0 7.04 8zM6 8.5a4 4 0 1 1 8 0a4 4 0 0 1-8 0zm6.959-.5a3.002 3.002 0 0 0-1.392-2.059c.227.574.374 1.286.419 2.059h.973zm-.973 1c-.045.773-.192 1.486-.42 2.059A3.002 3.002 0 0 0 12.96 9h-.973zm-1.002-1c-.046-.707-.189-1.324-.383-1.778c-.12-.28-.25-.474-.368-.591c-.117-.115-.195-.131-.233-.131c-.038 0-.116.016-.233.13c-.118.118-.248.312-.368.592c-.194.454-.337 1.07-.383 1.778h1.968zM9.016 9c.046.707.189 1.324.383 1.778c.12.28.25.474.368.591c.117.115.195.131.233.131c.038 0 .116-.016.233-.13c.118-.118.248-.313.368-.592c.194-.454.336-1.07.383-1.778H9.016zM8.014 9h-.973c.147.87.668 1.614 1.392 2.059c-.227-.573-.374-1.286-.419-2.059z" fill="currentColor"></path></g></svg>
<div class="ellipsis">
<span>读书</span>
</div>
</a>
</li><li class="">
<a href="/categories/%E8%B4%A2%E7%BB%8F/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24"><g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="9"></circle><path d="M14.8 9A2 2 0 0 0 13 8h-2a2 2 0 0 0 0 4h2a2 2 0 0 1 0 4h-2a2 2 0 0 1-1.8-1"></path><path d="M12 6v2m0 8v2"></path></g></svg>
<div class="ellipsis">
<span>财经</span>
</div>
</a>
</li></ul>
</nav>
<script src="/js/da8f6845.js"></script>
</div>
<div class="bottom">
<cosy-tooltip id="button-preference" placement="right">
<span slot="content">
<span>偏好</span>
<cosy-short-key></cosy-short-key>
<cosy-short-key>p</cosy-short-key>
</span>
<cosy-icon bordered id="button-about-cosy-theme">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 16 16">
<g fill="none">
<path d="M8 6a2 2 0 1 0 0 4a2 2 0 0 0 0-4zM7 8a1 1 0 1 1 2 0a1 1 0 0 1-2 0zm3.618-3.602a.708.708 0 0 1-.824-.567l-.26-1.416a.354.354 0 0 0-.275-.282a6.072 6.072 0 0 0-2.519 0a.354.354 0 0 0-.275.282l-.259 1.416a.71.71 0 0 1-.936.538l-1.359-.484a.355.355 0 0 0-.382.095c-.569.627-1 1.367-1.262 2.173a.352.352 0 0 0 .108.378l1.102.931a.704.704 0 0 1 0 1.076l-1.102.931a.352.352 0 0 0-.108.378A5.986 5.986 0 0 0 3.53 12.02a.355.355 0 0 0 .382.095l1.36-.484a.708.708 0 0 1 .936.538l.258 1.416c.026.14.135.252.275.281a6.075 6.075 0 0 0 2.52 0a.353.353 0 0 0 .274-.281l.26-1.416a.71.71 0 0 1 .936-.538l1.359.484c.135.048.286.01.382-.095c.569-.627 1-1.367 1.262-2.173a.352.352 0 0 0-.108-.378l-1.102-.931a.703.703 0 0 1 0-1.076l1.102-.931a.352.352 0 0 0 .108-.378A5.985 5.985 0 0 0 12.47 3.98a.355.355 0 0 0-.382-.095l-1.36.484a.71.71 0 0 1-.111.03zm-6.62.58l.937.333a1.71 1.71 0 0 0 2.255-1.3l.177-.97a5.105 5.105 0 0 1 1.265 0l.178.97a1.708 1.708 0 0 0 2.255 1.3L12 4.977c.255.334.467.698.63 1.084l-.754.637a1.704 1.704 0 0 0 0 2.604l.755.637a4.99 4.99 0 0 1-.63 1.084l-.937-.334a1.71 1.71 0 0 0-2.255 1.3l-.178.97a5.099 5.099 0 0 1-1.265 0l-.177-.97a1.708 1.708 0 0 0-2.255-1.3L4 11.023a4.987 4.987 0 0 1-.63-1.084l.754-.638a1.704 1.704 0 0 0 0-2.603l-.755-.637c.164-.386.376-.75.63-1.084z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
</div>
</aside>
<main>
<link rel="stylesheet" href="/css/9bb9a539.css">
<div class="post-container">
<header>
<div class="left">
<link rel="stylesheet" href="/css/7d333f9e.css">
<nav class="breadcrumb">
<section>
<cosy-tooltip placement="bottom-left">
<span slot="content"><span>首页</span>
<cosy-short-key></cosy-short-key>
<cosy-short-key>H</cosy-short-key>
</span>
<cosy-icon href="/">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M8.998 2.388a1.5 1.5 0 0 1 2.005 0l5.5 4.942A1.5 1.5 0 0 1 17 8.445V15.5a1.5 1.5 0 0 1-1.5 1.5H13a1.5 1.5 0 0 1-1.5-1.5V12a.5.5 0 0 0-.5-.5H9a.5.5 0 0 0-.5.5v3.5A1.5 1.5 0 0 1 7 17H4.5A1.5 1.5 0 0 1 3 15.5V8.445c0-.425.18-.83.498-1.115l5.5-4.942zm1.336.744a.5.5 0 0 0-.668 0l-5.5 4.942A.5.5 0 0 0 4 8.445V15.5a.5.5 0 0 0 .5.5H7a.5.5 0 0 0 .5-.5V12A1.5 1.5 0 0 1 9 10.5h2a1.5 1.5 0 0 1 1.5 1.5v3.5a.5.5 0 0 0 .5.5h2.5a.5.5 0 0 0 .5-.5V8.445a.5.5 0 0 0-.166-.371l-5.5-4.942z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
<svg class="arrow" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M7.733 4.207a.75.75 0 0 1 1.06.026l5.001 5.25a.75.75 0 0 1 0 1.035l-5 5.25a.75.75 0 1 1-1.087-1.034L12.216 10l-4.51-4.734a.75.75 0 0 1 .027-1.06z" fill="currentColor"></path>
</g>
</svg>
<a class="ellipsis" href="/categories/AI/">
AI
</a>
<svg class="arrow" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M7.733 4.207a.75.75 0 0 1 1.06.026l5.001 5.25a.75.75 0 0 1 0 1.035l-5 5.25a.75.75 0 1 1-1.087-1.034L12.216 10l-4.51-4.734a.75.75 0 0 1 .027-1.06z" fill="currentColor"></path>
</g>
</svg>
<span class="ellipsis">
RAG知识库召回率
</span>
</section>
</nav>
<script src="/js/31d6cfe0.js"></script>
</div>
<div class="right">
<cosy-tooltip id="toc-show-button" placement="left">
<span slot="content">
<span>显示 / 隐藏 文章目录</span>
<cosy-short-key>]</cosy-short-key>
</span>
<cosy-icon>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 20 20">
<g fill="none">
<path d="M4 4c-1.104-.019-2 .896-2 2v8a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2H4zM3 6a1 1 0 0 1 1-1h2.995v10H4a1 1 0 0 1-1-1V6zm4.995 9V5h8.004a1 1 0 0 1 1 1v8a1 1 0 0 1-1 1H7.995z" fill="currentColor"></path>
</g>
</svg>
</cosy-icon>
</cosy-tooltip>
</div>
</header>
<div class="content">
<main class="cosy-scrollbar">
<div class="article-container">
<!-- 渲染文章内容 -->
<article>
<!-- 文章标题 -->
<h1 class="post-title">RAG知识库召回率</h1>
<div class="last-updated">
2024-03-28 13:54:22 +08:00
上次更新: 2024-03-28 13:47:49
2024-03-28 11:14:49 +08:00
</div>
<!-- 文章 -->
2024-03-28 12:11:12 +08:00
<h1 id="通用的RAG方案的召回率"><a href="#通用的RAG方案的召回率" class="headerlink" title="通用的RAG方案的召回率"></a>通用的RAG方案的召回率</h1><p>不一定召回率越高越好,对于准确性也需要考虑,通用的召回率大概在<code>70%</code>,不论是<code>dify</code>还是<code>fastgpt</code>,对于生产场景,要求起码到<code>90%</code>的水平</p>
2024-03-28 13:12:04 +08:00
<h1 id="如何提升召回率"><a href="#如何提升召回率" class="headerlink" title="如何提升召回率"></a>如何提升召回率</h1><p>首先是针对搜索query来说通过文本框的<code>打字输入</code>的角度,人类一定是倾向于偷懒,只输入关键词,这就是给表达真实意图带来的难度。</p>
<p>语音交互,才有可能让大家使用起来更舒服。</p>
<h2 id="数据预处理"><a href="#数据预处理" class="headerlink" title="数据预处理"></a>数据预处理</h2><p>针对<code>LLM</code>模型来说,数据治理变的很重要,在数据切分的过程中,需要符合人的逻辑去<code>分词</code>。按照人的逻辑,例如:表格、章节、目录……</p>
2024-03-28 12:59:20 +08:00
<h3 id="分词-chunk"><a href="#分词-chunk" class="headerlink" title="分词 chunk"></a>分词 chunk</h3><p><code>chunk</code><code>size</code>越大,召回越少。但是分的越精细,同样会损失上下文连贯性</p>
<h1 id="意图分类"><a href="#意图分类" class="headerlink" title="意图分类"></a>意图分类</h1><p>用户提问的内容,很短、缩写的情况,如何命中用户的<code>真实意图</code></p>
<h2 id="缓存库"><a href="#缓存库" class="headerlink" title="缓存库"></a>缓存库</h2><p>可以做一个缓存库,直接命中返回结构就行,不需要走<code>LLM</code></p>
<h2 id="QA"><a href="#QA" class="headerlink" title="QA"></a>QA</h2><p>处理成<code>QA</code>的格式,类似 <code>xxx企业的董事长是谁</code>,答案是必须正确的,上一代的客服系统必备。直接数据级别的匹配就行,召回一般都是比较准确的。</p>
<h2 id="关键词-词典-迭代"><a href="#关键词-词典-迭代" class="headerlink" title="关键词+词典+迭代"></a>关键词+词典+迭代</h2><p>成本非常低,可控。思路是帮用户模糊,先提取关键词,再结合词典的词条解释,让表达更加的领域话、专业化,再扔给大语言模型。</p>
<p>例如:某个提问<code>什么是RAG</code>,首先<code>RAG</code>对应的解释是:<code>增强向量检索的知识库</code>,那对应又引入的新的<code>关键词</code>,继续递归对新的<code>关键词</code>给出解释,通常迭代个<code>2-5轮</code>,就会有非常好的召回效果。字典里面通常会定义:同义词、类别、上下关系。私用飞书的话,内部标准自带一个<code>词典</code>应用,统一的业务领域的知识、语言体系。并且提供<code>api</code>从词典中提取关键字。直接通过关键字,把词条的内容读取出来。文本提取<code>关键词</code>也有很多开源模型能够提取,但是针对某些专业领域,一些开源不一定具备这样的提取能力。</p>
<p>针对某些场景例如<code>2021年的xxxx</code>,同样的<code>2022年的xxxx</code>也发生了,使用<code>embedding</code>容易给错误召回的,而<code>ES</code>的效果会更好。相当于通过从用户的问题中,捕捉关键词,然后通过这些<code>关键词</code>去库里召回。这样的<code>关键词</code>,可以不断的积累<code>字典</code>,针对关键词给解释。</p>
2024-03-28 12:32:12 +08:00
<p>但是针对有日期、数字类的一般效果会非常不好,可以考虑采用<code>nl2sql</code>的方案,使用<code>fine-tuning</code>,能够做到召回率<code>90%</code>,有专门的模型针对这块,例如微软的<code>RAT-SQL</code>模型。</p>
2024-03-28 12:11:12 +08:00
<p>针对<code>excel</code>类型的文件,直接把数据存到一张<code>宽表</code>中,不要跨表,降低复杂度。针对<code>nl2sql</code>的开源模型的能力基本都是<code>单表</code>,基本可以达到预期</p>
2024-03-28 12:59:20 +08:00
<h2 id="知识图谱"><a href="#知识图谱" class="headerlink" title="知识图谱"></a>知识图谱</h2><p>难点在于构建知识图谱,例如<code>dify</code>在针对某些回答不满意的情况,可以对回复进行修正。这样的话会让下次回答更加可控</p>
<h1 id="embedding哪个算法好"><a href="#embedding哪个算法好" class="headerlink" title="embedding哪个算法好"></a>embedding哪个算法好</h1><p>目前openai的最好能够输入1536个输入输出结果能切分到700多个维度。针对模型来说重要性并不是很高。可以考虑替代方案。差距不会非常大从实际角度来说并不是很重要。</p>
<h1 id="国内知识库方案哪些底座比较好"><a href="#国内知识库方案哪些底座比较好" class="headerlink" title="国内知识库方案哪些底座比较好"></a>国内知识库方案哪些底座比较好</h1><p><code>qWen</code> 和 gpt 做过对齐,然后 <code>14B</code><code>72B int4</code> 效果就比较好,虽然比较慢,针对中文语料还是比较好的。</p>
<p>另一个就是 <code>yi-34b</code> 也还行。</p>
2024-03-28 13:12:04 +08:00
<h1 id="如何设计大模型测试样本"><a href="#如何设计大模型测试样本" class="headerlink" title="如何设计大模型测试样本"></a>如何设计大模型测试样本</h1><p>从用户的提问当中选取50-100个场景作为<code>大模型</code>选择、<code>知识库</code>效果的测评的数据集,例如需要测<code>text2sql</code>的场景的模型效果,首先就是要找到哪些查询的频次最高。</p>
2024-03-28 13:54:22 +08:00
<h1 id="RAG的发展方向"><a href="#RAG的发展方向" class="headerlink" title="RAG的发展方向"></a>RAG的发展方向</h1><ol>
<li>知识库作为专业领域的基座,大模型一定是无法覆盖的</li>
<li>从用户的角度来说大厂商可能会说服开发者去接入自家agent贡献领域知识</li>
<li>逐渐变成 Agent 场景下的底层设施,有清晰地 SOP目标明确</li>
</ol>
2024-03-28 13:12:04 +08:00
2024-03-28 11:14:49 +08:00
<div class="post-tags">
<!-- 文章tags -->
</div>
<p class="motto">重拾纯粹的写作</p>
</article>
<!-- 评论 -->
<div id="vcomments"></div>
</div>
</main>
<!-- toc -->
<cosy-drag-box id="toc-drag-box" trigger="left" min-width="220" uid="toc-box">
<div class="meta-container">
<div class="toc-wrapper cosy-scrollbar">
<p class="catalog">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 24 24">
<g fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M4 6h16"></path>
<path d="M4 12h16"></path>
<path d="M4 18h12"></path>
</g>
</svg>
<span>目录</span>
</p>
<!-- 文章toc -->
2024-03-28 13:12:04 +08:00
<ol class="toc"><li class="toc-item toc-level-1"><a class="toc-link" href="#%E9%80%9A%E7%94%A8%E7%9A%84RAG%E6%96%B9%E6%A1%88%E7%9A%84%E5%8F%AC%E5%9B%9E%E7%8E%87"><span class="toc-number">1.</span> <span class="toc-text">通用的RAG方案的召回率</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%A6%82%E4%BD%95%E6%8F%90%E5%8D%87%E5%8F%AC%E5%9B%9E%E7%8E%87"><span class="toc-number">2.</span> <span class="toc-text">如何提升召回率</span></a><ol class="toc-child"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E6%95%B0%E6%8D%AE%E9%A2%84%E5%A4%84%E7%90%86"><span class="toc-number">2.1.</span> <span class="toc-text">数据预处理</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%88%86%E8%AF%8D-chunk"><span class="toc-number">2.1.1.</span> <span class="toc-text">分词 chunk</span></a></li></ol></li></ol></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E6%84%8F%E5%9B%BE%E5%88%86%E7%B1%BB"><span class="toc-number">3.</span> <span class="toc-text">意图分类</span></a><ol class="toc-child"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E7%BC%93%E5%AD%98%E5%BA%93"><span class="toc-number">3.1.</span> <span class="toc-text">缓存库</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#QA"><span class="toc-number">3.2.</span> <span class="toc-text">QA</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%85%B3%E9%94%AE%E8%AF%8D-%E8%AF%8D%E5%85%B8-%E8%BF%AD%E4%BB%A3"><span class="toc-number">3.3.</span> <span class="toc-text">关键词+词典+迭代</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E7%9F%A5%E8%AF%86%E5%9B%BE%E8%B0%B1"><span class="toc-number">3.4.</span> <span class="toc-text">知识图谱</span></a></li></ol></li><li class="toc-item toc-level-1"><a class="toc-link" href="#embedding%E5%93%AA%E4%B8%AA%E7%AE%97%E6%B3%95%E5%A5%BD"><span class="toc-number">4.</span> <span class="toc-text">embedding哪个算法好</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%9B%BD%E5%86%85%E7%9F%A5%E8%AF%86%E5%BA%93%E6%96%B9%E6%A1%88%E5%93%AA%E4%BA%9B%E5%BA%95%E5%BA%A7%E6%AF%94%E8%BE%83%E5%A5%BD"><span class="toc-number">5.</span> <span class="toc-text">国内知识库方案哪些底座比较好</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%A6%82%E4%BD%95%E8%AE%BE%E8%AE%A1%E5%A4%A7%E6%A8%A1%E5%9E%8B%E6%B5%8B%E8%AF%95%E6%A0%B7%E6%9C%AC"><span class="toc-number">6.</span> <span class="toc-text">如何设计大模型测试样本</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#RAG%E7%9A%84%E5%8F%91%E5%B1%95%E6%96%B9%E5%90%91"><span class="toc-number">7.</span> <span class="toc-text">RAG的发展方向</span></a></li></ol>
2024-03-28 11:14:49 +08:00
</div>
</div>
</cosy-drag-box>
</div>
</div>
<script>
window.page = {
use: ''
}
window.katex = {
enable: "",
jsCdn: "//cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.js",
cssCdn: "//cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css"
}
window.mermaid = {
enable: "",
theme: "",
cdn: "//cdn.jsdelivr.net/npm/mermaid@10.4.0/dist/mermaid.min.js",
}
window.valine = {
enable: "",
appId: 'TisMit6uhflounFqAN3ZGjgq-MdYXbMMI',
appKey: 'CdjirjYdz07U5i62ElsJvXUh',
avatar: 'monsterid',
cdn: '//unpkg.com/valine@latest/dist/Valine.min.js',
serverURLs: '//tismit6u.api.lncldglobal.com'
};
</script>
<script src="/js/5bf38c1b.js"></script>
</main>
</body>
<script>
window.theme = {
color: 'hsl(238,50%,56%)'
}
</script>
<script src="/js/82a967e8.js"></script>
</html>