Merge branch 'feat/parent-child-retrieval' of https://github.com/langgenius/dify into feat/parent-child-retrieval
This commit is contained in:
commit
4017c65c1f
@ -96,6 +96,8 @@ export enum IndexingType {
|
||||
}
|
||||
|
||||
// Fallback segment delimiter: setSegmentIdentifier stores this when it is called with a falsy
// (e.g. empty) value. The literal is the escaped form (a backslash followed by "n", twice),
// not two real newline characters.
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
|
||||
// Initial value of the `maxChunkLength` state in StepTwo.
// NOTE(review): the identifier is misspelled ("MAXMIMUM"); rename it to
// DEFAULT_MAXIMUM_CHUNK_LENGTH together with every reference in one change.
const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500
|
||||
// Initial value of the `overlap` state in StepTwo.
const DEFAULT_OVERLAP = 50
|
||||
|
||||
type ParentChildConfig = {
|
||||
chunkForContext: ParentMode
|
||||
@ -155,9 +157,9 @@ const StepTwo = ({
|
||||
const setSegmentIdentifier = useCallback((value: string) => {
|
||||
doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER)
|
||||
}, [])
|
||||
const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length
|
||||
const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length
|
||||
const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
|
||||
const [overlap, setOverlap] = useState(50)
|
||||
const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
|
||||
const [rules, setRules] = useState<PreProcessingRule[]>([])
|
||||
const [defaultConfig, setDefaultConfig] = useState<Rules>()
|
||||
const hasSetIndexType = !!indexingType
|
||||
|
@ -32,7 +32,7 @@ export const DelimiterInput: FC<InputProps> = (props) => {
|
||||
<Input
|
||||
type="text"
|
||||
className='h-9'
|
||||
placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''}
|
||||
placeholder={t('datasetCreation.stepTwo.separatorPlaceholder')!}
|
||||
{...props}
|
||||
/>
|
||||
</FormField>
|
||||
@ -46,7 +46,7 @@ export const MaxLengthInput: FC<InputNumberProps> = (props) => {
|
||||
<InputNumber
|
||||
type="number"
|
||||
className='h-9'
|
||||
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
|
||||
placeholder={'≤ 4000'}
|
||||
max={4000}
|
||||
min={1}
|
||||
{...props}
|
||||
|
@ -539,7 +539,6 @@ const DocumentList: FC<IDocumentListProps> = ({
|
||||
}}>
|
||||
<td className='text-left align-middle text-text-tertiary text-xs'>
|
||||
<div className='flex items-center' onClick={e => e.stopPropagation()}>
|
||||
|
||||
<Checkbox
|
||||
className='shrink-0 mr-2'
|
||||
checked={selectedIds.includes(doc.id)}
|
||||
|
@ -43,7 +43,7 @@ export const EditSlice: FC<EditSliceProps> = (props) => {
|
||||
>
|
||||
<SliceLabel
|
||||
className={classNames(
|
||||
isDestructive && '!bg-red-500 !text-text-primary-on-surface',
|
||||
isDestructive && '!bg-state-destructive-solid !text-text-primary-on-surface',
|
||||
)}
|
||||
>
|
||||
{label}
|
||||
|
137
web/app/components/datasets/hit-testing/assets/test-data.ts
Normal file
137
web/app/components/datasets/hit-testing/assets/test-data.ts
Normal file
@ -0,0 +1,137 @@
|
||||
/**
 * Hard-coded sample records shaped like hit-testing results.
 *
 * Each entry carries a `segment` (with its nested `document`), `child_chunks: null`,
 * a `score`, and `tsne_position: null`. All three segments belong to the same
 * `document_id`, and the entries are listed in descending `score` order.
 *
 * NOTE(review): this looks like temporary fixture data for UI development; the
 * hit-testing page imports it and passes it straight to `renderHitResults`.
 * Confirm it is removed (or gated) before real results are expected on that page.
 */
export const generalResultData = [
|
||||
{
|
||||
segment: {
|
||||
id: 'b621b153-f8a7-4e85-bd3d-07feaf61bd9e',
|
||||
position: 1,
|
||||
document_id: '990c1ba7-a170-42ed-a71f-579e4875eaba',
|
||||
content: '张家界森林覆盖率达 90%以上,生物多样性丰富。这里是许多珍稀动植物的栖息地,例\r\n如银杉、中华秋沙鸭等。清新的空气和丰富的负氧离子,让它成为“ 天然氧吧”。\r\n历史背景\r\n1. 古代历史\r\n张家界地区在古代是土家族和苗族等少数民族的居住地,历史可以追溯到新石器时代。\r\n这里曾是楚国的属地,后来成为武陵山地区的重要组成部分。\r\n2. 近代发展\r\n张家界介绍\r\n张家界概述\r\n张家界位于中国湖南省西北部,是中国知名的旅游胜地,以独特的喀斯特地貌和壮美的\r\n自然风光闻名世界。它不仅是自然景观的瑰宝,还蕴含了丰富的历史与人文底蕴。\r\n地理特色\r\n1. 地貌特征\r\n张家界以其石英砂岩峰林地貌而著称,峰峦如刀劈斧削,形态各异,被誉为“ 天然山水\r\n画卷”。\r\n• 武陵源风景名胜区\r\n被列入联合国教科文组织世界自然遗产名录,其中包括张家界国家森林公园、天子山、\r\n索溪峪等景区。',
|
||||
answer: null,
|
||||
word_count: 387,
|
||||
tokens: 471,
|
||||
keywords: [
|
||||
'氧吧',
|
||||
'丰富',
|
||||
'90%',
|
||||
'天子山',
|
||||
'地貌',
|
||||
'历史',
|
||||
'张家界',
|
||||
'索溪峪',
|
||||
'天然',
|
||||
'负氧离子',
|
||||
],
|
||||
index_node_id: '483fad87-3b7e-486d-afae-75e4f0b2f3dd',
|
||||
index_node_hash: '61bb7556a32e3e09ed83f2de731c2ac2d669c598de6d85708e11f78817c882bb',
|
||||
hit_count: 0,
|
||||
enabled: true,
|
||||
disabled_at: null,
|
||||
disabled_by: null,
|
||||
status: 'completed',
|
||||
created_by: '6d8ad01f-edf9-43a6-b863-a034b1828ac7',
|
||||
created_at: 1732605173,
|
||||
indexing_at: 1732605173,
|
||||
completed_at: 1732605177,
|
||||
error: null,
|
||||
stopped_at: null,
|
||||
document: {
|
||||
id: '990c1ba7-a170-42ed-a71f-579e4875eaba',
|
||||
data_source_type: 'upload_file',
|
||||
name: '张家界介绍.pdf',
|
||||
doc_type: null,
|
||||
},
|
||||
},
|
||||
child_chunks: null,
|
||||
score: 0.8771945,
|
||||
tsne_position: null,
|
||||
},
|
||||
{
|
||||
segment: {
|
||||
id: '0859a14d-697e-4703-b59d-2ff69a7a9795',
|
||||
position: 5,
|
||||
document_id: '990c1ba7-a170-42ed-a71f-579e4875eaba',
|
||||
content: '茅岩河漂流和黄石寨徒步是体验张家界山水魅力的绝佳方式。\r\n总结\r\n张家界是集自然奇观与人文风情于一体的旅游胜地。无论是其独特的地貌景观,还是浓\r\n郁的土家文化,都展现了人与自然的和谐之美。这里的每一座山、每一片森林,似乎都\r\n在诉说着古老的故事,吸引着来自世界各地的游客流连忘返。',
|
||||
answer: null,
|
||||
word_count: 140,
|
||||
tokens: 173,
|
||||
keywords: [
|
||||
'绝佳',
|
||||
'徒步',
|
||||
'人与自然',
|
||||
'流连忘返',
|
||||
'河漂流',
|
||||
'之美',
|
||||
'张家界',
|
||||
'黄石寨',
|
||||
'诉说着',
|
||||
'茅岩',
|
||||
],
|
||||
index_node_id: '1d8e46bd-27ea-47fa-b8c4-87737bf2e021',
|
||||
index_node_hash: '8ac318494724ac44120b2f9db397bb02186b456fff76f9f8b86156fb8a864999',
|
||||
hit_count: 0,
|
||||
enabled: true,
|
||||
disabled_at: null,
|
||||
disabled_by: null,
|
||||
status: 'completed',
|
||||
created_by: '6d8ad01f-edf9-43a6-b863-a034b1828ac7',
|
||||
created_at: 1732605173,
|
||||
indexing_at: 1732605173,
|
||||
completed_at: 1732605177,
|
||||
error: null,
|
||||
stopped_at: null,
|
||||
document: {
|
||||
id: '990c1ba7-a170-42ed-a71f-579e4875eaba',
|
||||
data_source_type: 'upload_file',
|
||||
name: '张家界介绍.pdf',
|
||||
doc_type: null,
|
||||
},
|
||||
},
|
||||
child_chunks: null,
|
||||
score: 0.8642928,
|
||||
tsne_position: null,
|
||||
},
|
||||
{
|
||||
segment: {
|
||||
id: 'f5e63d62-984f-419f-a8ec-781e1280c739',
|
||||
position: 4,
|
||||
document_id: '990c1ba7-a170-42ed-a71f-579e4875eaba',
|
||||
content: '葛粉汤\r\n一种用当地葛根制成的食品,清热解毒,深受游客喜爱。\r\n3. 艺术与传说\r\n张家界的山水常与中国传统文化和神话传说相结合,例如天子山据说是土家族起义领袖',
|
||||
answer: null,
|
||||
word_count: 80,
|
||||
tokens: 94,
|
||||
keywords: [
|
||||
'葛根',
|
||||
'清热解毒',
|
||||
'葛粉',
|
||||
'天子山',
|
||||
'起义领袖',
|
||||
'深受',
|
||||
'张家界',
|
||||
'神话传说',
|
||||
'土家族',
|
||||
'山水',
|
||||
],
|
||||
index_node_id: '80f71f0d-6218-4160-8575-c59d58ac15e3',
|
||||
index_node_hash: '155ad96a96b984d7058fdb377f98bd50158d58574b75bea0187c9e3af5680ad5',
|
||||
hit_count: 0,
|
||||
enabled: true,
|
||||
disabled_at: null,
|
||||
disabled_by: null,
|
||||
status: 'completed',
|
||||
created_by: '6d8ad01f-edf9-43a6-b863-a034b1828ac7',
|
||||
created_at: 1732605173,
|
||||
indexing_at: 1732605173,
|
||||
completed_at: 1732605177,
|
||||
error: null,
|
||||
stopped_at: null,
|
||||
document: {
|
||||
id: '990c1ba7-a170-42ed-a71f-579e4875eaba',
|
||||
data_source_type: 'upload_file',
|
||||
name: '张家界介绍.pdf',
|
||||
doc_type: null,
|
||||
},
|
||||
},
|
||||
child_chunks: null,
|
||||
score: 0.80618876,
|
||||
tsne_position: null,
|
||||
},
|
||||
]
|
@ -0,0 +1,33 @@
|
||||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import React from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { SegmentIndexTag } from '../../documents/detail/completed'
|
||||
import type { HitTesting } from '@/models/datasets'
|
||||
import cn from '@/utils/classnames'
|
||||
/** Props accepted by `ResultItem`. */
type Props = {
  /** Hit-testing record to display; only its `segment` is read by this component. */
  payload: HitTesting
}

/**
 * Header row for a single hit-testing result.
 *
 * Shows the segment's position tag, a dot separator, and the segment's word count
 * followed by the translated "characters" label. The score slot is only a
 * placeholder comment for now, so nothing is rendered for it.
 */
const ResultItem: FC<Props> = ({ payload }) => {
  const { t } = useTranslation()
  const { position, word_count: wordCount } = payload.segment
  const charactersLabel = t('datasetDocuments.segment.characters')

  return (
    <div>
      <div className='flex justify-between items-center'>
        <div className='flex items-center space-x-2'>
          <SegmentIndexTag positionId={position} className={cn('w-fit group-hover:opacity-100')} />
          <div className='text-xs font-medium text-text-quaternary'>·</div>
          <div className='system-xs-medium text-text-tertiary'>{wordCount} {charactersLabel}</div>
        </div>
        {/* Score */}
      </div>
    </div>
  )
}
|
||||
export default React.memo(ResultItem)
|
@ -7,11 +7,11 @@ import { omit } from 'lodash-es'
|
||||
import { useBoolean } from 'ahooks'
|
||||
import { useContext } from 'use-context-selector'
|
||||
import SegmentCard from '../documents/detail/completed/SegmentCard'
|
||||
import docStyle from '../documents/detail/completed/style.module.css'
|
||||
import Textarea from './textarea'
|
||||
import s from './style.module.css'
|
||||
import HitDetail from './hit-detail'
|
||||
import ModifyRetrievalModal from './modify-retrieval-modal'
|
||||
import { generalResultData } from './assets/test-data'
|
||||
import cn from '@/utils/classnames'
|
||||
import type { ExternalKnowledgeBaseHitTestingResponse, ExternalKnowledgeBaseHitTesting as ExternalKnowledgeBaseHitTestingType, HitTestingResponse, HitTesting as HitTestingType } from '@/models/datasets'
|
||||
import Loading from '@/app/components/base/loading'
|
||||
@ -24,7 +24,6 @@ import DatasetDetailContext from '@/context/dataset-detail'
|
||||
import type { RetrievalConfig } from '@/types/app'
|
||||
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
|
||||
import useTimestamp from '@/hooks/use-timestamp'
|
||||
|
||||
const limit = 10
|
||||
|
||||
type Props = {
|
||||
@ -49,6 +48,7 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
|
||||
const isMobile = media === MediaType.mobile
|
||||
|
||||
const [hitResult, setHitResult] = useState<HitTestingResponse | undefined>() // 初始化记录为空数组
|
||||
// console.log(hitResult?.records)
|
||||
const [externalHitResult, setExternalHitResult] = useState<ExternalKnowledgeBaseHitTestingResponse | undefined>()
|
||||
const [submitLoading, setSubmitLoading] = useState(false)
|
||||
const [currParagraph, setCurrParagraph] = useState<{ paraInfo?: HitTestingType; showModal: boolean }>({ showModal: false })
|
||||
@ -77,7 +77,6 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
|
||||
const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict as RetrievalConfig)
|
||||
const [isShowModifyRetrievalModal, setIsShowModifyRetrievalModal] = useState(false)
|
||||
const [isShowRightPanel, { setTrue: showRightPanel, setFalse: hideRightPanel, set: setShowRightPanel }] = useBoolean(!isMobile)
|
||||
|
||||
const renderHitResults = (results: any[], onClickCard: (record: any) => void) => (
|
||||
<>
|
||||
<div className='text-gray-600 font-semibold mb-4'>{t('datasetHitTesting.hit.title')}</div>
|
||||
@ -106,12 +105,14 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
|
||||
)
|
||||
|
||||
const renderEmptyState = () => (
|
||||
<div className='h-full flex flex-col justify-center items-center'>
|
||||
<div className={cn(docStyle.commonIcon, docStyle.targetIcon, '!bg-gray-200 !h-14 !w-14')} />
|
||||
<div className='text-gray-300 text-[13px] mt-3'>
|
||||
{t('datasetHitTesting.hit.emptyTip')}
|
||||
</div>
|
||||
</div>
|
||||
// for test
|
||||
<div></div>
|
||||
// <div className='h-full flex flex-col justify-center items-center'>
|
||||
// <div className={cn(docStyle.commonIcon, docStyle.targetIcon, '!bg-gray-200 !h-14 !w-14')} />
|
||||
// <div className='text-gray-300 text-[13px] mt-3'>
|
||||
// {t('datasetHitTesting.hit.emptyTip')}
|
||||
// </div>
|
||||
// </div>
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
@ -190,6 +191,7 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
|
||||
</div>
|
||||
<FloatRightContainer panelClassname='!justify-start !overflow-y-auto' showClose isMobile={isMobile} isOpen={isShowRightPanel} onClose={hideRightPanel} footer={null}>
|
||||
<div className={cn(s.rightDiv, 'p-0 sm:px-8 sm:pt-[42px] sm:pb-[26px]')}>
|
||||
{renderHitResults(generalResultData, onClickCard)}
|
||||
{submitLoading
|
||||
? <div className={s.cardWrapper}>
|
||||
<SegmentCard
|
||||
|
@ -21,7 +21,7 @@ const translation = {
|
||||
},
|
||||
action: {
|
||||
uploadFile: 'Upload new file',
|
||||
settings: 'Segment settings',
|
||||
settings: 'Chunking Settings',
|
||||
addButton: 'Add chunk',
|
||||
add: 'Add a chunk',
|
||||
batchAdd: 'Batch add',
|
||||
|
Loading…
Reference in New Issue
Block a user