From 90421b5fb5549ca6a28baaf85cb73d69319579d1 Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Tue, 3 Dec 2024 13:39:27 +0800 Subject: [PATCH 1/7] feat: preview container components --- web/app/components/base/divider/index.tsx | 2 +- .../components/base/divider/with-label.tsx | 23 ++++++++ .../components/datasets/preview/container.tsx | 27 ++++++++++ .../components/datasets/preview/header.tsx | 23 ++++++++ web/app/components/datasets/preview/index.tsx | 0 web/app/dev-preview/page.tsx | 53 ++++++++++--------- 6 files changed, 101 insertions(+), 27 deletions(-) create mode 100644 web/app/components/base/divider/with-label.tsx create mode 100644 web/app/components/datasets/preview/container.tsx create mode 100644 web/app/components/datasets/preview/header.tsx create mode 100644 web/app/components/datasets/preview/index.tsx diff --git a/web/app/components/base/divider/index.tsx b/web/app/components/base/divider/index.tsx index 4b351dea99..2f1245e782 100644 --- a/web/app/components/base/divider/index.tsx +++ b/web/app/components/base/divider/index.tsx @@ -18,7 +18,7 @@ const dividerVariants = cva( }, ) -type DividerProps = { +export type DividerProps = { className?: string style?: CSSProperties } & VariantProps diff --git a/web/app/components/base/divider/with-label.tsx b/web/app/components/base/divider/with-label.tsx new file mode 100644 index 0000000000..608bc79998 --- /dev/null +++ b/web/app/components/base/divider/with-label.tsx @@ -0,0 +1,23 @@ +import type { FC } from 'react' +import type { DividerProps } from '.' +import Divider from '.' +import classNames from '@/utils/classnames' + +export type DividerWithLabelProps = DividerProps & { + label: string +} + +export const DividerWithLabel: FC = (props) => { + const { label, className, ...rest } = props + return
+ + + {label} + + +
+} + +export default DividerWithLabel diff --git a/web/app/components/datasets/preview/container.tsx b/web/app/components/datasets/preview/container.tsx new file mode 100644 index 0000000000..7ce8e226e2 --- /dev/null +++ b/web/app/components/datasets/preview/container.tsx @@ -0,0 +1,27 @@ +import type { ComponentProps, FC, ReactNode } from 'react' +import { forwardRef } from 'react' +import classNames from '@/utils/classnames' + +export type PreviewContainerProps = ComponentProps<'div'> & { + header: ReactNode +} + +export const PreviewContainer: FC = forwardRef((props, ref) => { + const { children, className, header, ...rest } = props + return
+
+ {header} +
+
+ {children} +
+
+}) +PreviewContainer.displayName = 'PreviewContainer' diff --git a/web/app/components/datasets/preview/header.tsx b/web/app/components/datasets/preview/header.tsx new file mode 100644 index 0000000000..1f17f2ca2d --- /dev/null +++ b/web/app/components/datasets/preview/header.tsx @@ -0,0 +1,23 @@ +import type { ComponentProps, FC } from 'react' +import classNames from '@/utils/classnames' + +export type PreviewHeaderProps = Omit, 'title'> & { + title: string +} + +export const PreviewHeader: FC = (props) => { + const { title, className, children, ...rest } = props + return
+
+ {title} +
+ {children} +
+} diff --git a/web/app/components/datasets/preview/index.tsx b/web/app/components/datasets/preview/index.tsx new file mode 100644 index 0000000000..e69de29bb2 diff --git a/web/app/dev-preview/page.tsx b/web/app/dev-preview/page.tsx index 72434cafd9..d25fa9777a 100644 --- a/web/app/dev-preview/page.tsx +++ b/web/app/dev-preview/page.tsx @@ -2,34 +2,35 @@ import { FormattedText } from '../components/datasets/formatted-text/formatted' import { PreviewSlice } from '../components/datasets/formatted-text/flavours/preview-slice' -import { EditSlice } from '../components/datasets/formatted-text/flavours/edit-slice' +import { PreviewContainer } from '../components/datasets/preview/container' +import { PreviewHeader } from '../components/datasets/preview/header' +import FileIcon from '../components/base/file-icon' +import { ChevronDown } from '../components/base/icons/src/vender/solid/arrows' +import Badge from '../components/base/badge' +import { DividerWithLabel } from '../components/base/divider/with-label' export default function Page() { return
- - - - - - - - -
- - - - - -
+ +
+ +

EOS R3 Tech Sheet.pdf

+ + +
+ + }> + + + + + + + + +
} From c960f78035991cd41fdf24cd76c251b323539b31 Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Tue, 3 Dec 2024 14:14:37 +0800 Subject: [PATCH 2/7] refactor: step 2 --- .../datasets/create/step-two/index.tsx | 101 +++--------------- 1 file changed, 17 insertions(+), 84 deletions(-) diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index ac8f2c873a..8381125f2f 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,10 +1,8 @@ 'use client' -import type { FC, PropsWithChildren, ReactNode } from 'react' -import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' +import type { FC, PropsWithChildren } from 'react' +import React, { useCallback, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' -import { useBoolean } from 'ahooks' -import { XMarkIcon } from '@heroicons/react/20/solid' import { RiArrowLeftLine, RiCloseLine, @@ -65,13 +63,6 @@ const TextLabel: FC = (props) => { return } -const FormField: FC> = (props) => { - return
- {props.label} - {props.children} -
-} - type ValueOf = T[keyof T] type StepTwoProps = { isSetting?: boolean @@ -117,7 +108,6 @@ type ParentChildConfig = { delimiter: string maxLength: number } - rules: PreProcessingRule[] } const defaultParentChildConfig: ParentChildConfig = { @@ -130,7 +120,6 @@ const defaultParentChildConfig: ParentChildConfig = { delimiter: '\\n\\n', maxLength: 4000, }, - rules: [], } const StepTwo = ({ @@ -162,10 +151,6 @@ const StepTwo = ({ const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type - const scrollRef = useRef(null) - const [scrolled, setScrolled] = useState(false) - const previewScrollRef = useRef(null) - const [previewScrolled, setPreviewScrolled] = useState(false) const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) const setSegmentIdentifier = useCallback((value: string) => { @@ -191,32 +176,17 @@ const StepTwo = ({ ) const [QATipHide, setQATipHide] = useState(false) const [previewSwitched, setPreviewSwitched] = useState(false) - const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) - const fileIndexingEstimate = (() => { - return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate - })() + const fileIndexingEstimate = segmentationType === SegmentType.AUTO + ? automaticFileIndexingEstimate + : customFileIndexingEstimate + const [isCreating, setIsCreating] = useState(false) const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) - const scrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setScrolled(true) - - else - setScrolled(false) - } - - const previewScrollHandle = (e: Event) => { - if ((e.target as HTMLDivElement).scrollTop > 0) - setPreviewScrolled(true) - - else - setPreviewScrolled(false) - } const getFileName = (name: string) => { const arr = name.split('.') return arr.slice(0, -1).join('.') @@ -248,7 +218,7 @@ const StepTwo = ({ if (defaultConfig) { setSegmentIdentifier(defaultConfig.segmentation.separator) setMax(defaultConfig.segmentation.max_tokens) - setOverlap(defaultConfig.segmentation.chunk_overlap) + setOverlap(defaultConfig.segmentation.chunk_overlap!) setRules(defaultConfig.pre_processing_rules) } setParentChildConfig(defaultParentChildConfig) @@ -263,13 +233,12 @@ const StepTwo = ({ setAutomaticFileIndexingEstimate(res) } - const confirmChangeCustomConfig = () => { + const updatePreview = () => { if (segmentationType === SegmentType.CUSTOM && max > 4000) { Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) return } setCustomFileIndexingEstimate(null) - setShowPreview() fetchFileIndexingEstimate() setPreviewSwitched(false) } @@ -468,7 +437,7 @@ const StepTwo = ({ const separator = res.rules.segmentation.separator setSegmentIdentifier(separator) setMax(res.rules.segmentation.max_tokens) - setOverlap(res.rules.segmentation.chunk_overlap) + setOverlap(res.rules.segmentation.chunk_overlap!) setRules(res.rules.pre_processing_rules) setDefaultConfig(res.rules) } @@ -540,8 +509,8 @@ const StepTwo = ({ } } - const handleSwitch = (state: boolean) => { - if (state) + const handleDocformSwitch = (isQAMode: boolean) => { + if (isQAMode) setDocForm(DocForm.QA) else setDocForm(DocForm.TEXT) @@ -587,22 +556,6 @@ const StepTwo = ({ } }, []) - useEffect(() => { - scrollRef.current?.addEventListener('scroll', scrollHandle) - return () => { - scrollRef.current?.removeEventListener('scroll', scrollHandle) - } - }, []) - - useLayoutEffect(() => { - if (showPreview) { - previewScrollRef.current?.addEventListener('scroll', previewScrollHandle) - return () => { - previewScrollRef.current?.removeEventListener('scroll', previewScrollHandle) - } - } - }, [showPreview]) - useEffect(() => { if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA) setDocForm(DocForm.TEXT) @@ -620,12 +573,10 @@ const StepTwo = ({ useEffect(() => { if (segmentationType === SegmentType.AUTO) { setAutomaticFileIndexingEstimate(null) - !isMobile && setShowPreview() fetchFileIndexingEstimate() setPreviewSwitched(false) } else { - hidePreview() setCustomFileIndexingEstimate(null) setPreviewSwitched(false) } @@ -659,7 +610,7 @@ const StepTwo = ({ onClick={() => setSegmentationType(SegmentType.AUTO)} actions={ <> - @@ -714,7 +665,7 @@ const StepTwo = ({ onClick={() => setSegmentationType(SegmentType.CUSTOM)} actions={ <> - @@ -910,7 +861,7 @@ const StepTwo = ({ @@ -1000,11 +951,10 @@ const StepTwo = ({ { }} footer={null}> - {showPreview &&
-
+
{t('datasetCreation.stepTwo.previewTitle')}
@@ -1012,9 +962,6 @@ const StepTwo = ({ )}
-
- -
{docForm === DocForm.QA && !previewSwitched && (
@@ -1049,21 +996,7 @@ const StepTwo = ({
)}
-
} - {!showPreview && ( -
-
- -
{t('datasetCreation.stepTwo.sideTipTitle')}
-
-

{t('datasetCreation.stepTwo.sideTipP1')}

-

{t('datasetCreation.stepTwo.sideTipP2')}

-

{t('datasetCreation.stepTwo.sideTipP3')}

-

{t('datasetCreation.stepTwo.sideTipP4')}

-
-
-
- )} +
) From 94eb069a97e572884e15dab05e99d15a213f542e Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Tue, 3 Dec 2024 14:34:18 +0800 Subject: [PATCH 3/7] refactor: step 2 --- .../datasets/create/step-two/index.tsx | 2 +- web/service/use-datasets.ts | 131 ++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 web/service/use-datasets.ts diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 8381125f2f..7bcb0f96d3 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -91,7 +91,7 @@ enum SegmentType { AUTO = 'automatic', CUSTOM = 'custom', } -enum IndexingType { +export enum IndexingType { QUALIFIED = 'high_quality', ECONOMICAL = 'economy', } diff --git a/web/service/use-datasets.ts b/web/service/use-datasets.ts new file mode 100644 index 0000000000..53ca309c72 --- /dev/null +++ b/web/service/use-datasets.ts @@ -0,0 +1,131 @@ +import groupBy from 'lodash-es/groupBy' +import type { IndexingType } from '@/app/components/datasets/create/step-two' +import type { CrawlOptions, CrawlResultItem, CustomFile, DocForm, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' +import { DataSourceType } from '@/models/datasets' +import type { DataSourceProvider, NotionPage } from '@/models/common' + +const getNotionInfo = ( + notionPages: NotionPage[], +) => { + const workspacesMap = groupBy(notionPages, 'workspace_id') + const workspaces = Object.keys(workspacesMap).map((workspaceId) => { + return { + workspaceId, + pages: workspacesMap[workspaceId], + } + }) + return workspaces.map((workspace) => { + return { + workspace_id: workspace.workspaceId, + pages: workspace.pages.map((page) => { + const { page_id, page_name, page_icon, type } = page + return { + page_id, + page_name, + page_icon, + type, + } + }), + } + }) as NotionInfo[] +} + +const getWebsiteInfo = ( + opts: { + websiteCrawlProvider: DataSourceProvider + websiteCrawlJobId: string + websitePages: CrawlResultItem[] + crawlOptions?: CrawlOptions + }, +) => { + const { websiteCrawlProvider, websiteCrawlJobId, websitePages, crawlOptions } = opts + return { + provider: websiteCrawlProvider, + job_id: websiteCrawlJobId, + urls: websitePages.map(page => page.source_url), + only_main_content: crawlOptions?.only_main_content, + } +} + +type GetFileIndexingEstimateParamsOption = { + docForm: DocForm + docLanguage: string + dataSourceType: DataSourceType + files: CustomFile[] + indexingTechnique: IndexingType + processRule: ProcessRule + dataset_id: string + notionPages?: NotionPage[] + websitePages?: CrawlResultItem[] + crawlOptions?: CrawlOptions + websiteCrawlProvider?: DataSourceProvider + websiteCrawlJobId?: string +} + +const getFileIndexingEstimateParams = ({ + docForm, + docLanguage, + dataSourceType, + files, + indexingTechnique, + processRule, + dataset_id, + notionPages, + websitePages, + crawlOptions, + websiteCrawlProvider, + websiteCrawlJobId, +}: GetFileIndexingEstimateParamsOption): IndexingEstimateParams | undefined => { + if (dataSourceType === DataSourceType.FILE) { + return { + info_list: { + data_source_type: dataSourceType, + file_info_list: { + file_ids: files.map(file => file.id) as string[], + }, + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } + } + if (dataSourceType === DataSourceType.NOTION) { + return { + info_list: { + data_source_type: dataSourceType, + notion_info_list: getNotionInfo( + notionPages as NotionPage[], + ), + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } + } + if (dataSourceType === DataSourceType.WEB) { + return { + info_list: { + data_source_type: dataSourceType, + website_info_list: getWebsiteInfo({ + websiteCrawlProvider: websiteCrawlProvider as DataSourceProvider, + websiteCrawlJobId: websiteCrawlJobId as string, + websitePages: websitePages as CrawlResultItem[], + crawlOptions, + }), + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } + } +} + +export const useFetchFileIndexingEstimate = () => { + +} From a77aa169b4ae3d5a887018830316fec36b02279a Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Tue, 3 Dec 2024 14:43:15 +0800 Subject: [PATCH 4/7] refactor: step 2 --- web/service/use-datasets.ts | 172 +++++++++++++++++++++++------------- 1 file changed, 111 insertions(+), 61 deletions(-) diff --git a/web/service/use-datasets.ts b/web/service/use-datasets.ts index 53ca309c72..7ac9635cd4 100644 --- a/web/service/use-datasets.ts +++ b/web/service/use-datasets.ts @@ -1,7 +1,8 @@ import groupBy from 'lodash-es/groupBy' +import { useMutation } from '@tanstack/react-query' +import { fetchFileIndexingEstimate } from './datasets' import type { IndexingType } from '@/app/components/datasets/create/step-two' -import type { CrawlOptions, CrawlResultItem, CustomFile, DocForm, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' -import { DataSourceType } from '@/models/datasets' +import type { CrawlOptions, CrawlResultItem, CustomFile, DataSourceType, DocForm, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' import type { DataSourceProvider, NotionPage } from '@/models/common' const getNotionInfo = ( @@ -47,22 +48,33 @@ const getWebsiteInfo = ( } } -type GetFileIndexingEstimateParamsOption = { +type GetFileIndexingEstimateParamsOptionBase = { docForm: DocForm docLanguage: string - dataSourceType: DataSourceType - files: CustomFile[] indexingTechnique: IndexingType processRule: ProcessRule dataset_id: string - notionPages?: NotionPage[] - websitePages?: CrawlResultItem[] - crawlOptions?: CrawlOptions - websiteCrawlProvider?: DataSourceProvider - websiteCrawlJobId?: string } -const getFileIndexingEstimateParams = ({ +type GetFileIndexingEstimateParamsOptionFile = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.FILE + files: CustomFile[] +} + +type GetFileIndexingEstimateParamsOptionNotion = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.NOTION + notionPages: NotionPage[] +} + +type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.WEB + websitePages: CrawlResultItem[] + crawlOptions?: CrawlOptions + websiteCrawlProvider: DataSourceProvider + websiteCrawlJobId: string +} + +const getFileIndexingEstimateParamsForFile = ({ docForm, docLanguage, dataSourceType, @@ -70,62 +82,100 @@ const getFileIndexingEstimateParams = ({ indexingTechnique, processRule, dataset_id, +}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => { + return { + info_list: { + data_source_type: dataSourceType, + file_info_list: { + file_ids: files.map(file => file.id) as string[], + }, + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } +} + +const getFileIndexingEstimateParamsForNotion = ({ + docForm, + docLanguage, + dataSourceType, notionPages, + indexingTechnique, + processRule, + dataset_id, +}: GetFileIndexingEstimateParamsOptionNotion): IndexingEstimateParams => { + return { + info_list: { + data_source_type: dataSourceType, + notion_info_list: getNotionInfo(notionPages), + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, + } +} + +const getFileIndexingEstimateParamsForWeb = ({ + docForm, + docLanguage, + dataSourceType, websitePages, crawlOptions, websiteCrawlProvider, websiteCrawlJobId, -}: GetFileIndexingEstimateParamsOption): IndexingEstimateParams | undefined => { - if (dataSourceType === DataSourceType.FILE) { - return { - info_list: { - data_source_type: dataSourceType, - file_info_list: { - file_ids: files.map(file => file.id) as string[], - }, - }, - indexing_technique: indexingTechnique, - process_rule: processRule, - doc_form: docForm, - doc_language: docLanguage, - dataset_id, - } - } - if (dataSourceType === DataSourceType.NOTION) { - return { - info_list: { - data_source_type: dataSourceType, - notion_info_list: getNotionInfo( - notionPages as NotionPage[], - ), - }, - indexing_technique: indexingTechnique, - process_rule: processRule, - doc_form: docForm, - doc_language: docLanguage, - dataset_id, - } - } - if (dataSourceType === DataSourceType.WEB) { - return { - info_list: { - data_source_type: dataSourceType, - website_info_list: getWebsiteInfo({ - websiteCrawlProvider: websiteCrawlProvider as DataSourceProvider, - websiteCrawlJobId: websiteCrawlJobId as string, - websitePages: websitePages as CrawlResultItem[], - crawlOptions, - }), - }, - indexing_technique: indexingTechnique, - process_rule: processRule, - doc_form: docForm, - doc_language: docLanguage, - dataset_id, - } + indexingTechnique, + processRule, + dataset_id, +}: GetFileIndexingEstimateParamsOptionWeb): IndexingEstimateParams => { + return { + info_list: { + data_source_type: dataSourceType, + website_info_list: getWebsiteInfo({ + websiteCrawlProvider, + websiteCrawlJobId, + websitePages, + crawlOptions, + }), + }, + indexing_technique: indexingTechnique, + process_rule: processRule, + doc_form: docForm, + doc_language: docLanguage, + dataset_id, } } -export const useFetchFileIndexingEstimate = () => { - +export const useFetchFileIndexingEstimateForFile = ( + options: GetFileIndexingEstimateParamsOptionFile, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForFile(options)) + }, + }) +} + +export const useFetchFileIndexingEstimateForNotion = ( + options: GetFileIndexingEstimateParamsOptionNotion, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForNotion(options)) + }, + }) +} + +export const useFetchFileIndexingEstimateForWeb = ( + options: GetFileIndexingEstimateParamsOptionWeb, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForWeb(options)) + }, + }) } From dfdc4ed3b118f1d4616f1805ac513fc25a598a9f Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Tue, 3 Dec 2024 15:23:51 +0800 Subject: [PATCH 5/7] refactor: step 2 --- .../datasets/create/step-two/index.tsx | 255 +++++++----------- web/service/use-datasets.ts | 15 +- 2 files changed, 114 insertions(+), 156 deletions(-) diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 7bcb0f96d3..c6e5e82194 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -9,7 +9,6 @@ import { RiSearchEyeLine, } from '@remixicon/react' import Link from 'next/link' -import { groupBy } from 'lodash-es' import Image from 'next/image' import SettingCog from '../assets/setting-gear-mod.svg' import OrangeEffect from '../assets/option-card-effect-orange.svg' @@ -17,23 +16,21 @@ import FamilyMod from '../assets/family-mod.svg' import Note from '../assets/note-mod.svg' import FileList from '../assets/file-list-3-fill.svg' import { indexMethodIcon } from '../icons' -import PreviewItem, { PreviewType } from './preview-item' import s from './index.module.css' import unescape from './unescape' import escape from './escape' import { OptionCard } from './option-card' import LanguageSelect from './language-select' import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' +import PreviewItem, { PreviewType } from './preview-item' import cn from '@/utils/classnames' -import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FullDocumentDetail, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import { createDocument, createFirstDocument, - fetchFileIndexingEstimate as didFetchFileIndexingEstimate, fetchDefaultProcessRule, } from '@/service/datasets' import Button from '@/app/components/base/button' -import Loading from '@/app/components/base/loading' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' @@ -58,6 +55,8 @@ import { MessageChatSquare } from '@/app/components/base/icons/src/public/common import { IS_CE_EDITION } from '@/config' import Switch from '@/app/components/base/switch' import Divider from '@/app/components/base/divider' +import { getNotionInfo, getWebsiteInfo, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/use-datasets' +import Loading from '@/app/components/base/loading' const TextLabel: FC = (props) => { return @@ -87,7 +86,7 @@ type StepTwoProps = { onCancel?: () => void } -enum SegmentType { +export enum SegmentType { AUTO = 'automatic', CUSTOM = 'custom', } @@ -176,17 +175,92 @@ const StepTwo = ({ ) const [QATipHide, setQATipHide] = useState(false) const [previewSwitched, setPreviewSwitched] = useState(false) - const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) - const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) - - const fileIndexingEstimate = segmentationType === SegmentType.AUTO - ? automaticFileIndexingEstimate - : customFileIndexingEstimate - const [isCreating, setIsCreating] = useState(false) const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) + const getIndexing_technique = () => indexingType || indexType + + const getProcessRule = () => { + const processRule: ProcessRule = { + rules: {} as any, // api will check this. It will be removed after api refactored. + mode: segmentationType, + } + if (segmentationType === SegmentType.CUSTOM) { + const ruleObj = { + pre_processing_rules: rules, + segmentation: { + separator: unescape(segmentIdentifier), + max_tokens: max, + chunk_overlap: overlap, + }, + } + processRule.rules = ruleObj + } + return processRule + } + + const fileIndexingEstimateQuery = useFetchFileIndexingEstimateForFile({ + docForm: docForm as DocForm, + docLanguage, + dataSourceType: DataSourceType.FILE, + files, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId!, + }) + const notionIndexingEstimateQuery = useFetchFileIndexingEstimateForNotion({ + docForm: docForm as DocForm, + docLanguage, + dataSourceType: DataSourceType.NOTION, + notionPages, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) + + const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({ + docForm: docForm as DocForm, + docLanguage, + dataSourceType: DataSourceType.WEB, + websitePages, + crawlOptions, + websiteCrawlProvider, + websiteCrawlJobId, + indexingTechnique: getIndexing_technique() as any, + processRule: getProcessRule(), + dataset_id: datasetId || '', + }) + + const fetchEstimate = useCallback(() => { + if (dataSourceType === DataSourceType.FILE) + fileIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.NOTION) + notionIndexingEstimateQuery.mutate() + + if (dataSourceType === DataSourceType.WEB) + websiteIndexingEstimateQuery.mutate() + }, [dataSourceType, fileIndexingEstimateQuery, notionIndexingEstimateQuery, websiteIndexingEstimateQuery]) + + const estimate + = dataSourceType === DataSourceType.FILE + ? fileIndexingEstimateQuery.data + : dataSourceType === DataSourceType.NOTION + ? notionIndexingEstimateQuery.data + : websiteIndexingEstimateQuery.data + + const getIsEstimateReady = useCallback(() => { + if (dataSourceType === DataSourceType.FILE) + return fileIndexingEstimateQuery.isSuccess + + if (dataSourceType === DataSourceType.NOTION) + return notionIndexingEstimateQuery.isSuccess + + if (dataSourceType === DataSourceType.WEB) + return websiteIndexingEstimateQuery.isSuccess + }, [dataSourceType, fileIndexingEstimateQuery.isSuccess, notionIndexingEstimateQuery.isSuccess, websiteIndexingEstimateQuery.isSuccess]) + const getFileName = (name: string) => { const arr = name.split('.') return arr.slice(0, -1).join('.') @@ -224,122 +298,15 @@ const StepTwo = ({ setParentChildConfig(defaultParentChildConfig) } - const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT, language?: string) => { - // eslint-disable-next-line @typescript-eslint/no-use-before-define - const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm, language)!) - if (segmentationType === SegmentType.CUSTOM) - setCustomFileIndexingEstimate(res) - else - setAutomaticFileIndexingEstimate(res) - } - const updatePreview = () => { if (segmentationType === SegmentType.CUSTOM && max > 4000) { Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) return } - setCustomFileIndexingEstimate(null) - fetchFileIndexingEstimate() + fetchEstimate() setPreviewSwitched(false) } - const getIndexing_technique = () => indexingType || indexType - - const getProcessRule = () => { - const processRule: ProcessRule = { - rules: {} as any, // api will check this. It will be removed after api refactored. - mode: segmentationType, - } - if (segmentationType === SegmentType.CUSTOM) { - const ruleObj = { - pre_processing_rules: rules, - segmentation: { - separator: unescape(segmentIdentifier), - max_tokens: max, - chunk_overlap: overlap, - }, - } - processRule.rules = ruleObj - } - return processRule - } - - const getNotionInfo = () => { - const workspacesMap = groupBy(notionPages, 'workspace_id') - const workspaces = Object.keys(workspacesMap).map((workspaceId) => { - return { - workspaceId, - pages: workspacesMap[workspaceId], - } - }) - return workspaces.map((workspace) => { - return { - workspace_id: workspace.workspaceId, - pages: workspace.pages.map((page) => { - const { page_id, page_name, page_icon, type } = page - return { - page_id, - page_name, - page_icon, - type, - } - }), - } - }) as NotionInfo[] - } - - const getWebsiteInfo = () => { - return { - provider: websiteCrawlProvider, - job_id: websiteCrawlJobId, - urls: websitePages.map(page => page.source_url), - only_main_content: crawlOptions?.only_main_content, - } - } - - const getFileIndexingEstimateParams = (docForm: DocForm, language?: string): IndexingEstimateParams | undefined => { - if (dataSourceType === DataSourceType.FILE) { - return { - info_list: { - data_source_type: dataSourceType, - file_info_list: { - file_ids: files.map(file => file.id) as string[], - }, - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.NOTION) { - return { - info_list: { - data_source_type: dataSourceType, - notion_info_list: getNotionInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - if (dataSourceType === DataSourceType.WEB) { - return { - info_list: { - data_source_type: dataSourceType, - website_info_list: getWebsiteInfo(), - }, - indexing_technique: getIndexing_technique() as string, - process_rule: getProcessRule(), - doc_form: docForm, - doc_language: language || docLanguage, - dataset_id: datasetId as string, - } - } - } const { modelList: rerankModelList, defaultModel: rerankDefaultModel, @@ -423,10 +390,15 @@ const StepTwo = ({ } } if (dataSourceType === DataSourceType.NOTION) - params.data_source.info_list.notion_info_list = getNotionInfo() + params.data_source.info_list.notion_info_list = getNotionInfo(notionPages) - if (dataSourceType === DataSourceType.WEB) - params.data_source.info_list.website_info_list = getWebsiteInfo() + if (dataSourceType === DataSourceType.WEB) { + params.data_source.info_list.website_info_list = getWebsiteInfo({ + websiteCrawlProvider, + websiteCrawlJobId, + websitePages, + }) + } } return params } @@ -519,16 +491,7 @@ const StepTwo = ({ const previewSwitch = async (language?: string) => { setPreviewSwitched(true) setIsLanguageSelectDisabled(true) - if (segmentationType === SegmentType.AUTO) - setAutomaticFileIndexingEstimate(null) - else - setCustomFileIndexingEstimate(null) - try { - await fetchFileIndexingEstimate(DocForm.QA, language) - } - finally { - setIsLanguageSelectDisabled(false) - } + fetchEstimate() } const handleSelect = (language: string) => { @@ -570,18 +533,6 @@ const StepTwo = ({ setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) }, [isAPIKeySet, indexingType, datasetId]) - useEffect(() => { - if (segmentationType === SegmentType.AUTO) { - setAutomaticFileIndexingEstimate(null) - fetchFileIndexingEstimate() - setPreviewSwitched(false) - } - else { - setCustomFileIndexingEstimate(null) - setPreviewSwitched(false) - } - }, [segmentationType, indexType]) - const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { search_method: RETRIEVE_METHOD.semantic, reranking_enable: false, @@ -971,26 +922,26 @@ const StepTwo = ({ )}
- {previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && ( + {previewSwitched && docForm === DocForm.QA && estimate?.qa_preview && ( <> - {fileIndexingEstimate?.qa_preview.map((item, index) => ( + {estimate?.qa_preview.map((item, index) => ( ))} )} - {(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && ( + {(docForm === DocForm.TEXT || !previewSwitched) && estimate?.preview && ( <> - {fileIndexingEstimate?.preview.map((item, index) => ( + {estimate?.preview.map((item, index) => ( ))} )} - {previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && ( + {previewSwitched && docForm === DocForm.QA && !estimate?.qa_preview && (
)} - {!previewSwitched && !fileIndexingEstimate?.preview && ( + {!previewSwitched && !estimate?.preview && (
diff --git a/web/service/use-datasets.ts b/web/service/use-datasets.ts index 7ac9635cd4..221e258100 100644 --- a/web/service/use-datasets.ts +++ b/web/service/use-datasets.ts @@ -1,11 +1,12 @@ import groupBy from 'lodash-es/groupBy' +import type { MutationOptions } from '@tanstack/react-query' import { useMutation } from '@tanstack/react-query' import { fetchFileIndexingEstimate } from './datasets' -import type { IndexingType } from '@/app/components/datasets/create/step-two' -import type { CrawlOptions, CrawlResultItem, CustomFile, DataSourceType, DocForm, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' +import { type IndexingType } from '@/app/components/datasets/create/step-two' +import type { CrawlOptions, CrawlResultItem, CustomFile, DataSourceType, DocForm, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' import type { DataSourceProvider, NotionPage } from '@/models/common' -const getNotionInfo = ( +export const getNotionInfo = ( notionPages: NotionPage[], ) => { const workspacesMap = groupBy(notionPages, 'workspace_id') @@ -31,7 +32,7 @@ const getNotionInfo = ( }) as NotionInfo[] } -const getWebsiteInfo = ( +export const getWebsiteInfo = ( opts: { websiteCrawlProvider: DataSourceProvider websiteCrawlJobId: string @@ -152,30 +153,36 @@ const getFileIndexingEstimateParamsForWeb = ({ export const useFetchFileIndexingEstimateForFile = ( options: GetFileIndexingEstimateParamsOptionFile, + mutationOptions: MutationOptions = {}, ) => { return useMutation({ mutationFn: async () => { return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForFile(options)) }, + ...mutationOptions, }) } export const useFetchFileIndexingEstimateForNotion = ( options: GetFileIndexingEstimateParamsOptionNotion, + mutationOptions: MutationOptions = {}, ) => { return useMutation({ mutationFn: async () => { return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForNotion(options)) }, + ...mutationOptions, }) } export const useFetchFileIndexingEstimateForWeb = ( options: GetFileIndexingEstimateParamsOptionWeb, + mutationOptions: MutationOptions = {}, ) => { return useMutation({ mutationFn: async () => { return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForWeb(options)) }, + ...mutationOptions, }) } From bebad5cbddb119613bd2d8c6c7140e36c1b4ebc5 Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Tue, 3 Dec 2024 17:26:45 +0800 Subject: [PATCH 6/7] refactor: step 2 --- .../datasets/create/step-two/index.tsx | 172 +++++++++--------- web/service/use-datasets.ts | 113 ++++++++---- 2 files changed, 162 insertions(+), 123 deletions(-) diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index c6e5e82194..30c2db1276 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -25,11 +25,7 @@ import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import PreviewItem, { PreviewType } from './preview-item' import cn from '@/utils/classnames' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FullDocumentDetail, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' -import { - createDocument, - createFirstDocument, - fetchDefaultProcessRule, -} from '@/service/datasets' + import Button from '@/app/components/base/button' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' @@ -55,7 +51,7 @@ import { MessageChatSquare } from '@/app/components/base/icons/src/public/common import { IS_CE_EDITION } from '@/config' import Switch from '@/app/components/base/switch' import Divider from '@/app/components/base/divider' -import { getNotionInfo, getWebsiteInfo, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/use-datasets' +import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/use-datasets' import Loading from '@/app/components/base/loading' const TextLabel: FC = (props) => { @@ -174,8 +170,7 @@ const StepTwo = ({ (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), ) const [QATipHide, setQATipHide] = useState(false) - const [previewSwitched, setPreviewSwitched] = useState(false) - const [isCreating, setIsCreating] = useState(false) + const [qaPreviewSwitched, setQAPreviewSwitched] = useState(false) const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) @@ -250,21 +245,21 @@ const StepTwo = ({ ? notionIndexingEstimateQuery.data : websiteIndexingEstimateQuery.data - const getIsEstimateReady = useCallback(() => { - if (dataSourceType === DataSourceType.FILE) - return fileIndexingEstimateQuery.isSuccess + // const getIsEstimateReady = useCallback(() => { + // if (dataSourceType === DataSourceType.FILE) + // return fileIndexingEstimateQuery.isSuccess - if (dataSourceType === DataSourceType.NOTION) - return notionIndexingEstimateQuery.isSuccess + // if (dataSourceType === DataSourceType.NOTION) + // return notionIndexingEstimateQuery.isSuccess - if (dataSourceType === DataSourceType.WEB) - return websiteIndexingEstimateQuery.isSuccess - }, [dataSourceType, fileIndexingEstimateQuery.isSuccess, notionIndexingEstimateQuery.isSuccess, websiteIndexingEstimateQuery.isSuccess]) + // if (dataSourceType === DataSourceType.WEB) + // return websiteIndexingEstimateQuery.isSuccess + // }, [dataSourceType, fileIndexingEstimateQuery.isSuccess, notionIndexingEstimateQuery.isSuccess, websiteIndexingEstimateQuery.isSuccess]) - const getFileName = (name: string) => { - const arr = name.split('.') - return arr.slice(0, -1).join('.') - } + // const getFileName = (name: string) => { + // const arr = name.split('.') + // return arr.slice(0, -1).join('.') + // } const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') @@ -304,7 +299,7 @@ const StepTwo = ({ return } fetchEstimate() - setPreviewSwitched(false) + setQAPreviewSwitched(false) } const { @@ -403,20 +398,22 @@ const StepTwo = ({ return params } - const getRules = async () => { - try { - const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) - const separator = res.rules.segmentation.separator + const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule({ + onSuccess(data) { + const separator = data.rules.segmentation.separator setSegmentIdentifier(separator) - setMax(res.rules.segmentation.max_tokens) - setOverlap(res.rules.segmentation.chunk_overlap!) - setRules(res.rules.pre_processing_rules) - setDefaultConfig(res.rules) - } - catch (err) { - console.log(err) - } - } + setMax(data.rules.segmentation.max_tokens) + setOverlap(data.rules.segmentation.chunk_overlap!) + setRules(data.rules.pre_processing_rules) + setDefaultConfig(data.rules) + }, + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) const getRulesFromDetail = () => { if (documentDetail) { @@ -426,7 +423,7 @@ const StepTwo = ({ const overlap = rules.segmentation.chunk_overlap setSegmentIdentifier(separator) setMax(max) - setOverlap(overlap) + setOverlap(overlap as number) setRules(rules.pre_processing_rules) setDefaultConfig(rules) } @@ -437,48 +434,55 @@ const StepTwo = ({ setSegmentationType(documentDetail.dataset_process_rule.mode) } - const createHandle = async () => { - if (isCreating) - return - setIsCreating(true) - try { - let res - const params = getCreationParams() - if (!params) - return false - - setIsCreating(true) - if (!datasetId) { - res = await createFirstDocument({ - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - // eslint-disable-next-line @typescript-eslint/no-use-before-define - updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) - } - else { - res = await createDocument({ - datasetId, - body: params as CreateDocumentReq, - }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) - updateResultCache && updateResultCache(res) - } - if (mutateDatasetRes) - mutateDatasetRes() - onStepChange && onStepChange(+1) - isSetting && onSave && onSave() - } - catch (err) { + const createFirstDocumentMutation = useCreateFirstDocument({ + onError(error) { Toast.notify({ type: 'error', - message: `${err}`, + message: `${error}`, + }) + }, + }) + const createDocumentMutation = useCreateDocument(datasetId!, { + onError(error) { + Toast.notify({ + type: 'error', + message: `${error}`, + }) + }, + }) + + const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending + + const createHandle = async () => { + const params = getCreationParams() + if (!params) + return false + + if (!datasetId) { + await createFirstDocumentMutation.mutateAsync( + params, + { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) + }, + }, + ) + } + else { + await createDocumentMutation.mutateAsync(params, { + onSuccess(data) { + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) + updateResultCache && updateResultCache(data) + }, }) } - finally { - setIsCreating(false) - } + if (mutateDatasetRes) + mutateDatasetRes() + onStepChange && onStepChange(+1) + isSetting && onSave && onSave() } const handleDocformSwitch = (isQAMode: boolean) => { @@ -488,8 +492,8 @@ const StepTwo = ({ setDocForm(DocForm.TEXT) } - const previewSwitch = async (language?: string) => { - setPreviewSwitched(true) + const previewSwitch = () => { + setQAPreviewSwitched(true) setIsLanguageSelectDisabled(true) fetchEstimate() } @@ -497,8 +501,8 @@ const StepTwo = ({ const handleSelect = (language: string) => { setDocLanguage(language) // Switch language, re-cutter - if (docForm === DocForm.QA && previewSwitched) - previewSwitch(language) + if (docForm === DocForm.QA && qaPreviewSwitched) + previewSwitch() } const changeToEconomicalType = () => { @@ -511,7 +515,7 @@ const StepTwo = ({ useEffect(() => { // fetch rules if (!isSetting) { - getRules() + fetchDefaultProcessRuleMutation.mutate('/datasets/process-rule') } else { getRulesFromDetail() @@ -909,12 +913,12 @@ const StepTwo = ({
{t('datasetCreation.stepTwo.previewTitle')}
- {docForm === DocForm.QA && !previewSwitched && ( + {docForm === DocForm.QA && !qaPreviewSwitched && ( )}
- {docForm === DocForm.QA && !previewSwitched && ( + {docForm === DocForm.QA && !qaPreviewSwitched && (
{t('datasetCreation.stepTwo.previewSwitchTipStart')} {t('datasetCreation.stepTwo.previewSwitchTipEnd')} @@ -922,26 +926,26 @@ const StepTwo = ({ )}
- {previewSwitched && docForm === DocForm.QA && estimate?.qa_preview && ( + {qaPreviewSwitched && docForm === DocForm.QA && estimate?.qa_preview && ( <> {estimate?.qa_preview.map((item, index) => ( ))} )} - {(docForm === DocForm.TEXT || !previewSwitched) && estimate?.preview && ( + {(docForm === DocForm.TEXT || !qaPreviewSwitched) && estimate?.preview && ( <> {estimate?.preview.map((item, index) => ( ))} )} - {previewSwitched && docForm === DocForm.QA && !estimate?.qa_preview && ( + {qaPreviewSwitched && docForm === DocForm.QA && !estimate?.qa_preview && (
)} - {!previewSwitched && !estimate?.preview && ( + {!qaPreviewSwitched && !estimate?.preview && (
diff --git a/web/service/use-datasets.ts b/web/service/use-datasets.ts index 221e258100..a00c34ec12 100644 --- a/web/service/use-datasets.ts +++ b/web/service/use-datasets.ts @@ -1,9 +1,9 @@ import groupBy from 'lodash-es/groupBy' import type { MutationOptions } from '@tanstack/react-query' import { useMutation } from '@tanstack/react-query' -import { fetchFileIndexingEstimate } from './datasets' +import { createDocument, createFirstDocument, fetchDefaultProcessRule, fetchFileIndexingEstimate } from './datasets' import { type IndexingType } from '@/app/components/datasets/create/step-two' -import type { CrawlOptions, CrawlResultItem, CustomFile, DataSourceType, DocForm, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DataSourceType, DocForm, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule, ProcessRuleResponse, createDocumentResponse } from '@/models/datasets' import type { DataSourceProvider, NotionPage } from '@/models/common' export const getNotionInfo = ( @@ -62,19 +62,6 @@ type GetFileIndexingEstimateParamsOptionFile = GetFileIndexingEstimateParamsOpti files: CustomFile[] } -type GetFileIndexingEstimateParamsOptionNotion = GetFileIndexingEstimateParamsOptionBase & { - dataSourceType: DataSourceType.NOTION - notionPages: NotionPage[] -} - -type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & { - dataSourceType: DataSourceType.WEB - websitePages: CrawlResultItem[] - crawlOptions?: CrawlOptions - websiteCrawlProvider: DataSourceProvider - websiteCrawlJobId: string -} - const getFileIndexingEstimateParamsForFile = ({ docForm, docLanguage, @@ -99,6 +86,23 @@ const getFileIndexingEstimateParamsForFile = ({ } } +export const useFetchFileIndexingEstimateForFile = ( + options: GetFileIndexingEstimateParamsOptionFile, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForFile(options)) + }, + ...mutationOptions, + }) +} + +type GetFileIndexingEstimateParamsOptionNotion = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.NOTION + notionPages: NotionPage[] +} + const getFileIndexingEstimateParamsForNotion = ({ docForm, docLanguage, @@ -121,6 +125,26 @@ const getFileIndexingEstimateParamsForNotion = ({ } } +export const useFetchFileIndexingEstimateForNotion = ( + options: GetFileIndexingEstimateParamsOptionNotion, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async () => { + return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForNotion(options)) + }, + ...mutationOptions, + }) +} + +type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & { + dataSourceType: DataSourceType.WEB + websitePages: CrawlResultItem[] + crawlOptions?: CrawlOptions + websiteCrawlProvider: DataSourceProvider + websiteCrawlJobId: string +} + const getFileIndexingEstimateParamsForWeb = ({ docForm, docLanguage, @@ -151,30 +175,6 @@ const getFileIndexingEstimateParamsForWeb = ({ } } -export const useFetchFileIndexingEstimateForFile = ( - options: GetFileIndexingEstimateParamsOptionFile, - mutationOptions: MutationOptions = {}, -) => { - return useMutation({ - mutationFn: async () => { - return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForFile(options)) - }, - ...mutationOptions, - }) -} - -export const useFetchFileIndexingEstimateForNotion = ( - options: GetFileIndexingEstimateParamsOptionNotion, - mutationOptions: MutationOptions = {}, -) => { - return useMutation({ - mutationFn: async () => { - return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForNotion(options)) - }, - ...mutationOptions, - }) -} - export const useFetchFileIndexingEstimateForWeb = ( options: GetFileIndexingEstimateParamsOptionWeb, mutationOptions: MutationOptions = {}, @@ -186,3 +186,38 @@ export const useFetchFileIndexingEstimateForWeb = ( ...mutationOptions, }) } + +export const useCreateFirstDocument = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async (createDocumentReq: CreateDocumentReq, + ) => { + return createFirstDocument({ body: createDocumentReq }) + }, + ...mutationOptions, + }) +} + +export const useCreateDocument = ( + datasetId: string, + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async (req: CreateDocumentReq) => { + return createDocument({ datasetId, body: req }) + }, + ...mutationOptions, + }) +} + +export const useFetchDefaultProcessRule = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationFn: async (url: string) => { + return fetchDefaultProcessRule({ url }) + }, + ...mutationOptions, + }) +} From 629152ff2ceeeab738d889d964fbacfcafd2b950 Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Wed, 4 Dec 2024 11:52:05 +0800 Subject: [PATCH 7/7] feat: ui component finish for chunk preview --- .../datasets/assets/selection-mod-nocolor.svg | 13 +++ web/app/components/datasets/chunk.tsx | 55 ++++++++++++ .../datasets/create/step-two/index.module.css | 13 --- .../datasets/create/step-two/index.tsx | 85 ++++++++----------- .../formatted-text/flavours/shared.tsx | 2 +- web/app/dev-preview/page.tsx | 54 ++++++++++-- web/models/datasets.ts | 1 + 7 files changed, 154 insertions(+), 69 deletions(-) create mode 100644 web/app/components/datasets/assets/selection-mod-nocolor.svg create mode 100644 web/app/components/datasets/chunk.tsx diff --git a/web/app/components/datasets/assets/selection-mod-nocolor.svg b/web/app/components/datasets/assets/selection-mod-nocolor.svg new file mode 100644 index 0000000000..ae3c9c5c75 --- /dev/null +++ b/web/app/components/datasets/assets/selection-mod-nocolor.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/web/app/components/datasets/chunk.tsx b/web/app/components/datasets/chunk.tsx new file mode 100644 index 0000000000..08797fbb59 --- /dev/null +++ b/web/app/components/datasets/chunk.tsx @@ -0,0 +1,55 @@ +import type { FC, PropsWithChildren } from 'react' +import Image from 'next/image' +import SelectionMod from './assets/selection-mod-nocolor.svg' +import type { QA } from '@/models/datasets' + +export type ChunkLabelProps = { + label: string + characterCount: number +} + +export const ChunkLabel: FC = (props) => { + const { label, characterCount } = props + return
+ Selection Mod +

+ {label} + + + · + + + {`${characterCount} characters`} +

+
+} + +export type ChunkContainerProps = ChunkLabelProps & PropsWithChildren + +export const ChunkContainer: FC = (props) => { + const { label, characterCount, children } = props + return
+ +

+ {children} +

+
+} + +export type QAPreviewProps = { + qa: QA +} + +export const QAPreview: FC = (props) => { + const { qa } = props + return
+
+ +

{qa.question}

+
+
+ +

{qa.answer}

+
+
+} diff --git a/web/app/components/datasets/create/step-two/index.module.css b/web/app/components/datasets/create/step-two/index.module.css index 4d857968b7..85a7f8ab35 100644 --- a/web/app/components/datasets/create/step-two/index.module.css +++ b/web/app/components/datasets/create/step-two/index.module.css @@ -394,19 +394,6 @@ max-width: 524px; } -.previewHeader { - position: sticky; - top: 0; - left: 0; - padding-top: 42px; - background-color: #fff; - font-weight: 600; - font-size: 18px; - line-height: 28px; - color: #101828; - z-index: 10; -} - /* * `fixed` must under `previewHeader` because of style override would not work */ diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 30c2db1276..b904ed17bc 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -16,13 +16,15 @@ import FamilyMod from '../assets/family-mod.svg' import Note from '../assets/note-mod.svg' import FileList from '../assets/file-list-3-fill.svg' import { indexMethodIcon } from '../icons' +import { PreviewContainer } from '../../preview/container' +import { ChunkContainer, QAPreview } from '../../chunk' +import { PreviewHeader } from '../../preview/header' import s from './index.module.css' import unescape from './unescape' import escape from './escape' import { OptionCard } from './option-card' import LanguageSelect from './language-select' import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' -import PreviewItem, { PreviewType } from './preview-item' import cn from '@/utils/classnames' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FullDocumentDetail, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' @@ -58,14 +60,13 @@ const TextLabel: FC = (props) => { return } -type ValueOf = T[keyof T] type StepTwoProps = { isSetting?: boolean documentDetail?: FullDocumentDetail isAPIKeySet: boolean onSetting: () => void datasetId?: string - indexingType?: ValueOf + indexingType?: IndexingType retrievalMethod?: string dataSourceType: DataSourceType files: CustomFile[] @@ -156,7 +157,7 @@ const StepTwo = ({ const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() const hasSetIndexType = !!indexingType - const [indexType, setIndexType] = useState>( + const [indexType, setIndexType] = useState( (indexingType || isAPIKeySet) ? IndexingType.QUALIFIED @@ -906,52 +907,40 @@ const StepTwo = ({
{ }} footer={null}> -
+ } + className={cn(s.previewWrap, isMobile && s.isMobile, 'relative h-full overflow-y-scroll space-y-4')} > -
-
-
-
{t('datasetCreation.stepTwo.previewTitle')}
- {docForm === DocForm.QA && !qaPreviewSwitched && ( - - )} -
+ {qaPreviewSwitched && docForm === DocForm.QA && estimate?.qa_preview && ( + estimate?.qa_preview.map(item => ( + + )) + )} + {(docForm === DocForm.TEXT || !qaPreviewSwitched) && estimate?.preview && ( + estimate?.preview.map((item, index) => ( + + {item} + + )) + )} + {qaPreviewSwitched && docForm === DocForm.QA && !estimate?.qa_preview && ( +
+
- {docForm === DocForm.QA && !qaPreviewSwitched && ( -
- {t('datasetCreation.stepTwo.previewSwitchTipStart')} - {t('datasetCreation.stepTwo.previewSwitchTipEnd')} -
- )} -
-
- {qaPreviewSwitched && docForm === DocForm.QA && estimate?.qa_preview && ( - <> - {estimate?.qa_preview.map((item, index) => ( - - ))} - - )} - {(docForm === DocForm.TEXT || !qaPreviewSwitched) && estimate?.preview && ( - <> - {estimate?.preview.map((item, index) => ( - - ))} - - )} - {qaPreviewSwitched && docForm === DocForm.QA && !estimate?.qa_preview && ( -
- -
- )} - {!qaPreviewSwitched && !estimate?.preview && ( -
- -
- )} -
-
+ )} + {!qaPreviewSwitched && !estimate?.preview && ( +
+ +
+ )} +
) diff --git a/web/app/components/datasets/formatted-text/flavours/shared.tsx b/web/app/components/datasets/formatted-text/flavours/shared.tsx index b8102e4ebf..0ce17db7e4 100644 --- a/web/app/components/datasets/formatted-text/flavours/shared.tsx +++ b/web/app/components/datasets/formatted-text/flavours/shared.tsx @@ -34,7 +34,7 @@ export const SliceContent: FC = forwardRef((props, ref) => { const { className, children, ...rest } = props return {children} diff --git a/web/app/dev-preview/page.tsx b/web/app/dev-preview/page.tsx index d25fa9777a..92263d99a0 100644 --- a/web/app/dev-preview/page.tsx +++ b/web/app/dev-preview/page.tsx @@ -1,5 +1,6 @@ 'use client' +import { useState } from 'react' import { FormattedText } from '../components/datasets/formatted-text/formatted' import { PreviewSlice } from '../components/datasets/formatted-text/flavours/preview-slice' import { PreviewContainer } from '../components/datasets/preview/container' @@ -8,9 +9,22 @@ import FileIcon from '../components/base/file-icon' import { ChevronDown } from '../components/base/icons/src/vender/solid/arrows' import Badge from '../components/base/badge' import { DividerWithLabel } from '../components/base/divider/with-label' +import Button from '../components/base/button' +import { ChunkContainer, QAPreview } from '../components/datasets/chunk' +import classNames from '@/utils/classnames' export default function Page() { + const [parentChild, setParentChild] = useState(false) + const [vertical, setVertical] = useState(false) + const [qa, setQa] = useState(false) return
+
+ + + +
@@ -23,13 +37,39 @@ export default function Page() {
}> - - - - - - - +
{parentChild + ? Array.from({ length: 4 }, (_, i) => { + return + + {Array.from({ length: 4 }, (_, i) => { + return + })} + + + }) + : Array.from({ length: 2 }, (_, i) => { + return + { + qa + ? + : 'In December of 2009, I was preparing to teach SI502 - Networked Programming at the University of Michigan for the fifth semester in a row and decided it was time to write a Python textbook that focused on exploring data instead of understanding algorithms and abstractions. My goal in SI502 is to teach people life-long data handling skills using Python. Few of my students were planning to be professional computer programmers. Instead, they planned be librarians, managers, lawyers, biologists, economists, etc. who happened to want to skillfully use technology in their chosen field.' + } + + }) + }
diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 0641c4e338..0274a19387 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -330,6 +330,7 @@ export type NotionPage = { } export type ProcessRule = { + processRule: { pre_processing_rules: PreProcessingRule[]; segmentation: { separator: string; max_tokens: number; chunk_overlap: number } } mode: string rules: Rules }