dify/web/app/components/header/account-setting/data-source-page/data-source-website/index.tsx
Amir Mohsen 29a4dec387 feat: Integrate WaterCrawl.dev as a new knowledge base provider
Add WaterCrawl.dev as an alternative provider for website crawling in datasets/knowledge base alongside Firecrawl and Jina Reader. This integration enhances the data source options for knowledge bases, allowing users to configure and use WaterCrawl for their website content extraction needs. Resolved #15950
2025-03-21 00:18:18 +01:00

133 lines
4.7 KiB
TypeScript

'use client'
import type { FC } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Panel from '../panel'
import { DataSourceType } from '../panel/types'
import ConfigFirecrawlModal from './config-firecrawl-modal'
import ConfigWatercrawlModal from './config-watercrawl-modal'
import ConfigJinaReaderModal from './config-jina-reader-modal'
import cn from '@/utils/classnames'
import s from '@/app/components/datasets/create/website/index.module.css'
import { fetchDataSources, removeDataSourceApiKeyBinding } from '@/service/datasets'
import type { DataSourceItem } from '@/models/common'
import { DataSourceProvider } from '@/models/common'
import { useAppContext } from '@/context/app-context'
import Toast from '@/app/components/base/toast'
type Props = {
provider: DataSourceProvider
}
const DataSourceWebsite: FC<Props> = ({ provider }) => {
const { t } = useTranslation()
const { isCurrentWorkspaceManager } = useAppContext()
const [sources, setSources] = useState<DataSourceItem[]>([])
const checkSetApiKey = useCallback(async () => {
const res = await fetchDataSources() as any
const list = res.sources
setSources(list)
}, [])
useEffect(() => {
checkSetApiKey()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
const [configTarget, setConfigTarget] = useState<DataSourceProvider | null>(null)
const showConfig = useCallback((provider: DataSourceProvider) => {
setConfigTarget(provider)
}, [setConfigTarget])
const hideConfig = useCallback(() => {
setConfigTarget(null)
}, [setConfigTarget])
const handleAdded = useCallback(() => {
checkSetApiKey()
hideConfig()
}, [checkSetApiKey, hideConfig])
const getIdByProvider = (provider: DataSourceProvider): string | undefined => {
const source = sources.find(item => item.provider === provider)
return source?.id
}
const getProviderName = (provider: DataSourceProvider): string => {
if (provider === DataSourceProvider.fireCrawl)
return 'Firecrawl'
if (provider === DataSourceProvider.waterCrawl)
return 'WaterCrawl'
return 'Jina Reader'
}
const handleRemove = useCallback((provider: DataSourceProvider) => {
return async () => {
const dataSourceId = getIdByProvider(provider)
if (dataSourceId) {
await removeDataSourceApiKeyBinding(dataSourceId)
setSources(sources.filter(item => item.provider !== provider))
Toast.notify({
type: 'success',
message: t('common.api.remove'),
})
}
}
}, [sources, t])
return (
<>
<Panel
type={DataSourceType.website}
provider={provider}
isConfigured={sources.find(item => item.provider === provider) !== undefined}
onConfigure={() => showConfig(provider)}
readOnly={!isCurrentWorkspaceManager}
configuredList={sources.filter(item => item.provider === provider).map(item => ({
id: item.id,
logo: ({ className }: { className: string }) => {
if (item.provider === DataSourceProvider.fireCrawl) {
return (
<div
className={cn(className, 'flex items-center justify-center w-5 h-5 !bg-background-default border border-divider-subtle text-xs font-medium text-text-tertiary rounded ml-3')}>🔥</div>
)
}
if (item.provider === DataSourceProvider.waterCrawl) {
return (
<div
className={cn(className, 'flex items-center justify-center w-5 h-5 !bg-background-default border border-divider-subtle text-xs font-medium text-text-tertiary rounded ml-3')}>
<span className={s.watercrawlLogo}/>
</div>
)
}
return (
<div
className={cn(className, 'flex items-center justify-center w-5 h-5 !bg-background-default border border-divider-subtle text-xs font-medium text-text-tertiary rounded ml-3')}>
<span className={s.jinaLogo}/>
</div>
)
},
name: getProviderName(item.provider),
isActive: true,
}))}
onRemove={handleRemove(provider)}
/>
{configTarget === DataSourceProvider.fireCrawl && (
<ConfigFirecrawlModal onSaved={handleAdded} onCancel={hideConfig}/>
)}
{configTarget === DataSourceProvider.waterCrawl && (
<ConfigWatercrawlModal onSaved={handleAdded} onCancel={hideConfig}/>
)}
{configTarget === DataSourceProvider.jinaReader && (
<ConfigJinaReaderModal onSaved={handleAdded} onCancel={hideConfig}/>
)}
</>
)
}
export default React.memo(DataSourceWebsite)