From d7aada38a111028ade07b597ff2f9e201d57e35b Mon Sep 17 00:00:00 2001 From: ice yao Date: Mon, 23 Sep 2024 19:57:21 +0800 Subject: [PATCH] Add nomic embedding model provider (#8640) --- .../model_providers/nomic/__init__.py | 0 .../nomic/_assets/icon_l_en.svg | 13 ++ .../nomic/_assets/icon_s_en.png | Bin 0 -> 25814 bytes .../model_providers/nomic/_common.py | 28 +++ .../model_providers/nomic/nomic.py | 26 +++ .../model_providers/nomic/nomic.yaml | 29 +++ .../nomic/text_embedding/__init__.py | 0 .../text_embedding/nomic-embed-text-v1.5.yaml | 8 + .../text_embedding/nomic-embed-text-v1.yaml | 8 + .../nomic/text_embedding/text_embedding.py | 170 ++++++++++++++++++ api/poetry.lock | 78 +++++++- api/pyproject.toml | 2 + .../model_runtime/__mock/nomic_embeddings.py | 59 ++++++ .../model_runtime/nomic/__init__.py | 0 .../model_runtime/nomic/test_embeddings.py | 62 +++++++ .../model_runtime/nomic/test_provider.py | 22 +++ dev/pytest/pytest_model_runtime.sh | 3 +- 17 files changed, 506 insertions(+), 2 deletions(-) create mode 100644 api/core/model_runtime/model_providers/nomic/__init__.py create mode 100644 api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg create mode 100644 api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png create mode 100644 api/core/model_runtime/model_providers/nomic/_common.py create mode 100644 api/core/model_runtime/model_providers/nomic/nomic.py create mode 100644 api/core/model_runtime/model_providers/nomic/nomic.yaml create mode 100644 api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py create mode 100644 api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml create mode 100644 api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml create mode 100644 api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py create mode 100644 api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py create mode 100644 api/tests/integration_tests/model_runtime/nomic/__init__.py create mode 100644 api/tests/integration_tests/model_runtime/nomic/test_embeddings.py create mode 100644 api/tests/integration_tests/model_runtime/nomic/test_provider.py diff --git a/api/core/model_runtime/model_providers/nomic/__init__.py b/api/core/model_runtime/model_providers/nomic/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg new file mode 100644 index 0000000000..6c4a1058ab --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png new file mode 100644 index 0000000000000000000000000000000000000000..3eba3b82bc1e3fcb6b27cd9038786e19b7c2adcf GIT binary patch literal 25814 zcmce8c{o*H`?qc#G9K{_ZDQ;ou+Q{u_24=Hr{Y2>&m?^_GR7 z`1k~x9X4&V0E2h$-VF~A9~c;DY;3HotYk14=gysriHW&(?OIh;)$`}i-@JKq`SRtr zZ{IdGHPzJAE zO=o9k5n*8~E32fWq>mpzDk>?l+3YiC&gkjsMMg#<2$GhTW^QgiciucYo&M<2qoJXp zg9i^vN=T@xsumU&E?&I2y1M$ri4!MJo@{S#-??+=yLayn95}#Yv0PnU@87?F<;s=$ z^XD5G8Ch6ZJbwIm=B!yGBO^zT9t{l*6%rDvudmnD)g2rh%*@P`m6c^OnGgg;M@Rel z`Ptdog@lBxSh2#!#>U;<-O|!hO-=32ojV^sd=M28xpe8$g9i^98XEfg`lO_!UcP)e zIy$PUsX1%*>_dkRJ$(4^#*G{Q{PRyjLPAPPijR-a_3PJ%hlkg!S<}|mrlhP~P*8C8 z?AdMGw#CNAmXwrqc6R#u`l_p|8yFarm6Zhq1UNZ4xwyEjTenV9Qu683r!6fl$;ruK zVPQHtI@`Bz-?eL3U|?WkVxpj+U`|fX#fum7^Yb@u+}P34v1-+-O`A5=*4DPRwq|5x zWM^lanwoCdut8ZxB`z+GLZJi)2XEfIIVdRT`0?YrckfP5PtVHAl2=ezzkdDfIdiP7 zttG_8U%YrBCMxRh?>|po{`KqEMMXs@ist3ztz5bC!i5XX&CR!N-I_CZE)2uHy}f(( z>{+^W>5?T&jvP56ARyr7<;Bm>udS_ZW@aWVuD=o3;-@29?6&g#{@wqZ_n*4oWq;^LcUAUq_vZa67j00{( zd|LNnJnF6&mE3uB#kT~0Q8V0OxAN+uAK4+ox=lZ58BR05T)cVjw9F2LrT!LD zX>kp=a?J==Mp5R|VYe1mOgKBFzV5g3KS^{{i%N47S~8fTH^;{476;!ilsPtej^)uL+RE-Z8X_a|`auUSr+RpEgcUro*5gR1BbyM%C}bPd9}i7dkv3LBv3 z(p(1E!tg>iTd=R;p7JESJNb<{GF)oe*j9|it;WKH=_kjPkBsQD40UI0f=Z;hWiHlG zpVsf=Lk2-*PinRsMtF0Kc4auSs_uW@M~~8tbGwEaERGP*VhvFAktA%*YW^oC=c4S2 zZwYULG+M@(g>aJn5=d6-^~EG#`o0;LFh!-&J=s+6(x|QWQ3nUST%lvItvfQox4cP# zcEum*QTfq7^TCY=vXX&O-laDOCLuQEF;%uuZKPU9-kM9;3)AT2d@7pm{Pn)<>t4iJQBZ%qb3N6Msewul zEKq$os%n>8D3b^O@^j4SWO8rD&x1ckjemdaLoX8b<$U_$BWl}!yZ)xFH57FT{p|Yc z+fPMjewNk5KBg{Oa%gRfujzTo&H`9>2Q(?^3*KGlZ{N!1GcZxQ4V@j?xkrbAY5((X zTwZQ*9Hy;(46d!N#yZ_mxHhO+h%G&8jikM})(_hZ&ieU00)h0F*>6R9%7;2e_ctPG z<}B_LWbLf)3#&D}>57>GbQc-t=3qe|j@!Z-bAS7U*5fuEn5Kp+LvSU+=~;C zX|{jU+Y^nx2jx0ljq9mLfOKr{O}KqO{=1?#+Aa&JJ#lexqneEny4+<@o%n7SdQ2I1MPc+FP%?dHra9eNp-Zmn zq)+^}JwGj(PGe(>H&1_KeouW>NrG<(w2sB4=a?Mu;%r}3dT{1rr=A5jV8_$YCw!|q zjs9B4r-LDB>sVbH>60xdu{#c*ki2yh58iI`M_DU-qpYA-_CGI9(s!*c%|#_T*(usZ;=8vxIv&WuB6P+b&$+uFBXf)4;ngAU`8F^0l?~mWreN6K zz4!)fc^di-_EZubKX-dkvCh5wNNr-0r;J?g4J_zs_-4KFEy-bvy-H&TD5k(N=a#=2 z{0!}{54ak84>67*Uc8POXs)I#Pg+W!5JC@ZS>U!(+`i!=_Ne;9tIy8+A1=L^eh-1n z7TJZOk+RqI-$fZZz4SVP-BJG7P^GT?WnZXyD_j2is|cxa2@lUQ?8jAAxSMw23R1i1 z_cyKnqB%iV81V6j_3rL=_Ald^hMCT@MlY$9x+`xy<;yWxcjXgO+c#RqSlIcVzRCjH zF8f<>weB9XR<=oJ&!T}ig|Sul+5YJB`8ndHaM$47D;A+)`+hiXWmbt(UDhm#ezgJm zQnp5ZrNaF^q1BjCX5rZqxM$3*aou-&726h9!Fu>2Ft{#&Sg}ZdJD?(Tvi{kddfJ6! z$b{uD-L%a^9L|ClQr*nbt)eEC~WNy$DhBk zt$#IaqjD_Nhvjlr_4`EzJQ0?p)xT72!yet>w3d%j?fM=O$h31N+=hV5W~Ry#iU|(UMXmH8tAD_4k{L zS(AEKtBlU-qrza@SZx1oEMr5Bv$46GMlDyv+9J~*pQmXSxQA7BZ@+l($2V0`+v&fL zuG{{!D`;+F9<|fMOK<1m1A2Nc(2`91zJv6#p*YozA>yHCWyx98NAt|4}zKp8mjo_X~d%Rv&P5xgw|i!K{Y1&LS5pCpPuFmJ70=tFn@hk2yZw&$5t7kl!1`j4)h`*iE0(z}13xRM3un?z|X zypGMxxB*-L^WmiTgc;;_K2X1`0w zg2N9z)3w`%!t?If*lTnhR}2>ylg*dc*bH5ZhRy%& z{xc}0_R6LU*zpfw#wDay>h{_}&*170+jm5}DU4oE+lxL_8C_(iacHrx?6~+<`(F?o zeSFE6m9;Lm>ef(cP|VFUmyonmeVG?H_D#a7Pd7kM1In@2GTbeIsa;V22wHby?hw?+ z`JS;5{WdB3QI{1xIq&NitCX*NnKQ;*im0*kcS3ujp(jAxC9vQ%% z`Q9A4D&;p?(mDOUvJY?7j^50cNU^{v2<5 zFqm_)o&BWwd3Sc1KrkvY1VX&P)z1VHZ;2qygG8#Xi3-ty&X{VyZ|>%PSj+& zc)0&Q#j}XChg&zY4=S_>!k&e!Yb$Sq&|-d`t-Y4uaW-3V2$^4uC@ zSxQ?ILKRlq42^~t#}vRi>cNlRMsk`5eeUjU@zz&BBv;ARn9&s(f-XD@9?*B!?mARF?xYpsc0Y!0bMLkd8x#qeZek4Hd_P{91k9*y@i zSzPse`IsHzV)r`PT7a@6$7XzZ%S}$Ve(R<4+F!?mX&A+dzPhh1yd`JO*xlZ*p4dMN zeK>~)jo!&?WMK*U5sLfP9T$_gy7G05Iqy()sxFdt4>3P(XmG4_0lloNhk3;^3;R)# z9&movW@v){GW%eg#Q67!-=2>iH?**|Kc9x8(F-@@FBieD_zwk@c?ZsJ{&*Gf>$nY|j3H0o_sz~4%3m!Un)BDuV$XQ;P=%`X=s*~o##mn-zLsD=J8B; zei0uh+FgqWT-g9GWIyn>h9<+pPH3%1-hBJGcq?@CG!z07a{SEvC20ES zti9^3lu{?d%ZTKv-bQ8A8oFp?>&-cqsU{gBzCjJuZD@Bb7;t@0F0~Od{2{dS4D<1D zhwTk)zBG3wK#7z^c3Y7zlZ$Wv5058q+9#aa0Nr-&d+Vdq?5`6H;PB?x`nmA0=cd+k z)zHt!0+WhhT0PtgFp$;RwiK-R&RG+=x9YoFDN%mtLxoZFa;>m;{EYnNsLM#wUVehu zZlvc#lB|17@A3`Md7}f~oHdyXB@+n7JK1zN{_?ujgH^Yj6EOKhNGltg+NlKo)p#{x zLC9)Iw3V{J&Ka4wYJWkPB+r*_Z&#PN7r#=2At*k(iaejTAyQ zvEFp)Oid5o9GwP#op6$+Ja|!9)_HY;qf%I{9{x#kbP2Qje0Jq&LN<}uKT_N=Qe++o zRAhfqmr$bwbDl?asu`h1#WvU9CYf4aZYg-_pt_pPu4e{Z?~&5Cg*?CSy&TV5YMm5U z038&X8|p*nIL|;n(jH)N~v%%v_KAr z87Vw?xac!J>pgRjo4Uq3zN;v0J{p#sbTg_nDTuFZ}mg#4Zv zTx@?S#6;={cyYFO=@0)>FH)BI@Z+%iUvsGYOAkI}IziP=I2hCfYh}E;mPlB@KBS1+ zyG)T#$#d`0yV>@N-tpZW1S+YT8oA?YQ!QC_k#b~(pa1T zF!0OPG=#8+b1Wx%wbL5X9@^P{nI>I)CBUX1B1Hzh%SfAE?nB?1X+p^L-;X)-d5_;A zMXLZCS9jxVY=-QTln7M^pdE^%b4Zt&%c_dDr|=$6X1(y|Aa1iYa1+?o?i#)?)nr02 z9qbEM((hfSO2!!eIoRltsmE^q z7woQul5!}759}TzJW?L?lU8YbTv%z)upAdeAvM!{LA=l!8%Rov87v7#4(jqo;9qwr z~Q5xB&MZaqO7kR@_>O{n{daQ&xR zxGP+Tw~l-vRHKHN-`E*9EBO4o^DFT=HUc2oQ+ON6QPmDzuBYy0M(<6*PCWRWwE>#^ z@{F8FLnbm?x>iI(bzYqC9QgF>qnq>KsOMjl`Hv)H@-)O;8>IY%7WRc!SMx)+HQU(8 zdy;xJG+k=<1Eo>*Y&a>GH!uJBpGD2bM(ds!$Ey_2w}I9YYv%JdbjMR6-UKAq%b4WF zoJ)AZD+9LJ<)Ezdl-c#FaZQseZL4=Ye;&Mn?1QbE{WxYF3j3uKiWg~f%^|yOis#-~ zT*G8oF9;2TmQuKt8*7<{KfSAE^RFrmlts)_@#W2HXY1ANOWiEsCbBRj@6Rap;MC~z zx_ymX1$1N=-n#W?^s=hrElTq?#DmXA=Y_v4p2(#ie$H}$CVOH@mKlhoUaufXJ_K8! z^ZAGrlLKm@FnSFbZRs^5*NZ+)$NUH)qkMFp`pc^D*4YM?>XO7q`N*F2`@QJV2a|o5 zoDj*Gk7Kr=lc|R?aw9T;UF?8<8~j-F$X`|-o$Pses_sq>tOk_Qk@2mG!L+*j&OCy} zRq0>%dw{YMnRxLsx$ab}Yl4yMY+w3~1Eg>5fZ_$_{gA~qn(g5vFmEHFonpB3<$J=g zBUvCWJpWl%)!aQzf`*NHsLo!5nZG@!!Q49IefvEwy zY!7L$)!2L~?kApB&w+l(6MZ7XO5XqplZrTQG|1JFLtD}?1NUJGsbc)+Fpps$i6Z>XE1xP$-~S@6et_~aim z1O-p`N)i&B!+d19TX=T6qB-NkhbAJXFTr!A4IVjwMck)BwL9PMoa3d5>TYOpMaiy` zuV&ml5u~<_x6%bHPBjb-irZPG)c5PV#XP}@3%FN;xn|5~q!e=Cn24Ck7y z5YZ;w7QB$hi*5_OIg9%Lxr^%=@^0yiCNvyEpV`7SAnLw6G~bAuj|GYe;Gft7?Wn z{McV7kSB>*sKPI`jnK2&K8x#^k2KyHAbh#3gfQ_Hot>{o6dhQb6h(&~{?(pX9%O-`SfS9+^Jro0!G86s`Ib@qe^TRn* zk<|$FZD>(tJ=LAb{aEN5L0V9xm+Uyd(R1Dgao7vt&K)v%NV~Ol9=|UU6C;@1mX@v^ zDm<^whLa;!rPRY$c?7DVVItKHQwzMDKJr{ZyQXh3NqhJNqt>7PJ+*O;)bBP#9Rm zEY1O*^*(85x7Mvp!=sb)vAQ+xmkGCKU@}75wNK_Z#n2W2x#eL;>)~fSeb7)9(cK1* z=aW*uf!#4ZcKP`Xp-iQ|q2Z{`Kb*Hy#!g0jSl?4%+sMn%0v(jGL&byAnteB7j`>?3x@}|fh9s*H){*Clyo;TiJugL;8_~kf zjdBv;DZZ03+`K^zh)B&1dL`_bLYRE8sE)8Bk;VCa;{&}GHiHT)p%gBPNY2F(UvNi% zZrw&AMY^NFvQ1jm_3Gmt*qQ*GOe9YIvUR^)8iGS`O0jio0|4c}_l#9bM;;py+ zqQ-fA`nW*7n}VyIa7pe0aPR!#gGvdWja`~xYKF$#4a#uO z{o%0%@c8y#Ze#=nY`+d2m*zhGJ2a*jvdvqK5lxi%4N2IpHt=XpZig5Ht_HtyP$x{#Qcm_vPZEs-V(Fn<2jI(e1V*i9pUn2uDHTu7UG6%nzcf%c^#15bvP>W+iKj_yS_!QB1{LMql5jg zXGsY1F9$g!7x@CpXxzzDULOTpyiE`F+M~t*y+Fne$czI2a#FZ;d{vF#;Fl&6EZuv2%-xhu(4L6Sa$ZMWM$vE4uO+51X2ZsiRinbjWW=@ zfe}TlZ(0wx;xTTEvQ8^AT3Im!+Nnki?S{1fJ7hIAM5%{Y)1K=baZJD!Z2|CA=F~G2dRRo~woRandwiaVQH$IoBmPS}kM}g^ShQgTZm$>v2RWsKemW4g)HnRup~ubNm5L%Fyp~UjzdM{xjsDm=Nh>J zl(t$wAUBIf6ZDRcjKs7BH|9ul_w&!u8P--O@cVjHJt|Q&Mu1-$>?49ysE#J%AQG1t z2*|Oa|7=!cL^2=Ot%K_PIgA_!(O7Mxe?ZOyg_)!%nyjKIt`BTb4Kb##mmVgHm*-~` z2l-c#^6bc%AcY=z3i~D2UTvh__K*gf#TAg7Z$s6iErU!`J!aFw0DY&h1oXC^dJ^8% z^IZ}*uMm$T&qB6+pC9pqXdwktOCD7*y?&aI-xknd-ksq#Q>*~=;Q&aaN6fDn z*noE$aQ2US(X%hm@U?-`r7h=lD{7t~e7!UKoFCupL>#+=!yz_L%!`okbpyQddS9y@X~@sDVI>2u5a$4}5`A%OH)Oe-2%JT-(%{cUKjAOp01>nx#o zzU|Ef;NQD<4r%7#tm|HHiJF{)E8^jvO>G>=c*0{^8-eI+)$oZRJ{4v+j;ySaScBJ8 zwdZdHEG^R^5&}Sp<1LbyrkD_(@8rEXFzs@;gW6uE=+CHSgkF!Jt5p@nw}Cd3JUAR= zJj-xkP$M@M#cMC%4FkzjE~|fg0@J&*F`)hl&IOi%&zA*)aP$O@*K?l0 zwI>oTaT*H+77Nm`R58+c)CW>@YBQe%K_8Ia!74cBHxtn-a6K5-tiqrEPc znoVH1`Q{)zFUI)W*YZ6C%H5AtD2!gFe%eIrat6!0BObA69GC6P6_yu`8b|Js#%2ZS2>4POrG?r-4^#~iw3q#k4sFaP5MaJmSf*S*60U>kj>hF^a*+3aQb3TnYi)eOG@r2swzJ~~qiMiA!z(Ffe zt2vr1%N^lqyrGq?*3%dbB{hA66ojIAP?^4 z(O`on%W$ReY7M03hPI41ykJp?#Zg>}e5*yzV{w+*BwzMW9N;S%ESiT*2s!wD$BimX zAa*`s&n9>zp(M@|D*)X!8ncSBYh)ecu)1StE>t=*1lj55I74k()M9i~tnpz8ik{iqxfcZ_^=I3U51!{hbToyWlqt9cQ z-PuE{90=GR;|K7@TuRCeG0kNl%yqf4{hN;Ql;%g|o^*aADMm5i$`#aaE}tga=7xoT z&2n3qOQ!EiV9d>5^aS|#0lUMHMi3}?*>T`Hi~~%3dk77iQE>G2#Z?*tzIQHNTt~!_ zanSD7pGg8ZZhp?WE`d4u1nOs5FZm_3?IMwSKrtZXxL-)%IuOkWD^;&o5AXluG1d)7 z#!dO6#E>$xYLP?07;f+fF382PfAKS_wp0&~kjUv25f*Jknw9ckKFX#kT58`KZIM4=jRa-PJ2=ZG1 z3bm05C5I!QaT!c5Uv+|*dzF=X9C#M!?Sh)9p*>57&*Bw4^f6e_>28AJzy?MEQM@if zCLRste&7W~ZKPIQv*svnC|g0H3O8t`v7SFLG*vBwdHRZIN`?ig@&A!#e!iA4lVe+UAKkp@+>`)KnlK zeByfOgKDFRRKbtRQ(bqYs|_ZrQzj#?6CMUij5lP3SO=;)e-1}dXL4f5ri~XHHCr$J-)W&Ipmp~8>dOj2` z&4V82n1hFa(7{Xnxzr=ogZK#~uC2>hM0OdrL5e^mN4+`AXno_H>XL}zI~Q8q2$$H0 z95&LQSpfR0pIX_vV7PR}nBdV(mf=Kby6BCArYI(jW&yZN#hM2)+IW$29|DQ*fRFD| zV`Kqaw}oEfR)D6qx$>4ZKfbYPlO*m`AanNU=6{N!4U{li*ioy|U?B4cpP8D66iZz(`SqEMmZ5rf!>36_K{~Q06sVUqp?s}KmvHT9NL$$T_MC|`PiFa zA4hIa;QH5gcxx$LKt^7o-Hai3%_iHrcp*uYd}Jv-U*-S5Q=UW`nxaBwEl=N1&4riizL zR`gLHjwqP@De4)Uw6D&LZ@lE4k2zXau0D~h2NA_^)()r>_})8EUSuG8@?_5jlo#NH zVka%~B8|2hsJjSbpB$-3GJID_)W;S#u^U1XBNyewG(jU8RKNiTKR-?g3mU8F?bHx_ zNYv<>os@u4@279nA_D-uPAD}VEHHT3`M;aqK!#l+{FqUa<;LXDZs-v&7Tf{1TDrYN zyY>Z>yWYai17vy~h%S`H)!Un`Qk>AYlMvlITy!Qe@A9Xohym)6q~p zfOGNEHULG1wpGt0U-|A+>QCNVz+K}OC<5ERe5TP>0ah!pU*|`TXokO_YvI91G7g(5 zLvDJNRi#zlc1oT^!0X@1KGt9tUG93^gV2?{R}%)(_$A;CJ0aLMu|#H1 z+lf1bB39rHj)lLX4e?QcfwP6bnZ^1Aie+jKO=vo};iO6mqUx$A5k(MlaZUPiLiZM# zZ}>i|8(ihaPaGK39$;Pn0IAYZ^c0v2&G|~UW2~X?K*_p*iz-awG_;79sdsoaedYO& z3sh=_tw}i9WNu5M_PK~2U;} z#8{wvm=>q$O;j=9jKf8`eTOJO#TP(m_AL$OEAQr4c4#LXC4blc)WsG}crMc~XyHP( zF|w$QJP!#+sWE;*d`)u$2_C=+gceZpMxbOl6yDa(uMj8o`eW7&9ex#}WCb;(7DG%? zOcPN2smt}fe<^;Ap9K^(3stvh&Xi+ZWIm2yz;yN6^hfrMS2hyo1M8uD0Si|?0D1uv zLTE|sNRXm$>ZoKy2XfVrG-u@ProDHQj4GJ#c7s+LPL3r~frl%w9m9*W0Us=%K8X9z z90?&%fyp~CyCw|E2gE~btKaNq#PDXDn+n#Lf>;U`==b8x-djJ*D0lk!pVj-l=ySl} zj%kDI(LKPUPXW<`k8a}qjtYFbtYv;#@(5Ai-`@!B=lS~?%yB6_3Eb{bMZY-;ARCAy ztz2pau#`2x+Am=26C+1w(3j9P367VcC4%jd6cHIvxXLFmqeIgnU8jqN7l{sDGM1q5 z{M-gLMjSAQ9N0HOMd$UEvD2jLS~I$V>QsR0tRa}I?(EtyA@zeiN!kQCPEixIawy8A z=+ITV;@2MWA)>W+>M~Xg!U4GDDzyXYk?tN_7o;~toU}^n0W2$_9vHC_keY%8t*^YD zpdvlBYCPX5ipWR4eLx6W=preu77w4SXk%}oU4tXn&=deUwh-MCGkKJ@9Gs(pM&*5y z8hp|jETz)xh+z4k?!hvzD*fgSM5HiA6?jT-W(FJMgH_;Y5D#1A$TlFEW}wbFc+5Fh-+3|0kWy`HVMgk1(9wKEHZ&eugQ1zcocqZS6#c*wSfq{a?P z6-FL&zir(E|V*w8X%K(%|L>N70?E3xtv7Vwi}}@>SQx9Lzz8 z=wQn%a7NubHu7ef-`V@Mpsj?<1rJYdVtL|t63nG_vMbYLD_OpOy88+|A`@6u`#rN)p^79ij*tj&p*et*y=X~i zx7Vs#OVaH+GVTGl1D7KCcS7m))OsF}@?%w*IvjZL>TQGsFuL_nh4jRM))voM^`;en z&b5o+t&u(p0Pjly?=KN3E4yjd=`d1lMbs#uHhgL&_CDzAW#5-%W_yGz1IQ4s@Z5b% zx!uhY*ffPgUJkMDOF?DqCbOV;Xn#%uPT0X-ItPs5{RuEOk)U0h_TRB$&?f|AZ3)`Om#2@xeC@`p zB0kW)$4jh@Jt-Xdmv2=nZEpyX?dMZ0%u+RS)$2O~)qv`wixKy+Co52MKxlAdkN>{h`vtph#l3BkQbmI1J^1GFRy7;xm@F2ggH zp`_+Z76l)nF}b>LGGwKPJ|`TCkLR5)-ro%k^9h%Z?vT|2oU}Ky0|bw0dG?y~R_#&M zdEf$i5X}T7oCeM&ztkwT3Wl3PSlB@ z>8~PkQ5n?TWuR^4U=68ewD;Fz(uB|k7;rfd+Cy;Gu@Onr?>)>rjk|9SSxjZU&fp>A z&#PIPhYIeR@?wAq)9DCU&=D+X13`8Lck|7|2);qbU@hbQyZX6;zQDd%OhahhWVg&W z0^m!sc))fD`6qdp~2K5s{SRtGbnUXoKqe40fpNC z*|5Eh{k=l##u46tDmsT3`9P`hEdSf9$LD_c<$vGDyDjpm#TCRka5Nmh)&hk+i@!sm z=SZflpu3{iWp+vwgGzW?JKNObA@}nVax+yloo^ew|I-%tTwp|5*j(k{J>d?IWYlEJK6d!}t|qyzglR3G<$>Vg+~IMuJy&kY|`l*cfncT#$0;WJ@9U z>=&QRp=}fKJSzQLU4H>8*8wVR(dXfxY99*@4-$tDk{YOr@I{r|Gk1Whx}Xrwc%w}U z3-m2MnCeLH7T3b*?_l@*pnqAqoRF0UdVueuN+>WAP$)ZWhd85mdy1HZ(_LkzhLg`h z6VGq6IjKI2;psHe8sbBIu z0fcS~>F}MhS1FVE&$}E7co%rsK9D`UJ8xP05S1|2+Np$57f_ z--Cn*&OHCFOt!I$K!$0hoN~_)pZi$S45jjKqm)VQ042{9UEjSq>lkC1c{+lQVBska zjw3y)1t$FNnX;LQvjfu?1G$dQ@ncltEM+XYwVltrrm0Uhn4oK?+COk!224bvQKs_*{ z4BWI(LpO^NjrC9MY-I+<1k+a$2x3`4)}`|Kyx=8=hG|q_duXUPeT94M|6|7@EC3G% z(|5dq7|+l&4U{ee*9oR}1X(Vz_qD9RL!HcWksSa_f-?;%zfBRkt0$*WljWG~@B*h%tV?1^N-5<`}|R zu>G8N(Wl`n1HlRQUqdK?@i+1A*^~=aVRt0kGnstV1kjt+Wlxfnr^Y0PRhoRD<#d(WO(fA9o!O57uh8p;YhZG3lQw2nkUVa`hIf1nT7XkNX zr<-P2Rq?|sw3vrZg5ZkHU*?6tAXc-eBfvm7ORCg^=RI5fF2lP{FRzoIi_QdP(QRliy?FL6kQv)lQRZ7hs zEf4Eh(q{02^ zzjOfC_4H1vk68Q$=m&ovG^HPbba8-}E*M^*13&hAuB!xIDon)%Bg6)@?W;kK<9(Y*94<^5e;js4;Kfrq)e>y|}vGeBK*bEWH=XYhQH%+6E;my%xwEP^7Xq=wo zuV6CZRZy?u?*gP^1Z+P}kZ1e0kP=_)d)XP0|C$1NnwO6V$i(($Yv89!3;ra-es5Xa znKw?`v)`Mez}RY!EWkcm{>iuZ+Snen{J~u6Kcxpjiaa&z;q14c$Q%Dh5$AbB%wqh@ zt=K=12k@iew~IgBRp(!usrkqPr0_4F1pOUPM)JQVpD2K-k&eW7WoIx7u%vfuc_-I$ z;5*-TZUO!&+Vriu{B}_#m^H5W$7@p*uWhG*TFQSqXt>u-@%MT=NPI9h<+z*B1L9vV z;CHpSZptaplT63YhPzsf*Qgo1*=-{LlHOtNl8-bw#{A<;?{MMDY@t zo;)2`;0RxQ`V)Uvuxj%GePH41A+vrUO;Mh~g1~QBaZriB&vJT@1h1Qtum8!X!cNfP zh=bYFqewM4cO5bS-y;6?#vnxn7-WQjJ^#-2ZZtl?5)ewWCAOAe)GYF z)7@?-)X@EG!Sr1kkTglA61dwrE&R~U>W|ZE6Q}Rvhw?{T#sQQBJi!x$-Jeptuq17uL4Tp{K&Yz>4G_fvG*c}4lq0nr_0$55*1Xboi69^ zh_UR@O8|C3@*W2QRI_R)j#=+@xsDSo|FHxMVKiNUIWirPf;5zVpwTlTC4KtNbHD{E zn^)5{w|OlTEV$fw`nF5GO20R6AUcVvhq5k>E#UFi-t79}U)zF+Y^KEhsn=WqZlq2z zoOK%07C9CKQ0!D52d$#hv?lQR_*5E;QAD8GL)}1;fVT?3n~5X|Ouzu>PEFSva0l<< zye58Iy8`kXnJEHOkXP5CIzQH%H}}ueePe%Rq!jG(>LDM$_}ITT$@gKG7e0FG^bBngf5q4 zs*t_<%V3wQ;#0f4DyJJOc^S|CL)4{1DMJ3pu|;T~5HSOI?usau)1l#HyeR}eA_foC z(2keKmZM>kL&j3XytPQt&2PJq9?|I!0ztG29GaNJ;H)6RhvNV+|2e=!C<|UlwX=7V z&s!ixn*PuXjPq$@o7HocyQpvL*|ja9Lv}bA)pKFMm{2MUr>`G;`WFr5Uv||XQ@Sg`glG7gmLF9jDzA)t(YXauu2WMd!0=r~MKm7AR zgm?gApa?(%2l)UR(0%=E6Ivq8q=3VXKXSJ~)A^@A0K`{)XPU}-QRV*9bPgGY=L}KB z{yw>a7z+~*fH_0JoNwXxiGZ-oSyc<0A-TVTUB%)GGPr7l-4d#qfrho^2XYSn>!GiY z4tKh~t|L7ZFH^q1`&~yS^qNEYz`Ee>0tmM!Oa?dO;wwaI<)^<0ghR)?`-b~O^i?DN za=9I)O8n;mXl9U>zX4iR>FNYE@%QDE7xFbh1ssik+aZ5we=&AYsK%beh`3-?TG(Nx zC4+~k&0+YVqIW!DJ%Oy?HUD`~rSqQ$slm930PS;txW=gG4yvIUGN3TMxa$vn>QNJ@|kh{G!T4 z#ILPnc*%hkol8r=XN~`PurKmI54vf;3(3IH>Ka?J0O`4x|4n{yuQD!15cR1*Y7JY% zdF!*IBlxzGag*5K1}w|RpGU$Ff=3r<;=d0b^G|;x&Wp1ovs3btq|l`kgxszGuPMeO z4N&_Nr14LZCCvQ}mhjx3?CJ3SjS0@&^sfg&qSGG$Jyx2Q%nJ}paK}$vd3yZDs#-uk zSP$6)D;pCc)Z}Rij(~jB?l1gn#e!a(RN6`Fzw!`mX5akR10bLW`QMCW`DQHst6B;T zDiS0f0EsOF6073bStW)E%-=+IRB#Tc5!f-h(8^7JU7C6V`1te(fCY`PT{^F=)#}&3 zM0#Wzn31+c(h-h5;0Iw&;lLZK6n-7rQqc98RvI09%vSvb)+w@b)5#LpCUTQ;Hxm)} z3M{lO+KhuoZ%!NSs%Pvm6~3=)o)vV>p6|txB9@(SQfpgdP}0f2YqAKu3oc-#<50h4 zo}LqQSN+aM@VhjTc0#C15-1Fvp$6l_bJ84(9(3O!_D>Ody?bw)PdKAP$8VRdU<=#8 z4xabfQSE`yOT_%Wilv!mL+y)k8@)FK39=!k&tnZrS0 zuT8+O_k^gAR(P+9l|!5b#dP^AF}Y>oeR&=6k&Zc;*6X3h!vMa8UUfFf1mD+%e)33j zr1S4mm<&IrsLl*Fl>0{jTqgYz_v_f)bUVv6i4<=wP%GuZ9T7&8Nw&KtufLWAMQ}9W zED6*XIJwz(Pg`hgC>wM>qDWLF7(=Ha?Pq zp5?@?90CX7pyi9MRn42owa8XVv~w#T(vOe_mzZt|$GH2--m;qft?PBtc|J;ngCsL2K89pe6wOSRTAL|0NQ|mY7s&oVXqFbsP#&6$9KQmhUK6E-M92|66G6 zEBJjU!AaFFv`|Tx8LV_f_R%$8PG;gZkyqP#XMXsSF=h&4_bD%ALNOu#?!J)Bwgl7P zyV;&A&S2V~eeRq|ON7eAYOvAuTasl>;!gf5KYZcdX^=7{T9{Q@V1~_S!RRvXkMz05 zitDA&Z|wNqlZS?o9=UVde^&JhUNaDN9udRAyDh}!3;8iwva!dV^G$krkp3C)jfrH9 zq~tYUS!iUkNTT+(RNy%4NgSwmz}j6C%5nwYyQm{D`&Hw9n>O(*>QHI-n`TxQq;hdM zvV3u7UEbda$|U(;H%-|vV8#hB{*{}+{0hEf_c&Z+}rpI@t4Px z_J#Xa*v8@v*g_x%+{VCfEp;-&E8pE%Jj()Vh6dELq^{i#$F%$CgoewKl0^Ur*)!G(X# zD7!QGYIc?v-SeyK*CH>@PW_(6JKvd|O8H%I j#hd$5Bz3DL**g^>vTwt4UwW3_V z9g*2R>O21oCZBnk?{}ph__eriGFJ8I<;}{+$~)xER=s-Z0Cez~87+G?fIZxBZ>{9Z z-cXO2v*0_c{@Tq}5a(1|?(f1t-_T*p!7ZS?oh{(QQBZ3Ypjhk3bd8#M+^w)!-@?8Q zx`v(2uJ?D|iC>g*TuKMUI!5@(mobju<+%wzhOfz#%t=L`l?M(cq33a-SMsFoeZR;`@M#t*)#b`jq7@u+jXr)bhU8noYUdfC4+(5|EITW|A#6K<7A_< zMN8STjYJ}3Q#x5O)+LqLO-!G48J9_oq*+~D<}hw;YE?t`*v6tr%0x4U#v!6wng|(& z&QOf4=`_TgL&MCz{)zqheLm+rpXc*F?{oXU-*?)hna(B5r>}uTes#IS+xpwSROg31 zOoLCkjvGU@i0iH~Y|p*etrRW8SjVTR9C1RYlp|3-B~K^z8e;J*)Gb{av4``)a<(dY z^L1+Ciz|)-u2SLrM<0Xb1v{H{b48pd4S?&BDbIq>&Quiy9 zxEaxyTGi#ZC1=MNf^3(#ENigg^TaODWDsvC&Hmy+|9RIAgb~y9&||e^U+5xE$ztqr z!fsvu66j8&L@+2|JqWf5_pO%ce=hLk5vvGe+7kok@=^rdw~!mXiczWv0x}dZ?r2Kc zu0BqPz%(J@;Qq!^e8}g=;7P2qqj@@=&Q9Runa9op4`lI6a$hcN*5fY#M9Wh(fjy+- z!X16tjR0`(5sE#?A)jr%V!sI1zm2?otqRx&BQ$+#M~YiuWEpVyT8S_7UELZzd%wQm zwb+_W^hUk}R?;RGc!*PUJ5KTLj1R?s>jfX)oH@mGDTYc*@h<-{1dUnm5UV@T+iz8jT>j`~`_2;7NvKD4LkZ=HaF^=O+pbeLIBk+F<^y`X(X2&pT^fdvME4;m5cJ9mrWO(yMw5T28?}RWT3{D*_2)NHjWQj zVR`hOFw(8j@)G_1?D-1Nz4!KgWO=f1K@1mCHMUbHGiHfTk^unywv@c4GQ!xzR|gh_ zW?RJ*=$dNl*~sj$;h}Mwc@DdHT>F{V?lP9vNFR! zJEKpLFEI^H2U0ee689*)bb7+~bD46bkuQvth!Jv5Z4Mc)^Ow zHI7+{^Mk=Njw+FR#CgqcLdhnOITq#)tndf&H@E3!GgybP3wEIA+L@0tk);B?b!=Ix zw7gailx6Q*>z36;YJ->^Uy`*%synCt*|dtx2fY}dB3GbmHp_64YoqW4*N;Nxsh-8p{~bnJJUYSp)SZ{xD=Au)hy9vq(sbd!`!dQlMcj(4<=<&jVmPYjcDH$22;RV`KF{;)-h%9}ACh=h`Y=U2n0 z7aOK6V4Iza!(P(XN@z!9_al<4lZHPCQkTJ|^ z=W+^HPlQM{akHN={{93IH0WXQB)I8G@`J^C?fY8_Ny2y)L{ekrGf=7~t^Rn4K5e#! z47xqdMwX+v|J) literal 0 HcmV?d00001 diff --git a/api/core/model_runtime/model_providers/nomic/_common.py b/api/core/model_runtime/model_providers/nomic/_common.py new file mode 100644 index 0000000000..406577dcd7 --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/_common.py @@ -0,0 +1,28 @@ +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +class _CommonNomic: + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. + + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [InvokeConnectionError], + InvokeServerUnavailableError: [InvokeServerUnavailableError], + InvokeRateLimitError: [InvokeRateLimitError], + InvokeAuthorizationError: [InvokeAuthorizationError], + InvokeBadRequestError: [KeyError, InvokeBadRequestError], + } diff --git a/api/core/model_runtime/model_providers/nomic/nomic.py b/api/core/model_runtime/model_providers/nomic/nomic.py new file mode 100644 index 0000000000..d4e5da2e98 --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/nomic.py @@ -0,0 +1,26 @@ +import logging + +from core.model_runtime.entities.model_entities import ModelType +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.model_provider import ModelProvider + +logger = logging.getLogger(__name__) + + +class NomicAtlasProvider(ModelProvider): + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. + """ + try: + model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING) + model_instance.validate_credentials(model="nomic-embed-text-v1.5", credentials=credentials) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f"{self.get_provider_schema().provider} credentials validate failed") + raise ex diff --git a/api/core/model_runtime/model_providers/nomic/nomic.yaml b/api/core/model_runtime/model_providers/nomic/nomic.yaml new file mode 100644 index 0000000000..60dcf1facb --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/nomic.yaml @@ -0,0 +1,29 @@ +provider: nomic +label: + zh_Hans: Nomic Atlas + en_US: Nomic Atlas +icon_small: + en_US: icon_s_en.png +icon_large: + en_US: icon_l_en.svg +background: "#EFF1FE" +help: + title: + en_US: Get your API key from Nomic Atlas + zh_Hans: 从Nomic Atlas获取 API Key + url: + en_US: https://atlas.nomic.ai/data +supported_model_types: + - text-embedding +configurate_methods: + - predefined-model +provider_credential_schema: + credential_form_schemas: + - variable: nomic_api_key + label: + en_US: API Key + type: secret-input + required: true + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml new file mode 100644 index 0000000000..111452df57 --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml @@ -0,0 +1,8 @@ +model: nomic-embed-text-v1.5 +model_type: text-embedding +model_properties: + context_size: 8192 +pricing: + input: "0.1" + unit: "0.000001" + currency: USD diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml new file mode 100644 index 0000000000..ac59f106ed --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml @@ -0,0 +1,8 @@ +model: nomic-embed-text-v1 +model_type: text-embedding +model_properties: + context_size: 8192 +pricing: + input: "0.1" + unit: "0.000001" + currency: USD diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py new file mode 100644 index 0000000000..6cccff6d46 --- /dev/null +++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py @@ -0,0 +1,170 @@ +import time +from functools import wraps +from typing import Optional + +from nomic import embed +from nomic import login as nomic_login + +from core.model_runtime.entities.model_entities import PriceType +from core.model_runtime.entities.text_embedding_entities import ( + EmbeddingUsage, + TextEmbeddingResult, +) +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.text_embedding_model import ( + TextEmbeddingModel, +) +from core.model_runtime.model_providers.nomic._common import _CommonNomic + + +def nomic_login_required(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + if not kwargs.get("credentials"): + raise ValueError("missing credentials parameters") + credentials = kwargs.get("credentials") + if "nomic_api_key" not in credentials: + raise ValueError("missing nomic_api_key in credentials parameters") + # nomic login + nomic_login(credentials["nomic_api_key"]) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + return func(*args, **kwargs) + + return wrapper + + +class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel): + """ + Model class for nomic text embedding model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :return: embeddings result + """ + embeddings, prompt_tokens, total_tokens = self.embed_text( + model=model, + credentials=credentials, + texts=texts, + ) + + # calc usage + usage = self._calc_response_usage( + model=model, credentials=credentials, tokens=prompt_tokens, total_tokens=total_tokens + ) + return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model) + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :return: + """ + if len(texts) == 0: + return 0 + + _, prompt_tokens, _ = self.embed_text( + model=model, + credentials=credentials, + texts=texts, + ) + return prompt_tokens + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + # call embedding model + self.embed_text(model=model, credentials=credentials, texts=["ping"]) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + @nomic_login_required + def embed_text(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int, int]: + """Call out to Nomic's embedding endpoint. + + Args: + model: The model to use for embedding. + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text, and tokens usage. + """ + embeddings: list[list[float]] = [] + prompt_tokens = 0 + total_tokens = 0 + + response = embed.text( + model=model, + texts=texts, + ) + + if not (response and "embeddings" in response): + raise ValueError("Embedding data is missing in the response.") + + if not (response and "usage" in response): + raise ValueError("Response usage is missing.") + + if "prompt_tokens" not in response["usage"]: + raise ValueError("Response usage does not contain prompt tokens.") + + if "total_tokens" not in response["usage"]: + raise ValueError("Response usage does not contain total tokens.") + + embeddings = [list(map(float, e)) for e in response["embeddings"]] + total_tokens = response["usage"]["total_tokens"] + prompt_tokens = response["usage"]["prompt_tokens"] + return embeddings, prompt_tokens, total_tokens + + def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage: + """ + Calculate response usage + + :param model: model name + :param credentials: model credentials + :param tokens: prompt tokens + :param total_tokens: total tokens + :return: usage + """ + # get input price info + input_price_info = self.get_price( + model=model, + credentials=credentials, + price_type=PriceType.INPUT, + tokens=tokens, + ) + + # transform usage + usage = EmbeddingUsage( + tokens=tokens, + total_tokens=total_tokens, + unit_price=input_price_info.unit_price, + price_unit=input_price_info.unit, + total_price=input_price_info.total_amount, + currency=input_price_info.currency, + latency=time.perf_counter() - self.started_at, + ) + + return usage diff --git a/api/poetry.lock b/api/poetry.lock index 78816683d8..184cdb9e81 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -4135,6 +4135,20 @@ files = [ {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] +[[package]] +name = "jsonlines" +version = "4.0.0" +description = "Library with helpers for the jsonlines file format" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55"}, + {file = "jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + [[package]] name = "jsonpath-ng" version = "1.6.1" @@ -4469,6 +4483,24 @@ files = [ {file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, ] +[[package]] +name = "loguru" +version = "0.7.2" +description = "Python logging made (stupidly) simple" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, + {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] + [[package]] name = "lxml" version = "5.3.0" @@ -5320,6 +5352,36 @@ plot = ["matplotlib"] tgrep = ["pyparsing"] twitter = ["twython"] +[[package]] +name = "nomic" +version = "3.1.2" +description = "The official Nomic python client." +optional = false +python-versions = "*" +files = [ + {file = "nomic-3.1.2.tar.gz", hash = "sha256:2de1ab1dcf2429011c92987bb2f1eafe1a3a4901c3185b18f994bf89616f606d"}, +] + +[package.dependencies] +click = "*" +jsonlines = "*" +loguru = "*" +numpy = "*" +pandas = "*" +pillow = "*" +pyarrow = "*" +pydantic = "*" +pyjwt = "*" +requests = "*" +rich = "*" +tqdm = "*" + +[package.extras] +all = ["nomic[aws,local]"] +aws = ["boto3", "sagemaker"] +dev = ["black (==24.3.0)", "cairosvg", "coverage", "isort", "mkautodoc", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "myst-parser", "nomic[all]", "pandas", "pillow", "pylint", "pyright", "pytest", "pytorch-lightning", "twine"] +local = ["gpt4all (>=2.5.0,<3)"] + [[package]] name = "novita-client" version = "0.5.7" @@ -9919,6 +9981,20 @@ files = [ beautifulsoup4 = "*" requests = ">=2.0.0,<3.0.0" +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = false +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + [[package]] name = "wrapt" version = "1.16.0" @@ -10422,4 +10498,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "eb7ef7be5c7790e214f37f17f92b69407ad557cb80055ef7e49e36eb51b3fca6" +content-hash = "17c4108d92c415d987f8b437ea3e0484c5601a05bfe175339a8546c93c159bc5" diff --git a/api/pyproject.toml b/api/pyproject.toml index 066b4772a9..41244f516c 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -100,6 +100,7 @@ exclude = [ OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii" UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa" FIREWORKS_API_KEY = "fw_aaaaaaaaaaaaaaaaaaaa" +NOMIC_API_KEY = "nk-aaaaaaaaaaaaaaaaaaaa" AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com" AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94" ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz" @@ -217,6 +218,7 @@ azure-ai-inference = "^1.0.0b3" volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"} oci = "^2.133.0" tos = "^2.7.1" +nomic = "^3.1.2" [tool.poetry.group.indriect.dependencies] kaleido = "0.2.1" rank-bm25 = "~0.2.2" diff --git a/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py new file mode 100644 index 0000000000..281e866e45 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py @@ -0,0 +1,59 @@ +import os +from collections.abc import Callable +from typing import Any, Literal, Union + +import pytest + +# import monkeypatch +from _pytest.monkeypatch import MonkeyPatch +from nomic import embed + + +def create_embedding(texts: list[str], model: str, **kwargs: Any) -> dict: + texts_len = len(texts) + + foo_embedding_sample = 0.123456 + + combined = { + "embeddings": [[foo_embedding_sample for _ in range(768)] for _ in range(texts_len)], + "usage": {"prompt_tokens": texts_len, "total_tokens": texts_len}, + "model": model, + "inference_mode": "remote", + } + + return combined + + +def mock_nomic( + monkeypatch: MonkeyPatch, + methods: list[Literal["text_embedding"]], +) -> Callable[[], None]: + """ + mock nomic module + + :param monkeypatch: pytest monkeypatch fixture + :return: unpatch function + """ + + def unpatch() -> None: + monkeypatch.undo() + + if "text_embedding" in methods: + monkeypatch.setattr(embed, "text", create_embedding) + + return unpatch + + +MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true" + + +@pytest.fixture +def setup_nomic_mock(request, monkeypatch): + methods = request.param if hasattr(request, "param") else [] + if MOCK: + unpatch = mock_nomic(monkeypatch, methods=methods) + + yield + + if MOCK: + unpatch() diff --git a/api/tests/integration_tests/model_runtime/nomic/__init__.py b/api/tests/integration_tests/model_runtime/nomic/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py new file mode 100644 index 0000000000..52dc96ee95 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py @@ -0,0 +1,62 @@ +import os + +import pytest + +from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel +from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock + + +@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True) +def test_validate_credentials(setup_nomic_mock): + model = NomicTextEmbeddingModel() + + with pytest.raises(CredentialsValidateFailedError): + model.validate_credentials( + model="nomic-embed-text-v1.5", + credentials={ + "nomic_api_key": "invalid_key", + }, + ) + + model.validate_credentials( + model="nomic-embed-text-v1.5", + credentials={ + "nomic_api_key": os.environ.get("NOMIC_API_KEY"), + }, + ) + + +@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True) +def test_invoke_model(setup_nomic_mock): + model = NomicTextEmbeddingModel() + + result = model.invoke( + model="nomic-embed-text-v1.5", + credentials={ + "nomic_api_key": os.environ.get("NOMIC_API_KEY"), + }, + texts=["hello", "world"], + user="foo", + ) + + assert isinstance(result, TextEmbeddingResult) + assert result.model == "nomic-embed-text-v1.5" + assert len(result.embeddings) == 2 + assert result.usage.total_tokens == 2 + + +@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True) +def test_get_num_tokens(setup_nomic_mock): + model = NomicTextEmbeddingModel() + + num_tokens = model.get_num_tokens( + model="nomic-embed-text-v1.5", + credentials={ + "nomic_api_key": os.environ.get("NOMIC_API_KEY"), + }, + texts=["hello", "world"], + ) + + assert num_tokens == 2 diff --git a/api/tests/integration_tests/model_runtime/nomic/test_provider.py b/api/tests/integration_tests/model_runtime/nomic/test_provider.py new file mode 100644 index 0000000000..6cad400c06 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/nomic/test_provider.py @@ -0,0 +1,22 @@ +import os + +import pytest + +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.nomic.nomic import NomicAtlasProvider +from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel +from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock + + +@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True) +def test_validate_provider_credentials(setup_nomic_mock): + provider = NomicAtlasProvider() + + with pytest.raises(CredentialsValidateFailedError): + provider.validate_provider_credentials(credentials={}) + + provider.validate_provider_credentials( + credentials={ + "nomic_api_key": os.environ.get("NOMIC_API_KEY"), + }, + ) diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh index 4c1c6bf4f3..4c0083a2de 100755 --- a/dev/pytest/pytest_model_runtime.sh +++ b/dev/pytest/pytest_model_runtime.sh @@ -7,4 +7,5 @@ pytest api/tests/integration_tests/model_runtime/anthropic \ api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \ api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \ api/tests/integration_tests/model_runtime/upstage \ - api/tests/integration_tests/model_runtime/fireworks + api/tests/integration_tests/model_runtime/fireworks \ + api/tests/integration_tests/model_runtime/nomic