From fa9709faa873dbb8333ff3df544480a4ef7dbd82 Mon Sep 17 00:00:00 2001 From: "Dr. Kiji" Date: Sat, 23 Nov 2024 10:23:57 +0900 Subject: [PATCH] fork for fta --- api/controllers/console/__init__.py | 1 + api/controllers/console/datasets/fta_test.py | 145 ++++++++++++++++++ .../builtin/file_extractor/_assets/icon.png | Bin 0 -> 4363 bytes .../builtin/file_extractor/file_extractor.py | 8 + .../file_extractor/file_extractor.yaml | 15 ++ .../file_extractor/tools/file_extractor.py | 45 ++++++ .../file_extractor/tools/file_extractor.yaml | 49 ++++++ api/models/fta.py | 78 ++++++++++ 8 files changed, 341 insertions(+) create mode 100644 api/controllers/console/datasets/fta_test.py create mode 100644 api/core/tools/provider/builtin/file_extractor/_assets/icon.png create mode 100644 api/core/tools/provider/builtin/file_extractor/file_extractor.py create mode 100644 api/core/tools/provider/builtin/file_extractor/file_extractor.yaml create mode 100644 api/core/tools/provider/builtin/file_extractor/tools/file_extractor.py create mode 100644 api/core/tools/provider/builtin/file_extractor/tools/file_extractor.yaml create mode 100644 api/models/fta.py diff --git a/api/controllers/console/__init__.py b/api/controllers/console/__init__.py index f46d5b6b13..4eba07582d 100644 --- a/api/controllers/console/__init__.py +++ b/api/controllers/console/__init__.py @@ -62,6 +62,7 @@ from .datasets import ( external, hit_testing, website, + fta_test, ) # Import explore controllers diff --git a/api/controllers/console/datasets/fta_test.py b/api/controllers/console/datasets/fta_test.py new file mode 100644 index 0000000000..f246da7828 --- /dev/null +++ b/api/controllers/console/datasets/fta_test.py @@ -0,0 +1,145 @@ +import json + +import requests +from flask import Response +from flask_restful import Resource, reqparse +from sqlalchemy import text + +from controllers.console import api +from extensions.ext_database import db +from extensions.ext_storage import storage +from models.fta import ComponentFailure, ComponentFailureStats + + +class FATTestApi(Resource): + def post(self): + parser = reqparse.RequestParser() + parser.add_argument("log_process_data", nullable=False, required=True, type=str, location="args") + args = parser.parse_args() + print(args["log_process_data"]) + # Extract the JSON string from the text field + json_str = args["log_process_data"].strip("```json\\n").strip("```").strip().replace("\\n", "") + log_data = json.loads(json_str) + db.session.query(ComponentFailure).delete() + for data in log_data: + if not isinstance(data, dict): + raise TypeError("Data must be a dictionary.") + + required_keys = {"Date", "Component", "FailureMode", "Cause", "RepairAction", "Technician"} + if not required_keys.issubset(data.keys()): + raise ValueError(f"Data dictionary must contain the following keys: {required_keys}") + + try: + # Clear existing stats + component_failure = ComponentFailure( + Date=data["Date"], + Component=data["Component"], + FailureMode=data["FailureMode"], + Cause=data["Cause"], + RepairAction=data["RepairAction"], + Technician=data["Technician"], + ) + db.session.add(component_failure) + db.session.commit() + except Exception as e: + print(e) + # Clear existing stats + db.session.query(ComponentFailureStats).delete() + + # Insert calculated statistics + try: + db.session.execute( + text(""" + INSERT INTO component_failure_stats ("Component", "FailureMode", "Cause", "PossibleAction", "Probability", "MTBF") + SELECT + cf."Component", + cf."FailureMode", + cf."Cause", + cf."RepairAction" as "PossibleAction", + COUNT(*) * 1.0 / (SELECT COUNT(*) FROM component_failure WHERE "Component" = cf."Component") AS "Probability", + COALESCE(AVG(EXTRACT(EPOCH FROM (next_failure_date::timestamp - cf."Date"::timestamp)) / 86400.0),0)AS "MTBF" + FROM ( + SELECT + "Component", + "FailureMode", + "Cause", + "RepairAction", + "Date", + LEAD("Date") OVER (PARTITION BY "Component", "FailureMode", "Cause" ORDER BY "Date") AS next_failure_date + FROM + component_failure + ) cf + GROUP BY + cf."Component", cf."FailureMode", cf."Cause", cf."RepairAction"; + """) + ) + db.session.commit() + except Exception as e: + db.session.rollback() + print(f"Error during stats calculation: {e}") + # output format + # [ + # (17, 'Hydraulic system', 'Leak', 'Hose rupture', 'Replaced hydraulic hose', 0.3333333333333333, None), + # (18, 'Hydraulic system', 'Leak', 'Seal Wear', 'Replaced the faulty seal', 0.3333333333333333, None), + # (19, 'Hydraulic system', 'Pressure drop', 'Fluid leak', 'Replaced hydraulic fluid and seals', 0.3333333333333333, None) + # ] + + component_failure_stats = db.session.query(ComponentFailureStats).all() + # Convert stats to list of tuples format + stats_list = [] + for stat in component_failure_stats: + stats_list.append( + ( + stat.StatID, + stat.Component, + stat.FailureMode, + stat.Cause, + stat.PossibleAction, + stat.Probability, + stat.MTBF, + ) + ) + return {"data": stats_list}, 200 + + +# generate-fault-tree +class GenerateFaultTreeApi(Resource): + def post(self): + parser = reqparse.RequestParser() + parser.add_argument("llm_text", nullable=False, required=True, type=str, location="args") + args = parser.parse_args() + entities = args["llm_text"].replace("```", "").replace("\\n", "\n") + print(entities) + request_data = {"fault_tree_text": entities} + url = "https://fta.cognitech-dev.live/generate-fault-tree" + headers = {"accept": "application/json", "Content-Type": "application/json"} + + response = requests.post(url, json=request_data, headers=headers) + print(response.json()) + return {"data": response.json()}, 200 + + +class ExtractSVGApi(Resource): + def post(self): + parser = reqparse.RequestParser() + parser.add_argument("svg_text", nullable=False, required=True, type=str, location="args") + args = parser.parse_args() + # svg_text = ''.join(args["svg_text"].splitlines()) + svg_text = args["svg_text"].replace("\n", "") + svg_text = svg_text.replace('"', '"') + print(svg_text) + svg_text_json = json.loads(svg_text) + svg_content = svg_text_json.get("data").get("svg_content")[0] + svg_content = svg_content.replace("\n", "").replace('"', '"') + file_key = "fta_svg/" + "fat.svg" + if storage.exists(file_key): + storage.delete(file_key) + storage.save(file_key, svg_content.encode("utf-8")) + generator = storage.load(file_key, stream=True) + + return Response(generator, mimetype="image/svg+xml") + + +api.add_resource(FATTestApi, "/fta/db-handler") +api.add_resource(GenerateFaultTreeApi, "/fta/generate-fault-tree") +api.add_resource(ExtractSVGApi, "/fta/extract-svg") diff --git a/api/core/tools/provider/builtin/file_extractor/_assets/icon.png b/api/core/tools/provider/builtin/file_extractor/_assets/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..4bb8d8c1e5f65f729b7843cb152758afaf7d307e GIT binary patch literal 4363 zcmbuCc{CJm-^OPc24kyaUqeEM5V9{54P~v2CHoR#?E5k(+sM9TnIQ@xOu}THpC!pw z*$HD!h%sbuynfI7p5K4(bIx;~`<&~(u5+Ew{nz)L`#yJ~v7sIl12+Qz0APaOftj9r z__>nkXwUb@W6)**fYH!RN5>ehqa$eSi*#}Ga0USGBxaj~5oUetSSwQ@5FJcYf9)P? z`bABBI;p)BK?xyxR{dmpt)3KU4bk$#h3k^d3?-Am3q0$Ul4yq@DKyQq>;Rufo1Y6< zHFG0Y!geG4esr7_9{nivbKPpA2DD{LXRnsz1D4@rXWUb^el>!ymuD4ZtKBsI8Q z6S@M^i>F4!&2;2WC<047YHwX@piG_FqH~NzH~=V^P9Zc4LKakK1k7DY`%DH3>I)&& zC2cFaomn?N700qpT=OYoXu9Up%wWs)J~4Gg`xFFtr(7i657I1UAy-prT^)Vd^*A~c zb}dJ96O1#mnPU{{MhiAZD|j0=n-*ax*@mn0@*{fkN2dc9s*{}S(x|(A-JnrJXL%`S zkI254yZjJcl~_?o;)9QjyZa)px-TwP>6|{y;OC;jDFCiP*_o#$Z^#*3$S8iN=LzzK z4sh9NtHQB7y*!JK&rVS?2~0vmY0J8CEK_XG39(rUGEtA8DHU%EY)DyIC zY<4SG_YOG*zks7T^7g6R+i`G<^l$xa)GAvb!2OmCo@eO8 zv!F_>AOS3!Mh_#{Cvfqn%TEeNPbeAFMaJI&EekK5g2QeX1IR_XIDJFa&Nns8f{qnN zZ!)N{+J)dwO=zn*qXb-6^$y5LP?!?07mlEjiL*vz|2e}7xOB+yetODsBWZ!lV$a^d zLGT8OQ=9S1^dc>uW$RgdSCv5ZkP^=UXUY;fiIn2U)?K?Q_)&!0XMS!~#BeCfA~!qq5v2D_Lza9fc-ViODLv69@#HwDlSsz>M!Q z@BAuV$ix$AISRmg-!D$PT%lwgB1BLG^KGd|*f9=7GbQ(O;epabFqPJ|1gafDuDh{w zu73qUsp|=Y2fGanP4kbqjO z7qxXb6Vx4I5LfmS!`&Q2Ns@jmEs`+2I6;aJ7NF#53UglEq{o8z?PY~BWzfcVuZeAn@Cp^ihP^?P z^6!YqCXT~`@En zU%%2OT9tn9)6lY@FN1|P^h$1KP^M8@ZAMYLhOxYO4x5oa+wD7fIu^I5wA{+9%lu8$ zjTJCBQR4C7>p> zvROOg^)d9f*?Q2Z?bpZhlKBEs&vJ~c;8m|NFV%|M#_S#|xs&9dazk!b%5%%txVILY zt>ySLM{u{L*%)fTI__?dy?Siv=6mxfA0l5RpXxsMZF9om@x3oe-#(96zHG4CD0$eV z0H4uUF}A8`yC2rF&-JU9G1-2IC<{s!P3BAXONR25q%<%aiCLB3cda#SF)XTStU-H~ zYQ_S1$<$S)7eBrr;dRgSa*bU^~$MvJ1iF^8c`#)#D zgw+mzV;LR(YFj#A5j77`tdmQ)@xr;Thghh!jlke+9BCX4KJxRPpoQEYyK+8Jn(%J1 zFR<@O$C=cnk3KH_%2jIPScWU|Y4pk4pJ9zNUTCe9++$ic(i zB@LAi$}7shyqi_D!nD_(me3DgqWCxZu`8M1vMst(z$x#u&|Q`&{b=(M3~FWV)go&x zY@Pd|u{qM$FDUcFSj%s`6~6of(zl~$++MHVyiPipRrOSjJ@kEW>#qOqz=7)8QG9N! zdCG)W?G}7I6P6ixH+0i;b?{(rb@pc$8NPcoS3WW^n!9cF>*Uk$ccZnYk!`NtoXo+g zu8@W8W$9gw#k07LKFqM%WfTnvoCSIhb_ZVtJ)~8qlZAX{tc0vX0-3Np;Sx_-FS5V8 zy39VDAVwS`A8XI*{D{XgnX#ZDRg7V5UK~&eGDTBZTX_8Tcl7(@ESQ3B^fjB&=8BWA z<6q@UQ+rq)OB^d5upbQ*FdSj(DHjygyn?=O1fLx~*a^DiagmMwZQ@?{X!mrtA{+>R zlBq0~TBI69e(eEjJGAPhtA%C7ack-trSoNVCgG*Q2%?$s15)6_#cG}U1{VXDra@dmefUs) zXK|;%>Y^WwpVg1NAuHAGh47(MPe>ZgPlzQ)zO;37-4hd1Nq*00&nxYaAtqyQZCY(@&Se*o4qk}3;Ef5Ip(rP zgyKR=(l+fwmue5&l}7h|_eJ-<^@1ni6Qk8*mR9)vo#uo9Cg_^qy$K~wvg&FP-&_#5+gRnEY=->fIbf9(6a3Q}ZJvg8Gs{J4*~ z>$z15G76Ma;`lv1_ubWf2X-cF*hg|ImzsOcF~wL!r~}byp^swr&8)3l&Pc-xGZ%U= zuq-z*{ISqegClUOGgV6idu#Ys6glSLSq>V@kxJ>NyxGZ^c^#)P(I~9ZLJ5C0`?hW2 z-N{OsB)@sS%GH2E{Ce2$y#~}14Vxd2I{byny1j!>3!0Fx=hj=LT4y7~4u<+|86uCo zgTAVDwq=~;AM^`ws#j3P_9r4RUYK41T2+rQwu7}T$G?_-7%dyA2TDcghgnhvmMuJ+ z{DKUOjEA%5%TC9CTOXh5Aa}McJCLFFyNe`&$f)^_iih1I{YWxDt_}RNLNe0A8qXq#!7iSf>nEQMGLpv}@N zU<6RU(*UHn0JN%#rsWkw@v$@}7&q{RLnA~$9Pf26z-U+0@e7;7AU_D|j|!kJ=6LTu zpu`Q(9R~1U>`qq>&*4!Gl7L*mGPS2tvaq?#lzS!^cn3MmACgrGwevrbCi=$SddhcI z==_7>;f#R0+`9*mIL~wd5Re-{bDn_bQU~(lL!^x zu^gxQ(};+|;4C=tULKrlF9SgAfr1D?=^Ra*&(s2n!=QK)zv)dtiyC`a2PzSJE?!fB zs_>E==-GI3`Iv)oprq&h%-~i8RnJG~cxs#L(GE-Pd^fk3j#lWFUB0 z=6(*pY)KHY;e_+n2*e-F94t%vx`q-12_!NHo03e< zBdx%B>r0wjLqSjnFt9bKl|z}RO;r059}bQ1kDG`Tva3o(YD9YRZG{eu*iPsw*d+6{ zU-&!_4=Az-a#SEFYUdrX66H3$251j;uf5gjS9BN?KvhAuHU&t0(axs|YbpEE@38O^2W4W;QUe)!W?q`0V)52qg0E?Aa7n7|+XJ$)B=O&l z!JIr^`5S7M<+qyR#;lqm) zE7$;F;hR1#xu*p@-f*vnY(B?+?FVQV#%EEqI8;?Y-K9Mpq`{0Ks zW|Yr*$M!msMs5SQMHE2JV^qzF)mjJnPx{8|-^86y-*au})HhUF;~>9bZ{vE0M6Pu% z(@Kc{S^N&xrGl?W^Pp*ON2)$X3`kJ>Nk2{nO;~l~@-_;MX+S|rLM&;&XJYN7h%t|` oIlzNgVh9LcFu&aSKSe3j)fTOb$-Dgz{=O67w+&%6+IBJj1Bulxh5!Hn literal 0 HcmV?d00001 diff --git a/api/core/tools/provider/builtin/file_extractor/file_extractor.py b/api/core/tools/provider/builtin/file_extractor/file_extractor.py new file mode 100644 index 0000000000..7fc67df375 --- /dev/null +++ b/api/core/tools/provider/builtin/file_extractor/file_extractor.py @@ -0,0 +1,8 @@ +from typing import Any + +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController + + +class FileExtractorProvider(BuiltinToolProviderController): + def _validate_credentials(self, credentials: dict[str, Any]) -> None: + pass diff --git a/api/core/tools/provider/builtin/file_extractor/file_extractor.yaml b/api/core/tools/provider/builtin/file_extractor/file_extractor.yaml new file mode 100644 index 0000000000..fa197a1255 --- /dev/null +++ b/api/core/tools/provider/builtin/file_extractor/file_extractor.yaml @@ -0,0 +1,15 @@ +identity: + author: Jyong + name: file_extractor + label: + en_US: File Extractor + zh_Hans: 文件提取 + pt_BR: File Extractor + description: + en_US: Extract text from file + zh_Hans: 从文件中提取文本 + pt_BR: Extract text from file + icon: icon.png + tags: + - utilities + - productivity diff --git a/api/core/tools/provider/builtin/file_extractor/tools/file_extractor.py b/api/core/tools/provider/builtin/file_extractor/tools/file_extractor.py new file mode 100644 index 0000000000..ea7746337f --- /dev/null +++ b/api/core/tools/provider/builtin/file_extractor/tools/file_extractor.py @@ -0,0 +1,45 @@ +import tempfile +from typing import Any, Union + +from core.file.enums import FileType +from core.file.file_manager import download_to_target_path +from core.rag.extractor.text_extractor import TextExtractor +from core.rag.splitter.fixed_text_splitter import FixedRecursiveCharacterTextSplitter +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.errors import ToolParameterValidationError +from core.tools.tool.builtin_tool import BuiltinTool + + +class FileExtractorTool(BuiltinTool): + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + """ + invoke tools + """ + # image file for workflow mode + file = tool_parameters.get("text_file") + if file and file.type != FileType.DOCUMENT: + raise ToolParameterValidationError("Not a valid document") + + if file: + with tempfile.TemporaryDirectory() as temp_dir: + file_path = download_to_target_path(file, temp_dir) + extractor = TextExtractor(file_path, autodetect_encoding=True) + documents = extractor.extract() + character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder( + chunk_size=tool_parameters.get("max_token", 500), + chunk_overlap=0, + fixed_separator=tool_parameters.get("separator", "\n\n"), + separators=["\n\n", "。", ". ", " ", ""], + embedding_model_instance=None, + ) + chunks = character_splitter.split_documents(documents) + + content = "\n".join([chunk.page_content for chunk in chunks]) + return self.create_text_message(content) + + else: + raise ToolParameterValidationError("Please provide either file") diff --git a/api/core/tools/provider/builtin/file_extractor/tools/file_extractor.yaml b/api/core/tools/provider/builtin/file_extractor/tools/file_extractor.yaml new file mode 100644 index 0000000000..5937fbc873 --- /dev/null +++ b/api/core/tools/provider/builtin/file_extractor/tools/file_extractor.yaml @@ -0,0 +1,49 @@ +identity: + name: text extractor + author: Jyong + label: + en_US: Text extractor + zh_Hans: Text 文本解析 + description: + en_US: Extract content from text file and support split to chunks by split characters and token length + zh_Hans: 支持从文本文件中提取内容并支持通过分割字符和令牌长度分割成块 + pt_BR: Extract content from text file and support split to chunks by split characters and token length +description: + human: + en_US: Text extractor is a text extract tool + zh_Hans: Text extractor 是一个文本提取工具 + pt_BR: Text extractor is a text extract tool + llm: Text extractor is a tool used to extract text file +parameters: + - name: text_file + type: file + label: + en_US: Text file + human_description: + en_US: The text file to be extracted. + zh_Hans: 要提取的 text 文档。 + llm_description: you should not input this parameter. just input the image_id. + form: llm + - name: separator + type: string + required: false + label: + en_US: split character + zh_Hans: 分隔符号 + human_description: + en_US: Text content split character + zh_Hans: 用于文档分隔的符号 + llm_description: it is used for split content to chunks + form: form + - name: max_token + type: number + required: false + label: + en_US: Maximum chunk length + zh_Hans: 最大分段长度 + human_description: + en_US: Maximum chunk length + zh_Hans: 最大分段长度 + llm_description: it is used for limit chunk's max length + form: form + diff --git a/api/models/fta.py b/api/models/fta.py new file mode 100644 index 0000000000..434e7fcaca --- /dev/null +++ b/api/models/fta.py @@ -0,0 +1,78 @@ +from extensions.ext_database import db + + +class ComponentFailure(db.Model): + __tablename__ = "component_failure" + __table_args__ = ( + db.UniqueConstraint("Date", "Component", "FailureMode", "Cause", "Technician", name="unique_failure_entry"), + ) + + FailureID = db.Column(db.Integer, primary_key=True, autoincrement=True) + Date = db.Column(db.Date, nullable=False) + Component = db.Column(db.String(255), nullable=False) + FailureMode = db.Column(db.String(255), nullable=False) + Cause = db.Column(db.String(255), nullable=False) + RepairAction = db.Column(db.Text, nullable=True) + Technician = db.Column(db.String(255), nullable=False) + + +class Maintenance(db.Model): + __tablename__ = "maintenance" + + MaintenanceID = db.Column(db.Integer, primary_key=True, autoincrement=True) + MaintenanceType = db.Column(db.String(255), nullable=False) + MaintenanceDate = db.Column(db.Date, nullable=False) + ServiceDescription = db.Column(db.Text, nullable=True) + PartsReplaced = db.Column(db.Text, nullable=True) + Technician = db.Column(db.String(255), nullable=False) + + +class OperationalData(db.Model): + __tablename__ = "operational_data" + + OperationID = db.Column(db.Integer, primary_key=True, autoincrement=True) + CraneUsage = db.Column(db.Integer, nullable=False) + LoadWeight = db.Column(db.Float, nullable=False) + LoadFrequency = db.Column(db.Integer, nullable=False) + EnvironmentalConditions = db.Column(db.Text, nullable=True) + + +class IncidentData(db.Model): + __tablename__ = "incident_data" + + IncidentID = db.Column(db.Integer, primary_key=True, autoincrement=True) + IncidentDescription = db.Column(db.Text, nullable=False) + IncidentDate = db.Column(db.Date, nullable=False) + Consequences = db.Column(db.Text, nullable=True) + ResponseActions = db.Column(db.Text, nullable=True) + + +class ReliabilityData(db.Model): + __tablename__ = "reliability_data" + + ComponentID = db.Column(db.Integer, primary_key=True, autoincrement=True) + ComponentName = db.Column(db.String(255), nullable=False) + MTBF = db.Column(db.Float, nullable=False) + FailureRate = db.Column(db.Float, nullable=False) + + +class SafetyData(db.Model): + __tablename__ = "safety_data" + + SafetyID = db.Column(db.Integer, primary_key=True, autoincrement=True) + SafetyInspectionDate = db.Column(db.Date, nullable=False) + SafetyFindings = db.Column(db.Text, nullable=True) + SafetyIncidentDescription = db.Column(db.Text, nullable=True) + ComplianceStatus = db.Column(db.String(50), nullable=False) + + +class ComponentFailureStats(db.Model): + __tablename__ = "component_failure_stats" + + StatID = db.Column(db.Integer, primary_key=True, autoincrement=True) + Component = db.Column(db.String(255), nullable=False) + FailureMode = db.Column(db.String(255), nullable=False) + Cause = db.Column(db.String(255), nullable=False) + PossibleAction = db.Column(db.Text, nullable=True) + Probability = db.Column(db.Float, nullable=False) + MTBF = db.Column(db.Float, nullable=False)