Startup de conduite autonome
Constructeur automobile
Logiciel & matériel
Transport & livraison
Équipementier automobile
Autres
AI · Python · LangChain · Graph
🚗 Visualiser les partenariats du marché de la voiture autonome avec un agent IA
L'IA est capable de transformer du texte en données structurées pour créer des visualisations de données.
Notre objectif est de créer un graphe de réseau des partenariats entre les entreprises du marché du véhicule autonome à l'aide d'un agent IA. Cet agent identifie de façon autonome les collaborations à partir d'articles de presse et gère la base de données du graphe.
Méthodologie :
- Collecter des articles de presse relatifs aux partenariats sur le marché de la voiture autonome
- Mettre en place un extracteur pour obtenir des données structurées sur les partenariats
- Créer un agent et ses outils
- Exécuter l'agent IA sur les articles et suivre les résultats
1import pandas as pd
2import json
3import os
4
5from pydantic import BaseModel, Field
6from typing import Optional, List
7from enum import Enum
8from dotenv import load_dotenv
9
10from langchain.chat_models import init_chat_model
11from langchain_core.prompts import ChatPromptTemplate
12from langchain_core.tools import tool
13from langchain.agents import create_tool_calling_agent, AgentExecutor
14
# The dataset: news articles. The "content" column is what gets passed to the
# agent as its input further down in this script.
news = pd.read_csv(filepath_or_buffer="news.csv")
17
18#The extractor
class Category(str, Enum):
    # Closed set of company types on the self-driving car market.
    # Mixing in ``str`` makes every member a real string, so ``.value`` (used
    # when partnerships are turned into plain dicts) is the label itself.
    # Documented with comments rather than a docstring on purpose: this enum
    # is nested in the schema given to the LLM, and a docstring might leak
    # into that schema.

    # Companies whose core business is autonomous driving.
    self_driving_startup = "self driving startup"
    # Vehicle manufacturers.
    automaker = "automaker"
    # Software and hardware vendors.
    software_hardware = "software/hardware"
    # Transport and delivery operators.
    transport_delivery = "transport/delivery"
    # Automotive parts suppliers.
    automotive_supplier = "automotive supplier"
    # Anything that fits none of the categories above.
    other = "other"
26
class Company(BaseModel):
    """Information about a company on the self-driving car market"""

    # This model is nested inside ``Data``, which is handed to
    # ``llm.with_structured_output``. The docstring and the field descriptions
    # are therefore kept verbatim: they presumably end up in the schema the
    # model sees.

    # Both fields are optional so the extractor can return null when the
    # article does not state the value.
    name: Optional[str] = Field(
        default=None,
        description="The name of the company",
    )
    category: Optional[Category] = Field(
        default=None,
        description="The type of company on the self-driving car market",
    )
32
33
class Partnership(BaseModel):
    """Information about a partnership"""

    # A partnership is modelled as the list of companies taking part in it.
    # NOTE(review): ``min_items`` is the Pydantic v1 spelling; v2 renames it
    # to ``min_length`` and deprecates the old name. Confirm against the
    # installed Pydantic version before changing it.
    # NOTE(review): the default (an empty list) does not itself satisfy the
    # two-company minimum; presumably the model is always built from LLM
    # output that supplies the field. Verify.
    companies: List[Company] = Field(
        description="The partnership stakeholders",
        default_factory=list,
        min_items=2,
    )
38
class Data(BaseModel):
    """Extracted data about partnerships"""

    # Top-level container used as the structured-output schema of the
    # extraction LLM: every partnership found in one text, or an empty list
    # when there is none.
    partnerships: List[Partnership] = Field(
        default_factory=list,
    )
43
# System instructions for the extraction call. The adjacent string literals
# are concatenated by Python into one single-line message.
_EXTRACT_SYSTEM_MESSAGE = (
    "You are an expert extraction algorithm. "
    "Only extract partnerships between companies on the self-driving car market from the text. "
    "If you do not know the value of an attribute asked to extract, "
    "return null for the attribute's value."
)

# Two-message prompt: the fixed system instructions, then the article text
# supplied through the ``text`` template variable.
extract_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", _EXTRACT_SYSTEM_MESSAGE),
        ("human", "{text}"),
    ]
)
56
# Load variables from a .env file into the process environment.
load_dotenv()
# None when the variable is unset. The value is not referenced again in the
# visible part of this file.
api_key = os.environ.get("OPENAI_API_KEY")

# Chat model used by the tools, plus a variant constrained to return a
# ``Data`` instance for the extraction step.
llm = init_chat_model(model="gpt-4o", model_provider="openai")
structured_llm = llm.with_structured_output(Data)
61
# Function to load the data from JSON file
def load_json() -> dict:
    """Load the graph database stored in ``data.json``.

    The file is read from the current working directory as UTF-8, the
    encoding the JSON specification mandates.

    Returns:
        The parsed JSON document. When ``data.json`` does not exist yet
        (cold start), an empty graph ``{"nodes": [], "edges": []}`` is
        returned so the tools can bootstrap the database instead of crashing.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        with open("data.json", "r", encoding="utf-8") as data_json:
            return json.load(data_json)
    except FileNotFoundError:
        # First run: nothing has been saved yet. Hand back a fresh dict on
        # every call so callers can mutate it freely.
        return {"nodes": [], "edges": []}
68
# Function to save the data into the JSON file
def save_json(dict: dict):
    """Overwrite ``data.json`` with the JSON serialization of ``dict``.

    The parameter name shadows the builtin; it is kept because it is part of
    the function's signature.
    """
    # Serialize first: if the data cannot be encoded, the existing file is
    # never opened for writing and so is not truncated.
    payload = json.dumps(dict)
    with open("data.json", "w") as out_file:
        out_file.write(payload)
74
75
76# The tools
@tool
def extract_partnerships(text: str) -> tuple[str, list]:
    """Extract partnerships from self-driving car market text."""

    # The docstring above is kept verbatim: for an @tool function it is the
    # description exposed to the agent.
    #
    # Returns a (summary, partnerships) pair. Each partnership is a plain dict
    # {"companies": [{"name": ..., "category": ...}, ...]}, with the category
    # reduced to its string value (or None when the extractor left it empty).
    extraction = structured_llm.invoke(
        extract_prompt_template.invoke({"text": text})
    )

    partnerships = [
        {
            "companies": [
                {
                    "name": member.name,
                    "category": None if not member.category else member.category.value,
                }
                for member in found.companies
            ]
        }
        for found in extraction.partnerships
    ]

    return f"Found {len(partnerships)} partnerships", partnerships
99
@tool
def search_node(name: str, category: str) -> str:
    """Check if a node is already stored in the graph database."""

    # The docstring above is kept verbatim: for an @tool function it is the
    # description exposed to the agent.
    #
    # Args:
    #     name: Company name to look for.
    #     category: Company category, echoed in the "not found" answer.
    # Returns:
    #     One of the three answer formats spelled out in the prompt below
    #     (found / similar, should be merged / not found).

    data = load_json()
    nodes = data["nodes"]

    # Fast path 1: an empty graph cannot contain a match. Skip the LLM call,
    # which costs a request and could invent a match.
    if not nodes:
        return f"Node not found. Need to create a new node with name {name} and category {category}"

    # Fast path 2: an exact, case-insensitive name match is the prompt's
    # case 1, so it is answered deterministically with the stored id.
    wanted = name.strip().casefold()
    for node in nodes:
        if str(node["name"]).strip().casefold() == wanted:
            return f"Node found with id {node['id']} and name {node['name']}."

    # Anything fuzzier (subsidiaries, abbreviations, spelling variants) is
    # delegated to the LLM.
    search_prompt = f"""You are an expert search algorithm. I am going to give you the name of a company, and you have to check if the company is already stored in the list.

    You have 3 possibilities to answer:

    1. Exact match found: If a node with the same or very similar name already exists in the list:
    → Answer: "Node found with id [ID] and name [NAME]."

    2. Similar node found (requires merging): If a node represents the same company but with a different name variation (subsidiaries, regional divisions, different spelling):
    → Answer: "Similar node found with id [ID] and name [NAME]. Should be merged/renamed to [SUGGESTED_NAME] to avoid duplicates"

    Examples of nodes that should be merged:
    - "Toyota China" and "Toyota" → merge to "Toyota"
    - "Tesla Motors" and "Tesla Inc" → merge to "Tesla"
    - "Waymo" and "Google Self-Driving" → merge to "Waymo (Google)"
    - "General Motors" and "GM" → merge to "General Motors"

    3. No match found: If the company is genuinely new and doesn't match any existing node:
    → Answer: "Node not found. Need to create a new node with name [NAME] and category [CATEGORY]"

    Important guidelines:
    - Consider company subsidiaries, regional divisions, and alternative names as the same entity
    - Prioritize the most commonly used or official company name
    - When in doubt between similar companies, prefer merging over creating duplicates
    - Be case-insensitive in your search
    - Look for partial matches and common abbreviations

    The node to search: {name}
    The category of the node: {category}
    The existing nodes list: {nodes}

    Your analysis:
    """

    search_result = llm.invoke(search_prompt)

    return search_result.content
142
@tool
def add_node(name: str, category: str) -> tuple[str, dict]:
    """Add a node to the nodes list."""

    # The docstring above is kept verbatim: for an @tool function it is the
    # description exposed to the agent.
    #
    # Appends {"id", "name", "category"} to the stored node list, persists the
    # graph, and returns a (message, new node) pair.
    graph = load_json()
    node_list = graph["nodes"]

    # Ids are sequential: the next id is the current node count, so a node's
    # id is also its position in the list.
    created = {"id": len(node_list), "name": name, "category": category}
    node_list.append(created)

    save_json(graph)

    return f"Node added with the id {created['id']}: ", created
156
@tool
def rename_node(id: int, new_name: str) -> str:
    """Update the name of a node from a given ID."""

    # The docstring above is kept verbatim: for an @tool function it is the
    # description exposed to the agent. The ``id`` parameter name (which
    # shadows the builtin) is kept because it is part of the signature.
    #
    # Returns a confirmation message, or an error message when ``id`` is out
    # of range. Nothing is written to disk in the error case.
    graph = load_json()
    node_list = graph["nodes"]

    # The id is used as a list position, so it must be a valid index.
    if not 0 <= id < len(node_list):
        return f"Error: Node with id {id} does not exist."

    target = node_list[id]
    old_name = target["name"]
    target["name"] = new_name

    # ``node_list`` is the list held by ``graph``, so the rename is already
    # reflected in what gets saved.
    save_json(graph)

    return f"Node with id {id} renamed from '{old_name}' to '{new_name}'."
174
@tool
def search_edge(from_id: int, to_id: int) -> str:
    """Check if an edge is already stored in the graph database."""

    # The docstring above is kept verbatim: for an @tool function it is the
    # description exposed to the agent.
    #
    # The graph is undirected, so an edge counts as found whichever way round
    # its two endpoints were stored.
    def _connects(link) -> bool:
        start = link["from"]
        return (start == from_id and link["to"] == to_id) or (
            start == to_id and link["to"] == from_id
        )

    if any(_connects(link) for link in load_json()["edges"]):
        return f"Edge found between nodes {from_id} and {to_id}."

    return f"Edge not found. Need to create a new edge between the nodes {from_id} and {to_id}."
186
@tool
def add_edge(from_id: int, to_id: int) -> tuple[str, dict]:
    """Add an edge in the edges list."""

    # The docstring above is kept verbatim: for an @tool function it is the
    # description exposed to the agent.
    #
    # Appends {"id", "from", "to"} to the stored edge list, persists the
    # graph, and returns a (message, new edge) pair.
    # NOTE(review): neither endpoint is checked against the node list and
    # duplicates are not rejected here; the agent is expected to call
    # search_edge first. Confirm that is sufficient.
    graph = load_json()
    edge_list = graph["edges"]

    # Ids are sequential: the next id is the current edge count.
    created = {"id": len(edge_list), "from": from_id, "to": to_id}
    edge_list.append(created)

    # ``edge_list`` is the list held by ``graph``, so the new edge is already
    # part of what gets saved.
    save_json(graph)

    return f"edge added with the id {created['id']}: ", created
202
# The agent
# This rebinds the module-level ``llm``. ``search_node`` looks the name up at
# call time, so it uses this instance from here on.
llm = init_chat_model(model="gpt-4o", model_provider="openai")

# Tools made available to the agent. The order is kept as-is.
tools = [
    search_node,
    search_edge,
    add_node,
    rename_node,
    add_edge,
    extract_partnerships,
]
207
# System instructions for the agent: its role, its capabilities, and the
# order in which the tools should be used.
_AGENT_SYSTEM_PROMPT = """You are an assistant specialized in analyzing partnerships in the self-driving car market.

Your capabilities:
- Extract partnerships from text articles
- Create and manage a node-based graph of partnerships
- Analyze companies and their relationships to avoid duplicates in the graph

Process for analyzing partnerships:
1. Use extract_partnerships to analyze the input text
2. For each company found, check if similar nodes already exist (search_node) or rename it if necessary (rename_node)
3. Add new nodes if necessary (add_node)
4. Check if relationships already exist (search_edge)
5. Add new edges between the nodes if necessary (add_edge)

Be precise and remember that the graph is undirected. Merge companies and their subsidiaries into a single node."""

# Three-part prompt: the system instructions, the article supplied as
# ``input``, and the ``agent_scratchpad`` placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _AGENT_SYSTEM_PROMPT),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
228
# Build the tool-calling agent from the chat model, the tool list, and the
# prompt defined above.
agent = create_tool_calling_agent(llm, tools, prompt)
234
# Runtime loop that drives the agent and executes its tool calls.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=20,  # upper bound on agent steps for one run
    return_intermediate_steps=True,  # include the tool-call trace in the result
    verbose=True,  # print each step while running
)
242
# Invoke the agent on one article: the "content" value stored under label 48.
article_text = news["content"][48]
result = agent_executor.invoke({"input": article_text})