Self-driving startup
Automaker
Software & hardware
Transport & delivery
Automotive supplier
Other
AI · Python · Langchain · Graph
🚗 Visualising partnerships on the self-driving car market with an AI agent
AI has the capacity to transform text into structured data to create data visualisations.
Our objective is to create a network graph of partnerships between companies in the autonomous vehicle market using an AI agent. This agent will autonomously identify collaborations from news articles and manage the graph database.
Methodology:
- Collecting news articles related to partnerships on the self-driving car market
- Setting up an extractor to obtain structured data on partnerships
- Creating an agent and tools
- Executing the AI agent on articles and monitoring the results
1import pandas as pd
2import json
3import os
4
5from pydantic import BaseModel, Field
6from typing import Optional, List
7from enum import Enum
8from dotenv import load_dotenv
9
10from langchain.chat_models import init_chat_model
11from langchain_core.prompts import ChatPromptTemplate
12from langchain_core.tools import tool
13from langchain.agents import create_tool_calling_agent, AgentExecutor
14
# The dataset
# Loaded once at module level; the "content" column is what gets passed to the
# agent when it is invoked on an article.
news = pd.read_csv(filepath_or_buffer="news.csv")
17
18#The extractor
# Closed set of company types used by the extractor. Members are also plain
# strings (str mixin); extract_partnerships stores each member's `.value`.
# Documented with comments rather than a class docstring so the generated
# schema is left exactly as it was.
class Category(str, Enum):
    self_driving_startup = "self driving startup"
    automaker = "automaker"
    software_hardware = "software/hardware"
    transport_delivery = "transport/delivery"
    automotive_supplier = "automotive supplier"
    # Fallback bucket for companies that fit none of the types above.
    other = "other"
26
class Company(BaseModel):
    """Information about a company on the self-driving car market"""

    # NOTE(review): the docstring and the field descriptions are kept verbatim;
    # they are presumably forwarded to the model as part of the output schema.
    # Both fields default to None so an unknown value can be left empty.
    name: Optional[str] = Field(
        default=None,
        description="The name of the company",
    )
    category: Optional[Category] = Field(
        default=None,
        description="The type of company on the self-driving car market",
    )
32
33
class Partnership(BaseModel):
    """Information about a partnership"""

    # A partnership needs at least two stakeholders. `min_length` is the
    # Pydantic v2 name for the list-size constraint; the former `min_items`
    # keyword is deprecated there and only kept as a warning-emitting alias.
    # NOTE(review): the default (an empty list) is not validated against the
    # constraint; only explicitly supplied values are.
    companies: List[Company] = Field(
        default_factory=list,
        min_length=2,
        description="The partnership stakeholders",
    )
38
class Data(BaseModel):
    """Extracted data about partnerships"""

    # Top-level container for the extractor's output: zero or more
    # partnerships. Defaults to an empty list.
    partnerships: List[Partnership] = Field(
        default_factory=list,
    )
43
# System instructions for the extraction step, assembled from adjacent string
# literals so each sentence stays on its own source line.
_EXTRACT_SYSTEM_MESSAGE = (
    "You are an expert extraction algorithm. "
    "Only extract partnerships between companies on the self-driving car market from the text. "
    "If you do not know the value of an attribute asked to extract, "
    "return null for the attribute's value."
)

# Two-message prompt: fixed system instructions plus the article body, which is
# substituted for the {text} placeholder at invocation time.
extract_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", _EXTRACT_SYSTEM_MESSAGE),
        ("human", "{text}"),
    ]
)
56
# Load environment configuration before the chat model is built.
load_dotenv()
# NOTE(review): api_key is read here but never passed on below; presumably the
# OpenAI client picks the key up from the environment itself - confirm.
api_key = os.environ.get("OPENAI_API_KEY")
llm = init_chat_model("gpt-4o", model_provider="openai")
# Variant of the chat model whose replies are shaped according to the Data schema.
structured_llm = llm.with_structured_output(schema=Data)
61
# Function to load the data from JSON file
def load_json() -> dict:
    """Load the graph stored in ``data.json`` in the working directory.

    Returns:
        The parsed JSON object. A missing file is treated as an empty graph,
        and the ``"nodes"`` and ``"edges"`` lists are always present, so
        callers can index them without checking first.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        with open("data.json", "r", encoding="utf-8") as data_json:
            data = json.load(data_json)
    except FileNotFoundError:
        # First run: nothing has been saved yet, so start from an empty graph
        # instead of crashing the calling tool.
        data = {}

    # Callers index both lists unconditionally; make sure they exist.
    data.setdefault("nodes", [])
    data.setdefault("edges", [])

    return data
68
# Function to save the data into the JSON file
def save_json(dict: dict):
    """Serialise the given mapping to JSON and overwrite ``data.json`` with it.

    Args:
        dict: The graph data to persist (the name shadows the builtin but is
            part of the existing signature).
    """
    # Serialise before opening the file so that a serialisation error cannot
    # leave a truncated file behind.
    payload = json.dumps(dict)
    with open("data.json", mode="w") as out_file:
        out_file.write(payload)
74
75
76# The tools
@tool
def extract_partnerships(text: str) -> tuple[str, list]:
    """Extract partnerships from self-driving car market text."""
    # NOTE(review): the docstring above is kept verbatim; the @tool decorator
    # presumably exposes it to the agent as the tool description - confirm.

    # Fill the extraction prompt with the article and ask the structured model.
    filled_prompt = extract_prompt_template.invoke({"text": text})
    extraction = structured_llm.invoke(filled_prompt)

    def _as_record(company) -> dict:
        # Flatten a Company into plain JSON-friendly values; a missing
        # category stays None.
        category = company.category
        return {
            "name": company.name,
            "category": category.value if category else None,
        }

    partnerships = [
        {"companies": [_as_record(member) for member in found.companies]}
        for found in extraction.partnerships
    ]

    # Summary sentence first, then the structured records.
    return f"Found {len(partnerships)} partnerships", partnerships
99
@tool
def search_node(name: str, category: str) -> str:
    """Check if a node is already stored in the graph database."""
    # The lookup is delegated to the chat model: it receives the candidate
    # company, its category and the stored node list, and replies with one of
    # the three answer forms described in the prompt below.
    existing_nodes = load_json()["nodes"]

    search_prompt = f"""You are an expert search algorithm. I am going to give you the name of a company, and you have to check if the company is already stored in the list.

    You have 3 possibilities to answer:

    1. Exact match found: If a node with the same or very similar name already exists in the list:
    → Answer: "Node found with id [ID] and name [NAME]."

    2. Similar node found (requires merging): If a node represents the same company but with a different name variation (subsidiaries, regional divisions, different spelling):
    → Answer: "Similar node found with id [ID] and name [NAME]. Should be merged/renamed to [SUGGESTED_NAME] to avoid duplicates"

    Examples of nodes that should be merged:
    - "Toyota China" and "Toyota" → merge to "Toyota"
    - "Tesla Motors" and "Tesla Inc" → merge to "Tesla"
    - "Waymo" and "Google Self-Driving" → merge to "Waymo (Google)"
    - "General Motors" and "GM" → merge to "General Motors"

    3. No match found: If the company is genuinely new and doesn't match any existing node:
    → Answer: "Node not found. Need to create a new node with name [NAME] and category [CATEGORY]"

    Important guidelines:
    - Consider company subsidiaries, regional divisions, and alternative names as the same entity
    - Prioritize the most commonly used or official company name
    - When in doubt between similar companies, prefer merging over creating duplicates
    - Be case-insensitive in your search
    - Look for partial matches and common abbreviations

    The node to search: {name}
    The category of the node: {category}
    The existing nodes list: {existing_nodes}

    Your analysis:
    """

    # Only the text of the model's reply is handed back to the agent.
    return llm.invoke(search_prompt).content
142
@tool
def add_node(name: str, category: str) -> tuple[str, dict]:
    """Add a node to the nodes list."""
    graph = load_json()
    node_list = graph["nodes"]

    # The new id is the current node count, so ids match list positions
    # (rename_node relies on that when it indexes by id).
    node_id = len(node_list)
    new_node = {"id": node_id, "name": name, "category": category}
    node_list.append(new_node)

    # node_list is the list held by graph, so the append is already part of it.
    save_json(graph)

    return f"Node added with the id {node_id}: ", new_node
156
@tool
def rename_node(id: int, new_name: str) -> str:
    """Update the name of a node from a given ID."""
    graph = load_json()
    node_list = graph["nodes"]

    # Ids are used as list positions, so anything outside the list is unknown.
    if not 0 <= id < len(node_list):
        return f"Error: Node with id {id} does not exist."

    target = node_list[id]
    previous_name = target["name"]
    # target is the dict stored inside graph, so this edit is saved below.
    target["name"] = new_name
    save_json(graph)

    return f"Node with id {id} renamed from '{previous_name}' to '{new_name}'."
174
@tool
def search_edge(from_id: int, to_id: int) -> str:
    """Check if an edge is already stored in the graph database."""
    stored_edges = load_json()["edges"]

    # Edges are treated as undirected: a stored edge matches in either direction.
    already_linked = any(
        (edge["from"] == from_id and edge["to"] == to_id)
        or (edge["from"] == to_id and edge["to"] == from_id)
        for edge in stored_edges
    )

    if already_linked:
        return f"Edge found between nodes {from_id} and {to_id}."
    return f"Edge not found. Need to create a new edge between the nodes {from_id} and {to_id}."
186
@tool
def add_edge(from_id: int, to_id: int) -> tuple[str, dict]:
    """Add an edge in the edges list."""
    graph = load_json()
    edge_list = graph["edges"]

    # The new edge's id is the number of edges stored so far.
    edge_id = len(edge_list)
    new_edge = {"id": edge_id, "from": from_id, "to": to_id}
    edge_list.append(new_edge)

    # edge_list is the list held by graph, so the append is already part of it.
    save_json(graph)

    return f"edge added with the id {edge_id}: ", new_edge
202
# The agent
# Chat model that drives the agent (same model and provider as the extractor's).
llm = init_chat_model("gpt-4o", model_provider="openai")

# Every tool the agent is allowed to call.
tools = [
    search_node,
    search_edge,
    add_node,
    rename_node,
    add_edge,
    extract_partnerships,
]
207
# System instructions describing the agent's role and the order in which it is
# expected to use its tools.
_AGENT_SYSTEM_MESSAGE = """You are an assistant specialized in analyzing partnerships in the self-driving car market.

Your capabilities:
- Extract partnerships from text articles
- Create and manage a node-based graph of partnerships
- Analyze companies and their relationships to avoid duplicates in the graph

Process for analyzing partnerships:
1. Use extract_partnerships to analyze the input text
2. For each company found, check if similar nodes already exist (search_node) or rename it if necessary (rename_node)
3. Add new nodes if necessary (add_node)
4. Check if relationships already exist (search_edge)
5. Add new edges between the nodes if necessary (add_edge)

Be precise and remember that the graph is undirected. Merge companies and their subsidiaries into a single node."""

# {input} receives the article text; the {agent_scratchpad} placeholder is the
# slot for the agent's intermediate messages.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _AGENT_SYSTEM_MESSAGE),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
228
# Bind the chat model, the tool set and the prompt into a tool-calling agent.
agent = create_tool_calling_agent(llm=llm, tools=tools, prompt=prompt)
234
# Executor settings: verbose output, at most 20 iterations per run, and
# intermediate steps included in the result so each run can be inspected.
_executor_options = {
    "verbose": True,
    "max_iterations": 20,
    "return_intermediate_steps": True,
}
agent_executor = AgentExecutor(agent=agent, tools=tools, **_executor_options)
242
# Invoke the agent on one article
# Entry 48 of the "content" column is the article fed to the agent.
article_text = news["content"][48]
result = agent_executor.invoke({"input": article_text})