# Session 03: Intro to LLM Agents

In [None]:
# Optional: install dependencies
#!pip install openai-agents

In [None]:
import json
from IPython.display import display, Markdown

def mdprint(text):
    """Render ``text`` as Markdown in the notebook output area."""
    rendered = Markdown(text)
    display(rendered)

def pprint(result):
    """Print every new item of a run result as indented JSON.

    Each printed entry consists of the item's class followed by the JSON
    form of ``item.to_input_item()``.
    """
    for entry in result.new_items:
        payload = json.dumps(entry.to_input_item(), indent=2)
        print(entry.__class__, payload)

In [None]:
import os

# Base URL handed to the AsyncOpenAI client as `base_url`.
API_URL = "https://api.helmholtz-blablador.fz-juelich.de/v1/"
# Read the key from the environment so that no secret is stored in the notebook.
# Falls back to the "<KEY>" placeholder, which has to be replaced (or the
# BLABLADOR_API_KEY variable set) before real requests can succeed.
API_KEY = os.environ.get("BLABLADOR_API_KEY", "<KEY>")
# Model identifier sent to the endpoint.
API_MODEL = "1 - GPT-OSS-120b - an open model released by OpenAI in August 2025" # Best for fast dev runs

In [None]:
from agents import AsyncOpenAI, set_tracing_disabled, OpenAIChatCompletionsModel, Agent, Runner, ModelSettings
from openai.types.shared import Reasoning
# Disable the tracing feature
set_tracing_disabled(True)

# Instantiate the model with custom endpoint: the client is pointed at API_URL
# instead of the library's default base URL.
# NOTE: API_KEY must be defined before this cell runs.
model = OpenAIChatCompletionsModel(
    model=API_MODEL,
    openai_client=AsyncOpenAI(api_key=API_KEY, base_url=API_URL)
)

### Base Agent

The most basic agent is just an LLM call, without any further specifications.

In [None]:
# Shared user prompt, reused by the base, reasoning, and tool agent runs below.
# NOTE(review): this name shadows the builtin `input`; it is kept because later
# cells pass it along as `input=input`.
input = "Whats the weather like in Kassel?"

In [None]:
# Plain agent: no instructions and no tools, with reasoning effort set to "low".
base_agent = Agent(
    name="base_agent",
    model=model,
    model_settings=ModelSettings(
        reasoning=Reasoning(effort="low")
    )
)
# Run the agent on the shared prompt; top-level `await` works inside a notebook cell.
base_result = await Runner.run(base_agent, input=input)

In [None]:
# Render the base agent's final answer as Markdown.
mdprint(base_result.final_output)

### Reasoning Agent

The base agent can be improved by raising its reasoning effort: allowing the model to "think before answering" usually yields better responses.

In [None]:
reasoning_agent = Agent(
    name="base_agent",
    model=model,
    model_settings=ModelSettings(
        reasoning=Reasoning(effort="high")
    )
)
reasoning_result = await Runner.run(reasoning_agent, input=input)

In [None]:
# Render the reasoning agent's final answer as Markdown.
mdprint(reasoning_result.final_output)
# Uncomment to inspect the raw items produced during the run:
#pprint(reasoning_result)

### Tool Use

As we have seen before, LLMs could greatly benefit from being able to interact with the world to, for example, retrieve up-to-date data. This is achieved through *tools*.

Let's implement a basic weather information tool, based on the `wttr.in` API.

In [None]:
# Shell escape: fetch the raw `j1` (JSON) response from wttr.in for Kassel to inspect its structure.
!curl 'wttr.in/Kassel?format=j1'

In [None]:
import requests
from typing import Any
from agents import Agent, Runner, function_tool

@function_tool
def get_weather(city: str) -> dict[str, Any]:
    """Retrieves the weather forecast for a specified location.

    Args:
        city: Name of the city to look up, e.g. "Kassel".

    Returns:
        The decoded JSON document returned by wttr.in for the `j1` format.

    Raises:
        requests.HTTPError: If wttr.in answers with an error status code.
        requests.RequestException: On connection problems or timeouts.
    """
    from urllib.parse import quote

    # Percent-encode the city so spaces, slashes or "?" cannot alter the URL.
    url = f"https://wttr.in/{quote(city, safe='')}"
    # A timeout keeps a stalled request from blocking the agent run indefinitely.
    response = requests.get(url, params={"format": "j1"}, timeout=10)
    # Surface HTTP errors explicitly instead of failing later while decoding JSON.
    response.raise_for_status()
    return response.json()


In [None]:
# Agent that is instructed to always work through its tools; `get_weather` is
# the only tool it is given.
tool_agent = Agent(
    name="tool_agent",
    model=model,
    instructions="Always use the provided tools to solve the task given by the user. Provide very succint answers.",
    tools=[get_weather],
)
# Same shared prompt as in the earlier runs, now with the weather tool available.
tool_result = await Runner.run(tool_agent, input=input)

In [None]:
# Render the tool agent's final answer as Markdown.
mdprint(tool_result.final_output)

In [None]:
# Second run with a question that does not mention the weather explicitly.
result = await Runner.run(tool_agent, input="Do i need a jacket when going outside in Kassel?")

In [None]:
# Render the answer to the jacket question as Markdown.
mdprint(result.final_output)

### Structured Outputs

For many workflows, it is helpful to have agents return their response in a structured format (most commonly JSON), to be able to parse it into a python data structure and interface with program flow. The `agents` package uses `pydantic` for data modeling internally, so we will opt for that as well.

**Note**: due to API limitations, we cannot use the `output_type` parameter of the `Agent` class directly, but have to emulate its behaviour through explicit prompting.

Let's implement a basic `Feedback` data model, consisting of a written feedback and a score enum, that we can use to control program flow later:

In [None]:
# This is for casting output types to a JSON schema we can supply to the model.
from pydantic.dataclasses import dataclass
from typing import Literal

# Structured verdict the evaluator agent is asked to reply with.
# NOTE(review): documented with comments instead of a docstring on purpose; a
# docstring might be copied into the generated JSON schema — TODO confirm.
@dataclass
class Feedback:
    # Free-text feedback on the judged outline.
    feedback: str
    # Verdict; the refinement loop further down exits once this is "pass".
    score: Literal["pass", "needs_improvement", "fail"]

Pydantic also provides a handy way to generate an explicit JSON schema that models should conform to:

In [None]:
from pydantic import TypeAdapter

# Show the JSON schema generated for `Feedback` as the cell output.
TypeAdapter(Feedback).json_schema()

### LLM-as-a-judge (Adaptive Loops)

We can extend our agent workflow to include multiple agents in multiple roles. For example, consider a story writing task with two agents, with the following flow:
- The first agent generates an outline for a story
- The second agent judges the outline and provides feedback
- We loop until the judge is satisfied with the outline

Here, the structured output defined previously is needed: we can use the `score` property of the judge's output to either continue refining, or exit.

**Note**: make sure to cap the number of iterations, either by prompting or with a hard limit. It's easy to get stuck in an infinite feedback loop otherwise!

In [None]:
# Writer agent: produces a short outline and is told to incorporate any
# feedback found in the conversation.
story_outline_generator = Agent(
    name="story_outline_generator",
    instructions=(
        "You generate a very short story outline based on the user's input. "
        "Do not write a full story, just the outline. "
        "If there is any feedback provided, use it to improve the outline."
    ),
    model=model
)

# Judge agent for the outline loop. Its reply is later parsed with
# `json.loads`, so the schema is embedded as real JSON (double-quoted keys)
# rather than as the repr of a Python dict.
evaluator = Agent(
    name="evaluator",
    instructions=(
        "You evaluate a story outline and decide if it's good enough. "
        "If it's not good enough, you provide feedback on what needs to be improved. "
        "Never give it a pass on the first try. "
        "After 5 attempts, you can give it a pass if the story outline is good enough - do not go for perfection. "
        "Reply in the given structured format, conforming exactly to its specification: "
        f"{json.dumps(TypeAdapter(Feedback).json_schema())}"
    ),
    model=model
)

In [None]:
input_items = [{"content": "A story about a rainy afternoon in Kassel.", "role": "user"}]
outlines = []
feedbacks = []

while True:
    story_outline_result = await Runner.run(story_outline_generator, input_items)
    input_items = story_outline_result.to_input_list()
    outlines.append(story_outline_result.final_output)

    evaluator_result = await Runner.run(evaluator, input_items)
    result = Feedback(**json.loads(evaluator_result.final_output)) # Cast raw response to feedback dataclass
    feedbacks.append(result.feedback)

    print(f"Evaluator score: {result.score}")

    if result.score == "pass":
        print("Story outline is good enough, exiting.")
        break

    print("Re-running with feedback")

    input_items.append({"content": f"Feedback: {result.feedback}", "role": "user"})

In [None]:
# Render the judge's feedback on the very first outline.
mdprint(feedbacks[0])

### Agents-as-tools

We have already considered basic programmatic tools. However, we can extend this idea and use other agents as tools, to delegate tasks from a main coordinating agent. This is as simple as calling the `.as_tool` function of an agent, and passing the result to the main agent.

In [None]:
# Three single-purpose translator agents. They differ only in name, target
# language, and handoff description.
german_agent = Agent(
    name="german_agent",
    instructions="You translate the user's message to German",
    handoff_description="An english to german translator",
    model=model
)

french_agent = Agent(
    name="french_agent",
    instructions="You translate the user's message to French",
    handoff_description="An english to french translator",
    model=model
)

italian_agent = Agent(
    name="italian_agent",
    instructions="You translate the user's message to Italian",
    handoff_description="An english to italian translator",
    model=model
)

# Coordinating agent: the translator agents are exposed to it as tools via
# `.as_tool(...)`.
orchestrator_agent = Agent(
    name="orchestrator_agent",
    instructions=(
        # Adjacent string literals are concatenated as-is, so each sentence
        # needs its own trailing space to keep the prompt readable.
        "You are a translation agent. You use the tools given to you to translate. "
        "If asked for multiple translations, you call the relevant tools in order. "
        "You never translate on your own, you always use the provided tools."
    ),
    tools=[
        german_agent.as_tool(
            tool_name="translate_to_german",
            tool_description="Translate the user's message to German",
        ),
        french_agent.as_tool(
            tool_name="translate_to_french",
            tool_description="Translate the user's message to French",
        ),
        italian_agent.as_tool(
            tool_name="translate_to_italian",
            tool_description="Translate the user's message to Italian",
        ),
    ],
    model=model
)

# Post-processing agent that reviews the translations and merges them into one reply.
synthesizer_agent = Agent(
    name="synthesizer_agent",
    instructions="You inspect translations, correct them if needed, and produce a final concatenated response.",
    model=model
)

In [None]:
# Request two of the three available translations.
msg = "Translate 'It's raining cats and dogs in Kassel.' to german and french."

# First run the orchestrator, then hand its whole conversation to the synthesizer.
orchestrator_result = await Runner.run(orchestrator_agent, msg)
synthesizer_result = await Runner.run(synthesizer_agent, orchestrator_result.to_input_list())

# Render the synthesizer's final answer as Markdown.
mdprint(synthesizer_result.final_output)


### Triage Agents & Handoff

Agents-as-tools have a limited flow: a main agent calls another, which returns a result, and control returns to the main agent. If we want the called agent to continue as the main agent, we can instead implement a *triage* pattern, where agents can hand off tasks to one another, and the called agent continues the main conversation without returning control.

In [None]:
# Language-specific agents used as handoff targets.
# NOTE: this rebinds `german_agent`, replacing the translator agent of the same
# name defined in the agents-as-tools section.
german_agent = Agent(
    name="german_agent",
    instructions="You only speak German",
    model=model
)

spanish_agent = Agent(
    name="spanish_agent",
    instructions="You only speak Spanish",
    model=model
)

english_agent = Agent(
    name="english_agent",
    instructions="You only speak English",
    model=model
)

# Entry-point agent: it is given the three language agents as handoff targets.
triage_agent = Agent(
    name="triage_agent",
    instructions="Handoff to the appropriate agent based on the language of the request.",
    handoffs=[german_agent, spanish_agent, english_agent],
    model=model
)

In [None]:
# English request sent through the triage agent.
msg = "Hi, i would like know more about your return policy."
triage_result = await Runner.run(triage_agent, msg)
mdprint(triage_result.final_output)

In [None]:
# German request sent through the triage agent.
msg = "Guten Tag, ich würde gern wissen wie ich eine Rücksendung erstelle."
triage_result = await Runner.run(triage_agent, msg)
mdprint(triage_result.final_output)

In [None]:
# Display the result object of the most recent triage run as the cell output.
triage_result

### Guardrails

Guardrails are checks that run in parallel to the agent's execution. We distinguish between input guardrails and output guardrails.

Input guardrails are used to, for example:
- Check if input messages are off-topic
- Check that input messages don't violate any policies
- Take over control of the agent's execution if an unexpected input is detected

Output guardrails are used to, for example:
- Check if the output contains sensitive data
- Check if the output is a valid response to the user's message

In [None]:
# Structured verdict the guardrail-check agent is asked to reply with.
@dataclass
class GuardrailCriterion:
    # Explanation for the decision; forwarded as the guardrail's `output_info`.
    reasoning: str
    # Whether the guardrail should trip; forwarded as `tripwire_triggered`.
    trigger: bool

In [None]:
from agents import input_guardrail, GuardrailFunctionOutput

# Helper agent that classifies whether a message is a math-homework request.
# Its reply is parsed with `json.loads` by the guardrail function, so the
# schema is embedded as real JSON rather than as the repr of a Python dict.
input_guardrail_agent = Agent(
    name="Guardrail check",
    instructions=(
        # Adjacent string literals are concatenated as-is; the trailing space
        # keeps the two sentences from running together.
        "Check if the user is asking you to do their math homework. "
        "Reply in the given structured format, conforming exactly to its specification: "
        f"{json.dumps(TypeAdapter(GuardrailCriterion).json_schema())}"
    ),
    model=model
)

@input_guardrail
async def math_guardrail(context, agent, input):
    """Input guardrail that delegates the decision to `input_guardrail_agent`.

    The helper agent's final output is decoded as JSON into a
    `GuardrailCriterion`; its `trigger` flag decides whether the tripwire
    fires and its `reasoning` is passed along as additional info.
    """
    check_run = await Runner.run(input_guardrail_agent, input, context=context.context)
    verdict_fields = json.loads(check_run.final_output)
    criterion = GuardrailCriterion(**verdict_fields)
    return GuardrailFunctionOutput(
        tripwire_triggered=criterion.trigger,
        output_info=criterion.reasoning,
    )

In [None]:
from agents import output_guardrail

@output_guardrail
async def sensitive_data_check(context, agent, output):
    """Output guardrail that trips when the output contains the substring "+49"."""
    return GuardrailFunctionOutput(
        tripwire_triggered="+49" in output,
        output_info="Phone number in response!",
    )

In [None]:
from agents import InputGuardrailTripwireTriggered, OutputGuardrailTripwireTriggered

# Main agent with both guardrails attached: `math_guardrail` on the input side
# and `sensitive_data_check` on the output side.
agent = Agent(
    name="Friendly agent",
    instructions="You are a friendly helpful agent, eager to help the user with whatever they request.",
    input_guardrails=[math_guardrail],
    output_guardrails=[sensitive_data_check],
    model=model
)

async def call(prompt):
    """Run the guarded agent on `prompt` and print the outcome.

    Prints the agent's final output, or a fixed apology when the input or
    the output guardrail tripwire is triggered.
    """
    try:
        outcome = await Runner.run(agent, prompt)
    except InputGuardrailTripwireTriggered:
        print("Sorry, I can't help you with your math homework.")
    except OutputGuardrailTripwireTriggered:
        print("Sorry, I can't provide you with sensitive data.")
    else:
        print(outcome.final_output)

In [None]:
# A math request, meant to exercise the input guardrail.
await call("Please solve this equation for x: 4x^2 + 2x = 19")

In [None]:
# A request for a phone number, meant to exercise the output guardrail.
await call("Can you give me the phone number of the university of kassel?")