
API Reference

This page provides detailed information about the LangGraph OpenAI Serve API endpoints and schemas.

langgraph-openai-serve package.

GraphConfig

Bases: BaseModel

resolve_graph async

resolve_graph()

Get the graph instance, handling both direct instances and async callables.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
async def resolve_graph(self) -> CompiledStateGraph:
    """Get the graph instance, handling both direct instances and async callables."""
    if inspect.iscoroutinefunction(self.graph):
        return await self.graph()
    return self.graph
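The snippet below is a minimal sketch of how the two supported forms can be registered; my_compiled_graph is a placeholder for an existing CompiledStateGraph, and the import path mirrors the source location shown above.

from langgraph.graph.state import CompiledStateGraph
from langgraph_openai_serve.graph.graph_registry import GraphConfig, GraphRegistry

async def build_heavy_graph() -> CompiledStateGraph:
    """Hypothetical async factory; resolve_graph awaits it when the graph is requested."""
    ...

registry = GraphRegistry(
    registry={
        "my-graph": GraphConfig(graph=my_compiled_graph),     # already-compiled graph instance
        "heavy-graph": GraphConfig(graph=build_heavy_graph),  # async callable, resolved lazily
    }
)

# Inside an async context:
# graph = await registry.get_graph("heavy-graph").resolve_graph()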

GraphRegistry

Bases: BaseModel

get_graph

get_graph(name)

Get a graph by its name.

Parameters:

- name (str): The name of the graph to retrieve. Required.

Returns:

- GraphConfig: The graph configuration associated with the given name.

Raises:

- ValueError: If the graph name is not found in the registry.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph(self, name: str) -> GraphConfig:
    """Get a graph by its name.

    Args:
        name: The name of the graph to retrieve.

    Returns:
        The graph configuration associated with the given name.

    Raises:
        ValueError: If the graph name is not found in the registry.
    """
    if name not in self.registry:
        raise ValueError(f"Graph '{name}' not found in registry.")
    return self.registry[name]

get_graph_names

get_graph_names()

Get the names of all registered graphs.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph_names(self) -> list[str]:
    """Get the names of all registered graphs."""
    return list(self.registry.keys())

LangchainOpenaiApiServe

LangchainOpenaiApiServe(app=None, graphs=None, configure_cors=False)

Server class to connect LangGraph instances with an OpenAI-compatible API.

This class serves as a bridge between LangGraph instances and an OpenAI-compatible API. It allows users to register their LangGraph instances and expose them through a FastAPI application.

Attributes:

- app: The FastAPI application to attach routers to.
- graphs: A GraphRegistry instance containing the graphs to serve.

Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

Parameters:

- app (FastAPI | None): The FastAPI application to attach routers to. If None, a new FastAPI app will be created. Defaults to None.
- graphs (GraphRegistry | None): A GraphRegistry instance containing the graphs to serve. If None, a default simple graph will be used. Defaults to None.
- configure_cors (bool): Optional; whether to configure CORS for the FastAPI application. Defaults to False.
Source code in src/langgraph_openai_serve/openai_server.py
def __init__(
    self,
    app: FastAPI | None = None,
    graphs: GraphRegistry | None = None,
    configure_cors: bool = False,
):
    """Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

    Args:
        app: The FastAPI application to attach routers to. If None, a new FastAPI app will be created.
        graphs: A GraphRegistry instance containing the graphs to serve.
                If None, a default simple graph will be used.
        configure_cors: Optional; Whether to configure CORS for the FastAPI application.
    """
    self.app = app

    if app is None:
        app = FastAPI(
            title="LangGraph OpenAI Compatible API",
            description="An OpenAI-compatible API for LangGraph",
            version="0.0.1",
        )
    self.app = app

    if graphs is None:
        logger.info("Graphs not provided, using default simple graph")
        default_graph_config = GraphConfig(graph=simple_graph)
        self.graph_registry = GraphRegistry(
            registry={"simple-graph": default_graph_config}
        )
    elif isinstance(graphs, GraphRegistry):
        logger.info("Using provided GraphRegistry instance")
        self.graph_registry = graphs
    else:
        raise TypeError(
            "Invalid type for graphs parameter. Expected GraphRegistry or None."
        )

    # Attach the registry to the app's state for dependency injection
    self.app.state.graph_registry = self.graph_registry

    # Configure CORS if requested
    if configure_cors:
        self._configure_cors()

    logger.info(
        f"Initialized LangchainOpenaiApiServe with {len(self.graph_registry.registry)} graphs"
    )
    logger.info(
        f"Available graphs: {', '.join(self.graph_registry.get_graph_names())}"
    )

bind_openai_chat_completion

bind_openai_chat_completion(prefix='/v1')

Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

Parameters:

- prefix (str): Optional; the URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
Source code in src/langgraph_openai_serve/openai_server.py
def bind_openai_chat_completion(self, prefix: str = "/v1"):
    """Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

    Args:
        prefix: Optional; The URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
    """
    self.app.include_router(chat_views.router, prefix=prefix)
    self.app.include_router(health_views.router, prefix=prefix)
    self.app.include_router(models_views.router, prefix=prefix)

    logger.info(f"Bound OpenAI chat completion endpoints with prefix: {prefix}")

    return self
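Putting the two calls together, a minimal serving script might look like this (a sketch; the module name main and the uvicorn invocation are illustrative):

from fastapi import FastAPI
from langgraph_openai_serve import LangchainOpenaiApiServe

app = FastAPI()
# No graphs provided, so the default "simple-graph" is registered.
serve = LangchainOpenaiApiServe(app=app, configure_cors=True)
serve.bind_openai_chat_completion(prefix="/v1")

# Launch with, for example: uvicorn main:app --host 0.0.0.0 --port 8000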

api

chat

schemas

Pydantic models for the OpenAI API.

This module defines Pydantic models that match the OpenAI API request and response formats.

ChatCompletionRequest

Bases: BaseModel

Model for a chat completion request.

ChatCompletionRequestMessage

Bases: BaseModel

Model for a chat completion request message.

ChatCompletionResponse

Bases: BaseModel

Model for a chat completion response.

ChatCompletionResponseChoice

Bases: BaseModel

Model for a chat completion response choice.

ChatCompletionResponseMessage

Bases: BaseModel

Model for a chat completion response message.

ChatCompletionStreamResponse

Bases: BaseModel

Model for a chat completion stream response.

ChatCompletionStreamResponseChoice

Bases: BaseModel

Model for a chat completion stream response choice.

ChatCompletionStreamResponseDelta

Bases: BaseModel

Model for a chat completion stream response delta.

ChatMessage

Bases: BaseModel

Model for a chat message.

FunctionCall

Bases: BaseModel

Model for a function call.

FunctionDefinition

Bases: BaseModel

Model for a function definition.

Role

Bases: str, Enum

Role options for chat messages.

Tool

Bases: BaseModel

Model for a tool.

ToolCall

Bases: BaseModel

Model for a tool call.

ToolCallFunction

Bases: BaseModel

Model for a tool call function.

ToolFunction

Bases: BaseModel

Model for a tool function.

UsageInfo

Bases: BaseModel

Model for usage information.
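These models mirror OpenAI's chat schema. A minimal request body accepted by ChatCompletionRequest looks roughly like the sketch below; only the fields used elsewhere on this page (model, messages, stream) are shown, and other optional fields follow the OpenAI format.

# Sketch of a minimal chat completion payload.
payload = {
    "model": "simple-graph",  # must match a registered graph name
    "messages": [
        {"role": "user", "content": "Hello!"},
    ],
    "stream": False,  # set to True to receive server-sent events
}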

service

Chat completion service.

This module provides a service for handling chat completions, implementing business logic that was previously in the router.

ChatCompletionService

Service for handling chat completions.

generate_completion async
generate_completion(chat_request, graph_registry)

Generate a chat completion.

Parameters:

- chat_request (ChatCompletionRequest): The chat completion request. Required.
- graph_registry (GraphRegistry): The GraphRegistry object containing registered graphs. Required.

Returns:

- ChatCompletionResponse: A chat completion response.

Raises:

- Exception: If there is an error generating the completion.

Source code in src/langgraph_openai_serve/api/chat/service.py
async def generate_completion(
    self, chat_request: ChatCompletionRequest, graph_registry: GraphRegistry
) -> ChatCompletionResponse:
    """Generate a chat completion.

    Args:
        chat_request: The chat completion request.
        graph_registry: The GraphRegistry object containing registered graphs.

    Returns:
        A chat completion response.

    Raises:
        Exception: If there is an error generating the completion.
    """
    start_time = time.time()

    # Get the completion from the LangGraph model
    completion, tokens_used = await run_langgraph(
        model=chat_request.model,
        messages=chat_request.messages,
        graph_registry=graph_registry,
    )

    # Build the response
    response = ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4()}",
        created=int(time.time()),
        model=chat_request.model,
        choices=[
            ChatCompletionResponseChoice(
                index=0,
                message=ChatCompletionResponseMessage(
                    role=Role.ASSISTANT,
                    content=completion,
                ),
                finish_reason="stop",
            )
        ],
        usage=UsageInfo(
            prompt_tokens=tokens_used["prompt_tokens"],
            completion_tokens=tokens_used["completion_tokens"],
            total_tokens=tokens_used["total_tokens"],
        ),
    )

    logger.info(
        f"Chat completion finished in {time.time() - start_time:.2f}s. "
        f"Total tokens: {tokens_used['total_tokens']}"
    )

    return response
stream_completion async
stream_completion(chat_request, graph_registry)

Stream a chat completion response.

Parameters:

- chat_request (ChatCompletionRequest): The chat completion request. Required.
- graph_registry (GraphRegistry): The GraphRegistry object containing registered graphs. Required.

Yields:

- AsyncIterator[str]: Chunks of the chat completion response.

Source code in src/langgraph_openai_serve/api/chat/service.py
async def stream_completion(
    self, chat_request: ChatCompletionRequest, graph_registry: GraphRegistry
) -> AsyncIterator[str]:
    """Stream a chat completion response.

    Args:
        chat_request: The chat completion request.
        graph_registry: The GraphRegistry object containing registered graphs.

    Yields:
        Chunks of the chat completion response.
    """
    start_time = time.time()
    response_id = f"chatcmpl-{uuid.uuid4()}"
    created = int(time.time())

    try:
        # Send the initial response with the role
        yield self._format_stream_chunk(
            ChatCompletionStreamResponse(
                id=response_id,
                created=created,
                model=chat_request.model,
                choices=[
                    ChatCompletionStreamResponseChoice(
                        index=0,
                        delta=ChatCompletionStreamResponseDelta(
                            role=Role.ASSISTANT,
                        ),
                        finish_reason=None,
                    )
                ],
            )
        )

        # Stream the completion from the LangGraph model
        async for chunk, _ in run_langgraph_stream(
            model=chat_request.model,
            messages=chat_request.messages,
            graph_registry=graph_registry,
        ):
            # Send the content chunk
            yield self._format_stream_chunk(
                ChatCompletionStreamResponse(
                    id=response_id,
                    created=created,
                    model=chat_request.model,
                    choices=[
                        ChatCompletionStreamResponseChoice(
                            index=0,
                            delta=ChatCompletionStreamResponseDelta(
                                content=chunk,
                            ),
                            finish_reason=None,
                        )
                    ],
                )
            )

        # Send the final response with finish_reason
        yield self._format_stream_chunk(
            ChatCompletionStreamResponse(
                id=response_id,
                created=created,
                model=chat_request.model,
                choices=[
                    ChatCompletionStreamResponseChoice(
                        index=0,
                        delta=ChatCompletionStreamResponseDelta(),
                        finish_reason="stop",
                    )
                ],
            )
        )

        # Send the [DONE] message
        yield "data: [DONE]\n\n"

        logger.info(
            f"Streamed chat completion finished in {time.time() - start_time:.2f}s"
        )

    except Exception as e:
        logger.exception("Error streaming chat completion")
        # In case of an error, send an error message
        error_response = {"error": {"message": str(e), "type": "server_error"}}
        yield f"data: {json.dumps(error_response)}\n\n"
        yield "data: [DONE]\n\n"

views

Chat completion router.

This module provides the FastAPI router for the chat completion endpoint, implementing an OpenAI-compatible interface.

create_chat_completion async
create_chat_completion(chat_request, service, graph_registry)

Create a chat completion.

This endpoint is compatible with OpenAI's chat completion API.

Parameters:

- chat_request (ChatCompletionRequest): The parsed chat completion request. Required.
- graph_registry (Annotated[GraphRegistry, Depends(get_graph_registry_dependency)]): The graph registry dependency. Required.
- service (Annotated[ChatCompletionService, Depends(ChatCompletionService)]): The chat completion service dependency. Required.

Returns:

- StreamingResponse | ChatCompletionResponse: A chat completion response, either as a complete response or as a stream.

Source code in src/langgraph_openai_serve/api/chat/views.py
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(
    chat_request: ChatCompletionRequest,
    service: Annotated[ChatCompletionService, Depends(ChatCompletionService)],
    graph_registry: Annotated[GraphRegistry, Depends(get_graph_registry_dependency)],
) -> StreamingResponse | ChatCompletionResponse:
    """Create a chat completion.

    This endpoint is compatible with OpenAI's chat completion API.

    Args:
        chat_request: The parsed chat completion request.
        graph_registry: The graph registry dependency.
        service: The chat completion service dependency.

    Returns:
        A chat completion response, either as a complete response or as a stream.
    """

    logger.info(
        f"Received chat completion request for model: {chat_request.model}, "
        f"stream: {chat_request.stream}"
    )

    if chat_request.stream:
        logger.info("Streaming chat completion response")
        return StreamingResponse(
            service.stream_completion(chat_request, graph_registry),
            media_type="text/event-stream",
        )

    logger.info("Generating non-streaming chat completion response")
    response = await service.generate_completion(chat_request, graph_registry)
    logger.info("Returning non-streaming chat completion response")
    return response
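Because this endpoint follows OpenAI's chat completion API, the official openai Python client can be pointed at the server. The sketch below assumes the server runs on localhost:8000 with the default /v1 prefix; the client requires an API key string, but nothing shown on this page validates it.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

# Non-streaming completion
resp = client.chat.completions.create(
    model="simple-graph",
    messages=[{"role": "user", "content": "Hi there"}],
)
print(resp.choices[0].message.content)

# Streaming completion
stream = client.chat.completions.create(
    model="simple-graph",
    messages=[{"role": "user", "content": "Hi there"}],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")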

health

views

health_check
health_check()

Checks the health of a project.

It returns 200 if the project is healthy.

Source code in src/langgraph_openai_serve/api/health/views.py
@router.get("")
def health_check() -> None:
    """Checks the health of a project.

    It returns 200 if the project is healthy.
    """

models

schemas

Model

Bases: BaseModel

Individual model information.

ModelList

Bases: BaseModel

List of available models.

ModelPermission

Bases: BaseModel

Model permission information.

service

Model service.

This module provides a service for handling OpenAI model information.

ModelService

Service for handling model operations.

get_models
get_models(graph_registry)

Get a list of available models.

Parameters:

- graph_registry (GraphRegistry): The GraphRegistry containing registered graphs. Required.

Returns:

- ModelList: A list of models in OpenAI compatible format.

Source code in src/langgraph_openai_serve/api/models/service.py
def get_models(self, graph_registry: GraphRegistry) -> ModelList:
    """Get a list of available models.

    Args:
        graph_registry: The GraphRegistry containing registered graphs.

    Returns:
        A list of models in OpenAI compatible format.
    """
    permission = ModelPermission(
        id="modelperm-04cadfeee8ad4eb8ad479a5af3bc261d",
        created=1743771509,
        allow_create_engine=False,
        allow_sampling=True,
        allow_logprobs=True,
        allow_search_indices=False,
        allow_view=True,
        allow_fine_tuning=False,
        organization="*",
        group=None,
        is_blocking=False,
    )

    models = [
        Model(
            id=name,
            created=1743771509,
            owned_by="langgraph-openai-serve",
            root=f"{name}-root",
            parent=None,
            max_model_len=16000,
            permission=[permission],
        )
        for name in graph_registry.registry
    ]

    logger.info(f"Retrieved {len(models)} available models")
    return ModelList(data=models)
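A quick way to confirm which graphs are exposed is to list models through the same OpenAI-compatible client (a sketch; the base URL is an assumption):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")
for model in client.models.list():
    print(model.id)  # one entry per registered graph name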

views

Models router.

This module provides the FastAPI router for the models endpoint, implementing an OpenAI-compatible interface for model listing.

get_graph_registry_dependency
get_graph_registry_dependency(request)

Dependency to get the graph registry from the app state.

Source code in src/langgraph_openai_serve/api/models/views.py
def get_graph_registry_dependency(request: Request) -> GraphRegistry:
    """Dependency to get the graph registry from the app state."""
    return request.app.state.graph_registry
list_models
list_models(service, graph_registry)

Get a list of available models.

Source code in src/langgraph_openai_serve/api/models/views.py
@router.get("", response_model=ModelList)
def list_models(
    service: Annotated[ModelService, Depends(ModelService)],
    graph_registry: Annotated[GraphRegistry, Depends(get_graph_registry_dependency)],
):
    """Get a list of available models."""
    logger.info("Received request to list models")
    models = service.get_models(graph_registry)
    logger.info(f"Returning {len(models.data)} models")
    return models

core

settings

Settings

Bases: BaseSettings

This class loads environment variables, either from the environment or from a .env file, and stores them as class attributes.

NOTE:

- Environment variables always take priority over values loaded from a dotenv file.
- Environment variable names are case-insensitive.
- The type of an environment variable is inferred from the type hint of the class attribute.
- For environment variables that are not set, a default value should be provided.

For more info, see the related pydantic docs: https://docs.pydantic.dev/latest/concepts/pydantic_settings

check_langfuse_settings
check_langfuse_settings(v)

Validate Langfuse settings if enabled.

Source code in src/langgraph_openai_serve/core/settings.py
@field_validator("ENABLE_LANGFUSE")
def check_langfuse_settings(cls, v: bool) -> bool:
    """Validate Langfuse settings if enabled."""
    if v is False:
        return v

    # Check if langfuse package is installed
    if importlib.util.find_spec("langfuse") is None:
        raise RuntimeError(
            "Langfuse is enabled but the 'langfuse' package is not installed. "
            "Please install it, e.g., with `uv add langgraph-openai-serve[tracing]`."
        )

    # Check for required environment variables
    required_env_vars = [
        "LANGFUSE_HOST",
        "LANGFUSE_PUBLIC_KEY",
        "LANGFUSE_SECRET_KEY",
    ]
    missing_vars = [var for var in required_env_vars if os.getenv(var) is None]

    if missing_vars:
        raise RuntimeError(
            "Langfuse is enabled but the following environment variables are not set: "
            f"{', '.join(missing_vars)}. Please set these variables."
        )

    return v
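In practice, enabling tracing requires installing the langfuse extra and setting the three variables checked above, for example via a .env file read by Settings (the values below are placeholders):

ENABLE_LANGFUSE=true
LANGFUSE_HOST=https://cloud.langfuse.com
LANGFUSE_PUBLIC_KEY=pk-lf-...
LANGFUSE_SECRET_KEY=sk-lf-...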

graph

Service package for the LangGraph OpenAI compatible API.

graph_registry

GraphConfig

Bases: BaseModel

resolve_graph async
resolve_graph()

Get the graph instance, handling both direct instances and async callables.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
async def resolve_graph(self) -> CompiledStateGraph:
    """Get the graph instance, handling both direct instances and async callables."""
    if inspect.iscoroutinefunction(self.graph):
        return await self.graph()
    return self.graph

GraphRegistry

Bases: BaseModel

get_graph
get_graph(name)

Get a graph by its name.

Parameters:

- name (str): The name of the graph to retrieve. Required.

Returns:

- GraphConfig: The graph configuration associated with the given name.

Raises:

- ValueError: If the graph name is not found in the registry.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph(self, name: str) -> GraphConfig:
    """Get a graph by its name.

    Args:
        name: The name of the graph to retrieve.

    Returns:
        The graph configuration associated with the given name.

    Raises:
        ValueError: If the graph name is not found in the registry.
    """
    if name not in self.registry:
        raise ValueError(f"Graph '{name}' not found in registry.")
    return self.registry[name]
get_graph_names
get_graph_names()

Get the names of all registered graphs.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph_names(self) -> list[str]:
    """Get the names of all registered graphs."""
    return list(self.registry.keys())

runner

LangGraph runner service.

This module provides functionality to run LangGraph models with an OpenAI-compatible interface. It handles conversion between OpenAI's message format and LangChain's message format, and provides both streaming and non-streaming interfaces for running LangGraph workflows.

Examples:

>>> from langgraph_openai_serve.graph.runner import run_langgraph
>>> response, usage = await run_langgraph("my-model", messages, registry)
>>> from langgraph_openai_serve.graph.runner import run_langgraph_stream
>>> async for chunk, metrics in run_langgraph_stream("my-model", messages, registry):
...     print(chunk)

The module contains the following functions:

- convert_to_lc_messages(messages) - Converts OpenAI messages to LangChain messages.
- register_graphs(graphs) - Validates and returns the provided graph dictionary.
- run_langgraph(model, messages, graph_registry) - Runs a LangGraph model with the given messages.
- run_langgraph_stream(model, messages, graph_registry) - Runs a LangGraph model in streaming mode.

register_graphs

register_graphs(graphs)

Validate and return the provided graph dictionary.

Parameters:

- graphs (Dict[str, Any]): A dictionary mapping graph names to LangGraph instances. Required.

Returns:

- Dict[str, Any]: The validated graph dictionary.

Source code in src/langgraph_openai_serve/graph/runner.py
def register_graphs(graphs: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and return the provided graph dictionary.

    Args:
        graphs: A dictionary mapping graph names to LangGraph instances.

    Returns:
        The validated graph dictionary.
    """
    # Potential future validation can go here
    logger.info(f"Registered {len(graphs)} graphs: {', '.join(graphs.keys())}")
    return graphs

run_langgraph async

run_langgraph(model, messages, graph_registry)

Run a LangGraph model with the given messages using the compiled workflow.

This function processes input messages through a LangGraph workflow and returns the generated response along with token usage information.

Examples:

>>> response, usage = await run_langgraph("my-model", messages, registry)
>>> print(response)
>>> print(usage)

Parameters:

- model (str): The name of the model to use, which also determines which graph to use. Required.
- messages (list[ChatCompletionRequestMessage]): A list of messages to process through the LangGraph. Required.
- graph_registry (GraphRegistry): The GraphRegistry instance containing registered graphs. Required.

Returns:

- tuple[str, dict[str, int]]: A tuple containing the generated response string and a dictionary of token usage information.

Source code in src/langgraph_openai_serve/graph/runner.py
async def run_langgraph(
    model: str,
    messages: list[ChatCompletionRequestMessage],
    graph_registry: GraphRegistry,
) -> tuple[str, dict[str, int]]:
    """Run a LangGraph model with the given messages using the compiled workflow.

    This function processes input messages through a LangGraph workflow and returns
    the generated response along with token usage information.

    Examples:
        >>> response, usage = await run_langgraph("my-model", messages, registry)
        >>> print(response)
        >>> print(usage)

    Args:
        model: The name of the model to use, which also determines which graph to use.
        messages: A list of messages to process through the LangGraph.
        graph_registry: The GraphRegistry instance containing registered graphs.

    Returns:
        A tuple containing the generated response string and a dictionary of token usage information.
    """
    logger.info(f"Running LangGraph model {model} with {len(messages)} messages")
    start_time = time.time()

    # Use graph_registry.get_graph to get the graph config and then the graph
    try:
        graph_config = graph_registry.get_graph(model)
        graph = await graph_config.resolve_graph()
    except ValueError as e:
        logger.error(f"Error getting graph for model '{model}': {e}")
        raise e

    # Convert OpenAI messages to LangChain messages
    lc_messages = convert_to_lc_messages(messages)

    # Run the graph with the messages
    result = await graph.ainvoke({"messages": lc_messages})
    response = result["messages"][-1].content if result["messages"] else ""

    # Calculate token usage (approximate)
    prompt_tokens = sum(len((m.content or "").split()) for m in messages)
    completion_tokens = len((response or "").split())
    token_usage = {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }

    logger.info(f"LangGraph completion generated in {time.time() - start_time:.2f}s")
    return response, token_usage

run_langgraph_stream async

run_langgraph_stream(model, messages, graph_registry)

Run a LangGraph model in streaming mode.

Parameters:

- model (str): The name of the model (graph) to run. Required.
- messages (list[ChatCompletionRequestMessage]): A list of OpenAI-compatible messages. Required.
- graph_registry (GraphRegistry): The registry containing the graph configurations. Required.

Yields:

- AsyncGenerator[tuple[str, dict[str, int]], None]: A tuple containing the content chunk and token usage metrics.

Source code in src/langgraph_openai_serve/graph/runner.py
async def run_langgraph_stream(
    model: str,
    messages: list[ChatCompletionRequestMessage],
    graph_registry: GraphRegistry,
) -> AsyncGenerator[tuple[str, dict[str, int]], None]:
    """Run a LangGraph model in streaming mode.

    Args:
        model: The name of the model (graph) to run.
        messages: A list of OpenAI-compatible messages.
        graph_registry: The registry containing the graph configurations.

    Yields:
        A tuple containing the content chunk and token usage metrics.
    """
    logger.info(f"Starting streaming LangGraph completion for model '{model}'")

    try:
        graph_config = graph_registry.get_graph(model)
        graph = await graph_config.resolve_graph()
        streamable_node_names = graph_config.streamable_node_names
    except ValueError as e:
        logger.error(f"Error getting graph for model '{model}': {e}")
        raise e

    # Convert OpenAI messages to LangChain messages
    lc_messages = convert_to_lc_messages(messages)

    inputs = {"messages": lc_messages}
    runnable_config = None

    if settings.ENABLE_LANGFUSE is True:
        trace = langfuse.trace(user_id="lgos_user", session_id=str(uuid4()))
        handler = trace.get_langchain_handler(update_parent=True)

        runnable_config = RunnableConfig(callbacks=[handler])

    async for event in graph.astream_events(
        inputs, config=runnable_config, version="v2"
    ):
        event_kind = event["event"]
        langgraph_node = event["metadata"].get("langgraph_node", None)

        if event_kind == "on_chat_model_stream":
            if langgraph_node not in streamable_node_names:
                continue

            ai_message_chunk: AIMessageChunk = event["data"]["chunk"]
            ai_message_content = ai_message_chunk.content
            if ai_message_content:
                yield f"{ai_message_content}", {"tokens": 1}

simple_graph

Simple LangGraph agent implementation.

This module defines a simple LangGraph agent that interfaces directly with an LLM model. It creates a straightforward workflow where a single node generates responses to user messages.

Examples:

>>> from langgraph_openai_serve.graph.simple_graph import app
>>> result = await app.ainvoke({"messages": messages})
>>> print(result["messages"][-1].content)

The module contains the following components:

- AgentState - Pydantic BaseModel defining the state schema for the graph.
- generate(state) - Function that processes messages and generates responses.
- workflow - The StateGraph instance defining the workflow.
- app - The compiled workflow application ready for invocation.

AgentState

Bases: BaseModel

Type definition for the agent state.

This BaseModel defines the structure of the state that flows through the graph. It uses the add_messages annotation to properly handle message accumulation.

Attributes:

- messages (Annotated[Sequence[BaseMessage], add_messages]): A sequence of BaseMessage objects annotated with add_messages.

SimpleConfigSchema

Bases: BaseModel

Configurable fields provided by the user.

generate async

generate(state, config)

Generate a response to the latest message in the state.

This function extracts the latest message, creates a prompt with it, runs it through an LLM, and returns the response as an AIMessage.

Parameters:

- state (AgentState): The current state containing the message history. Required.
- config (SimpleConfigSchema): The run configuration; its configurable use_history field controls whether the full message history is used. Required.

Returns:

- dict: A dict with a messages key containing the AI's response.

Source code in src/langgraph_openai_serve/graph/simple_graph.py
async def generate(state: AgentState, config: SimpleConfigSchema) -> dict:
    """Generate a response to the latest message in the state.

    This function extracts the latest message, creates a prompt with it,
    runs it through an LLM, and returns the response as an AIMessage.

    Args:
        state: The current state containing the message history.

    Returns:
        A dict with a messages key containing the AI's response.
    """
    model = ChatOpenAI(model="gpt-4o-mini", temperature=0.7, streaming=True)

    system_message = (
        "system",
        "You are a helpful assistant called Langgraph Openai Serve. Chat with the user with friendly tone",
    )

    if config["configurable"]["use_history"] is False:
        question = state.messages[-1].content

        prompt = ChatPromptTemplate.from_messages(
            [system_message, ("human", "{question}")]
        )

        chain = prompt | model | StrOutputParser()
        response = await chain.ainvoke({"question": question})
    else:
        messages = state.messages
        prompt = ChatPromptTemplate.from_messages([system_message, *messages])
        chain = prompt | model | StrOutputParser()
        response = await chain.ainvoke({})

    return {
        "messages": [AIMessage(content=response)],
    }
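The workflow and compiled app referenced above are not reproduced on this page, but a comparable single-node graph can be assembled roughly as follows (a sketch using the public LangGraph API; the node name and config_schema wiring are assumptions, not the module's exact source):

from langgraph.graph import StateGraph, START, END

workflow = StateGraph(AgentState, config_schema=SimpleConfigSchema)
workflow.add_node("generate", generate)
workflow.add_edge(START, "generate")
workflow.add_edge("generate", END)

app = workflow.compile()  # CompiledStateGraph, usable with ainvoke / astream_events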

openai_server

LangGraph OpenAI API Serve.

This module provides a server class that connects LangGraph instances to an OpenAI-compatible API. It allows users to register their LangGraph instances and expose them through a FastAPI application.

Examples:

>>> from langgraph_openai_serve import LangchainOpenaiApiServe
>>> from langgraph_openai_serve.graph.graph_registry import GraphConfig, GraphRegistry
>>> from fastapi import FastAPI
>>> from your_graphs import simple_graph_1, simple_graph_2
>>>
>>> app = FastAPI(title="LangGraph OpenAI API")
>>> graph_serve = LangchainOpenaiApiServe(
...     app=app,
...     graphs=GraphRegistry(
...         registry={
...             "simple_graph_1": GraphConfig(graph=simple_graph_1),
...             "simple_graph_2": GraphConfig(graph=simple_graph_2),
...         }
...     ),
... )
>>> graph_serve.bind_openai_chat_completion(prefix="/v1")

LangchainOpenaiApiServe

LangchainOpenaiApiServe(app=None, graphs=None, configure_cors=False)

Server class to connect LangGraph instances with an OpenAI-compatible API.

This class serves as a bridge between LangGraph instances and an OpenAI-compatible API. It allows users to register their LangGraph instances and expose them through a FastAPI application.

Attributes:

- app: The FastAPI application to attach routers to.
- graphs: A GraphRegistry instance containing the graphs to serve.

Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

Parameters:

- app (FastAPI | None): The FastAPI application to attach routers to. If None, a new FastAPI app will be created. Defaults to None.
- graphs (GraphRegistry | None): A GraphRegistry instance containing the graphs to serve. If None, a default simple graph will be used. Defaults to None.
- configure_cors (bool): Optional; whether to configure CORS for the FastAPI application. Defaults to False.
Source code in src/langgraph_openai_serve/openai_server.py
def __init__(
    self,
    app: FastAPI | None = None,
    graphs: GraphRegistry | None = None,
    configure_cors: bool = False,
):
    """Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

    Args:
        app: The FastAPI application to attach routers to. If None, a new FastAPI app will be created.
        graphs: A GraphRegistry instance containing the graphs to serve.
                If None, a default simple graph will be used.
        configure_cors: Optional; Whether to configure CORS for the FastAPI application.
    """
    self.app = app

    if app is None:
        app = FastAPI(
            title="LangGraph OpenAI Compatible API",
            description="An OpenAI-compatible API for LangGraph",
            version="0.0.1",
        )
    self.app = app

    if graphs is None:
        logger.info("Graphs not provided, using default simple graph")
        default_graph_config = GraphConfig(graph=simple_graph)
        self.graph_registry = GraphRegistry(
            registry={"simple-graph": default_graph_config}
        )
    elif isinstance(graphs, GraphRegistry):
        logger.info("Using provided GraphRegistry instance")
        self.graph_registry = graphs
    else:
        raise TypeError(
            "Invalid type for graphs parameter. Expected GraphRegistry or None."
        )

    # Attach the registry to the app's state for dependency injection
    self.app.state.graph_registry = self.graph_registry

    # Configure CORS if requested
    if configure_cors:
        self._configure_cors()

    logger.info(
        f"Initialized LangchainOpenaiApiServe with {len(self.graph_registry.registry)} graphs"
    )
    logger.info(
        f"Available graphs: {', '.join(self.graph_registry.get_graph_names())}"
    )

bind_openai_chat_completion

bind_openai_chat_completion(prefix='/v1')

Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

Parameters:

- prefix (str): Optional; the URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
Source code in src/langgraph_openai_serve/openai_server.py
def bind_openai_chat_completion(self, prefix: str = "/v1"):
    """Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

    Args:
        prefix: Optional; The URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
    """
    self.app.include_router(chat_views.router, prefix=prefix)
    self.app.include_router(health_views.router, prefix=prefix)
    self.app.include_router(models_views.router, prefix=prefix)

    logger.info(f"Bound OpenAI chat completion endpoints with prefix: {prefix}")

    return self

schemas

Models package for the LangGraph OpenAI compatible API.

utils

Utility functions.

message

convert_to_lc_messages

convert_to_lc_messages(messages)

Convert OpenAI messages to LangChain messages.

This function converts a list of OpenAI-compatible message objects to their LangChain equivalents for use with LangGraph.

Parameters:

- messages (list[ChatCompletionRequestMessage]): A list of OpenAI chat completion request messages to convert. Required.

Returns:

- list[BaseMessage]: A list of LangChain message objects.

Source code in src/langgraph_openai_serve/utils/message.py
def convert_to_lc_messages(
    messages: list[ChatCompletionRequestMessage],
) -> list[BaseMessage]:
    """Convert OpenAI messages to LangChain messages.

    This function converts a list of OpenAI-compatible message objects to their
    LangChain equivalents for use with LangGraph.

    Args:
        messages: A list of OpenAI chat completion request messages to convert.

    Returns:
        A list of LangChain message objects.
    """

    lc_messages = []
    for m in messages:
        if m.role == "system":
            lc_messages.append(SystemMessage(content=m.content or ""))
        elif m.role == "user":
            lc_messages.append(HumanMessage(content=m.content or ""))
        elif m.role == "assistant":
            lc_messages.append(AIMessage(content=m.content or ""))
    return lc_messages
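A minimal usage sketch follows; the import paths mirror the source locations shown on this page, and it assumes role and content are sufficient to construct ChatCompletionRequestMessage, which are the two fields the loop above reads.

from langgraph_openai_serve.api.chat.schemas import ChatCompletionRequestMessage
from langgraph_openai_serve.utils.message import convert_to_lc_messages

msgs = [
    ChatCompletionRequestMessage(role="system", content="You are terse."),
    ChatCompletionRequestMessage(role="user", content="Ping?"),
]
lc_messages = convert_to_lc_messages(msgs)  # [SystemMessage(...), HumanMessage(...)]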