
API Reference

This page provides detailed information about the LangGraph OpenAI Serve API endpoints and schemas.

langgraph-openai-serve package.

GraphConfig

Bases: BaseModel

resolve_graph async

resolve_graph()

Get the graph instance, handling both direct instances and async callables.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
async def resolve_graph(self) -> CompiledStateGraph:
    """Get the graph instance, handling both direct instances and async callables."""
    if inspect.iscoroutinefunction(self.graph):
        return await self.graph()
    return self.graph
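The snippet below is a minimal sketch of how the two supported forms can be registered; my_compiled_graph is a placeholder for an existing CompiledStateGraph, and the import path mirrors the source location shown above.

from langgraph.graph.state import CompiledStateGraph
from langgraph_openai_serve.graph.graph_registry import GraphConfig, GraphRegistry

async def build_heavy_graph() -> CompiledStateGraph:
    """Hypothetical async factory; resolve_graph awaits it when the graph is requested."""
    ...

registry = GraphRegistry(
    registry={
        "my-graph": GraphConfig(graph=my_compiled_graph),     # already-compiled graph instance
        "heavy-graph": GraphConfig(graph=build_heavy_graph),  # async callable, resolved lazily
    }
)

# Inside an async context:
# graph = await registry.get_graph("heavy-graph").resolve_graph()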

GraphRegistry

Bases: BaseModel

get_graph

get_graph(name)

Get a graph by its name.

Parameters:

- name (str): The name of the graph to retrieve. Required.

Returns:

- GraphConfig: The graph configuration associated with the given name.

Raises:

- ValueError: If the graph name is not found in the registry.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph(self, name: str) -> GraphConfig:
    """Get a graph by its name.

    Args:
        name: The name of the graph to retrieve.

    Returns:
        The graph configuration associated with the given name.

    Raises:
        ValueError: If the graph name is not found in the registry.
    """
    if name not in self.registry:
        raise ValueError(f"Graph '{name}' not found in registry.")
    return self.registry[name]

get_graph_names

get_graph_names()

Get the names of all registered graphs.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph_names(self) -> list[str]:
    """Get the names of all registered graphs."""
    return list(self.registry.keys())

LangchainOpenaiApiServe

LangchainOpenaiApiServe(app=None, graphs=None, configure_cors=False)

Server class to connect LangGraph instances with an OpenAI-compatible API.

This class serves as a bridge between LangGraph instances and an OpenAI-compatible API. It allows users to register their LangGraph instances and expose them through a FastAPI application.

Attributes:

- app: The FastAPI application to attach routers to.
- graphs: A GraphRegistry instance containing the graphs to serve.

Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

Parameters:

- app (FastAPI | None): The FastAPI application to attach routers to. If None, a new FastAPI app will be created. Defaults to None.
- graphs (GraphRegistry | None): A GraphRegistry instance containing the graphs to serve. If None, a default simple graph will be used. Defaults to None.
- configure_cors (bool): Optional; whether to configure CORS for the FastAPI application. Defaults to False.
Source code in src/langgraph_openai_serve/openai_server.py
def __init__(
    self,
    app: FastAPI | None = None,
    graphs: GraphRegistry | None = None,
    configure_cors: bool = False,
):
    """Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

    Args:
        app: The FastAPI application to attach routers to. If None, a new FastAPI app will be created.
        graphs: A GraphRegistry instance containing the graphs to serve.
                If None, a default simple graph will be used.
        configure_cors: Optional; Whether to configure CORS for the FastAPI application.
    """
    self.app = app

    if app is None:
        app = FastAPI(
            title="LangGraph OpenAI Compatible API",
            description="An OpenAI-compatible API for LangGraph",
            version="0.0.1",
        )
    self.app = app

    if graphs is None:
        logger.info("Graphs not provided, using default simple graph")
        default_graph_config = GraphConfig(graph=simple_graph)
        self.graph_registry = GraphRegistry(
            registry={"simple-graph": default_graph_config}
        )
    elif isinstance(graphs, GraphRegistry):
        logger.info("Using provided GraphRegistry instance")
        self.graph_registry = graphs
    else:
        raise TypeError(
            "Invalid type for graphs parameter. Expected GraphRegistry or None."
        )

    # Attach the registry to the app's state for dependency injection
    self.app.state.graph_registry = self.graph_registry

    # Configure CORS if requested
    if configure_cors:
        self._configure_cors()

    logger.info(
        f"Initialized LangchainOpenaiApiServe with {len(self.graph_registry.registry)} graphs"
    )
    logger.info(
        f"Available graphs: {', '.join(self.graph_registry.get_graph_names())}"
    )

bind_openai_chat_completion

bind_openai_chat_completion(prefix='/v1')

Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

Parameters:

- prefix (str): Optional; the URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
Source code in src/langgraph_openai_serve/openai_server.py
def bind_openai_chat_completion(self, prefix: str = "/v1"):
    """Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

    Args:
        prefix: Optional; The URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
    """
    self.app.include_router(chat_views.router, prefix=prefix)
    self.app.include_router(health_views.router, prefix=prefix)
    self.app.include_router(models_views.router, prefix=prefix)

    logger.info(f"Bound OpenAI chat completion endpoints with prefix: {prefix}")

    return self
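Putting the two calls together, a minimal serving script might look like this (a sketch; the module name main and the uvicorn invocation are illustrative):

from fastapi import FastAPI
from langgraph_openai_serve import LangchainOpenaiApiServe

app = FastAPI()
# No graphs provided, so the default "simple-graph" is registered.
serve = LangchainOpenaiApiServe(app=app, configure_cors=True)
serve.bind_openai_chat_completion(prefix="/v1")

# Launch with, for example: uvicorn main:app --host 0.0.0.0 --port 8000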

api

chat

schemas

Pydantic models for the OpenAI API.

This module defines Pydantic models that match the OpenAI API request and response formats.

ChatCompletionRequest

Bases: BaseModel

Model for a chat completion request.

ChatCompletionRequestMessage

Bases: BaseModel

Model for a chat completion request message.

ChatCompletionResponse

Bases: BaseModel

Model for a chat completion response.

ChatCompletionResponseChoice

Bases: BaseModel

Model for a chat completion response choice.

ChatCompletionResponseMessage

Bases: BaseModel

Model for a chat completion response message.

ChatCompletionStreamResponse

Bases: BaseModel

Model for a chat completion stream response.

ChatCompletionStreamResponseChoice

Bases: BaseModel

Model for a chat completion stream response choice.

ChatCompletionStreamResponseDelta

Bases: BaseModel

Model for a chat completion stream response delta.

ChatMessage

Bases: BaseModel

Model for a chat message.

FunctionCall

Bases: BaseModel

Model for a function call.

FunctionDefinition

Bases: BaseModel

Model for a function definition.

Role

Bases: str, Enum

Role options for chat messages.

Tool

Bases: BaseModel

Model for a tool.

ToolCall

Bases: BaseModel

Model for a tool call.

ToolCallFunction

Bases: BaseModel

Model for a tool call function.

ToolFunction

Bases: BaseModel

Model for a tool function.

UsageInfo

Bases: BaseModel

Model for usage information.
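These models mirror OpenAI's chat schema. A minimal request body accepted by ChatCompletionRequest looks roughly like the sketch below; only the fields used elsewhere on this page (model, messages, stream) are shown, and other optional fields follow the OpenAI format.

# Sketch of a minimal chat completion payload.
payload = {
    "model": "simple-graph",  # must match a registered graph name
    "messages": [
        {"role": "user", "content": "Hello!"},
    ],
    "stream": False,  # set to True to receive server-sent events
}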

service

Chat completion service.

This module provides a service for handling chat completions, implementing business logic that was previously in the router.

ChatCompletionService

Service for handling chat completions.

generate_completion async
generate_completion(chat_request, graph_registry)

Generate a chat completion.

Parameters:

- chat_request (ChatCompletionRequest): The chat completion request. Required.
- graph_registry (GraphRegistry): The GraphRegistry object containing registered graphs. Required.

Returns:

- ChatCompletionResponse: A chat completion response.

Raises:

- Exception: If there is an error generating the completion.

Source code in src/langgraph_openai_serve/api/chat/service.py
async def generate_completion(
    self, chat_request: ChatCompletionRequest, graph_registry: GraphRegistry
) -> ChatCompletionResponse:
    """Generate a chat completion.

    Args:
        chat_request: The chat completion request.
        graph_registry: The GraphRegistry object containing registered graphs.

    Returns:
        A chat completion response.

    Raises:
        Exception: If there is an error generating the completion.
    """
    start_time = time.time()

    # Get the completion from the LangGraph model
    completion, tokens_used = await run_langgraph(
        model=chat_request.model,
        messages=chat_request.messages,
        graph_registry=graph_registry,
    )

    # Build the response
    response = ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4()}",
        created=int(time.time()),
        model=chat_request.model,
        choices=[
            ChatCompletionResponseChoice(
                index=0,
                message=ChatCompletionResponseMessage(
                    role=Role.ASSISTANT,
                    content=completion,
                ),
                finish_reason="stop",
            )
        ],
        usage=UsageInfo(
            prompt_tokens=tokens_used["prompt_tokens"],
            completion_tokens=tokens_used["completion_tokens"],
            total_tokens=tokens_used["total_tokens"],
        ),
    )

    logger.info(
        f"Chat completion finished in {time.time() - start_time:.2f}s. "
        f"Total tokens: {tokens_used['total_tokens']}"
    )

    return response
stream_completion async
stream_completion(chat_request, graph_registry)

Stream a chat completion response.

Parameters:

- chat_request (ChatCompletionRequest): The chat completion request. Required.
- graph_registry (GraphRegistry): The GraphRegistry object containing registered graphs. Required.

Yields:

- AsyncIterator[str]: Chunks of the chat completion response.

Source code in src/langgraph_openai_serve/api/chat/service.py
async def stream_completion(
    self, chat_request: ChatCompletionRequest, graph_registry: GraphRegistry
) -> AsyncIterator[str]:
    """Stream a chat completion response.

    Args:
        chat_request: The chat completion request.
        graph_registry: The GraphRegistry object containing registered graphs.

    Yields:
        Chunks of the chat completion response.
    """
    start_time = time.time()
    response_id = f"chatcmpl-{uuid.uuid4()}"
    created = int(time.time())

    try:
        # Send the initial response with the role
        yield self._format_stream_chunk(
            ChatCompletionStreamResponse(
                id=response_id,
                created=created,
                model=chat_request.model,
                choices=[
                    ChatCompletionStreamResponseChoice(
                        index=0,
                        delta=ChatCompletionStreamResponseDelta(
                            role=Role.ASSISTANT,
                        ),
                        finish_reason=None,
                    )
                ],
            )
        )

        # Stream the completion from the LangGraph model
        async for chunk, _ in run_langgraph_stream(
            model=chat_request.model,
            messages=chat_request.messages,
            graph_registry=graph_registry,
        ):
            # Send the content chunk
            yield self._format_stream_chunk(
                ChatCompletionStreamResponse(
                    id=response_id,
                    created=created,
                    model=chat_request.model,
                    choices=[
                        ChatCompletionStreamResponseChoice(
                            index=0,
                            delta=ChatCompletionStreamResponseDelta(
                                content=chunk,
                            ),
                            finish_reason=None,
                        )
                    ],
                )
            )

        # Send the final response with finish_reason
        yield self._format_stream_chunk(
            ChatCompletionStreamResponse(
                id=response_id,
                created=created,
                model=chat_request.model,
                choices=[
                    ChatCompletionStreamResponseChoice(
                        index=0,
                        delta=ChatCompletionStreamResponseDelta(),
                        finish_reason="stop",
                    )
                ],
            )
        )

        # Send the [DONE] message
        yield "data: [DONE]\n\n"

        logger.info(
            f"Streamed chat completion finished in {time.time() - start_time:.2f}s"
        )

    except Exception as e:
        logger.exception("Error streaming chat completion")
        # In case of an error, send an error message
        error_response = {"error": {"message": str(e), "type": "server_error"}}
        yield f"data: {json.dumps(error_response)}\n\n"
        yield "data: [DONE]\n\n"

views

Chat completion router.

This module provides the FastAPI router for the chat completion endpoint, implementing an OpenAI-compatible interface.

create_chat_completion async
create_chat_completion(chat_request, service, graph_registry)

Create a chat completion.

This endpoint is compatible with OpenAI's chat completion API.

Parameters:

- chat_request (ChatCompletionRequest): The parsed chat completion request. Required.
- graph_registry (Annotated[GraphRegistry, Depends(get_graph_registry_dependency)]): The graph registry dependency. Required.
- service (Annotated[ChatCompletionService, Depends(ChatCompletionService)]): The chat completion service dependency. Required.

Returns:

- StreamingResponse | ChatCompletionResponse: A chat completion response, either as a complete response or as a stream.

Source code in src/langgraph_openai_serve/api/chat/views.py
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(
    chat_request: ChatCompletionRequest,
    service: Annotated[ChatCompletionService, Depends(ChatCompletionService)],
    graph_registry: Annotated[GraphRegistry, Depends(get_graph_registry_dependency)],
) -> StreamingResponse | ChatCompletionResponse:
    """Create a chat completion.

    This endpoint is compatible with OpenAI's chat completion API.

    Args:
        chat_request: The parsed chat completion request.
        graph_registry: The graph registry dependency.
        service: The chat completion service dependency.

    Returns:
        A chat completion response, either as a complete response or as a stream.
    """

    logger.info(
        f"Received chat completion request for model: {chat_request.model}, "
        f"stream: {chat_request.stream}"
    )

    if chat_request.stream:
        logger.info("Streaming chat completion response")
        return StreamingResponse(
            service.stream_completion(chat_request, graph_registry),
            media_type="text/event-stream",
        )

    logger.info("Generating non-streaming chat completion response")
    response = await service.generate_completion(chat_request, graph_registry)
    logger.info("Returning non-streaming chat completion response")
    return response
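Because this endpoint follows OpenAI's chat completion API, the official openai Python client can be pointed at the server. The sketch below assumes the server runs on localhost:8000 with the default /v1 prefix; the client requires an API key string, but nothing shown on this page validates it.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

# Non-streaming completion
resp = client.chat.completions.create(
    model="simple-graph",
    messages=[{"role": "user", "content": "Hi there"}],
)
print(resp.choices[0].message.content)

# Streaming completion
stream = client.chat.completions.create(
    model="simple-graph",
    messages=[{"role": "user", "content": "Hi there"}],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")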

health

views

health_check
health_check()

Checks the health of a project.

It returns 200 if the project is healthy.

Source code in src/langgraph_openai_serve/api/health/views.py
@router.get("")
def health_check() -> None:
    """Checks the health of a project.

    It returns 200 if the project is healthy.
    """

models

schemas

Model

Bases: BaseModel

Individual model information.

ModelList

Bases: BaseModel

List of available models.

ModelPermission

Bases: BaseModel

Model permission information.

service

Model service.

This module provides a service for handling OpenAI model information.

ModelService

Service for handling model operations.

get_models
get_models(graph_registry)

Get a list of available models.

Parameters:

- graph_registry (GraphRegistry): The GraphRegistry containing registered graphs. Required.

Returns:

- ModelList: A list of models in OpenAI compatible format.

Source code in src/langgraph_openai_serve/api/models/service.py
def get_models(self, graph_registry: GraphRegistry) -> ModelList:
    """Get a list of available models.

    Args:
        graph_registry: The GraphRegistry containing registered graphs.

    Returns:
        A list of models in OpenAI compatible format.
    """
    permission = ModelPermission(
        id="modelperm-04cadfeee8ad4eb8ad479a5af3bc261d",
        created=1743771509,
        allow_create_engine=False,
        allow_sampling=True,
        allow_logprobs=True,
        allow_search_indices=False,
        allow_view=True,
        allow_fine_tuning=False,
        organization="*",
        group=None,
        is_blocking=False,
    )

    models = [
        Model(
            id=name,
            created=1743771509,
            owned_by="langgraph-openai-serve",
            root=f"{name}-root",
            parent=None,
            max_model_len=16000,
            permission=[permission],
        )
        for name in graph_registry.registry
    ]

    logger.info(f"Retrieved {len(models)} available models")
    return ModelList(data=models)
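A quick way to confirm which graphs are exposed is to list models through the same OpenAI-compatible client (a sketch; the base URL is an assumption):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")
for model in client.models.list():
    print(model.id)  # one entry per registered graph name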

views

Models router.

This module provides the FastAPI router for the models endpoint, implementing an OpenAI-compatible interface for model listing.

get_graph_registry_dependency
get_graph_registry_dependency(request)

Dependency to get the graph registry from the app state.

Source code in src/langgraph_openai_serve/api/models/views.py
def get_graph_registry_dependency(request: Request) -> GraphRegistry:
    """Dependency to get the graph registry from the app state."""
    return request.app.state.graph_registry
list_models
list_models(service, graph_registry)

Get a list of available models.

Source code in src/langgraph_openai_serve/api/models/views.py
@router.get("", response_model=ModelList)
def list_models(
    service: Annotated[ModelService, Depends(ModelService)],
    graph_registry: Annotated[GraphRegistry, Depends(get_graph_registry_dependency)],
):
    """Get a list of available models."""
    logger.info("Received request to list models")
    models = service.get_models(graph_registry)
    logger.info(f"Returning {len(models.data)} models")
    return models

core

settings

Settings

Bases: BaseSettings

This class loads environment variables, either from the environment or from a .env file, and stores them as class attributes.

NOTE:

- Environment variables always take priority over values loaded from a dotenv file.
- Environment variable names are case-insensitive.
- The type of an environment variable is inferred from the type hint of the class attribute.
- For environment variables that are not set, a default value should be provided.

For more info, see the related pydantic docs: https://docs.pydantic.dev/latest/concepts/pydantic_settings

check_langfuse_settings
check_langfuse_settings(v)

Validate Langfuse settings if enabled.

Source code in src/langgraph_openai_serve/core/settings.py
@field_validator("ENABLE_LANGFUSE")
def check_langfuse_settings(cls, v: bool) -> bool:
    """Validate Langfuse settings if enabled."""
    if v is False:
        return v

    # Check if langfuse package is installed
    if importlib.util.find_spec("langfuse") is None:
        raise RuntimeError(
            "Langfuse is enabled but the 'langfuse' package is not installed. "
            "Please install it, e.g., with `uv add langgraph-openai-serve[tracing]`."
        )

    # Check for required environment variables
    required_env_vars = [
        "LANGFUSE_HOST",
        "LANGFUSE_PUBLIC_KEY",
        "LANGFUSE_SECRET_KEY",
    ]
    missing_vars = [var for var in required_env_vars if os.getenv(var) is None]

    if missing_vars:
        raise RuntimeError(
            "Langfuse is enabled but the following environment variables are not set: "
            f"{', '.join(missing_vars)}. Please set these variables."
        )

    return v
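In practice, enabling tracing requires installing the langfuse extra and setting the three variables checked above, for example via a .env file read by Settings (the values below are placeholders):

ENABLE_LANGFUSE=true
LANGFUSE_HOST=https://cloud.langfuse.com
LANGFUSE_PUBLIC_KEY=pk-lf-...
LANGFUSE_SECRET_KEY=sk-lf-...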

graph

Service package for the LangGraph OpenAI compatible API.

graph_registry

GraphConfig

Bases: BaseModel

resolve_graph async
resolve_graph()

Get the graph instance, handling both direct instances and async callables.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
async def resolve_graph(self) -> CompiledStateGraph:
    """Get the graph instance, handling both direct instances and async callables."""
    if inspect.iscoroutinefunction(self.graph):
        return await self.graph()
    return self.graph

GraphRegistry

Bases: BaseModel

get_graph
get_graph(name)

Get a graph by its name.

Parameters:

- name (str): The name of the graph to retrieve. Required.

Returns:

- GraphConfig: The graph configuration associated with the given name.

Raises:

- ValueError: If the graph name is not found in the registry.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph(self, name: str) -> GraphConfig:
    """Get a graph by its name.

    Args:
        name: The name of the graph to retrieve.

    Returns:
        The graph configuration associated with the given name.

    Raises:
        ValueError: If the graph name is not found in the registry.
    """
    if name not in self.registry:
        raise ValueError(f"Graph '{name}' not found in registry.")
    return self.registry[name]
get_graph_names
get_graph_names()

Get the names of all registered graphs.

Source code in src/langgraph_openai_serve/graph/graph_registry.py
def get_graph_names(self) -> list[str]:
    """Get the names of all registered graphs."""
    return list(self.registry.keys())

runner

LangGraph runner service.

This module provides functionality to run LangGraph models with an OpenAI-compatible interface. It handles conversion between OpenAI's message format and LangChain's message format, and provides both streaming and non-streaming interfaces for running LangGraph workflows.

Examples:

>>> from langgraph_openai_serve.graph.runner import run_langgraph
>>> response, usage = await run_langgraph("my-model", messages, registry)
>>> from langgraph_openai_serve.graph.runner import run_langgraph_stream
>>> async for chunk, metrics in run_langgraph_stream("my-model", messages, registry):
...     print(chunk)

The module contains the following functions:

- convert_to_lc_messages(messages) - Converts OpenAI messages to LangChain messages.
- register_graphs(graphs) - Validates and returns the provided graph dictionary.
- run_langgraph(model, messages, graph_registry) - Runs a LangGraph model with the given messages.
- run_langgraph_stream(model, messages, graph_registry) - Runs a LangGraph model in streaming mode.

register_graphs

register_graphs(graphs)

Validate and return the provided graph dictionary.

Parameters:

- graphs (Dict[str, Any]): A dictionary mapping graph names to LangGraph instances. Required.

Returns:

- Dict[str, Any]: The validated graph dictionary.

Source code in src/langgraph_openai_serve/graph/runner.py
def register_graphs(graphs: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and return the provided graph dictionary.

    Args:
        graphs: A dictionary mapping graph names to LangGraph instances.

    Returns:
        The validated graph dictionary.
    """
    # Potential future validation can go here
    logger.info(f"Registered {len(graphs)} graphs: {', '.join(graphs.keys())}")
    return graphs

run_langgraph async

run_langgraph(model, messages, graph_registry)

Run a LangGraph model with the given messages using the compiled workflow.

This function processes input messages through a LangGraph workflow and returns the generated response along with token usage information.

Examples:

>>> response, usage = await run_langgraph("my-model", messages, registry)
>>> print(response)
>>> print(usage)

Parameters:

- model (str): The name of the model to use, which also determines which graph to use. Required.
- messages (list[ChatCompletionRequestMessage]): A list of messages to process through the LangGraph. Required.
- graph_registry (GraphRegistry): The GraphRegistry instance containing registered graphs. Required.

Returns:

- tuple[str, dict[str, int]]: A tuple containing the generated response string and a dictionary of token usage information.

Source code in src/langgraph_openai_serve/graph/runner.py
async def run_langgraph(
    model: str,
    messages: list[ChatCompletionRequestMessage],
    graph_registry: GraphRegistry,
) -> tuple[str, dict[str, int]]:
    """Run a LangGraph model with the given messages using the compiled workflow.

    This function processes input messages through a LangGraph workflow and returns
    the generated response along with token usage information.

    Examples:
        >>> response, usage = await run_langgraph("my-model", messages, registry)
        >>> print(response)
        >>> print(usage)

    Args:
        model: The name of the model to use, which also determines which graph to use.
        messages: A list of messages to process through the LangGraph.
        graph_registry: The GraphRegistry instance containing registered graphs.

    Returns:
        A tuple containing the generated response string and a dictionary of token usage information.
    """
    logger.info(f"Running LangGraph model {model} with {len(messages)} messages")
    start_time = time.time()

    # Use graph_registry.get_graph to get the graph config and then the graph
    try:
        graph_config = graph_registry.get_graph(model)
        graph = await graph_config.resolve_graph()
    except ValueError as e:
        logger.error(f"Error getting graph for model '{model}': {e}")
        raise e

    # Convert OpenAI messages to LangChain messages
    lc_messages = convert_to_lc_messages(messages)

    # Run the graph with the messages
    result = await graph.ainvoke({"messages": lc_messages})
    response = result["messages"][-1].content if result["messages"] else ""

    # Calculate token usage (approximate)
    prompt_tokens = sum(len((m.content or "").split()) for m in messages)
    completion_tokens = len((response or "").split())
    token_usage = {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }

    logger.info(f"LangGraph completion generated in {time.time() - start_time:.2f}s")
    return response, token_usage

run_langgraph_stream async

run_langgraph_stream(model, messages, graph_registry)

Run a LangGraph model in streaming mode.

Parameters:

- model (str): The name of the model (graph) to run. Required.
- messages (list[ChatCompletionRequestMessage]): A list of OpenAI-compatible messages. Required.
- graph_registry (GraphRegistry): The registry containing the graph configurations. Required.

Yields:

- AsyncGenerator[tuple[str, dict[str, int]], None]: A tuple containing the content chunk and token usage metrics.

Source code in src/langgraph_openai_serve/graph/runner.py
async def run_langgraph_stream(
    model: str,
    messages: list[ChatCompletionRequestMessage],
    graph_registry: GraphRegistry,
) -> AsyncGenerator[tuple[str, dict[str, int]], None]:
    """Run a LangGraph model in streaming mode.

    Args:
        model: The name of the model (graph) to run.
        messages: A list of OpenAI-compatible messages.
        graph_registry: The registry containing the graph configurations.

    Yields:
        A tuple containing the content chunk and token usage metrics.
    """
    logger.info(f"Starting streaming LangGraph completion for model '{model}'")

    try:
        graph_config = graph_registry.get_graph(model)
        graph = await graph_config.resolve_graph()
        streamable_node_names = graph_config.streamable_node_names
    except ValueError as e:
        logger.error(f"Error getting graph for model '{model}': {e}")
        raise e

    # Convert OpenAI messages to LangChain messages
    lc_messages = convert_to_lc_messages(messages)

    inputs = {"messages": lc_messages}
    runnable_config = None

    if settings.ENABLE_LANGFUSE is True:
        trace = langfuse.trace(user_id="lgos_user", session_id=str(uuid4()))
        handler = trace.get_langchain_handler(update_parent=True)

        runnable_config = RunnableConfig(callbacks=[handler])

    async for event in graph.astream_events(
        inputs, config=runnable_config, version="v2"
    ):
        event_kind = event["event"]
        langgraph_node = event["metadata"].get("langgraph_node", None)

        if event_kind == "on_chat_model_stream":
            if langgraph_node not in streamable_node_names:
                continue

            ai_message_chunk: AIMessageChunk = event["data"]["chunk"]
            ai_message_content = ai_message_chunk.content
            if ai_message_content:
                yield f"{ai_message_content}", {"tokens": 1}

simple_graph

Simple LangGraph agent implementation.

This module defines a simple LangGraph agent that interfaces directly with an LLM model. It creates a straightforward workflow where a single node generates responses to user messages.

Examples:

>>> from langgraph_openai_serve.graph.simple_graph import app
>>> result = await app.ainvoke({"messages": messages})
>>> print(result["messages"][-1].content)

The module contains the following components:

- AgentState - Pydantic BaseModel defining the state schema for the graph.
- generate(state) - Function that processes messages and generates responses.
- workflow - The StateGraph instance defining the workflow.
- app - The compiled workflow application ready for invocation.

AgentState

Bases: BaseModel

Type definition for the agent state.

This BaseModel defines the structure of the state that flows through the graph. It uses the add_messages annotation to properly handle message accumulation.

Attributes:

- messages (Annotated[Sequence[BaseMessage], add_messages]): A sequence of BaseMessage objects annotated with add_messages.

SimpleConfigSchema

Bases: BaseModel

Configurable fields provided by the user.

generate async

generate(state, config)

Generate a response to the latest message in the state.

This function extracts the latest message, creates a prompt with it, runs it through an LLM, and returns the response as an AIMessage.

Parameters:

- state (AgentState): The current state containing the message history. Required.
- config (SimpleConfigSchema): The run configuration; its configurable use_history field controls whether the full message history is used. Required.

Returns:

- dict: A dict with a messages key containing the AI's response.

Source code in src/langgraph_openai_serve/graph/simple_graph.py
async def generate(state: AgentState, config: SimpleConfigSchema) -> dict:
    """Generate a response to the latest message in the state.

    This function extracts the latest message, creates a prompt with it,
    runs it through an LLM, and returns the response as an AIMessage.

    Args:
        state: The current state containing the message history.

    Returns:
        A dict with a messages key containing the AI's response.
    """
    model = ChatOpenAI(model="gpt-4o-mini", temperature=0.7, streaming=True)

    system_message = (
        "system",
        "You are a helpful assistant called Langgraph Openai Serve. Chat with the user with friendly tone",
    )

    if config["configurable"]["use_history"] is False:
        question = state.messages[-1].content

        prompt = ChatPromptTemplate.from_messages(
            [system_message, ("human", "{question}")]
        )

        chain = prompt | model | StrOutputParser()
        response = await chain.ainvoke({"question": question})
    else:
        messages = state.messages
        prompt = ChatPromptTemplate.from_messages([system_message, *messages])
        chain = prompt | model | StrOutputParser()
        response = await chain.ainvoke({})

    return {
        "messages": [AIMessage(content=response)],
    }
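The workflow and compiled app referenced above are not reproduced on this page, but a comparable single-node graph can be assembled roughly as follows (a sketch using the public LangGraph API; the node name and config_schema wiring are assumptions, not the module's exact source):

from langgraph.graph import StateGraph, START, END

workflow = StateGraph(AgentState, config_schema=SimpleConfigSchema)
workflow.add_node("generate", generate)
workflow.add_edge(START, "generate")
workflow.add_edge("generate", END)

app = workflow.compile()  # CompiledStateGraph, usable with ainvoke / astream_events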

openai_server

LangGraph OpenAI API Serve.

This module provides a server class that connects LangGraph instances to an OpenAI-compatible API. It allows users to register their LangGraph instances and expose them through a FastAPI application.

Examples:

>>> from langgraph_openai_serve import LangchainOpenaiApiServe
>>> from langgraph_openai_serve.graph.graph_registry import GraphConfig, GraphRegistry
>>> from fastapi import FastAPI
>>> from your_graphs import simple_graph_1, simple_graph_2
>>>
>>> app = FastAPI(title="LangGraph OpenAI API")
>>> graph_serve = LangchainOpenaiApiServe(
...     app=app,
...     graphs=GraphRegistry(
...         registry={
...             "simple_graph_1": GraphConfig(graph=simple_graph_1),
...             "simple_graph_2": GraphConfig(graph=simple_graph_2),
...         }
...     ),
... )
>>> graph_serve.bind_openai_chat_completion(prefix="/v1")

LangchainOpenaiApiServe

LangchainOpenaiApiServe(app=None, graphs=None, configure_cors=False)

Server class to connect LangGraph instances with an OpenAI-compatible API.

This class serves as a bridge between LangGraph instances and an OpenAI-compatible API. It allows users to register their LangGraph instances and expose them through a FastAPI application.

Attributes:

- app: The FastAPI application to attach routers to.
- graphs: A GraphRegistry instance containing the graphs to serve.

Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

Parameters:

- app (FastAPI | None): The FastAPI application to attach routers to. If None, a new FastAPI app will be created. Defaults to None.
- graphs (GraphRegistry | None): A GraphRegistry instance containing the graphs to serve. If None, a default simple graph will be used. Defaults to None.
- configure_cors (bool): Optional; whether to configure CORS for the FastAPI application. Defaults to False.
Source code in src/langgraph_openai_serve/openai_server.py
def __init__(
    self,
    app: FastAPI | None = None,
    graphs: GraphRegistry | None = None,
    configure_cors: bool = False,
):
    """Initialize the server with a FastAPI app (optional) and a GraphRegistry instance (optional).

    Args:
        app: The FastAPI application to attach routers to. If None, a new FastAPI app will be created.
        graphs: A GraphRegistry instance containing the graphs to serve.
                If None, a default simple graph will be used.
        configure_cors: Optional; Whether to configure CORS for the FastAPI application.
    """
    self.app = app

    if app is None:
        app = FastAPI(
            title="LangGraph OpenAI Compatible API",
            description="An OpenAI-compatible API for LangGraph",
            version="0.0.1",
        )
    self.app = app

    if graphs is None:
        logger.info("Graphs not provided, using default simple graph")
        default_graph_config = GraphConfig(graph=simple_graph)
        self.graph_registry = GraphRegistry(
            registry={"simple-graph": default_graph_config}
        )
    elif isinstance(graphs, GraphRegistry):
        logger.info("Using provided GraphRegistry instance")
        self.graph_registry = graphs
    else:
        raise TypeError(
            "Invalid type for graphs parameter. Expected GraphRegistry or None."
        )

    # Attach the registry to the app's state for dependency injection
    self.app.state.graph_registry = self.graph_registry

    # Configure CORS if requested
    if configure_cors:
        self._configure_cors()

    logger.info(
        f"Initialized LangchainOpenaiApiServe with {len(self.graph_registry.registry)} graphs"
    )
    logger.info(
        f"Available graphs: {', '.join(self.graph_registry.get_graph_names())}"
    )

bind_openai_chat_completion

bind_openai_chat_completion(prefix='/v1')

Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

Parameters:

- prefix (str): Optional; the URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
Source code in src/langgraph_openai_serve/openai_server.py
def bind_openai_chat_completion(self, prefix: str = "/v1"):
    """Bind OpenAI-compatible chat completion endpoints to the FastAPI app.

    Args:
        prefix: Optional; The URL prefix for the OpenAI-compatible endpoints. Defaults to "/v1".
    """
    self.app.include_router(chat_views.router, prefix=prefix)
    self.app.include_router(health_views.router, prefix=prefix)
    self.app.include_router(models_views.router, prefix=prefix)

    logger.info(f"Bound OpenAI chat completion endpoints with prefix: {prefix}")

    return self

schemas

Models package for the LangGraph OpenAI compatible API.

utils

Utility functions.

message

convert_to_lc_messages

convert_to_lc_messages(messages)

Convert OpenAI messages to LangChain messages.

This function converts a list of OpenAI-compatible message objects to their LangChain equivalents for use with LangGraph.

Parameters:

- messages (list[ChatCompletionRequestMessage]): A list of OpenAI chat completion request messages to convert. Required.

Returns:

- list[BaseMessage]: A list of LangChain message objects.

Source code in src/langgraph_openai_serve/utils/message.py
def convert_to_lc_messages(
    messages: list[ChatCompletionRequestMessage],
) -> list[BaseMessage]:
    """Convert OpenAI messages to LangChain messages.

    This function converts a list of OpenAI-compatible message objects to their
    LangChain equivalents for use with LangGraph.

    Args:
        messages: A list of OpenAI chat completion request messages to convert.

    Returns:
        A list of LangChain message objects.
    """

    lc_messages = []
    for m in messages:
        if m.role == "system":
            lc_messages.append(SystemMessage(content=m.content or ""))
        elif m.role == "user":
            lc_messages.append(HumanMessage(content=m.content or ""))
        elif m.role == "assistant":
            lc_messages.append(AIMessage(content=m.content or ""))
    return lc_messages
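A minimal usage sketch follows; the import paths mirror the source locations shown on this page, and it assumes role and content are sufficient to construct ChatCompletionRequestMessage, which are the two fields the loop above reads.

from langgraph_openai_serve.api.chat.schemas import ChatCompletionRequestMessage
from langgraph_openai_serve.utils.message import convert_to_lc_messages

msgs = [
    ChatCompletionRequestMessage(role="system", content="You are terse."),
    ChatCompletionRequestMessage(role="user", content="Ping?"),
]
lc_messages = convert_to_lc_messages(msgs)  # [SystemMessage(...), HumanMessage(...)]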