Source code for langchain.output_parsers.json

from __future__ import annotations

import json
import re
from json import JSONDecodeError
from typing import Any, List

from langchain.schema import BaseOutputParser, OutputParserException


def parse_json_markdown(json_string: str) -> dict:
    """Parse a JSON string from a Markdown string.

    The JSON may be given bare or wrapped in a triple-backtick fence with an
    optional ``json`` language tag (matched case-insensitively). When fences
    are present, the text between the first and the last fence is tried
    first; if that is not valid JSON (for example because the input holds
    several fenced blocks), the first fenced block alone is tried instead.

    Args:
        json_string: The Markdown string.

    Returns:
        The value produced by ``json.loads`` for the extracted text
        (a Python dictionary when the text is a JSON object).

    Raises:
        json.JSONDecodeError: If none of the candidate texts is valid JSON.
    """
    fence_flags = re.DOTALL | re.IGNORECASE

    # Greedy match: spans from the first fence to the last one, so JSON whose
    # string values themselves contain triple backticks stays intact.
    outer = re.search(r"```(json)?(.*)```", json_string, fence_flags)

    # If no fence is found, assume the entire string is a JSON string.
    if outer is None:
        return json.loads(json_string.strip())

    outer_body = outer.group(2).strip()
    try:
        return json.loads(outer_body)
    except json.JSONDecodeError:
        # The greedy span was not valid JSON. This happens when the input
        # contains more than one fenced block, so retry with only the first
        # block (non-greedy match up to the nearest closing fence).
        first = re.search(r"```(json)?(.*?)```", json_string, fence_flags)
        first_body = first.group(2).strip() if first is not None else outer_body
        if first_body == outer_body:
            # Nothing new to try; surface the original decoding error.
            raise
        return json.loads(first_body)
def parse_and_check_json_markdown(text: str, expected_keys: List[str]) -> dict:
    """Parse a JSON string from a Markdown string and check its keys.

    Args:
        text: The Markdown string.
        expected_keys: The keys that must be present in the parsed JSON object.

    Returns:
        The parsed JSON object as a Python dictionary.

    Raises:
        OutputParserException: If the text is not valid JSON, if the parsed
            value is not a JSON object, or if an expected key is missing.
    """
    try:
        json_obj = parse_json_markdown(text)
    except json.JSONDecodeError as e:
        # Chain the decoding error so the traceback shows it as the cause.
        raise OutputParserException(f"Got invalid JSON object. Error: {e}") from e

    # Valid JSON can also be a list, string, number, etc. The key check below
    # only makes sense for an object, and a bare number would otherwise raise
    # a TypeError on the `in` test.
    if not isinstance(json_obj, dict):
        raise OutputParserException(
            "Got invalid return object. Expected a JSON object, "
            f"but got {json_obj}"
        )

    for key in expected_keys:
        if key not in json_obj:
            raise OutputParserException(
                f"Got invalid return object. Expected key `{key}` "
                f"to be present, but got {json_obj}"
            )
    return json_obj
class SimpleJsonOutputParser(BaseOutputParser[Any]):
    """Output parser that decodes the raw LLM text as JSON."""

    def parse(self, text: str) -> Any:
        """Decode ``text`` as JSON after trimming surrounding whitespace.

        Args:
            text: The raw text to decode.

        Returns:
            Whatever ``json.loads`` yields for the trimmed text.

        Raises:
            OutputParserException: If the trimmed text is not valid JSON; the
                underlying ``JSONDecodeError`` is attached as the cause.
        """
        text = text.strip()
        try:
            parsed = json.loads(text)
        except JSONDecodeError as exc:
            raise OutputParserException(f"Invalid json output: {text}") from exc
        return parsed

    @property
    def _type(self) -> str:
        """Return the string identifier of this parser."""
        return "simple_json_output_parser"