from typing import Iterator

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class OpenCityDataLoader(BaseLoader):
    """Load from `Open City`."""

    def __init__(self, city_id: str, dataset_id: str, limit: int):
        """Initialize with dataset_id.
        Example: https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6
        e.g., city_id = data.sfgov.org
        e.g., dataset_id = vw6y-z8j6

        Args:
            city_id: The Open City city identifier.
            dataset_id: The Open City dataset identifier.
            limit: The maximum number of documents to load.
        """
        self.city_id = city_id
        self.dataset_id = dataset_id
        self.limit = limit

    def lazy_load(self) -> Iterator[Document]:
        """Lazy load records."""

        from sodapy import Socrata

        client = Socrata(self.city_id, None)
        results = client.get(self.dataset_id, limit=self.limit)
        for record in results:
            yield Document(
                page_content=str(record),
                metadata={
                    "source": self.city_id + "_" + self.dataset_id,
                },
            )
