How to parse ObjectId in a pydantic model?
Asked Answered
C

9

23

I am trying to parse MongoDB records to a pydantic model but failing to do so for ObjectId

From what I understand, I need to set up a validator for ObjectId. I tried both extending the ObjectId class and adding the validator decorator to my class that uses ObjectId, as follows.

from pydantic import BaseModel, validator
from bson.objectid import ObjectId


# Subclass that reuses the name of the imported bson ObjectId; once this
# statement has run, the module-level name `ObjectId` refers to this subclass.
class ObjectId(ObjectId):
    pass
    # Validator hook: yields `validate` as the only validator for this type.
    @classmethod
    def __get_validators__(cls):
        yield cls.validate
    # Accepts only instances of `ObjectId` (the name is looked up at call
    # time, so it is the subclass defined here) and returns their string form.
    @classmethod
    def validate(cls, v):
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)


# First attempt: rely only on the validators supplied by the ObjectId type.
class User(BaseModel):
    who: ObjectId


# Second attempt: same field type plus an extra field-level validator.
class User1(BaseModel):
    who: ObjectId
    # NOTE(review): presumably this runs after ObjectId.validate has already
    # converted the value to str, which would explain the "ObjectId required"
    # error reported below -- confirm against pydantic's validator ordering.
    @validator('who')
    def validate(cls, v):
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)

# Sample input; `ObjectId` here is the subclass defined above, not bson's class.
data = {"who":ObjectId('123456781234567812345678')}

Unfortunately, both "solutions" fail as follows:

>>> test = User(**data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User
id
  field required (type=value_error.missing)
>>> test = User1(**data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User1
who
  ObjectId required (type=type_error)

There is definitely something that I am missing here.

Crystallite answered 27/12, 2019 at 16:28 Comment(0)
C
43

Pydantic 1

Your first test case works fine. The problem is with how you override ObjectId.

from pydantic import BaseModel
from bson.objectid import ObjectId as BsonObjectId


class PydanticObjectId(BsonObjectId):
    """bson ObjectId subclass that carries its own pydantic validator."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield from (cls.validate,)

    @classmethod
    def validate(cls, v):
        """Return the string form of *v*.

        Raises:
            TypeError: if *v* is not a bson ObjectId instance.
        """
        if isinstance(v, BsonObjectId):
            return str(v)
        raise TypeError('ObjectId required')


# Model whose `who` field is validated by PydanticObjectId.
class User(BaseModel):
    who: PydanticObjectId


# Build the model from a real bson ObjectId, then show the parsed result.
user = User(who=BsonObjectId('123456781234567812345678'))
print(user)

prints

who='123456781234567812345678'

Only pydantic should use the pydantic type. Mongo will provide you with bson's ObjectId, so instantiate your data with a real ObjectId. That is why data = {"who":ObjectId('123456781234567812345678')} is wrong: it uses your child ObjectId class.

Pydantic 2

Use AfterValidator https://docs.pydantic.dev/latest/usage/validators/

from typing_extensions import Annotated
from pydantic import BaseModel
from pydantic.functional_validators import AfterValidator
from bson import ObjectId as _ObjectId


def check_object_id(value: str) -> str:
    """Return *value* unchanged when bson accepts it as a valid ObjectId.

    Raises:
        ValueError: if ``_ObjectId.is_valid(value)`` is false.
    """
    if _ObjectId.is_valid(value):
        return value
    raise ValueError('Invalid ObjectId')


# Type alias: a str that must additionally pass check_object_id.
ObjectId = Annotated[str, AfterValidator(check_object_id)]


# Model with a single field of the annotated alias type.
class Example(BaseModel):
    id: ObjectId


# Input is a 24-character hex string.
print(Example(id='5f9b3b3b9d9f3d0001a3b3b3'))
# NOTE(review): '1' presumably fails check_object_id, so this call is expected
# to raise a validation error rather than print -- confirm.
print(Example(id='1'))
Capapie answered 27/12, 2019 at 17:5 Comment(3)
new solution for Pydantic2 ?Prewar
@AliRn docs.pydantic.dev/latest/usage/validators I did this ``` from typing_extensions import Annotated from pydantic import BaseModel from pydantic.functional_validators import AfterValidator from bson import ObjectId as _ObjectId def check_object_id(value: str) -> str: if not _ObjectId.is_valid(value): raise ValueError('Invalid ObjectId') return value ObjectId = Annotated[str, AfterValidator(check_object_id)] class Example(BaseModel): id: ObjectId print(Example(id='5f9b3b3b9d9f3d0001a3b3b3')) print(Example(id='1')) ```Lexington
Why are all of these validators seemingly returning str objects instead of ObjectId instances? Firstly return str(v), then def check_object_id(value: str) -> str:...Sibyls
H
10

Another way to do this with pydantic, which I found useful from another source, is:

Define a file called PyObjectId.py in a models folder.

from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId

class PyObjectId(ObjectId):
    """bson ObjectId subclass usable as a pydantic field type."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield from (cls.validate,)

    @classmethod
    def validate(cls, v):
        """Convert *v* to a bson ObjectId.

        Raises:
            ValueError: if ``ObjectId.is_valid(v)`` is false.
        """
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError("Invalid objectid")

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Declare the field as a string in the generated schema."""
        field_schema["type"] = "string"

Then you can use this in any of your object files like this users.py

from models.PyObjectId import PyObjectId
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
class Users(BaseModel):
    # Exposed under the alias "_id"; when no value is supplied the default
    # factory calls PyObjectId() to create one.
    id: PyObjectId = PydanticField(alias="_id", default_factory=PyObjectId)

    class Config:
        # Encode ObjectId values with str() when producing JSON.
        json_encoders = {ObjectId: str}
        # Required for the _id field.
        arbitrary_types_allowed = True
        # Accept the field name (`id`) as well as the alias when populating.
        allow_population_by_field_name = True
Hammered answered 29/7, 2021 at 17:43 Comment(0)
S
7

Getting Started with MongoDB and FastAPI

Mongo Developers

This code helps you use a JSON encoder:

from bson import ObjectId
from pydantic import BaseModel


class ObjId(ObjectId):
    """bson ObjectId subclass that pydantic can validate from a string."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield cls.validate

    @classmethod
    def validate(cls, v: str):
        """Build an ``ObjId`` from *v*.

        Raises:
            ValueError: if bson rejects *v* with ``InvalidId``.
        """
        # Imported here because the snippet's import block never brings
        # InvalidId into scope; without it the except clause raises NameError.
        from bson.errors import InvalidId

        try:
            return cls(v)
        except InvalidId as exc:
            # Chain the cause so the original bson error stays visible.
            raise ValueError("Not a valid ObjectId") from exc


class Foo(BaseModel):
    # Defaults to None when the field is not supplied.
    object_id_field: ObjId = None

    class Config:
        # Encode ObjId values with str() when producing JSON.
        json_encoders = {ObjId: str}


obj = Foo(object_id_field="60cd778664dc9f75f4aadec8")

# dict() keeps the ObjectId instance.
print(obj.dict())
# {'object_id_field': ObjectId('60cd778664dc9f75f4aadec8')}

# json() goes through json_encoders and emits the string form.
print(obj.json())
# {"object_id_field": "60cd778664dc9f75f4aadec8"}

UPDATE:

You can use this Field type in your pydantic model:

from bson import ObjectId as BaseObjectId

class ObjectId(str):
    """Creating a ObjectId class for pydantic models."""

    @classmethod
    def validate(cls, value):
        """Validate given str value to check if good for being ObjectId.

        Returns the bson ObjectId built from ``str(value)``.

        Raises:
            ValueError: if bson rejects the value with ``InvalidId``.
        """
        # Imported here because the snippet only imports bson's ObjectId;
        # without this the except clause itself raises NameError.
        from bson.errors import InvalidId

        try:
            return BaseObjectId(str(value))
        except InvalidId as e:
            raise ValueError("Not a valid ObjectId") from e

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield cls.validate
Schlueter answered 4/10, 2021 at 6:19 Comment(0)
S
3

After looking at the answers and other articles, I use the following class and register it in ENCODERS_BY_TYPE from pydantic.json to make the encoding global, from str to ObjectId and vice versa.

import bson
import bson.errors 
from pydantic.json import ENCODERS_BY_TYPE


class ObjectId(bson.ObjectId):
    """bson ObjectId subclass usable as a pydantic field type."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield from (cls.validate,)

    @classmethod
    def validate(cls, v):
        """Return *v* as a bson ObjectId, converting it first if it is a str.

        Raises:
            ValueError: if *v* cannot be converted, is not an ObjectId
                instance, or is rejected by ``bson.ObjectId.is_valid``.
        """
        try:
            candidate = bson.ObjectId(v) if isinstance(v, str) else v
            acceptable = (
                isinstance(candidate, (bson.ObjectId, cls))
                and bson.ObjectId.is_valid(candidate)
            )
        except bson.errors.InvalidId:
            acceptable = False

        # Raised outside the except block, as a plain ValueError.
        if not acceptable:
            raise ValueError("Invalid ObjectId")

        return candidate

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Declare the field as a string in the generated schema."""
        field_schema["type"] = "string"


# Register str() as the encoder for both ObjectId classes, but only if the
# subclass has not been registered yet.
if ObjectId not in ENCODERS_BY_TYPE:
    ENCODERS_BY_TYPE.update({ObjectId: str, bson.ObjectId: str})

Sulfonal answered 23/6, 2022 at 10:48 Comment(0)
F
1

After many experimentations I landed on this solution:

Tested with python 3.11

from bson.objectid import ObjectId
from pydantic import BaseModel, validator


def injected_validator(v):
    """Return *v* unchanged if it is an ObjectId; raise TypeError otherwise."""
    if isinstance(v, ObjectId):
        return v

    raise TypeError('ObjectId required')


@classmethod
def __get_validators__(cls):
    """Yield the single validator that gets attached to ObjectId below."""
    yield from (injected_validator,)


# This does the trick: attach the hook to bson's ObjectId class itself so the
# class exposes a validator.
setattr(ObjectId, "__get_validators__", __get_validators__)


def parse_object_id(v):
    """Coerce *v* to an ObjectId.

    ObjectId instances are returned as-is; strings accepted by
    ``ObjectId.is_valid`` are converted.

    Raises:
        TypeError: for any other value.
    """
    if isinstance(v, ObjectId):
        return v

    if isinstance(v, str) and ObjectId.is_valid(v):
        return ObjectId(v)

    raise TypeError(f"Invalid ObjectId: {v}")


class MyModel(BaseModel):
    # Either an ObjectId or None.
    id: ObjectId | None

    @validator("id", pre=True)
    def ensure_id_is_object_id(cls, v):
        """Pass None through; coerce anything else with parse_object_id."""
        if v is None:
            return None
        return parse_object_id(v)


def ensure_oid(v):
    """Assert that ``v.id`` has exactly the type ObjectId."""
    actual_type = type(v.id)
    assert actual_type == ObjectId


# With no arguments the id field is None.
assert MyModel().id is None

# Keyword construction from ObjectId instances.
ensure_oid(MyModel(id=ObjectId()))
ensure_oid(MyModel(id=ObjectId("642796132887d08ca3a7a986")))

# Intellisense warn (but works): Expected type 'ObjectId | None', got 'str' instead
ensure_oid(MyModel(id="642796430b2fb0ed6292d1d2"))

# Construction from a dict (parse_obj) and from a raw JSON string (parse_raw).
ensure_oid(MyModel.parse_obj({"id": ObjectId()}))
ensure_oid(MyModel.parse_obj({"id": "642796893cd44d9ff690a455"}))
ensure_oid(MyModel.parse_obj({"id": ObjectId("642796abb14eb1e6a9183ae5")}))
ensure_oid(MyModel.parse_raw('{"id": "642796924f9a0adbea020d60"}'))

Unfortunately, I couldn't get this working with the _id field name. If you find the solution, please share it with me!

A workaround to this would be to create a property _id like this:

@property
def _id(self) -> ObjectId | None:
    """Expose the value of ``id`` under the name ``_id``."""
    current = self.id
    return current
Frontogenesis answered 1/4, 2023 at 2:43 Comment(0)
H
1

I will share my solution for pydantic 2.

For my use case, I needed the data to be parsed to str when it enters the model as an ObjectId, and parsed to ObjectId when it comes in as str.

from typing_extensions import Annotated

from pydantic import BaseModel, ConfigDict
from pydantic.functional_validators import AfterValidator

from bson.objectid import ObjectId


def object_id_validate(v: ObjectId | str) -> ObjectId | str:
    """Flip a valid ObjectId between its two representations.

    A str is converted to an ObjectId; an ObjectId is converted to its str
    form.

    Raises:
        AssertionError: if ``ObjectId.is_valid(v)`` is false.
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped under `python -O`, which would let invalid input through to
    # ObjectId(v). The exception type and message are unchanged.
    if not ObjectId.is_valid(v):
        raise AssertionError(f'{v} is not a valid ObjectId')
    if isinstance(v, str):
        return ObjectId(v)
    return str(v)


# Type alias: an ObjectId or str that is then passed through object_id_validate.
PyObjectId = Annotated[ObjectId | str, AfterValidator(object_id_validate)]


class MyModel(BaseModel):
    # arbitrary_types_allowed is enabled because ObjectId is not a pydantic type.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    user_id: PyObjectId


# str input comes out as an ObjectId; ObjectId input comes out as a str.
print(MyModel(user_id=str(ObjectId()))) # user_id=ObjectId('653087c8c8640ef5700a1bb5')
print(MyModel(user_id=ObjectId())) # user_id='653087c8c8640ef5700a1bb6'
Housekeeper answered 19/10, 2023 at 1:35 Comment(0)
S
1

Here is a custom type I use (FastAPI: 0.109.0, Pydantic: 2.5.3):

from typing import Annotated, Any

from bson.objectid import ObjectId
from pydantic import BaseModel, Field, ConfigDict
from pydantic import AfterValidator, PlainSerializer, WithJsonSchema

def validate_object_id(v: Any) -> ObjectId:
    """Return *v* as an ObjectId.

    ObjectId instances are returned as-is; other values accepted by
    ``ObjectId.is_valid`` are converted.

    Raises:
        ValueError: if *v* is neither an ObjectId nor a valid ObjectId value.
    """
    if isinstance(v, ObjectId):
        return v
    if not ObjectId.is_valid(v):
        raise ValueError("Invalid ObjectId")
    return ObjectId(v)

# Field type accepting a str or an ObjectId.
ObjectIdType = Annotated[
    str | ObjectId,
    # Serialize with str(), but only when dumping in JSON mode.
    PlainSerializer(lambda x: str(x), return_type=str, when_used="json"),
    # Normalize the incoming value to an ObjectId (or raise ValueError).
    AfterValidator(validate_object_id),
    # Describe the field as a string in the serialization JSON schema.
    WithJsonSchema({"type": "string"}, mode="serialization"),
]

Please pay attention to when_used="json": it lets the default .model_dump method keep the ObjectId as is, while in JSON mode the ObjectId is converted to a string, so it works fine with FastAPI.

class BaseMongoModel(BaseModel):
    # NOTE(review): `populate_by_alias` does not look like a documented
    # ConfigDict key (the documented one is `populate_by_name`) -- confirm
    # whether it has any effect before relying on it.
    model_config = ConfigDict(
        populate_by_alias=True,
        populate_by_name=True,
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )

    # Optional identifier, exposed under the alias "_id"; defaults to None.
    id: ObjectIdType | None = Field(default=None, alias="_id")
Survivor answered 31/1 at 16:42 Comment(0)
A
0

Tom Wojcik's solution slightly modified worked for me:

# The snippet is shown without imports; these make it runnable on its own.
from bson.objectid import ObjectId as BsonObjectId
from pydantic import BaseModel, Field


class PydanticObjectId(BsonObjectId):
    """bson ObjectId subclass that carries its own pydantic validator."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return the string form of *v*.

        Raises:
            TypeError: if *v* is not a bson ObjectId instance.
        """
        if not isinstance(v, BsonObjectId):
            raise TypeError('ObjectId required')
        return str(v)


class Bird(BaseModel):
    # Required field, populated from the "_id" key.
    id: PydanticObjectId = Field(..., alias="_id")

Avie answered 1/5, 2023 at 2:50 Comment(0)
H
0

You can use this class for ObjectId types:

from bson import ObjectId

class PydanticObjectId(ObjectId):
    """bson ObjectId subclass usable as a pydantic field type."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators to run for this type (only ``validate``)."""
        yield cls.validate

    @classmethod
    def validate(cls, value, validation_info=None):
        """Return *value* unchanged when it is a valid ObjectId or ObjectId string.

        Raises:
            ValueError: if *value* is falsy, is neither an ObjectId nor a str,
                or is rejected by ``ObjectId.is_valid``.
        """
        # Call is_valid on the class: going through ObjectId() would build a
        # brand-new throwaway id on every validation just to reach the method.
        if (
            not value
            or not isinstance(value, (ObjectId, str))
            or not ObjectId.is_valid(value)
        ):
            raise ValueError("Not a valid ObjectId")

        return value

    @classmethod
    def __get_pydantic_json_schema__(cls, field_schema, handler):
        """Return a fixed string schema with one example id."""
        return {"type": "string", "examples": ["66488b368a6801e71d70dfe9"]}

For example:

from pydantic import Field, BaseModel

class User(BaseModel):
    # Populated from the "_id" key.
    id: PydanticObjectId = Field(alias="_id")
    # Optional fields; both default to None.
    group_id: PydanticObjectId | None = None
    name: str | None = None

Note: The implementation of __get_pydantic_json_schema__ is for handling the OpenAPI config.

Homes answered 18/5 at 13:32 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.