Encoding#

Custom type encoding#

pydantic provides mechanisms to customize the default json encoding format. pydantic-xml uses custom encoders during the xml serialization too:

class File(BaseXmlModel):
    """Model whose ``created`` field is written through a custom field serializer."""

    created: datetime = element()

    @field_serializer('created')
    def encode_created(self, value: datetime) -> float:
        """Return ``value`` as the float produced by ``datetime.timestamp()``."""
        return value.timestamp()

The following example illustrates how to encode bytes-typed fields as Base64 strings during the xml serialization:

model.py:

import base64
import pathlib
from typing import List, Optional, Union
from xml.etree.ElementTree import canonicalize

from pydantic import field_serializer, field_validator

from pydantic_xml import BaseXmlModel, RootXmlModel, attr, element


class File(BaseXmlModel):
    """A named file whose binary content is stored in the xml document as Base64 text."""

    name: str = attr()
    content: bytes = element()

    @field_serializer('content')
    def encode_content(self, value: bytes) -> str:
        """Encode the raw bytes as a Base64 string."""
        return base64.b64encode(value).decode()

    @field_validator('content', mode='before')
    @classmethod
    def decode_content(cls, value: Optional[Union[str, bytes]]) -> Optional[bytes]:
        """Decode Base64 text back to bytes; any non-``str`` value is passed through unchanged."""
        if isinstance(value, str):
            return base64.b64decode(value)

        return value


class Files(RootXmlModel, tag='files'):
    """Root model tagged ``files`` whose items are ``file`` sub-elements."""

    root: List[File] = element(tag='file', default=[])


# Collect every input file into the root model.
files = Files()
for filename in ['./file1.txt', './file2.txt']:
    content = pathlib.Path(filename).read_bytes()
    files.root.append(File(name=filename, content=content))

expected_xml_doc = pathlib.Path('./doc.xml').read_bytes()

# Both documents are canonicalized before the comparison.
actual_canonical = canonicalize(files.to_xml(), strip_text=True)
expected_canonical = canonicalize(expected_xml_doc, strip_text=True)
assert actual_canonical == expected_canonical

file1.txt:

hello world!!!

file2.txt:

¡Hola Mundo!

doc.xml:

<files>
  <file name="./file1.txt">
    <content>aGVsbG8gd29ybGQhISEK</content>
  </file>
  <file name="./file2.txt">
    <content>wqFIb2xhIE11bmRvIQo=</content>
  </file>
</files>

Custom xml serialization#

pydantic-xml provides functional serializers and validators to customise how a field is serialized to xml or validated from it. Use the pydantic_xml.xml_field_serializer() decorator to mark a method as an xml serializer or the pydantic_xml.xml_field_validator() decorator to mark it as an xml validator.

The following example illustrates how to serialize an xs:list element:

model.py:

import pathlib
from typing import List
from xml.etree.ElementTree import canonicalize

from pydantic_xml import BaseXmlModel, element, xml_field_serializer, xml_field_validator
from pydantic_xml.element import XmlElementReader, XmlElementWriter


class Plot(BaseXmlModel):
    """Plot whose ``x`` and ``y`` lists are stored as space separated text."""

    x: List[float] = element()
    y: List[float] = element()

    @xml_field_validator('x', 'y')
    @classmethod
    def validate_space_separated_list(cls, element: XmlElementReader, field_name: str) -> List[float]:
        """Parse the text of the ``field_name`` sub-element into a list of floats.

        An empty list is returned when the sub-element or its text is missing.
        """
        sub_element = element.pop_element(field_name, search_mode=cls.__xml_search_mode__)
        if not sub_element:
            return []

        text = sub_element.pop_text()
        if not text:
            return []

        return [float(item) for item in text.split()]

    @xml_field_serializer('x', 'y')
    def serialize_space_separated_list(self, element: XmlElementWriter, value: List[float], field_name: str) -> None:
        """Append a ``field_name`` sub-element holding ``value`` joined by single spaces."""
        sub_element = element.make_element(tag=field_name, nsmap=None)
        joined = ' '.join(str(item) for item in value)
        sub_element.set_text(joined)

        element.append_element(sub_element)


# Round-trip: parse the document, serialize it back and compare canonical forms.
xml_doc = pathlib.Path('./doc.xml').read_text()
plot = Plot.from_xml(xml_doc)

serialized = canonicalize(plot.to_xml(), strip_text=True)
expected = canonicalize(xml_doc, strip_text=True)
assert serialized == expected

doc.xml:

<Plot>
    <x>0.0 1.0 2.0 3.0 4.0 5.0</x>
    <y>0.0 3.2 5.4 4.1 2.0 -1.2</y>
</Plot>

pydantic-xml also supports the Annotated typing form to attach metadata to an annotation:

model.py:

import pathlib
from typing import Annotated, List, Type
from xml.etree.ElementTree import canonicalize

import pydantic_xml as pxml
from pydantic_xml.element import XmlElementReader, XmlElementWriter


def validate_space_separated_list(
        cls: Type[pxml.BaseXmlModel],
        element: XmlElementReader,
        field_name: str,
) -> List[float]:
    """Parse the text of the ``field_name`` sub-element into a list of floats.

    :param cls: model class; its ``__xml_search_mode__`` is used for the element lookup
    :param element: reader of the parent element
    :param field_name: tag of the sub-element to read
    :return: the parsed floats, or an empty list when the sub-element is
             missing or carries no text
    """
    sub_element = element.pop_element(field_name, search_mode=cls.__xml_search_mode__)
    if not sub_element:
        return []

    # pop_text() yields nothing for an element without text (e.g. ``<x/>``);
    # guard it instead of calling ``.split()`` on a missing value.
    text = sub_element.pop_text()
    if not text:
        return []

    return [float(item) for item in text.split()]


def serialize_space_separated_list(
        model: pxml.BaseXmlModel,
        element: XmlElementWriter,
        value: List[float],
        field_name: str,
) -> None:
    """Append a ``field_name`` sub-element whose text is ``value`` joined by single spaces."""
    child = element.make_element(tag=field_name, nsmap=None)
    text = ' '.join(str(item) for item in value)
    child.set_text(text)

    element.append_element(child)


# Reusable annotation: a list of floats with the xml validator and serializer
# defined above attached as metadata.
SpaceSeparatedValueList = Annotated[
    List[float],
    pxml.XmlFieldValidator(validate_space_separated_list),
    pxml.XmlFieldSerializer(serialize_space_separated_list),
]


class Plot(pxml.BaseXmlModel):
    """Plot whose ``x`` and ``y`` elements both use the ``SpaceSeparatedValueList`` annotation."""

    x: SpaceSeparatedValueList = pxml.element()
    y: SpaceSeparatedValueList = pxml.element()


# Round-trip: parse the document, serialize it back and compare canonical forms.
xml_doc = pathlib.Path('./doc.xml').read_text()
plot = Plot.from_xml(xml_doc)

serialized = canonicalize(plot.to_xml(), strip_text=True)
expected = canonicalize(xml_doc, strip_text=True)
assert serialized == expected

JSON only field#

To exclude a field from serialization/deserialization for the xml format only, mark it with the pydantic_xml.NoXml annotation:

from typing import Annotated, Optional
from xml.etree.ElementTree import canonicalize

from pydantic_xml import BaseXmlModel, NoXml, element


class Company(BaseXmlModel):
    """Company whose ``website`` field is annotated with ``NoXml``."""

    title: str = element()
    website: Annotated[Optional[str], NoXml] = element(default=None)


# The xml document carries only the title.
xml_doc = '''
<Company>
    <title>SpaceX</title>
</Company>
'''

company = Company.from_xml(xml_doc)

serialized_xml = canonicalize(company.to_xml(), strip_text=True)
assert serialized_xml == canonicalize(xml_doc, strip_text=True)

# The json document carries the website as well.
json_doc = '''
{
    "title": "SpaceX",
    "website": "https://spacex.com/"
}
'''
company = Company.model_validate_json(json_doc)
serialized_json = company.model_dump_json(indent=4)
assert serialized_json == json_doc.strip()

Optional type encoding#

Since the xml format doesn’t support null type natively it is not obvious how to encode None fields (ignore it, encode it as an empty string or mark it as xsi:nil).

None values are encoded as empty strings by default, but the library provides some alternative ways:

Define your own encoding format for None values:

from typing import Annotated, Optional, TypeVar
from xml.etree.ElementTree import canonicalize

from pydantic import BeforeValidator, PlainSerializer

from pydantic_xml import BaseXmlModel, element

InnerType = TypeVar('InnerType')


# NOTE: the helpers are intentionally left without annotations, exactly like
# the lambdas they stand in for.
def _none_to_null(val):
    """Serialize ``None`` as the literal text ``'null'``; other values pass through."""
    if val is not None:
        return val
    return 'null'


def _null_to_none(val):
    """Turn the literal text ``'null'`` back into ``None`` before validation."""
    if val != 'null':
        return val
    return None


XmlOptional = Annotated[
    Optional[InnerType],
    PlainSerializer(_none_to_null),
    BeforeValidator(_null_to_none),
]


class Company(BaseXmlModel):
    """Company whose optional title uses the ``XmlOptional`` annotation."""

    title: XmlOptional[str] = element(default=None)


# The title is spelled as the literal text ``null`` in the document.
xml_doc = '''
<Company>
    <title>null</title>
</Company>
'''

company = Company.from_xml(xml_doc)
assert company.title is None

serialized = canonicalize(company.to_xml(), strip_text=True)
expected = canonicalize(xml_doc, strip_text=True)
assert serialized == expected

Mark empty elements as nillable:

from typing import Optional
from xml.etree.ElementTree import canonicalize

from pydantic_xml import BaseXmlModel, element


class Company(BaseXmlModel):
    """Company whose optional title element is declared with ``nillable=True``."""

    title: Optional[str] = element(default=None, nillable=True)


# The title element is marked with xsi:nil="true" in the document.
xml_doc = '''
<Company>
    <title xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true" />
</Company>
'''

company = Company.from_xml(xml_doc)
assert company.title is None

serialized = canonicalize(company.to_xml(), strip_text=True)
expected = canonicalize(xml_doc, strip_text=True)
assert serialized == expected

Drop empty elements altogether:

from typing import Optional
from pydantic_xml import BaseXmlModel, element

class Company(BaseXmlModel, skip_empty=True):
    """Company declared with ``skip_empty=True``; its only field defaults to ``None``."""

    title: Optional[str] = element(default=None)


company = Company()
# The spelling of an empty element depends on the serialization backend:
# lxml emits ``<Company/>`` while the standard library emits ``<Company />``.
assert company.to_xml() in (b'<Company/>', b'<Company />')

Empty entities exclusion#

It is possible to exclude all empty entities from the resulting xml document at once. To do that just pass skip_empty=True parameter to pydantic_xml.BaseXmlModel.to_xml() during the serialization. That parameter is applied to the root model and all its sub-models by default. But it can be adjusted for a particular model during its declaration as illustrated in the following example:

class Product(BaseXmlModel, tag='Product', skip_empty=True):
    """Product declared with ``skip_empty=True``: two optional attributes and an optional ``Title`` element."""

    status: Optional[Literal['running', 'development']] = attr(default=None)
    launched: Optional[int] = attr(default=None)
    title: Optional[str] = element(tag='Title', default=None)


class Company(BaseXmlModel, tag='Company'):
    """Company declared without ``skip_empty``; holds a tuple of ``Product`` sub-models."""

    trade_name: str = attr(name='trade-name')
    website: str = element(tag='WebSite', default='')

    products: Tuple[Product, ...] = element()


# Products ranging from fully populated to completely empty.
products = [
    Product(status="running", launched=2013, title="Several launch vehicles"),
    Product(status="running", title="Starlink"),
    Product(status="development"),
    Product(),
]

company = Company(trade_name="SpaceX", products=products)

<Company trade-name="SpaceX">
    <WebSite /><!--Company empty elements are not excluded-->

    <!--Product empty sub-elements and attributes are excluded-->
    <Product status="running" launched="2013">
        <Title>Several launch vehicles</Title>
    </Product>
    <Product status="running">
        <Title>Starlink</Title>
    </Product>
    <Product status="development"/>
    <Product />
</Company>

It is also possible to exclude None values:

class Product(BaseXmlModel, tag='Product'):
    """Product with three optional sub-elements, each defaulting to ``None``."""

    title: Optional[str] = element(tag='Title', default=None)
    status: Optional[Literal['running', 'development']] = element(tag='Status', default=None)
    launched: Optional[int] = element(tag='Launched', default=None)


# ``status`` is passed explicitly as None and ``launched`` keeps its None default;
# exclude_none=True is requested so that None values are left out of the output.
product = Product(title="Starlink", status=None)
xml = product.to_xml(exclude_none=True)

<Product>
    <Title>Starlink</Title>
</Product>

… or unset values:

class Product(BaseXmlModel, tag='Product'):
    """Product with three optional sub-elements, each defaulting to ``None``."""

    title: Optional[str] = element(tag='Title', default=None)
    status: Optional[Literal['running', 'development']] = element(tag='Status', default=None)
    launched: Optional[int] = element(tag='Launched', default=None)


# Only ``title`` and ``status`` are passed explicitly; ``launched`` is never set,
# so exclude_unset=True leaves it out while the explicit None ``status`` is kept.
product = Product(title="Starlink", status=None)
xml = product.to_xml(exclude_unset=True)

<Product>
    <Title>Starlink</Title>
    <Status />
</Product>

Default namespace#

Xml default namespace is a namespace that is applied to the element and all its sub-elements without explicit definition.

In the following example the element company has no explicit namespace but the default namespace for that element and all its sub-elements is http://www.company.com/co. contacts element has no explicit namespace either but it doesn’t inherit it from company because it has its own default namespace. The same goes for socials element except that its sub-elements inherit a namespace from the parent:

<company xmlns="http://www.company.com/co">
    <contacts xmlns="http://www.company.com/cnt" >
        <socials xmlns="http://www.company.com/soc">
            <social>https://www.linkedin.com/company/spacex</social>
            <social>https://twitter.com/spacex</social>
            <social>https://www.youtube.com/spacex</social>
        </socials>
    </contacts>
</company>

A model for that document can be described like this:

class Socials(BaseXmlModel, tag='socials', nsmap={'': 'http://www.company.com/soc'}):
    """``socials`` element with its own default namespace; holds the ``social`` urls."""

    urls: List[str] = element(tag='social')


class Contacts(BaseXmlModel, tag='contacts', nsmap={'': 'http://www.company.com/cnt'}):
    """``contacts`` element with its own default namespace; wraps the ``Socials`` sub-model."""

    socials: Socials = element()


class Company(BaseXmlModel, tag='company', nsmap={'': 'http://www.company.com/co'}):
    """``company`` root element with its own default namespace; wraps the ``Contacts`` sub-model."""

    contacts: Contacts = element()

Look at the model’s nsmap parameter. To set a default namespace for a model and its sub-fields pass that namespace under an empty key.

Default namespace serialization

The standard library xml serializer has a default namespace serialization problem: it doesn’t respect default namespace definitions, moving the namespace definitions to the root element and substituting them with ns{0..} prefixes:

<ns0:company xmlns:ns0="http://www.company.com/co"
             xmlns:ns1="http://www.company.com/cnt"
             xmlns:ns2="http://www.company.com/soc">
    <ns1:contacts>
        <ns2:socials>
            <ns2:social>https://www.linkedin.com/company/spacex</ns2:social>
            <ns2:social>https://twitter.com/spacex</ns2:social>
            <ns2:social>https://www.youtube.com/spacex</ns2:social>
        </ns2:socials>
    </ns1:contacts>
</ns0:company>

That document is still correct but some parsers require namespace declarations to be kept untouched. To avoid that use lxml as the serialization backend since it doesn’t have that kind of problem. See lxml installation.

Computed entities#

pydantic supports computed fields. Computed fields allow property and cached_property to be included when serializing models or dataclasses. This is useful for fields that are computed from other fields, or for fields that are expensive to compute and should be cached.

pydantic-xml provides similar api for xml entities: text, attribute or element properties can be included into the xml document during serialization. To make a property computable decorate it with pydantic.computed_field to bind it to the current element, pydantic_xml.computed_attr() to bind it to an attribute or pydantic_xml.computed_element() to bind it to a sub-element.

The document:

doc.xml:

<Request Client="203.0.113.195">
  <Proxy>150.172.238.178</Proxy>
  <Proxy>150.172.230.21</Proxy>
  <Cookies PHPSESSID="298zf09hf012fh2" csrftoken="u32t4o3tb3gg43"/>
  <Authorization Type="Basic">**********</Authorization>
</Request>

produced by the following model:

model.py:

import pathlib
from ipaddress import IPv4Address
from typing import Dict, List
from xml.etree.ElementTree import canonicalize

from pydantic import Field, IPvAnyAddress, SecretStr, computed_field

from pydantic_xml import BaseXmlModel, attr, computed_attr, computed_element


class Auth(BaseXmlModel, tag='Authorization'):
    """``Authorization`` element: a ``Type`` attribute plus a secret value."""

    type: str = attr(name='Type')
    value: SecretStr


class Request(BaseXmlModel, tag='Request'):
    """Request whose xml entities are all computed from raw header strings.

    The raw fields are declared with ``Field(exclude=True)``; the computed
    attribute, elements and field below are derived from them.
    """

    raw_forwarded_for: str = Field(exclude=True)
    raw_cookies: str = Field(exclude=True)
    raw_auth: str = Field(exclude=True)

    @computed_attr(name='Client')
    def client(self) -> IPv4Address:
        """First address of the comma separated forwarded-for chain."""
        addresses = [IPvAnyAddress(addr) for addr in self.raw_forwarded_for.split(',')]
        return addresses[0]

    @computed_element(tag='Proxy')
    def proxy(self) -> List[IPv4Address]:
        """Every address of the forwarded-for chain after the first one."""
        addresses = [IPvAnyAddress(addr) for addr in self.raw_forwarded_for.split(',')]
        return addresses[1:]

    @computed_element(tag='Cookies')
    def cookies(self) -> Dict[str, str]:
        """Mapping built from the ``;`` separated ``name=value`` pairs; blank chunks are skipped."""
        pairs = []
        for cookie in self.raw_cookies.split(';'):
            pair = cookie.strip()
            if pair:
                pairs.append(tuple(pair.split('=', maxsplit=1)))

        return dict(pairs)

    @computed_field
    def auth(self) -> Auth:
        """``Auth`` sub-model built by splitting the raw value once on whitespace."""
        scheme, credentials = self.raw_auth.split(maxsplit=1)
        return Auth(type=scheme, value=credentials)


# Build the request from raw header values only.
request = Request(
    raw_forwarded_for="203.0.113.195,150.172.238.178,150.172.230.21",
    raw_cookies="PHPSESSID=298zf09hf012fh2; csrftoken=u32t4o3tb3gg43;",
    raw_auth="Basic YWxhZGRpbjpvcGVuc2VzYW1l",
)

xml_doc = pathlib.Path('./doc.xml').read_text()

serialized = canonicalize(request.to_xml(), strip_text=True)
expected = canonicalize(xml_doc, strip_text=True)
assert serialized == expected

XML parser#

pydantic-xml tries to use the fastest xml parser in your system. It uses lxml if it is installed in your environment otherwise falls back to the standard library xml parser.

To force pydantic-xml to use standard xml.etree.ElementTree xml parser set FORCE_STD_XML environment variable.

XML serialization#

The XML serialization process is customizable depending on which backend you use. For example lxml can pretty-print the output document or serialize it using a particular encoding (for more information see lxml.etree.tostring()). To set those parameters pass them to pydantic_xml.BaseXmlModel.to_xml() as extra arguments:

# The keyword arguments below are backend serializer options passed as extra
# arguments (here: options of lxml.etree.tostring()).
xml = obj.to_xml(
    pretty_print=True,
    encoding='UTF-8',
    standalone=True
)

print(xml)

The standard library serializer also supports customizations. For more information see xml.etree.ElementTree.tostring().

Dynamic model creation#

There are some cases when it is necessary to create a model using runtime information to describe model fields. For this pydantic-xml provides the pydantic_xml.create_model() function to create a model on the fly:

# Build the ``Company`` model at runtime; each field is given as a
# ``(type, field definition)`` pair.
Company = create_model(
    'Company',
    trade_name=(str, attr(name='trade-name')),
    type=(str, attr()),
)

Field specification syntax is similar to pydantic one. For more information see the documentation.

Document type declaration#

A document type declaration is an instruction that associates a particular XML document with a document type definition (DTD).

DTD is supported by the lxml backend only so the library doesn’t provide an api for that natively, but it can be easily implemented by hand:

from typing import Any, ClassVar, Union

import pydantic_xml as pxml
import lxml.etree


class DTDXmlModel(pxml.BaseXmlModel):
    """Base model that sets the document type identifiers before serialization.

    Subclasses provide ``DOC_PUBLIC_ID`` and ``DOC_SYSTEM_URL``.
    """

    DOC_PUBLIC_ID: ClassVar[str]
    DOC_SYSTEM_URL: ClassVar[str]

    def to_xml(
            self,
            *,
            skip_empty: bool = False,
            exclude_none: bool = False,
            exclude_unset: bool = False,
            **kwargs: Any,
    ) -> Union[str, bytes]:
        """Serialize the model with lxml after setting the docinfo identifiers.

        :param skip_empty: forwarded to ``to_xml_tree``
        :param exclude_none: forwarded to ``to_xml_tree``
        :param exclude_unset: forwarded to ``to_xml_tree``
        :param kwargs: forwarded to ``lxml.etree.tostring``
        :return: whatever ``lxml.etree.tostring`` returns for the document
        """
        document = lxml.etree.ElementTree(
            self.to_xml_tree(
                skip_empty=skip_empty,
                exclude_none=exclude_none,
                exclude_unset=exclude_unset,
            ),
        )
        document.docinfo.public_id = self.DOC_PUBLIC_ID
        document.docinfo.system_url = self.DOC_SYSTEM_URL

        return lxml.etree.tostring(document, **kwargs)


class Html(DTDXmlModel, tag='html'):
    """``html`` document declaring the HTML 4.01 strict DTD identifiers."""

    DOC_PUBLIC_ID: ClassVar[str] = '-//W3C//DTD HTML 4.01//EN'
    DOC_SYSTEM_URL: ClassVar[str] = 'http://www.w3.org/TR/html4/strict.dtd'

    # ``title`` is an element wrapped into ``head``; ``body`` is a direct sub-element.
    title: str = pxml.wrapped('head', pxml.element())
    body: str = pxml.element()


# pretty_print travels through **kwargs of DTDXmlModel.to_xml() to lxml.etree.tostring().
html_doc = Html(title="This is a title", body="Hello world!")
xml = html_doc.to_xml(pretty_print=True)

print(xml.decode())

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
  <head>
    <title>This is a title</title>
  </head>
  <body>Hello world!</body>
</html>

Mypy#

pydantic-xml provides a mypy plugin that adds some important pydantic-specific features to type-check your code.

To enable the plugin add the following to your mypy.ini config file:

[mypy]
plugins = pydantic_xml.mypy

or pyproject.toml:

[tool.mypy]
plugins = [
  "pydantic_xml.mypy"
]