Skip to content

TextFile

TextFile inherits from File and adds methods for working with text files.

TextFile objects are generated when a DataChain is created from storage with the type="text" parameter:

import datachain as dc

# Passing type="text" makes the chain generate TextFile objects
# for the files read from the storage location.
chain = dc.read_storage("s3://bucket-name/", type="text")

TextFile

TextFile(**kwargs)

Bases: File

DataModel for reading text files.

Source code in datachain/lib/file.py
def __init__(self, **kwargs):
    """Build the model from ``kwargs`` and reset per-instance runtime state.

    All keyword arguments are forwarded unchanged to the parent
    constructor; the private attributes below are then set to their
    initial values.
    """
    super().__init__(**kwargs)
    # Private runtime attributes, assigned after the parent constructor runs.
    self._fs_path_cache: tuple[str, str, str] | None = None
    self._download_cb: Callback = DEFAULT_CALLBACK
    self._caching_enabled: bool = False
    self._catalog = None

open

open(
    mode: str = "r",
    *,
    client_config: dict[str, Any] | None = None,
    **open_kwargs
) -> Iterator[Any]

Open the file and return a file-like object. Defaults to text mode.

Source code in datachain/lib/file.py
@contextmanager
def open(
    self,
    mode: str = "r",
    *,
    client_config: dict[str, Any] | None = None,
    **open_kwargs,
) -> Iterator[Any]:
    """Open the file and yield a file-like object.

    The mode defaults to text (``"r"``). ``mode``, ``client_config`` and any
    extra keyword arguments are passed straight to the parent class's
    ``open``; the stream it produces is yielded as-is.
    """
    parent_cm = super().open(
        mode=mode,
        client_config=client_config,
        **open_kwargs,
    )
    with parent_cm as handle:
        yield handle

read

read(**open_kwargs)

Return file contents as text (default mode for TextFile).

Source code in datachain/lib/file.py
def read(self, **open_kwargs):
    """Return the file contents as text by delegating to ``read_text``.

    Every keyword argument is passed through to ``read_text`` unchanged.
    """
    text = self.read_text(**open_kwargs)
    return text

read_text

read_text(**open_kwargs)

Return file contents as text.

Parameters: **open_kwargs (Any) – extra keyword arguments forwarded to open() (e.g. encoding).

Source code in datachain/lib/file.py
def read_text(self, **open_kwargs):
    """Read the whole file and return what the opened stream yields.

    **open_kwargs : Any
        Extra keyword arguments forwarded to ``open()`` (e.g. encoding).
    """
    with self.open(**open_kwargs) as handle:
        # Return from inside the ``with`` so the stream is closed on exit.
        contents = handle.read()
        return contents

save

save(
    destination: str, client_config: dict | None = None
) -> TextFile

Writes the file's content to destination.

Source code in datachain/lib/file.py
def save(self, destination: str, client_config: dict | None = None) -> "TextFile":
    """Write this file's content to ``destination``.

    Saving is delegated to the parent class. Its result is rebuilt as a
    ``TextFile`` from ``model_dump()``, and this instance's catalog is
    passed to the new object via ``_set_stream`` before it is returned.
    """
    saved = super().save(destination, client_config=client_config)
    text_file = TextFile(**saved.model_dump())
    text_file._set_stream(self._catalog)
    return text_file