Jupyter Notebook

Query & search registries

This guide walks through different ways of querying & searching LaminDB registries.

Let’s start by creating a few example datasets and saving them into a LaminDB instance (hidden cell).

Hide code cell content
# pip install 'lamindb[bionty]'
!lamin init --storage ./test-registries --modules bionty

# python
import lamindb as ln
import bionty as bt
from lamindb.core import datasets

ln.track("pd7UR7Z8hoTq0000")

# Create non-curated datasets
ln.Artifact(datasets.file_jpg_paradisi05(), key="images/my_image.jpg").save()
ln.Artifact(datasets.file_fastq(), key="raw/my_fastq.fastq").save()
ln.Artifact.from_df(datasets.df_iris(), key="iris/iris_collection.parquet").save()

# Create a more complex case
# observation-level metadata
ln.Feature(name="cell_medium", dtype="cat[ULabel]").save()
ln.Feature(name="sample_note", dtype="str").save()
ln.Feature(name="cell_type_by_expert", dtype="cat[bionty.CellType]").save()
ln.Feature(name="cell_type_by_model", dtype="cat[bionty.CellType]").save()
# dataset-level metadata
ln.Feature(name="temperature", dtype="float").save()
ln.Feature(name="study", dtype="cat[ULabel]").save()
ln.Feature(name="date_of_study", dtype="date").save()
ln.Feature(name="study_note", dtype="str").save()

## Permissible values for categoricals
ln.ULabel.from_values(["DMSO", "IFNG"], create=True).save()
ln.ULabel.from_values(
    ["Candidate marker study 1", "Candidate marker study 2"], create=True
).save()
bt.CellType.from_values(["B cell", "T cell"], create=True).save()

# Ingest dataset1
adata = datasets.small_dataset1(otype="AnnData")
curator = ln.Curator.from_anndata(
    adata,
    var_index=bt.Gene.ensembl_gene_id,
    categoricals={
        "cell_medium": ln.ULabel.name,
        "cell_type_by_expert": bt.CellType.name,
        "cell_type_by_model": bt.CellType.name,
    },
    organism="human",
)
artifact = curator.save_artifact(key="example_datasets/dataset1.h5ad")
artifact.features.add_values(adata.uns)

# Ingest dataset2
adata2 = datasets.small_dataset2(otype="AnnData")
curator = ln.Curator.from_anndata(
    adata2,
    var_index=bt.Gene.ensembl_gene_id,
    categoricals={
        "cell_medium": ln.ULabel.name,
        "cell_type_by_model": bt.CellType.name,
    },
    organism="human",
)
artifact2 = curator.save_artifact(key="example_datasets/dataset2.h5ad")
artifact2.features.add_values(adata2.uns)
 initialized lamindb: testuser1/test-registries
 connected lamindb: testuser1/test-registries
 created Transform('pd7UR7Z8hoTq0000'), started new Run('Di9iMdMK...') at 2025-02-20 07:29:43 UTC
 saving validated records of 'var_index'
 added 3 records from public with Gene.ensembl_gene_id for "var_index": 'ENSG00000153563', 'ENSG00000010610', 'ENSG00000170458'
 "var_index" is validated against Gene.ensembl_gene_id
 "cell_medium" is validated against ULabel.name
 "cell_type_by_expert" is validated against CellType.name
 "cell_type_by_model" is validated against CellType.name
 saving validated records of 'var_index'
 added 1 record from public with Gene.ensembl_gene_id for "var_index": 'ENSG00000004468'
 "var_index" is validated against Gene.ensembl_gene_id
 "cell_medium" is validated against ULabel.name
 "cell_type_by_model" is validated against CellType.name

Get an overview

The easiest way to get an overview of all artifacts is to call df(), which returns the 100 latest artifacts in the Artifact registry.

import lamindb as ln

ln.Artifact.df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1

You can include fields from other registries.

ln.Artifact.df(
    include=[
        "created_by__name",
        "ulabels__name",
        "cell_types__name",
        "feature_sets__itype",
        "suffix",
    ]
)
Hide code cell output
uid key description created_by__name ulabels__name cell_types__name feature_sets__itype suffix
id
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None Test User1 {IFNG, Candidate marker study 2, DMSO} {B cell, T cell} {bionty.Gene, Feature} .h5ad
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None Test User1 {Candidate marker study 1, IFNG, DMSO} {B cell, T cell} {bionty.Gene, Feature} .h5ad
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None Test User1 {None} {None} {None} .parquet
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None Test User1 {None} {None} {None} .fastq.gz
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None Test User1 {None} {None} {None} .jpg

You can include information about which artifact measures which feature.

df = ln.Artifact.df(features=True)
ln.view(df)  # for clarity, leverage ln.view() to display dtype annotations
Hide code cell output
uid key description cell_type_by_expert cell_type_by_model study cell_medium temperature study_note date_of_study
id str str str cat[bionty.CellType] cat[bionty.CellType] cat[ULabel] cat[ULabel] float str date
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None nan {'B cell', 'T cell'} {'Candidate marker study 2'} {'IFNG', 'DMSO'} {21.6} {'We had a great time performing this study and the results look compelling.'} {'2024-12-01'}
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None {'B cell', 'T cell'} {'B cell', 'T cell'} {'Candidate marker study 1'} {'IFNG', 'DMSO'} nan nan nan
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None nan nan nan nan nan nan nan
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None nan nan nan nan nan nan nan
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None nan nan nan nan nan nan nan

The flattened table that includes information from all relevant registries is easier to understand than the normalized data. For comparison, here is how to see the latter.

ln.view()
Hide code cell output
****************
* module: core *
****************
Artifact
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
Feature
uid name dtype is_type unit description array_rank array_size array_shape proxy_dtype synonyms _expect_many _curation space_id type_id run_id created_at created_by_id _aux _branch_code
id
8 PSdiUx3CQUks study_note str None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.048000+00:00 1 {'af': {'0': None, '1': True}} 1
7 IX68oetLCM4j date_of_study date None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.042000+00:00 1 {'af': {'0': None, '1': True}} 1
6 JcX1MR809LBR study cat[ULabel] None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.036000+00:00 1 {'af': {'0': None, '1': True}} 1
5 gPQNp1yV6STH temperature float None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.029000+00:00 1 {'af': {'0': None, '1': True}} 1
4 3wRVnEJmWwSh cell_type_by_model cat[bionty.CellType] None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.023000+00:00 1 {'af': {'0': None, '1': True}} 1
3 NJSiptWcGDcI cell_type_by_expert cat[bionty.CellType] None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.017000+00:00 1 {'af': {'0': None, '1': True}} 1
2 SZKclkh3TKHy sample_note str None None None 0 0 None None None True None 1 None 1 2025-02-20 07:29:45.011000+00:00 1 {'af': {'0': None, '1': True}} 1
FeatureValue
value hash space_id feature_id run_id created_at created_by_id _aux _branch_code
id
1 21.6 None 1 5 1 2025-02-20 07:29:48.646000+00:00 1 None 1
2 2024-12-01 None 1 7 1 2025-02-20 07:29:48.646000+00:00 1 None 1
3 We had a great time performing this study and ... None 1 8 1 2025-02-20 07:29:48.646000+00:00 1 None 1
4 22.6 None 1 5 1 2025-02-20 07:29:51.070000+00:00 1 None 1
5 2025-02-13 None 1 7 1 2025-02-20 07:29:51.070000+00:00 1 None 1
Run
uid name started_at finished_at reference reference_type _is_consecutive _status_code space_id transform_id report_id _logfile_id environment_id initiated_by_run_id created_at created_by_id _aux _branch_code
id
1 Di9iMdMK6QHEjYelxFuv None 2025-02-20 07:29:43.041203+00:00 None None None None 0 1 1 None None None None 2025-02-20 07:29:43.042000+00:00 1 None 1
Schema
uid name description n dtype itype is_type otype hash minimal_set ordered_set maximal_set _curation slot space_id type_id validated_by_id composite_id run_id created_at created_by_id _aux _branch_code
id
1 3xJYsW5Mz1QqGecHzxFJ None None 3 int bionty.Gene False None f2UVeHefaZxXFjmUwo9Ozw True False False None None 1 None None None 1 2025-02-20 07:29:48.557000+00:00 1 None 1
2 bXj2wqNtLy3dJqDprFHr None None 4 None Feature False DataFrame Khw4t_plgXsmXnRdII5Y6A True False False None None 1 None None None 1 2025-02-20 07:29:48.562000+00:00 1 None 1
3 U2KEF91vKqgb8xzrp5Vd None None 3 int bionty.Gene False None QW2rHuIo5-eGNZbRxHMDCw True False False None None 1 None None None 1 2025-02-20 07:29:51.004000+00:00 1 None 1
4 qlM0dFzz16FaLYM7K1fR None None 2 None Feature False DataFrame x_o-rsB6PwRjMm_Bv-g9qA True False False None None 1 None None None 1 2025-02-20 07:29:51.009000+00:00 1 None 1
Storage
uid root description type region instance_uid space_id run_id created_at created_by_id _aux _branch_code
id
1 lpO9tdNTaLiM /home/runner/work/lamindb/lamindb/docs/test-re... None local None hlGq1WkbeSSf 1 None 2025-02-20 07:29:35.487000+00:00 1 None 1
Transform
uid key description type source_code hash reference reference_type space_id _template_id version is_latest created_at created_by_id _aux _branch_code
id
1 pd7UR7Z8hoTq0000 registries.ipynb Query & search registries notebook None None None None 1 None None True 2025-02-20 07:29:43.035000+00:00 1 None 1
ULabel
uid name is_type description reference reference_type space_id type_id run_id created_at created_by_id _aux _branch_code
id
5 FxaM68Br cell_medium False None None None 1 None 1 2025-02-20 07:29:48.456000+00:00 1 None 1
3 MYMtOLsC Candidate marker study 1 False None None None 1 None 1 2025-02-20 07:29:45.071000+00:00 1 None 1
4 6kXrBb8D Candidate marker study 2 False None None None 1 None 1 2025-02-20 07:29:45.071000+00:00 1 None 1
1 Tbc8vBN6 DMSO False None None None 1 None 1 2025-02-20 07:29:45.059000+00:00 1 None 1
2 vMrzit4x IFNG False None None None 1 None 1 2025-02-20 07:29:45.059000+00:00 1 None 1
******************
* module: bionty *
******************
CellType
uid name ontology_id abbr synonyms description space_id source_id run_id created_at created_by_id _aux _branch_code
id
1 1m3SGd1l B cell None None None None 1 None 1 2025-02-20 07:29:45.533000+00:00 1 None 1
2 7gRvACvc T cell None None None None 1 None 1 2025-02-20 07:29:45.533000+00:00 1 None 1
Gene
uid symbol stable_id ensembl_gene_id ncbi_gene_ids biotype synonyms description space_id source_id organism_id run_id created_at created_by_id _aux _branch_code
id
4 iFxDa8hoEWuW CD38 None ENSG00000004468 952 protein_coding CADPR1 CD38 molecule 1 11 1 1 2025-02-20 07:29:50.911000+00:00 1 None 1
1 6Aqvc8ckDYeN CD8A None ENSG00000153563 925 protein_coding P32|CD8|CD8ALPHA CD8 subunit alpha 1 11 1 1 2025-02-20 07:29:48.430000+00:00 1 None 1
2 1j4At3x7akJU CD4 None ENSG00000010610 920 protein_coding T4|LEU-3 CD4 molecule 1 11 1 1 2025-02-20 07:29:48.430000+00:00 1 None 1
3 3bhNYquOnA4s CD14 None ENSG00000170458 929 protein_coding CD14 molecule 1 11 1 1 2025-02-20 07:29:48.430000+00:00 1 None 1
Organism
uid name ontology_id scientific_name synonyms description space_id source_id run_id created_at created_by_id _aux _branch_code
id
1 1dpCL6Td human NCBITaxon:9606 homo_sapiens None None 1 1 1 2025-02-20 07:29:46.013000+00:00 1 None 1
Source
uid entity organism name in_db currently_used description url md5 source_website space_id dataframe_artifact_id version run_id created_at created_by_id _aux _branch_code
id
53 5Xov8Lap bionty.Disease all mondo False False Mondo Disease Ontology http://purl.obolibrary.org/obo/mondo/releases/... 78914fa236773c5ea6605f7570df6245 https://mondo.monarchinitiative.org 1 None 2024-02-06 None 2025-02-20 07:29:35.653000+00:00 1 None 1
54 69lnSXfR bionty.Disease all mondo False False Mondo Disease Ontology http://purl.obolibrary.org/obo/mondo/releases/... 73787d81b885cfa1a255ee293e38303d https://mondo.monarchinitiative.org 1 None 2024-01-03 None 2025-02-20 07:29:35.653000+00:00 1 None 1
55 4ss2Hizg bionty.Disease all mondo False False Mondo Disease Ontology http://purl.obolibrary.org/obo/mondo/releases/... 7f33767422042eec29f08b501fc851db https://mondo.monarchinitiative.org 1 None 2023-08-02 None 2025-02-20 07:29:35.653000+00:00 1 None 1
56 Hgw08Vk3 bionty.Disease all mondo False False Mondo Disease Ontology http://purl.obolibrary.org/obo/mondo/releases/... 700c43dd9ba51aecc7a8edfc3bc2dab1 https://mondo.monarchinitiative.org 1 None 2023-04-04 None 2025-02-20 07:29:35.653000+00:00 1 None 1
57 UUZUtULu bionty.Disease all mondo False False Mondo Disease Ontology http://purl.obolibrary.org/obo/mondo/releases/... 2b7d479d4bd02a94eab47d1c9e64c5db https://mondo.monarchinitiative.org 1 None 2023-02-06 None 2025-02-20 07:29:35.653000+00:00 1 None 1
58 7DH1aJIr bionty.Disease all mondo False False Mondo Disease Ontology http://purl.obolibrary.org/obo/mondo/releases/... 04b808d05c2c2e81430b20a0e87552bb https://mondo.monarchinitiative.org 1 None 2022-10-11 None 2025-02-20 07:29:35.653000+00:00 1 None 1
59 4kswnHVF bionty.Disease human doid False True Human Disease Ontology http://purl.obolibrary.org/obo/doid/releases/2... bbefd72247d638edfcd31ec699947407 https://disease-ontology.org 1 None 2024-05-29 None 2025-02-20 07:29:35.653000+00:00 1 None 1

Auto-complete records

For registries with fewer than 100k records, auto-completing a Lookup object is the most convenient way of finding a record.

import bionty as bt

# query the database for all ulabels or all cell types
ulabels = ln.ULabel.lookup()
cell_types = bt.CellType.lookup()
Show me a screenshot

With auto-complete, we find a ulabel:

study1 = ulabels.candidate_marker_study_1
study1
Hide code cell output
ULabel(uid='MYMtOLsC', name='Candidate marker study 1', is_type=False, created_by_id=1, run_id=1, space_id=1, created_at=2025-02-20 07:29:45 UTC)

Get one record

get() raises an error if more than one matching record is found.

print(study1.uid)

# by uid
ln.ULabel.get(study1.uid)

# by field
ln.ULabel.get(name="Candidate marker study 1")
Hide code cell output
MYMtOLsC
ULabel(uid='MYMtOLsC', name='Candidate marker study 1', is_type=False, created_by_id=1, run_id=1, space_id=1, created_at=2025-02-20 07:29:45 UTC)

Query multiple records

Filter for all artifacts annotated by a ulabel:

ln.Artifact.filter(ulabels=study1).df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1

To access the results encoded in a filter statement, execute its return value with one of:

  • df(): A pandas DataFrame with each record in a row.

  • all(): A QuerySet.

  • one(): Exactly one record. Will raise an error if there is none. Is equivalent to the .get() method shown above.

  • one_or_none(): Either one record or None if there is no query result.

Note

filter() returns a QuerySet.

The registries in LaminDB are Django Models and any Django query works.

LaminDB re-interprets Django’s API for data scientists.

What does this have to do with SQL?

Under the hood, any .filter() call translates into a SQL select statement.

LaminDB’s registries are object relational mappers (ORMs) that rely on Django for all the heavy lifting.

Of note, .one() and .one_or_none() are the two parts of LaminDB’s API that are borrowed from SQLAlchemy. In its first year, LaminDB was built on SQLAlchemy.

Search for records

You can search every registry via search(). For example, the Artifact registry.

ln.Artifact.search("iris").df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1

Here is more background on search and examples for searching the entire cell type ontology: How does search work?

Filter operators

You can qualify the type of comparison in a query by using a comparator.

Below is a list of the most important ones, but Django supports about two dozen field comparators of the form field__comparator=value.

and

ln.Artifact.filter(suffix=".h5ad", ulabels=study1).df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1

less than/ greater than

Or subset to artifacts larger than 10 kB. Here, a plain equality keyword argument doesn’t suffice; instead, append the __gt comparator to the field name (size__gt).

ln.Artifact.filter(ulabels=study1, size__gt=1e4).df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1

in

ln.Artifact.filter(suffix__in=[".jpg", ".fastq.gz"]).df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None None md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None None md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1

order by

ln.Artifact.filter().order_by("created_at").df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1
# reverse ordering
ln.Artifact.filter().order_by("-created_at").df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
ln.Artifact.filter().order_by("key").df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
# reverse ordering
ln.Artifact.filter().order_by("-key").df()
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1

contains

ln.Transform.filter(name__contains="search").df().head(5)
Hide code cell output
uid id key description type source_code hash reference reference_type space_id _template_id version is_latest created_at created_by_id _aux _branch_code

And case-insensitive:

ln.Transform.filter(name__icontains="Search").df().head(5)
Hide code cell output
uid id key description type source_code hash reference reference_type space_id _template_id version is_latest created_at created_by_id _aux _branch_code

startswith

ln.Transform.filter(name__startswith="Research").df()
Hide code cell output
uid id key description type source_code hash reference reference_type space_id _template_id version is_latest created_at created_by_id _aux _branch_code

or

ln.Artifact.filter(ln.Q(suffix=".jpg") | ln.Q(suffix=".fastq.gz")).df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
1 hyMQOeRqi1TEVpmd0000 images/my_image.jpg None .jpg None None 29358 r4tnqmKI_SjrkdLzpuWp4g None None md5 True False 1 1 None None True 1 2025-02-20 07:29:44.592000+00:00 1 None 1
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None None md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1

negate/ unequal

ln.Artifact.filter(~ln.Q(suffix=".jpg")).df()
Hide code cell output
uid key description suffix kind otype size hash n_files n_observations _hash_type _key_is_virtual _overwrite_versions space_id storage_id schema_id version is_latest run_id created_at created_by_id _aux _branch_code
id
2 d62QCe9CEAHSf2vv0000 raw/my_fastq.fastq None .fastq.gz None None 20 hi7ZmAzz8sfMd3vIQr-57Q None NaN md5 True False 1 1 None None True 1 2025-02-20 07:29:44.603000+00:00 1 None 1
3 GEIt7qOPvgJcmCPv0000 iris/iris_collection.parquet None .parquet dataset DataFrame 5088 8jtyI0r4o8fp7Gl7NayGjw None 150.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:44.734000+00:00 1 None 1
4 nSbWoD5SbqJaoxQl0000 example_datasets/dataset1.h5ad None .h5ad dataset AnnData 25088 MDScrpnbcTBtntAdtu5dAA None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:48.529000+00:00 1 None 1
5 HrM6IJXJCIErBBRz0000 example_datasets/dataset2.h5ad None .h5ad dataset AnnData 22384 IBoCVKeglWczsj9aosAMbg None 3.0 md5 True False 1 1 None None True 1 2025-02-20 07:29:50.978000+00:00 1 None 1