Domains
Why Would You Use Domains?
Domains are curated, top-level folders or categories where related assets can be explicitly grouped. Management of Domains can be centralized, or distributed out to Domain owners. Currently, an asset can belong to only one Domain at a time. For more information about domains, refer to About DataHub Domains.
Goal Of This Guide
This guide will show you how to:
- Create a domain.
- Read domains attached to a dataset.
- Add a dataset to a domain.
- Remove the domain from a dataset.
Prerequisites
For this tutorial, you need to deploy DataHub Quickstart and ingest sample data. For detailed steps, please refer to the DataHub Quickstart Guide.
Create Domain
- GraphQL
- Curl
- Python
mutation createDomain {
createDomain(input: { name: "Marketing", description: "Entities related to the marketing department" })
}
If you see the following response, the operation was successful:
{
"data": {
"createDomain": "<domain_urn>"
},
"extensions": {}
}
curl --location --request POST 'http://localhost:8080/api/graphql' \
--header 'Authorization: Bearer <my-access-token>' \
--header 'Content-Type: application/json' \
--data-raw '{ "query": "mutation createDomain { createDomain(input: { name: \"Marketing\", description: \"Entities related to the marketing department.\" }) }", "variables":{}}'
Expected Response:
{ "data": { "createDomain": "<domain_urn>" }, "extensions": {} }
# Inlined from /metadata-ingestion/examples/library/create_domain.py
import logging

from datahub.emitter.mce_builder import make_domain_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import ChangeTypeClass, DomainPropertiesClass

log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# URN of the domain to upsert, built from the id "marketing".
marketing_urn = make_domain_urn("marketing")

# Aspect carrying the domain's display name and description.
marketing_properties = DomainPropertiesClass(
    name="Marketing",
    description="Entities related to the marketing department",
)

# Wrap the aspect in an UPSERT change proposal targeting the domain entity.
proposal: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityType="domain",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=marketing_urn,
    aspect=marketing_properties,
)

# Emit the proposal to the server configured as gms_server.
emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
emitter.emit(proposal)
log.info(f"Created domain {marketing_urn}")
Expected Outcomes of Creating Domain
You can now see that the `Marketing` domain has been created under `Govern > Domains`.
Creating a Nested Domain
You can also create a nested domain, or a domain within another domain.
- GraphQL
- Curl
- Python
mutation createDomain {
createDomain(input: { name: "Verticals", description: "An optional description", parentDomain: "urn:li:domain:marketing" })
}
curl --location --request POST 'http://localhost:8080/api/graphql' \
--header 'Authorization: Bearer <my-access-token>' \
--header 'Content-Type: application/json' \
--data-raw '{ "query": "mutation createDomain { createDomain(input: { name: \"Verticals\", description: \"Entities related to the verticals sub-domain.\", parentDomain: \"urn:li:domain:marketing\" }) }", "variables":{}}'
# Inlined from /metadata-ingestion/examples/library/create_nested_domain.py
import logging

from datahub.emitter.mce_builder import make_domain_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import ChangeTypeClass, DomainPropertiesClass

log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# The nested domain needs its own URN. Reusing the parent's id ("marketing")
# here would upsert these properties onto the Marketing domain itself and
# make that domain its own parent.
domain_urn = make_domain_urn("verticals")

# Properties of the child domain; parentDomain points at the existing
# Marketing domain so "Verticals" is nested underneath it.
domain_properties_aspect = DomainPropertiesClass(
    name="Verticals",
    description="Entities related to the verticals sub-domain",
    parentDomain="urn:li:domain:marketing",
)

# Wrap the aspect in an UPSERT change proposal targeting the child domain.
event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityType="domain",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=domain_urn,
    aspect=domain_properties_aspect,
)

# Emit the proposal to the server configured as gms_server.
rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
rest_emitter.emit(event)
log.info(f"Created domain {domain_urn}")
This query will create a new domain, "Verticals", under the "Marketing" domain.
Read Domains
- GraphQL
- Curl
- Python
query {
dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") {
domain {
associatedUrn
domain {
urn
properties {
name
}
}
}
}
}
If you see the following response, the operation was successful:
{
"data": {
"dataset": {
"domain": {
"associatedUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
"domain": {
"urn": "urn:li:domain:71b3bf7b-2e3f-4686-bfe1-93172c8c4e10",
"properties": {
"name": "Marketing"
}
}
}
}
},
"extensions": {}
}
curl --location --request POST 'http://localhost:8080/api/graphql' \
--header 'Authorization: Bearer <my-access-token>' \
--header 'Content-Type: application/json' \
--data-raw '{ "query": "{ dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\") { domain { associatedUrn domain { urn properties { name } } } } }", "variables":{}}'
Expected Response:
{
"data": {
"dataset": {
"domain": {
"associatedUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
"domain": {
"urn": "urn:li:domain:71b3bf7b-2e3f-4686-bfe1-93172c8c4e10",
"properties": { "name": "Marketing" }
}
}
}
},
"extensions": {}
}
# Inlined from /metadata-ingestion/examples/library/dataset_query_domain.py
from datahub.emitter.mce_builder import make_dataset_urn

# Reading aspects back goes through the DataHubGraph client.
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph

# Metadata model class for the aspect being read.
from datahub.metadata.schema_classes import DomainsClass

# Dataset whose domain assignment is looked up.
target_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")

server_url = "http://localhost:8080"
client = DataHubGraph(DatahubClientConfig(server=server_url))

# Fetch the "domains" aspect of the dataset, typed as DomainsClass.
domains_result = client.get_aspects_for_entity(
    entity_urn=target_urn,
    aspects=["domains"],
    aspect_types=[DomainsClass],
)
print(domains_result)
Add Domains
- GraphQL
- Curl
- Python
mutation setDomain {
setDomain(domainUrn: "urn:li:domain:marketing", entityUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)")
}
If you see the following response, the operation was successful:
{
"data": {
"setDomain": true
},
"extensions": {}
}
# Assign the dataset to the Marketing domain. The quotes inside the GraphQL
# query are backslash-escaped so that the request body is valid JSON.
curl --location --request POST 'http://localhost:8080/api/graphql' \
--header 'Authorization: Bearer <my-access-token>' \
--header 'Content-Type: application/json' \
--data-raw '{ "query": "mutation setDomain { setDomain(entityUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\", domainUrn: \"urn:li:domain:marketing\") }", "variables":{}}'
Expected Response:
{ "data": { "setDomain": true }, "extensions": {} }
# Inlined from /metadata-ingestion/examples/library/dataset_add_domain_execute_graphql.py
# GraphQL requests are sent through the DataHubGraph client.
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph

server_url = "http://localhost:8080"
client = DataHubGraph(DatahubClientConfig(server=server_url))

# Mutation that sets the marketing domain on the fct_users_created dataset.
set_domain_mutation = """
mutation setDomain {
setDomain(domainUrn: "urn:li:domain:marketing", entityUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)")
}
"""

# Run the mutation and print the raw response.
response = client.execute_graphql(query=set_domain_mutation)
print(response)
Expected Outcomes of Adding Domain
You can now see that the `Marketing` domain has been added to the dataset.
Remove Domains
- GraphQL
- Curl
- Python
mutation unsetDomain {
unsetDomain(
entityUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"
)
}
Expected Response:
{
"data": {
"unsetDomain": true
},
"extensions": {}
}
curl --location --request POST 'http://localhost:8080/api/graphql' \
--header 'Authorization: Bearer <my-access-token>' \
--header 'Content-Type: application/json' \
--data-raw '{ "query": "mutation unsetDomain { unsetDomain(entityUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\") }", "variables":{}}'
# Inlined from /metadata-ingestion/examples/library/dataset_remove_domain_execute_graphql.py
# GraphQL requests are sent through the DataHubGraph client.
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph

server_url = "http://localhost:8080"
client = DataHubGraph(DatahubClientConfig(server=server_url))

# Mutation that unsets the domain on the fct_users_created dataset.
unset_domain_mutation = """
mutation unsetDomain {
unsetDomain(
entityUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"
)
}
"""

# Run the mutation and print the raw response.
response = client.execute_graphql(query=unset_domain_mutation)
print(response)
Expected Outcomes of Removing Domain
You can now see that the `Marketing` domain has been removed from the `fct_users_created` dataset.