Data Manager module for the Label Studio SDK
The classes in this module can be used to filter, order, and select items in Project.get_tasks().
The module also provides an enumeration of all column names available in the Data Manager for tasks, and other helpers.
See the client, project or utils modules for other operations you might want to perform.
Example:
from datetime import datetime
from label_studio_sdk.data_manager import Filters, Column, Operator, Type
filters = Filters.create(Filters.OR, [
Filters.item(
Column.id,
Operator.GREATER,
Type.Number,
Filters.value(42)
),
Filters.item(
Column.completed_at,
Operator.IN,
Type.Datetime,
Filters.value(
datetime(2021, 11, 1),
datetime.now()
)
)
])
tasks = project.get_tasks(filters=filters)
source code Browse git
""" # Data Manager module for the Label Studio SDK
The classes in this module can be used to filter, order, and select items in `label_studio_sdk.project.Project.get_tasks`.
The module also provides an enumeration of all column names available in the Data Manager for tasks, and other helpers.
See the [client](client.html), [project](project.html) or [utils](utils.html) modules for other operations you
might want to perform.
Example:
```python
from datetime import datetime
from label_studio_sdk.data_manager import Filters, Column, Operator, Type
filters = Filters.create(Filters.OR, [
Filters.item(
Column.id,
Operator.GREATER,
Type.Number,
Filters.value(42)
),
Filters.item(
Column.completed_at,
Operator.IN,
Type.Datetime,
Filters.value(
datetime(2021, 11, 1),
datetime.now()
)
)
])
tasks = project.get_tasks(filters=filters)
```
"""
from datetime import datetime
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


class Filters:
    """
    Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager.
    """

    OR = "or"
    """Combine filters with an OR"""
    AND = "and"
    """Combine filters with an AND"""

    @staticmethod
    def create(conjunction, items):
        """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

        Parameters
        ----------
        conjunction: str
            The conjunction operator between filters ('or' or 'and')
        items: list
            What to filter, use `Filters.item()` method to build it

        Returns
        -------
        dict
            containing specified parameters

        """
        return {"conjunction": conjunction, "items": items}

    @staticmethod
    def item(name, operator, column_type, value):
        """Use in combination with other classes to specify the contents of a filter.

        Parameters
        ----------
        name: `Column` or str
            Column.id, Column.completed_at, Column.data('my_field'), etc
        operator: `Operator`
            Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
        column_type: `Type`
            Type.Number, Type.Boolean, Type.String, etc
        value: `Filters.value()`
            Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

        Returns
        -------
        dict
            with the keys "filter" (the name prefixed with "filter:"), "operator", "type" and "value"

        """
        return {
            "filter": "filter:" + name,
            "operator": operator,
            "type": column_type,
            "value": value,
        }

    @staticmethod
    def datetime(dt):
        """Date time string format for filtering the Data Manager.

        Timezone-aware datetimes are converted to UTC before formatting, because the
        output format always ends with a literal "Z". Naive datetimes are formatted as they are.

        Parameters
        ----------
        dt
            datetime instance

        Returns
        -------
        str
            datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

        """
        # Local import: the module itself only imports the `datetime` class.
        from datetime import timezone

        assert isinstance(dt, datetime), "dt must be datetime type"
        # utcoffset() is None for naive datetimes; only aware values can be shifted to UTC.
        if dt.utcoffset() is not None:
            dt = dt.astimezone(timezone.utc)
        return dt.strftime(DATETIME_FORMAT)

    @classmethod
    def value(cls, value, maximum=None):
        """Set a filter value in the Data Manager.

        Parameters
        ----------
        value: str | int | float | datetime | boolean
            value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
        maximum: int | float | datetime
            Specify a maximum for a filtering range with IN, NOT_IN operators.

        Returns
        -------
        any
            value for filtering: the (possibly formatted) value itself, or a
            dict with "min" and "max" keys when maximum is passed

        """
        # datetime objects are sent as formatted strings
        if isinstance(value, datetime):
            value = cls.datetime(value)

        if maximum is not None:
            if isinstance(maximum, datetime):
                maximum = cls.datetime(maximum)
            return {"min": value, "max": maximum}

        return value
class Operator:
    """Specify the operator to use when creating a filter."""

    EQUAL = "equal"
    NOT_EQUAL = "not_equal"
    LESS = "less"
    GREATER = "greater"
    LESS_OR_EQUAL = "less_or_equal"
    GREATER_OR_EQUAL = "greater_or_equal"
    IN = "in"
    """Used with a range value, see the `maximum` parameter of `Filters.value()`"""
    NOT_IN = "not_in"
    """Used with a range value, see the `maximum` parameter of `Filters.value()`"""
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"
    EMPTY = "empty"
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    REGEX = "regex"
class Type:
    """Specify the type of data in a column."""

    Number = "Number"
    Datetime = "Datetime"
    Boolean = "Boolean"
    String = "String"
    List = "List"

    Unknown = "Unknown"
    """ Unknown is explicitly converted to string format. """
class Column:
    """Specify the column on the Data Manager in Label Studio UI to use in the filter."""

    id = "tasks:id"
    """Task ID"""
    inner_id = "tasks:inner_id"
    """Task Inner ID, it starts from 1 for all projects"""
    ground_truth = "tasks:ground_truth"
    """Ground truth status of the tasks"""
    annotations_results = "tasks:annotations_results"
    """Annotation results for the tasks"""
    reviewed = "tasks:reviewed"
    """Whether the tasks have been reviewed (Enterprise only)"""
    predictions_score = "tasks:predictions_score"
    """Prediction score for the task"""
    predictions_model_versions = "tasks:predictions_model_versions"
    """Model version used for the predictions"""
    predictions_results = "tasks:predictions_results"
    """Prediction results for the tasks"""
    file_upload = "tasks:file_upload"
    """Name of the file uploaded to create the tasks"""
    created_at = "tasks:created_at"
    """Time the task was created at"""
    updated_at = "tasks:updated_at"
    """Time the task was updated at (e.g. new annotation was created, review added, etc)"""
    annotators = "tasks:annotators"
    """Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)"""
    total_predictions = "tasks:total_predictions"
    """Total number of predictions for the task"""
    cancelled_annotations = "tasks:cancelled_annotations"
    """Number of cancelled or skipped annotations for the task"""
    total_annotations = "tasks:total_annotations"
    """Total number of annotations on a task"""
    completed_at = "tasks:completed_at"
    """Time when a task was fully annotated"""
    agreement = "tasks:agreement"
    """Agreement for annotation results for a specific task (Enterprise only)"""
    reviewers = "tasks:reviewers"
    """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)"""
    reviews_rejected = "tasks:reviews_rejected"
    """Number of annotations rejected for a task in review (Enterprise only)"""
    reviews_accepted = "tasks:reviews_accepted"
    """Number of annotations accepted for a task in review (Enterprise only)"""
    comments = "tasks:comments"
    """Number of comments in a task"""
    unresolved_comment_count = "tasks:unresolved_comment_count"
    """Number of unresolved comments in a task"""

    @staticmethod
    def data(task_field):
        """Create a filter name for the task data field

        Parameters
        ----------
        task_field: str
            Name of the field in the task data, e.g. 'my_field'

        Returns
        -------
        str
            Filter name for task data, i.e. the field name prefixed with "tasks:data."

        """
        return "tasks:data." + task_field
def _test():
    """Check that the `Filters` helpers assemble the expected filter dict.

    Builds an OR filter from a numeric comparison and a datetime range and
    compares the result with the literal structure it should produce.
    """
    filters = Filters.create(
        Filters.OR,
        [
            Filters.item(Column.id, Operator.GREATER, Type.Number, Filters.value(42)),
            Filters.item(
                Column.completed_at,
                Operator.IN,
                Type.Datetime,
                Filters.value(
                    datetime(2021, 11, 1),
                    datetime(2021, 11, 5),
                ),
            ),
        ],
    )

    assert filters == {
        "conjunction": "or",
        "items": [
            {
                "filter": "filter:tasks:id",
                "operator": "greater",
                "type": "Number",
                "value": 42,
            },
            {
                "filter": "filter:tasks:completed_at",
                "operator": "in",
                "type": "Datetime",
                "value": {
                    "min": "2021-11-01T00:00:00.000000Z",
                    "max": "2021-11-05T00:00:00.000000Z",
                },
            },
        ],
    }
Classes
class Column
-
Specify the column on the Data Manager in Label Studio UI to use in the filter.
source code Browse git
class Column:
    """Specify the column on the Data Manager in Label Studio UI to use in the filter."""

    id = "tasks:id"
    """Task ID"""
    inner_id = "tasks:inner_id"
    """Task Inner ID, it starts from 1 for all projects"""
    ground_truth = "tasks:ground_truth"
    """Ground truth status of the tasks"""
    annotations_results = "tasks:annotations_results"
    """Annotation results for the tasks"""
    reviewed = "tasks:reviewed"
    """Whether the tasks have been reviewed (Enterprise only)"""
    predictions_score = "tasks:predictions_score"
    """Prediction score for the task"""
    predictions_model_versions = "tasks:predictions_model_versions"
    """Model version used for the predictions"""
    predictions_results = "tasks:predictions_results"
    """Prediction results for the tasks"""
    file_upload = "tasks:file_upload"
    """Name of the file uploaded to create the tasks"""
    created_at = "tasks:created_at"
    """Time the task was created at"""
    updated_at = "tasks:updated_at"
    """Time the task was updated at (e.g. new annotation was created, review added, etc)"""
    annotators = "tasks:annotators"
    """Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)"""
    total_predictions = "tasks:total_predictions"
    """Total number of predictions for the task"""
    cancelled_annotations = "tasks:cancelled_annotations"
    """Number of cancelled or skipped annotations for the task"""
    total_annotations = "tasks:total_annotations"
    """Total number of annotations on a task"""
    completed_at = "tasks:completed_at"
    """Time when a task was fully annotated"""
    agreement = "tasks:agreement"
    """Agreement for annotation results for a specific task (Enterprise only)"""
    reviewers = "tasks:reviewers"
    """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)"""
    reviews_rejected = "tasks:reviews_rejected"
    """Number of annotations rejected for a task in review (Enterprise only)"""
    reviews_accepted = "tasks:reviews_accepted"
    """Number of annotations accepted for a task in review (Enterprise only)"""
    comments = "tasks:comments"
    """Number of comments in a task"""
    unresolved_comment_count = "tasks:unresolved_comment_count"
    """Number of unresolved comments in a task"""

    @staticmethod
    def data(task_field):
        """Create a filter name for the task data field

        Parameters
        ----------
        task_field: str
            Name of the field in the task data, e.g. 'my_field'

        Returns
        -------
        str
            Filter name for task data, i.e. the field name prefixed with "tasks:data."

        """
        return "tasks:data." + task_field
Constants
agreement
-
Agreement for annotation results for a specific task (Enterprise only)
annotations_results
-
Annotation results for the tasks
annotators
-
Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)
cancelled_annotations
-
Number of cancelled or skipped annotations for the task
comments
-
Number of comments in a task
completed_at
-
Time when a task was fully annotated
created_at
-
Time the task was created at
file_upload
-
Name of the file uploaded to create the tasks
ground_truth
-
Ground truth status of the tasks
id
-
Task ID
inner_id
-
Task Inner ID, it starts from 1 for all projects
predictions_model_versions
-
Model version used for the predictions
predictions_results
-
Prediction results for the tasks
predictions_score
-
Prediction score for the task
reviewed
-
Whether the tasks have been reviewed (Enterprise only)
reviewers
-
Reviewers that reviewed the task, or assigned reviewers (Enterprise only)
reviews_accepted
-
Number of annotations accepted for a task in review (Enterprise only)
reviews_rejected
-
Number of annotations rejected for a task in review (Enterprise only)
total_annotations
-
Total number of annotations on a task
total_predictions
-
Total number of predictions for the task
unresolved_comment_count
-
Number of unresolved comments in a task
updated_at
-
Time the task was updated at (e.g. new annotation was created, review added, etc)
Static methods
def data(task_field)
-
Create a filter name for the task data field
Parameters
task_field
Returns
str
- Filter name for task data
source code Browse git
@staticmethod
def data(task_field):
    """Create a filter name for the task data field

    Parameters
    ----------
    task_field: str
        Name of the field in the task data, e.g. 'my_field'

    Returns
    -------
    str
        Filter name for task data, i.e. the field name prefixed with "tasks:data."

    """
    return "tasks:data." + task_field
class Filters
-
Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager.
source code Browse git
class Filters:
    """
    Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager.
    """

    OR = "or"
    """Combine filters with an OR"""
    AND = "and"
    """Combine filters with an AND"""

    @staticmethod
    def create(conjunction, items):
        """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

        Parameters
        ----------
        conjunction: str
            The conjunction operator between filters ('or' or 'and')
        items: list
            What to filter, use `Filters.item()` method to build it

        Returns
        -------
        dict
            containing specified parameters

        """
        return {"conjunction": conjunction, "items": items}

    @staticmethod
    def item(name, operator, column_type, value):
        """Use in combination with other classes to specify the contents of a filter.

        Parameters
        ----------
        name: `Column` or str
            Column.id, Column.completed_at, Column.data('my_field'), etc
        operator: `Operator`
            Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
        column_type: `Type`
            Type.Number, Type.Boolean, Type.String, etc
        value: `Filters.value()`
            Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

        Returns
        -------
        dict
            with the keys "filter" (the name prefixed with "filter:"), "operator", "type" and "value"

        """
        return {
            "filter": "filter:" + name,
            "operator": operator,
            "type": column_type,
            "value": value,
        }

    @staticmethod
    def datetime(dt):
        """Date time string format for filtering the Data Manager.

        Timezone-aware datetimes are converted to UTC before formatting, because the
        output format always ends with a literal "Z". Naive datetimes are formatted as they are.

        Parameters
        ----------
        dt
            datetime instance

        Returns
        -------
        str
            datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

        """
        # Local import: the module itself only imports the `datetime` class.
        from datetime import timezone

        assert isinstance(dt, datetime), "dt must be datetime type"
        # utcoffset() is None for naive datetimes; only aware values can be shifted to UTC.
        if dt.utcoffset() is not None:
            dt = dt.astimezone(timezone.utc)
        return dt.strftime(DATETIME_FORMAT)

    @classmethod
    def value(cls, value, maximum=None):
        """Set a filter value in the Data Manager.

        Parameters
        ----------
        value: str | int | float | datetime | boolean
            value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
        maximum: int | float | datetime
            Specify a maximum for a filtering range with IN, NOT_IN operators.

        Returns
        -------
        any
            value for filtering: the (possibly formatted) value itself, or a
            dict with "min" and "max" keys when maximum is passed

        """
        # datetime objects are sent as formatted strings
        if isinstance(value, datetime):
            value = cls.datetime(value)

        if maximum is not None:
            if isinstance(maximum, datetime):
                maximum = cls.datetime(maximum)
            return {"min": value, "max": maximum}

        return value
Constants
AND
-
Combine filters with an AND
OR
-
Combine filters with an OR
Static methods
def create(conjunction, items)
-
Create a filter for
Project.get_tasks()
Parameters
conjunction
:str
- The conjunction operator between filters ('or' or 'and')
items
:list
- What to filter, use
Filters.item()
method to build it
Returns
dict
- containing specified parameters
source code Browse git
@staticmethod
def create(conjunction, items):
    """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

    Parameters
    ----------
    conjunction: str
        The conjunction operator between filters ('or' or 'and')
    items: list
        What to filter, use `Filters.item()` method to build it

    Returns
    -------
    dict
        containing specified parameters

    """
    return {"conjunction": conjunction, "items": items}
def datetime(dt)
-
Date time string format for filtering the Data Manager.
Parameters
dt
- datetime instance
Returns
str
- datetime in
'%Y-%m-%dT%H:%M:%S.%fZ'
format
source code Browse git
@staticmethod
def datetime(dt):
    """Date time string format for filtering the Data Manager.

    Timezone-aware datetimes are converted to UTC before formatting, because the
    output format always ends with a literal "Z". Naive datetimes are formatted as they are.

    Parameters
    ----------
    dt
        datetime instance

    Returns
    -------
    str
        datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

    """
    # Local import: the module itself only imports the `datetime` class.
    from datetime import timezone

    assert isinstance(dt, datetime), "dt must be datetime type"
    # utcoffset() is None for naive datetimes; only aware values can be shifted to UTC.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    return dt.strftime(DATETIME_FORMAT)
def item(name, operator, column_type, value)
-
Use in combination with other classes to specify the contents of a filter.
Parameters
name
:Column
or str
- Column.id, Column.completed_at, Column.data('my_field'), etc
operator
:Operator
- Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
column_type
:Type
- Type.Number, Type.Boolean, Type.String, etc
value
:Filters.value()
- Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())
Returns
dict
source code Browse git
@staticmethod
def item(name, operator, column_type, value):
    """Use in combination with other classes to specify the contents of a filter.

    Parameters
    ----------
    name: `Column` or str
        Column.id, Column.completed_at, Column.data('my_field'), etc
    operator: `Operator`
        Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
    column_type: `Type`
        Type.Number, Type.Boolean, Type.String, etc
    value: `Filters.value()`
        Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

    Returns
    -------
    dict
        with the keys "filter" (the name prefixed with "filter:"), "operator", "type" and "value"

    """
    return {
        "filter": "filter:" + name,
        "operator": operator,
        "type": column_type,
        "value": value,
    }
def value(value, maximum=None)
-
Set a filter value in the Data Manager.
Parameters
value
:str | int | float | datetime | boolean
- value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
maximum
:int | float | datetime
- Specify a maximum for a filtering range with IN, NOT_IN operators.
Returns
any
- value for filtering
source code Browse git
@classmethod
def value(cls, value, maximum=None):
    """Set a filter value in the Data Manager.

    Parameters
    ----------
    value: str | int | float | datetime | boolean
        value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
    maximum: int | float | datetime
        Specify a maximum for a filtering range with IN, NOT_IN operators.

    Returns
    -------
    any
        value for filtering: the (possibly formatted) value itself, or a
        dict with "min" and "max" keys when maximum is passed

    """
    # datetime objects are sent as formatted strings
    if isinstance(value, datetime):
        value = cls.datetime(value)

    if maximum is not None:
        if isinstance(maximum, datetime):
            maximum = cls.datetime(maximum)
        return {"min": value, "max": maximum}

    return value
class Operator
-
Specify the operator to use when creating a filter.
source code Browse git
class Operator:
    """Specify the operator to use when creating a filter."""

    EQUAL = "equal"
    NOT_EQUAL = "not_equal"
    LESS = "less"
    GREATER = "greater"
    LESS_OR_EQUAL = "less_or_equal"
    GREATER_OR_EQUAL = "greater_or_equal"
    IN = "in"
    """Used with a range value, see the `maximum` parameter of `Filters.value()`"""
    NOT_IN = "not_in"
    """Used with a range value, see the `maximum` parameter of `Filters.value()`"""
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"
    EMPTY = "empty"
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    REGEX = "regex"
Constants
CONTAINS
EMPTY
EQUAL
GREATER
GREATER_OR_EQUAL
IN
IN_LIST
LESS
LESS_OR_EQUAL
NOT_CONTAINS
NOT_EQUAL
NOT_IN
NOT_IN_LIST
REGEX
class Type
-
Specify the type of data in a column.
source code Browse git
class Type:
    """Specify the type of data in a column."""

    Number = "Number"
    Datetime = "Datetime"
    Boolean = "Boolean"
    String = "String"
    List = "List"

    Unknown = "Unknown"
    """ Unknown is explicitly converted to string format. """
Constants
Boolean
Datetime
List
Number
String
Unknown
-
Unknown is explicitly converted to string format.