Source code for supervisely.labeling_jobs.utils

# coding: utf-8
"""utilities used for labeling jobs"""

from __future__ import annotations
from typing import List, Tuple, NamedTuple
import urllib.parse

from supervisely.api.labeling_job_api import LabelingJobApi

# Module-level alias so the status checks in this module can refer to Status.<NAME> directly.
Status = LabelingJobApi.Status
from supervisely.labeling_jobs import constants as constants
from supervisely.api.module_api import ApiField
from pandas import DataFrame  # for typehints


def total_desc() -> Tuple[str, str]:
    """
    Name and description of the "total number of Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "TOTAL"
    description = "the total number of jobs in current team"
    return name, description
def is_completed_desc() -> Tuple[str, str]:
    """
    Name and description of the "completed Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "COMPLETED"
    description = "the number of completed jobs"
    return name, description
def is_completed(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is completed.

    :param job_info: Information about the Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if the job status equals ``str(Status.COMPLETED)``, False otherwise
    :rtype: :class:`bool`
    """
    # Return the comparison itself so the result is always a real bool;
    # falling off the end would yield None, contradicting the annotation.
    return job_info.status == str(Status.COMPLETED)
def is_stopped_desc() -> Tuple[str, str]:
    """
    Name and description of the "stopped Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "STOPPED"
    description = "the number of stopped jobs"
    return name, description
def is_stopped(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is stopped.

    :param job_info: Information about the Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if the job status equals ``str(Status.STOPPED)``, False otherwise
    :rtype: :class:`bool`
    """
    # Return the comparison itself so the result is always a real bool;
    # falling off the end would yield None, contradicting the annotation.
    return job_info.status == str(Status.STOPPED)
def is_not_started_desc() -> Tuple[str, str]:
    """
    Name and description of the "pending Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "PENDING"
    description = "the number of jobs labeler haven't even opened yet (created but not started)"
    return name, description
def is_not_started(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job has not been started yet.

    :param job_info: Information about the Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if the job status equals ``str(Status.PENDING)``, False otherwise
    :rtype: :class:`bool`
    """
    # Return the comparison itself so the result is always a real bool;
    # falling off the end would yield None, contradicting the annotation.
    return job_info.status == str(Status.PENDING)
def total_items_count_desc() -> Tuple[str, str]:
    """
    Name and description of the "total items in all Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "TOTAL"
    description = "the total number of items in all labeling jobs"
    return name, description
def total_items_count(job_info: NamedTuple) -> int:
    """
    Total number of items in a Labeling Job.

    :param job_info: Information about the Labeling Job; its ``images_count`` field is read.
    :type job_info: NamedTuple
    :return: Value of ``job_info.images_count``
    :rtype: :class:`int`
    """
    items_total = job_info.images_count
    return items_total
def labeled_items_count_desc() -> Tuple[str, str]:
    """
    Name and description of the "labeled items in all Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "LABELED"
    description = 'the total number of labeled items (labelers marked as "finished") in all labeling jobs'
    return name, description
# Count of images that the labeler marked as done
def labeled_items_count(job_info: NamedTuple) -> int:
    """
    Number of items in a Labeling Job that count as labeled.

    While the job is in progress this is ``job_info.finished_images_count``;
    for any other status every item of the job is counted.

    :param job_info: Information about the Labeling Job.
    :type job_info: NamedTuple
    :return: Number of labeled items
    :rtype: :class:`int`
    """
    return (
        job_info.finished_images_count
        if is_on_labeling(job_info)
        else total_items_count(job_info)
    )
def reviewed_items_count_desc() -> Tuple[str, str]:
    """
    Name and description of the "reviewed items in all Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "REVIEWED"
    description = 'the total number of reviewed items (reviewers marked as "accepted" or "rejected") in all labeling jobs'
    return name, description
# Count of images that the reviewer accepted or rejected
def reviewed_items_count(job_info: NamedTuple) -> int:
    """
    Number of reviewed items in a Labeling Job (accepted plus rejected).

    :param job_info: Information about the Labeling Job; its ``accepted_images_count``
        and ``rejected_images_count`` fields are read.
    :type job_info: NamedTuple
    :return: Sum of the accepted and rejected item counts
    :rtype: :class:`int`
    """
    accepted = job_info.accepted_images_count
    rejected = job_info.rejected_images_count
    return accepted + rejected
def accepted_items_count_desc() -> Tuple[str, str]:
    """
    Name and description of the "accepted items in all Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "ACCEPTED"
    description = 'the total number of accepted items (reviewers marked as "accepted") in all labeling jobs'
    return name, description
def accepted_items_count(job_info: NamedTuple) -> int:
    """
    Number of accepted items in a Labeling Job.

    :param job_info: Information about the Labeling Job; its ``accepted_images_count`` field is read.
    :type job_info: NamedTuple
    :return: Value of ``job_info.accepted_images_count``
    :rtype: :class:`int`
    """
    accepted = job_info.accepted_images_count
    return accepted
def rejected_items_count_desc() -> Tuple[str, str]:
    """
    Name and description of the "rejected items in all Labeling Jobs" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = "REJECTED"
    description = 'the total number of rejected items (reviewers marked as "rejected") in all labeling jobs'
    return name, description
def rejected_items_count(job_info: NamedTuple) -> int:
    """
    Number of rejected items in a Labeling Job.

    :param job_info: Information about the Labeling Job; its ``rejected_images_count`` field is read.
    :type job_info: NamedTuple
    :return: Value of ``job_info.rejected_images_count``
    :rtype: :class:`int`
    """
    rejected = job_info.rejected_images_count
    return rejected
def is_on_labeling_desc() -> Tuple[str, str]:
    """
    Name and description of the "Labeling Jobs with status IN_PROGRESS" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    # The return annotation is a tuple (not ``str``): a (name, description)
    # pair is returned, exactly like the other ``*_desc`` helpers.
    return 'LABELING IN PROGRESS', "the number of jobs with status IN_PROGRESS"


# labeling is in progress
def is_on_labeling(job_info: NamedTuple) -> bool:
    """
    Check whether labeling of a Labeling Job is in progress.

    :param job_info: Information about the Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if the job status equals ``str(Status.IN_PROGRESS)``, False otherwise
    :rtype: :class:`bool`
    """
    in_progress = str(Status.IN_PROGRESS)
    return job_info.status == in_progress
def is_labeling_started_desc() -> Tuple[str, str]:
    """
    Name and description of the "Labeling Jobs started by labeler" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = 'LABELING STARTED'
    description = 'the number of jobs that are started by labeler and with at least one labeled item (marked "done" by labeler)'
    return name, description
# Counts jobs that were started by the labeler and have at least one image marked "done" by the labeler
def is_labeling_started(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is in progress and already has labeled items.

    :param job_info: Information about the Labeling Job.
    :type job_info: NamedTuple
    :return: True if the job is in progress and its labeled items count is non-zero, False otherwise
    :rtype: :class:`bool`
    """
    # Guard clause: a job that is not being labeled cannot have labeling "started".
    if not is_on_labeling(job_info):
        return False
    return labeled_items_count(job_info) != 0
def is_on_review_desc() -> Tuple[str, str]:
    """
    Name and description of the "Labeling Jobs with status ON_REVIEW" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = 'ON REVIEW'
    description = "the number of jobs with status ON_REVIEW"
    return name, description
# Counts jobs with the "on review" status
def is_on_review(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is on review.

    :param job_info: Information about the Labeling Job; only its ``status`` field is read.
    :type job_info: NamedTuple
    :return: True if the job status equals ``str(Status.ON_REVIEW)``, False otherwise
    :rtype: :class:`bool`
    """
    on_review = str(Status.ON_REVIEW)
    return job_info.status == on_review
def is_review_started_desc() -> Tuple[str, str]:
    """
    Name and description of the "Labeling Jobs started by reviewer" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = 'REVIEW STARTED'
    description = 'the number of jobs that are started by reviewer - with at least one reviewed item (marked "accepted" or "rejected")'
    return name, description
# Counts jobs with at least one reviewed (accepted or rejected) item
def is_review_started(job_info: NamedTuple) -> bool:
    """
    Check whether a Labeling Job is on review and already has reviewed items.

    :param job_info: Information about the Labeling Job.
    :type job_info: NamedTuple
    :return: True if the job is on review and its reviewed (accepted or rejected)
        items count is non-zero, False otherwise
    :rtype: :class:`bool`
    """
    # Guard clause: a job that is not on review cannot have its review "started".
    if not is_on_review(job_info):
        return False
    return reviewed_items_count(job_info) != 0
def is_zero_labeling_desc() -> Tuple[str, str]:
    """
    Name and description of the "IN PROGRESS jobs with zero labeled items" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = 'ZERO LABELED'
    description = 'the number of jobs with status "IN PROGRESS" with zero labeled items'
    return name, description
def is_zero_reviewed_desc() -> Tuple[str, str]:
    """
    Name and description of the "ON REVIEW jobs with zero reviewed items" entry.

    :return: Entry name followed by its human-readable description
    :rtype: :class:`Tuple[str, str]`
    """
    name = 'ZERO REVIEWED'
    description = 'the number of jobs with status "ON REVIEW" with zero reviewed items'
    return name, description
def get_job_url(server_address: str, job: NamedTuple) -> str:
    """
    Build the URL of a Labeling Job.

    The URL is the server address joined with
    ``app/images/<team_id>/<workspace_id>/<project_id>/<dataset_id>?jobId=<id>``.

    :param server_address: Address of the server the Labeling Job lives on.
    :type server_address: str
    :param job: Information about the Labeling Job; its ``team_id``, ``workspace_id``,
        ``project_id``, ``dataset_id`` and ``id`` fields are read.
    :type job: NamedTuple
    :return: Labeling Job URL
    :rtype: :class:`str`
    :Usage example:

     .. code-block:: python

        address = 'https://app.supervise.ly'
        os.environ['SERVER_ADDRESS'] = address
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info = api.labeling_job.get_info_by_id(2)

        from supervisely.labeling_jobs.utils import get_job_url
        job_url = get_job_url(address, job_info)
        print(job_url)
        # Output: https://app.supervise.ly/app/images/4/8/58/54?jobId=2
    """
    relative_path = (
        f"app/images/{job.team_id}/{job.workspace_id}"
        f"/{job.project_id}/{job.dataset_id}?jobId={job.id}"
    )
    return urllib.parse.urljoin(server_address, relative_path)
def jobs_stats(server_address: str, jobs: List[NamedTuple], stats: List) -> DataFrame:
    """
    Build a per-job table with item statistics of Labeling Jobs.

    :param server_address: Supervisely server address, used to build a link to every job.
    :type server_address: str
    :param jobs: List of information about Labeling Jobs.
    :type jobs: List[NamedTuple]
    :param stats: Iterated in lockstep with ``jobs``; its elements are not read, but only
        as many jobs as there are ``stats`` entries end up in the table.
    :type stats: list
    :return: Statistics about Labeling Jobs items as `pd.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
    :rtype: :class:`pd.DataFrame`
    :Usage example:

     .. code-block:: python

        os.environ['SERVER_ADDRESS'] = 'https://app.supervise.ly'
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info1 = api.labeling_job.get_info_by_id(2)
        job_info2 = api.labeling_job.get_info_by_id(3)
        address = 'https://app.supervise.ly'

        from supervisely.labeling_jobs.utils import jobs_stats
        result = jobs_stats(address, [job_info1, job_info2], [1, 2])
        print(result)
        # Output:
        #    ID                                               NAME     STATUS  TOTAL  LABELED  REVIEWED  ACCEPTED  REJECTED        CREATED_AT
        # 0   2  <a href="https://app.supervise.ly/app/images/...  completed      3        3         3         2         1  08/04/2020 15:10
        # 1   3  <a href="https://app.supervise.ly/app/images/...  completed      2        2         2         2         0  08/04/2020 15:10
    """
    import pandas as pd

    # The NAME column holds an HTML link to the job.
    link_template = '<a href="{0}" rel="noopener noreferrer" target="_blank">{1}</a>'
    header = ['ID', 'NAME', 'STATUS', 'TOTAL', 'LABELED', 'REVIEWED', 'ACCEPTED', 'REJECTED', 'CREATED_AT']

    rows = [
        (
            job.id,
            link_template.format(get_job_url(server_address, job), job.name),
            job.status,
            total_items_count(job),
            labeled_items_count(job),
            reviewed_items_count(job),
            accepted_items_count(job),
            rejected_items_count(job),
            job.created_at,
        )
        # zip() keeps the pairing with ``stats`` even though its items are unused.
        for job, _ in zip(jobs, stats)
    ]

    table = pd.DataFrame(rows, columns=header)
    table['CREATED_AT'] = pd.to_datetime(table['CREATED_AT']).dt.strftime('%d/%m/%Y %H:%M')
    return table
def jobs_summary(jobs: List[NamedTuple]) -> DataFrame:
    """
    Get summary statistics about given Labeling Jobs.

    Every job is assigned to exactly one status bucket (completed, stopped, pending,
    labeling in progress, on review); the table lists the size of every bucket and
    its share of the total number of jobs.

    :param jobs: List of information about Labeling Jobs. May be empty, in which case
        all quantities are 0 and all percentages are reported as ``0.00 %``.
    :type jobs: List[NamedTuple]
    :raises RuntimeError: if a job matches none of the known statuses
    :return: Statistics about Labeling Jobs as `pd.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
    :rtype: :class:`pd.DataFrame`
    :Usage example:

     .. code-block:: python

        os.environ['SERVER_ADDRESS'] = 'https://app.supervise.ly'
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info1 = api.labeling_job.get_info_by_id(2)
        job_info2 = api.labeling_job.get_info_by_id(3)

        from supervisely.labeling_jobs.utils import jobs_summary
        result = jobs_summary([job_info1, job_info2])
        print(result)
        # Output:
        #    #            JOB STATUS  QUANTITY PERCENTAGE                                        DESCRIPTION
        # 0  0                 TOTAL         2   100.00 %            the total number of jobs in current team
        # 1  1             COMPLETED         2   100.00 %                        the number of completed jobs
        # 2  2               STOPPED         0     0.00 %                          the number of stopped jobs
        # 3  3               PENDING         0     0.00 %  the number of jobs labeler haven't even opened...
        # 4  4  LABELING IN PROGRESS         0     0.00 %          the number of jobs with status IN_PROGRESS
        # 5  5      LABELING STARTED         0     0.00 %  the number of jobs that are started by labeler...
        # 6  6          ZERO LABELED         0     0.00 %  the number of jobs with status "IN PROGRESS" w...
        # 7  7             ON REVIEW         0     0.00 %            the number of jobs with status ON_REVIEW
        # 8  8        REVIEW STARTED         0     0.00 %  the number of jobs that are started by reviewe...
        # 9  9         ZERO REVIEWED         0     0.00 %  the number of jobs with status "ON REVIEW" wit...
    """
    import pandas as pd

    count_total = len(jobs)
    count_completed = 0
    count_stopped = 0
    count_labeling_not_started = 0  # pending
    count_on_labeling = 0
    count_labeling_started = 0
    count_on_review = 0
    count_review_started = 0

    for job in jobs:
        if is_completed(job):
            count_completed += 1
        elif is_stopped(job):
            count_stopped += 1
        elif is_not_started(job):
            count_labeling_not_started += 1
        elif is_on_labeling(job):
            count_on_labeling += 1
            if is_labeling_started(job):
                count_labeling_started += 1
        elif is_on_review(job):
            count_on_review += 1
            if is_review_started(job):
                count_review_started += 1
        else:
            raise RuntimeError("Unhandled job status: {}".format(str(job)))

    # "Zero done" buckets are whatever is left of the in-progress / on-review jobs.
    count_labeling_zero_done = count_on_labeling - count_labeling_started
    count_review_zero_done = count_on_review - count_review_started

    table_rows_f = [total_desc,
                    is_completed_desc,
                    is_stopped_desc,
                    is_not_started_desc,
                    is_on_labeling_desc,
                    is_labeling_started_desc,
                    is_zero_labeling_desc,
                    is_on_review_desc,
                    is_review_started_desc,
                    is_zero_reviewed_desc]
    values = [count_total,
              count_completed,
              count_stopped,
              count_labeling_not_started,
              count_on_labeling,
              count_labeling_started,
              count_labeling_zero_done,
              count_on_review,
              count_review_started,
              count_review_zero_done]

    rows = []
    for index, (value, describe) in enumerate(zip(values, table_rows_f)):
        name, desc = describe()
        # With no jobs there is nothing to divide by; report 0 % instead of
        # raising ZeroDivisionError.
        share = value * 100 / count_total if count_total else 0.0
        rows.append((index, name, value, "{:.2f} %".format(share), desc))

    return pd.DataFrame(rows, columns=['#', 'JOB STATUS', 'QUANTITY', 'PERCENTAGE', 'DESCRIPTION'])
def images_summary(jobs: List[NamedTuple]) -> DataFrame:
    """
    Get summary statistics about the items of given Labeling Jobs.

    The item counters of all jobs are summed up; the table lists every sum and its
    share of the total number of items.

    :param jobs: List of information about Labeling Jobs. May be empty (or contain only
        jobs without items), in which case all percentages are reported as ``0.00 %``.
    :type jobs: List[NamedTuple]
    :return: Statistics about Labeling Jobs images
    :rtype: :class:`pd.DataFrame`
    :Usage example:

     .. code-block:: python

        os.environ['SERVER_ADDRESS'] = 'https://app.supervise.ly'
        os.environ['API_TOKEN'] = 'Your Supervisely API Token'
        api = sly.Api.from_env()

        job_info1 = api.labeling_job.get_info_by_id(2)
        job_info2 = api.labeling_job.get_info_by_id(3)

        from supervisely.labeling_jobs.utils import images_summary
        result = images_summary([job_info1, job_info2])
        print(result)
        # Output:
        #    # ITEM STATUS  QUANTITY PERCENTAGE                                        DESCRIPTION
        # 0  0       TOTAL         5   100.00 %     the total number of items in all labeling jobs
        # 1  1     LABELED         5   100.00 %  the total number of labeled items (labelers ma...
        # 2  2    REVIEWED         5   100.00 %  the total number of reviewed items (reviewers ...
        # 3  3    ACCEPTED         4    80.00 %  the total number of accepted items (reviewers ...
        # 4  4    REJECTED         1    20.00 %  the total number of rejected items (reviewers ...
    """
    import pandas as pd

    count_total_items = 0
    count_labeled_items = 0
    count_reviewed_items = 0
    count_accepted_items = 0
    count_rejected_items = 0

    for job in jobs:
        count_total_items += total_items_count(job)
        count_labeled_items += labeled_items_count(job)
        count_reviewed_items += reviewed_items_count(job)
        count_accepted_items += accepted_items_count(job)
        count_rejected_items += rejected_items_count(job)

    values_items = [count_total_items,
                    count_labeled_items,
                    count_reviewed_items,
                    count_accepted_items,
                    count_rejected_items]
    items_f = [total_items_count_desc,
               labeled_items_count_desc,
               reviewed_items_count_desc,
               accepted_items_count_desc,
               rejected_items_count_desc]

    rows = []
    for index, (value, describe) in enumerate(zip(values_items, items_f)):
        name, desc = describe()
        # With zero items in total there is nothing to divide by; report 0 %
        # instead of raising ZeroDivisionError.
        share = value * 100 / count_total_items if count_total_items else 0.0
        rows.append((index, name, value, "{:.2f} %".format(share), desc))

    return pd.DataFrame(rows, columns=['#', 'ITEM STATUS', 'QUANTITY', 'PERCENTAGE', 'DESCRIPTION'])
def classes_summary(stats: List) -> DataFrame:
    """
    Aggregate per-class statistics over several stats entries.

    Entries sharing a class id are merged: their image and label counts are summed,
    while name, shape and color are taken from the first entry seen for that id.

    :param stats: List of mappings; each one is indexed with ``constants.CLASSES_STATS``
        to obtain its per-class entries.
    :type stats: list
    :return: Table with one row per class id, in order of first appearance
    :rtype: :class:`pd.DataFrame`
    """
    import pandas as pd

    # HTML snippet: a small colored dot followed by the class name.
    label_template = '<b style="display: inline-block; border-radius: 50%; background: {}; width: 8px; height: 8px"></b> {}'

    totals = {}
    for stat in stats:
        for entry in stat[constants.CLASSES_STATS]:
            key = entry[ApiField.ID]
            title = entry[ApiField.NAME]
            shape = entry[ApiField.SHAPE]
            images = entry[ApiField.IMAGES_COUNT]
            labels = entry[ApiField.LABELS_COUNT]
            color = entry[ApiField.COLOR]

            # First occurrence creates the accumulator; later ones only add to it.
            bucket = totals.setdefault(key, {ApiField.NAME: title,
                                             ApiField.SHAPE: shape,
                                             ApiField.COLOR: color,
                                             ApiField.IMAGES_COUNT: 0,
                                             ApiField.LABELS_COUNT: 0})
            bucket[ApiField.IMAGES_COUNT] += images
            bucket[ApiField.LABELS_COUNT] += labels

    rows = [
        (
            index,
            label_template.format(info[ApiField.COLOR], info[ApiField.NAME]),
            info[ApiField.SHAPE],
            info[ApiField.IMAGES_COUNT],
            info[ApiField.LABELS_COUNT],
        )
        for index, info in enumerate(totals.values())
    ]
    return pd.DataFrame(rows, columns=['#', 'CLASS', 'SHAPE', 'IMAGES COUNT', 'OBJECTS COUNT'])


def tags_summary(stats: List) -> DataFrame:
    """
    Aggregate per-tag statistics over several stats entries.

    Entries sharing a tag id are merged: their image and figure counts are summed,
    while name and color are taken from the first entry seen for that id. Entries
    that carry a ``"parentId"`` key are skipped.

    :param stats: List of mappings; each one is indexed with ``"job"`` and then
        ``constants.TAGS_STATS`` to obtain its per-tag entries.
    :type stats: list
    :return: Table with one row per tag id, in order of first appearance
    :rtype: :class:`pd.DataFrame`
    """
    import pandas as pd

    # HTML snippet: a small colored dot followed by the tag name.
    label_template = '<b style="display: inline-block; border-radius: 50%; background: {}; width: 8px; height: 8px"></b> {}'

    totals = {}
    for stat in stats:
        for entry in stat["job"][constants.TAGS_STATS]:
            if "parentId" in entry:
                # @TODO: consider parentId (for oneOf cases)
                continue

            key = entry[ApiField.ID]
            title = entry[ApiField.NAME]
            images = entry[ApiField.IMAGES]
            figures = entry[ApiField.FIGURES]
            color = entry[ApiField.COLOR]

            # First occurrence creates the accumulator; later ones only add to it.
            bucket = totals.setdefault(key, {ApiField.NAME: title,
                                             ApiField.COLOR: color,
                                             ApiField.IMAGES_COUNT: 0,
                                             ApiField.LABELS_COUNT: 0})
            bucket[ApiField.IMAGES_COUNT] += images
            bucket[ApiField.LABELS_COUNT] += figures

    rows = [
        (
            index,
            label_template.format(info[ApiField.COLOR], info[ApiField.NAME]),
            info[ApiField.IMAGES_COUNT],
            info[ApiField.LABELS_COUNT],
        )
        for index, info in enumerate(totals.values())
    ]
    return pd.DataFrame(rows, columns=['#', 'TAG', 'IMAGES COUNT', 'OBJECTS COUNT'])