I've got a few models and am trying to speed up the page where I list out users.
The issue is that I was leveraging model methods to display some of the data - but when I listed the Users out it was hitting the DB multiple times per User
which ended up with hundreds of extra queries (thousands when there were thousands of User
objects in the list) so it was a serious performance hit.
I've since began using annotate
and prefetch_related
which has cut the queries down significantly. I've just got one bit I can't figure out how to annotate.
I have a model method (on Summation
model) I use to get a summary of Evaluation
data for a user like this:
def evaluations_summary(self):
evaluations_summary = (
self.evaluation_set.all()
.values("evaluation_type__name")
.annotate(Count("evaluation_type"))
)
return evaluations_summary
I'm trying to figure out how to annotate that particular query on a User
object.
So the relationship looks like this User
has multiple Summation
s, but only one is ever 'active', which is the one we display in the User
list. Each Summation
has multiple Evaluations
- the summary of which we're trying to show as well.
Here is a summary of the relevant parts of code (including the Summation
model method which gives an example of what is currently 'working' to display the data as needed) - I have also made a pastebin example for easier viewing.
# MODELS
class User(AbstractUser):
employee_no = models.IntegerField(default=1)
...all the other usual attributes...
class Summation(CreateUpdateMixin, CreateUpdateUserMixin):
# CreateUpdateMixin adds 'created_at' & 'updated_at
# CreateUpdateUserMixin adds 'created_by' & 'updated_by'
employee = models.ForeignKey(
User, on_delete=models.PROTECT, related_name="%(class)s_employee"
)
report_url = models.CharField(max_length=350, blank=True)
...other unimportant attributes...
def evaluations_summary(self):
evaluations_summary = (
self.evaluation_set.all()
.values("evaluation_type__name")
.annotate(Count("evaluation_type"))
)
return evaluations_summary
class Evaluation(CreateUpdateMixin, CreateUpdateUserMixin):
summation = models.ForeignKey(Summation, on_delete=models.PROTECT)
evaluation_type = models.ForeignKey(
EvaluationType, on_delete=models.PROTECT
)
evaluation_level = models.ForeignKey(
EvaluationLevel, on_delete=models.PROTECT
)
evaluation_date = models.DateField(
auto_now=False, auto_now_add=False, null=True, blank=True
)
published = models.BooleanField(default=False)
class EvaluationLevel(CreateUpdateMixin):
name = models.CharField(max_length=50)
description = models.CharField(max_length=50)
class EvaluationType(CreateUpdateMixin):
name = models.CharField(max_length=50)
description = models.CharField(max_length=50)
evaluation_levels = models.ManyToManyField(EvaluationLevel)
# SERIALIZERS
class UserSerializer(serializers.HyperlinkedModelSerializer):
multiple_locations = serializers.BooleanField()
multiple_jobs = serializers.BooleanField()
summation_status_due_date = serializers.DateField()
summation_employee = SummationSerializer(many=True, read_only=True)
evaluations_summary = serializers.SerializerMethodField()
class Meta:
model = User
fields = [
"url",
"id",
"username",
"first_name",
"last_name",
"full_name",
"email",
"is_staff",
"multiple_locations",
"multiple_jobs",
"summation_status_due_date",
"summation_employee",
"evaluations_summary",
]
def get_evaluations_summary(self, obj):
return (
obj.summation_employee__evaluation_set.all()
.values("evaluation_type__name")
.annotate(Count("evaluation_type"))
)
# CURRENT ANNOTATIONS
# Subqueries for evaluation_summary
active_summations = (
Summation.objects.filter(employee=OuterRef("pk"), locked=False)
)
evaluations_set = (
Evaluation.objects.filter(summation__in=active_summations)
.order_by()
.values("evaluation_type__name")
)
summary_set = evaluations_set.annotate(Count("evaluation_type"))
# the 'summation_employee__evaluation_set' prefetch does not seem
# to make an impact on queries needed
user_list = (
User.objects.prefetch_related("summation_employee")
.prefetch_related("summation_employee__evaluation_set")
.filter(id__in=all_user_ids)
# Get the total locations and if > 1, set multiple_locations to True
.annotate(total_locations=Subquery(total_locations))
.annotate(
multiple_locations=Case(
When(total_locations__gt=1, then=Value(True)),
default=Value(False),
output_field=BooleanField(),
)
)
# Get the total jobs and if > 1 set mutiple_jobs to True
.annotate(total_jobs=Subquery(total_jobs))
.annotate(
multiple_jobs=Case(
When(total_jobs__gt=1, then=Value(True)),
default=Value(False),
output_field=BooleanField(),
)
)
# Get the due_date of the summation from the SummationStatus object
.annotate(
summation_status_due_date=Subquery(
summation_status.values("summation_due")
)
)
# I need to add the annotation here for the 'evaluations_summary' to avoid
# having the database hit for every user (which could possibly range into the
# thousands in certain cases)
# I have tried a number of ways to obtain what I'm looking for
.annotate(
evaluations_summary=Subquery(
evaluations_set.order_by()
.values("evaluation_type__name")
.annotate(Count("evaluation_type"))
)
)
# this annotation gives the error: Only one expression can be specified in the
# select list when the subquery is not introduced with EXISTS.
Is it even possible to transition that model method annotation?? Am I close?
django.db.utils.ProgrammingError: subquery must return only one column
. It seems that the.annotate(evaluations_summary=Subquery(
really returns 2 columns:name
andcount
. So, can you plz recheck your query. Also if you only need to figure out only the last annotation, plz simplify the query for Minimal, Reproducible Example – Yettayetti