Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions api/base/elasticsearch_dsl_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import datetime
import typing

import elasticsearch6_dsl as edsl
import elasticsearch8.dsl as esdsl
from rest_framework import generics, exceptions as drf_exceptions
from rest_framework.serializers import Serializer
from rest_framework.settings import api_settings as drf_settings
from api.base.settings.defaults import REPORT_FILENAME_FORMAT

Expand All @@ -23,7 +24,7 @@


class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, abc.ABC):
'''abstract view class using `elasticsearch6_dsl.Search` as a queryset-analogue
'''abstract view class using `elasticsearch8.dsl.Search` as a queryset-analogue

builds a `Search` based on `self.get_default_search()` and the request's
query parameters for filtering, sorting, and pagination -- fetches only
Expand All @@ -35,18 +36,18 @@ class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView,
ordering_fields: frozenset[str] = frozenset() # serializer field names

@abc.abstractmethod
def get_default_search(self) -> edsl.Search | None:
'''the base `elasticsearch6_dsl.Search` for this list, based on url path
def get_default_search(self) -> esdsl.Search | None:
'''the base `elasticsearch8.dsl.Search` for this list, based on url path

(common jsonapi query parameters will be considered automatically)
'''
...

FILE_RENDERER_CLASSES = {
FILE_RENDERER_CLASSES = (
MetricsReportsCsvRenderer,
MetricsReportsTsvRenderer,
MetricsReportsJsonRenderer,
}
)

def set_content_disposition(self, response, renderer: str):
"""Set the Content-Disposition header to prompt a file download with the appropriate filename.
Expand Down Expand Up @@ -75,7 +76,7 @@ def finalize_response(self, request, response, *args, **kwargs):
response = super().finalize_response(request, response, *args, **kwargs)
# Check if this is a direct download request or file renderer classes, set to the Content-Disposition header
# so filename and attachment for browser download
if isinstance(request.accepted_renderer, tuple(self.FILE_RENDERER_CLASSES)):
if isinstance(request.accepted_renderer, self.FILE_RENDERER_CLASSES):
self.set_content_disposition(response, request.accepted_renderer)

return response
Expand All @@ -95,7 +96,7 @@ def finalize_response(self, request, response, *args, **kwargs):
# (filtering handled in-view to reuse logic from FilterMixin)
filter_backends = ()

# note: because elasticsearch6_dsl.Search supports slicing and gives results when iterated on,
# note: because elasticsearch8.dsl.Search supports slicing and gives results when iterated on,
# it works fine with default pagination

# override rest_framework.generics.GenericAPIView
Expand Down Expand Up @@ -128,10 +129,17 @@ def get_queryset(self):
)
return self.__add_sort(_search)

def get_serializer_context(self):
return (
super().get_serializer_context()
if issubclass(self.get_serializer_class(), Serializer)
else {} # allow custom BaseSerializer-based serializer
)

###
# private methods

def __add_sort(self, search: edsl.Search) -> edsl.Search:
def __add_sort(self, search: esdsl.Search) -> esdsl.Search:
_elastic_sort = self.__get_elastic_sort()
return (search if _elastic_sort is None else search.sort(_elastic_sort))

Expand All @@ -148,17 +156,20 @@ def __get_elastic_sort(self) -> str | None:
raise drf_exceptions.ValidationError(
f'invalid value for {drf_settings.ORDERING_PARAM} query param (valid values: {", ".join(self.ordering_fields)})',
)
_serializer_field = self.get_serializer().fields[_sort_field]
_elastic_sort_field = _serializer_field.source
_elastic_sort_field = (
self.get_serializer().fields[_sort_field].source
if issubclass(self.get_serializer_class(), Serializer)
else _sort_field # allow custom BaseSerializer-based serializer
)
return (_elastic_sort_field if _ascending else f'-{_elastic_sort_field}')

def __add_search_filter(
self,
search: edsl.Search,
search: esdsl.Search,
elastic_field_name: str,
operator: str,
value: str,
) -> edsl.Search:
) -> esdsl.Search:
match operator: # operators from FilterMixin
case 'eq':
if value == '':
Expand Down
163 changes: 73 additions & 90 deletions api/base/metrics.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import re
from datetime import timedelta

import abc
import waffle
from django.utils import timezone

from api.base.exceptions import InvalidQueryStringError
from osf import features
from website.settings import PREPRINT_METRICS_START_DATE
from osf.metrics.es8_metrics import (
OsfCountedUsageEvent,
MonthlyPublicItemUsageReportEs8,
)
from osf.models.base import osfid_iri


class MetricsViewMixin:
class UsageMetricsViewMixin(abc.ABC):
"""Mixin for views that expose metrics via django-elasticsearch-metrics.
Enables metrics to be requested with a query parameter, like so: ::

Expand All @@ -18,110 +19,98 @@ class MetricsViewMixin:
Any subclass of this mixin MUST do the following:

* Use a serializer_class that subclasses MetricsSerializerMixin
* Define metric_map as a class variable. It should be dict mapping metric name
("downloads") to a Metric class (PreprintDownload)
* For list views: implement `get_annotated_queryset_with_metrics`
* For detail views: implement `add_metric_to_object`
* Call add_metrics_to_object(obj) to get `views` and/or `downloads`
assigned on the obj (according to query params)
"""
# Adapted from FilterMixin.QUERY_PATTERN
METRICS_QUERY_PATTERN = re.compile(r'^metrics\[(?P<metric_name>((?:,*\s*\w+)*))\]$')
TIMEDELTA_MAP = {
'daily': timedelta(hours=24),
'weekly': timedelta(days=7),
'monthly': timedelta(days=30),
'yearly': timedelta(days=365),
METRICS_QUERY_MAP = {
'metrics[views]': OsfCountedUsageEvent.ActionLabel.VIEW,
'metrics[downloads]': OsfCountedUsageEvent.ActionLabel.DOWNLOAD,
}
METRICS_ATTR_MAP = {
OsfCountedUsageEvent.ActionLabel.VIEW: 'views',
OsfCountedUsageEvent.ActionLabel.DOWNLOAD: 'downloads',
}
TIMESPAN_MAP = {
'daily': 'now-1d/d',
'weekly': 'now-1w/d',
'monthly': 'now-1M/d',
}
VALID_METRIC_PERIODS = {
'daily',
'weekly',
'monthly',
'yearly',
'total',
}

@property
def metric_map(self):
raise NotImplementedError('MetricsViewMixin subclasses must define a metric_map class variable.')

def get_annotated_queryset_with_metrics(self, queryset, metric_class, metric_name, after):
"""Return a queryset annotated with metrics. Use for list endpoints that expose metrics."""
raise NotImplementedError('MetricsViewMixin subclasses must define get_annotated_queryset_with_metrics().')

def add_metric_to_object(self, obj, metric_class, metric_name, after):
"""Set an attribute for a metric on obj. Use for detail endpoints that expose metrics.
Return the modified object.
"""
raise NotImplementedError('MetricsViewMixin subclasses must define add_metric_to_object().')

@property
def metrics_default_after(self):
"""Value to be used as the `after` in metrics queries if not otherwise specified.
Datetime or None.
"""
return None

@property
def metrics_requested(self):
return (
waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and
bool(self.parse_metric_query_params(self.request.query_params))
waffle.switch_is_active(features.ELASTICSEARCH_METRICS)
and any(_param in self.METRICS_QUERY_MAP for _param in self.request.query_params)
)

# Adapted from FilterMixin.parse_query_params
# TODO: Should we get rid of query_params argument and use self.request.query_params instead?
def parse_metric_query_params(self, query_params):
def get_item_iri(self, item):
return osfid_iri(item._id)

def parse_metric_query_params(self):
"""Parses query parameters to a dict usable for fetching metrics.

:param dict query_params:
:return dict of the format {
<metric_name>: {
'period': <[daily|weekly|monthly|yearly|total]>,
}
<usage_label>: <[daily|weekly|monthly|yearly|total]>,
}
"""
query = {}
for key, value in query_params.items():
match = self.METRICS_QUERY_PATTERN.match(key)
if match:
match_dict = match.groupdict()
metric_name = match_dict['metric_name']
query[metric_name] = value
for key, value in self.request.query_params.items():
_usage_label = self.METRICS_QUERY_MAP.get(key)
if _usage_label:
if value not in self.VALID_METRIC_PERIODS:
raise InvalidQueryStringError(f"Invalid period for metric: '{value}'", parameter='metrics')
query[_usage_label] = value
return query

def _add_metrics(self, queryset_or_obj, method):
"""Parse the ?metric[METRIC]=PERIOD query param, validate it, and
run ``method`` for each requested object.

This is used to share code between add_metric_to_object and get_metrics_queryset.
def add_metrics_to_object(self, obj):
"""Helper method used for detail views.
"""
metrics_requested = self.parse_metric_query_params(self.request.query_params)
if metrics_requested:
metric_map = self.metric_map
for metric, period in metrics_requested.items():
if metric not in metric_map:
raise InvalidQueryStringError(f"Invalid metric in query string: '{metric}'", parameter='metrics')
if period not in self.VALID_METRIC_PERIODS:
raise InvalidQueryStringError(f"Invalid period for metric: '{period}'", parameter='metrics')
metric_class = metric_map[metric]
if period == 'total':
after = self.metrics_default_after
for _action_label, _period in self.parse_metric_query_params().items():
_count = self._get_usage_count(self.get_item_iri(obj), _action_label, _period)
setattr(obj, self.METRICS_ATTR_MAP[_action_label], _count)

def _get_usage_count(self, item_iri, action_label, period):
_search = (
OsfCountedUsageEvent.search()
.filter('term', item_iri=item_iri)
.filter('term', action_labels=action_label.value)
)
_prior_count = 0
if _timespan := self.TIMESPAN_MAP.get(period):
_search = _search.filter('range', timestamp={'gte': _timespan})
else: # cumulative total
_latest_usage_report = self._get_latest_usage_report(item_iri)
if _latest_usage_report:
_search = _search.filter(
'range', timestamp={
'gte': _latest_usage_report.report_yearmonth.month_end(),
},
)
if action_label == OsfCountedUsageEvent.ActionLabel.VIEW:
_prior_count = _latest_usage_report.cumulative_view_count
elif action_label == OsfCountedUsageEvent.ActionLabel.DOWNLOAD:
_prior_count = _latest_usage_report.cumulative_download_count
else:
after = timezone.now() - self.TIMEDELTA_MAP[period]
queryset_or_obj = method(queryset_or_obj, metric_class, metric, after)
return queryset_or_obj

def add_metrics_to_object(self, obj):
"""Helper method used for detail views."""
return self._add_metrics(obj, method=self.add_metric_to_object)

def get_metrics_queryset(self, queryset):
"""Helper method used for list views."""
return self._add_metrics(queryset, method=self.get_annotated_queryset_with_metrics)
raise ValueError(f'unsupported action label {action_label!r}')
_response = _search[0:0].execute()
return _prior_count + _response.doc_count

def _get_latest_usage_report(self, item_iri):
_search = (
MonthlyPublicItemUsageReportEs8.search()
.filter('term', item_iri=item_iri)
.sort('-cycle_coverage')
)
_response = _search[0].execute()
return _response[0] if _response else None

# Override get_default_queryset for convenience
def get_default_queryset(self):
queryset = super().get_default_queryset()
return self.get_metrics_queryset(queryset)

class MetricsSerializerMixin:
@property
Expand All @@ -138,9 +127,3 @@ def get_meta(self, obj):
meta = meta or {'metrics': {}}
meta['metrics'][metric] = getattr(obj, metric)
return meta


class PreprintMetricsViewMixin(MetricsViewMixin):
@property
def metrics_default_after(self):
return PREPRINT_METRICS_START_DATE
Loading