etesync-server/etebase_fastapi/collection.py

504 lines
17 KiB
Python
Raw Normal View History

2020-12-25 09:12:22 +00:00
import typing as t
2020-12-25 17:23:46 +00:00
from asgiref.sync import sync_to_async
2020-12-25 09:12:22 +00:00
from django.contrib.auth import get_user_model
2020-12-26 10:39:20 +00:00
from django.core import exceptions as django_exceptions
from django.core.files.base import ContentFile
from django.db import transaction
2020-12-25 09:12:22 +00:00
from django.db.models import Q
from django.db.models import QuerySet
2020-12-27 16:54:06 +00:00
from fastapi import APIRouter, Depends, status, Request
2020-12-27 14:04:33 +00:00
from pydantic import BaseModel
2020-12-25 09:12:22 +00:00
2020-12-26 10:39:20 +00:00
from django_etebase import models
2020-12-25 09:12:22 +00:00
from .authentication import get_authenticated_user
from .exceptions import ValidationError, transform_validation_error
2020-12-25 09:12:22 +00:00
from .msgpack import MsgpackRoute, MsgpackResponse
2020-12-27 15:58:15 +00:00
from .stoken_handler import filter_by_stoken_and_limit, filter_by_stoken, get_stoken_obj, get_queryset_stoken
2020-12-27 16:54:06 +00:00
from .utils import get_object_or_404, Context, Prefetch, PrefetchQuery
2020-12-25 09:12:22 +00:00
User = get_user_model()
collection_router = APIRouter(route_class=MsgpackRoute)
default_queryset: QuerySet = models.Collection.objects.all()
2020-12-27 15:02:36 +00:00
default_item_queryset: QuerySet = models.CollectionItem.objects.all()
2020-12-25 09:12:22 +00:00
class ListMulti(BaseModel):
collectionTypes: t.List[bytes]
class CollectionItemRevisionInOut(BaseModel):
uid: str
meta: bytes
deleted: bool
chunks: t.List[t.Tuple[str, t.Optional[bytes]]]
class Config:
orm_mode = True
@classmethod
2020-12-27 12:23:19 +00:00
def from_orm_context(
cls: t.Type["CollectionItemRevisionInOut"], obj: models.CollectionItemRevision, context: Context
) -> "CollectionItemRevisionInOut":
2020-12-27 16:12:16 +00:00
chunks = []
for chunk_relation in obj.chunks_relation.all():
chunk_obj = chunk_relation.chunk
if context.prefetch == "auto":
with open(chunk_obj.chunkFile.path, "rb") as f:
chunks.append((chunk_obj.uid, f.read()))
else:
chunks.append((chunk_obj.uid,))
return cls(uid=obj.uid, meta=obj.meta, deleted=obj.deleted, chunks=chunks)
2020-12-27 14:04:33 +00:00
class CollectionItemCommon(BaseModel):
2020-12-25 09:12:22 +00:00
uid: str
version: int
encryptionKey: t.Optional[bytes]
content: CollectionItemRevisionInOut
2020-12-27 14:04:33 +00:00
2020-12-27 14:04:33 +00:00
class CollectionItemOut(CollectionItemCommon):
class Config:
orm_mode = True
@classmethod
2020-12-27 12:23:19 +00:00
def from_orm_context(
cls: t.Type["CollectionItemOut"], obj: models.CollectionItem, context: Context
) -> "CollectionItemOut":
return cls(
uid=obj.uid,
version=obj.version,
encryptionKey=obj.encryptionKey,
etag=obj.etag,
content=CollectionItemRevisionInOut.from_orm_context(obj.content, context),
)
2020-12-25 09:12:22 +00:00
2020-12-27 14:04:33 +00:00
class CollectionItemIn(CollectionItemCommon):
etag: t.Optional[str]
class CollectionCommon(BaseModel):
2020-12-25 09:12:22 +00:00
collectionType: bytes
2020-12-27 14:04:33 +00:00
collectionKey: bytes
class CollectionOut(CollectionCommon):
accessLevel: models.AccessLevels
2020-12-25 09:12:22 +00:00
stoken: str
item: CollectionItemOut
@classmethod
def from_orm_context(cls: t.Type["CollectionOut"], obj: models.Collection, context: Context) -> "CollectionOut":
2020-12-27 12:23:19 +00:00
member: CollectionMember = obj.members.get(user=context.user)
2020-12-25 09:12:22 +00:00
collection_type = member.collectionType
ret = cls(
2020-12-25 09:12:22 +00:00
collectionType=collection_type and collection_type.uid,
collectionKey=member.encryptionKey,
accessLevel=member.accessLevel,
stoken=obj.stoken,
2020-12-27 12:23:19 +00:00
item=CollectionItemOut.from_orm_context(obj.main_item, context),
2020-12-25 09:12:22 +00:00
)
return ret
2020-12-25 09:12:22 +00:00
2020-12-27 14:04:33 +00:00
class CollectionIn(CollectionCommon):
item: CollectionItemIn
2020-12-27 15:10:59 +00:00
class RemovedMembershipOut(BaseModel):
uid: str
2020-12-27 15:02:36 +00:00
class CollectionListResponse(BaseModel):
2020-12-25 09:12:22 +00:00
data: t.List[CollectionOut]
stoken: t.Optional[str]
done: bool
2020-12-27 19:41:31 +00:00
removedMemberships: t.Optional[t.List[RemovedMembershipOut]]
2020-12-27 15:10:59 +00:00
2020-12-25 09:12:22 +00:00
2020-12-27 15:02:36 +00:00
class CollectionItemListResponse(BaseModel):
data: t.List[CollectionItemOut]
stoken: t.Optional[str]
done: bool
2020-12-27 16:34:40 +00:00
class CollectionItemRevisionListResponse(BaseModel):
data: t.List[CollectionItemRevisionInOut]
iterator: t.Optional[str]
done: bool
2020-12-27 15:58:15 +00:00
class CollectionItemBulkGetIn(BaseModel):
uid: str
etag: t.Optional[str]
class ItemDepIn(BaseModel):
etag: str
uid: str
def validate_db(self):
item = models.CollectionItem.objects.get(uid=self.uid)
etag = self.etag
if item.etag != etag:
raise ValidationError(
"wrong_etag",
"Wrong etag. Expected {} got {}".format(item.etag, etag),
status_code=status.HTTP_409_CONFLICT,
)
class ItemBatchIn(BaseModel):
2020-12-27 14:04:33 +00:00
items: t.List[CollectionItemIn]
deps: t.Optional[ItemDepIn]
def validate_db(self):
if self.deps is not None:
for key, _value in self.deps:
getattr(self.deps, key).validate_db()
2020-12-25 09:12:22 +00:00
@sync_to_async
2020-12-27 15:02:36 +00:00
def collection_list_common(
queryset: QuerySet,
user: User,
stoken: t.Optional[str],
limit: int,
prefetch: Prefetch,
) -> MsgpackResponse:
2020-12-27 15:33:01 +00:00
result, new_stoken_obj, done = filter_by_stoken_and_limit(
stoken, limit, queryset, models.Collection.stoken_annotation
)
2020-12-25 09:53:11 +00:00
new_stoken = new_stoken_obj and new_stoken_obj.uid
2020-12-27 12:23:19 +00:00
context = Context(user, prefetch)
data: t.List[CollectionOut] = [CollectionOut.from_orm_context(item, context) for item in result]
2020-12-27 15:10:59 +00:00
stoken_obj = get_stoken_obj(stoken)
removedMemberships = None
if stoken_obj is not None:
# FIXME: honour limit? (the limit should be combined for data and this because of stoken)
remed_qs = models.CollectionMemberRemoved.objects.filter(user=user, stoken__id__gt=stoken_obj.id)
if not done and new_stoken_obj is not None:
# We only filter by the new_stoken if we are not done. This is because if we are done, the new stoken
# can point to the most recent collection change rather than most recent removed membership.
remed_qs = remed_qs.filter(stoken__id__lte=new_stoken_obj.id)
remed = remed_qs.values_list("collection__uid", flat=True)
if len(remed) > 0:
removedMemberships = [{"uid": x} for x in remed]
ret = CollectionListResponse(data=data, stoken=new_stoken, done=done, removedMemberships=removedMemberships)
2020-12-25 09:12:22 +00:00
return MsgpackResponse(content=ret)
def get_collection_queryset(user: User, queryset: QuerySet) -> QuerySet:
return queryset.filter(members__user=user)
2020-12-27 15:02:36 +00:00
def get_item_queryset(
user: User, collection_uid: str, queryset: QuerySet = default_item_queryset
) -> t.Tuple[models.Collection, QuerySet]:
collection = get_object_or_404(get_collection_queryset(user, models.Collection.objects), uid=collection_uid)
2020-12-27 15:02:36 +00:00
# XXX Potentially add this for performance: .prefetch_related('revisions__chunks')
queryset = queryset.filter(collection__pk=collection.pk, revisions__current=True)
return collection, queryset
2020-12-25 09:12:22 +00:00
@collection_router.post("/list_multi/")
2020-12-25 09:53:11 +00:00
async def list_multi(
data: ListMulti,
stoken: t.Optional[str] = None,
limit: int = 50,
user: User = Depends(get_authenticated_user),
prefetch: Prefetch = PrefetchQuery,
2020-12-25 09:53:11 +00:00
):
2020-12-25 09:12:22 +00:00
queryset = get_collection_queryset(user, default_queryset)
2020-12-27 15:10:59 +00:00
2020-12-25 09:12:22 +00:00
# FIXME: Remove the isnull part once we attach collection types to all objects ("collection-type-migration")
queryset = queryset.filter(
Q(members__collectionType__uid__in=data.collectionTypes) | Q(members__collectionType__isnull=True)
)
2020-12-27 15:10:59 +00:00
return await collection_list_common(queryset, user, stoken, limit, prefetch)
2020-12-25 17:23:46 +00:00
@collection_router.post("/list/")
async def collection_list(
req: Request,
2020-12-27 15:33:01 +00:00
stoken: t.Optional[str] = None,
limit: int = 50,
prefetch: Prefetch = PrefetchQuery,
user: User = Depends(get_authenticated_user),
):
2020-12-27 15:33:01 +00:00
queryset = get_collection_queryset(user, default_queryset)
return await collection_list_common(queryset, user, stoken, limit, prefetch)
def process_revisions_for_item(item: models.CollectionItem, revision_data: CollectionItemRevisionInOut):
2020-12-26 10:39:20 +00:00
chunks_objs = []
revision = models.CollectionItemRevision(**revision_data.dict(exclude={"chunks"}), item=item)
revision.validate_unique() # Verify there aren't any validation issues
for chunk in revision_data.chunks:
uid = chunk[0]
chunk_obj = models.CollectionItemChunk.objects.filter(uid=uid).first()
content = chunk[1] if len(chunk) > 1 else None
# If the chunk already exists we assume it's fine. Otherwise, we upload it.
if chunk_obj is None:
if content is not None:
chunk_obj = models.CollectionItemChunk(uid=uid, collection=item.collection)
chunk_obj.chunkFile.save("IGNORED", ContentFile(content))
chunk_obj.save()
else:
raise ValidationError("chunk_no_content", "Tried to create a new chunk without content")
chunks_objs.append(chunk_obj)
stoken = models.Stoken.objects.create()
revision.stoken = stoken
revision.save()
for chunk in chunks_objs:
models.RevisionChunkRelation.objects.create(chunk=chunk, revision=revision)
return revision
2020-12-26 16:01:55 +00:00
def _create(data: CollectionIn, user: User):
2020-12-26 10:39:20 +00:00
with transaction.atomic():
if data.item.etag is not None:
raise ValidationError("bad_etag", "etag is not null")
instance = models.Collection(uid=data.item.uid, owner=user)
try:
instance.validate_unique()
except django_exceptions.ValidationError:
raise ValidationError(
"unique_uid", "Collection with this uid already exists", status_code=status.HTTP_409_CONFLICT
)
instance.save()
main_item = models.CollectionItem.objects.create(
uid=data.item.uid, version=data.item.version, collection=instance
2020-12-26 10:39:20 +00:00
)
instance.main_item = main_item
instance.save()
# TODO
process_revisions_for_item(main_item, data.item.content)
collection_type_obj, _ = models.CollectionType.objects.get_or_create(uid=data.collectionType, owner=user)
models.CollectionMember(
collection=instance,
stoken=models.Stoken.objects.create(),
user=user,
accessLevel=models.AccessLevels.ADMIN,
encryptionKey=data.collectionKey,
collectionType=collection_type_obj,
).save()
2020-12-25 17:23:46 +00:00
@collection_router.post("/")
2020-12-26 16:01:55 +00:00
async def create(data: CollectionIn, user: User = Depends(get_authenticated_user)):
2020-12-26 10:39:20 +00:00
await sync_to_async(_create)(data, user)
2020-12-25 17:23:46 +00:00
return MsgpackResponse({}, status_code=status.HTTP_201_CREATED)
@collection_router.get("/{uid}/")
2020-12-27 15:03:17 +00:00
def collection_get(uid: str, user: User = Depends(get_authenticated_user), prefetch: Prefetch = PrefetchQuery):
obj = get_collection_queryset(user, default_queryset).get(uid=uid)
2020-12-27 12:23:19 +00:00
ret = CollectionOut.from_orm_context(obj, Context(user, prefetch))
return MsgpackResponse(ret)
2020-12-27 14:39:20 +00:00
def item_create(item_model: CollectionItemIn, collection: models.Collection, validate_etag: bool):
"""Function that's called when this serializer creates an item"""
etag = item_model.etag
revision_data = item_model.content
uid = item_model.uid
Model = models.CollectionItem
with transaction.atomic():
instance, created = Model.objects.get_or_create(
2020-12-27 14:39:20 +00:00
uid=uid, collection=collection, defaults=item_model.dict(exclude={"uid", "etag", "content"})
)
cur_etag = instance.etag if not created else None
# If we are trying to update an up to date item, abort early and consider it a success
if cur_etag == revision_data.uid:
return instance
if validate_etag and cur_etag != etag:
raise ValidationError(
"wrong_etag",
"Wrong etag. Expected {} got {}".format(cur_etag, etag),
status_code=status.HTTP_409_CONFLICT,
)
if not created:
# We don't have to use select_for_update here because the unique constraint on current guards against
# the race condition. But it's a good idea because it'll lock and wait rather than fail.
current_revision = instance.revisions.filter(current=True).select_for_update().first()
current_revision.current = None
current_revision.save()
try:
process_revisions_for_item(instance, revision_data)
except django_exceptions.ValidationError as e:
transform_validation_error("content", e)
return instance
2020-12-27 15:02:36 +00:00
@collection_router.get("/{collection_uid}/item/{uid}/")
def item_get(
collection_uid: str, uid: str, user: User = Depends(get_authenticated_user), prefetch: Prefetch = PrefetchQuery
):
_, queryset = get_item_queryset(user, collection_uid)
obj = queryset.get(uid=uid)
ret = CollectionItemOut.from_orm_context(obj, Context(user, prefetch))
return MsgpackResponse(ret)
@sync_to_async
def item_list_common(
queryset: QuerySet,
user: User,
stoken: t.Optional[str],
limit: int,
prefetch: Prefetch,
) -> MsgpackResponse:
result, new_stoken_obj, done = filter_by_stoken_and_limit(
stoken, limit, queryset, models.CollectionItem.stoken_annotation
)
new_stoken = new_stoken_obj and new_stoken_obj.uid
context = Context(user, prefetch)
data: t.List[CollectionItemOut] = [CollectionItemOut.from_orm_context(item, context) for item in result]
ret = CollectionItemListResponse(data=data, stoken=new_stoken, done=done)
return MsgpackResponse(content=ret)
@collection_router.get("/{collection_uid}/item/")
async def item_list(
collection_uid: str,
stoken: t.Optional[str] = None,
limit: int = 50,
prefetch: Prefetch = PrefetchQuery,
withCollection: bool = False,
user: User = Depends(get_authenticated_user),
):
_, queryset = await sync_to_async(get_item_queryset)(user, collection_uid)
if not withCollection:
queryset = queryset.filter(parent__isnull=True)
response = await item_list_common(queryset, user, stoken, limit, prefetch)
return response
def item_bulk_common(data: ItemBatchIn, user: User, stoken: t.Optional[str], uid: str, validate_etag: bool):
queryset = get_collection_queryset(user, default_queryset)
with transaction.atomic(): # We need this for locking the collection object
collection_object = queryset.select_for_update().get(uid=uid)
if stoken is not None and stoken != collection_object.stoken:
raise ValidationError("stale_stoken", "Stoken is too old", status_code=status.HTTP_409_CONFLICT)
# XXX-TOM: make sure we return compatible errors
data.validate_db()
for item in data.items:
2020-12-27 14:39:20 +00:00
item_create(item, collection_object, validate_etag)
return MsgpackResponse({})
2020-12-27 16:34:40 +00:00
@collection_router.get("/{collection_uid}/item/{uid}/revision/")
def item_revisions(
collection_uid: str,
uid: str,
limit: int = 50,
iterator: t.Optional[str] = None,
prefetch: Prefetch = PrefetchQuery,
user: User = Depends(get_authenticated_user),
):
_, items = get_item_queryset(user, collection_uid)
item = get_object_or_404(items, uid=uid)
queryset = item.revisions.order_by("-id")
if iterator is not None:
iterator_obj = get_object_or_404(queryset, uid=iterator)
queryset = queryset.filter(id__lt=iterator_obj.id)
result = list(queryset[: limit + 1])
if len(result) < limit + 1:
done = True
else:
done = False
result = result[:-1]
context = Context(user, prefetch)
ret_data = [CollectionItemRevisionInOut.from_orm_context(revision, context) for revision in result]
iterator = ret_data[-1].uid if len(result) > 0 else None
ret = CollectionItemRevisionListResponse(
data=ret_data,
iterator=iterator,
done=done,
)
return MsgpackResponse(ret)
2020-12-27 15:58:15 +00:00
@collection_router.post("/{collection_uid}/item/fetch_updates/")
def fetch_updates(
collection_uid: str,
data: t.List[CollectionItemBulkGetIn],
stoken: t.Optional[str] = None,
prefetch: Prefetch = PrefetchQuery,
user: User = Depends(get_authenticated_user),
):
_, queryset = get_item_queryset(user, collection_uid)
# FIXME: make configurable?
item_limit = 200
if len(data) > item_limit:
raise ValidationError("too_many_items", "Request has too many items.", status_code=status.HTTP_400_BAD_REQUEST)
queryset, stoken_rev = filter_by_stoken(stoken, queryset, models.CollectionItem.stoken_annotation)
uids, etags = zip(*[(item.uid, item.etag) for item in data])
revs = models.CollectionItemRevision.objects.filter(uid__in=etags, current=True)
queryset = queryset.filter(uid__in=uids).exclude(revisions__in=revs)
new_stoken_obj = get_queryset_stoken(queryset)
new_stoken = new_stoken_obj and new_stoken_obj.uid
stoken = stoken_rev and getattr(stoken_rev, "uid", None)
new_stoken = new_stoken or stoken
context = Context(user, prefetch)
ret = CollectionItemListResponse(
data=[CollectionItemOut.from_orm_context(item, context) for item in queryset],
stoken=new_stoken,
done=True, # we always return all the items, so it's always done
)
return MsgpackResponse(ret)
@collection_router.post("/{collection_uid}/item/transaction/")
def item_transaction(
collection_uid: str, data: ItemBatchIn, stoken: t.Optional[str] = None, user: User = Depends(get_authenticated_user)
):
return item_bulk_common(data, user, stoken, collection_uid, validate_etag=True)
@collection_router.post("/{collection_uid}/item/batch/")
def item_batch(
collection_uid: str, data: ItemBatchIn, stoken: t.Optional[str] = None, user: User = Depends(get_authenticated_user)
):
return item_bulk_common(data, user, stoken, collection_uid, validate_etag=False)