Skip to content

Commit

Permalink
Nextcloud integration (#247)
Browse files Browse the repository at this point in the history
* add nextcloud collections

remove unused import

add admin interface to add nextcloud collection

fix nextcloud collection api

update url for nextcloud collections

add snoop to allowed hosts

add url to allowed hosts

add signal to sync nextcloudcollections after new collection is created

add command to sync nextcloud signals

fix signals import

fix handle function in management command

add task to sync nextcloud directories

unused imports

fix typo in model

fix typo in model

add webdav dependency

add migration

add webdav password model

fix typo in model

add webdavpassword migration

add admin field to add webdav password for user

add task that syncs nextcloud with full functionality

add command to manually sync nextcloud directories

refactor for debugging

add debugging logs

use webdav password correctly

add missing argument to function call

fix nextcloud recursion

fix function call

create instances properly

extract directory name from path

update or create directory entries

fix nextcloud recursion

refactor nextcloudcollection model and add admin

comment nextcloud collection admin

make nullable directory for migration

make directory not nullable

add nextcloud collection admin

add nextcloud directory admin

add migration

extend nextcloud collection admin

add fields to nextcloud collection admin

fix nextcloud collection admin

add periodic execution of nextcloud sync

remove unused import

fix runserver script

fix nextcloud recursion and skip first element

debug prints

debug

debug

flake

fix bug and cleanup

fix update or create call

use path to get object

remove non existent directories from db

fix get all typo

add timezone

make timezone aware timestamp

create directory with timezone aware timestamp

don't delete directories

clean up

create collection in signal

make index lowercase

add settings fields to model

add migration for collection settings

add collection settings to admin interface

allow blank fields

migration to allow blank fields

remove spaces from collection names when syncing

rebase: realign migrations

* squash migrations

* fix minor admin issues related to nextcloud collections

* improve admin interface for nextcloud collections

* disable fields if features not enabled

* remove unused import

* squash migrations

* fix error message and validator bug, avoid multiple bad webdav requests

* fix nextcloud directory fetching

* don't delete nextcloudcollections when syncing

* add command to delete nextcloudcollection

---------

Co-authored-by: Kjell Knudsen <[email protected]>
  • Loading branch information
k-jell and Kjell Knudsen authored Apr 9, 2024
1 parent 19c8045 commit 406ae48
Show file tree
Hide file tree
Showing 18 changed files with 881 additions and 84 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ sentry-sdk = {extras = ["django"], version = "*"}
opentelemetry-distro = "*"
opentelemetry-exporter-otlp = "*"
django = "==3.*"
webdavclient3 = "*"

[requires]
python_version = "3.10"
280 changes: 200 additions & 80 deletions Pipfile.lock

Large diffs are not rendered by default.

164 changes: 162 additions & 2 deletions hoover/search/admin.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os
from django import forms
from django.db import transaction
from django.contrib import admin
from django.contrib.admin.widgets import FilteredSelectMultiple
from django.contrib.auth.admin import User, Group, UserAdmin, GroupAdmin
from django.forms import ModelForm
from django.urls import reverse
from django.utils.html import format_html
from . import models


Expand All @@ -13,11 +17,22 @@ class HooverAdminSite(admin.AdminSite):
class CollectionAdmin(admin.ModelAdmin):
list_display = ['__str__', 'count', 'user_access_list', 'group_access_list',
'uploaders_access_list', 'group_upload_access_list',
'group_access_list', 'public', 'writeable', 'avg_search_time', 'avg_batch_time']
'group_access_list', 'public', 'writeable', 'avg_search_time', 'avg_batch_time',
'link_to_nccollection']
fields = ['title', 'name', 'index', 'public', 'writeable', 'users', 'groups', 'uploader_users', 'uploader_groups']
filter_horizontal = ['users', 'groups', 'uploader_users', 'uploader_groups']
readonly_fields = ['index', 'name']

def link_to_nccollection(self, obj):
    """Render a link to the change page of the related Nextcloud collection.

    Uses the first entry of ``obj.nextcloudcollection_set`` and returns
    ``None`` when there is no such entry.
    """
    nc_collection = obj.nextcloudcollection_set.first()
    if not nc_collection:
        return None
    link = reverse("admin:search_nextcloudcollection_change", args=[nc_collection.id])
    # Hand the values to format_html as arguments so they get HTML-escaped.
    # Pre-formatting with an f-string bypasses the escaping and raises when
    # the rendered name contains '{' or '}'.
    return format_html('<a href="{}"><b>{}</b></a>', link, nc_collection)

link_to_nccollection.short_description = 'Nextcloud Collection'

def get_queryset(self, request):
qs = super().get_queryset(request)
if request.user.is_superuser:
Expand Down Expand Up @@ -56,6 +71,133 @@ def has_delete_permission(self, request, obj=None):
return False


class NextcloudDirectoryAdmin(admin.ModelAdmin):
    """Read-only admin listing of Nextcloud directories.

    Add, change and delete permissions are all denied, so entries can only be
    viewed and searched (the search also backs autocomplete widgets).
    """
    list_display = ['name', 'path', 'user', 'exists_in_nextcloud', 'deleted_from_nextcloud', 'link_to_nccollection']
    search_fields = ['path']
    readonly_fields = ['name', 'path', 'modified', 'user', 'deleted_from_nextcloud']

    def link_to_nccollection(self, obj):
        """Render a link to the Nextcloud collection attached to ``obj``.

        Returns ``None`` when no collection is attached to the directory.
        """
        # The reverse one-to-one accessor raises an exception that derives
        # from AttributeError when there is no related row, so getattr with a
        # default handles that case explicitly instead of relying on the
        # changelist to swallow the exception.
        nc_collection = getattr(obj, 'nextcloudcollection', None)
        if nc_collection is None:
            return None
        link = reverse("admin:search_nextcloudcollection_change", args=[nc_collection.id])
        # Pass values as arguments so format_html escapes them; an f-string
        # would bypass escaping and break on names containing braces.
        return format_html('<a href="{}"><b>{}</b></a>', link, nc_collection)

    link_to_nccollection.short_description = 'Nextcloud Collection'

    def exists_in_nextcloud(self, obj):
        """Return ``True`` unless ``obj.deleted_from_nextcloud`` is set."""
        return not obj.deleted_from_nextcloud

    exists_in_nextcloud.boolean = True

    def get_search_results(self, request, queryset, search_term):
        """Run the default search and hide deleted directories for autocomplete.

        Returns the ``(queryset, may_have_duplicates)`` pair produced by the
        parent implementation, with the queryset optionally narrowed.
        """
        queryset, may_have_duplicates = super().get_search_results(
            request, queryset, search_term,
        )
        # Only filter out the deleted directories in the autocomplete dialogue,
        # not in the list display.
        if 'autocomplete' in request.get_full_path():
            queryset = queryset.filter(deleted_from_nextcloud__isnull=True)
        return queryset, may_have_duplicates

    def path(self, obj):
        """Return the string representation of ``obj``."""
        return str(obj)

    def has_add_permission(self, request):
        """Deny adding directories through the admin."""
        return False

    def has_change_permission(self, request, obj=None):
        """Deny editing directories through the admin."""
        return False

    def has_delete_permission(self, request, obj=None):
        """Deny deleting directories through the admin."""
        return False


class NextcloudCollectionForm(ModelForm):
    """Model form for Nextcloud collections.

    Form fields that belong to a feature are disabled when the environment
    variable for that feature is unset or empty.
    """

    class Meta:
        model = models.NextcloudCollection
        fields = [
            'name',
            'directory',
            'process',
            'sync',
            'ocr_languages',
            'max_result_window',
            'pdf_preview_enabled',
            'thumbnail_generator_enabled',
            'image_classification_object_detection_enabled',
            'image_classification_classify_images_enabled',
            'nlp_language_detection_enabled',
            'nlp_fallback_language',
            'nlp_entity_extraction_enabled',
            'translation_enabled',
            'translation_target_languages',
            'translation_text_length_limit',
            'default_table_header',
            'explode_table_rows',
            's3_blobs_address',
            's3_blobs_access_key',
            's3_blobs_secret_key',
        ]

    def __init__(self, *args, **kwargs):
        """Build the form, then disable the fields of features that are off."""
        super().__init__(*args, **kwargs)
        # Environment variable -> form fields that depend on that feature.
        feature_fields = (
            ('PDF_PREVIEW_ENABLED', ('pdf_preview_enabled',)),
            ('THUMBNAIL_GENERATOR', ('thumbnail_generator_enabled',)),
            ('OBJECT_DETECTION_ENABLED', ('image_classification_object_detection_enabled',)),
            ('IMAGE_CLASSIFICATION_ENABLED', ('image_classification_classify_images_enabled',)),
            ('NLP_LANGUAGE_DETECTION_ENABLED', ('nlp_language_detection_enabled',
                                                'nlp_fallback_language')),
            ('NLP_ENTITY_EXTRACTION_ENABLED', ('nlp_entity_extraction_enabled',)),
            ('TRANSLATION_ENABLED', ('translation_enabled',
                                     'translation_target_languages',
                                     'translation_text_length_limit')),
        )
        for env_name, dependent_fields in feature_fields:
            if os.getenv(env_name):
                continue
            for field_name in dependent_fields:
                self.fields[field_name].disabled = True


class NextcloudCollectionAdmin(admin.ModelAdmin):
    """Admin for Nextcloud collections.

    Saving an entry gets or creates the ``Collection`` with the same name and
    links it to the Nextcloud collection.
    """
    form = NextcloudCollectionForm
    autocomplete_fields = ['directory']
    list_display = [
        'name',
        'username',
        'link_to_collection'
    ]

    def get_readonly_fields(self, request, obj=None):
        """Make ``name`` and ``directory`` read-only when editing an existing object."""
        fields = []
        if obj:
            fields += ['name', 'directory']
        return fields

    def link_to_collection(self, obj):
        """Render a link to the change page of the linked collection."""
        link = reverse("admin:search_collection_change", args=[obj.collection.id])
        # Pass values as arguments so format_html escapes them; an f-string
        # would bypass escaping and break on names containing braces.
        return format_html('<a href="{}"><b>{}</b></a>', link, obj.collection)

    link_to_collection.short_description = 'Collection'

    def username(self, obj):
        """Return ``obj.username``."""
        return obj.username

    def name(self, obj):
        """Return ``obj.name``."""
        return obj.name

    def password(self, obj):
        """Return ``obj.password``."""
        return obj.password

    def save_model(self, request, obj, form, change):
        """Attach the matching ``Collection`` to ``obj`` and save it.

        The collection lookup/creation and the save run in one transaction,
        so a failing save does not leave a freshly created collection behind.
        """
        with transaction.atomic():
            collection, _ = models.Collection.objects.get_or_create(name=obj.name, index=obj.name)
            obj.collection = collection
            super().save_model(request, obj, form, change)


class GroupAdminForm(ModelForm):
class Meta:
model = Group
Expand Down Expand Up @@ -96,8 +238,24 @@ class ProfileInline(admin.StackedInline):
list_display = ('user', 'uuid', 'preferences')


class WebDAVPasswordInlineForm(ModelForm):
    """Form for a user's WebDAV password, rendered with a password input."""

    class Meta:
        model = models.WebDAVPassword
        # The user is excluded from the form fields.
        exclude = ['user']
        # NOTE(review): PasswordInput does not re-render a stored value by
        # default (render_value=False) -- confirm that editing a user without
        # retyping the password behaves as intended.
        widgets = {
            'password': forms.PasswordInput(),
        }


class WebDAVPasswordInline(admin.StackedInline):  # You can use TabularInline as an alternative
    """Inline admin for the WebDAV password model; deletion is disabled."""
    model = models.WebDAVPassword
    can_delete = False
    form = WebDAVPasswordInlineForm


class HooverUserAdmin(UserAdmin):
inlines = (ProfileInline,)
inlines = (ProfileInline,
WebDAVPasswordInline,)
actions = []
fieldsets = (
('Personal info', {'fields': ('username', 'first_name', 'last_name', 'email')}),
Expand Down Expand Up @@ -198,6 +356,8 @@ def has_delete_permission(self, request, obj=None):

admin_site = HooverAdminSite(name='hoover-admin')
admin_site.register(models.Collection, CollectionAdmin)
admin_site.register(models.NextcloudCollection, NextcloudCollectionAdmin)
admin_site.register(models.NextcloudDirectory, NextcloudDirectoryAdmin)
admin_site.register(Group, HooverGroupAdmin)
admin_site.register(User, HooverUserAdmin)

Expand Down
8 changes: 8 additions & 0 deletions hoover/search/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from django.apps import AppConfig


class SearchConfig(AppConfig):
    """App configuration for ``hoover.search``."""
    name = 'hoover.search'

    def ready(self):
        """Import the app's ``signals`` module once the app is ready."""
        # Imported for its side effects only -- presumably to register signal
        # receivers; confirm against the signals module.
        from . import signals  # noqa: F401
36 changes: 36 additions & 0 deletions hoover/search/management/commands/purgenextcloudcollection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import requests
from django.core.management.base import BaseCommand
from django.conf import settings
from ... import models


class Command(BaseCommand):
    """Delete a Nextcloud collection locally and ask snoop to remove it too."""
    help = "Remove a nextcloud collection"

    def add_arguments(self, parser):
        """Register the positional ``collection_name`` argument."""
        parser.add_argument('collection_name')

    def handle(self, collection_name, **kwargs):
        """Purge ``collection_name`` from the local database and from snoop.

        Raises:
            CommandError: if ``SNOOP_BASE_URL`` is empty, or if the request to
                snoop fails after the local rows have been deleted.
        """
        # Imported locally so this block does not depend on changes to the
        # module-level imports.
        from django.core.management.base import CommandError
        from django.db import transaction

        # Validate the configuration before deleting anything. The previous
        # ``assert`` ran only after the local rows were gone and is stripped
        # when Python runs with -O.
        snoop_base_url = settings.SNOOP_BASE_URL
        if not snoop_base_url:
            raise CommandError('SNOOP_BASE_URL is not configured')

        nextcloud_collection = models.NextcloudCollection.objects.filter(name=collection_name).first()
        if not nextcloud_collection:
            self.stdout.write(f'Nextcloud collection not found: {collection_name}')
            return

        # Delete both rows together so a failure cannot leave only one of them.
        with transaction.atomic():
            collection = nextcloud_collection.collection
            nextcloud_collection.delete()
            collection.delete()
        self.stdout.write(
            f'Deleted nextcloud collection and corresponding collection from database: {collection_name}'
        )

        url = snoop_base_url + f'/common/remove-nextcloud-collection/{collection_name}'
        # NOTE(review): no timeout is set on this request; the expected
        # duration of the snoop call is not visible from here.
        try:
            res = requests.get(url)
        except requests.RequestException as exc:
            raise CommandError(
                f'Local rows were deleted, but the request to snoop failed: {exc}'
            ) from exc

        if res.status_code == 200:
            self.stdout.write('Deleted nextcloud collection from snoop database.')
        elif res.status_code == 404:
            self.stdout.write(f'Nextcloud collection not found in snoop database: {collection_name}')
        else:
            self.stdout.write(f'Unknown response from snoop: {res}')
9 changes: 9 additions & 0 deletions hoover/search/management/commands/purgenextclouddirs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from ...tasks import sync_nextcloud_directories
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Run the Nextcloud directory sync task with purging enabled."""
    # The help text used to be identical to the plain sync command's text,
    # which made the two commands indistinguishable in ``manage.py help``.
    help = "Sync nextcloud directories with purging enabled."

    def handle(self, *args, **options):
        """Call ``sync_nextcloud_directories`` with ``purge=True``."""
        # NOTE(review): 4 and 50 are passed positionally; their meaning is
        # defined by sync_nextcloud_directories -- confirm against the task.
        sync_nextcloud_directories(4, 50, purge=True)
7 changes: 6 additions & 1 deletion hoover/search/management/commands/synccollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ def handle(self, snoop_collections_json, **kwargs):

print('locking table...')
with lock_table(models.Collection):
to_delete = models.Collection.objects.exclude(name__in=[c['name'] for c in snoop_collections])
# exclude collections with an associated nextcloudcollection
to_delete = models.Collection.objects.exclude(
name__in=[c['name'] for c in snoop_collections]
).exclude(
nextcloudcollection__collection__isnull=False
)
print('Deleting', to_delete.count(), 'collections')
to_delete.delete()

Expand Down
9 changes: 9 additions & 0 deletions hoover/search/management/commands/syncnextcloudcols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from ...signals import sync_nextcloud_collections
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command wrapper around ``sync_nextcloud_collections``."""
    help = "Sync nextcloud collections with snoop"

    def handle(self, *args, **options):
        """Call ``sync_nextcloud_collections`` with no arguments."""
        sync_nextcloud_collections()
9 changes: 9 additions & 0 deletions hoover/search/management/commands/syncnextclouddirs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from ...tasks import sync_nextcloud_directories
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command wrapper around ``sync_nextcloud_directories``."""
    help = "Sync nextcloud directories."

    def handle(self, *args, **options):
        """Run the directory sync task synchronously."""
        # NOTE(review): 4 and 50 are passed positionally; their meaning is
        # defined by sync_nextcloud_directories -- confirm against the task.
        sync_nextcloud_directories(4, 50)
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Generated by Django 3.2.21 on 2024-02-21 15:49

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import hoover.search.validators


class Migration(migrations.Migration):
    """Create the WebDAVPassword, NextcloudDirectory and NextcloudCollection models."""

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('search', '0014_collection_stats'),
    ]

    operations = [
        # At most one WebDAV password row per user; removed together with the user.
        migrations.CreateModel(
            name='WebDAVPassword',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('password', models.CharField(max_length=256)),
                ('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
            ],
        ),
        # A directory owned by a user, identified by its unique path.
        # ``deleted_from_nextcloud`` is a nullable timestamp.
        migrations.CreateModel(
            name='NextcloudDirectory',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=256)),
                ('path', models.CharField(max_length=512, unique=True)),
                ('modified', models.DateTimeField()),
                ('deleted_from_nextcloud', models.DateTimeField(null=True)),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'verbose_name_plural': 'Nextcloud directories',
            },
        ),
        # Per-collection settings; each row points at one search collection
        # and at exactly one Nextcloud directory (one-to-one).
        migrations.CreateModel(
            name='NextcloudCollection',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=256, unique=True, validators=[hoover.search.validators.validate_collection_name])),
                ('process', models.BooleanField(default=True)),
                ('sync', models.BooleanField(default=True)),
                ('ocr_languages', models.CharField(blank=True, default='', max_length=256)),
                ('max_result_window', models.IntegerField(default=100000)),
                ('pdf_preview_enabled', models.BooleanField(default=False)),
                ('thumbnail_generator_enabled', models.BooleanField(default=False)),
                ('image_classification_object_detection_enabled', models.BooleanField(default=False)),
                ('image_classification_classify_images_enabled', models.BooleanField(default=False)),
                ('nlp_language_detection_enabled', models.BooleanField(default=False)),
                ('nlp_fallback_language', models.CharField(default='en', max_length=256)),
                ('nlp_entity_extraction_enabled', models.BooleanField(default=False)),
                ('translation_enabled', models.BooleanField(default=False)),
                ('translation_target_languages', models.CharField(default='en', max_length=256)),
                ('translation_text_length_limit', models.IntegerField(default=400)),
                ('default_table_header', models.CharField(blank=True, default='', max_length=512)),
                ('explode_table_rows', models.BooleanField(default=False)),
                ('s3_blobs_address', models.CharField(blank=True, default='', max_length=512)),
                ('s3_blobs_access_key', models.CharField(blank=True, default='', max_length=512)),
                ('s3_blobs_secret_key', models.CharField(blank=True, default='', max_length=512)),
                ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='search.collection')),
                ('directory', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='search.nextclouddirectory', validators=[hoover.search.validators.validate_directory_path])),
            ],
        ),
    ]
Loading

0 comments on commit 406ae48

Please sign in to comment.