Implement synchronization for community authentication

This adds the concept of an apiurl to each site that uses community
authentication, that the main website server can make calls to and send
updates. This URL will receive POSTs from the main website when a user
account that has been used on this site gets updated, and can then
optionally update its local entries with it (the django plugin sample
is updated to handle this fully).

Updates are only sent for users that have a history of having logged
into the specific site -- this way we avoid broadcasting user
information to sites requiring specific consent that the user hasn't
given, and it also decreases the number of updates that have to be sent.

Updates are queued by the system in a table and using listen/notify a
daemon that's running picks up what needs to be updated and posts it to
the endpoints. If this daemon is not running, obviously nothing gets
sent.

Updates are tracked using triggers in the database which push
information into this queue.
This commit is contained in:
Magnus Hagander
2020-08-08 17:03:17 +02:00
parent d969bd33d8
commit c1fb5de080
10 changed files with 489 additions and 0 deletions

View File

@ -28,8 +28,20 @@ class CommunityAuthSiteAdminForm(forms.ModelForm):
raise forms.ValidationError("Crypto key must be 16, 24 or 32 bytes before being base64-encoded")
return self.cleaned_data['cryptkey']
def clean(self):
d = super().clean()
if d.get('push_changes', False) and not d['apiurl']:
self.add_error('push_changes', 'API url must be specified to enable push changes!')
if d.get('push_ssh', False) and not d.get('push_changes', False):
self.add_error('push_ssh', 'SSH changes can only be pushed if general change push is enabled')
return d
class CommunityAuthSiteAdmin(admin.ModelAdmin):
list_display = ('name', 'cooloff_hours', 'push_changes', 'push_ssh', 'org')
form = CommunityAuthSiteAdminForm

View File

@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.27 on 2020-08-06 13:36
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('account', '0005_secondaryemail'),
('core', '0001_initial'),
]
operations = [
migrations.AddField(
model_name='communityauthsite',
name='apiurl',
field=models.URLField(max_length=200, null=False, blank=True),
),
migrations.AddField(
model_name='communityauthsite',
name='push_changes',
field=models.BooleanField(default=False, help_text='Supports receiving http POSTs with changes to accounts'),
),
migrations.AddField(
model_name='communityauthsite',
name='push_ssh',
field=models.BooleanField(default=False, help_text='Wants to receive SSH keys in push changes'),
),
migrations.RunSQL(
"""CREATE TABLE account_communityauthchangelog (
user_id int NOT NULL REFERENCES auth_user(id) DEFERRABLE INITIALLY DEFERRED,
site_id int NOT NULL REFERENCES account_communityauthsite (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
changedat timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT account_communityauthchangelog_pkey PRIMARY KEY (user_id, site_id)
)""",
"""DROP TABLE account_communityauthchangelog""",
),
# When a user entry is changed, propagate it to any community auth site that has push enabled, and that
# the user has at some point logged in to. We do this through a trigger on auth_user, to make sure we
# definitely catch all changes.
migrations.RunSQL(
"""CREATE FUNCTION account_cauth_changetrack () RETURNS trigger AS $$
BEGIN
IF NEW.username != OLD.username THEN
RAISE EXCEPTION 'Usernames cannot be changed';
END IF;
IF NEW.first_name != OLD.first_name OR NEW.last_name != OLD.last_name OR NEW.email != OLD.email THEN
INSERT INTO account_communityauthchangelog (user_id, site_id, changedat)
SELECT NEW.id, s.id, CURRENT_TIMESTAMP
FROM account_communityauthsite s
INNER JOIN account_communityauthlastlogin ll ON ll.site_id=s.id
WHERE s.push_changes AND ll.user_id=NEW.id
ON CONFLICT (user_id, site_id) DO UPDATE SET changedat=greatest(account_communityauthchangelog.changedat, CURRENT_TIMESTAMP);
NOTIFY communityauth_changetrack;
END IF;
RETURN NEW;
END;
$$ language 'plpgsql'""",
"""DROP FUNCTION account_cauth_changetrack""",
),
# We specifically don't use "UPDATE OF" to find columns because then we create a dependency on columns in
# auth_user, which is owned by django, and may block migrations in that app. So we make the check at runtime.
migrations.RunSQL(
"""CREATE TRIGGER account_cauth_changetrack_trg
AFTER UPDATE ON auth_user
FOR EACH ROW EXECUTE FUNCTION account_cauth_changetrack()""",
"""DROP TRIGGER account_cauth_changetrack_trg ON auth_user""",
),
# We also need to track when secondary email addresses are added/removed (if they are confirmed)
# We don't have to track INSERTs as they are always unconfirmed, but we do need to track deletes here.
migrations.RunSQL(
"""CREATE FUNCTION account_secondaryemail_changetrack () RETURNS trigger AS $$
BEGIN
INSERT INTO account_communityauthchangelog (user_id, site_id, changedat)
SELECT NEW.user_id, s.id, CURRENT_TIMESTAMP
FROM account_communityauthsite s
INNER JOIN account_communityauthlastlogin ll ON ll.site_id=s.id
WHERE s.push_changes AND ll.user_id=NEW.user_id
ON CONFLICT (user_id, site_id) DO UPDATE SET changedat=greatest(account_communityauthchangelog.changedat, CURRENT_TIMESTAMP);
NOTIFY communityauth_changetrack;
RETURN NEW;
END;
$$ language 'plpgsql'""",
"""DROP FUNCTION account_secondaryemail_changetrack""",
),
migrations.RunSQL(
"""CREATE TRIGGER account_secondaryemail_changetrack_trg
AFTER DELETE OR UPDATE ON account_secondaryemail
FOR EACH ROW EXECUTE FUNCTION account_secondaryemail_changetrack()""",
"""DROP TRIGGER account_Secondaryemail_changetrack_trg""",
),
migrations.RunSQL(
"""CREATE FUNCTION account_profile_changetrack () RETURNS trigger AS $$
BEGIN
IF NEW.sshkey != OLD.sshkey THEN
INSERT INTO account_communityauthchangelog (user_id, site_id, changedat)
SELECT NEW.user_id, s.id, CURRENT_TIMESTAMP
FROM account_communityauthsite s
INNER JOIN account_communityauthlastlogin ll ON ll.site_id=s.id
WHERE s.push_changes AND s.push_ssh AND ll.user_id=NEW.user_id
ON CONFLICT (user_id, site_id) DO UPDATE SET changedat=greatest(account_communityauthchangelog.changedat, CURRENT_TIMESTAMP);
NOTIFY communityauth_changetrack;
END IF;
RETURN NEW;
END;
$$ language 'plpgsql'""",
"""DROP FUNCTION account_secondaryemail_changetrack""",
),
migrations.RunSQL(
"""CREATE TRIGGER account_profile_changetrack_trg
AFTER DELETE OR UPDATE ON core_userprofile
FOR EACH ROW EXECUTE FUNCTION account_profile_changetrack()""",
"""DROP TRIGGER account_profile_changetrack_trg""",
),
]

View File

@ -15,12 +15,17 @@ class CommunityAuthSite(models.Model):
name = models.CharField(max_length=100, null=False, blank=False,
help_text="Note that the value in this field is shown on the login page, so make sure it's user-friendly!")
redirecturl = models.URLField(max_length=200, null=False, blank=False)
apiurl = models.URLField(max_length=200, null=False, blank=True)
cryptkey = models.CharField(max_length=100, null=False, blank=False,
help_text="Use tools/communityauth/generate_cryptkey.py to create a key")
comment = models.TextField(null=False, blank=True)
org = models.ForeignKey(CommunityAuthOrg, null=False, blank=False, on_delete=models.CASCADE)
cooloff_hours = models.IntegerField(null=False, blank=False, default=0,
help_text="Number of hours a user must have existed in the systems before allowed to log in to this site")
push_changes = models.BooleanField(null=False, blank=False, default=False,
help_text="Supports receiving http POSTs with changes to accounts")
push_ssh = models.BooleanField(null=False, blank=False, default=False,
help_text="Wants to receive SSH keys in push changes")
def __str__(self):
return self.name

View File

@ -0,0 +1,127 @@
#!/usr/bin/env python3
#
# auth_changetrack.py - tracks changes to users and distributes them
#
import sys
import select
import requests
import json
import base64
import hmac
import logging
import psycopg2
import psycopg2.extensions
def process_queue(conn):
site_stoplist = []
curs = conn.cursor()
while True:
# Fetch data for one site at a time, by just picking whatever happens to be the oldest one
curs.execute("SELECT site_id, apiurl, cryptkey, push_ssh FROM (SELECT site_id FROM account_communityauthchangelog WHERE NOT site_id=ANY(%(stoplist)s) LIMIT 1) x INNER JOIN account_communityauthsite s ON s.id=x.site_id", {
'stoplist': site_stoplist,
})
if not curs.rowcount:
# Nothing in the queue, so we're done here.
conn.rollback()
return
siteid, url, cryptkey, include_ssh = curs.fetchone()
# Get all data for this site (well, up to 100 users to not generate packages that are too big... We'll come back for the rest later if there are more.
curs.execute(
"""SELECT cl.user_id, changedat, username, first_name, last_name, u.email, sshkey, array_agg(se.email) FILTER (WHERE se.confirmed AND se.email IS NOT NULL)
FROM account_communityauthchangelog cl
INNER JOIN auth_user u ON u.id=cl.user_id
LEFT JOIN account_secondaryemail se ON se.user_id=cl.user_id
LEFT JOIN core_userprofile up ON up.user_id=cl.user_id
WHERE cl.site_id=%(siteid)s
GROUP BY cl.user_id, cl.changedat, u.id, up.user_id
LIMIT 100""",
{
'siteid': siteid,
}
)
rows = curs.fetchall()
if not rows:
# This shouldn't happen
logging.error("Re-querying for updates returned no rows! Aborting.")
conn.rollback()
return
# Build the update structure
def _get_userid_struct(row):
yield 'username', row[2]
yield 'firstname', row[3]
yield 'lastname', row[4]
yield 'email', row[5]
yield 'secondaryemails', row[7] or []
if include_ssh:
yield 'sshkeys', row[6]
pushstruct = {
'type': 'update',
'users': [dict(_get_userid_struct(row)) for row in rows],
}
pushjson = json.dumps(pushstruct)
# We don't need to encrypt since it's over https, but we need to sign.
h = hmac.digest(
base64.b64decode(cryptkey),
msg=bytes(pushjson, 'utf-8'),
digest='sha512',
)
try:
r = requests.post(url, data=pushjson, headers={
'X-pgauth-sig': base64.b64encode(h),
}, timeout=10)
except Exception as e:
logging.error("Exception pushing changes to {}: {}".format(url, e))
conn.rollback()
site_stoplist.append(siteid)
continue
if r.status_code == 200:
# Success! Whee!
# This is a really silly way to do it, but meh.
curs.executemany("DELETE FROM account_communityauthchangelog WHERE site_id=%(siteid)s AND user_id=%(userid)s AND changedat=%(changedat)s", [
{
'siteid': siteid,
'userid': row[0],
'changedat': row[1],
} for row in rows]
)
logging.info("Successfully pushed {} changes to {}".format(len(rows), url))
conn.commit()
continue
logging.error("Failed to push changes to {}: status {}, initial: {}".format(url, r.status_code, r.text[:100]))
conn.rollback()
site_stoplist.append(siteid)
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: auth_changetrack.py <dsn>")
sys.exit(1)
logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', level=logging.INFO)
conn = psycopg2.connect(sys.argv[1])
curs = conn.cursor()
curs.execute("LISTEN communityauth_changetrack")
conn.commit()
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ)
while True:
process_queue(conn)
select.select([conn], [], [], 5 * 60)
conn.poll()
while conn.notifies:
conn.notifies.pop()
# Loop back up and process the full queue

View File

@ -0,0 +1,39 @@
#!/usr/bin/env python3
import sys
import psycopg2
from datetime import timedelta
# Up to 5 minutes delay is ok
WARNING_THRESHOLD = timedelta(minutes=5)
# More than 15 minutes something is definitely wrong
CRITICAL_THRESHOLD = timedelta(minutes=15)
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: nagios_check.py <dsn>")
sys.exit(1)
conn = psycopg2.connect(sys.argv[1])
curs = conn.cursor()
# Get the oldest entry that has not been completed, if any
curs.execute("SELECT COALESCE(max(now()-changedat), '0') FROM account_communityauthchangelog")
rows = curs.fetchall()
conn.close()
if len(rows) == 0:
print("OK, queue is empty")
sys.exit(0)
age = rows[0][0]
if age < WARNING_THRESHOLD:
print("OK, queue age is %s" % age)
sys.exit(0)
elif age < CRITICAL_THRESHOLD:
print("WARNING, queue age is %s" % age)
sys.exit(1)
else:
print("CRITICAL, queue age is %s" % age)
sys.exit(2)

View File

@ -8,6 +8,10 @@
# * Make sure the view "login" from this module is used for login
# * Map an url somewhere (typically /auth_receive/) to the auth_receive
# view.
# * To receive live updates (not just during login), map an url somewhere
# (typically /auth_api/) to the auth_api view.
# * To receive live updates, also connect to the signal auth_user_data_received.
# This signal will fire *both* on login events *and* on background updates.
# * In settings.py, set AUTHENTICATION_BACKENDS to point to the class
# AuthBackend in this module.
# * (And of course, register for a crypto key with the main authentication
@ -19,15 +23,19 @@
#
from django.http import HttpResponse, HttpResponseRedirect
from django.views.decorators.csrf import csrf_exempt
from django.contrib.auth.models import User
from django.contrib.auth.backends import ModelBackend
from django.contrib.auth import login as django_login
from django.contrib.auth import logout as django_logout
from django.dispatch import Signal
from django.db import transaction
from django.conf import settings
import base64
import json
import socket
import hmac
from urllib.parse import urlencode, parse_qs
import requests
from Cryptodome.Cipher import AES
@ -36,6 +44,12 @@ from Cryptodome import Random
import time
# This signal fires whenever new user data has been received. Note that this
# happens *after* first_name, last_name and email has been updated on the user
# record, so those are not included in the userdata struct.
auth_user_data_received = Signal(providing_args=['user', 'userdata'])
class AuthBackend(ModelBackend):
# We declare a fake backend that always fails direct authentication -
# since we should never be using direct authentication in the first place!
@ -166,6 +180,11 @@ We apologize for the inconvenience.
user.backend = "%s.%s" % (AuthBackend.__module__, AuthBackend.__name__)
django_login(request, user)
# Signal that we have information about this user
auth_user_data_received.send(sender=auth_receive, user=user, userdata={
'secondaryemails': data['se'][0].split(',') if 'se' in data else []
})
# Finally, check of we have a data package that tells us where to
# redirect the user.
if 'd' in data:
@ -187,6 +206,73 @@ We apologize for the inconvenience.
return HttpResponse("Authentication successful, but don't know where to redirect!", status=500)
# Receive API calls from upstream, such as push changes to users
@csrf_exempt
def auth_api(request):
if 'X-pgauth-sig' not in request.headers:
return HttpResponse("Missing signature header!", status=400)
try:
sig = base64.b64decode(request.headers['X-pgauth-sig'])
except Exception:
return HttpResponse("Invalid signature header!", status=400)
try:
h = hmac.digest(
base64.b64decode(settings.PGAUTH_KEY),
msg=request.body,
digest='sha512',
)
if not hmac.compare_digest(h, sig):
return HttpResponse("Invalid signature!", status=401)
except Exception:
return HttpResponse("Unable to compute hmac", status=400)
try:
pushstruct = json.loads(request.body)
except Exception:
return HttpResponse("Invalid JSON!", status=400)
def _conditionally_update_record(rectype, recordkey, structkey, fieldmap, struct):
try:
obj = rectype.objects.get(**{recordkey: struct[structkey]})
ufields = []
for k, v in fieldmap.items():
if struct[k] != getattr(obj, v):
setattr(obj, v, struct[k])
ufields.append(v)
if ufields:
obj.save(update_fields=ufields)
return obj
except rectype.DoesNotExist:
# If the record doesn't exist, we just ignore it
return None
# Process the received structure
if pushstruct.get('type', None) == 'update':
# Process updates!
with transaction.atomic():
for u in pushstruct.get('users', []):
user = _conditionally_update_record(
User,
'username', 'username',
{
'firstname': 'first_name',
'lastname': 'last_name',
'email': 'email',
},
u,
)
# Signal that we have information about this user (only if it exists)
if user:
auth_user_data_received.send(sender=auth_api, user=user, userdata={
k: u[k] for k in u.keys() if k not in ['firstname', 'lastname', 'email', ]
})
return HttpResponse("OK", status=200)
# Perform a search in the central system. Note that the results are returned as an
# array of dicts, and *not* as User objects. To be able to for example reference the
# user through a ForeignKey, a User object must be materialized locally. We don't do

View File

@ -0,0 +1 @@
cauth_push_receiver.ini

View File

@ -0,0 +1,10 @@
[receiver]
plugin=redmine
key=1xWzS4MW7JHlJgc618WaeTqfXaH0152xP7hZtnRe73w=
[mediawiki]
connstr=dbname=postgres
schema=mediawiki
[redmine]
connstr=dbname=postgres

View File

@ -0,0 +1,85 @@
#!/usr/bin/env python3
#
# postgresql.org community authentication push updates receiver
#
# This simple wsgi application is intended to run on systems that otherwise
# run a completely different codebase with just a simple authentication
# plugin, in order to receive push updates and materialize those into the
# database.
#
# It should be mapped to receive only the POST requests specifically for
# the community authentication API, and will act as that regardless of
# which URI it actually receives.
#
import os
import sys
import configparser
import json
import base64
import importlib
import hmac
config = configparser.ConfigParser()
config.read(os.path.abspath(os.path.join(__file__, '../cauth_push_receiver.ini')))
# Get the class ReceiverPlugin in the defined plugin
pluginclass = getattr(
importlib.import_module('plugins.{}'.format(config.get('receiver', 'plugin'))),
'ReceiverPlugin',
)
def application(environ, start_response):
try:
if environ['REQUEST_METHOD'] != 'POST':
raise Exception("Only POST allowed")
if 'HTTP_X_PGAUTH_SIG' not in environ:
raise Exception("Required authentication header missing")
try:
sig = base64.b64decode(environ['HTTP_X_PGAUTH_SIG'])
except Exception:
raise Exception("Invalid signature header!")
body = environ['wsgi.input'].read()
try:
h = hmac.digest(
base64.b64decode(config.get('receiver', 'key')),
msg=body,
digest='sha512',
)
except Exception:
raise Exception("Could not calculate hmac!")
if not hmac.compare_digest(h, sig):
raise Exception("Invalid signature!")
try:
pushstruct = json.loads(body)
except Exception:
raise Exception("Invalid json payload!")
if pushstruct.get('type', None) == 'update':
with pluginclass(config) as p:
for u in pushstruct.get('users', []):
p.push_user(u)
start_response('200 OK', [
('Content-type', 'text/plain'),
])
return [
"OK",
]
except Exception as e:
print("Error receiving cauth call: {}".format(e), file=sys.stderr)
start_response('500 Internal Server Error', [
('Content-type', 'text/plain'),
])
return [
"An internal server error occurred.\n",
]