Commit 58ff5c71 authored by fred
Browse files

start module to keep podcast stats

parent 2f63bfab
......@@ -160,6 +160,7 @@ INSTALLED_APPS = (
'taggit',
'gadjo',
'panikweb.paniktags',
'panikweb.webstats',
'sorl.thumbnail',
'ckeditor',
'emissions',
......@@ -225,6 +226,8 @@ STATSD_CLIENT = 'django_statsd.clients.null'
RAVEN_CONFIG = None
ACCESS_LOG_FILENAME = '/var/log/nginx/panikweb-access.log'
DEBUG_TOOLBAR_PANELS = (
'debug_toolbar.panels.version.VersionDebugPanel',
'debug_toolbar.panels.timer.TimerDebugPanel',
......
from ipaddress import ip_address
import re
import dateutil.parser
from django.conf import settings
from django.core.management.base import BaseCommand
from emissions.models import SoundFile
from panikweb.webstats.models import PodcastLogLine
class Command(BaseCommand):
    """Management command recording podcast downloads from the access log.

    Reads the file named by ``settings.ACCESS_LOG_FILENAME``, keeps the GET
    requests for ``.ogg``/``.mp3`` files below ``/media/sounds/`` and stores
    one ``PodcastLogLine`` per download, with the client address anonymised.
    """

    def handle(self, *args, **kwargs):
        """Parse the access log and create the missing ``PodcastLogLine`` rows.

        Lines that cannot be interpreted (unexpected format, unparsable date,
        path without a usable sound file id, unknown sound file, malformed
        client address) are skipped so that a single bad line does not abort
        the whole import.
        """
        regex = re.compile(
            r'([(a-f\d\.\:)]+) - - \[(.*?)\] "GET /media/sounds/(.*?) HTTP/..." \d+ \d+ ".*?" "(.*?)"'
        )
        # Time of the last recorded request per (ip, path, user agent).
        seen = {}
        with open(settings.ACCESS_LOG_FILENAME) as fd:
            for line in fd:
                match = regex.match(line)
                if not match:
                    continue
                ip, date, path, user_agent = match.groups()
                if not path.endswith(('.ogg', '.mp3')):
                    continue
                try:
                    # Replace the first colon (between date and time) with a
                    # space before handing the string to dateutil.
                    log_datetime = dateutil.parser.parse(date.replace(':', ' ', 1))
                except ValueError:
                    continue
                key = (ip, path, user_agent)
                previous_sighting = seen.get(key)
                if (
                    previous_sighting is not None
                    and (log_datetime - previous_sighting).total_seconds() < 3600
                ):
                    # don't record requests that already happened less than an
                    # hour ago.
                    continue
                seen[key] = log_datetime
                try:
                    # The sound file id is the third "_"-separated part from
                    # the end of the path; IndexError covers paths with too
                    # few parts, ValueError covers a non-numeric id.
                    soundfile = SoundFile.objects.get(id=path.split('_')[-3])
                except (IndexError, ValueError, SoundFile.DoesNotExist):
                    continue
                try:
                    anonymised_ip = self.anonymise_ip(ip)
                except ValueError:
                    # The regex is permissive and may capture something that
                    # is not a valid address.
                    continue
                PodcastLogLine.objects.get_or_create(
                    timestamp=log_datetime,
                    ip=anonymised_ip,
                    path=path,
                    soundfile=soundfile,
                    # PodcastLogLine.user_agent is limited to 255 characters.
                    user_agent=user_agent[:255],
                )

    def anonymise_ip(self, ip):
        """Return ``ip`` as a string with its host part zeroed.

        IPv4 addresses keep their first three bytes (255.255.255.0 mask),
        IPv6 addresses keep their first eight bytes
        (ffff:ffff:ffff:ffff:: mask).

        Raises ``ValueError`` (from ``ip_address``) if ``ip`` is not a valid
        address.
        """
        address = ip_address(ip)
        if address.version == 4:  # apply 255.255.255.0 mask
            address = ip_address(address.packed[:3] + b'\0')
        elif address.version == 6:  # apply ffff:ffff:ffff:ffff:0000:0000:0000:0000 mask
            address = ip_address(address.packed[:8] + b'\0' * 8)
        return str(address)
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2020-12-13 15:18
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Initial migration creating the ``PodcastLogLine`` model."""

    initial = True

    # The foreign key below points at emissions.SoundFile.
    dependencies = [
        ('emissions', '0015_auto_20200404_1510'),
    ]

    operations = [
        migrations.CreateModel(
            name='PodcastLogLine',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('timestamp', models.DateTimeField()),
                ('ip', models.GenericIPAddressField()),
                ('path', models.CharField(max_length=255)),
                ('user_agent', models.CharField(max_length=255)),
                ('is_bot', models.NullBooleanField()),
                (
                    'soundfile',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to='emissions.SoundFile',
                    ),
                ),
            ],
        ),
    ]
from django.db import models
class PodcastLogLine(models.Model):
    """One recorded request for a podcast sound file."""

    # Date and time of the request.
    timestamp = models.DateTimeField()
    # Client address.
    ip = models.GenericIPAddressField()
    # Requested path.
    path = models.CharField(max_length=255)
    # Matching sound file; set to NULL when the sound file is deleted.
    soundfile = models.ForeignKey(
        'emissions.SoundFile',
        null=True,
        on_delete=models.SET_NULL,
    )
    # User-Agent header of the request.
    user_agent = models.CharField(max_length=255)
    # Nullable flag; presumably NULL means "not classified yet" - TODO confirm.
    is_bot = models.NullBooleanField()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment