Commit cb19ebde authored by fred

webstats: exclude bots from stats

parent bd9fc61d
......@@ -69,7 +69,7 @@ class Command(BaseCommand):
except SoundFile.DoesNotExist:
continue
PodcastLogLine.objects.update_or_create(
obj, created = PodcastLogLine.objects.update_or_create(
timestamp=sighting.log_datetime,
ip=self.anonymise_ip(sighting.ip),
path=sighting.path,
......@@ -79,6 +79,8 @@ class Command(BaseCommand):
'referrer': referrer,
},
)
if obj.is_bot is None:
obj.check_bot()
sighting.stored = True
def anonymise_ip(self, ip):
......
......@@ -9,3 +9,35 @@ class PodcastLogLine(models.Model):
user_agent = models.CharField(max_length=1000)
referrer = models.CharField(max_length=1000, null=True)
is_bot = models.NullBooleanField()
def check_bot(self, recheck=False):
    """Classify this log line as bot or human from its user agent.

    Sets ``is_bot`` to ``True`` when the user agent contains one of the
    known crawler signatures, to ``None`` (undetermined) when it merely
    contains the word "bot" in any letter case, and to ``False``
    otherwise.  The instance is saved only when the value changed.

    Args:
        recheck: when falsy, a line that already has a verdict
            (``is_bot`` is not ``None``) is left untouched; pass a true
            value to re-evaluate it.
    """
    if self.is_bot is not None and not recheck:
        return

    # Substrings identifying known crawlers; matched case-sensitively.
    known_bots = (
        'AdsBot-Google',
        'AdsBot-Google-Mobile',
        'Applebot/',
        'archive.org_bot',
        'bingbot/',
        'Googlebot/',
        'Googlebot-Image/',
        'Googlebot-Video/',
        'INA dlweb',
        'MJ12bot/',
        'MTRobot/',
        'PetalBot',
        'SMTBot/',
        'special_archiver/',
    )

    previous_value = self.is_bot
    user_agent = self.user_agent

    if any(signature in user_agent for signature in known_bots):
        self.is_bot = True
    elif 'bot' in user_agent.lower():
        # Unlisted agent that still names itself a bot ("YandexBot",
        # "somebot"): keep it undetermined rather than declaring it human.
        self.is_bot = None
    else:
        self.is_bot = False

    # Avoid a database write when the verdict did not change.
    if self.is_bot != previous_value:
        self.save()
......@@ -5,6 +5,6 @@ from .models import PodcastLogLine
def downloads_json(request, *args, **kwargs):
    """Return per-soundfile download counts as a JSON object.

    Only log lines whose ``is_bot`` is ``False`` are counted.  The
    response maps each ``soundfile`` value to the number of matching
    log lines.
    """
    totals = (
        PodcastLogLine.objects
        .filter(is_bot=False)
        .values('soundfile')
        .annotate(total=Count('*'))
    )
    content = {row['soundfile']: row['total'] for row in totals}
    return JsonResponse(content)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment