Commit 79158cc0 authored by fred

add a management command to load data from a spip xml dump

parent 92a31098
from datetime import datetime
import gzip
import xml.etree.ElementTree as ET
from django.core.management.base import BaseCommand, CommandError
from panikdb.emissions.models import Emission, Episode, Diffusion
class Rubric(object):
    """A Spip rubric, holding its articles and child rubrics keyed by id.

    Other attributes are attached dynamically by the code that builds
    instances.
    """

    def __init__(self):
        # every instance gets its own, initially empty, mappings
        self.articles, self.rubrics = dict(), dict()
class Article(object):
    """Plain attribute holder for a Spip article.

    It defines no attributes of its own; they are set on instances by the
    code that creates them.
    """
class Command(BaseCommand):
    """Load emissions, episodes and diffusions from a Spip XML dump.

    Direct sub-rubrics of the 'Les emissions' rubric are saved as
    ``Emission`` objects, their published articles as ``Episode`` objects,
    and the ``date_redac`` of each article as the ``Diffusion`` datetime of
    its episode.  Existing emissions and episodes are looked up by slug and
    updated in place, so the command can be run several times.

    The dump may be a plain XML file or a gzip-compressed one (``.gz``).
    """

    args = 'filename'
    help = 'Load emissions and episodes from a Spip dump file'

    # id of the 'Les emissions' rubric
    emissions_rubric_id = '2'
    # value of date_redac that is treated as "no date set"
    unset_date = '0000-00-00 00:00:00'
    date_format = '%Y-%m-%d %H:%M:%S'

    def handle(self, filename, *args, **options):
        """Import the dump stored in *filename*.

        Raises CommandError if the file cannot be read, is not parseable
        XML, or does not contain the emissions rubric.
        """
        try:
            root = ET.fromstring(self._read_dump(filename))
        except ET.ParseError as e:
            raise CommandError('unable to parse %s: %s' % (filename, e))

        rubrics = self._load_rubrics(root)
        if self.emissions_rubric_id not in rubrics:
            raise CommandError('rubric %s (emissions) not found in %s' % (
                self.emissions_rubric_id, filename))

        # ids of every rubric whose articles are episodes (set: only used
        # for membership tests), and ids of the rubrics that are emissions
        emission_rubric_ids = set()
        straight_emission_rubric_ids = []
        for rubric in rubrics[self.emissions_rubric_id].rubrics.values():
            emission_rubric_ids.add(rubric.id_rubrique)
            straight_emission_rubric_ids.append(rubric.id_rubrique)
            emission_rubric_ids.update(
                subrubric.id_rubrique
                for subrubric in rubric.rubrics.values())

        articles = self._load_articles(root, rubrics, emission_rubric_ids)
        self._attach_urls(root, rubrics, articles)

        for emission_id in straight_emission_rubric_ids:
            rubric = rubrics[emission_id]
            emission = self._save_emission(rubric)
            for article in rubric.articles.values():
                if article.date_redac == self.unset_date:
                    # date_redac was used for the diffusion date, if it's
                    # not set it's probably not really an episode
                    continue
                self._save_episode(emission, article)

    def _read_dump(self, filename):
        """Return the dump as bytes, without its spip_courriers section."""
        opener = gzip.open if filename.endswith('.gz') else open
        try:
            # read bytes: part of the dump is not properly encoded, so it
            # must not go through text decoding
            with opener(filename, 'rb') as fd:
                content = fd.read()
        except IOError as e:
            raise CommandError('unable to read %s: %s' % (filename, e))

        # the spip_courriers parts of the spip export are not properly
        # encoded, we manually remove them here so the XML file can be
        # parsed correctly.
        opening, closing = b'<spip_courriers>', b'</spip_courriers>'
        start = content.find(opening)
        end = content.rfind(closing)
        # only cut when both markers are present; slicing with the -1
        # returned for a missing marker would corrupt the document
        if start != -1 and end > start:
            content = content[:start] + content[end + len(closing):]
        return content

    def _copy_fields(self, element, target, names):
        """Copy the text of each child of *element* in *names* to *target*."""
        for name in names:
            setattr(target, name, element.find(name).text)

    def _load_rubrics(self, root):
        """Return all rubrics keyed by id, linked to their parent rubric."""
        rubrics = {}
        for rubric_xml in root.iter('spip_rubriques'):
            rubric = Rubric()
            self._copy_fields(rubric_xml, rubric,
                              ('id_rubrique', 'id_parent', 'titre', 'texte'))
            rubrics[rubric.id_rubrique] = rubric

        for rubric in rubrics.values():
            if rubric.id_parent and rubric.id_parent != '0':
                rubrics[rubric.id_parent].rubrics[rubric.id_rubrique] = rubric
        return rubrics

    def _load_articles(self, root, rubrics, emission_rubric_ids):
        """Return published emission articles keyed by id.

        Each article is also registered in the ``articles`` mapping of the
        emission rubric it belongs to.
        """
        articles = {}
        for article_xml in root.iter('spip_articles'):
            if article_xml.find('id_rubrique').text not in emission_rubric_ids:
                continue
            article = Article()
            self._copy_fields(article_xml, article, (
                'id_rubrique', 'id_article', 'titre', 'surtitre',
                'soustitre', 'descriptif', 'chapo', 'texte',
                'date_redac', 'statut'))
            if article.statut != 'publie':
                continue
            articles[article.id_article] = article

            parent_id = rubrics[article.id_rubrique].id_parent
            if parent_id != self.emissions_rubric_id:
                # the spip structure didn't really expect subrubrics in the
                # 'emissions' section, but people added some nevertheless,
                # move related articles to their parent rubric.
                article.id_rubrique = parent_id
            rubrics[article.id_rubrique].articles[article.id_article] = article
        return articles

    def _attach_urls(self, root, rubrics, articles):
        """Set a ``url`` attribute on the rubrics and articles that have one."""
        targets = {'article': articles, 'rubrique': rubrics}
        for spip_url_xml in root.iter('spip_urls'):
            target = targets.get(spip_url_xml.find('type').text)
            if target is None:
                continue
            id_objet = spip_url_xml.find('id_objet').text
            if id_objet in target:
                target[id_objet].url = spip_url_xml.find('url').text

    def _slug_for(self, obj, kind, fallback):
        """Return the lowercased url of *obj*, or *fallback* if it has none."""
        url = getattr(obj, 'url', None)
        if url:
            return url.lower()
        self.stdout.write('no slug for %s %s\n' % (kind, obj.titre))
        return fallback

    def _save_emission(self, rubric):
        """Create or update the Emission matching *rubric* and return it."""
        slug = self._slug_for(rubric, 'rubric', rubric.id_rubrique)
        try:
            emission = Emission.objects.get(slug=slug)
        except Emission.DoesNotExist:
            emission = Emission()
            emission.slug = slug
        emission.title = rubric.titre
        emission.description = rubric.texte
        emission.save()
        return emission

    def _save_episode(self, emission, article):
        """Create or update the Episode for *article*, and its Diffusion."""
        slug = self._slug_for(article, 'article', article.id_article)
        try:
            episode = Episode.objects.get(slug=slug)
        except Episode.DoesNotExist:
            episode = Episode()
            episode.slug = slug
        episode.emission = emission
        episode.title = article.titre
        episode.description = article.texte
        episode.save()

        # a diffusion is only created the first time an episode is loaded
        if Diffusion.objects.filter(episode=episode).exists():
            return
        try:
            when = datetime.strptime(article.date_redac, self.date_format)
        except (TypeError, ValueError):
            # missing or malformed date: keep the episode, skip the diffusion
            self.stdout.write('invalid diffusion date %r for article %s\n' % (
                article.date_redac, article.titre))
            return
        diffusion = Diffusion()
        diffusion.episode = episode
        diffusion.datetime = when
        diffusion.save()
......@@ -9,9 +9,9 @@ from django.db import models
class Emission(models.Model):
title = models.CharField(max_length=50)
slug = models.SlugField()
description = models.TextField()
description = models.TextField(null=True)
first_diffusion = models.DateTimeField()
first_diffusion = models.DateTimeField(null=True)
# other_diffusions =
def __unicode__(self):
......@@ -22,7 +22,7 @@ class Episode(models.Model):
emission = models.ForeignKey('Emission', verbose_name=u'Emission')
title = models.CharField(max_length=50)
slug = models.SlugField()
description = models.TextField()
description = models.TextField(null=True)
def __unicode__(self):
return self.title
......
......@@ -14,7 +14,7 @@
<ul>
{% for episode in episodes %}
<li><a href="{{ episode.id }}/">{{ episode.title }}</a></li>
<li><a href="{{ episode.slug }}/">{{ episode.title }}</a></li>
{% endfor %}
</ul>
......
......@@ -4,7 +4,7 @@
<ul>
{% for emission in object_list %}
<li><a href="{{ emission.id }}/">{{ emission.title }}</a></li>
<li><a href="{{ emission.slug }}/">{{ emission.title }}</a></li>
{% endfor %}
</ul>
......
......@@ -5,12 +5,12 @@ from .views import *
urlpatterns = patterns('',
url(r'^$', EmissionListView.as_view(), name='emission-list'),
url(r'^add$', EmissionCreateView.as_view(), name='emission-add'),
url(r'^(?P<pk>\d+)/$', EmissionDetailView.as_view(), name='emission-view'),
url(r'^(?P<pk>\d+)/edit/$', EmissionUpdateView.as_view(), name='emission-update'),
url(r'^(?P<pk>\d+)/delete/$', EmissionDeleteView.as_view(), name='emission-delete'),
url(r'^(?P<slug>[\w,-]+)/$', EmissionDetailView.as_view(), name='emission-view'),
url(r'^(?P<slug>[\w,-]+)/edit/$', EmissionUpdateView.as_view(), name='emission-update'),
url(r'^(?P<slug>[\w,-]+)/delete/$', EmissionDeleteView.as_view(), name='emission-delete'),
url(r'^(?P<emission_id>\d+)/add$', EpisodeCreateView.as_view(), name='episode-add'),
url(r'^(?P<emission_id>\d+)/(?P<pk>\d+)/$', EpisodeDetailView.as_view(), name='episode-view'),
url(r'^(?P<emission_id>\d+)/(?P<pk>\d+)/edit/$', EpisodeUpdateView.as_view(), name='episode-update'),
url(r'^(?P<emission_id>\d+)/(?P<pk>\d+)/delete/$', EpisodeDeleteView.as_view(), name='episode-delete'),
url(r'^(?P<emission_slug>[\w,-]+)/add$', EpisodeCreateView.as_view(), name='episode-add'),
url(r'^(?P<emission_slug>[\w,-]+)/(?P<slug>[\w,-]+)/$', EpisodeDetailView.as_view(), name='episode-view'),
url(r'^(?P<emission_slug>[\w,-]+)/(?P<slug>[\w,-]+)/edit/$', EpisodeUpdateView.as_view(), name='episode-update'),
url(r'^(?P<emission_slug>[\w,-]+)/(?P<slug>[\w,-]+)/delete/$', EpisodeDeleteView.as_view(), name='episode-delete'),
)
......@@ -45,7 +45,7 @@ USE_I18N = True
USE_L10N = True
# If you set this to False, Django will not use timezone-aware datetimes.
USE_TZ = True
USE_TZ = False
# Absolute filesystem path to the directory that will hold user-uploaded files.
# Example: "/var/www/example.com/media/"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment