Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
radiopanik
panikdb
Commits
d79604db
Commit
d79604db
authored
Aug 26, 2013
by
fred
Browse files
refactored load-from-spip code into smaller methods
parent
a650993b
Changes
1
Hide whitespace changes
Inline
Side-by-side
panikdb/emissions/management/commands/load-from-spip.py
View file @
d79604db
...
...
@@ -51,13 +51,6 @@ class Command(BaseCommand):
help
=
'Load emissions and episodes from a Spip dump file'
def
handle
(
self
,
filename
,
*
args
,
**
options
):
rubrics
=
{}
articles
=
{}
keyword_groups
=
{}
keywords
=
{}
documents
=
{}
breves
=
{}
with
open
(
filename
)
as
fd
:
content
=
fd
.
read
()
# the spip_courriers parts of the spip export are not properly
...
...
@@ -65,302 +58,353 @@ class Command(BaseCommand):
# parsed correctly.
content
=
content
[:
content
.
find
(
'<spip_courriers>'
)]
+
\
content
[
content
.
rfind
(
'</spip_courriers>'
)
+
17
:]
root
=
ET
.
fromstring
(
content
)
for
keywordgroup_xml
in
root
.
iter
(
'spip_groupes_mots'
):
keyword_group
=
KeywordGroup
()
for
attr
in
(
'id_groupe'
,
'titre'
):
setattr
(
keyword_group
,
attr
,
keywordgroup_xml
.
find
(
attr
).
text
)
if
keyword_group
.
id_groupe
not
in
(
'11'
,
# archives
'12'
,
# subjects
'3'
,
# category
'10'
,
# transversal
):
continue
keyword_groups
[
keyword_group
.
id_groupe
]
=
keyword_group
self
.
root
=
ET
.
fromstring
(
content
)
for
keyword_xml
in
root
.
iter
(
'spip_mots'
):
keyword
=
Keyword
()
for
attr
in
(
'id_mot'
,
'titre'
,
'id_groupe'
):
setattr
(
keyword
,
attr
,
keyword_xml
.
find
(
attr
).
text
)
if
not
keyword
.
id_groupe
in
keyword_groups
:
continue
if
keyword
.
id_mot
in
(
'92'
,):
# blacklist
continue
keywords
[
keyword
.
id_mot
]
=
keyword
keyword_groups
[
keyword
.
id_groupe
]
=
keyword
for
rubric_xml
in
root
.
iter
(
'spip_rubriques'
):
rubric
=
Rubric
()
for
attr
in
(
'id_rubrique'
,
'id_parent'
,
'titre'
,
'descriptif'
,
'texte'
):
setattr
(
rubric
,
attr
,
rubric_xml
.
find
(
attr
).
text
)
rubrics
[
rubric
.
id_rubrique
]
=
rubric
for
rubric
in
rubrics
.
values
():
if
rubric
.
id_parent
and
rubric
.
id_parent
!=
'0'
:
rubrics
[
rubric
.
id_parent
].
rubrics
[
rubric
.
id_rubrique
]
=
rubric
self
.
load_keyword_groups
()
self
.
load_keywords
()
self
.
load_rubrics
()
emission_rubric_ids
=
[]
straight_emission_rubric_ids
=
[]
for
rubric
in
rubrics
[
'2'
].
rubrics
.
values
():
# 'Les emissions'
for
rubric
in
self
.
rubrics
[
'2'
].
rubrics
.
values
():
# 'Les emissions'
emission_rubric_ids
.
append
(
rubric
.
id_rubrique
)
straight_emission_rubric_ids
.
append
(
rubric
.
id_rubrique
)
for
subrubric
in
rubric
.
rubrics
.
values
():
emission_rubric_ids
.
append
(
subrubric
.
id_rubrique
)
for
article_xml
in
root
.
iter
(
'spip_articles'
):
if
article_xml
.
find
(
'id_rubrique'
).
text
==
'65'
:
pass
# rubric for events, handle with care
elif
not
article_xml
.
find
(
'id_rubrique'
).
text
in
emission_rubric_ids
:
continue
article
=
Article
()
for
attr
in
(
'id_rubrique'
,
'id_article'
,
'titre'
,
'surtitre'
,
'soustitre'
,
'descriptif'
,
'chapo'
,
'texte'
,
'date_redac'
,
'statut'
,
'date'
):
setattr
(
article
,
attr
,
article_xml
.
find
(
attr
).
text
)
if
article
.
id_rubrique
==
'65'
:
# this is an event, they get a special handling, to be
# merged with newsitems
if
article
.
statut
not
in
(
'publie'
,
'prop'
):
continue
breve
=
Breve
()
breve
.
id_breve
=
'0%s'
%
article
.
id_article
breve
.
titre
=
article
.
titre
breve
.
texte
=
article
.
texte
breve
.
date_heure
=
article
.
date
breves
[
breve
.
id_breve
]
=
breve
continue
self
.
load_breves
()
self
.
load_articles
(
emission_rubric_ids
)
if
article
.
statut
!=
'publie'
:
continue
article
.
mots_cles
=
[]
articles
[
article
.
id_article
]
=
article
self
.
set_urls
()
if
rubrics
[
article
.
id_rubrique
].
id_parent
!=
'2'
:
# the spip structure didn't really expect subrubrics in the
# 'emissions' section, but people added some nevertheless,
# move related articles to their parent rubric.
article
.
id_rubrique
=
rubrics
[
article
.
id_rubrique
].
id_parent
self
.
load_documents
()
self
.
load_document_links
()
rubrics
[
article
.
id_rubrique
].
articles
[
article
.
id_article
]
=
article
self
.
process_emission_keywords
()
self
.
process_episode_keywords
()
for
breve_xml
in
root
.
iter
(
'spip_breves'
):
breve
=
Breve
()
for
attr
in
(
'id_breve'
,
'titre'
,
'texte'
,
'date_heure'
,
'statut'
):
setattr
(
breve
,
attr
,
breve_xml
.
find
(
attr
).
text
)
if
breve
.
statut
!=
'publie'
:
continue
breves
[
breve
.
id_breve
]
=
breve
for
spip_url_xml
in
root
.
iter
(
'spip_urls'
):
id_objet
=
spip_url_xml
.
find
(
'id_objet'
).
text
url
=
spip_url_xml
.
find
(
'url'
).
text
if
spip_url_xml
.
find
(
'type'
).
text
==
'article'
and
id_objet
in
articles
:
articles
[
id_objet
].
url
=
url
elif
spip_url_xml
.
find
(
'type'
).
text
==
'article'
and
(
'0%s'
%
id_objet
)
in
breves
:
breves
[
'0'
+
id_objet
].
url
=
url
elif
spip_url_xml
.
find
(
'type'
).
text
==
'rubrique'
and
id_objet
in
rubrics
:
rubrics
[
id_objet
].
url
=
url
elif
spip_url_xml
.
find
(
'type'
).
text
==
'mot'
and
id_objet
in
keywords
:
keywords
[
id_objet
].
url
=
url
elif
spip_url_xml
.
find
(
'type'
).
text
==
'breve'
and
id_objet
in
breves
:
breves
[
id_objet
].
url
=
url
for
spip_doc_xml
in
root
.
iter
(
'spip_documents'
):
id_document
=
spip_doc_xml
.
find
(
'id_document'
).
text
doc
=
Document
()
doc
.
filename
=
spip_doc_xml
.
find
(
'fichier'
).
text
doc
.
title
=
spip_doc_xml
.
find
(
'titre'
).
text
if
spip_doc_xml
.
find
(
'distant'
).
text
==
'oui'
:
url
=
doc
.
filename
doc
.
filename
=
os
.
path
.
split
(
url
)[
-
1
]
filename
=
os
.
path
.
join
(
'media/IMG/'
,
doc
.
filename
)
if
not
os
.
path
.
exists
(
'media/IMG'
):
for
emission_id
in
straight_emission_rubric_ids
:
rubric
=
self
.
rubrics
[
emission_id
]
emission
=
self
.
get_or_create_emission
(
rubric
)
for
article
in
rubric
.
articles
.
values
():
episode
=
self
.
get_or_create_episode
(
article
,
emission
)
if
episode
is
None
:
continue
if
not
os
.
path
.
exists
(
filename
):
fd
=
file
(
filename
,
'w'
)
fd
.
write
(
urllib2
.
urlopen
(
url
).
read
())
fd
.
close
()
documents
[
id_document
]
=
doc
attached_documents
=
{}
for
spip_doc_liens_xml
in
root
.
iter
(
'spip_documents_liens'
):
id_document
=
spip_doc_liens_xml
.
find
(
'id_document'
).
text
id_object
=
spip_doc_liens_xml
.
find
(
'id_objet'
).
text
if
spip_doc_liens_xml
.
find
(
'objet'
).
text
!=
'article'
:
continue
if
not
attached_documents
.
get
(
id_object
):
attached_documents
[
id_object
]
=
[]
attached_documents
[
id_object
].
append
(
documents
.
get
(
id_document
))
for
rubrickeyword_xml
in
root
.
iter
(
'spip_mots_rubriques'
):
keyword_id
=
rubrickeyword_xml
.
find
(
'id_mot'
).
text
rubric_id
=
rubrickeyword_xml
.
find
(
'id_rubrique'
).
text
rubric
=
rubrics
.
get
(
rubric_id
)
if
not
rubric
:
continue
if
keyword_id
==
'100'
:
# archive
rubric
.
archived
=
True
self
.
set_sound_files
(
article
,
episode
)
for
breve
in
self
.
breves
.
values
():
newsitem
=
self
.
get_or_create_newsitem
(
breve
)
def load_keyword_groups(self):
    """Index the keyword groups of interest in ``self.keyword_groups``.

    Walks every ``spip_groupes_mots`` element under ``self.root``, copies
    its ``id_groupe`` and ``titre`` texts onto a new ``KeywordGroup`` and
    stores it under its ``id_groupe``.  Groups whose id is not one of the
    four listed below are ignored.
    """
    wanted_group_ids = (
        '11',  # archives
        '12',  # subjects
        '3',   # category
        '10',  # transversal
    )
    groups = {}
    self.keyword_groups = groups
    for node in self.root.iter('spip_groupes_mots'):
        group = KeywordGroup()
        for field in ('id_groupe', 'titre'):
            setattr(group, field, node.find(field).text)
        if group.id_groupe in wanted_group_ids:
            groups[group.id_groupe] = group
def load_keywords(self):
    """Index the usable keywords in ``self.keywords``, keyed by ``id_mot``.

    Reads every ``spip_mots`` element under ``self.root``.  A keyword is
    kept only when its ``id_groupe`` is a key of ``self.keyword_groups``
    and its ``id_mot`` is not blacklisted.
    """
    blacklisted_ids = ('92',)
    self.keywords = {}
    for node in self.root.iter('spip_mots'):
        word = Keyword()
        for field in ('id_mot', 'titre', 'id_groupe'):
            setattr(word, field, node.find(field).text)
        if word.id_groupe not in self.keyword_groups:
            continue
        if word.id_mot in blacklisted_ids:
            continue
        self.keywords[word.id_mot] = word
        # NOTE(review): this replaces the group entry with the keyword
        # itself, so after this method the values of self.keyword_groups
        # are keywords, not groups.  Kept as-is; confirm it is intended.
        self.keyword_groups[word.id_groupe] = word
def load_rubrics(self):
    """Index all rubrics in ``self.rubrics`` and attach children to parents.

    First pass: one ``Rubric`` per ``spip_rubriques`` element, keyed by
    ``id_rubrique``.  Second pass: every rubric with a non-empty
    ``id_parent`` other than ``'0'`` is registered in its parent's
    ``rubrics`` mapping.  A parent id that was not loaded raises
    ``KeyError``.
    """
    fields = ('id_rubrique', 'id_parent', 'titre', 'descriptif', 'texte')
    index = {}
    self.rubrics = index
    for node in self.root.iter('spip_rubriques'):
        entry = Rubric()
        for field in fields:
            setattr(entry, field, node.find(field).text)
        index[entry.id_rubrique] = entry

    for child in index.values():
        parent_id = child.id_parent
        if parent_id and parent_id != '0':
            index[parent_id].rubrics[child.id_rubrique] = child
def load_breves(self):
    """Index published breves in ``self.breves``, keyed by ``id_breve``.

    Reads every ``spip_breves`` element under ``self.root``; entries whose
    ``statut`` is not ``'publie'`` are dropped.
    """
    fields = ('id_breve', 'titre', 'texte', 'date_heure', 'statut')
    self.breves = {}
    for node in self.root.iter('spip_breves'):
        item = Breve()
        for field in fields:
            setattr(item, field, node.find(field).text)
        if item.statut == 'publie':
            self.breves[item.id_breve] = item
def load_articles(self, emission_rubric_ids):
    """Load articles into ``self.articles`` and events into ``self.breves``.

    Only ``spip_articles`` elements whose ``id_rubrique`` is ``'65'`` (the
    events rubric) or is listed in *emission_rubric_ids* are considered.

    * Articles of rubric ``'65'`` with status ``'publie'`` or ``'prop'``
      are converted to a ``Breve`` stored in ``self.breves`` under the key
      ``'0' + id_article``.
    * Other articles are kept only when their status is ``'publie'``; they
      are stored in ``self.articles`` and in the ``articles`` mapping of
      their rubric.

    Relies on ``self.rubrics`` and ``self.breves`` being populated first.
    """
    fields = ('id_rubrique', 'id_article', 'titre', 'surtitre',
              'soustitre', 'descriptif', 'chapo', 'texte', 'date_redac',
              'statut', 'date')
    self.articles = {}
    for article_xml in self.root.iter('spip_articles'):
        rubric_id = article_xml.find('id_rubrique').text
        # '65' is the rubric for events, handle with care
        if rubric_id != '65' and rubric_id not in emission_rubric_ids:
            continue

        article = Article()
        for attr in fields:
            setattr(article, attr, article_xml.find(attr).text)

        if article.id_rubrique == '65':
            # this is an event, they get a special handling, to be
            # merged with newsitems
            if article.statut not in ('publie', 'prop'):
                continue
            breve = Breve()
            # the '0' prefix is what set_urls() looks for when matching
            # article urls against breves
            breve.id_breve = '0%s' % article.id_article
            breve.titre = article.titre
            breve.texte = article.texte
            breve.date_heure = article.date
            self.breves[breve.id_breve] = breve
            continue

        if article.statut != 'publie':
            continue

        article.mots_cles = []
        self.articles[article.id_article] = article

        if self.rubrics[article.id_rubrique].id_parent != '2':
            # the spip structure didn't really expect subrubrics in the
            # 'emissions' section, but people added some nevertheless,
            # move related articles to their parent rubric.
            article.id_rubrique = self.rubrics[article.id_rubrique].id_parent

        self.rubrics[article.id_rubrique].articles[article.id_article] = article
def set_urls(self):
    """Copy the urls found in ``spip_urls`` onto the loaded objects.

    Each entry carries a ``type``, an ``id_objet`` and a ``url``.  The url
    is set as the ``url`` attribute of the matching object in
    ``self.articles``, ``self.breves``, ``self.rubrics`` or
    ``self.keywords``.  An ``article`` url whose id is not in
    ``self.articles`` is tried against ``self.breves`` with a ``'0'``
    prefix.  Entries that match nothing are ignored.
    """
    for node in self.root.iter('spip_urls'):
        target_id = node.find('id_objet').text
        address = node.find('url').text
        kind = node.find('type').text

        if kind == 'article':
            if target_id in self.articles:
                self.articles[target_id].url = address
            elif ('0%s' % target_id) in self.breves:
                self.breves['0' + target_id].url = address
        elif kind == 'rubrique':
            if target_id in self.rubrics:
                self.rubrics[target_id].url = address
        elif kind == 'mot':
            if target_id in self.keywords:
                self.keywords[target_id].url = address
        elif kind == 'breve':
            if target_id in self.breves:
                self.breves[target_id].url = address
def load_documents(self):
    """Index the documents in ``self.documents``, keyed by ``id_document``.

    For each ``spip_documents`` element a ``Document`` is created with its
    ``filename`` (``fichier``) and ``title`` (``titre``).

    When the entry is flagged ``distant`` (``'oui'``), ``fichier`` is a
    url: the document filename becomes the last path component of that
    url, and the file is downloaded to ``media/IMG/`` unless a file of that
    name already exists there.  If the ``media/IMG`` directory does not
    exist the distant document is skipped and not indexed.
    """
    self.documents = {}
    for spip_doc_xml in self.root.iter('spip_documents'):
        id_document = spip_doc_xml.find('id_document').text
        doc = Document()
        doc.filename = spip_doc_xml.find('fichier').text
        doc.title = spip_doc_xml.find('titre').text

        if spip_doc_xml.find('distant').text == 'oui':
            url = doc.filename
            doc.filename = os.path.split(url)[-1]
            filename = os.path.join('media/IMG/', doc.filename)
            if not os.path.exists('media/IMG'):
                continue
            if not os.path.exists(filename):
                # Download before opening the target: if the download
                # fails, no empty file is left behind that would make the
                # next run believe the document is already there.
                payload = urllib2.urlopen(url).read()
                # Binary mode: the payload is raw bytes (images, sounds).
                with open(filename, 'wb') as fd:
                    fd.write(payload)

        self.documents[id_document] = doc
def load_document_links(self):
    """Group documents by the article they are attached to.

    Builds ``self.attached_documents``: article id -> list of entries
    taken from ``self.documents``, in the order the ``spip_documents_liens``
    elements appear.  Links whose ``objet`` is not ``'article'`` are
    ignored.  A link to a document id missing from ``self.documents``
    contributes ``None`` to the list.
    """
    links = {}
    self.attached_documents = links
    for node in self.root.iter('spip_documents_liens'):
        doc_ref = node.find('id_document').text
        owner_ref = node.find('id_objet').text
        if node.find('objet').text == 'article':
            if not links.get(owner_ref):
                links[owner_ref] = []
            links[owner_ref].append(self.documents.get(doc_ref))
def process_emission_keywords(self):
    """Apply the rubric/keyword links of ``spip_mots_rubriques``.

    For every link whose rubric is known in ``self.rubrics``:

    * keyword id ``'100'`` marks the rubric as archived;
    * a keyword of group ``'3'`` (category) is appended to the rubric's
      ``categories``; the first time such a keyword is seen it is bound,
      through ``related_object``, to the ``Category`` with the same title,
      which is created and saved if none exists.

    Links to unknown rubrics or unknown keywords are ignored.
    """
    for link in self.root.iter('spip_mots_rubriques'):
        keyword_id = link.find('id_mot').text
        rubric_id = link.find('id_rubrique').text

        rubric = self.rubrics.get(rubric_id)
        if not rubric:
            continue

        if keyword_id == '100':  # archive
            rubric.archived = True
            continue

        keyword = self.keywords.get(keyword_id)
        if keyword is None or keyword.id_groupe != '3':
            continue

        # category
        rubric.categories.append(keyword)
        if keyword.related_object:
            continue

        matches = Category.objects.filter(title=keyword.titre)
        if len(matches):
            category = matches[0]
        else:
            category = Category()
            category.title = keyword.titre
            category.save()
        keyword.related_object = category
def process_episode_keywords(self):
    """Collect keyword titles onto the articles they are linked to.

    For every ``spip_mots_articles`` link whose article is in
    ``self.articles`` and whose keyword is in ``self.keywords``, the
    keyword title is appended to the article's ``mots_cles`` when the
    keyword belongs to group ``'10'`` or ``'12'``.
    """
    tagging_groups = ('10', '12')  # transversales & sujets
    for link in self.root.iter('spip_mots_articles'):
        mot_ref = link.find('id_mot').text
        article_ref = link.find('id_article').text

        target = self.articles.get(article_ref)
        if target:
            mot = self.keywords.get(mot_ref)
            if mot is not None and mot.id_groupe in tagging_groups:
                target.mots_cles.append(mot.titre)
def get_or_create_emission(self, rubric):
    """Return the saved ``Emission`` matching *rubric*, creating it if needed.

    The emission is looked up by slug: first the lowercased rubric url,
    then the part of it before the first comma.  If neither exists a new
    ``Emission`` is instantiated.  In every case its slug, title, archived
    flag, description, text, image and categories are (re)set from the
    rubric before it is saved.
    """
    slug = rubric.url.lower()
    try:
        emission = Emission.objects.get(slug=slug)
    except Emission.DoesNotExist:
        slug = slug.split(',')[0]
        try:
            emission = Emission.objects.get(slug=slug)
        except Emission.DoesNotExist:
            emission = Emission()

    # whichever lookup succeeded, only the part before the first comma
    # is stored as slug
    slug = slug.split(',')[0]
    emission.slug = slug
    emission.title = rubric.titre
    emission.archived = rubric.archived
    emission.description = makeHtmlFromSpip(
        rubric.descriptif, documents=self.documents) or None
    emission.text = makeHtmlFromSpip(
        rubric.texte, documents=self.documents) or None

    # Prefer the rubric logo file; otherwise fall back to the images
    # referenced in the text or, failing that, in the description.
    image_path = None
    for ext in ('.jpg', '.png', '.gif'):
        candidate = 'media/IMG/rubon%s%s' % (rubric.id_rubrique, ext)
        if os.path.exists(candidate):
            image_path = [candidate]
            break
    if image_path is None:
        html = emission.text or emission.description
        if html:
            image_path = re.findall('src="/(media/IMG.*?)"', html, re.DOTALL)
    self.set_image(emission, image_path)

    # saved before touching the categories relation
    emission.save()
    emission.categories.clear()
    for category in rubric.categories:
        emission.categories.add(category.related_object)
    emission.save()
    return emission
def get_or_create_episode(self, article, emission):
    """Return the saved ``Episode`` matching *article*, or ``None``.

    ``None`` is returned when the article has the null ``date_redac``.
    Otherwise the episode is looked up by slug (the lowercased article
    url, or a slugified title when the url starts with
    ``'nouvel-article'``) and instantiated if missing.  Its fields, image
    and tags are (re)set from the article and it is saved.  When the
    episode has no ``Diffusion`` yet, one is created from ``date_redac``
    and saved, provided that date parses.
    """
    if article.date_redac == '0000-00-00 00:00:00':
        # date_redac was used for the diffusion date, if it's
        # not set it's probably not really an episode
        return None

    slug = article.url.lower()
    if slug.startswith('nouvel-article'):
        # <sigh/>
        slug = slugify(unicode(article.titre))

    try:
        episode = Episode.objects.get(slug=slug)
    except Episode.DoesNotExist:
        episode = Episode()

    episode.slug = slug
    episode.emission = emission
    episode.title = article.titre
    episode.description = makeHtmlFromSpip(
        article.descriptif, documents=self.documents) or None
    episode.text = makeHtmlFromSpip(
        article.texte, documents=self.documents) or None

    # Prefer the article logo file; otherwise fall back to the images
    # referenced in the text or, failing that, in the description.
    image_path = None
    for ext in ('.jpg', '.png', '.gif'):
        candidate = 'media/IMG/arton%s%s' % (article.id_article, ext)
        if os.path.exists(candidate):
            image_path = [candidate]
            break
    if image_path is None:
        html = episode.text or episode.description
        if html:
            image_path = re.findall('src="/(media/IMG.*?)"', html, re.DOTALL)
    self.set_image(episode, image_path)

    for motcle in article.mots_cles:
        episode.tags.add(motcle.lower())
    episode.save()

    if Diffusion.objects.filter(episode=episode).count():
        return episode

    diffusion = Diffusion()
    diffusion.episode = episode
    try:
        diffusion.datetime = datetime.strptime(
            article.date_redac, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # unparseable date: the diffusion is not saved
        pass
    else:
        diffusion.save()
    return episode
def
set_sound_files
(
self
,
article
,
episode
):
if
SoundFile
.
objects
.
filter
(
episode
=
episode
).
count
():
return
# skip episodes that already have sound files
episode_files
=
self
.
attached_documents
.
get
(
article
.
id_article
)
if
episode_files
:
for
episode_file
in
episode_files
:
if
episode_file
is
None
:
continue
keyword
=
keywords
.
get
(
keyword_id
)
if
keyword
is
None
:
if
os
.
path
.
splitext
(
episode_file
.
filename
)[
-
1
]
not
in
(
'.ogg'
,
'.mp3'
):
continue
if
keyword
.
id_groupe
in
(
'10'
,
'12'
):
# transversales & sujets
article
.
mots_cles
.
append
(
keyword
.
titre
)
for
emission_id
in
straight_emission_rubric_ids
:
rubric
=
rubrics
[
emission_id
]
slug
=
rubric
.
url
.
lower
()
try
:
emission
=
Emission
.
objects
.
get
(
slug
=
slug
)
except
Emission
.
DoesNotExist
:
slug
=
slug
.
split
(
','
)[
0
]
try
:
emission
=
Emission
.
objects
.
get
(
slug
=
slug
)
except
Emission
.
DoesNotExist
:
emission
=
Emission
()
slug
=
slug
.
split
(
','
)[
0
]
emission
.
slug
=
slug
emission
.
title
=
rubric
.
titre
emission
.
archived
=
rubric
.
archived
emission
.
description
=
makeHtmlFromSpip
(
rubric
.
descriptif
,
documents
=
documents
)
or
None
emission
.
text
=
makeHtmlFromSpip
(
rubric
.
texte
,
documents
=
documents
)
or
None
image_path
=
None
for
ext
in
(
'.jpg'
,
'.png'
,
'.gif'
):
if
os
.
path
.
exists
(
'media/IMG/rubon%s%s'
%
(
rubric
.
id_rubrique
,
ext
)):
image_path
=
[
'media/IMG/rubon%s%s'
%
(
rubric
.
id_rubrique
,
ext
)]
break
else
:
if
emission
.
text
:
image_path
=
re
.
findall
(
'src="/(media/IMG.*?)"'
,
emission
.
text
,
re
.
DOTALL
)
elif
emission
.
description
:
image_path
=
re
.
findall
(
'src="/(media/IMG.*?)"'
,
emission
.
description
,
re
.
DOTALL
)
self
.
set_image
(
emission
,
image_path
)
emission
.
save
()
emission
.
categories
.
clear
()
for
category
in
rubric
.
categories
:
emission
.
categories
.
add
(
category
.
related_object
)
emission
.
save
()
for
article
in
rubric
.
articles
.
values
():
if
article
.
date_redac
==
'0000-00-00 00:00:00'
:
# date_redac was used for the diffusion date, if it's
# not set it's probably not really an episode
continue
slug
=
article
.
url
.
lower
()
if
slug
.
startswith
(
'nouvel-article'
):
# <sigh/>
slug
=
slugify
(
article
.
title
)
try
:
episode
=
Episode
.
objects
.
get
(
slug
=
slug
)
except
Episode
.
DoesNotExist
:
episode
=
Episode
()
episode
.
slug
=
slug
episode
.
emission
=
emission
episode
.
title
=
article
.
titre
episode
.
description
=
makeHtmlFromSpip
(
article
.
descriptif
,
documents
=
documents
)
or
None
episode
.
text
=
makeHtmlFromSpip
(
article
.
texte
,
documents
=
documents
)
or
None
image_path
=
None
for
ext
in
(
'.jpg'
,
'.png'
,
'.gif'
):
if
os
.
path
.
exists
(
'media/IMG/arton%s%s'
%
(
article
.
id_article
,
ext
)):
image_path
=
[
'media/IMG/arton%s%s'
%
(
article
.
id_article
,
ext
)]
break
else
:
if
episode
.
text
:
image_path
=
re
.
findall
(
'src="/(media/IMG.*?)"'
,
episode
.
text
,
re
.
DOTALL
)
elif
episode
.
description
:
image_path
=
re
.
findall
(
'src="/(media/IMG.*?)"'
,
episode
.
description
,
re
.
DOTALL
)
self
.
set_image
(
episode
,
image_path
)
for
motcle
in
article
.
mots_cles
:
episode
.
tags
.
add
(
motcle
.
lower
())
episode
.
save
()
if
not
Diffusion
.
objects
.
filter
(
episode
=
episode
).
count
():
diffusion
=
Diffusion
()
diffusion
.
episode
=
episode
try
:
diffusion
.
datetime
=
datetime
.
strptime
(
article
.
date_redac
,
'%Y-%m-%d %H:%M:%S'
)