Skip to content

Commit

Permalink
maison 04/11 22:49
Browse files (browse the repository at this point in the history)
  • Loading branch information
deborah-powers committed Apr 11, 2024
1 parent d4186c3 commit c40a70b
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 27 deletions.
46 changes: 22 additions & 24 deletions fanfic.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Fanfic (htmlCls.Html, Article):
def __init__ (self, url, subject=None):
Article.__init__ (self)
htmlCls.Html.__init__ (self, url)
self.text = htmlCls.delAttributes (self.text)
self.delAttributes()
if subject: self.subject = subject
if 'archive of our own' in self.title: self.fromAooo()
elif '://www.gutenberg.org/' in url: self.gutemberg()
Expand All @@ -36,8 +36,7 @@ def __init__ (self, url, subject=None):
article = htmlCls.getByTagFirst (self.text, 'article')
self.text = article
self.meta ={ 'link': self.link, 'author': self.author, 'autlink': self.autlink, 'subject': self.subject }
self.text = htmlCls.delAttributes (self.text)
self.text = htmlCls.delClasses (self.text)
self.delClasses()
article = self.toText()
if article: article.divide()
else: self.divide()
Expand Down Expand Up @@ -116,33 +115,32 @@ def gutemberg (self):
# self.delImgLink()

def fromAooo (self):
# fanfic enregistrée via le bouton télécharger en html
self.meta ={}

def fromAoooVa (self):
# fanfic enregistrée en faisant un ctl+ click
self.meta ={}
# le titre
tag = htmlCls.getByTagAndClassFirst (self.text, 'h2', 'title heading')
self.title = htmlCls.cleanTitle (htmlCls.getText (tag))
self.title = htmlCls.cleanTitle (tag.innerHtml)
# l'auteur
tag = htmlCls.getByTagAndClassFirst (self.text, 'h3', 'byline heading')
tag = htmlCls.getByTagFirst (tag, 'a')
self.autlink = htmlCls.getText (tag)
d= self.autlink.find ('\n')
self.author = htmlCls.cleanTitle (self.autlink[d+1:])
self.autlink = self.autlink[:d]
log.log (self.author, self.autlink)


self.subject = data[2]
self.subject = self.subject.replace (' (band)', "")
self.clean()
# le lien
d= self.text.find ("<a href='/downloads/") +20
f= self.text.find ('/', d)
self.link = 'https://archiveofourown.org/works/' + self.text[d:f]
tag = htmlCls.getByTagFirst (tag.innerHtml, 'a')
self.autlink = 'https://archiveofourown.org' + tag.attributes['href']
self.author = htmlCls.cleanTitle (tag.innerHtml)
# le sujet
d= self.text.find ('Category:<ul><li><a') +20
d= self.text.find ('>', d) +1
f= self.text.find ('</a>', d)
if self.text[d:f] in ('F/M', 'F/F') and 'romance' not in self.subject: self.subject = ', romance'+ self.subject
tag = htmlCls.getByTagAndClassFirst (self.text, 'dd', 'fandom tags')
tag = htmlCls.getByTagFirst (tag.innerHtml, 'a')
self.subject = htmlCls.cleanTitle (tag.innerHtml)
self.findSubject()
# le lien de la fanfic
tag = htmlCls.getByTagAndClassFirst (self.text, 'dd', 'bookmarks')
tag = htmlCls.getByTagFirst (tag.innerHtml, 'a')
self.link = 'https://archiveofourown.org' + tag.attributes['href'].replace ('bookmarks', "")
# le texte
tag = htmlCls.getById (self.text, 'chapters')
self.text = tag.innerHtml

def unisciel (self, subject):
self.subject = 'cours'
Expand Down Expand Up @@ -314,4 +312,4 @@ def ffNet (self):

fileAooo = 'b/aooo.html'
fileGtb = ''
fic = Fanfic ()
fic = Fanfic (fileAooo)
4 changes: 3 additions & 1 deletion fileCls.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@ def toPath (self):

def read (self):
self.toPath()
log.log (self.path)
if not os.path.exists (self.path): return
log.log (self.path)
textBrut = open (self.path, 'rb')
log.log (self.path)
tmpByte = textBrut.read()
encodingList = ('utf-8', 'ascii', 'ISO-8859-1')
text =""
Expand Down Expand Up @@ -138,7 +141,6 @@ def readJson (self):
return jsonData

def divide (self):
log.log (self.title, self.path, len (self.text))
self.fromPath()
self.text = textFct.shape (self.text)
if len (self.text) < 420000: self.write()
Expand Down
4 changes: 2 additions & 2 deletions htmlCls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from urllib import request as urlRequest
import codecs
import textFct
from fileCls import File
from fileCls import File, Article
import loggerFct as log

listTags =( 'i', 'b', 'em', 'span', 'strong', 'a', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'ul', 'ol', 'td', 'th', 'tr', 'caption', 'table', 'nav', 'div', 'label', 'button', 'textarea', 'fieldset', 'form', 'figcaption', 'figure', 'section', 'article', 'body' )
Expand All @@ -30,7 +30,7 @@ def __init__ (self, tagStr):
self.className =""
self.id =""
self.attributes ={}
self.fromString()
self.fromString (tagStr)

def fromString (self, tagStr):
""" tagStr est envoyée par getByPos
Expand Down

0 comments on commit c40a70b

Please sign in to comment.