maison 04/11 22:49

deborah-powers · Apr 11, 2024 · c40a70b
1 parent d4186c3
commit c40a70b
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 27 deletions.
diff --git a/fanfic.py b/fanfic.py
@@ -19,7 +19,7 @@ class Fanfic (htmlCls.Html, Article):
 	def __init__ (self, url, subject=None):
 		Article.__init__ (self)
 		htmlCls.Html.__init__ (self, url)
-		self.text = htmlCls.delAttributes (self.text)
+		self.delAttributes()
 		if subject: self.subject = subject
 		if 'archive of our own' in self.title:	self.fromAooo()
 		elif '://www.gutenberg.org/' in url:	self.gutemberg()
@@ -36,8 +36,7 @@ def __init__ (self, url, subject=None):
 			article = htmlCls.getByTagFirst (self.text, 'article')
 			self.text = article
 		self.meta ={ 'link': self.link, 'author': self.author, 'autlink': self.autlink, 'subject': self.subject }
-		self.text = htmlCls.delAttributes (self.text)
-		self.text = htmlCls.delClasses (self.text)
+		self.delClasses()
 		article = self.toText()
 		if article: article.divide()
 		else: self.divide()
@@ -116,33 +115,32 @@ def gutemberg (self):
 	#	self.delImgLink()
 
 	def fromAooo (self):
+		# fanfic enregistrée via le bouton télécharger en html
+		self.meta ={}
+
+	def fromAoooVa (self):
+		# fanfic enregistrée en faisant un ctl+ click
 		self.meta ={}
 		# le titre
 		tag = htmlCls.getByTagAndClassFirst (self.text, 'h2', 'title heading')
-		self.title = htmlCls.cleanTitle (htmlCls.getText (tag))
+		self.title = htmlCls.cleanTitle (tag.innerHtml)
 		# l'auteur
 		tag = htmlCls.getByTagAndClassFirst (self.text, 'h3', 'byline heading')
-		tag = htmlCls.getByTagFirst (tag, 'a')
-		self.autlink = htmlCls.getText (tag)
-		d= self.autlink.find ('\n')
-		self.author = htmlCls.cleanTitle (self.autlink[d+1:])
-		self.autlink = self.autlink[:d]
-		log.log (self.author, self.autlink)
-
-
-		self.subject = data[2]
-		self.subject = self.subject.replace (' (band)', "")
-		self.clean()
-		# le lien
-		d= self.text.find ("<a href='/downloads/") +20
-		f= self.text.find ('/', d)
-		self.link = 'https://archiveofourown.org/works/' + self.text[d:f]
+		tag = htmlCls.getByTagFirst (tag.innerHtml, 'a')
+		self.autlink = 'https://archiveofourown.org' + tag.attributes['href']
+		self.author = htmlCls.cleanTitle (tag.innerHtml)
 		# le sujet
-		d= self.text.find ('Category:<ul><li><a') +20
-		d= self.text.find ('>', d) +1
-		f= self.text.find ('</a>', d)
-		if self.text[d:f] in ('F/M', 'F/F') and 'romance' not in self.subject: self.subject = ', romance'+ self.subject
+		tag = htmlCls.getByTagAndClassFirst (self.text, 'dd', 'fandom tags')
+		tag = htmlCls.getByTagFirst (tag.innerHtml, 'a')
+		self.subject = htmlCls.cleanTitle (tag.innerHtml)
 		self.findSubject()
+		# le lien de la fanfic
+		tag = htmlCls.getByTagAndClassFirst (self.text, 'dd', 'bookmarks')
+		tag = htmlCls.getByTagFirst (tag.innerHtml, 'a')
+		self.link = 'https://archiveofourown.org' + tag.attributes['href'].replace ('bookmarks', "")
+		# le texte
+		tag = htmlCls.getById (self.text, 'chapters')
+		self.text = tag.innerHtml
 
 	def unisciel (self, subject):
 		self.subject = 'cours'
@@ -314,4 +312,4 @@ def ffNet (self):
 
 fileAooo = 'b/aooo.html'
 fileGtb = ''
-fic = Fanfic ()
+fic = Fanfic (fileAooo)
diff --git a/fileCls.py b/fileCls.py
@@ -83,8 +83,11 @@ def toPath (self):
 
 	def read (self):
 		self.toPath()
+		log.log (self.path)
 		if not os.path.exists (self.path): return
+		log.log (self.path)
 		textBrut = open (self.path, 'rb')
+		log.log (self.path)
 		tmpByte = textBrut.read()
 		encodingList = ('utf-8', 'ascii', 'ISO-8859-1')
 		text =""
@@ -138,7 +141,6 @@ def readJson (self):
 		return jsonData
 
 	def divide (self):
-		log.log (self.title, self.path, len (self.text))
 		self.fromPath()
 		self.text = textFct.shape (self.text)
 		if len (self.text) < 420000: self.write()

diff --git a/htmlCls.py b/htmlCls.py
@@ -5,7 +5,7 @@
 from urllib import request as urlRequest
 import codecs
 import textFct
-from fileCls import File
+from fileCls import File, Article
 import loggerFct as log
 
 listTags =( 'i', 'b', 'em', 'span', 'strong', 'a', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'ul', 'ol', 'td', 'th', 'tr', 'caption', 'table', 'nav', 'div', 'label', 'button', 'textarea', 'fieldset', 'form', 'figcaption', 'figure', 'section', 'article', 'body' )
@@ -30,7 +30,7 @@ def __init__ (self, tagStr):
 		self.className =""
 		self.id =""
 		self.attributes ={}
-		self.fromString()
+		self.fromString (tagStr)
 
 	def fromString (self, tagStr):
 		""" tagStr est envoyée par getByPos