Document
Document
# you may not use this file except in compliance with the License.
# http://www.apache.org/licenses/LICENSE-2.0
# See the License for the specific language governing permissions and
import json
import logging
import re
logger = logging.getLogger(__name__)
class WattpadComAdapter(BaseSiteAdapter):
# Site adapter for www.wattpad.com, built on BaseSiteAdapter.
# Source: https://github.com/de3sw2aq1/wattpad-ebook-scraper/blob/master/scrape.py
# Endpoint that returns the site's category definitions (loaded into CATEGORY_DEFs).
API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories'
# Endpoint template for a chapter's text; %s is filled with a chapter id.
API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext?id=%s'
# Endpoint template for chapter info; %s is filled with the chapter id found in a URL.
API_CHAPTERINFO = 'https://www.wattpad.com/apiv2/info?id=%s'
# Class-level cache of the decoded API_GETCATEGORIES response; None until first loaded.
CATEGORY_DEFs = None
# NOTE(review): the enclosing ``def`` line is not visible in this excerpt.
# These statements use ``self`` and ``url``, so they presumably form the body
# of the adapter's constructor -- confirm before merging.

# Resolve the story id from the supplied URL and record it as metadata.
self.storyId = unicode(self.getStoryId(url))
self.story.setMetadata('storyId', self.storyId)
# Rewrite the adapter URL to the story/<id> form, whichever URL form was given.
self._setURL('https://www.wattpad.com/story/%s' % self.storyId)

# categoryDefs do not change all that often, if at all. Could be put in a
# constant, leaving it as a class var for now.
if WattpadComAdapter.CATEGORY_DEFs is None:
    try:
        WattpadComAdapter.CATEGORY_DEFs = json.loads(
            self._fetchUrl(WattpadComAdapter.API_GETCATEGORIES))
    except Exception:
        # Best effort: a failed category lookup must not prevent the adapter
        # from being constructed. ``except Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        logger.debug('API_GETCATEGORIES failed.')
        # NOTE(review): other code calls dict-style methods on CATEGORY_DEFs;
        # the empty-list fallback is kept as-is to preserve behaviour.
        WattpadComAdapter.CATEGORY_DEFs = []
@staticmethod
def getSiteDomain():
    """Return the Wattpad host name served by this adapter."""
    domain = 'www.wattpad.com'
    return domain
@classmethod
def getSiteExampleURLs(cls):
    """Return the example URL forms as a single space-separated string.

    Three forms are listed: a story URL with a title slug, a bare story
    URL, and a chapter URL.
    """
    example_urls = (
        'https://www.wattpad.com/story/9999999-story-title',
        'https://www.wattpad.com/story/9999999',
        'https://www.wattpad.com/9999999-chapter-is-ok-too',
    )
    return ' '.join(example_urls)
@classmethod
def getSiteURLPattern(cls):
    """Return the regex that recognises Wattpad story and chapter URLs.

    The optional ``story/`` segment lets the same pattern accept both
    ``/story/<id>...`` and ``/<id>...`` URLs; the digits are captured in
    the named group ``storyId``.
    """
    # Raw string: ``\.`` and ``\d`` are regex escapes, not string escapes.
    return r'https://www\.wattpad\.com/(story/)?(?P<storyId>\d+).*'
@classmethod
def getSiteAbbrev(cls):
    """Return the short abbreviation for this site."""
    abbreviation = 'wattpad'
    return abbreviation
@classmethod
def getDateFormat(cls):
    """Return the date format string: ISO-8601-like with a literal ``Z``."""
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    return date_format
def use_pagecache(self):
    """Report that this adapter opts in to the page cache (always True)."""
    enabled = True
    return enabled
def getStoryId(self, url):
# Resolve the story id for ``url``; raises exceptions.StoryDoesNotExist when
# no group id is available.
# NOTE(review): lines are missing from this excerpt -- the assignments of
# ``storyIdInUrl``, ``chapterIdInUrl`` and ``groupid``, and the ``if`` that
# pairs with the first ``else``, are not visible here.
# First branch: the id is the "storyId" group of a URL match.
return storyIdInUrl.group("storyId")
else:
# Otherwise ask the chapter-info API about the chapter id found in the URL.
chapterInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_CHAPTERINFO %
chapterIdInUrl.group('chapterId')))
# presumably ``groupid`` is read from ``chapterInfo`` -- TODO confirm.
if groupid is None:
raise exceptions.StoryDoesNotExist(url)
else:
return groupid
# Copies fields of ``storyInfo`` into the story's metadata.
# NOTE(review): the enclosing ``def`` line, the body of the ``try`` below and
# the assignment of ``storyInfo`` are not visible in this excerpt; further
# lines may be missing between the statements that follow.
try:
except Exception:
raise exceptions.AdultCheckRequired(self.url)
# title
self.story.setMetadata('title', storyInfo['title'])
# author
# Both authorId and author are taken from the same user name field.
self.story.setMetadata('authorId', storyInfo['user']['name'])
self.story.setMetadata('author', storyInfo['user']['name'])
self.story.setMetadata('reads', storyInfo['readCount'])
# STATUS
# Default to In-Progress; overridden when the story is flagged completed.
self.story.setMetadata('status', 'In-Progress')
if storyInfo['completed']:
self.story.setMetadata('status', 'Completed')
# DESCRIPTION
self.setDescription(storyInfo['url'], storyInfo['description'])
# DATES
# NOTE(review): no date metadata is set in the visible lines under this
# heading -- lines may be missing here.
self.story.setMetadata('numChapters', len(self.chapterUrls))
# Swap the '-256-' marker in the cover URL for '-512-' before setting the cover.
self.setCoverImage(storyInfo['url'], storyInfo['cover'].replace('-256-','-512-'))
self.story.setMetadata('language', storyInfo['language']['name'])
# CATEGORIES
# NOTE(review): the start of the expression that builds ``storyCategories``
# is not visible. ``dict.has_key`` does not exist in Python 3, and a list
# (the fallback value of CATEGORY_DEFs) has no ``has_key`` either; any such
# error is swallowed by the bare ``except`` below, so 'category' and 'tags'
# are then silently left unset.
try:
WattpadComAdapter.CATEGORY_DEFs.has_key(str(c))]
self.story.setMetadata('category', storyCategories[0])
self.story.setMetadata('tags', storyInfo['tags'])
except:
pass
# NOTE(review): which method this return belongs to cannot be determined
# from this excerpt.
return self.extractChapterUrlsAndMetadata()
logger.debug('%s' % url)
chapterID = re.search(u'https://www.wattpad.com/(?P<chapterID>\d+).*',
url).group('chapterID')
return
self.utf8FromSoup(url,self.make_soup(self._fetchUrl(WattpadComAdapter.API_STORYTEXT %
chapterID)))
# adapter self-discovery is not implemented in fanficfare (it existed for the previous project)
def getClass():
    """Return the adapter class implemented by this module."""
    adapter_class = WattpadComAdapter
    return adapter_class