Skip to content

Commit

Permalink
[asurascan] fix get list latest, add feat search, get list by genres
Browse files Browse the repository at this point in the history
  • Loading branch information
zcrossoverz committed Jun 29, 2023
1 parent 2ef3d19 commit fa0fa06
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 136 deletions.
12 changes: 10 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "manga-lib",
"author": "Nhan Nguyen",
"version": "1.0.51",
"version": "1.0.52",
"license": "MIT",
"description": "A library for scraping manga from various websites.",
"repository": "https://github.com/zcrossoverz/manga-lib",
Expand All @@ -24,7 +24,15 @@
"manga crawl",
"manga crawler",
"manga scrapper",
"manga-lib"
"manga-lib",
"manga api",
"blogtruyen",
"blogtruyen.vn",
"toonily",
"toonily.com",
"asurascans",
"asurascans.com",
"anime"
],
"scripts": {
"test": "nodemon test.ts",
Expand Down
1 change: 0 additions & 1 deletion src/hooks/getDataChapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ export const useGetDataChapter = async (
.$eval(nextChapterSelector, (el) => {
return {
url_chapter: el.getAttribute('href'),
t: el.outerHTML,
};
})
.catch(() => null);
Expand Down
12 changes: 7 additions & 5 deletions src/hooks/getListLatest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ export const useGetDataItemsManga = async (
});
} else {
const wrapItems = await puppeteer!.$$(wrapSelector);

data = await Promise.all(
wrapItems.map(async (e, i) => {
const image_thumbnail: string = await e.$eval(
thumbnailSelector,
(el) => el.getAttribute(`${thumbnailAttr}`)!
);
const image_thumbnail: string = await (await e.$(
thumbnailSelector
))!.evaluate((el, thumbnailAttr) => {
return el.getAttribute(thumbnailAttr)!;
}, thumbnailAttr);

const { href } = await e.$eval(hrefSelector, (el) => {
return {
Expand All @@ -74,7 +76,7 @@ export const useGetDataItemsManga = async (

return {
_id: i,
title: not_null(title),
title: not_null(title).trim().replace(/\n/, ''),
href: not_null(href),
image_thumbnail: image_thumbnail.startsWith('//')
? `https:${image_thumbnail}`
Expand Down
216 changes: 91 additions & 125 deletions src/lib/asurascans.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/* eslint-disable @typescript-eslint/no-unsafe-call */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-unsafe-return */
/* eslint-disable @typescript-eslint/no-misused-promises */
/* eslint-disable @typescript-eslint/no-unused-vars */
/* eslint-disable @typescript-eslint/no-floating-promises */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
Expand All @@ -11,10 +16,6 @@ import {
responseListManga,
} from '../types/type';
import { not_null } from '../utils/validate';
import {
NETTRUYEN_SORT_FILTER,
NETTRUYEN_STATUS_FILTER,
} from '../constants/filter';
import { useGetDataItemsManga } from '../hooks/getListLatest';
import { useGetDataChapter } from '../hooks/getDataChapter';

Expand All @@ -39,163 +40,105 @@ export class AsuraScans implements AbstractMangaFactory {
else req.continue();
});
await _page.goto(
`${this.baseUrl}/tim-truyen?keyword=${keyword}${
page > 1 ? `&page=${page}` : ``
}`
`${this.baseUrl}${page > 1 ? `/page/${page}` : ``}/?s=${keyword}`
);

const element = await _page.$$(
'#ctl00_divCenter > div.Module.Module-170 > div > div.items > div > div.item > figure'
);
const paramsSelector = {
puppeteer: _page,
wrapSelector: 'div.listupd > div.bs > div.bsx',
titleSelector: 'a > div.bigor > div.tt',
thumbnailSelector: 'a > div.limit > img',
thumbnailAttr: 'src',
hrefSelector: 'a',
};

const is_multipage = await _page
.$eval('#ctl00_mainContent_ctl01_divPager', () => true)
const data = await useGetDataItemsManga(paramsSelector);

const canNext = await _page
.$eval('div.pagination > a.next.page-numbers', () => true)
.catch(() => false);

const canNext = is_multipage
? await _page
.$eval(
'#ctl00_mainContent_ctl01_divPager > ul > li > a.next-page',
() => true
)
.catch(() => false)
: false;

const canPrev = is_multipage
? await _page
.$eval(
'#ctl00_mainContent_ctl01_divPager > ul > li > a.prev-page',
() => true
)
.catch(() => false)
: false;

const totalPage = is_multipage
? parseInt(
not_null(
await _page.$eval(
'#ctl00_mainContent_ctl01_divPager > ul > li:last-child > a',
(el) => el.getAttribute('href')
)
).split('page=')[1]
)
: 0;
const canPrev = await _page
.$eval('div.pagination > a.prev.page-numbers', () => true)
.catch(() => false);

const totalPages = await _page.$$(
'div.pagination > a.page-numbers:not(.prev):not(.next)'
);

const totalPage =
totalPages !== undefined
? Number(
await totalPages[totalPages.length - 1].evaluate(
(el) => el.textContent!
)
)
: 0;
return {
totalData: element.length,
totalData: data.length,
totalPage,
currentPage: page !== undefined ? page : 1,
canNext,
canPrev,
data: await Promise.all(
element.map(async (e, i) => {
const href = not_null(
await e.$eval('div.image > a', (el) => el.getAttribute('href'))
);

const title = not_null(
await e.$eval('figcaption > h3 > a', (el) => el.textContent)
);

const image_thumbnail = not_null(
await e.$eval('div.image > a > img', (el) =>
el.getAttribute('data-original')
)
);
return {
_id: i,
title,
image_thumbnail: image_thumbnail.startsWith('//')
? `https:${image_thumbnail}`
: image_thumbnail,
href,
};
})
),
data,
};
}

/**
 * Lists the manga shown on one page of a genre listing.
 *
 * Opens a new browser page, loads `${baseUrl}${genre.path}` (with
 * `/page/<page>` appended when `page` is greater than 1), extracts the items
 * through `useGetDataItemsManga`, and reads the pagination links to work out
 * whether neighbouring pages exist and how many pages there are.
 *
 * @param genre  Genre whose `path` is appended to the base URL.
 * @param page   Page number to load; omitted or 1 loads the un-suffixed URL.
 * @param status Not used by this implementation; kept so the signature is unchanged.
 * @param sort   Not used by this implementation; kept so the signature is unchanged.
 * @returns The scraped items together with pagination details. `totalPage` is
 *          0 when the page has no numbered pagination links.
 */
async getListByGenre(
  genre: genre,
  page?: number,
  status?: any,
  sort?: any
): Promise<responseListManga> {
  const _page = await (await this.browser).newPage();
  const pageSuffix = page !== undefined && page > 1 ? `/page/${page}` : ``;
  const url = `${this.baseUrl}${genre.path}${pageSuffix}`;

  // Only the HTML document is needed for scraping; abort every other request.
  await _page.setRequestInterception(true);
  _page.on('request', (req) => {
    if (req.resourceType() !== 'document') req.abort();
    else req.continue();
  });
  await _page.goto(url);

  const paramsSelector = {
    puppeteer: _page,
    wrapSelector: 'div.listupd > div.bs > div.bsx',
    titleSelector: 'a > div.bigor > div.tt',
    thumbnailSelector: 'a > div.limit > img',
    thumbnailAttr: 'src',
    hrefSelector: 'a',
  };

  const data = await useGetDataItemsManga(paramsSelector);

  // `$eval` rejects when the selector matches nothing, which is read as
  // "there is no such link".
  const canNext = await _page
    .$eval('div.pagination > a.next.page-numbers', () => true)
    .catch(() => false);

  const canPrev = await _page
    .$eval('div.pagination > a.prev.page-numbers', () => true)
    .catch(() => false);

  const pageLinks = await _page.$$(
    'div.pagination > a.page-numbers:not(.prev):not(.next)'
  );

  // `$$` resolves to an empty array (never `undefined`) when nothing matches,
  // so the length has to be checked before touching the last element.
  // NOTE(review): only anchor elements are matched; if the site renders the
  // current page as a non-anchor element, the last page may be under-reported
  // when viewing it — confirm against the live markup.
  let totalPage = 0;
  if (pageLinks.length > 0) {
    const lastLabel = await pageLinks[pageLinks.length - 1].evaluate(
      (el) => el.textContent
    );
    const parsed = Number(lastLabel ?? '');
    // A non-numeric label falls back to 0 instead of leaking NaN to callers.
    totalPage = Number.isNaN(parsed) ? 0 : parsed;
  }

  return {
    totalData: data.length,
    totalPage,
    currentPage: page ?? 1,
    canNext,
    canPrev,
    data,
  };
}

Expand All @@ -220,18 +163,41 @@ export class AsuraScans implements AbstractMangaFactory {
titleSelector: 'div.headpost > h1',
imageSelectorAll: 'div#readerarea > p > img',
originImageAttr: 'src',
prevChapterSelector: '.amob > .npv.r > div.nextprev > a.ch-prev-btn',
nextChapterSelector: '.amob > .npv.r > div.nextprev > a.ch-next-btn',
prevChapterSelector: '.navlef > .npv.r > div.nextprev > a.ch-prev-btn',
nextChapterSelector: '.navlef > .npv.r > div.nextprev > a.ch-next-btn',
baseUrl: this.baseUrl,
url: url_chapter,
};

const data = await useGetDataChapter(paramsSelector);

const scripts = await _page.$$('script');
const script = await scripts[18].evaluate((e) => {
return JSON.parse(
e.textContent!.split('ts_reader.run(')[1].split(');')[0]
);
});

return {
...(url !== undefined ? { url } : {}),
...(path !== undefined ? { path } : {}),
...data,
next_chapter:
script.nextUrl !== ''
? {
url: script.nextUrl,
parent_href: url !== undefined ? url : '',
path: script.nextUrl.substring(`${this.baseUrl}`.length),
}
: null,
prev_chapter:
script.prevUrl !== ''
? {
url: script.prevUrl,
parent_href: url !== undefined ? url : '',
path: script.prevUrl.substring(`${this.baseUrl}`.length),
}
: null,
};
}

Expand Down
6 changes: 5 additions & 1 deletion src/utils/validate.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/**
 * Normalises an optional string to a definite string.
 *
 * @param params Value that may be `null` or `undefined`.
 * @returns `params` unchanged when it is a string (including the empty
 *          string); otherwise the empty string.
 */
export const not_null = (params: string | null | undefined): string => {
  return params ?? '';
};

/**
 * Extracts the text that follows the first occurrence of `start`, cut off at
 * the first occurrence of `end`.
 *
 * The segment considered is the second piece of `str.split(start)`, so it also
 * stops at the next occurrence of `start`, if there is one. When `end` does
 * not occur in that segment, the whole segment is returned.
 *
 * @param str   Text to search.
 * @param start Delimiter that precedes the wanted text.
 * @param end   Delimiter that terminates the wanted text.
 * @returns The text between the two delimiters.
 * @throws TypeError when `start` does not occur in `str`.
 */
export const splitString = (str: string, start: string, end: string) => {
  const afterStart = str.split(start)[1];
  if (afterStart === undefined) {
    // Without this guard the failure surfaces as an opaque
    // "cannot read properties of undefined" from the chained split below.
    throw new TypeError(
      `splitString: start delimiter "${start}" not found in input`
    );
  }
  return afterStart.split(end)[0];
};
6 changes: 4 additions & 2 deletions test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ import { Manga, MangaType } from './src';
// };
// });

// Manual check script: build the AsuraScans scraper and print one genre listing.
const t = new Manga().build(MangaType.ASURASCANS);

void (async () => {
  console.log(
    await t.getListByGenre({ path: '/genres/action', name: 'action' })
  );
})();

0 comments on commit fa0fa06

Please sign in to comment.