From bd0b29aaee14077be9b6a9cd151c67b19cb12b8c Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Sat, 21 Aug 2021 19:10:58 +0300 Subject: [PATCH 01/10] Update index.js --- src/index.js | 64 ++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/src/index.js b/src/index.js index a95e418..58280f7 100755 --- a/src/index.js +++ b/src/index.js @@ -13,11 +13,12 @@ program .option('-d, --debug', 'Enable debug mode') .parse(process.argv) +const options = program.opts() +const config = utils.loadConfig(options.config) + async function main() { console.log('\r\nStarting...') - const options = program.opts() - const config = utils.loadConfig(options.config) const channels = utils.parseChannels(config.channels) const utcDate = utils.getUTCDate() const dates = Array.from({ length: config.days }, (_, i) => utcDate.add(i, 'd')) @@ -34,35 +35,12 @@ async function main() { for (let item of queue) { if (options.debug) console.time(' Response Time') await utils - .fetchData(item, config) + .buildRequest(item, config) + .then(utils.fetchData) .then(response => { - if (options.debug) { - console.timeEnd(' Response Time') - console.time(' Parsing Time') - } - if (!item.channel.logo && config.logo) { - item.channel.logo = config.logo({ - channel: item.channel, - content: response.data.toString(), - buffer: response.data - }) - } - - const parsed = utils.parsePrograms({ response, item, config }).map(program => { - program.lang = program.lang || item.channel.lang || undefined - return program - }) - - console.log( - ` ${config.site} - ${item.channel.xmltv_id} - ${item.date.format('MMM D, YYYY')} (${ - parsed.length - } programs)` - ) - - programs = programs.concat(parsed) - }) - .then(() => { - if (options.debug) console.timeEnd(' Parsing Time') + if (options.debug) console.timeEnd(' Response Time') + const results = parseResponse(response, item) + programs = programs.concat(results) }) .then(utils.sleep(config.delay)) 
.catch(err => { @@ -86,4 +64,30 @@ async function main() { console.log('Finish') } +async function parseResponse(response, item) { + if (options.debug) console.time(' Parsing Time') + if (!item.channel.logo && config.logo) { + item.channel.logo = config.logo({ + channel: item.channel, + content: response.data.toString(), + buffer: response.data + }) + } + + const parsed = utils.parsePrograms({ response, item, config }).map(program => { + program.lang = program.lang || item.channel.lang || undefined + return program + }) + + console.log( + ` ${config.site} - ${item.channel.xmltv_id} - ${item.date.format('MMM D, YYYY')} (${ + parsed.length + } programs)` + ) + + if (options.debug) console.timeEnd(' Parsing Time') + + return parsed +} + main() From 8736c1a26beef0334dd35fa13d36619dd54d351a Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Sat, 21 Aug 2021 19:11:01 +0300 Subject: [PATCH 02/10] Update utils.js --- src/utils.js | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/src/utils.js b/src/utils.js index fe0034d..63fa04c 100644 --- a/src/utils.js +++ b/src/utils.js @@ -11,6 +11,8 @@ dayjs.extend(utc) axiosCookieJarSupport(axios) const utils = {} +const defaultUserAgent = + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71' utils.loadConfig = function (file) { if (!file) throw new Error('Path to [site].config.js is missing') @@ -39,10 +41,6 @@ utils.loadConfig = function (file) { output: 'guide.xml', request: { method: 'GET', - headers: { - 'User-Agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71' - }, maxContentLength: 5 * 1024 * 1024, timeout: 5000, withCredentials: true, @@ -194,15 +192,53 @@ utils.writeToFile = function (filename, data) { fs.writeFileSync(path.resolve(filename), data) } -utils.fetchData = function 
(item, config) { +utils.buildRequest = async function (item, config) { const request = { ...config.request } - request.url = typeof config.url === 'function' ? config.url(item) : config.url - request.data = - typeof config.request.data === 'function' ? config.request.data(item) : config.request.data + const headers = await utils.getRequestHeaders(item, config) + request.headers = { 'User-Agent': defaultUserAgent, ...headers } + request.url = await utils.getRequestUrl(item, config) + request.data = await utils.getRequestData(item, config) + return request +} + +utils.fetchData = function (request) { return axios(request) } +utils.getRequestHeaders = async function (item, config) { + if (typeof config.request.headers === 'function') { + const headers = config.request.headers(item) + if (typeof headers === 'Promise') { + return await headers + } + return headers + } + return config.request.headers +} + +utils.getRequestData = async function (item, config) { + if (typeof config.request.data === 'function') { + const data = config.request.data(item) + if (typeof data === 'Promise') { + return await data + } + return data + } + return config.request.data +} + +utils.getRequestUrl = async function (item, config) { + if (typeof config.url === 'function') { + const url = config.url(item) + if (typeof url === 'Promise') { + return await url + } + return url + } + return config.url +} + utils.getUTCDate = function () { return dayjs.utc() } From 18c3c8c2231c71bed731e987f015b064a2dba4da Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Sat, 21 Aug 2021 19:11:04 +0300 Subject: [PATCH 03/10] Create async.config.js --- tests/input/async.config.js | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/input/async.config.js diff --git a/tests/input/async.config.js b/tests/input/async.config.js new file mode 100644 index 0000000..60c461d --- /dev/null +++ b/tests/input/async.config.js @@ -0,0 +1,21 @@ +module.exports = { + site: 'example.com', + 
channels: 'example.com.channels.xml', + url() { + return Promise.resolve('http://example.com/20210319/1tv.json') + }, + request: { + method: 'POST', + headers() { + return Promise.resolve({ + 'Content-Type': 'application/json', + Cookie: 'abc=123' + }) + }, + data() { + return Promise.resolve({ accountID: '123' }) + } + }, + parser: () => [], + logo: () => 'http://example.com/logos/1TV.png?x=шеллы&sid=777' +} From 791cb0ff2f2bdb542f88d14b8c0c821ae6aef495 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Sat, 21 Aug 2021 19:11:06 +0300 Subject: [PATCH 04/10] Update utils.test.js --- tests/utils.test.js | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/tests/utils.test.js b/tests/utils.test.js index 349010c..edc7474 100644 --- a/tests/utils.test.js +++ b/tests/utils.test.js @@ -15,8 +15,6 @@ it('can load valid config.js', () => { timeout: 5000, headers: { 'Content-Type': 'application/json', - 'User-Agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71', Cookie: 'abc=123' } }) @@ -81,9 +79,23 @@ it('can escape url', () => { ) }) -it('can fetch data', () => { - const config = utils.loadConfig('./tests/input/example.com.config.js') - utils.fetchData({}, config).then(jest.fn).catch(jest.fn) +it('can fetch data', async () => { + const request = { + data: { accountID: '123' }, + headers: { + 'Content-Type': 'application/json', + Cookie: 'abc=123', + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71' + }, + maxContentLength: 5242880, + method: 'POST', + responseType: 'arraybuffer', + timeout: 5000, + url: 'http://example.com/20210319/1tv.json', + withCredentials: true + } + utils.fetchData(request).then(jest.fn).catch(jest.fn) expect(mockAxios).toHaveBeenCalledWith( expect.objectContaining({ data: { accountID: '123' }, @@ 
-101,3 +113,24 @@ it('can fetch data', () => { }) ) }) + +it('can build request async', async () => { + const config = utils.loadConfig('./tests/input/async.config.js') + return utils.buildRequest({}, config).then(request => { + expect(request).toMatchObject({ + data: { accountID: '123' }, + headers: { + 'Content-Type': 'application/json', + Cookie: 'abc=123', + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71' + }, + maxContentLength: 5242880, + method: 'POST', + responseType: 'arraybuffer', + timeout: 5000, + url: 'http://example.com/20210319/1tv.json', + withCredentials: true + }) + }) +}) From baaf21d0dd7b979375509bae93678f7a9927e549 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Mon, 23 Aug 2021 13:45:50 +0300 Subject: [PATCH 05/10] Update index.js --- src/index.js | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/index.js b/src/index.js index 58280f7..ee4ff69 100755 --- a/src/index.js +++ b/src/index.js @@ -37,9 +37,11 @@ async function main() { await utils .buildRequest(item, config) .then(utils.fetchData) - .then(response => { + .then(async response => { if (options.debug) console.timeEnd(' Response Time') - const results = parseResponse(response, item) + if (options.debug) console.time(' Parsing Time') + const results = await utils.parseResponse(item, response, config) + if (options.debug) console.timeEnd(' Parsing Time') programs = programs.concat(results) }) .then(utils.sleep(config.delay)) @@ -64,30 +66,4 @@ async function main() { console.log('Finish') } -async function parseResponse(response, item) { - if (options.debug) console.time(' Parsing Time') - if (!item.channel.logo && config.logo) { - item.channel.logo = config.logo({ - channel: item.channel, - content: response.data.toString(), - buffer: response.data - }) - } - - const parsed = utils.parsePrograms({ response, item, config 
}).map(program => { - program.lang = program.lang || item.channel.lang || undefined - return program - }) - - console.log( - ` ${config.site} - ${item.channel.xmltv_id} - ${item.date.format('MMM D, YYYY')} (${ - parsed.length - } programs)` - ) - - if (options.debug) console.timeEnd(' Parsing Time') - - return parsed -} - main() From 7b9d81d10504e2df55333d26c53cff5626530964 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Mon, 23 Aug 2021 13:45:52 +0300 Subject: [PATCH 06/10] Update utils.js --- src/utils.js | 80 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/src/utils.js b/src/utils.js index 63fa04c..87fef59 100644 --- a/src/utils.js +++ b/src/utils.js @@ -163,26 +163,6 @@ utils.convertToXMLTV = function ({ config, channels, programs }) { return output } -utils.parsePrograms = function ({ response, item, config }) { - const options = merge(item, config, { - content: response.data.toString(), - buffer: response.data - }) - - const programs = config.parser(options) - - if (!Array.isArray(programs)) { - throw new Error('Parser should return an array') - } - - return programs - .filter(i => i) - .map(p => { - p.channel = item.channel.xmltv_id - return p - }) -} - utils.writeToFile = function (filename, data) { const dir = path.resolve(path.dirname(filename)) if (!fs.existsSync(dir)) { @@ -209,7 +189,7 @@ utils.fetchData = function (request) { utils.getRequestHeaders = async function (item, config) { if (typeof config.request.headers === 'function') { const headers = config.request.headers(item) - if (typeof headers === 'Promise') { + if (this.isPromise(headers)) { return await headers } return headers @@ -220,7 +200,7 @@ utils.getRequestHeaders = async function (item, config) { utils.getRequestData = async function (item, config) { if (typeof config.request.data === 'function') { const data = config.request.data(item) - if (typeof data === 'Promise') { + if (this.isPromise(data)) { return 
await data } return data @@ -231,7 +211,7 @@ utils.getRequestData = async function (item, config) { utils.getRequestUrl = async function (item, config) { if (typeof config.url === 'function') { const url = config.url(item) - if (typeof url === 'Promise') { + if (this.isPromise(url)) { return await url } return url @@ -243,4 +223,58 @@ utils.getUTCDate = function () { return dayjs.utc() } +utils.parseResponse = async (item, response, config) => { + const options = merge(item, config, { + content: response.data.toString(), + buffer: response.data + }) + + if (!item.channel.logo && config.logo) { + item.channel.logo = await utils.loadLogo(options, config) + } + + const parsed = await utils.parsePrograms(options, config) + + console.log( + ` ${config.site} - ${item.channel.xmltv_id} - ${item.date.format('MMM D, YYYY')} (${ + parsed.length + } programs)` + ) + + return parsed +} + +utils.parsePrograms = async function (options, config) { + let programs = config.parser(options) + + if (this.isPromise(programs)) { + programs = await programs + } + + if (!Array.isArray(programs)) { + throw new Error('Parser should return an array') + } + + const channel = options.channel + return programs + .filter(i => i) + .map(program => { + program.channel = channel.xmltv_id + program.lang = program.lang || channel.lang || undefined + return program + }) +} + +utils.loadLogo = async function (options, config) { + const logo = config.logo(options) + if (this.isPromise(logo)) { + return await logo + } + return logo +} + +utils.isPromise = function (promise) { + return !!promise && typeof promise.then === 'function' +} + module.exports = utils From a42bcf35b963e5243a840e609407b957bbb95115 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Mon, 23 Aug 2021 13:45:55 +0300 Subject: [PATCH 07/10] Update async.config.js --- tests/input/async.config.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/input/async.config.js b/tests/input/async.config.js index 
60c461d..d8e1998 100644 --- a/tests/input/async.config.js +++ b/tests/input/async.config.js @@ -16,6 +16,10 @@ module.exports = { return Promise.resolve({ accountID: '123' }) } }, - parser: () => [], - logo: () => 'http://example.com/logos/1TV.png?x=шеллы&sid=777' + parser() { + return Promise.resolve([]) + }, + logo() { + return Promise.resolve('http://example.com/logos/1TV.png?x=шеллы&sid=777') + } } From 93baf248f736cd2cd87784d98a6107b5dbbe3244 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Mon, 23 Aug 2021 13:45:57 +0300 Subject: [PATCH 08/10] Update utils.test.js --- tests/utils.test.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/utils.test.js b/tests/utils.test.js index edc7474..df69795 100644 --- a/tests/utils.test.js +++ b/tests/utils.test.js @@ -134,3 +134,19 @@ it('can build request async', async () => { }) }) }) + +it('can load logo async', async () => { + const config = utils.loadConfig('./tests/input/async.config.js') + return utils.loadLogo({}, config).then(logo => { + expect(logo).toBe('http://example.com/logos/1TV.png?x=шеллы&sid=777') + }) +}) + +it('can parse programs async', async () => { + const config = utils.loadConfig('./tests/input/async.config.js') + return utils + .parsePrograms({ channel: { xmltv_id: '1tv', lang: 'en' } }, config) + .then(programs => { + expect(programs.length).toBe(0) + }) +}) From 409f7ee8437ffa2a9a1692b6a6a8580fde2bd28d Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Mon, 23 Aug 2021 14:17:24 +0300 Subject: [PATCH 09/10] Update README.md --- README.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2a7c1a5..0803d1e 100644 --- a/README.md +++ b/README.md @@ -27,16 +27,39 @@ module.exports = { site: 'example.com', // site domain name (required) output: 'example.com.guide.xml', // path to output file (default: 'guide.xml') channels: 'example.com.channels.xml', // path to 
channels.xml file (required) + days: 3, // number of days for which to grab the program (default: 1) request: { // request options (details: https://github.com/axios/axios#request-config) method: 'GET', - headers: { - 'User-Agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71', - }, - timeout: 5000 + timeout: 5000, + /** + * @param {object} date The 'dayjs' instance with the requested date + * @param {object} channel Data about the requested channel + * + * @return {object} The function should return headers for each request (optional) + */ + headers: function({ date, channel }) { + return { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71' + } + }, + /** + * @param {object} date The 'dayjs' instance with the requested date + * @param {object} channel Data about the requested channel + * + * @return {object} The function should return data for each request (optional) + */ + data: function({ date, channel }) { + return { + channels: [channel.site_id], + dateStart: date.format('YYYY-MM-DDT00:00:00-00:00'), + dateEnd: date.add(1, 'd').format('YYYY-MM-DDT00:00:00-00:00') + } + } }, /** @@ -60,11 +83,12 @@ module.exports = { }, /** + * @param {object} date The 'dayjs' instance with the requested date * @param {string} content The response received after the request at the above url * * @return {array} The function should return an array of programs with their descriptions */ - parser: function ({ content }) { + parser: function ({ date, content }) { // content parsing... @@ -84,6 +108,33 @@ module.exports = { } ``` +Each of these functions can also be asynchronous. + +```js +module.exports = { + site: 'example.com', + output: 'example.com.guide.xml', + channels: 'example.com.channels.xml', + request: { + async headers() { + return { ... } + }, + async data() { + return { ... 
} + } + }, + async url() { + return '...' + }, + async logo() { + return '...' + }, + async parser() { + return [ ... ] + } +} +``` + #### example.com.channels.xml ```xml From 9596939e68d2d5b48c2878befc82a470747a4aeb Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Mon, 23 Aug 2021 14:18:25 +0300 Subject: [PATCH 10/10] Bump to v0.7.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ee356b6..92d4fa7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "epg-grabber", - "version": "0.6.6", + "version": "0.7.0", "description": "Node.js CLI tool for grabbing EPG from different sites", "main": "src/index.js", "preferGlobal": true,