Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Felix Jian 2023-08-03 22:55:40 +02:00
commit f74ebfe798
Signed by: flex
GPG Key ID: 2FB8FBECB390C227
16 changed files with 2966 additions and 2640 deletions

15
.github/workflows/test.yml vendored Normal file
View File

@ -0,0 +1,15 @@
name: test
on:
push:
branches:
- master
workflow_dispatch:
pull_request:
types: [opened, synchronize, reopened, edited]
jobs:
main:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: npm install
- run: npm test

View File

@ -1,8 +0,0 @@
language: node_js
node_js:
- '10'
script:
- npm run lint
- npm run test

View File

@ -1,4 +1,4 @@
# EPG Grabber [![Build Status](https://app.travis-ci.com/freearhey/epg-grabber.svg?branch=master)](https://app.travis-ci.com/freearhey/epg-grabber) # EPG Grabber [![test](https://github.com/freearhey/epg-grabber/actions/workflows/test.yml/badge.svg)](https://github.com/freearhey/epg-grabber/actions/workflows/test.yml)
Node.js CLI tool for grabbing EPG from different websites. Node.js CLI tool for grabbing EPG from different websites.
@ -74,29 +74,31 @@ epg-grabber --config=example.com.config.js
Arguments: Arguments:
- `-c, --config`: path to config file - `-c, --config`: path to config file
- `-o, --output`: path to output file (default: 'guide.xml') - `-o, --output`: path to output file or path template (example: `guides/{site}.{lang}.xml`; default: `guide.xml`)
- `--channels`: path to list of channels (can be specified via config file) - `--channels`: path to list of channels; you can also use a wildcard to specify the path to multiple files at once (example: `example.com_*.channels.xml`)
- `--lang`: set default language for all programs (default: 'en') - `--lang`: set default language for all programs (default: `en`)
- `--days`: number of days for which to grab the program (default: 1) - `--days`: number of days for which to grab the program (default: `1`)
- `--delay`: delay between requests in milliseconds (default: 3000) - `--delay`: delay between requests in milliseconds (default: `3000`)
- `--timeout`: set a timeout for each request in milliseconds (default: 5000) - `--timeout`: set a timeout for each request in milliseconds (default: `5000`)
- `--cache-ttl`: maximum time for storing each request in milliseconds (default: 0) - `--max-connections`: set a limit on the number of concurrent requests per site (default: `1`)
- `--gzip`: compress the output (default: false) - `--cache-ttl`: maximum time for storing each request in milliseconds (default: `0`)
- `--debug`: enable debug mode (default: false) - `--gzip`: compress the output (default: `false`)
- `--curl`: display current request as CURL (default: false) - `--debug`: enable debug mode (default: `false`)
- `--curl`: display current request as CURL (default: `false`)
- `--log`: path to log file (optional) - `--log`: path to log file (optional)
- `--log-level`: set the log level (default: 'info') - `--log-level`: set the log level (default: `info`)
## Site Config ## Site Config
```js ```js
module.exports = { module.exports = {
site: 'example.com', // site domain name (required) site: 'example.com', // site domain name (required)
output: 'example.com.guide.xml', // path to output file (default: 'guide.xml') output: 'example.com.guide.xml', // path to output file or path template (example: 'guides/{site}.{lang}.xml'; default: 'guide.xml')
channels: 'example.com.channels.xml', // path to channels.xml file (required) channels: 'example.com.channels.xml', // path to list of channels; you can also use an array to specify the path to multiple files at once (example: ['channels1.xml', 'channels2.xml']; required)
lang: 'fr', // default language for all programs (default: 'en') lang: 'fr', // default language for all programs (default: 'en')
days: 3, // number of days for which to grab the program (default: 1) days: 3, // number of days for which to grab the program (default: 1)
delay: 5000, // delay between requests (default: 3000) delay: 5000, // delay between requests (default: 3000)
maxConnections: 200, // limit on the number of concurrent requests (default: 1)
request: { // request options (details: https://github.com/axios/axios#request-config) request: { // request options (details: https://github.com/axios/axios#request-config)
@ -235,6 +237,34 @@ You can also specify the language and logo for each channel individually, like s
>France 24</channel> >France 24</channel>
``` ```
## How to use SOCKS proxy?
First, you need to install [socks-proxy-agent](https://www.npmjs.com/package/socks-proxy-agent):
```sh
npm install socks-proxy-agent
```
Then you can use it to create an agent that acts as a SOCKS proxy. Here is an example of how to do it with the Tor SOCKS proxy:
```js
const { SocksProxyAgent } = require('socks-proxy-agent')
const torProxyAgent = new SocksProxyAgent('socks://localhost:9050')
module.exports = {
site: 'example.com',
url: 'https://example.com/epg.json',
request: {
httpsAgent: torProxyAgent,
httpAgent: torProxyAgent
},
parser(context) {
// ...
}
}
```
## Contribution ## Contribution
If you find a bug or want to contribute to the code or documentation, you can help by submitting an [issue](https://github.com/freearhey/epg-grabber/issues) or a [pull request](https://github.com/freearhey/epg-grabber/pulls). If you find a bug or want to contribute to the code or documentation, you can help by submitting an [issue](https://github.com/freearhey/epg-grabber/issues) or a [pull request](https://github.com/freearhey/epg-grabber/pulls).

View File

@ -12,6 +12,7 @@ const { name, version, description } = require('../package.json')
const _ = require('lodash') const _ = require('lodash')
const dayjs = require('dayjs') const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc') const utc = require('dayjs/plugin/utc')
const { TaskQueue } = require('cwait')
dayjs.extend(utc) dayjs.extend(utc)
@ -21,11 +22,16 @@ program
.description(description) .description(description)
.requiredOption('-c, --config <config>', 'Path to [site].config.js file') .requiredOption('-c, --config <config>', 'Path to [site].config.js file')
.option('-o, --output <output>', 'Path to output file') .option('-o, --output <output>', 'Path to output file')
.option('--channels <channels>', 'Path to channels.xml file') .option('--channels <channels>', 'Path to list of channels')
.option('--lang <lang>', 'Set default language for all programs') .option('--lang <lang>', 'Set default language for all programs')
.option('--days <days>', 'Number of days for which to grab the program', parseNumber) .option('--days <days>', 'Number of days for which to grab the program', parseNumber)
.option('--delay <delay>', 'Delay between requests (in milliseconds)', parseNumber) .option('--delay <delay>', 'Delay between requests (in milliseconds)', parseNumber)
.option('--timeout <timeout>', 'Set a timeout for each request (in milliseconds)', parseNumber) .option('--timeout <timeout>', 'Set a timeout for each request (in milliseconds)', parseNumber)
.option(
'--max-connections <maxConnections>',
'Set a limit on the number of concurrent requests per site',
parseNumber
)
.option( .option(
'--cache-ttl <cacheTtl>', '--cache-ttl <cacheTtl>',
'Maximum time for storing each request (in milliseconds)', 'Maximum time for storing each request (in milliseconds)',
@ -53,6 +59,7 @@ async function main() {
curl: options.curl, curl: options.curl,
lang: options.lang, lang: options.lang,
delay: options.delay, delay: options.delay,
maxConnections: options.maxConnections,
request: { request: {
ignoreCookieErrors: true, ignoreCookieErrors: true,
} }
@ -60,34 +67,75 @@ async function main() {
if (options.timeout) config.request.timeout = options.timeout if (options.timeout) config.request.timeout = options.timeout
if (options.cacheTtl) config.request.cache.ttl = options.cacheTtl if (options.cacheTtl) config.request.cache.ttl = options.cacheTtl
if (options.channels) config.channels = options.channels
else if (config.channels)
config.channels = file.join(file.dirname(options.config), config.channels)
else throw new Error("The required 'channels' property is missing")
if (!config.channels) return logger.error('Path to [site].channels.xml is missing') if (options.channels) config.channels = options.channels
logger.info(`Loading '${config.channels}'...`)
let parsedChannels = []
if (config.channels) {
const dir = file.dirname(options.config)
let files = []
if (Array.isArray(config.channels)) {
files = config.channels.map(path => file.join(dir, path))
} else if (typeof config.channels === 'string') {
files = await file.list(config.channels)
} else {
throw new Error('The "channels" attribute must be of type array or string')
}
for (let filepath of files) {
logger.info(`Loading '${filepath}'...`)
const channelsXML = file.read(filepath)
const { channels } = parseChannels(channelsXML)
parsedChannels = parsedChannels.concat(channels)
}
} else throw new Error('Path to "channels" is missing')
const grabber = new EPGGrabber(config) const grabber = new EPGGrabber(config)
const channelsXML = file.read(config.channels) let template = options.output || config.output
const { channels } = parseChannels(channelsXML) const variables = file.templateVariables(template)
const groups = _.groupBy(parsedChannels, channel => {
let groupId = ''
for (let key in channel) {
if (variables.includes(key)) {
groupId += channel[key]
}
}
return groupId
})
for (let groupId in groups) {
const channels = groups[groupId]
let programs = [] let programs = []
let i = 1 let i = 1
let days = config.days || 1 let days = config.days || 1
const maxConnections = config.maxConnections || 1
const total = channels.length * days const total = channels.length * days
const utcDate = getUTCDate() const utcDate = getUTCDate()
const dates = Array.from({ length: days }, (_, i) => utcDate.add(i, 'd')) const dates = Array.from({ length: days }, (_, i) => utcDate.add(i, 'd'))
const taskQueue = new TaskQueue(Promise, maxConnections)
let queue = []
for (let channel of channels) { for (let channel of channels) {
if (!channel.logo && config.logo) { if (!channel.logo && config.logo) {
channel.logo = await grabber.loadLogo(channel) channel.logo = await grabber.loadLogo(channel)
} }
for (let date of dates) { for (let date of dates) {
queue.push({ channel, date })
}
}
await Promise.all(
queue.map(
taskQueue.wrap(async ({ channel, date }) => {
await grabber await grabber
.grab(channel, date, (data, err) => { .grab(channel, date, (data, err) => {
logger.info( logger.info(
`[${i}/${total}] ${config.site} - ${data.channel.id} - ${dayjs `[${i}/${total}] ${config.site} - ${data.channel.xmltv_id} - ${dayjs
.utc(data.date) .utc(data.date)
.format('MMM D, YYYY')} (${data.programs.length} programs)` .format('MMM D, YYYY')} (${data.programs.length} programs)`
) )
@ -99,13 +147,14 @@ async function main() {
.then(results => { .then(results => {
programs = programs.concat(results) programs = programs.concat(results)
}) })
} })
} )
)
programs = _.uniqBy(programs, p => p.start + p.channel) programs = _.uniqBy(programs, p => p.start + p.channel)
const xml = generateXMLTV({ channels, programs }) const xml = generateXMLTV({ channels, programs })
let outputPath = options.output || config.output let outputPath = file.templateFormat(template, channels[0])
if (options.gzip) { if (options.gzip) {
outputPath = outputPath || 'guide.xml.gz' outputPath = outputPath || 'guide.xml.gz'
const compressed = await gzip(xml) const compressed = await gzip(xml)
@ -116,6 +165,8 @@ async function main() {
} }
logger.info(`File '${outputPath}' successfully saved`) logger.info(`File '${outputPath}' successfully saved`)
}
logger.info('Finish') logger.info('Finish')
} }

4771
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"name": "epg-grabber", "name": "epg-grabber",
"version": "0.29.8", "version": "0.32.0",
"description": "Node.js CLI tool for grabbing EPG from different sites", "description": "Node.js CLI tool for grabbing EPG from different sites",
"main": "src/index.js", "main": "src/index.js",
"preferGlobal": true, "preferGlobal": true,
@ -34,8 +34,10 @@
"axios-mock-adapter": "^1.20.0", "axios-mock-adapter": "^1.20.0",
"commander": "^7.1.0", "commander": "^7.1.0",
"curl-generator": "^0.2.0", "curl-generator": "^0.2.0",
"cwait": "^1.1.2",
"dayjs": "^1.10.4", "dayjs": "^1.10.4",
"epg-parser": "^0.1.6", "epg-parser": "^0.1.6",
"fs-extra": "^11.1.1",
"glob": "^7.1.6", "glob": "^7.1.6",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"node-gzip": "^1.1.2", "node-gzip": "^1.1.2",

View File

@ -1,7 +1,7 @@
class Channel { class Channel {
constructor(c) { constructor(c) {
const data = { const data = {
id: c.id || c.xmltv_id, xmltv_id: c.xmltv_id,
name: c.name, name: c.name,
site: c.site || '', site: c.site || '',
site_id: c.site_id, site_id: c.site_id,

View File

@ -10,7 +10,7 @@ class Program {
const data = { const data = {
site: c.site || '', site: c.site || '',
channel: c.id || '', channel: c.xmltv_id || '',
titles: toArray(p.titles || p.title).map(text => toTextObject(text, c.lang)), titles: toArray(p.titles || p.title).map(text => toTextObject(text, c.lang)),
sub_titles: toArray(p.sub_titles || p.sub_title).map(text => toTextObject(text, c.lang)), sub_titles: toArray(p.sub_titles || p.sub_title).map(text => toTextObject(text, c.lang)),
descriptions: toArray(p.descriptions || p.description || p.desc).map(text => descriptions: toArray(p.descriptions || p.description || p.desc).map(text =>

View File

@ -1,11 +1,23 @@
const fs = require('fs') const fs = require('fs')
const path = require('path') const path = require('path')
const glob = require('glob')
module.exports.list = list
module.exports.read = read module.exports.read = read
module.exports.write = write module.exports.write = write
module.exports.resolve = resolve module.exports.resolve = resolve
module.exports.join = join module.exports.join = join
module.exports.dirname = dirname module.exports.dirname = dirname
module.exports.templateVariables = templateVariables
module.exports.templateFormat = templateFormat
/**
 * Finds the files matching a glob pattern.
 *
 * Promise adapter around the callback-style `glob` function.
 *
 * @param {string} pattern - Pattern handed to `glob` unchanged.
 * @returns {Promise<string[]>} Resolves with the paths reported by `glob`;
 *   rejects with the error `glob` passes to its callback, if any.
 */
function list(pattern) {
  return new Promise((resolve, reject) => {
    glob(pattern, (err, files) => {
      // Surface the failure here: resolving with `undefined` would only
      // produce a confusing TypeError later, when the result is iterated.
      if (err) {
        reject(err)
        return
      }
      resolve(files)
    })
  })
}
function read(filepath) { function read(filepath) {
return fs.readFileSync(path.resolve(filepath), { encoding: 'utf-8' }) return fs.readFileSync(path.resolve(filepath), { encoding: 'utf-8' })
@ -31,3 +43,20 @@ function join(path1, path2) {
function dirname(filepath) { function dirname(filepath) {
return path.dirname(filepath) return path.dirname(filepath)
} }
/**
 * Extracts the placeholder names used in a path template.
 *
 * A placeholder is any non-empty run of characters wrapped in curly braces,
 * e.g. `guides/{site}.{lang}.xml` yields `['site', 'lang']`.
 *
 * @param {string} [template] - Path template; may be missing when no output
 *   path has been configured.
 * @returns {string[]} Placeholder names in order of appearance (duplicates
 *   are kept); an empty array when there are none or `template` is not a
 *   string.
 */
function templateVariables(template) {
  // A missing template simply has no variables; do not crash on `.match`.
  if (typeof template !== 'string') return []

  const match = template.match(/{[^}]+}/g)

  // `match` is null when the template contains no placeholders.
  return Array.isArray(match) ? match.map(s => s.slice(1, -1)) : []
}
/**
 * Fills a path template with values taken from an object.
 *
 * Every `{key}` occurrence is replaced by `obj[key]` for each enumerable key
 * of `obj`. Falsy values (missing, empty string, 0, ...) are rendered as the
 * literal text `undefined`. Placeholders with no corresponding key in `obj`
 * are left untouched.
 *
 * @param {string} [template] - Path template, e.g. `guides/{site}.{lang}.xml`.
 * @param {Object} obj - Source of the replacement values.
 * @returns {string} The formatted path. A non-string `template` is returned
 *   unchanged so the caller can apply its own default.
 */
function templateFormat(template, obj) {
  if (typeof template !== 'string') return template

  let output = template
  for (let key in obj) {
    const value = obj[key] || undefined
    // Plain-text replacement: a RegExp built from the key breaks on keys such
    // as `1` ("{1}" is parsed as a quantifier), and a replacement string would
    // expand `$&`, `$1`, ... sequences found in the value.
    output = output.split(`{${key}}`).join(String(value))
  }

  return output
}

View File

@ -29,7 +29,7 @@ function createElements(channels, programs, date) {
...channels.map(channel => { ...channels.map(channel => {
return ( return (
'\r\n' + '\r\n' +
el('channel', { id: channel.id }, [ el('channel', { id: channel.xmltv_id }, [
el('display-name', {}, [escapeString(channel.name)]), el('display-name', {}, [escapeString(channel.name)]),
el('icon', { src: channel.logo }), el('icon', { src: channel.logo }),
el('url', {}, [channel.url]) el('url', {}, [channel.url])

View File

@ -12,28 +12,7 @@ it('can create new Channel', () => {
expect(channel).toMatchObject({ expect(channel).toMatchObject({
name: '1 TV', name: '1 TV',
id: '1TV.com', xmltv_id: '1TV.com',
site_id: '1',
site: 'example.com',
url: 'https://example.com',
lang: 'fr',
logo: 'https://example.com/logos/1TV.png'
})
})
it('can create channel from exist object', () => {
const channel = new Channel({
name: '1 TV',
id: '1TV.com',
site_id: '1',
site: 'example.com',
lang: 'fr',
logo: 'https://example.com/logos/1TV.png'
})
expect(channel).toMatchObject({
name: '1 TV',
id: '1TV.com',
site_id: '1', site_id: '1',
site: 'example.com', site: 'example.com',
url: 'https://example.com', url: 'https://example.com',

View File

@ -7,7 +7,7 @@ module.exports = {
site: 'example.com', site: 'example.com',
days: 2, days: 2,
channels: 'example.channels.xml', channels: 'example.channels.xml',
output: 'tests/output/guide.xml', output: 'tests/__data__/output/guide.xml',
url: () => 'http://example.com/20210319/1tv.json', url: () => 'http://example.com/20210319/1tv.json',
request: { request: {
method: 'POST', method: 'POST',

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<site site="example2.com">
<channels>
<channel xmltv_id="3TV.com" site_id="3">3 TV</channel>
<channel xmltv_id="4TV.com" site_id="4">4 TV</channel>
</channels>
</site>

View File

@ -0,0 +1,32 @@
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
dayjs.extend(utc)
module.exports = {
site: 'example.com',
days: 2,
channels: ['example.channels.xml', 'example_2.channels.xml'],
output: 'tests/__data__/output/guide.xml',
url: () => 'http://example.com/20210319/1tv.json',
request: {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Cookie: 'abc=123'
},
data() {
return { accountID: '123' }
}
},
parser: () => {
return [
{
title: 'Program1',
start: 1640995200000,
stop: 1640998800000
}
]
},
logo: () => 'http://example.com/logos/1TV.png?x=шеллы&sid=777'
}

View File

@ -1,5 +1,5 @@
const { execSync } = require('child_process') const { execSync } = require('child_process')
const fs = require('fs') const fs = require('fs-extra')
const path = require('path') const path = require('path')
const epgParser = require('epg-parser') const epgParser = require('epg-parser')
@ -11,6 +11,10 @@ function stdoutResultTester(stdout) {
}) })
} }
beforeEach(() => {
fs.emptyDirSync('tests/__data__/output')
})
it('can load config', () => { it('can load config', () => {
const stdout = execSync( const stdout = execSync(
`node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example.config.js --delay=0`, `node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example.config.js --delay=0`,
@ -62,6 +66,26 @@ it('can generate gzip version', () => {
) )
}) })
it('can produce multiple outputs', () => {
const stdout = execSync(
`node ${pwd}/bin/epg-grabber.js \
--config=tests/__data__/input/mini.config.js \
--channels=tests/__data__/input/example.channels.xml \
--output=tests/__data__/output/{lang}/{xmltv_id}.xml`,
{
encoding: 'utf8'
}
)
expect(stdoutResultTester(stdout)).toBe(true)
expect(stdout.includes("File 'tests/__data__/output/fr/1TV.com.xml' successfully saved")).toBe(
true
)
expect(
stdout.includes("File 'tests/__data__/output/undefined/2TV.com.xml' successfully saved")
).toBe(true)
})
it('removes duplicates of the program', () => { it('removes duplicates of the program', () => {
const stdout = execSync( const stdout = execSync(
`node ${pwd}/bin/epg-grabber.js \ `node ${pwd}/bin/epg-grabber.js \
@ -81,3 +105,25 @@ it('removes duplicates of the program', () => {
expect(output.programs).toEqual(expected.programs) expect(output.programs).toEqual(expected.programs)
}) })
it('can load multiple "channels.xml" files at once', () => {
const stdout = execSync(
`node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example.config.js --channels=tests/__data__/input/example*.channels.xml --timeout=1`,
{
encoding: 'utf8'
}
)
expect(stdoutResultTester(stdout)).toBe(true)
})
it('can parse list of "channels.xml" from array', () => {
const stdout = execSync(
`node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example_channels.config.js --timeout=1`,
{
encoding: 'utf8'
}
)
expect(stdoutResultTester(stdout)).toBe(true)
})