Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
f74ebfe798
|
@ -0,0 +1,15 @@
|
|||
name: test
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, edited]
|
||||
jobs:
|
||||
main:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- run: npm install
|
||||
- run: npm test
|
|
@ -1,8 +0,0 @@
|
|||
language: node_js
|
||||
|
||||
node_js:
|
||||
- '10'
|
||||
|
||||
script:
|
||||
- npm run lint
|
||||
- npm run test
|
58
README.md
58
README.md
|
@ -1,4 +1,4 @@
|
|||
# EPG Grabber [![Build Status](https://app.travis-ci.com/freearhey/epg-grabber.svg?branch=master)](https://app.travis-ci.com/freearhey/epg-grabber)
|
||||
# EPG Grabber [![test](https://github.com/freearhey/epg-grabber/actions/workflows/test.yml/badge.svg)](https://github.com/freearhey/epg-grabber/actions/workflows/test.yml)
|
||||
|
||||
Node.js CLI tool for grabbing EPG from different websites.
|
||||
|
||||
|
@ -74,29 +74,31 @@ epg-grabber --config=example.com.config.js
|
|||
Arguments:
|
||||
|
||||
- `-c, --config`: path to config file
|
||||
- `-o, --output`: path to output file (default: 'guide.xml')
|
||||
- `--channels`: path to list of channels (can be specified via config file)
|
||||
- `--lang`: set default language for all programs (default: 'en')
|
||||
- `--days`: number of days for which to grab the program (default: 1)
|
||||
- `--delay`: delay between requests in milliseconds (default: 3000)
|
||||
- `--timeout`: set a timeout for each request in milliseconds (default: 5000)
|
||||
- `--cache-ttl`: maximum time for storing each request in milliseconds (default: 0)
|
||||
- `--gzip`: compress the output (default: false)
|
||||
- `--debug`: enable debug mode (default: false)
|
||||
- `--curl`: display current request as CURL (default: false)
|
||||
- `-o, --output`: path to output file or path template (example: `guides/{site}.{lang}.xml`; default: `guide.xml`)
|
||||
- `--channels`: path to list of channels; you can also use a wildcard to specify the path to multiple files at once (example: `example.com_*.channels.xml`)
|
||||
- `--lang`: set default language for all programs (default: `en`)
|
||||
- `--days`: number of days for which to grab the program (default: `1`)
|
||||
- `--delay`: delay between requests in milliseconds (default: `3000`)
|
||||
- `--timeout`: set a timeout for each request in milliseconds (default: `5000`)
|
||||
- `--max-connections`: set a limit on the number of concurrent requests per site (default: `1`)
|
||||
- `--cache-ttl`: maximum time for storing each request in milliseconds (default: `0`)
|
||||
- `--gzip`: compress the output (default: `false`)
|
||||
- `--debug`: enable debug mode (default: `false`)
|
||||
- `--curl`: display current request as CURL (default: `false`)
|
||||
- `--log`: path to log file (optional)
|
||||
- `--log-level`: set the log level (default: 'info')
|
||||
- `--log-level`: set the log level (default: `info`)
|
||||
|
||||
## Site Config
|
||||
|
||||
```js
|
||||
module.exports = {
|
||||
site: 'example.com', // site domain name (required)
|
||||
output: 'example.com.guide.xml', // path to output file (default: 'guide.xml')
|
||||
channels: 'example.com.channels.xml', // path to channels.xml file (required)
|
||||
output: 'example.com.guide.xml', // path to output file or path template (example: 'guides/{site}.{lang}.xml'; default: 'guide.xml')
|
||||
channels: 'example.com.channels.xml', // path to list of channels; you can also use an array to specify the path to multiple files at once (example: ['channels1.xml', 'channels2.xml']; required)
|
||||
lang: 'fr', // default language for all programs (default: 'en')
|
||||
days: 3, // number of days for which to grab the program (default: 1)
|
||||
delay: 5000, // delay between requests (default: 3000)
|
||||
maxConnections: 200, // limit on the number of concurrent requests (default: 1)
|
||||
|
||||
request: { // request options (details: https://github.com/axios/axios#request-config)
|
||||
|
||||
|
@ -235,6 +237,34 @@ You can also specify the language and logo for each channel individually, like s
|
|||
>France 24</channel>
|
||||
```
|
||||
|
||||
## How to use SOCKS proxy?
|
||||
|
||||
First, you need to install [socks-proxy-agent](https://www.npmjs.com/package/socks-proxy-agent):
|
||||
|
||||
```sh
|
||||
npm install socks-proxy-agent
|
||||
```
|
||||
|
||||
Then you can use it to create an agent that routes requests through a SOCKS proxy. Here is an example of how to do it with the Tor SOCKS proxy:
|
||||
|
||||
```js
|
||||
const { SocksProxyAgent } = require('socks-proxy-agent')
|
||||
|
||||
const torProxyAgent = new SocksProxyAgent('socks://localhost:9050')
|
||||
|
||||
module.exports = {
|
||||
site: 'example.com',
|
||||
url: 'https://example.com/epg.json',
|
||||
request: {
|
||||
httpsAgent: torProxyAgent,
|
||||
httpAgent: torProxyAgent
|
||||
},
|
||||
parser(context) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Contribution
|
||||
|
||||
If you find a bug or want to contribute to the code or documentation, you can help by submitting an [issue](https://github.com/freearhey/epg-grabber/issues) or a [pull request](https://github.com/freearhey/epg-grabber/pulls).
|
||||
|
|
|
@ -12,6 +12,7 @@ const { name, version, description } = require('../package.json')
|
|||
const _ = require('lodash')
|
||||
const dayjs = require('dayjs')
|
||||
const utc = require('dayjs/plugin/utc')
|
||||
const { TaskQueue } = require('cwait')
|
||||
|
||||
dayjs.extend(utc)
|
||||
|
||||
|
@ -21,11 +22,16 @@ program
|
|||
.description(description)
|
||||
.requiredOption('-c, --config <config>', 'Path to [site].config.js file')
|
||||
.option('-o, --output <output>', 'Path to output file')
|
||||
.option('--channels <channels>', 'Path to channels.xml file')
|
||||
.option('--channels <channels>', 'Path to list of channels')
|
||||
.option('--lang <lang>', 'Set default language for all programs')
|
||||
.option('--days <days>', 'Number of days for which to grab the program', parseNumber)
|
||||
.option('--delay <delay>', 'Delay between requests (in milliseconds)', parseNumber)
|
||||
.option('--timeout <timeout>', 'Set a timeout for each request (in milliseconds)', parseNumber)
|
||||
.option(
|
||||
'--max-connections <maxConnections>',
|
||||
'Set a limit on the number of concurrent requests per site',
|
||||
parseNumber
|
||||
)
|
||||
.option(
|
||||
'--cache-ttl <cacheTtl>',
|
||||
'Maximum time for storing each request (in milliseconds)',
|
||||
|
@ -53,6 +59,7 @@ async function main() {
|
|||
curl: options.curl,
|
||||
lang: options.lang,
|
||||
delay: options.delay,
|
||||
maxConnections: options.maxConnections,
|
||||
request: {
|
||||
ignoreCookieErrors: true,
|
||||
}
|
||||
|
@ -60,34 +67,75 @@ async function main() {
|
|||
|
||||
if (options.timeout) config.request.timeout = options.timeout
|
||||
if (options.cacheTtl) config.request.cache.ttl = options.cacheTtl
|
||||
if (options.channels) config.channels = options.channels
|
||||
else if (config.channels)
|
||||
config.channels = file.join(file.dirname(options.config), config.channels)
|
||||
else throw new Error("The required 'channels' property is missing")
|
||||
|
||||
if (!config.channels) return logger.error('Path to [site].channels.xml is missing')
|
||||
logger.info(`Loading '${config.channels}'...`)
|
||||
if (options.channels) config.channels = options.channels
|
||||
|
||||
let parsedChannels = []
|
||||
if (config.channels) {
|
||||
const dir = file.dirname(options.config)
|
||||
|
||||
let files = []
|
||||
if (Array.isArray(config.channels)) {
|
||||
files = config.channels.map(path => file.join(dir, path))
|
||||
} else if (typeof config.channels === 'string') {
|
||||
files = await file.list(config.channels)
|
||||
} else {
|
||||
throw new Error('The "channels" attribute must be of type array or string')
|
||||
}
|
||||
|
||||
for (let filepath of files) {
|
||||
logger.info(`Loading '${filepath}'...`)
|
||||
const channelsXML = file.read(filepath)
|
||||
const { channels } = parseChannels(channelsXML)
|
||||
parsedChannels = parsedChannels.concat(channels)
|
||||
}
|
||||
} else throw new Error('Path to "channels" is missing')
|
||||
|
||||
const grabber = new EPGGrabber(config)
|
||||
|
||||
const channelsXML = file.read(config.channels)
|
||||
const { channels } = parseChannels(channelsXML)
|
||||
let template = options.output || config.output
|
||||
const variables = file.templateVariables(template)
|
||||
|
||||
const groups = _.groupBy(parsedChannels, channel => {
|
||||
let groupId = ''
|
||||
for (let key in channel) {
|
||||
if (variables.includes(key)) {
|
||||
groupId += channel[key]
|
||||
}
|
||||
}
|
||||
|
||||
return groupId
|
||||
})
|
||||
|
||||
for (let groupId in groups) {
|
||||
const channels = groups[groupId]
|
||||
let programs = []
|
||||
let i = 1
|
||||
let days = config.days || 1
|
||||
const maxConnections = config.maxConnections || 1
|
||||
const total = channels.length * days
|
||||
const utcDate = getUTCDate()
|
||||
const dates = Array.from({ length: days }, (_, i) => utcDate.add(i, 'd'))
|
||||
const taskQueue = new TaskQueue(Promise, maxConnections)
|
||||
|
||||
let queue = []
|
||||
for (let channel of channels) {
|
||||
if (!channel.logo && config.logo) {
|
||||
channel.logo = await grabber.loadLogo(channel)
|
||||
}
|
||||
|
||||
for (let date of dates) {
|
||||
queue.push({ channel, date })
|
||||
}
|
||||
}
|
||||
|
||||
await Promise.all(
|
||||
queue.map(
|
||||
taskQueue.wrap(async ({ channel, date }) => {
|
||||
await grabber
|
||||
.grab(channel, date, (data, err) => {
|
||||
logger.info(
|
||||
`[${i}/${total}] ${config.site} - ${data.channel.id} - ${dayjs
|
||||
`[${i}/${total}] ${config.site} - ${data.channel.xmltv_id} - ${dayjs
|
||||
.utc(data.date)
|
||||
.format('MMM D, YYYY')} (${data.programs.length} programs)`
|
||||
)
|
||||
|
@ -99,13 +147,14 @@ async function main() {
|
|||
.then(results => {
|
||||
programs = programs.concat(results)
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
)
|
||||
)
|
||||
|
||||
programs = _.uniqBy(programs, p => p.start + p.channel)
|
||||
|
||||
const xml = generateXMLTV({ channels, programs })
|
||||
let outputPath = options.output || config.output
|
||||
let outputPath = file.templateFormat(template, channels[0])
|
||||
if (options.gzip) {
|
||||
outputPath = outputPath || 'guide.xml.gz'
|
||||
const compressed = await gzip(xml)
|
||||
|
@ -116,6 +165,8 @@ async function main() {
|
|||
}
|
||||
|
||||
logger.info(`File '${outputPath}' successfully saved`)
|
||||
}
|
||||
|
||||
logger.info('Finish')
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "epg-grabber",
|
||||
"version": "0.29.8",
|
||||
"version": "0.32.0",
|
||||
"description": "Node.js CLI tool for grabbing EPG from different sites",
|
||||
"main": "src/index.js",
|
||||
"preferGlobal": true,
|
||||
|
@ -34,8 +34,10 @@
|
|||
"axios-mock-adapter": "^1.20.0",
|
||||
"commander": "^7.1.0",
|
||||
"curl-generator": "^0.2.0",
|
||||
"cwait": "^1.1.2",
|
||||
"dayjs": "^1.10.4",
|
||||
"epg-parser": "^0.1.6",
|
||||
"fs-extra": "^11.1.1",
|
||||
"glob": "^7.1.6",
|
||||
"lodash": "^4.17.21",
|
||||
"node-gzip": "^1.1.2",
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
class Channel {
|
||||
constructor(c) {
|
||||
const data = {
|
||||
id: c.id || c.xmltv_id,
|
||||
xmltv_id: c.xmltv_id,
|
||||
name: c.name,
|
||||
site: c.site || '',
|
||||
site_id: c.site_id,
|
||||
|
|
|
@ -10,7 +10,7 @@ class Program {
|
|||
|
||||
const data = {
|
||||
site: c.site || '',
|
||||
channel: c.id || '',
|
||||
channel: c.xmltv_id || '',
|
||||
titles: toArray(p.titles || p.title).map(text => toTextObject(text, c.lang)),
|
||||
sub_titles: toArray(p.sub_titles || p.sub_title).map(text => toTextObject(text, c.lang)),
|
||||
descriptions: toArray(p.descriptions || p.description || p.desc).map(text =>
|
||||
|
|
29
src/file.js
29
src/file.js
|
@ -1,11 +1,23 @@
|
|||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const glob = require('glob')
|
||||
|
||||
module.exports.list = list
|
||||
module.exports.read = read
|
||||
module.exports.write = write
|
||||
module.exports.resolve = resolve
|
||||
module.exports.join = join
|
||||
module.exports.dirname = dirname
|
||||
module.exports.templateVariables = templateVariables
|
||||
module.exports.templateFormat = templateFormat
|
||||
|
||||
/**
 * Lists the files matching a glob pattern.
 *
 * @param {string} pattern - Glob pattern (or plain path) to expand.
 * @returns {Promise<string[]>} Resolves with the matching paths; rejects with
 *   the error reported by glob.
 */
function list(pattern) {
  return new Promise((resolve, reject) => {
    glob(pattern, function (err, files) {
      // Surface glob failures instead of resolving with an undefined list,
      // which would only blow up later when the caller iterates the result
      if (err) return reject(err)

      resolve(files)
    })
  })
}
|
||||
|
||||
function read(filepath) {
|
||||
return fs.readFileSync(path.resolve(filepath), { encoding: 'utf-8' })
|
||||
|
@ -31,3 +43,20 @@ function join(path1, path2) {
|
|||
/**
 * Returns the directory portion of a file path.
 *
 * @param {string} filepath - Path to a file.
 * @returns {string} Path of the directory that contains `filepath`.
 */
function dirname(filepath) {
  const parentDir = path.dirname(filepath)

  return parentDir
}
|
||||
|
||||
/**
 * Extracts the placeholder names used in a path template.
 *
 * @param {string} [template] - Path template such as 'guides/{site}.{lang}.xml'.
 * @returns {string[]} Placeholder names without braces, in order of appearance;
 *   an empty array when the template has no placeholders or is not a string.
 */
function templateVariables(template) {
  // A missing template (e.g. no output path configured) simply has no placeholders
  if (typeof template !== 'string') return []

  const match = template.match(/{[^}]+}/g)

  // match() yields null when nothing matches; otherwise strip the surrounding braces
  return Array.isArray(match) ? match.map(s => s.slice(1, -1)) : []
}
|
||||
|
||||
/**
 * Fills the `{key}` placeholders of a path template with values from an object.
 *
 * @param {string} [template] - Path template such as 'guides/{site}.{lang}.xml'.
 * @param {Object} obj - Source of replacement values, looked up by placeholder name.
 * @returns {string} The template with every `{key}` replaced for each key of `obj`.
 *   Falsy values are rendered as the text "undefined". A non-string template is
 *   returned unchanged so the caller can fall back to its own default.
 */
function templateFormat(template, obj) {
  if (typeof template !== 'string') return template

  let output = template
  for (let key in obj) {
    // Falsy values (missing, empty, 0) intentionally become the text "undefined"
    const value = String(obj[key] || undefined)

    // Literal substitution: a RegExp built from the key would misread characters
    // like "." in the key, and a replacement string would expand "$&" / "$$"
    // sequences found in the value
    output = output.split(`{${key}}`).join(value)
  }

  return output
}
|
||||
|
|
|
@ -29,7 +29,7 @@ function createElements(channels, programs, date) {
|
|||
...channels.map(channel => {
|
||||
return (
|
||||
'\r\n' +
|
||||
el('channel', { id: channel.id }, [
|
||||
el('channel', { id: channel.xmltv_id }, [
|
||||
el('display-name', {}, [escapeString(channel.name)]),
|
||||
el('icon', { src: channel.logo }),
|
||||
el('url', {}, [channel.url])
|
||||
|
|
|
@ -12,28 +12,7 @@ it('can create new Channel', () => {
|
|||
|
||||
expect(channel).toMatchObject({
|
||||
name: '1 TV',
|
||||
id: '1TV.com',
|
||||
site_id: '1',
|
||||
site: 'example.com',
|
||||
url: 'https://example.com',
|
||||
lang: 'fr',
|
||||
logo: 'https://example.com/logos/1TV.png'
|
||||
})
|
||||
})
|
||||
|
||||
it('can create channel from exist object', () => {
|
||||
const channel = new Channel({
|
||||
name: '1 TV',
|
||||
id: '1TV.com',
|
||||
site_id: '1',
|
||||
site: 'example.com',
|
||||
lang: 'fr',
|
||||
logo: 'https://example.com/logos/1TV.png'
|
||||
})
|
||||
|
||||
expect(channel).toMatchObject({
|
||||
name: '1 TV',
|
||||
id: '1TV.com',
|
||||
xmltv_id: '1TV.com',
|
||||
site_id: '1',
|
||||
site: 'example.com',
|
||||
url: 'https://example.com',
|
||||
|
|
|
@ -7,7 +7,7 @@ module.exports = {
|
|||
site: 'example.com',
|
||||
days: 2,
|
||||
channels: 'example.channels.xml',
|
||||
output: 'tests/output/guide.xml',
|
||||
output: 'tests/__data__/output/guide.xml',
|
||||
url: () => 'http://example.com/20210319/1tv.json',
|
||||
request: {
|
||||
method: 'POST',
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<site site="example2.com">
|
||||
<channels>
|
||||
<channel xmltv_id="3TV.com" site_id="3">3 TV</channel>
|
||||
<channel xmltv_id="4TV.com" site_id="4">4 TV</channel>
|
||||
</channels>
|
||||
</site>
|
|
@ -0,0 +1,32 @@
|
|||
const dayjs = require('dayjs')
|
||||
const utc = require('dayjs/plugin/utc')
|
||||
|
||||
dayjs.extend(utc)
|
||||
|
||||
module.exports = {
|
||||
site: 'example.com',
|
||||
days: 2,
|
||||
channels: ['example.channels.xml', 'example_2.channels.xml'],
|
||||
output: 'tests/__data__/output/guide.xml',
|
||||
url: () => 'http://example.com/20210319/1tv.json',
|
||||
request: {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Cookie: 'abc=123'
|
||||
},
|
||||
data() {
|
||||
return { accountID: '123' }
|
||||
}
|
||||
},
|
||||
parser: () => {
|
||||
return [
|
||||
{
|
||||
title: 'Program1',
|
||||
start: 1640995200000,
|
||||
stop: 1640998800000
|
||||
}
|
||||
]
|
||||
},
|
||||
logo: () => 'http://example.com/logos/1TV.png?x=шеллы&sid=777'
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
const { execSync } = require('child_process')
|
||||
const fs = require('fs')
|
||||
const fs = require('fs-extra')
|
||||
const path = require('path')
|
||||
const epgParser = require('epg-parser')
|
||||
|
||||
|
@ -11,6 +11,10 @@ function stdoutResultTester(stdout) {
|
|||
})
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
fs.emptyDirSync('tests/__data__/output')
|
||||
})
|
||||
|
||||
it('can load config', () => {
|
||||
const stdout = execSync(
|
||||
`node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example.config.js --delay=0`,
|
||||
|
@ -62,6 +66,26 @@ it('can generate gzip version', () => {
|
|||
)
|
||||
})
|
||||
|
||||
it('can produce multiple outputs', () => {
|
||||
const stdout = execSync(
|
||||
`node ${pwd}/bin/epg-grabber.js \
|
||||
--config=tests/__data__/input/mini.config.js \
|
||||
--channels=tests/__data__/input/example.channels.xml \
|
||||
--output=tests/__data__/output/{lang}/{xmltv_id}.xml`,
|
||||
{
|
||||
encoding: 'utf8'
|
||||
}
|
||||
)
|
||||
|
||||
expect(stdoutResultTester(stdout)).toBe(true)
|
||||
expect(stdout.includes("File 'tests/__data__/output/fr/1TV.com.xml' successfully saved")).toBe(
|
||||
true
|
||||
)
|
||||
expect(
|
||||
stdout.includes("File 'tests/__data__/output/undefined/2TV.com.xml' successfully saved")
|
||||
).toBe(true)
|
||||
})
|
||||
|
||||
it('removes duplicates of the program', () => {
|
||||
const stdout = execSync(
|
||||
`node ${pwd}/bin/epg-grabber.js \
|
||||
|
@ -81,3 +105,25 @@ it('removes duplicates of the program', () => {
|
|||
|
||||
expect(output.programs).toEqual(expected.programs)
|
||||
})
|
||||
|
||||
it('can load multiple "channels.xml" files at once', () => {
|
||||
const stdout = execSync(
|
||||
`node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example.config.js --channels=tests/__data__/input/example*.channels.xml --timeout=1`,
|
||||
{
|
||||
encoding: 'utf8'
|
||||
}
|
||||
)
|
||||
|
||||
expect(stdoutResultTester(stdout)).toBe(true)
|
||||
})
|
||||
|
||||
it('can parse list of "channels.xml" from array', () => {
|
||||
const stdout = execSync(
|
||||
`node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example_channels.config.js --timeout=1`,
|
||||
{
|
||||
encoding: 'utf8'
|
||||
}
|
||||
)
|
||||
|
||||
expect(stdoutResultTester(stdout)).toBe(true)
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue