Felix Jian f74ebfe798 | ||
---|---|---|
.github/workflows | ||
bin | ||
src | ||
tests | ||
.eslintrc.js | ||
.gitignore | ||
README.md | ||
babel.config.js | ||
package-lock.json | ||
package.json |
README.md
EPG Grabber
Node.js CLI tool for grabbing EPG from different websites.
Installation
npm install -g epg-grabber
Quick Start
epg-grabber --config=example.com.config.js
example.com.config.js
module.exports = {
site: 'example.com',
channels: 'example.com.channels.xml',
url: function (context) {
const { date, channel } = context
return `https://api.example.com/${date.format('YYYY-MM-DD')}/channel/${channel.site_id}`
},
parser: function (context) {
const programs = JSON.parse(context.content)
return programs.map(program => {
return {
title: program.title,
start: program.start,
stop: program.stop
}
})
}
}
example.com.channels.xml
<?xml version="1.0" ?>
<site site="example.com">
<channels>
<channel site_id="cnn-23" xmltv_id="CNN.us">CNN</channel>
</channels>
</site>
Example Output
<tv>
<channel id="CNN.us">
<display-name>CNN</display-name>
<url>https://example.com</url>
</channel>
<programme start="20211116040000 +0000" stop="20211116050000 +0000" channel="CNN.us">
<title lang="en">News at 10PM</title>
</programme>
// ...
</tv>
CLI
epg-grabber --config=example.com.config.js
Arguments:
- `-c, --config`: path to config file
- `-o, --output`: path to output file or path template (example: `guides/{site}.{lang}.xml`; default: `guide.xml`)
- `--channels`: path to list of channels; you can also use a wildcard to specify the path to multiple files at once (example: `example.com_*.channels.xml`)
- `--lang`: set default language for all programs (default: `en`)
- `--days`: number of days for which to grab the program (default: `1`)
- `--delay`: delay between requests in milliseconds (default: `3000`)
- `--timeout`: set a timeout for each request in milliseconds (default: `5000`)
- `--max-connections`: set a limit on the number of concurrent requests per site (default: `1`)
- `--cache-ttl`: maximum time for storing each request in milliseconds (default: `0`)
- `--gzip`: compress the output (default: `false`)
- `--debug`: enable debug mode (default: `false`)
- `--curl`: display current request as CURL (default: `false`)
- `--log`: path to log file (optional)
- `--log-level`: set the log level (default: `info`)
Site Config
module.exports = {
site: 'example.com', // site domain name (required)
output: 'example.com.guide.xml', // path to output file or path template (example: 'guides/{site}.{lang}.xml'; default: 'guide.xml')
channels: 'example.com.channels.xml', // path to list of channels; you can also use an array to specify the path to multiple files at once (example: ['channels1.xml', 'channels2.xml']; required)
lang: 'fr', // default language for all programs (default: 'en')
days: 3, // number of days for which to grab the program (default: 1)
delay: 5000, // delay between requests in milliseconds (default: 3000)
maxConnections: 200, // limit on the number of concurrent requests (default: 1)
request: { // request options (details: https://github.com/axios/axios#request-config)
method: 'GET',
timeout: 5000,
proxy: {
protocol: 'https',
host: '127.0.0.1',
port: 9000,
auth: {
username: 'mikeymike',
password: 'rapunz3l'
}
},
cache: { // cache options (details: https://axios-cache-interceptor.js.org/#/pages/per-request-configuration)
ttl: 60 * 1000 // 60s
},
/**
* @param {object} context
*
* @return {object} The function should return headers for each request (optional)
*/
headers: function(context) {
return {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71'
}
},
/**
* @param {object} context
*
* @return {object} The function should return data for each request (optional)
*/
data: function(context) {
const { channel, date } = context
return {
channels: [channel.site_id],
dateStart: date.format('YYYY-MM-DDT00:00:00-00:00'),
dateEnd: date.add(1, 'd').format('YYYY-MM-DDT00:00:00-00:00')
}
}
},
/**
* @param {object} context
*
* @return {string} The function should return URL of the program page for the channel
*/
url: function (context) {
return `https://example.com/${context.date.format('YYYY-MM-DD')}/channel/${context.channel.site_id}.html`
},
/**
* @param {object} context
*
* @return {string} The function should return URL of the channel logo (optional)
*/
logo: function (context) {
return `https://example.com/logos/${context.channel.site_id}.png`
},
/**
* @param {object} context
*
* @return {array} The function should return an array of programs with their descriptions
*/
parser: function (context) {
// content parsing...
return [
{
title, // program title (required)
start, // start time of the program (required)
stop, // end time of the program (required)
sub_title, // program sub-title (optional)
description, // description of the program (optional)
category, // type of program (optional)
season, // season number (optional)
episode, // episode number (optional)
date, // the date the programme or film was finished (optional)
icon, // image associated with the program (optional)
rating, // program rating (optional)
director, // the name of director (optional)
actor, // the name of actor (optional)
writer, // the name of writer (optional)
adapter, // the name of adapter (optional)
producer, // the name of producer (optional)
composer, // the name of composer (optional)
editor, // the name of editor (optional)
presenter, // the name of presenter (optional)
commentator, // the name of commentator (optional)
guest // the name of guest (optional)
},
...
]
}
}
Context Object
From each function in `config.js` you can access a `context` object containing the following data:

- `channel`: The object describing the current channel (xmltv_id, site_id, name, lang)
- `date`: The 'dayjs' instance with the requested date
- `content`: The response data as a String
- `buffer`: The response data as an ArrayBuffer
- `headers`: The response headers
- `request`: The request config
- `cached`: A boolean to check whether this request was cached or not
Channels List
<?xml version="1.0" ?>
<site site="example.com">
<channels>
<channel site_id="cnn-23" xmltv_id="CNN.us">CNN</channel>
...
</channels>
</site>
You can also specify the language and logo for each channel individually, like so:
<channel
site_id="france-24"
xmltv_id="France24.fr"
lang="fr"
logo="https://example.com/france24.png"
>France 24</channel>
How to use SOCKS proxy?
First, you need to install socks-proxy-agent:
npm install socks-proxy-agent
Then you can use it to create an agent that acts as a SOCKS proxy. Here is an example of how to do it with the Tor SOCKS proxy:
const { SocksProxyAgent } = require('socks-proxy-agent')
const torProxyAgent = new SocksProxyAgent('socks://localhost:9050')
module.exports = {
site: 'example.com',
url: 'https://example.com/epg.json',
request: {
httpsAgent: torProxyAgent,
httpAgent: torProxyAgent
},
parser(context) {
// ...
}
}
Contribution
If you find a bug or want to contribute to the code or documentation, you can help by submitting an issue or a pull request.