diff options
author | Noah Loomans <noahloomans@gmail.com> | 2018-07-07 17:34:02 +0200 |
---|---|---|
committer | Noah Loomans <noahloomans@gmail.com> | 2018-07-07 17:34:02 +0200 |
commit | 5c265c04ad513d845a41c7866c3ed231c8d5e68e (patch) | |
tree | 9a86b66958e59e4a07d0c8e10731667b05990816 /src/sync/scrapeScheduleData.js | |
parent | 8b1f29f2802a081a67151e47b9c52803d7cc568a (diff) |
server: Use database for users (branch: sync-with-db)
Diffstat (limited to 'src/sync/scrapeScheduleData.js')
-rw-r--r-- | src/sync/scrapeScheduleData.js | 89 |
1 files changed, 89 insertions, 0 deletions
// From: diff --git a/src/sync/scrapeScheduleData.js b/src/sync/scrapeScheduleData.js
// (new file mode 100644, index 0000000..fc7193a, @@ -0,0 +1,89 @@)

/**
 * Copyright (C) 2018 Noah Loomans
 *
 * This file is part of rooster.hetmml.nl.
 *
 * rooster.hetmml.nl is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * rooster.hetmml.nl is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with rooster.hetmml.nl.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

const cheerio = require('cheerio');
const _ = require('lodash');

/**
 * The user lists that are read from the navbar script, in the order in which
 * their users appear in the result of `scrapeUsers`.
 *
 * - `variable`: name of the JavaScript array variable inside the navbar script.
 * - `pattern`: captures the raw, comma separated contents of that array.
 *   The capture is lazy so that it stops at the first `];`, and it may be
 *   empty so that an empty array (`var rooms = [];`) is still recognised.
 * - `prefix`: the part of a user key before the slash.
 * - `type`: the value of the `type` property of every user from this list.
 */
const USER_LISTS = [
  {
    variable: 'classes',
    pattern: /var classes = \[(.*?)\];/,
    prefix: 'c',
    type: 'class',
  },
  {
    variable: 'teachers',
    pattern: /var teachers = \[(.*?)\];/,
    prefix: 't',
    type: 'teacher',
  },
  {
    variable: 'rooms',
    pattern: /var rooms = \[(.*?)\];/,
    prefix: 'r',
    type: 'room',
  },
  {
    variable: 'students',
    pattern: /var students = \[(.*?)\];/,
    prefix: 's',
    type: 'student',
  },
];

/**
 * Extract the names of a single user list from the navbar script.
 * @param {string} script The text of the navbar script.
 * @param {RegExp} pattern Regex whose first capture group is the raw contents
 *                         of the array literal.
 * @param {string} variable Name of the list, only used in the error message.
 * @returns {string[]} The names, without their surrounding double quotes.
 * @throws {Error} When the list can not be found in the script.
 */
function parseNameList(script, pattern, variable) {
  const match = script.match(pattern);

  if (match === null) {
    // Fail loudly with a useful message instead of a TypeError on `null[1]`.
    throw new Error(`Could not find the "${variable}" list in the navbar script.`);
  }

  const contents = match[1].trim();

  if (contents === '') {
    // `''.split(',')` would yield `['']`, i.e. a single user without a name.
    return [];
  }

  // Whitespace around an item is trimmed before the quotes are stripped, so
  // whitespace inside the quotes is kept as part of the name.
  return contents.split(',').map(item => item.trim().replace(/"/g, ''));
}

/**
 * Scrape all the valid users from a meetingpoint navbar.
 *
 * The users are read from the second `<script>` element of the page. The
 * result lists all classes first, followed by the teachers, the rooms and the
 * students. `index` is the position of a user within its own list.
 *
 * @param {string} html The html of a meetingpoint navbar.
 * @returns {Array<{key: string, type: string, name: string, index: number}>}
 * [
 *   { key: 'c/6-5H2', type: 'class', name: '6-5H2', index: 23 },
 *   ...
 *   { key: 't/akh', type: 'teacher', name: 'akh', index: 0 },
 *   ...
 *   { key: 'r/008-mk', type: 'room', name: '008-mk', index: 2 },
 *   ...
 *   { key: 's/18561', type: 'student', name: '18561', index: 245 },
 *   ...
 * ]
 * @throws {Error} When one of the user lists is missing from the script.
 */
function scrapeUsers(html) {
  const page = cheerio.load(html);
  const script = page('script').eq(1).text();

  const usersPerList = USER_LISTS.map(({
    variable,
    pattern,
    prefix,
    type,
  }) => (
    parseNameList(script, pattern, variable).map((name, index) => ({
      key: `${prefix}/${name}`,
      type,
      name,
      index,
    }))
  ));

  return _.flatten(usersPerList);
}

/**
 * Scrape the alt text (the text next to the short code) from a
 * specific meetingpoint schedule.
 *
 * The text is taken from the third element matching `center > font`.
 *
 * @param {string} html The html of a specific meetingpoint schedule.
 * @returns {string|undefined} The trimmed alt text, or `undefined` when the
 *                             element is missing or its text is empty.
 */
function scrapeAltName(html) {
  const page = cheerio.load(html);
  const altName = page('center > font').eq(2).text().trim();

  // An empty string carries no information, report it as "no alt name".
  return altName || undefined;
}

module.exports.scrapeUsers = scrapeUsers;
module.exports.scrapeAltName = scrapeAltName;