aboutsummaryrefslogtreecommitdiff
path: root/src/server/lib/schools/hetmml/getScheduleData.js
blob: ead856f81de4fca8045d41d16cf463e8ec1f0dee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/**
 * Copyright (C) 2018 Noah Loomans
 *
 * This file is part of rooster.hetmml.nl.
 *
 * rooster.hetmml.nl is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * rooster.hetmml.nl is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with rooster.hetmml.nl.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

const cheerio = require('cheerio');
const debounce = require('promise-debounce');
const _ = require('lodash');

const getUrlOfUser = require('./getURLOfUser');
const axios = require('./axios');

// In-memory cache of the most recently fetched schedule data. It is written
// by getScheduleDataCacheWrapper and stays undefined until the first fetch.
let meetingpointData;
// Date at which meetingpointData was last refreshed.
let lastUpdate;

/**
 * Scrape all the valid users from a meetingpoint navbar.
 *
 * The users are read from the `var classes = [...]`, `var teachers = [...]`,
 * `var rooms = [...]` and `var students = [...]` assignments in the second
 * script element of the page. The result lists the classes first, followed
 * by the teachers, the rooms and the students. `index` is the position of the
 * user within its own list.
 *
 * @param {string} html The html of a meetingpoint navbar.
 * @returns {Array<{ type: string, value: string, index: number }>}
 * [
 *   { type: 'c', value: '6-5H2', index: 23 },
 *   ...
 *   { type: 't', value: 'akh', index: 0 },
 *   ...
 *   { type: 'r', value: '008-mk', index: 2 },
 *   ...
 *   { type: 's', value: '18561', index: 245 },
 *   ...
 * ]
 * @throws {Error} If one of the four lists cannot be found in the script.
 */
function scrapeUsers(html) {
  const page = cheerio.load(html);
  const script = page('script').eq(1).text();

  // The captures are non-greedy so that each one stops at the end of its own
  // list, even when several assignments share a single line. `.*` (rather
  // than `.+`) lets an empty list match as well.
  const lists = [
    { type: 'c', name: 'classes', pattern: /var classes = \[(.*?)\];/ },
    { type: 't', name: 'teachers', pattern: /var teachers = \[(.*?)\];/ },
    { type: 'r', name: 'rooms', pattern: /var rooms = \[(.*?)\];/ },
    { type: 's', name: 'students', pattern: /var students = \[(.*?)\];/ },
  ];

  const usersPerList = lists.map(({ type, name, pattern }) => {
    const match = script.match(pattern);

    if (match === null) {
      throw new Error(`Could not find the list of ${name} in the meetingpoint navbar script.`);
    }

    const rawList = match[1];

    // An empty list simply contains no users. Splitting it would produce a
    // single bogus user with an empty value.
    if (rawList.trim() === '') {
      return [];
    }

    return rawList.split(',').map((item, index) => ({
      type,
      value: item.replace(/"/g, ''),
      index,
    }));
  });

  return _.flatten(usersPerList);
}

/**
 * Scrape the known valid weeks from a meetingpoint navbar.
 *
 * There probably are more valid weeks, but these ones are guaranteed to be
 * valid.
 * @param {string} html The html of a meetingpoint navbar.
 * @returns {Array<{ id: string, text: string }>} One entry per child of the
 * `select[name="week"]` element: `id` is its `value` attribute and `text` is
 * its text content.
 */
function scrapeWeeks(html) {
  const page = cheerio.load(html);
  const weekSelector = page('select[name="week"]');

  return _.map(weekSelector.children(), (option) => {
    // Wrap the element with the loaded document instead of the top-level
    // `cheerio(...)` call, which is deprecated in newer cheerio releases.
    const wrappedOption = page(option);

    return {
      id: wrappedOption.attr('value'),
      text: wrappedOption.text(),
    };
  });
}

/**
 * Extract the alt text (the text next to the short code) from a specific
 * meetingpoint schedule.
 * @param {string} html The html of a specific meetingpoint schedule.
 * @returns {string} The trimmed text of the third `center > font` element.
 */
function scrapeAltText(html) {
  const $ = cheerio.load(html);
  const altElement = $('center > font').eq(2);
  const rawText = altElement.text();

  return rawText.trim();
}

/**
 * Combines several user arrays into one. If a duplicate user is present (same
 * type and value), the first occurrence is kept.
 *
 * This function is currently used to merge a subset of users with alts
 * attached to them with a complete set of users without alts.
 * @param {*} usersArrays An array of user arrays.
 * @returns {*} A single array of users without duplicates.
 */
function combineUsers(usersArrays) {
  const identityOf = ({ type, value }) => `${type}/${value}`;
  const allUsers = _.flatten(usersArrays);

  return _.uniqBy(allUsers, identityOf);
}

/**
 * Requests the schedule page of every given user and adds the alt text found
 * on that page as an `alt` field.
 *
 * For example, it will add the teacher name to a teacher object.
 *
 * All requests are started at once. The returned promise rejects as soon as
 * one of them fails.
 *
 * @param {*} users [{ type: string, value: string, index: number }, ...]
 * @returns {Promise} [{ type: string, value: string, alt: string, index: number }, ...]
 */
function getAlts(users) {
  // NOTE(review): the last argument (7) is presumably a week number; confirm
  // against getURLOfUser.
  const requestSchedule = user => axios.get(
    getUrlOfUser('dag', user.type, user.index, 7),
    { timeout: 8000 },
  );

  // The responses come back in the same order as `users`, so the position of
  // a response identifies the user it belongs to.
  const attachAlt = (response, position) => ({
    ...users[position],
    alt: scrapeAltText(response.data),
  });

  const pendingResponses = users.map(user => requestSchedule(user));

  return Promise.all(pendingResponses)
    .then(responses => responses.map((response, position) => attachAlt(response, position)));
}

/**
 * Requests all the relevant data from the meetingpoint server.
 * This is very expensive! Only call when you absolutely need to.
 *
 * The users and the daily schedule weeks are scraped from the daily schedule
 * navbar. The basis schedule weeks are scraped from the basis schedule
 * navbar. Teachers additionally get an `alt` field when those can be fetched.
 * @returns {Promise} { users, dailyScheduleWeeks, basisScheduleWeeks }
 */
function getScheduleData() {
  const dailyNavbarRequest = axios.get('/dagroosters/frames/navbar.htm');
  const basisNavbarRequest = axios.get('/basisroosters/frames/navbar.htm');

  return Promise.all([dailyNavbarRequest, basisNavbarRequest]).then((responses) => {
    const dailyNavbarHtml = responses[0].data;

    const users = scrapeUsers(dailyNavbarHtml);
    const weeks = {
      dailyScheduleWeeks: scrapeWeeks(dailyNavbarHtml),
      basisScheduleWeeks: scrapeWeeks(responses[1].data),
    };

    const isTeacher = user => user.type === 't';

    // The alts are non-essential: if fetching them fails, fall back to the
    // plain user data without alts.
    const withoutAlts = () => ({ users, ...weeks });
    const withAlts = teachersWithAlts => ({
      users: combineUsers([teachersWithAlts, users]),
      ...weeks,
    });

    return getAlts(users.filter(isTeacher))
      .then(withAlts)
      .catch(withoutAlts);
  });
}

/**
 * Cheap-to-call wrapper around getScheduleData. In most cases it resolves
 * with a cached version of the data; the cache is refreshed once it is more
 * than 30 minutes old (or when nothing has been cached yet).
 * @returns {Promise} { users, dailyScheduleWeeks, basisScheduleWeeks }
 */
function getScheduleDataCacheWrapper() {
  const maxCacheAge = 30 * 60 * 1000; // 30 minutes
  const cacheIsStale = meetingpointData == null || new Date() - lastUpdate > maxCacheAge;

  if (!cacheIsStale) {
    return Promise.resolve(meetingpointData);
  }

  return getScheduleData().then((freshData) => {
    // Only touch the cache once the new data has actually arrived.
    lastUpdate = new Date();
    meetingpointData = freshData;

    return meetingpointData;
  });
}

// Debounce getScheduleDataCacheWrapper. This ensures that no requests are
// wasted when a user asks for the schedule data while the schedule data is
// already being requested on behalf of another user.
module.exports = debounce(getScheduleDataCacheWrapper);