// Mirror of https://gitlab.com/niansa/commsyfuse.git
// Synced 2025-03-06 20:48:31 +01:00
// 374 lines, 14 KiB, C++
#include <algorithm>
#include <iostream>
#include <limits>
#include <list>
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <regex>

#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>

#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#include <curlpp/Infos.hpp>
#include <curlpp/Options.hpp>

#include <gumbo.h>
|
||
|
||
// Base URL that every request in this file is built on.
static std::string server_url{"https://unterricht.sh.schulcommsy.de"};
// Session ID passed to curlreq() as the SID cookie; assigned from argv[1] in main().
static std::string server_sid{};
// Room identifier used to build the material URL; assigned from argv[2] in main().
static std::string room{};
|
||
|
||
|
||
// Print a fatal error report to stderr and terminate the process.
//
// message: text printed on its own line below the fixed headline.
// Does not return: the function ends in abort().
void panic(std::string message) {
    std::cerr << std::endl;
    std::cerr << "An unhandled exception occured:" << std::endl;
    std::cerr << message << std::endl;
    abort();
}
|
||
|
||
|
||
// Return a copy of `s` without its leading whitespace.
std::string ltrim(const std::string &s) {
    // Built once; the ^ anchor restricts the match to the leading run.
    static const std::regex leading_ws("^[[:space:]]*", std::regex_constants::extended);
    std::string stripped = std::regex_replace(s, leading_ws, "");
    return stripped;
}
|
||
// Return a copy of `s` without its trailing whitespace.
std::string rtrim(const std::string &s) {
    // Built once; the $ anchor restricts the match to the trailing run.
    static const std::regex trailing_ws("[[:space:]]*$", std::regex_constants::extended);
    std::string stripped = std::regex_replace(s, trailing_ws, "");
    return stripped;
}
|
||
std::string trim(const std::string &s) {
|
||
return ltrim(rtrim(s));
|
||
}
|
||
|
||
// Return the last component of `path`, i.e. everything after the final
// '/' or '\\'.
//
// path: a file path or URL; it is only read, so const strings and
//       temporaries are accepted.
// Returns the whole input when it contains no separator, and an empty
// string when the input ends in a separator.
std::string get_filename(const std::string &path) {
    const std::string::size_type last_sep = path.find_last_of("/\\");
    if (last_sep == std::string::npos) {
        return path;
    }
    return path.substr(last_sep + 1);
}
|
||
|
||
// Normalise a text fragment for single-line display.
//
// Steps, in order:
//   1. every run of two or more spaces is removed entirely (not collapsed),
//   2. every newline becomes a single space,
//   3. the first four characters are dropped.
// NOTE(review): step 3 presumably strips a fixed prefix of the post meta
// text; confirm against the page markup.
std::string clean_spaces(const std::string &s) {
    static const std::regex space_run("[ ]{2,}", std::regex_constants::extended);
    std::string cleaned = std::regex_replace(s, space_run, "");
    for (char &ch : cleaned) {
        if (ch == '\n') {
            ch = ' ';
        }
    }
    cleaned.erase(0, 4);
    return cleaned;
}
|
||
|
||
// Return `base` followed by every element of `addition`.
//
// base:     taken by value, so the caller's vector is left untouched.
// addition: elements appended after those of `base`, in order.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string &item : addition) {
        base.push_back(item);
    }
    return base;
}
|
||
|
||
// Request `URL`, sending `SID` as the session cookie.
//
// responsebuffer: stream registered as the request's write target.
// SID:            value sent in a "Cookie: SID=<value>" header.
// URL:            address to fetch.
// Returns the response code reported by curlpp for the request.
// NOTE(review): nothing here catches exceptions thrown by curlpp, so they
// reach the caller.
static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
    curlpp::Cleanup cleaner;
    curlpp::Easy request;

    // Session cookie, passed as a plain request header.
    std::list<std::string> header{"Cookie: SID=" + SID};

    request.setOpt(new curlpp::options::WriteStream(&responsebuffer));
    request.setOpt(new curlpp::options::Url(URL));
    request.setOpt(new curlpp::options::HttpHeader(header));

    request.perform();

    const long response_code = curlpp::infos::ResponseCode::get(request);
    return response_code;
}
|
||
|
||
// Collect, depth-first, every element below (and including) `node` whose tag
// is `expectedtag` and whose attribute `attrname` equals `searchword` exactly.
//
// elemvect: output; matching nodes are appended in document order.
// node:     subtree root; non-element nodes are ignored.
void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
    // Only element nodes are inspected or descended into.
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }

    if (node->v.element.tag == expectedtag) {
        GumboAttribute *attr = gumbo_get_attribute(&node->v.element.attributes, attrname.c_str());
        // The whole attribute value must match, not just one token of it.
        if (attr != nullptr && attr->value == searchword) {
            elemvect.push_back(node);
        }
    }

    const GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        GumboNode *child = static_cast<GumboNode *>(kids.data[idx]);
        gumbo_search_by_attr(elemvect, child, attrname, searchword, expectedtag);
    }
}
|
||
// Convenience wrapper: forwards to gumbo_search_by_attr() with the attribute
// name fixed to "class". `searchword` is handed through unchanged.
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    const std::string class_attr = "class";
    gumbo_search_by_attr(elemvect, node, class_attr, searchword, expectedtag);
}
|
||
// Find the first `expectedtag` element below `node` whose "id" attribute
// equals `searchword`.
//
// Returns the first match in document order. When nothing matches, panic()
// is called, which aborts the process.
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> elemvect;
    gumbo_search_by_attr(elemvect, node, "id", searchword, expectedtag);
    // Use first node found
    if (!elemvect.empty()) {
        return elemvect.front();
    }
    // If no nodes were found, panic()
    panic("Not a single node with ID "+searchword+" could be found!");
    // Not reached while panic() aborts. Return a null pointer rather than a
    // freshly allocated, uninitialised (and leaked) node.
    return nullptr;
}
|
||
|
||
// Collect, depth-first, every element below (and including) `node` whose tag
// is `searchedtag`.
//
// elemvect: output; matching nodes are appended in document order.
// node:     subtree root; non-element nodes are ignored.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }

    const bool tag_matches = (node->v.element.tag == searchedtag);
    if (tag_matches) {
        elemvect.push_back(node);
    }

    const GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        GumboNode *child = static_cast<GumboNode *>(kids.data[idx]);
        gumbo_search_by_tag(elemvect, child, searchedtag);
    }
}
|
||
|
||
// Recursively gather the text of `node`.
//
// Text nodes yield their text. Elements other than <script> and <style>
// yield the text of their children; a single space is placed before every
// non-empty child text except at child index 0. Every other node yields an
// empty string.
static std::string gumbo_cleantext(GumboNode* node) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }
    if (node->type != GUMBO_NODE_ELEMENT) {
        return "";
    }

    const GumboTag tag = node->v.element.tag;
    if (tag == GUMBO_TAG_SCRIPT || tag == GUMBO_TAG_STYLE) {
        return "";
    }

    std::string joined;
    const GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        const std::string piece = gumbo_cleantext(static_cast<GumboNode *>(kids.data[idx]));
        if (idx > 0 && !piece.empty()) {
            joined += " ";
        }
        joined += piece;
    }
    return joined;
}
|
||
|
||
// Collect the value of attribute `attrkey` from the `expected_tag` elements
// in the subtree rooted at `node`.
//
// If `node` itself is an `expected_tag` element, only its own attribute is
// returned. Otherwise the children are searched recursively; the search does
// not descend into a matching element. Matching elements that lack the
// attribute are skipped, and non-element nodes contribute nothing.
//
// Returns the attribute values in document order (possibly empty).
std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
    std::vector<std::string> attrvals;
    // The element member of the node union is only valid for element nodes,
    // so the type must be checked before the tag is read.
    if (node->type != GUMBO_NODE_ELEMENT) {
        return attrvals;
    }
    // Check if current element is already the right one
    if (node->v.element.tag == expected_tag) {
        const GumboAttribute *attr = gumbo_get_attribute(&node->v.element.attributes, attrkey.c_str());
        // gumbo_get_attribute() yields a null pointer when the attribute is absent.
        if (attr != nullptr) {
            attrvals.push_back(attr->value);
        }
        return attrvals;
    }
    // Iterate through child nodes; the recursion handles both matching
    // children and deeper descendants.
    const GumboVector *children = &node->v.element.children;
    for (unsigned int it = 0; it < children->length; ++it) {
        GumboNode *childnode = static_cast<GumboNode *>(children->data[it]);
        const std::vector<std::string> found = gumbo_get_attr(childnode, attrkey, expected_tag);
        attrvals.insert(attrvals.end(), found.begin(), found.end());
    }
    // Return the final result
    return attrvals;
}
|
||
|
||
// Return the trimmed text of the first direct child of `node` that is a
// `searchtag` element.
//
// Only direct children are examined. When `node` is not an element or has no
// such child, panic() is called, which aborts the process.
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    // The element member of the node union is only valid for element nodes.
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector *children = &node->v.element.children;
        // Iterate through children
        for (unsigned int it = 0; it < children->length; ++it) {
            GumboNode *childnode = static_cast<GumboNode *>(children->data[it]);
            // Text and comment children carry no tag, so skip them explicitly.
            if (childnode->type == GUMBO_NODE_ELEMENT && childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }
    panic("A tag that was searched for could not be found!");
    return "";
}
|
||
|
||
|
||
// Return every <article> element below `node` whose class attribute is
// exactly "uk-comment", in document order.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> found_posts;
    const std::string post_class = "uk-comment";
    gumbo_search_by_class(found_posts, node, post_class, GUMBO_TAG_ARTICLE);
    return found_posts;
}
|
||
|
||
// Return the trimmed title text of the post rooted at `node`.
//
// The title is the first <h4> whose class attribute is exactly
// "uk-comment-title uk-text-truncate". When the post has no such element,
// panic() is called instead of indexing an empty vector.
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post has no title element!");
        return "";
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}
|
||
|
||
// Return the cleaned-up meta line of the post rooted at `node`.
//
// The text comes from the second <div> whose class attribute is exactly
// "uk-comment-meta", passed through trim() and clean_spaces(). When the post
// has fewer than two such elements, panic() is called instead of reading
// past the end of the vector.
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        panic("Post has no meta element!");
        return "";
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}
|
||
|
||
// Return the href of the first link inside the title of the post rooted at
// `node`.
//
// The title is the first <h4> whose class attribute is exactly
// "uk-comment-title uk-text-truncate". When the title or its link is
// missing, panic() is called instead of indexing an empty vector.
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post has no title element!");
        return "";
    }
    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        panic("Post title contains no link!");
        return "";
    }
    return hrefs.front();
}
|
||
|
||
// List the files attached to the post rooted at `node`.
//
// The file links are read from the third <div> whose class attribute is
// exactly "uk-comment-meta": the "title" attribute of each <a> is taken as
// the file name and its "href" attribute as the URL.
//
// Returns one single-entry map {name -> url} per file, in document order.
// The result is empty when the post has fewer than three meta elements.
// Names and URLs are paired by position; pairing stops at the shorter list.
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;
    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    // Without a third meta element there is nothing to read the files from.
    if (metanodes.size() < 3) {
        return filenameurlmap;
    }
    // Get URLs and filenames
    const std::vector<std::string> fileurls = gumbo_get_attr(metanodes[2], "href", GUMBO_TAG_A);
    const std::vector<std::string> filenames = gumbo_get_attr(metanodes[2], "title", GUMBO_TAG_A);
    // Generate one {name -> url} map per file
    for (std::size_t idx = 0; idx < fileurls.size() && idx < filenames.size(); ++idx) {
        std::map<std::string, std::string> entry;
        entry[filenames[idx]] = fileurls[idx];
        filenameurlmap.push_back(entry);
    }
    return filenameurlmap;
}
|
||
|
||
std::string get_post_desc(std::string post_url) {
|
||
std::string material_id;
|
||
std::stringstream httpcontent;
|
||
GumboOutput *post_document;
|
||
GumboNode *desc_node;
|
||
std::vector<GumboNode *> results;
|
||
// Get material ID
|
||
material_id = get_filename(post_url);
|
||
// Download post
|
||
long statuscode = curlreq(httpcontent, server_sid, post_url);
|
||
// Check statuscode
|
||
if (statuscode != 200) {
|
||
panic("Request to download post had a unexpected result: "+std::to_string(statuscode));
|
||
}
|
||
// Parse post
|
||
post_document = gumbo_parse(httpcontent.str().c_str());
|
||
// Get description element
|
||
desc_node = gumbo_search_by_id(post_document->root, "description"+material_id, GUMBO_TAG_DIV);
|
||
// Extract description
|
||
gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
|
||
// Cencenate occurencies
|
||
std::string result_string;
|
||
for (auto it = results.begin(); it != results.end(); it++) {
|
||
result_string.append(trim(gumbo_cleantext(*it)) + "\n");
|
||
}
|
||
// Return first occurence
|
||
return result_string;
|
||
}
|
||
|
||
// Return the first link in the room-info list whose href starts with
// "/rss/". Currently unused.
//
// The list is the <ul> with id "room-info" below `node`.
// Returns the literal string "none" when no such link exists.
std::string get_rss_url(GumboNode *node) {
    static const std::string rss_prefix = "/rss/";

    // Get room info element
    GumboNode *info_list = gumbo_search_by_id(node, "room-info", GUMBO_TAG_UL);

    // Scan all link targets for the RSS prefix
    const std::vector<std::string> link_targets = gumbo_get_attr(info_list, "href", GUMBO_TAG_A);
    for (const std::string &target : link_targets) {
        if (target.compare(0, rss_prefix.size(), rss_prefix) == 0) {
            return target;
        }
    }

    // Placeholder result so every path returns a value
    return "none";
}
|
||
|
||
|
||
int main(int argc, char *argv[]) {
|
||
// Check arguments
|
||
if (argc < 2) {
|
||
std::cerr << "Usage: " << argv[0] << " <SID> <room>" << std::endl;
|
||
return 1;
|
||
}
|
||
// Create required variables
|
||
server_sid = argv[1];
|
||
room = argv[2];
|
||
std::stringstream httpcontent;
|
||
std::string rss_url;
|
||
std::vector<std::string> postsmap_numname;
|
||
std::vector<std::string> postsmap_nummeta;
|
||
std::vector<std::string> postsmap_numurl;
|
||
std::vector<std::string> postsmap_numdesc;
|
||
std::vector<std::vector<std::string>> postsmap_numfileurls;
|
||
std::vector<std::vector<std::string>> postsmap_numfilenames;
|
||
std::vector<GumboNode *> postsmap_numnode;
|
||
GumboOutput *document;
|
||
// Check connection and download document
|
||
std::cout << "Connecting to server..." << std::endl;
|
||
long statuscode = curlreq(httpcontent, server_sid, server_url+"/room/" + room + "/material");
|
||
if (statuscode == 302) {
|
||
std::cerr << "Connection error: Invalid SID" << std::endl;
|
||
return 2;
|
||
} else if (statuscode == 500) {
|
||
std::cerr << "Connection error: Invalid room" << std::endl;
|
||
return 3;
|
||
} else if (statuscode != 200) {
|
||
std::cerr << "Connection error: Server error (Code " << statuscode << ")" << std::endl;
|
||
return 9;
|
||
}
|
||
// Do some stuff
|
||
document = gumbo_parse(httpcontent.str().c_str());
|
||
httpcontent.str(std::string()); // Clear buffer just in case we need it later
|
||
// Get posts
|
||
auto posts = get_posts(document->root);
|
||
// Map posts and their corresponding URL to a number
|
||
unsigned long numposts = 0;
|
||
for (auto it = posts.begin(); it != posts.end(); it++) {
|
||
// Get posts name
|
||
postsmap_numname.push_back(get_post_name(*it));
|
||
// Get posts meta string
|
||
postsmap_nummeta.push_back(get_post_meta(*it));
|
||
// Get posts URL
|
||
postsmap_numurl.push_back(get_post_url(*it));
|
||
// Get posts description
|
||
postsmap_numdesc.push_back(get_post_desc(server_url + *(postsmap_numurl.end() - 1)));
|
||
// Get posts files
|
||
auto urlnamefilemap = get_post_files(*it);
|
||
for (auto it2 = urlnamefilemap.begin(); it2 != urlnamefilemap.end(); it2++) {
|
||
for (auto const& pair: *it2) {
|
||
postsmap_numfilenames.push_back({}); postsmap_numfileurls.push_back({});
|
||
postsmap_numfilenames[numposts].push_back(pair.first);
|
||
postsmap_numfileurls[numposts].push_back(pair.second);
|
||
}
|
||
}
|
||
// Show overview
|
||
std::cout << numposts << ") " << postsmap_numname[numposts] << " – " << postsmap_nummeta[numposts] << std::endl;
|
||
numposts++;
|
||
}
|
||
// Debug CLI
|
||
unsigned long userin;
|
||
while (true) {
|
||
// Await user input
|
||
std::cout << "? " << std::flush;
|
||
std::cin >> userin;
|
||
// Check if input is valid
|
||
if (userin >= numposts) {
|
||
std::cerr << "No such post" << std::endl;
|
||
continue;
|
||
}
|
||
// If required; get posts description and cache it
|
||
// Print description
|
||
std::cout << std::endl << std::endl << postsmap_numdesc[userin] << std::endl;
|
||
// Print informations
|
||
std::cout << "Post name: " << postsmap_numname[userin] << std::endl;
|
||
std::cout << "Post URL: " << server_url << postsmap_numurl[userin] << std::endl;
|
||
if (postsmap_numfilenames[userin].size() != 0) {
|
||
std::cout << "Post files:" << std::endl;
|
||
for (unsigned long it = 0; it < postsmap_numfilenames[userin].size(); it++) {
|
||
std::cout << " – " << postsmap_numfilenames[userin][it] << ": " << server_url << postsmap_numfileurls[userin][it] << std::endl;
|
||
}
|
||
}
|
||
}
|
||
|
||
return 0;
|
||
}
|