1
0
Fork 0
mirror of https://gitlab.com/niansa/commsyfuse.git synced 2025-03-06 20:48:31 +01:00
commsyfuse/main.cpp
2020-04-29 14:58:55 +02:00

374 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <regex>
#include <csignal>
#include <cstdio>
#include <unistd.h>
#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#include <curlpp/Infos.hpp>
#include <curlpp/Options.hpp>
#include <gumbo.h>
// Base URL of the server; request paths (room listing, post URLs) are appended to it.
static std::string server_url = "https://unterricht.sh.schulcommsy.de";
// Session ID taken from argv[1] in main() and passed to curlreq() as the SID cookie value.
static std::string server_sid;
// Room identifier taken from argv[2] in main(); used to build the material listing URL.
static std::string room;
// Fatal-error exit: writes a banner and `message` to stderr, then terminates
// the process with abort(). This function never returns to its caller.
void panic(std::string message) {
    std::cerr << std::endl;
    std::cerr << "An unhandled exception occured:" << std::endl;
    std::cerr << message << std::endl;
    abort();
}
// Returns a copy of `s` with the leading whitespace run removed.
// The pattern is compiled once and reused across calls.
std::string ltrim(const std::string &s) {
    static const std::regex leading_ws("^[[:space:]]*", std::regex_constants::extended);
    std::string stripped = std::regex_replace(s, leading_ws, "");
    return stripped;
}
// Returns a copy of `s` with the trailing whitespace run removed.
// The pattern is compiled once and reused across calls.
std::string rtrim(const std::string &s) {
    static const std::regex trailing_ws("[[:space:]]*$", std::regex_constants::extended);
    std::string stripped = std::regex_replace(s, trailing_ws, "");
    return stripped;
}
// Returns a copy of `s` with whitespace stripped from both ends:
// the right side is trimmed first, then the left side.
std::string trim(const std::string &s) {
    const std::string right_stripped = rtrim(s);
    return ltrim(right_stripped);
}
// Returns the last component of `path`: everything after the final '/' or '\'.
// - No separator in `path`: the whole string is returned.
// - `path` ends in a separator: an empty string is returned.
// The path is taken by const reference (it is never modified), so const
// strings and temporaries are accepted as well as mutable lvalues.
std::string get_filename(const std::string &path) {
    const std::string::size_type last_sep = path.find_last_of("/\\");
    if (last_sep == std::string::npos) {
        return path;
    }
    return path.substr(last_sep + 1);
}
// Normalises a text fragment in three steps:
//   1. every run of two or more spaces is removed entirely (not collapsed),
//   2. every newline becomes a single space,
//   3. the first four characters of the result are dropped (fewer if the
//      string is shorter).
// NOTE(review): step 3 presumably strips a fixed prefix of the meta line
// this is applied to - confirm against the page markup.
std::string clean_spaces(const std::string &s) {
    static const std::regex space_run{"[ ]{2,}", std::regex_constants::extended};
    std::string cleaned = std::regex_replace(s, space_run, "");
    for (char &ch : cleaned) {
        if (ch == '\n') {
            ch = ' ';
        }
    }
    cleaned.erase(0, 4);
    return cleaned;
}
// Returns `base` with every element of `addition` appended in order.
// `base` is taken by value, so the caller's vector is left untouched.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string &item : addition) {
        base.push_back(item);
    }
    return base;
}
static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
// Initialise variables
curlpp::Cleanup cleaner;
curlpp::Easy request;
// Set the writer callback to enable cURL to write result in a memory area
request.setOpt(new curlpp::options::WriteStream(&responsebuffer));
// Setting the URL to retrive.
request.setOpt(new curlpp::options::Url(URL));
// Set SID cookie
std::list<std::string> header;
header.push_back("Cookie: SID=" + SID);
request.setOpt(new curlpp::options::HttpHeader(header));
// Perform request
request.perform();
// Return result
return curlpp::infos::ResponseCode::get(request);
}
// Depth-first search below (and including) `node` for elements whose tag is
// `expectedtag` and whose attribute `attrname` has exactly the value
// `searchword`. Matches are appended to `elemvect` in document order.
// Non-element nodes are ignored; children of a matching element are still
// searched.
void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }
    const GumboElement &element = node->v.element;
    if (element.tag == expectedtag) {
        const GumboAttribute *attr = gumbo_get_attribute(&element.attributes, attrname.c_str());
        // The attribute may be absent; only an exact value match counts
        if (attr != nullptr && searchword == attr->value) {
            elemvect.push_back(node);
        }
    }
    const GumboVector &kids = element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        GumboNode *child = static_cast<GumboNode*>(kids.data[idx]);
        gumbo_search_by_attr(elemvect, child, attrname, searchword, expectedtag);
    }
}
// Convenience wrapper around gumbo_search_by_attr() for the "class" attribute.
// The whole attribute value must equal `searchword` (no per-token matching).
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    const std::string class_attr = "class";
    gumbo_search_by_attr(elemvect, node, class_attr, searchword, expectedtag);
}
// Returns the first element below (and including) `node` that has tag
// `expectedtag` and an "id" attribute equal to `searchword`.
// If no such element exists, panic() is called, which aborts the process.
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> matches;
    gumbo_search_by_attr(matches, node, "id", searchword, expectedtag);
    if (matches.empty()) {
        panic("Not a single node with ID "+searchword+" could be found!");
        // Not reached: panic() aborts. Return a null pointer rather than
        // allocating an uninitialised node that nobody would ever free.
        return nullptr;
    }
    // Use first node found
    return matches.front();
}
// Depth-first search below (and including) `node` for elements with tag
// `searchedtag`. Matches are appended to `elemvect` in document order;
// non-element nodes are ignored.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboElement &element = node->v.element;
        if (element.tag == searchedtag) {
            elemvect.push_back(node);
        }
        const GumboVector &kids = element.children;
        for (unsigned int idx = 0; idx < kids.length; ++idx) {
            GumboNode *child = static_cast<GumboNode*>(kids.data[idx]);
            gumbo_search_by_tag(elemvect, child, searchedtag);
        }
    }
}
// Collects the visible text below `node`.
// - Text node: its text is returned as-is.
// - Element other than <script>/<style>: the text of all children is joined;
//   a single space is inserted before every non-empty piece except when it
//   comes from the first child.
// - Anything else (including <script>/<style>): empty string.
static std::string gumbo_cleantext(GumboNode* node) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }

    // Only inspect the element tag once we know this really is an element
    const bool is_element = (node->type == GUMBO_NODE_ELEMENT);
    if (!is_element ||
        node->v.element.tag == GUMBO_TAG_SCRIPT ||
        node->v.element.tag == GUMBO_TAG_STYLE) {
        return "";
    }

    std::string joined;
    const GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        const std::string piece = gumbo_cleantext(static_cast<GumboNode*>(kids.data[idx]));
        if (idx > 0 && !piece.empty()) {
            joined += " ";
        }
        joined += piece;
    }
    return joined;
}
// Collects the value of attribute `attrkey` from elements with tag
// `expected_tag` found below (and including) `node`, in document order.
// - A non-element node yields an empty result.
// - An element with the expected tag yields exactly one entry and is not
//   searched any deeper.
// - If such an element lacks the attribute, an empty string is stored for it.
//   One entry per matching element keeps results for different attributes of
//   the same subtree index-aligned, which get_post_files() relies on when it
//   pairs "href" with "title".
std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
    // The element member of the node union is only valid for element nodes,
    // so the type has to be checked before looking at tag or children.
    if (node->type != GUMBO_NODE_ELEMENT) {
        return {};
    }
    const GumboElement &element = node->v.element;
    // Check if current element is already the right one
    if (element.tag == expected_tag) {
        // gumbo_get_attribute() returns a null pointer for a missing attribute
        const GumboAttribute *attr = gumbo_get_attribute(&element.attributes, attrkey.c_str());
        return {attr != nullptr ? std::string(attr->value) : std::string()};
    }
    // Otherwise gather the results of all child subtrees
    std::vector<std::string> attrvals;
    const GumboVector &children = element.children;
    for (unsigned int it = 0; it < children.length; ++it) {
        GumboNode *childnode = static_cast<GumboNode*>(children.data[it]);
        const std::vector<std::string> found = gumbo_get_attr(childnode, attrkey, expected_tag);
        attrvals.insert(attrvals.end(), found.begin(), found.end());
    }
    return attrvals;
}
// Returns the trimmed text of the first direct child of `node` that is an
// element with tag `searchtag`. Only direct children are considered.
// If `node` is not an element or has no such child, panic() is called, which
// aborts the process.
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    // The element member of the node union (children, tag) is only valid for
    // element nodes, so both the node and each child are type-checked first.
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector &children = node->v.element.children;
        for (unsigned int it = 0; it < children.length; ++it) {
            GumboNode *childnode = static_cast<GumboNode*>(children.data[it]);
            if (childnode->type == GUMBO_NODE_ELEMENT &&
                childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }
    panic("A tag that was searched for could not be found!");
    return ""; // not reached: panic() aborts
}
// Returns every <article> element below `node` whose class attribute is
// exactly "uk-comment", in document order.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> found_posts;
    const std::string post_class = "uk-comment";
    gumbo_search_by_class(found_posts, node, post_class, GUMBO_TAG_ARTICLE);
    return found_posts;
}
// Returns the trimmed text of the first <h4> below `node` whose class
// attribute is exactly "uk-comment-title uk-text-truncate".
// If the post has no such heading, panic() is called, which aborts the
// process (previously the empty result vector was indexed unchecked).
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("A post without a title element was found!");
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}
// Returns the text of the second <div class="uk-comment-meta"> below `node`,
// trimmed and then passed through clean_spaces().
// If the post has fewer than two such blocks, panic() is called, which aborts
// the process (previously the result vector was indexed unchecked).
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        panic("A post without a meta element was found!");
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}
// Returns the "href" of the first <a> inside the post's title heading
// (<h4 class="uk-comment-title uk-text-truncate">) below `node`.
// If the heading or the link is missing, panic() is called, which aborts the
// process (previously both result vectors were indexed unchecked).
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("A post without a title element was found!");
    }
    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        panic("A post without a link in its title was found!");
    }
    return hrefs.front();
}
// Lists the files attached to the post `node`.
// Reads the third <div class="uk-comment-meta"> below `node` and pairs, by
// position, the "title" and "href" attributes of the <a> elements inside it.
// Returns one single-entry map {file name -> file URL} per pair; pairing stops
// at the shorter of the two attribute lists.
// If the post has fewer than three meta blocks an empty list is returned
// (previously the result vector was indexed unchecked).
// NOTE(review): presumably a post without attachments simply lacks the third
// meta block - confirm against the page markup.
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;
    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 3) {
        return filenameurlmap;
    }
    // Get URLs and filenames
    const std::vector<std::string> fileurls = gumbo_get_attr(metanodes[2], "href", GUMBO_TAG_A);
    const std::vector<std::string> filenames = gumbo_get_attr(metanodes[2], "title", GUMBO_TAG_A);
    // Generate one {name -> url} map per file
    for (std::vector<std::string>::size_type idx = 0;
         idx < fileurls.size() && idx < filenames.size(); ++idx) {
        filenameurlmap.emplace_back();
        filenameurlmap.back()[filenames[idx]] = fileurls[idx];
    }
    return filenameurlmap;
}
std::string get_post_desc(std::string post_url) {
std::string material_id;
std::stringstream httpcontent;
GumboOutput *post_document;
GumboNode *desc_node;
std::vector<GumboNode *> results;
// Get material ID
material_id = get_filename(post_url);
// Download post
long statuscode = curlreq(httpcontent, server_sid, post_url);
// Check statuscode
if (statuscode != 200) {
panic("Request to download post had a unexpected result: "+std::to_string(statuscode));
}
// Parse post
post_document = gumbo_parse(httpcontent.str().c_str());
// Get description element
desc_node = gumbo_search_by_id(post_document->root, "description"+material_id, GUMBO_TAG_DIV);
// Extract description
gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
// Cencenate occurencies
std::string result_string;
for (auto it = results.begin(); it != results.end(); it++) {
result_string.append(trim(gumbo_cleantext(*it)) + "\n");
}
// Return first occurence
return result_string;
}
// Returns the first link inside <ul id="room-info"> (below `node`) whose href
// starts with "/rss/", or the literal string "none" if there is no such link.
// Currently unused.
std::string get_rss_url(GumboNode *node) {
    // Locate the room info list; gumbo_search_by_id() panics if it is missing
    GumboNode *info_list = gumbo_search_by_id(node, "room-info", GUMBO_TAG_UL);
    // Collect the href of every link in it
    const auto links = gumbo_get_attr(info_list, "href", GUMBO_TAG_A);
    for (const std::string &link : links) {
        // Prefix test: does the link begin with "/rss/"?
        if (link.compare(0, 5, "/rss/") == 0) {
            return link;
        }
    }
    // Always return something so callers get a defined value
    return "none";
}
int main(int argc, char *argv[]) {
// Check arguments
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <SID> <room>" << std::endl;
return 1;
}
// Create required variables
server_sid = argv[1];
room = argv[2];
std::stringstream httpcontent;
std::string rss_url;
std::vector<std::string> postsmap_numname;
std::vector<std::string> postsmap_nummeta;
std::vector<std::string> postsmap_numurl;
std::vector<std::string> postsmap_numdesc;
std::vector<std::vector<std::string>> postsmap_numfileurls;
std::vector<std::vector<std::string>> postsmap_numfilenames;
std::vector<GumboNode *> postsmap_numnode;
GumboOutput *document;
// Check connection and download document
std::cout << "Connecting to server..." << std::endl;
long statuscode = curlreq(httpcontent, server_sid, server_url+"/room/" + room + "/material");
if (statuscode == 302) {
std::cerr << "Connection error: Invalid SID" << std::endl;
return 2;
} else if (statuscode == 500) {
std::cerr << "Connection error: Invalid room" << std::endl;
return 3;
} else if (statuscode != 200) {
std::cerr << "Connection error: Server error (Code " << statuscode << ")" << std::endl;
return 9;
}
// Do some stuff
document = gumbo_parse(httpcontent.str().c_str());
httpcontent.str(std::string()); // Clear buffer just in case we need it later
// Get posts
auto posts = get_posts(document->root);
// Map posts and their corresponding URL to a number
unsigned long numposts = 0;
for (auto it = posts.begin(); it != posts.end(); it++) {
// Get posts name
postsmap_numname.push_back(get_post_name(*it));
// Get posts meta string
postsmap_nummeta.push_back(get_post_meta(*it));
// Get posts URL
postsmap_numurl.push_back(get_post_url(*it));
// Get posts description
postsmap_numdesc.push_back(get_post_desc(server_url + *(postsmap_numurl.end() - 1)));
// Get posts files
auto urlnamefilemap = get_post_files(*it);
for (auto it2 = urlnamefilemap.begin(); it2 != urlnamefilemap.end(); it2++) {
for (auto const& pair: *it2) {
postsmap_numfilenames.push_back({}); postsmap_numfileurls.push_back({});
postsmap_numfilenames[numposts].push_back(pair.first);
postsmap_numfileurls[numposts].push_back(pair.second);
}
}
// Show overview
std::cout << numposts << ") " << postsmap_numname[numposts] << " " << postsmap_nummeta[numposts] << std::endl;
numposts++;
}
// Debug CLI
unsigned long userin;
while (true) {
// Await user input
std::cout << "? " << std::flush;
std::cin >> userin;
// Check if input is valid
if (userin >= numposts) {
std::cerr << "No such post" << std::endl;
continue;
}
// If required; get posts description and cache it
// Print description
std::cout << std::endl << std::endl << postsmap_numdesc[userin] << std::endl;
// Print informations
std::cout << "Post name: " << postsmap_numname[userin] << std::endl;
std::cout << "Post URL: " << server_url << postsmap_numurl[userin] << std::endl;
if (postsmap_numfilenames[userin].size() != 0) {
std::cout << "Post files:" << std::endl;
for (unsigned long it = 0; it < postsmap_numfilenames[userin].size(); it++) {
std::cout << " " << postsmap_numfilenames[userin][it] << ": " << server_url << postsmap_numfileurls[userin][it] << std::endl;
}
}
}
return 0;
}