1
0
Fork 0
mirror of https://gitlab.com/niansa/commsyfuse.git synced 2025-03-06 20:48:31 +01:00
commsyfuse/main.cpp

503 lines
19 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
This file is part of CommSyFuse.
CommSyFuse is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CommSyFuse is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CommSyFuse. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <execinfo.h>
#include <signal.h>
#include <unistd.h>
#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#include <curlpp/Infos.hpp>
#include <curlpp/Options.hpp>
#include <gumbo.h>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
// Base URL (scheme + host, no trailing slash) that every request path is appended to
static std::string server_url = "https://unterricht.sh.schulcommsy.de";
// Session ID sent to the server as the "SID" cookie; main() fills it from argv[2]
static std::string server_sid;
// Room identifier used to build the "/room/<room>/material" URL; main() fills it from argv[3]
static std::string room;
// Report a fatal error and terminate the process.
// Writes `message` followed by a stack trace of at most 10 frames to
// standard error and then calls abort(), so this function never returns.
void panic(std::string message) {
    // Upper bound on the number of frames captured for the trace
    constexpr int max_frames = 10;
    void *frames[max_frames];
    // Announce the failure
    std::cerr << std::endl;
    std::cerr << "An unhandled exception occured:" << std::endl;
    std::cerr << message << std::endl;
    // Capture the current call stack
    const int depth = backtrace(frames, max_frames);
    std::cerr << std::endl;
    std::cerr << "Stacktrace:" << std::endl;
    // The symbol names are written straight to the stderr file descriptor
    backtrace_symbols_fd(frames, depth, STDERR_FILENO);
    // Terminate
    abort();
}
// Signal handler that main() installs for SIGSEGV.
// The signal number is ignored; the handler just forwards a fixed message to panic().
void sigsegv_panic(int /*signum*/) {
    const std::string reason = "Segmentation fault received!";
    panic(reason);
}
// Return a copy of `s` without its leading whitespace.
// "Whitespace" is whatever the POSIX character class [[:space:]] matches.
std::string ltrim(const std::string &s) {
    // Compiled on first use and shared by all later calls
    static const std::regex leading_ws("^[[:space:]]*", std::regex_constants::extended);
    // Replacing the leading run with nothing removes it
    const std::string nothing;
    std::string stripped = std::regex_replace(s, leading_ws, nothing);
    return stripped;
}
// Return a copy of `s` without its trailing whitespace.
// "Whitespace" is whatever the POSIX character class [[:space:]] matches.
std::string rtrim(const std::string &s) {
    // Compiled on first use and shared by all later calls
    static const std::regex trailing_ws("[[:space:]]*$", std::regex_constants::extended);
    // Replacing the trailing run with nothing removes it
    const std::string nothing;
    std::string stripped = std::regex_replace(s, trailing_ws, nothing);
    return stripped;
}
// Return a copy of `s` with whitespace removed from both ends.
std::string trim(const std::string &s) {
    // Strip the tail first, then the head
    const std::string without_tail = rtrim(s);
    return ltrim(without_tail);
}
// Return the last component of `path`: everything after the final '/' or '\'.
// A path without any separator is returned unchanged; a path that ends in a
// separator yields an empty string.
// The argument is taken by const reference because it is never modified, so
// const strings and temporaries can be passed as well.
std::string get_filename(const std::string &path) {
    const std::string::size_type last_sep = path.find_last_of("/\\");
    if (last_sep == std::string::npos) {
        // No separator at all: the whole path is the file name
        return path;
    }
    return path.substr(last_sep + 1);
}
// Tidy up a scraped text snippet:
//  1. every run of two or more spaces is removed entirely (not collapsed to one),
//  2. every newline becomes a single space,
//  3. the first four characters of the result are dropped.
// NOTE(review): step 3 presumably strips a fixed prefix of the meta text - confirm against the page markup.
std::string clean_spaces(const std::string &s) {
    static const std::regex space_run("[ ]{2,}", std::regex_constants::extended);
    std::string cleaned = std::regex_replace(s, space_run, "");
    // Turn line breaks into plain spaces
    for (char &ch : cleaned) {
        if (ch == '\n') {
            ch = ' ';
        }
    }
    // erase() clamps the count, so strings shorter than four characters simply become empty
    cleaned.erase(0, 4);
    return cleaned;
}
// Return `base` with all elements of `addition` appended in order.
// `base` is taken by value, so the caller's vector is left untouched.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string &item : addition) {
        base.push_back(item);
    }
    return base;
}
static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
// Initialise variables
curlpp::Cleanup cleaner;
curlpp::Easy request;
// Set the writer callback to enable cURL to write result in a memory area
request.setOpt(new curlpp::options::WriteStream(&responsebuffer));
// Setting the URL to retrive.
request.setOpt(new curlpp::options::Url(URL));
// Set SID cookie
std::list<std::string> header;
header.push_back("Cookie: SID=" + SID);
request.setOpt(new curlpp::options::HttpHeader(header));
// Perform request
request.perform();
// Return result
return curlpp::infos::ResponseCode::get(request);
}
void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
if (node->type != GUMBO_NODE_ELEMENT) {
return;
}
GumboAttribute* hclass;
if (node->v.element.tag == expectedtag &&
(hclass = gumbo_get_attribute(&node->v.element.attributes, attrname.c_str()))) {
if (hclass->value == searchword) {
elemvect.push_back(node);
}
}
GumboVector* children = &node->v.element.children;
for (unsigned int i = 0; i < children->length; ++i) {
gumbo_search_by_attr(elemvect, static_cast<GumboNode*>(children->data[i]), attrname, searchword, expectedtag);
}
}
// Collect into `elemvect` all `expectedtag` elements below `node` whose
// "class" attribute equals `searchword`.
// The comparison is done by gumbo_search_by_attr() on the complete attribute
// value, so `searchword` has to be the full class string.
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    const std::string class_attr = "class";
    gumbo_search_by_attr(elemvect, node, class_attr, searchword, expectedtag);
}
// Return the first `expectedtag` element below `node` whose "id" attribute
// equals `searchword`.
// If there is no such element the program is terminated through panic().
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> matches;
    gumbo_search_by_attr(matches, node, "id", searchword, expectedtag);
    // Use first node found
    if (!matches.empty()) {
        return matches.front();
    }
    // If no nodes were found, panic()
    panic("Not a single node with ID "+searchword+" could be found!");
    // Not reached because panic() aborts; the return only satisfies the compiler.
    // A null pointer is used instead of a heap-allocated, uninitialised dummy node.
    return nullptr;
}
// Depth-first search below (and including) `node` for elements with tag
// `searchedtag`; every match is appended to `elemvect` in document order.
// Nodes that are not elements are ignored.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    if (node->type == GUMBO_NODE_ELEMENT) {
        // Record this element if it is one of the wanted ones
        const bool is_wanted = (node->v.element.tag == searchedtag);
        if (is_wanted) {
            elemvect.push_back(node);
        }
        // Then descend into each child in turn
        GumboVector &kids = node->v.element.children;
        for (unsigned int idx = 0; idx < kids.length; ++idx) {
            GumboNode *kid = static_cast<GumboNode*>(kids.data[idx]);
            gumbo_search_by_tag(elemvect, kid, searchedtag);
        }
    }
}
// Extract the human-readable text below `node`.
//  - A text node yields its text.
//  - An element other than <script>/<style> yields the texts of its children;
//    a single space is put in front of every non-empty child text except for
//    the first child.
//  - Everything else yields an empty string.
static std::string gumbo_cleantext(GumboNode* node) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }
    if (node->type != GUMBO_NODE_ELEMENT) {
        return "";
    }
    // Script and style contents are not readable text
    const GumboTag tag = node->v.element.tag;
    if (tag == GUMBO_TAG_SCRIPT || tag == GUMBO_TAG_STYLE) {
        return "";
    }
    std::string joined;
    GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        const std::string piece = gumbo_cleantext(static_cast<GumboNode*>(kids.data[idx]));
        // The separator depends on the child's position, not on what was collected so far
        if (idx > 0 && !piece.empty()) {
            joined += " ";
        }
        joined += piece;
    }
    return joined;
}
std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
std::vector<std::string> attrvals;
GumboNode *childnode;
GumboVector* children = &node->v.element.children;
std::vector<std::string> toappend;
// Check if current element is already the right one
if (node->v.element.tag == expected_tag) {
// Return this elements wanted attribute key
return {gumbo_get_attribute(&node->v.element.attributes, attrkey.c_str())->value};
}
// Check if This is a node element
else if (node->type != GUMBO_NODE_ELEMENT) {
return {};
}
// Iterate through child nodes
for (unsigned int it = 0; it < children->length; ++it) {
childnode = reinterpret_cast<GumboNode*> (children->data[it]);
if (childnode->v.element.tag == expected_tag) { // If node is the expected tag; use it
attrvals.push_back(gumbo_get_attribute(&childnode->v.element.attributes, attrkey.c_str())->value);
} else if (childnode->type == GUMBO_NODE_ELEMENT) { // Else; iterate through its child nodes
toappend = gumbo_get_attr(childnode, attrkey, expected_tag);
attrvals = merge_strvects(attrvals, toappend);
}
}
// Return the final result
return attrvals;
}
// Return the trimmed text of the first direct child of `node` that is an
// element with tag `searchtag`.
// If there is no such child the program is terminated through panic().
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    // Only element nodes have children; the element part of the node union
    // must not be read for any other node type
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector &children = node->v.element.children;
        // Iterate through children
        for (unsigned int it = 0; it < children.length; ++it) {
            GumboNode *childnode = static_cast<GumboNode*>(children.data[it]);
            // The tag is only meaningful for element children (text nodes have none)
            if (childnode->type == GUMBO_NODE_ELEMENT && childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }
    panic("A tag that was searched for could not be found!");
    // Not reached because panic() aborts; the return only satisfies the compiler
    return "";
}
// Return all post elements below `node`: the <article> elements whose class
// attribute is "uk-comment", in document order.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> articles;
    const std::string post_class = "uk-comment";
    gumbo_search_by_class(articles, node, post_class, GUMBO_TAG_ARTICLE);
    return articles;
}
// Return the trimmed title text of the post element `node`.
// The title is taken from the first <h4> whose class attribute is
// "uk-comment-title uk-text-truncate". If the post has no such element the
// program is terminated through panic() instead of indexing an empty vector.
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post has no title element!");
        // Not reached because panic() aborts
        return "";
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}
// Return the cleaned-up meta line of the post element `node`.
// The text is taken from the second <div> with class "uk-comment-meta", then
// passed through trim() and clean_spaces(). If the post has fewer than two
// such elements the program is terminated through panic() instead of reading
// past the end of the vector.
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        panic("Post has no meta element!");
        // Not reached because panic() aborts
        return "";
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}
// Return the link target of the post element `node`: the "href" of the first
// <a> found in the post's title (<h4 class="uk-comment-title uk-text-truncate">).
// If the title element or the link is missing the program is terminated
// through panic() instead of indexing an empty vector.
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post has no title element!");
        // Not reached because panic() aborts
        return "";
    }
    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        panic("Post title contains no link!");
        // Not reached because panic() aborts
        return "";
    }
    return hrefs.front();
}
// Return the files attached to the post element `node`.
// The result has one entry per file, each a single-element map from the file
// name (the link's "title" attribute) to its URL (the link's "href" attribute).
// The links are looked up in the third <div> with class "uk-comment-meta".
// A post with fewer than three such elements is treated as having no files
// instead of reading past the end of the vector.
// NOTE(review): assumes the third meta element is the file list - confirm against the page markup.
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;
    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 3) {
        return filenameurlmap;
    }
    // Get URLs and filenames of all links in the file list
    const std::vector<std::string> fileurls = gumbo_get_attr(metanodes[2], "href", GUMBO_TAG_A);
    const std::vector<std::string> filenames = gumbo_get_attr(metanodes[2], "title", GUMBO_TAG_A);
    // Pair them up index by index; stop at the end of the shorter list
    const std::size_t count = fileurls.size() < filenames.size() ? fileurls.size() : filenames.size();
    filenameurlmap.reserve(count);
    for (std::size_t idx = 0; idx < count; ++idx) {
        std::map<std::string, std::string> entry;
        entry[filenames[idx]] = fileurls[idx];
        filenameurlmap.push_back(entry);
    }
    return filenameurlmap;
}
std::string get_post_desc(std::string post_url) {
std::string material_id;
std::stringstream httpcontent;
GumboOutput *post_document;
GumboNode *desc_node;
std::vector<GumboNode *> results;
// Get material ID
material_id = get_filename(post_url);
// Download post
long statuscode = curlreq(httpcontent, server_sid, post_url);
// Check statuscode
if (statuscode != 200) {
panic("Request to download post had an unexpected result: "+std::to_string(statuscode));
}
// Parse post
post_document = gumbo_parse(httpcontent.str().c_str());
// Get description element
desc_node = gumbo_search_by_id(post_document->root, "description"+material_id, GUMBO_TAG_DIV);
// Extract description
gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
// Cencenate occurencies
std::string result_string;
for (auto it = results.begin(); it != results.end(); it++) {
result_string.append(trim(gumbo_cleantext(*it)) + "\n");
}
// Return first occurence
return result_string;
}
// Return the first link inside the <ul id="room-info"> element below `node`
// whose href starts with "/rss/", or the string "none" if there is no such link.
// gumbo_search_by_id() terminates the program if the list element is missing.
std::string get_rss_url(GumboNode *node) { // Currently unused
    // Locate the room info list and gather every link target inside it
    GumboNode *info_list = gumbo_search_by_id(node, "room-info", GUMBO_TAG_UL);
    const std::vector<std::string> links = gumbo_get_attr(info_list, "href", GUMBO_TAG_A);
    // Pick the first one that points into the RSS area
    for (const std::string &link : links) {
        const bool is_rss = (link.rfind("/rss/", 0) == 0);
        if (is_rss) {
            return link;
        }
    }
    // Fixed placeholder so that callers always get a string back
    return "none";
}
// Common signature shared by all mode functions.
// mode_tmpl(name) expands to `name(<parameter list>)`, so `int mode_tmpl(foo) { ... }`
// defines a function foo with that parameter list, and `mode_tmpl(int)` spells the
// function type `int(<parameter list>)`.
// Parameters: the program's argv/argc, the number of posts, and one vector per
// post property; main() fills each of them in post order.
#define mode_tmpl(func) func(char *argv[], int argc, unsigned long numposts, std::vector<std::string> postsmap_numname, std::vector<std::string> postsmap_nummeta,std::vector<std::string> postsmap_numurl, std::vector<std::string> postsmap_numdesc,std::vector<std::vector<std::string>> postsmap_numfileurls, std::vector<std::vector<std::string>> postsmap_numfilenames)
// Callable wrapper type for a mode function returning int
typedef std::function<mode_tmpl(int)> mode;
// Mode functions do not have to use every parameter of the common signature.
// No matching push/pop is visible, so this stays in effect for the rest of the file.
#pragma GCC diagnostic ignored "-Wunused-parameter"
// Interactive mode: print a numbered overview of all posts, then read commands
// from standard input until end of file.
//   <number>  show description, name, URL and attached files of that post
//             (parsed by std::stoul with base 0, so 0x.. and 0.. prefixes are honoured)
//   r         refresh by re-executing the program with the same arguments
//   <empty>   ignored
// Always returns 0.
int mode_tmpl(mode_cli) {
    // Show overview
    for (unsigned long it = 0; it != numposts; it++) {
        std::cout << it << ") " << postsmap_numname[it] << " " << postsmap_nummeta[it] << std::endl;
    }
    std::cout << "r) Refresh" << std::endl;
    // Start CLI loop
    std::string userinstr;
    while (true) {
        // Await user input
        std::cout << "? " << std::flush;
        if (!std::getline(std::cin, userinstr)) {
            // On EOF
            std::cout << std::endl;
            break;
        }
        // Check string input options first
        if (userinstr.empty()) {
            continue;
        }
        if (userinstr == "r") {
            // execvp() searches PATH when argv[0] is not a path and keeps the
            // current environment; execve() with a null environment pointer does
            // neither and is not portable.
            execvp(argv[0], argv);
            // Only reached if the program could not be re-executed
            std::cerr << "Refresh failed: could not re-execute " << argv[0] << std::endl;
            continue;
        }
        // Try to convert input to unsigned long
        unsigned long userin = 0;
        try {
            userin = std::stoul(userinstr, nullptr, 0);
        } catch (const std::invalid_argument &) {
            std::cerr << "Invalid input" << std::endl;
            continue;
        } catch (const std::out_of_range &) {
            // The number does not fit into an unsigned long
            std::cerr << "Invalid input" << std::endl;
            continue;
        }
        // Check if given number is valid
        if (userin >= numposts) {
            std::cerr << "No such post" << std::endl;
            continue;
        }
        // Print description (it may be missing if descriptions were not loaded)
        std::cout << std::endl << std::endl;
        if (userin < postsmap_numdesc.size()) {
            std::cout << postsmap_numdesc[userin];
        }
        std::cout << std::endl;
        // Print information
        std::cout << "Post name: " << postsmap_numname[userin] << std::endl;
        std::cout << "Post URL: " << server_url << postsmap_numurl[userin] << std::endl;
        // Print files; guard every index because the file lists are separate vectors
        if (userin < postsmap_numfilenames.size() && userin < postsmap_numfileurls.size()) {
            const std::vector<std::string> &names = postsmap_numfilenames[userin];
            const std::vector<std::string> &urls = postsmap_numfileurls[userin];
            const std::size_t count = names.size() < urls.size() ? names.size() : urls.size();
            if (count != 0) {
                std::cout << "Post files:" << std::endl;
                for (std::size_t it = 0; it < count; it++) {
                    std::cout << " " << names[it] << ": " << server_url << urls[it] << std::endl;
                }
            }
        }
    }
    return 0;
}
// JSON mode: print all posts as one JSON array on standard output.
// Each array element is an object with the keys "name", "meta", "description",
// "url" (server URL + post path) and "files", the latter being an array of
// objects with the keys "name" and "url" (server URL + file path).
// Always returns 0.
int mode_tmpl(mode_json) {
    json jsonroot = json::array();
    // Iterate through the posts
    for (unsigned long it = 0; it != numposts; it++) {
        json post = json::object();
        post["name"] = postsmap_numname[it];
        post["meta"] = postsmap_nummeta[it];
        // The description list may be shorter than the post list; fall back to an empty text
        post["description"] = it < postsmap_numdesc.size() ? postsmap_numdesc[it] : std::string();
        post["url"] = server_url + postsmap_numurl[it];
        json files = json::array();
        // Guard every index because the file lists are separate vectors
        if (it < postsmap_numfileurls.size() && it < postsmap_numfilenames.size()) {
            const std::vector<std::string> &urls = postsmap_numfileurls[it];
            const std::vector<std::string> &names = postsmap_numfilenames[it];
            const std::size_t count = urls.size() < names.size() ? urls.size() : names.size();
            for (std::size_t it2 = 0; it2 != count; it2++) {
                json file = json::object();
                file["name"] = names[it2];
                file["url"] = server_url + urls[it2];
                files.push_back(file);
            }
        }
        post["files"] = files;
        jsonroot.push_back(post);
    }
    // Serialise to cout...
    std::cout << jsonroot.dump() << std::endl;
    return 0;
}
// Registry of the available modes: mode name (as given in argv[1]) -> function implementing it
static std::map<std::string, mode> modes;
// Per mode name: whether main() has to download every post's description before running the mode
static std::map<std::string, bool> mode_requires_description;
// Per mode name: minimum argc; main() prints the usage text if fewer arguments were given
static std::map<std::string, int> mode_minargc;
void modedef_init() {
modes["cli"] = mode_cli;
mode_requires_description["cli"] = true;
mode_minargc["cli"] = 4;
modes["json"] = mode_json;
mode_requires_description["json"] = true;
mode_minargc["json"] = 4;
}
// Print the command line synopsis to standard error.
// argv[0] is used as the program name in every line.
void cmdusage(char *argv[]) {
    const char *program = argv[0];
    std::cerr << "Usage: " << program << " license" << std::endl;
    std::cerr << " " << program << " cli <SID> <room>" << std::endl;
    std::cerr << " " << program << " json <SID> <room>" << std::endl;
    std::cerr << " " << program << " * <SID> <room>" << std::endl;
}
int main(int argc, char *argv[]) {
// Catch SIGSEGV
signal(SIGSEGV, sigsegv_panic);
// Show license note if --license was given as first argument
if (argc > 1 and !strncmp(argv[1], "license", 9)) {
std::cout << "CommSyFuse Copyright (C) 2020 niansa" << std::endl;
std::cout << "This program comes with ABSOLUTELY NO WARRANTY; for details type `warranty'." << std::endl;
std::cout << "This is free software, and you are welcome to redistribute it" << std::endl;
std::cout << "under certain conditions; type `license' for details." << std::endl;
return 0;
}
// Check arguments
if (argc < 4) {
cmdusage(argv);
return 1;
}
// Create required variables
server_sid = argv[2];
room = argv[3];
std::stringstream httpcontent;
std::vector<std::string> postsmap_numname;
std::vector<std::string> postsmap_nummeta;
std::vector<std::string> postsmap_numurl;
std::vector<std::string> postsmap_numdesc;
std::vector<std::vector<std::string>> postsmap_numfileurls;
std::vector<std::vector<std::string>> postsmap_numfilenames;
std::vector<GumboNode *> postsmap_numnode;
GumboOutput *document;
// Check connection and download document
//std::clog << "Connecting to server..." << std::endl;
long statuscode = curlreq(httpcontent, server_sid, server_url+"/room/" + room + "/material");
if (statuscode == 302) {
std::cerr << "Connection error: Invalid SID" << std::endl;
return 2;
} else if (statuscode == 500) {
std::cerr << "Connection error: Invalid room" << std::endl;
return 3;
} else if (statuscode != 200) {
std::cerr << "Connection error: Server error (Code " << statuscode << ")" << std::endl;
return 9;
}
// Do some stuff
document = gumbo_parse(httpcontent.str().c_str());
httpcontent.str(std::string()); // Clear buffer just in case we need it later
// Get posts
auto posts = get_posts(document->root);
// Initialise mode definitions
modedef_init();
// Map posts and their corresponding URL to a number
//std::clog << "Loading data..." << std::endl;
unsigned long numposts = 0;
for (auto it = posts.begin(); it != posts.end(); it++) {
// Get posts name
postsmap_numname.push_back(get_post_name(*it));
// Get posts meta string
postsmap_nummeta.push_back(get_post_meta(*it));
// Get posts URL
postsmap_numurl.push_back(get_post_url(*it));
// Get posts description
if (mode_requires_description[argv[1]])
postsmap_numdesc.push_back(get_post_desc(server_url + *(postsmap_numurl.end() - 1)));
// Get posts files
auto urlnamefilemap = get_post_files(*it);
for (auto it2 = urlnamefilemap.begin(); it2 != urlnamefilemap.end(); it2++) {
for (auto const& pair: *it2) {
postsmap_numfilenames.push_back({}); postsmap_numfileurls.push_back({});
postsmap_numfilenames[numposts].push_back(pair.first);
postsmap_numfileurls[numposts].push_back(pair.second);
}
}
numposts++;
}
// Check specified mode
std::string mode = static_cast<std::string>(argv[1]);
if (mode_minargc[mode] > argc) {
cmdusage(argv);
return 1;
}
// Start it
if (modes.find(mode) != modes.end()) {
return modes[mode](argv, argc, numposts, postsmap_numname, postsmap_nummeta,postsmap_numurl,
postsmap_numdesc, postsmap_numfileurls, postsmap_numfilenames);
} else {
std::cout << "It works!" << std::endl;
}
return 0;
}