mirror of
https://gitlab.com/niansa/commsyfuse.git
synced 2025-03-06 20:48:31 +01:00
503 lines
19 KiB
C++
503 lines
19 KiB
C++
/*
    This file is part of CommSyFuse.

    CommSyFuse is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CommSyFuse is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CommSyFuse.  If not, see <https://www.gnu.org/licenses/>.
*/
|
||
|
||
#include <iostream>
|
||
#include <string>
|
||
#include <sstream>
|
||
#include <fstream>
|
||
#include <vector>
|
||
#include <map>
|
||
#include <regex>
|
||
|
||
#include <csignal>
|
||
#include <cstdio>
|
||
#include <unistd.h>
|
||
#include <signal.h>
|
||
#include <execinfo.h>
|
||
|
||
#include <curlpp/cURLpp.hpp>
|
||
#include <curlpp/Easy.hpp>
|
||
#include <curlpp/Infos.hpp>
|
||
#include <curlpp/Options.hpp>
|
||
|
||
#include <nlohmann/json.hpp>
|
||
using json = nlohmann::json;
|
||
|
||
#include <gumbo.h>
|
||
|
||
// Base URL that every request path and post/file link is appended to
static std::string server_url{"https://unterricht.sh.schulcommsy.de"};
// Session ID sent as the SID cookie; main() assigns it from argv[2]
static std::string server_sid{};
// Room identifier used in the material URL; main() assigns it from argv[3]
static std::string room{};
|
||
|
||
|
||
|
||
// Report a fatal error and terminate the process.
// Writes the message and a stack trace of at most ten frames to standard
// error, then calls abort(), so control never returns to the caller.
void panic(std::string message) {
    // Announce the failure
    std::cerr << std::endl;
    std::cerr << "An unhandled exception occured:" << std::endl;
    std::cerr << message << std::endl;
    // Collect the return addresses of the currently active frames
    constexpr int max_frames = 10;
    void *frames[max_frames];
    const int frame_count = backtrace(frames, max_frames);
    // Write the symbolised frames directly to the stderr file descriptor
    std::cerr << std::endl;
    std::cerr << "Stacktrace:" << std::endl;
    backtrace_symbols_fd(frames, frame_count, STDERR_FILENO);
    // Terminate abnormally
    abort();
}
|
||
// Signal handler installed for SIGSEGV by main().
// The signal number is ignored; the handler just forwards a fixed message
// to panic().
void sigsegv_panic(int) {
    const std::string reason = "Segmentation fault received!";
    panic(reason);
}
|
||
|
||
|
||
// Return a copy of s with its leading whitespace removed.
std::string ltrim(const std::string &s) {
    // Anchored at the start of the input; compiled once and reused
    static const std::regex leading_blank{"^[[:space:]]*", std::regex_constants::extended};
    const std::string nothing;
    std::string stripped = std::regex_replace(s, leading_blank, nothing);
    return stripped;
}
|
||
// Return a copy of s with its trailing whitespace removed.
std::string rtrim(const std::string &s) {
    // Anchored at the end of the input; compiled once and reused
    static const std::regex trailing_blank{"[[:space:]]*$", std::regex_constants::extended};
    const std::string nothing;
    std::string stripped = std::regex_replace(s, trailing_blank, nothing);
    return stripped;
}
|
||
std::string trim(const std::string &s) {
|
||
return ltrim(rtrim(s));
|
||
}
|
||
|
||
// Return the last component of a path or URL.
// The component starts after the final '/' or '\\'. When the input contains
// no separator the whole input is returned; when it ends with a separator
// the result is empty.
// The parameter is a const reference because the path is only read; this
// also lets callers pass const strings and temporaries.
std::string get_filename(const std::string &path) {
    const std::string::size_type last_separator = path.find_last_of("/\\");
    if (last_separator == std::string::npos) {
        return path;
    }
    return path.substr(last_separator + 1);
}
|
||
|
||
// Normalise a scraped meta string.
// Steps, in this order: delete every run of two or more spaces, turn each
// newline into a single space, then drop the first four characters (or the
// whole string if it is shorter than that).
std::string clean_spaces(const std::string &s) {
    static const std::regex space_run{"[ ]{2,}", std::regex_constants::extended};
    std::string cleaned = std::regex_replace(s, space_run, "");
    // Newlines become spaces only after the space runs have been removed
    for (char &ch : cleaned) {
        if (ch == '\n') {
            ch = ' ';
        }
    }
    // Cut off the four-character prefix
    cleaned.erase(0, 4);
    return cleaned;
}
|
||
|
||
// Return base followed by all elements of addition.
// base is taken by value, so the caller's vector is left untouched.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string &item : addition) {
        base.push_back(item);
    }
    return base;
}
|
||
|
||
static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
|
||
// Initialise variables
|
||
curlpp::Cleanup cleaner;
|
||
curlpp::Easy request;
|
||
// Set the writer callback to enable cURL to write result in a memory area
|
||
request.setOpt(new curlpp::options::WriteStream(&responsebuffer));
|
||
// Setting the URL to retrive.
|
||
request.setOpt(new curlpp::options::Url(URL));
|
||
// Set SID cookie
|
||
std::list<std::string> header;
|
||
header.push_back("Cookie: SID=" + SID);
|
||
request.setOpt(new curlpp::options::HttpHeader(header));
|
||
// Perform request
|
||
request.perform();
|
||
// Return result
|
||
return curlpp::infos::ResponseCode::get(request);
|
||
}
|
||
|
||
// Collect every element below (and including) node that has the expected
// tag and whose attribute attrname equals searchword exactly.
// Matches are appended to elemvect in depth-first, document order.
void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
    // Nothing to inspect unless this is an element node
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }
    GumboElement &element = node->v.element;
    // Record this node when both tag and attribute value match
    if (element.tag == expectedtag) {
        const GumboAttribute *attribute = gumbo_get_attribute(&element.attributes, attrname.c_str());
        if (attribute != nullptr && searchword == attribute->value) {
            elemvect.push_back(node);
        }
    }
    // Continue the search in every child
    const GumboVector &children = element.children;
    for (unsigned int index = 0; index != children.length; ++index) {
        GumboNode *child = static_cast<GumboNode *>(children.data[index]);
        gumbo_search_by_attr(elemvect, child, attrname, searchword, expectedtag);
    }
}
|
||
// Collect every element with the expected tag whose "class" attribute
// equals searchword exactly (the full attribute string is compared).
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    const std::string attribute_name = "class";
    gumbo_search_by_attr(elemvect, node, attribute_name, searchword, expectedtag);
}
|
||
// Find the first element with the expected tag whose "id" attribute equals
// searchword, searching node and everything below it.
// Calls panic() when no such element exists, so callers never receive a
// missing node.
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> elemvect;
    gumbo_search_by_attr(elemvect, node, "id", searchword, expectedtag);
    if (elemvect.empty()) {
        panic("Not a single node with ID "+searchword+" could be found!");
        // panic() aborts; this return only satisfies the compiler and no
        // longer allocates an uninitialised node that nobody would free
        return nullptr;
    }
    // Use first node found
    return elemvect.front();
}
|
||
|
||
// Collect every element with tag searchedtag found at or below node.
// Matches are appended to elemvect in depth-first, document order.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    // Only element nodes have a tag and children to look at
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboElement &element = node->v.element;
        if (element.tag == searchedtag) {
            elemvect.push_back(node);
        }
        for (unsigned int index = 0; index != element.children.length; ++index) {
            GumboNode *child = static_cast<GumboNode *>(element.children.data[index]);
            gumbo_search_by_tag(elemvect, child, searchedtag);
        }
    }
}
|
||
|
||
// Extract the visible text of a node.
// A text node yields its text. An element yields the text of its children
// joined by single spaces, except <script> and <style> elements, which
// yield nothing. Every other node type yields an empty string.
static std::string gumbo_cleantext(GumboNode* node) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }
    // Short-circuiting keeps the tag from being read on non-element nodes
    if (node->type != GUMBO_NODE_ELEMENT ||
        node->v.element.tag == GUMBO_TAG_SCRIPT ||
        node->v.element.tag == GUMBO_TAG_STYLE) {
        return "";
    }
    std::string joined;
    const GumboVector &kids = node->v.element.children;
    for (unsigned int index = 0; index != kids.length; ++index) {
        const std::string piece = gumbo_cleantext(static_cast<GumboNode *>(kids.data[index]));
        // A separator is only added before non-empty text of later children
        if (index != 0 && !piece.empty()) {
            joined.append(" ");
        }
        joined.append(piece);
    }
    return joined;
}
|
||
|
||
// Collect the value of attribute attrkey from elements tagged expected_tag.
// If node itself has the expected tag, only its own attribute is reported.
// Otherwise the children are searched recursively; the search does not
// descend into an element once it matched the expected tag.
// Values are returned in document order. A matching element that lacks the
// attribute contributes nothing (previously this dereferenced a null
// pointer), so the result may be shorter than the number of matches.
std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
    std::vector<std::string> attrvals;
    // Check the node type first: v.element is only meaningful for element
    // nodes, so the tag must not be read on text or comment nodes
    if (node->type != GUMBO_NODE_ELEMENT) {
        return attrvals;
    }
    // Check if current element is already the right one
    if (node->v.element.tag == expected_tag) {
        const GumboAttribute *attribute = gumbo_get_attribute(&node->v.element.attributes, attrkey.c_str());
        if (attribute != nullptr) {
            attrvals.push_back(attribute->value);
        }
        return attrvals;
    }
    // Iterate through child nodes; the recursive call handles both the
    // "child is the expected tag" and the "look deeper" case
    const GumboVector *children = &node->v.element.children;
    for (unsigned int it = 0; it < children->length; ++it) {
        GumboNode *childnode = static_cast<GumboNode *>(children->data[it]);
        const std::vector<std::string> found = gumbo_get_attr(childnode, attrkey, expected_tag);
        // Append in place instead of copying the whole result each time
        attrvals.insert(attrvals.end(), found.begin(), found.end());
    }
    // Return the final result
    return attrvals;
}
|
||
|
||
// Return the trimmed text of the first direct child element of node that
// has tag searchtag. Only direct children are considered.
// Calls panic() when node has no such child.
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    // v.element (children, tag) is only valid for element nodes, so both
    // the parent and each child are type-checked before it is read
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector *children = &node->v.element.children;
        // Iterate through children
        for (unsigned int it = 0; it < children->length; ++it) {
            GumboNode *childnode = static_cast<GumboNode *>(children->data[it]);
            if (childnode->type == GUMBO_NODE_ELEMENT && childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }
    panic("A tag that was searched for could not be found!");
    return "";
}
|
||
|
||
|
||
// Return all post elements found at or below node: every <article> whose
// class attribute is exactly "uk-comment", in document order.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> articles;
    const std::string post_class = "uk-comment";
    gumbo_search_by_class(articles, node, post_class, GUMBO_TAG_ARTICLE);
    return articles;
}
|
||
|
||
// Return the trimmed title text of a post.
// The title is the first <h4> below node whose class attribute is exactly
// "uk-comment-title uk-text-truncate". Calls panic() when the post has no
// such element instead of indexing into an empty result.
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post title element could not be found!");
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}
|
||
|
||
// Return the cleaned-up meta line of a post.
// The text is taken from the second <div class="uk-comment-meta"> below
// node, then trimmed and passed through clean_spaces(). Calls panic() when
// the post has fewer than two such elements instead of reading past the
// end of the result.
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        panic("Post meta element could not be found!");
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}
|
||
|
||
// Return the href of the first link inside a post's title element.
// The title is the first <h4> below node whose class attribute is exactly
// "uk-comment-title uk-text-truncate". Calls panic() when the title or a
// link with an href is missing instead of indexing into an empty result.
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post title element could not be found!");
    }
    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        panic("Post title does not contain a link!");
    }
    return hrefs.front();
}
|
||
|
||
// Return the files attached to a post.
// The links are read from the third <div class="uk-comment-meta"> below
// node: the "title" attribute of each <a> is the file name, its "href" the
// file URL. Each file becomes one single-entry map {name -> url}; names and
// URLs are paired by position and pairing stops at the shorter list.
// A post with fewer than three meta elements is treated as having no files
// instead of reading past the end of the search result.
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;
    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 3) {
        return filenameurlmap;
    }
    // Get URLs and filenames
    const std::vector<std::string> fileurls = gumbo_get_attr(metanodes[2], "href", GUMBO_TAG_A);
    const std::vector<std::string> filenames = gumbo_get_attr(metanodes[2], "title", GUMBO_TAG_A);
    // Walk both lists in lockstep until either one is exhausted
    auto urlit = fileurls.begin();
    auto nameit = filenames.begin();
    for (; urlit != fileurls.end() && nameit != filenames.end(); ++urlit, ++nameit) {
        std::map<std::string, std::string> entry;
        entry[*nameit] = *urlit;
        filenameurlmap.push_back(entry);
    }
    return filenameurlmap;
}
|
||
|
||
std::string get_post_desc(std::string post_url) {
|
||
std::string material_id;
|
||
std::stringstream httpcontent;
|
||
GumboOutput *post_document;
|
||
GumboNode *desc_node;
|
||
std::vector<GumboNode *> results;
|
||
// Get material ID
|
||
material_id = get_filename(post_url);
|
||
// Download post
|
||
long statuscode = curlreq(httpcontent, server_sid, post_url);
|
||
// Check statuscode
|
||
if (statuscode != 200) {
|
||
panic("Request to download post had an unexpected result: "+std::to_string(statuscode));
|
||
}
|
||
// Parse post
|
||
post_document = gumbo_parse(httpcontent.str().c_str());
|
||
// Get description element
|
||
desc_node = gumbo_search_by_id(post_document->root, "description"+material_id, GUMBO_TAG_DIV);
|
||
// Extract description
|
||
gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
|
||
// Cencenate occurencies
|
||
std::string result_string;
|
||
for (auto it = results.begin(); it != results.end(); it++) {
|
||
result_string.append(trim(gumbo_cleantext(*it)) + "\n");
|
||
}
|
||
// Return first occurence
|
||
return result_string;
|
||
}
|
||
|
||
// Return the first link inside <ul id="room-info"> whose href starts with
// "/rss/", or the literal string "none" when there is no such link.
// Currently unused.
std::string get_rss_url(GumboNode *node) {
    // Locate the room info list (panics inside the helper if it is missing)
    GumboNode *room_info_elem = gumbo_search_by_id(node, "room-info", GUMBO_TAG_UL);
    // Inspect the href of every link in it
    const std::string rss_prefix = "/rss/";
    for (const std::string &href : gumbo_get_attr(room_info_elem, "href", GUMBO_TAG_A)) {
        if (href.compare(0, rss_prefix.size(), rss_prefix) == 0) {
            return href;
        }
    }
    // Always return a defined value, even if no RSS link exists
    return "none";
}
|
||
|
||
|
||
|
||
// Shared parameter list of every output mode: mode_tmpl(name) expands to
// "name(<parameters>)", so it serves both to define a mode function and to
// spell the function type below. The postsmap_* vectors are filled by
// main() with one entry per post.
#define mode_tmpl(func) func(char *argv[], int argc, unsigned long numposts, \
        std::vector<std::string> postsmap_numname, \
        std::vector<std::string> postsmap_nummeta, \
        std::vector<std::string> postsmap_numurl, \
        std::vector<std::string> postsmap_numdesc, \
        std::vector<std::vector<std::string>> postsmap_numfileurls, \
        std::vector<std::vector<std::string>> postsmap_numfilenames)
// Callable holding a mode entry point; the int result is used as exit code
using mode = std::function<mode_tmpl(int)>;
// Modes share one signature, so each of them leaves some parameters unused
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||
|
||
// Interactive mode.
// Prints a numbered overview of all posts, then reads commands from
// standard input until end of file:
//   <number>  show description, name, URL and attached files of that post
//   r         refresh by re-executing the program with the same arguments
//   (empty)   ignored
// Always returns 0.
int mode_tmpl(mode_cli) {
    // Show overview
    for (unsigned long it = 0; it != numposts; it++) {
        std::cout << it << ") " << postsmap_numname[it] << " – " << postsmap_nummeta[it] << std::endl;
    }
    std::cout << "r) Refresh" << std::endl;
    // Start CLI loop
    std::string userinstr;
    while (true) {
        // Await user input
        std::cout << "? " << std::flush;
        if (!std::getline(std::cin, userinstr)) {
            // On EOF
            std::cout << std::endl;
            break;
        }
        // Check string input options first
        if (userinstr.empty()) {
            continue;
        }
        if (userinstr == "r") {
            // Replace this process with a fresh instance. execvp keeps the
            // current environment (the former execve call passed a null
            // environment) and also resolves argv[0] through PATH.
            execvp(argv[0], argv);
            // Only reached when the exec call failed
            perror("Refresh failed");
            continue;
        }
        // Try to convert input to unsigned long
        unsigned long userin = 0;
        try {
            userin = std::stoul(userinstr, nullptr, 0);
        } catch (const std::invalid_argument &) {
            std::cerr << "Invalid input" << std::endl;
            continue;
        } catch (const std::out_of_range &) {
            // Numbers too large for unsigned long used to terminate the
            // program with an uncaught exception
            std::cerr << "Invalid input" << std::endl;
            continue;
        }
        // Check if given number is valid
        if (userin >= numposts) {
            std::cerr << "No such post" << std::endl;
            continue;
        }
        // Print description (if one was fetched for this post)
        std::cout << std::endl << std::endl;
        if (userin < postsmap_numdesc.size()) {
            std::cout << postsmap_numdesc[userin];
        }
        std::cout << std::endl;
        // Print information
        std::cout << "Post name: " << postsmap_numname[userin] << std::endl;
        std::cout << "Post URL: " << server_url << postsmap_numurl[userin] << std::endl;
        // The file lists are bounds-checked because they are not guaranteed
        // to contain an entry for every post
        if (userin < postsmap_numfilenames.size() && userin < postsmap_numfileurls.size()) {
            const std::vector<std::string> &names = postsmap_numfilenames[userin];
            const std::vector<std::string> &urls = postsmap_numfileurls[userin];
            if (!names.empty()) {
                std::cout << "Post files:" << std::endl;
                for (unsigned long it = 0; it < names.size() && it < urls.size(); it++) {
                    std::cout << " – " << names[it] << ": " << server_url << urls[it] << std::endl;
                }
            }
        }
    }
    return 0;
}
|
||
|
||
// JSON mode.
// Writes one JSON array to standard output with an object per post:
// "name", "meta", "description", "url" (prefixed with the server URL) and
// "files", an array of {"name", "url"} objects. Always returns 0.
int mode_tmpl(mode_json) {
    json jsonroot = json::array();
    for (unsigned long postidx = 0; postidx != numposts; ++postidx) {
        // Assemble the attachment list of this post
        json files = json::array();
        const std::vector<std::string> &fileurls = postsmap_numfileurls[postidx];
        for (unsigned long fileidx = 0; fileidx != fileurls.size(); ++fileidx) {
            json file = json::object();
            file["name"] = postsmap_numfilenames[postidx][fileidx];
            file["url"] = server_url + fileurls[fileidx];
            files.push_back(file);
        }
        // Assemble the post itself and append it to the result
        json post = json::object();
        post["name"] = postsmap_numname[postidx];
        post["meta"] = postsmap_nummeta[postidx];
        post["description"] = postsmap_numdesc[postidx];
        post["url"] = server_url + postsmap_numurl[postidx];
        post["files"] = files;
        jsonroot.push_back(post);
    }
    // Serialise to cout...
    std::cout << jsonroot.dump() << std::endl;
    return 0;
}
|
||
|
||
|
||
static std::map<std::string, mode> modes;
|
||
static std::map<std::string, bool> mode_requires_description;
|
||
static std::map<std::string, int> mode_minargc;
|
||
void modedef_init() {
|
||
modes["cli"] = mode_cli;
|
||
mode_requires_description["cli"] = true;
|
||
mode_minargc["cli"] = 4;
|
||
modes["json"] = mode_json;
|
||
mode_requires_description["json"] = true;
|
||
mode_minargc["json"] = 4;
|
||
}
|
||
|
||
// Print the command line synopsis to standard error.
// argv[0] is used as the program name in every line.
void cmdusage(char *argv[]) {
    const char *program = argv[0];
    std::cerr << "Usage: " << program << " license" << std::endl;
    // Remaining invocations all take the session ID and the room
    const char *synopses[] = {" cli <SID> <room>", " json <SID> <room>", " * <SID> <room>"};
    for (const char *synopsis : synopses) {
        std::cerr << " " << program << synopsis << std::endl;
    }
}
|
||
|
||
int main(int argc, char *argv[]) {
|
||
// Catch SIGSEGV
|
||
signal(SIGSEGV, sigsegv_panic);
|
||
// Show license note if --license was given as first argument
|
||
if (argc > 1 and !strncmp(argv[1], "license", 9)) {
|
||
std::cout << "CommSyFuse Copyright (C) 2020 niansa" << std::endl;
|
||
std::cout << "This program comes with ABSOLUTELY NO WARRANTY; for details type `warranty'." << std::endl;
|
||
std::cout << "This is free software, and you are welcome to redistribute it" << std::endl;
|
||
std::cout << "under certain conditions; type `license' for details." << std::endl;
|
||
return 0;
|
||
}
|
||
// Check arguments
|
||
if (argc < 4) {
|
||
cmdusage(argv);
|
||
return 1;
|
||
}
|
||
// Create required variables
|
||
server_sid = argv[2];
|
||
room = argv[3];
|
||
std::stringstream httpcontent;
|
||
std::vector<std::string> postsmap_numname;
|
||
std::vector<std::string> postsmap_nummeta;
|
||
std::vector<std::string> postsmap_numurl;
|
||
std::vector<std::string> postsmap_numdesc;
|
||
std::vector<std::vector<std::string>> postsmap_numfileurls;
|
||
std::vector<std::vector<std::string>> postsmap_numfilenames;
|
||
std::vector<GumboNode *> postsmap_numnode;
|
||
GumboOutput *document;
|
||
// Check connection and download document
|
||
//std::clog << "Connecting to server..." << std::endl;
|
||
long statuscode = curlreq(httpcontent, server_sid, server_url+"/room/" + room + "/material");
|
||
if (statuscode == 302) {
|
||
std::cerr << "Connection error: Invalid SID" << std::endl;
|
||
return 2;
|
||
} else if (statuscode == 500) {
|
||
std::cerr << "Connection error: Invalid room" << std::endl;
|
||
return 3;
|
||
} else if (statuscode != 200) {
|
||
std::cerr << "Connection error: Server error (Code " << statuscode << ")" << std::endl;
|
||
return 9;
|
||
}
|
||
// Do some stuff
|
||
document = gumbo_parse(httpcontent.str().c_str());
|
||
httpcontent.str(std::string()); // Clear buffer just in case we need it later
|
||
// Get posts
|
||
auto posts = get_posts(document->root);
|
||
// Initialise mode definitions
|
||
modedef_init();
|
||
// Map posts and their corresponding URL to a number
|
||
//std::clog << "Loading data..." << std::endl;
|
||
unsigned long numposts = 0;
|
||
for (auto it = posts.begin(); it != posts.end(); it++) {
|
||
// Get posts name
|
||
postsmap_numname.push_back(get_post_name(*it));
|
||
// Get posts meta string
|
||
postsmap_nummeta.push_back(get_post_meta(*it));
|
||
// Get posts URL
|
||
postsmap_numurl.push_back(get_post_url(*it));
|
||
// Get posts description
|
||
if (mode_requires_description[argv[1]])
|
||
postsmap_numdesc.push_back(get_post_desc(server_url + *(postsmap_numurl.end() - 1)));
|
||
// Get posts files
|
||
auto urlnamefilemap = get_post_files(*it);
|
||
for (auto it2 = urlnamefilemap.begin(); it2 != urlnamefilemap.end(); it2++) {
|
||
for (auto const& pair: *it2) {
|
||
postsmap_numfilenames.push_back({}); postsmap_numfileurls.push_back({});
|
||
postsmap_numfilenames[numposts].push_back(pair.first);
|
||
postsmap_numfileurls[numposts].push_back(pair.second);
|
||
}
|
||
}
|
||
numposts++;
|
||
}
|
||
// Check specified mode
|
||
std::string mode = static_cast<std::string>(argv[1]);
|
||
if (mode_minargc[mode] > argc) {
|
||
cmdusage(argv);
|
||
return 1;
|
||
}
|
||
// Start it
|
||
if (modes.find(mode) != modes.end()) {
|
||
return modes[mode](argv, argc, numposts, postsmap_numname, postsmap_nummeta,postsmap_numurl,
|
||
postsmap_numdesc, postsmap_numfileurls, postsmap_numfilenames);
|
||
} else {
|
||
std::cout << "It works!" << std::endl;
|
||
}
|
||
return 0;
|
||
}
|