commsyfuse/main.cpp

#include <unistd.h>

#include <algorithm>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#include <curlpp/Infos.hpp>
#include <curlpp/Options.hpp>

#include <gumbo.h>

// Base URL that every request in this file is built on (room pages, post URLs, file URLs).
static std::string server_url = "https://unterricht.sh.schulcommsy.de";
// Session ID sent to the server as the "SID" cookie; main() fills it from argv[1].
static std::string server_sid;
// Room identifier inserted into "/room/<room>/material"; main() fills it from argv[2].
static std::string room;


// Reports a fatal error and terminates the process.
//
// Writes a blank line, a fixed headline and `message` to stderr, then calls
// abort(). Control never returns to the caller.
void panic(std::string message) {
    std::cerr << std::endl;
    std::cerr << "An unhandled exception occured:" << std::endl;
    std::cerr << message << std::endl;
    abort();
}


// Returns a copy of `s` without its leading whitespace.
//
// Whitespace is the "C" locale set: space, \t, \n, \v, \f and \r. A string
// that consists only of whitespace (or is empty) yields an empty string.
// Implemented as a plain character scan instead of std::regex so the cost
// stays linear in the length of `s`.
std::string ltrim(const std::string &s) {
    static const char *const whitespace = " \t\n\v\f\r";
    const std::string::size_type first_kept = s.find_first_not_of(whitespace);
    if (first_kept == std::string::npos) {
        // Nothing but whitespace (or empty input).
        return "";
    }
    return s.substr(first_kept);
}
// Returns a copy of `s` without its trailing whitespace.
//
// Whitespace is the "C" locale set: space, \t, \n, \v, \f and \r. A string
// that consists only of whitespace (or is empty) yields an empty string.
// Implemented as a plain character scan instead of std::regex so the cost
// stays linear in the length of `s`, even for long runs of whitespace.
std::string rtrim(const std::string &s) {
    static const char *const whitespace = " \t\n\v\f\r";
    const std::string::size_type last_kept = s.find_last_not_of(whitespace);
    if (last_kept == std::string::npos) {
        // Nothing but whitespace (or empty input).
        return "";
    }
    return s.substr(0, last_kept + 1);
}
// Strips whitespace from both ends of `s`: trailing whitespace is removed
// first via rtrim(), then leading whitespace via ltrim().
std::string trim(const std::string &s) {
    const std::string without_trailing = rtrim(s);
    return ltrim(without_trailing);
}

// Returns the part of `path` that follows its last '/' or '\\' separator.
//
// If `path` contains no separator the whole string is returned; if it ends
// in a separator the result is empty. The argument is only read, so it is
// taken by const reference (const strings and temporaries are accepted).
std::string get_filename(const std::string &path) {
    const std::string::size_type separator = path.find_last_of("/\\");
    if (separator == std::string::npos) {
        return path;
    }
    return path.substr(separator + 1);
}

std::string clean_spaces(const std::string &s) {
    static const std::regex tws{"[ ]{2,}", std::regex_constants::extended};
    std::string newstr = std::regex_replace(s, tws, "");
    std::replace(newstr.begin(), newstr.end(), '\n', ' ');
    newstr.erase(0, 4);
    return newstr;
}

// Returns `base` followed by all elements of `addition`, in order.
// `base` is received by value, so the caller's vector is left untouched.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string &item : addition) {
        base.push_back(item);
    }
    return base;
}

static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
    // Initialise variables
    curlpp::Cleanup cleaner;
    curlpp::Easy request;
    // Set the writer callback to enable cURL to write result in a memory area
    request.setOpt(new curlpp::options::WriteStream(&responsebuffer));
    // Setting the URL to retrive.
    request.setOpt(new curlpp::options::Url(URL));
    // Set SID cookie
    std::list<std::string> header;
    header.push_back("Cookie: SID=" + SID);
    request.setOpt(new curlpp::options::HttpHeader(header));
    // Perform request
    request.perform();
    // Return result
    return curlpp::infos::ResponseCode::get(request);
}

void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }
    GumboAttribute* hclass;
    if (node->v.element.tag == expectedtag &&
        (hclass = gumbo_get_attribute(&node->v.element.attributes, attrname.c_str()))) {
        if (hclass->value == searchword) {
            elemvect.push_back(node);
        }
    }
    GumboVector* children = &node->v.element.children;
    for (unsigned int i = 0; i < children->length; ++i) {
        gumbo_search_by_attr(elemvect, static_cast<GumboNode*>(children->data[i]), attrname, searchword, expectedtag);
    }
}
// Collects into `elemvect` every `expectedtag` element below `node` whose
// "class" attribute matches `searchword`. The comparison is done by
// gumbo_search_by_attr() on the whole attribute value, so `searchword` has to
// be the complete class string, not just one of several class names.
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    const std::string class_attr = "class";
    gumbo_search_by_attr(elemvect, node, class_attr, searchword, expectedtag);
}
// Returns the first `expectedtag` element below `node` whose "id" attribute
// equals `searchword`.
//
// If no such element exists, panic() is called, which aborts the process.
// The returned node belongs to the parse tree it was found in.
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> elemvect;
    gumbo_search_by_attr(elemvect, node, "id", searchword, expectedtag);
    // Use first node found
    if (!elemvect.empty()) {
        return elemvect.front();
    }
    // If no nodes were found, panic()
    panic("Not a single node with ID "+searchword+" could be found!");
    // Not reached (panic() aborts); present only because panic() is not
    // declared as non-returning. No dummy node is allocated any more.
    return nullptr;
}

// Depth-first search below (and including) `node` for elements with tag
// `searchedtag`; matches are appended to `elemvect` in document order.
// Non-element nodes are ignored.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    const bool is_element = (node->type == GUMBO_NODE_ELEMENT);
    if (!is_element) {
        return;
    }

    if (node->v.element.tag == searchedtag) {
        elemvect.push_back(node);
    }

    // Matching elements are descended into as well.
    GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        GumboNode *kid = static_cast<GumboNode*>(kids.data[idx]);
        gumbo_search_by_tag(elemvect, kid, searchedtag);
    }
}

// Collects the text below `node`.
//
// A text node yields its own text. An element other than <script>/<style>
// yields the texts of its children joined in order, with a single space
// inserted before every non-empty piece except at the first child position.
// Every other node yields an empty string.
static std::string gumbo_cleantext(GumboNode* node) {
  if (node->type == GUMBO_NODE_TEXT) {
    return std::string(node->v.text.text);
  }

  // The tag is only inspected once the node is known to be an element.
  const bool is_element = (node->type == GUMBO_NODE_ELEMENT);
  if (!is_element ||
      node->v.element.tag == GUMBO_TAG_SCRIPT ||
      node->v.element.tag == GUMBO_TAG_STYLE) {
    return "";
  }

  std::string joined;
  GumboVector &kids = node->v.element.children;
  for (unsigned int idx = 0; idx < kids.length; ++idx) {
    const std::string piece = gumbo_cleantext(static_cast<GumboNode*>(kids.data[idx]));
    if (idx > 0 && !piece.empty()) {
      joined += " ";
    }
    joined += piece;
  }
  return joined;
}

std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
    std::vector<std::string> attrvals;
    GumboNode *childnode;
    GumboVector* children = &node->v.element.children;
    std::vector<std::string> toappend;
    // Check if current element is already the right one
    if (node->v.element.tag == expected_tag) {
        // Return this elements wanted attribute key
        return {gumbo_get_attribute(&node->v.element.attributes, attrkey.c_str())->value};
    }
    // Check if This is a node element
    else if (node->type != GUMBO_NODE_ELEMENT) {
        return {};
    }
    // Iterate through child nodes
    for (unsigned int it = 0; it < children->length; ++it) {
        childnode = (GumboNode*) children->data[it];
        if (childnode->v.element.tag == expected_tag) { // If node is the expected tag; use it
            attrvals.push_back(gumbo_get_attribute(&childnode->v.element.attributes, attrkey.c_str())->value);
        } else if (childnode->type == GUMBO_NODE_ELEMENT) { // Else; iterate through its child nodes
            toappend = gumbo_get_attr(childnode, attrkey, expected_tag);
            attrvals = merge_strvects(attrvals, toappend);
        }
    }
    // Return the final result
    return attrvals;
}

// Returns the trimmed text of the first direct child of `node` that is an
// element with tag `searchtag`.
//
// Only direct children are considered. If `node` is not an element or has no
// such child, panic() is called, which aborts the process. Node types are
// checked before any element field is read, so text and other non-element
// nodes are skipped safely.
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector *children = &node->v.element.children;
        // Iterate through children
        for (unsigned int it = 0; it < children->length; ++it) {
            GumboNode *childnode = static_cast<GumboNode*>(children->data[it]);
            if (childnode->type == GUMBO_NODE_ELEMENT && childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }
    panic("A tag that was searched for could not be found!");
    // Not reached (panic() aborts); keeps the compiler satisfied.
    return "";
}


// Returns every <article> element below `node` whose class attribute is
// exactly "uk-comment"; each of them is treated as one post by the callers.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> post_nodes;
    gumbo_search_by_class(post_nodes, node, "uk-comment", GUMBO_TAG_ARTICLE);
    return post_nodes;
}

// Returns the trimmed text of the post's title, i.e. of the first <h4> below
// `node` with class "uk-comment-title uk-text-truncate".
//
// Calls panic() (which aborts) when the post has no such element instead of
// indexing into an empty vector.
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post title element could not be found!");
        return "";
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}

// Returns the post's meta string: the text of the second
// <div class="uk-comment-meta"> below `node`, trimmed and then passed through
// clean_spaces().
//
// Calls panic() (which aborts) when fewer than two such elements exist
// instead of indexing past the end of the vector.
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        panic("Post meta element could not be found!");
        return "";
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}

// Returns the href of the first <a> inside the post's title element (the
// first <h4 class="uk-comment-title uk-text-truncate"> below `node`).
//
// Calls panic() (which aborts) when the title element or the link inside it
// is missing instead of indexing into an empty vector.
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title uk-text-truncate", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        panic("Post title element could not be found!");
        return "";
    }
    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        panic("Post title does not contain a link!");
        return "";
    }
    return hrefs.front();
}

// Returns the files attached to a post as a list of single-entry maps, each
// mapping a file name (the link's "title") to its URL (the link's "href").
//
// The links are taken from the third <div class="uk-comment-meta"> below
// `node`. Names and URLs are paired by position; pairing stops at the shorter
// of the two lists. A post with fewer than three meta elements yields an
// empty list instead of indexing past the end of the vector.
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;
    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 3) {
        return filenameurlmap;
    }
    // Get URLs and file names of all links in the files element
    const std::vector<std::string> fileurls = gumbo_get_attr(metanodes[2], "href", GUMBO_TAG_A);
    const std::vector<std::string> filenames = gumbo_get_attr(metanodes[2], "title", GUMBO_TAG_A);
    // Pair them up, one map per file
    for (std::size_t idx = 0; idx < fileurls.size() && idx < filenames.size(); ++idx) {
        filenameurlmap.emplace_back();
        filenameurlmap.back()[filenames[idx]] = fileurls[idx];
    }
    return filenameurlmap;
}

std::string get_post_desc(std::string post_url) {
    std::string material_id;
    std::stringstream httpcontent;
    GumboOutput *post_document;
    GumboNode *desc_node;
    std::vector<GumboNode *> results;
    // Get material ID
    material_id = get_filename(post_url);
    // Download post
    long statuscode = curlreq(httpcontent, server_sid, post_url);
    // Check statuscode
    if (statuscode != 200) {
        panic("Request to download post had a unexpected result: "+std::to_string(statuscode));
    }
    // Parse post
    post_document = gumbo_parse(httpcontent.str().c_str());
    // Get description element
    desc_node = gumbo_search_by_id(post_document->root, "description"+material_id, GUMBO_TAG_DIV);
    // Extract description
    gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
    // Cencenate occurencies
    std::string result_string;
    for (auto it = results.begin(); it != results.end(); it++) {
        result_string.append(trim(gumbo_cleantext(*it)) + "\n");
    }
    // Return first occurence
    return result_string;
}

// Currently unused.
// Looks up the <ul id="room-info"> below `node` and returns the first link
// target inside it that starts with "/rss/". Returns the literal "none" when
// no such link exists; a missing room-info element makes
// gumbo_search_by_id() panic.
std::string get_rss_url(GumboNode *node) {
    static const std::string rss_prefix = "/rss/";
    GumboNode *room_info = gumbo_search_by_id(node, "room-info", GUMBO_TAG_UL);
    const std::vector<std::string> links = gumbo_get_attr(room_info, "href", GUMBO_TAG_A);
    for (const std::string &link : links) {
        // Prefix test: compare only the leading characters of the link.
        if (link.compare(0, rss_prefix.size(), rss_prefix) == 0) {
            return link;
        }
    }
    // Sentinel so that callers always receive a defined value.
    return "none";
}


int main(int argc, char *argv[]) {
    // Check arguments
    if (argc < 2) {
        std::cerr << "Usage:  " << argv[0] << " <SID> <room>" << std::endl;
        return 1;
    }
    // Create required variables
    server_sid = argv[1];
    room = argv[2];
    std::stringstream httpcontent;
    std::string rss_url;
    std::vector<std::string> postsmap_numname;
    std::vector<std::string> postsmap_nummeta;
    std::vector<std::string> postsmap_numurl;
    std::vector<std::string> postsmap_numdesc;
    std::vector<std::vector<std::string>> postsmap_numfileurls;
    std::vector<std::vector<std::string>> postsmap_numfilenames;
    std::vector<GumboNode *> postsmap_numnode;
    GumboOutput *document;
    // Check connection and download document
    std::cout << "Connecting to server..." << std::endl;
    long statuscode = curlreq(httpcontent, server_sid, server_url+"/room/" + room + "/material");
    if (statuscode == 302) {
        std::cerr << "Connection error: Invalid SID" << std::endl;
        return 2;
    } else if (statuscode == 500) {
        std::cerr << "Connection error: Invalid room" << std::endl;
        return 3;
    } else if (statuscode != 200) {
        std::cerr << "Connection error: Server error (Code " << statuscode << ")" << std::endl;
        return 9;
    }
    // Do some stuff
    document = gumbo_parse(httpcontent.str().c_str());
    httpcontent.str(std::string()); // Clear buffer just in case we need it later
    // Get posts
    auto posts = get_posts(document->root);
    // Map posts and their corresponding URL to a number
    unsigned long numposts = 0;
    for (auto it = posts.begin(); it != posts.end(); it++) {
        // Get posts name
        postsmap_numname.push_back(get_post_name(*it));
        // Get posts meta string
        postsmap_nummeta.push_back(get_post_meta(*it));
        // Get posts URL
        postsmap_numurl.push_back(get_post_url(*it));
        // Get posts description
        postsmap_numdesc.push_back(get_post_desc(server_url + *(postsmap_numurl.end() - 1)));
        // Get posts files
        auto urlnamefilemap = get_post_files(*it);
        for (auto it2 = urlnamefilemap.begin(); it2 != urlnamefilemap.end(); it2++) {
            for (auto const& pair: *it2) {
                postsmap_numfilenames.push_back({}); postsmap_numfileurls.push_back({});
                postsmap_numfilenames[numposts].push_back(pair.first);
                postsmap_numfileurls[numposts].push_back(pair.second);
            }
        }
        // Show overview
        std::cout << numposts << ") " << postsmap_numname[numposts] << "  –  " << postsmap_nummeta[numposts] <<  std::endl;
        numposts++;
    }
    // Debug CLI
    unsigned long userin;
    while (true) {
        // Await user input
        std::cout << "? " << std::flush;
        std::cin >> userin;
        // Check if input is valid
        if (userin >= numposts) {
            std::cerr << "No such post" << std::endl;
            continue;
        }
        // If required; get posts description and cache it
        // Print description
        std::cout << std::endl << std::endl << postsmap_numdesc[userin] <<  std::endl;
        // Print informations
        std::cout << "Post name: " << postsmap_numname[userin] << std::endl;
        std::cout << "Post URL: " << server_url << postsmap_numurl[userin] << std::endl;
        if (postsmap_numfilenames[userin].size() != 0) {
            std::cout << "Post files:" << std::endl;
            for (unsigned long it = 0; it < postsmap_numfilenames[userin].size(); it++) {
                std::cout << "  – " << postsmap_numfilenames[userin][it] << ": " << server_url << postsmap_numfileurls[userin][it] << std::endl;
            }
        }
    }

    return 0;
}