1
0
Fork 0
mirror of https://gitlab.com/niansa/qcommsy.git synced 2025-03-06 20:53:33 +01:00
qcommsy/libcommsy.hpp

471 lines
15 KiB
C++

#include <unistd.h>

#include <algorithm>
#include <csignal>
#include <cstdio>
#include <exception>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#include <curlpp/Infos.hpp>
#include <curlpp/Options.hpp>
#include <gumbo.h>
// Empty tag types used as exceptions throughout this file.
// None of them derives from std::exception, so a `catch (std::exception&)`
// handler will not intercept them; they must be caught by their own type.

// Thrown by the libCommsy constructor when the feed request answers with HTTP 302.
class invalidSIDError {};
// Thrown by the libCommsy constructor when the feed request answers with HTTP 500.
class invalidRoomError {};
// Thrown by libCommsy::getPost when the requested post index does not exist.
class invalidPostError {};
// Thrown by the libCommsy constructor when the request itself fails or
// returns a status code that is not handled explicitly.
class connectionFailError {};
// Thrown when no element with the requested id is found (see gumbo_search_by_id).
class parsingNoSuchIDError {};
// Thrown when an expected element is missing from the parsed page
// (e.g. by gumbo_find_text_by_tag).
class parsingNoSuchTagError {};
// Thrown by get_post_desc when downloading a post page does not return HTTP 200.
class descDownloadError {};
// Connection settings shared by the free functions and the libCommsy class.
// They are (re)assigned by every libCommsy constructor call, so they always
// reflect the most recently constructed instance.
// `static` gives them internal linkage: each translation unit that includes
// this header gets its own independent copy.
// Base URL of the server; prefixed to feed and post URLs.
static std::string server_url;
// Session ID sent as the "SID" cookie with every request.
static std::string server_sid;
// Room identifier inserted into the feed URL.
static std::string room;
// Task state of a post as detected by get_post_taskState.
// The plain enum is wrapped in a namespace so that values are written as
// taskState::todo etc. and the type as taskState::type.
namespace taskState {
// `none` is what get_post_taskState returns when no known marker is found.
enum type {none, done, inProgress, todo};
}
// Returns a copy of `s` with all leading whitespace removed.
std::string ltrim(const std::string& s) {
    // Compiled once; anchored at the start so only the leading run is matched.
    static const std::regex leading_whitespace("^[[:space:]]*", std::regex_constants::extended);
    std::string stripped = std::regex_replace(s, leading_whitespace, "");
    return stripped;
}
// Returns a copy of `s` with all trailing whitespace removed.
std::string rtrim(const std::string& s) {
    // Compiled once; anchored at the end so only the trailing run is matched.
    static const std::regex trailing_whitespace("[[:space:]]*$", std::regex_constants::extended);
    std::string stripped = std::regex_replace(s, trailing_whitespace, "");
    return stripped;
}
// Returns a copy of `s` with whitespace removed from both ends.
std::string trim(const std::string& s) {
    // Strip the tail first, then the head of the intermediate result.
    const std::string without_trailing = rtrim(s);
    return ltrim(without_trailing);
}
// Returns the part of `path` after the last '/' or '\\' separator.
// A path without any separator is returned unchanged; a path ending in a
// separator yields an empty string.
std::string get_filename(const std::string& path) {
    const std::string::size_type last_separator = path.find_last_of("/\\");
    if (last_separator == std::string::npos) {
        return path;
    }
    return path.substr(last_separator + 1);
}
// Normalises a scraped text fragment:
//   1. every run of two or more spaces is removed entirely,
//   2. every newline becomes a single space,
//   3. the first four characters of the result are dropped.
// NOTE(review): step 3 presumably strips a fixed prefix of the meta string
// this is applied to — confirm against the page markup.
std::string clean_spaces(const std::string& s) {
    static const std::regex space_runs("[ ]{2,}", std::regex_constants::extended);
    std::string cleaned = std::regex_replace(s, space_runs, "");
    for (char& ch : cleaned) {
        if (ch == '\n') {
            ch = ' ';
        }
    }
    // Dropping four characters from a shorter string leaves nothing.
    if (cleaned.size() <= 4) {
        return std::string();
    }
    return cleaned.substr(4);
}
// Returns `base` followed by all elements of `addition`.
// `base` is taken by value, so the caller's vector is not modified.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string& item : addition) {
        base.push_back(item);
    }
    return base;
}
// Performs an HTTP request for `URL`, sending `SID` as the "SID" cookie, and
// writes the response body into `responsebuffer`.
// Returns the HTTP response code reported by curlpp.
// Exceptions raised by curlpp during the transfer propagate to the caller.
// NOTE(review): the session ID is printed to stdout in clear text below.
static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
    // Debug trace of the outgoing request
    std::cout << "Connection details begin" << std::endl
              << "URL: " << URL << std::endl
              << "SID: " << SID << std::endl
              << "Connection details end" << std::endl;

    curlpp::Cleanup cleaner;
    curlpp::Easy request;

    // The session is carried in a hand-built Cookie header
    const std::list<std::string> headers{"Cookie: SID=" + SID};

    // NOTE(review): setOpt(OptionBase*) is expected to take ownership of the
    // option objects allocated here — confirm against the curlpp documentation.
    request.setOpt(new curlpp::options::WriteStream(&responsebuffer)); // body goes into the caller's buffer
    request.setOpt(new curlpp::options::Url(URL));
    request.setOpt(new curlpp::options::HttpHeader(headers));

    request.perform();

    return curlpp::infos::ResponseCode::get(request);
}
// Collects every element below (and including) `node` whose tag is
// `expectedtag` and whose attribute `attrname` is exactly equal to
// `searchword`. Matches are appended to `elemvect` in document (pre-order)
// order; matching elements are still searched for nested matches.
// Non-element nodes are ignored.
//
// The tree is walked with an explicit stack instead of recursion so that the
// two string arguments are not copied once per visited node and deeply
// nested documents cannot exhaust the call stack.
void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> pending;
    pending.push_back(node);
    while (!pending.empty()) {
        GumboNode *current = pending.back();
        pending.pop_back();
        if (current->type != GUMBO_NODE_ELEMENT) {
            continue;
        }
        if (current->v.element.tag == expectedtag) {
            const GumboAttribute *attr = gumbo_get_attribute(&current->v.element.attributes, attrname.c_str());
            if (attr != nullptr && attr->value == searchword) {
                elemvect.push_back(current);
            }
        }
        // Push children in reverse so they are popped in document order
        const GumboVector &children = current->v.element.children;
        for (unsigned int i = children.length; i > 0; --i) {
            pending.push_back(static_cast<GumboNode *>(children.data[i - 1]));
        }
    }
}
// Convenience wrapper around gumbo_search_by_attr for the "class" attribute:
// appends to `elemvect` every `expectedtag` element under `node` whose class
// attribute equals `searchword`.
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    const std::string attribute_name = "class";
    gumbo_search_by_attr(elemvect, node, attribute_name, searchword, expectedtag);
}
// Returns the first `expectedtag` element under `node` whose "id" attribute
// equals `searchword`.
// Throws parsingNoSuchIDError when there is no such element.
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> matches;
    gumbo_search_by_attr(matches, node, "id", searchword, expectedtag);
    if (matches.empty()) {
        // Nothing carries that id
        throw parsingNoSuchIDError();
    }
    return matches.front();
}
// Appends to `elemvect` every element below (and including) `node` whose tag
// is `searchedtag`, in document (pre-order) order. Non-element nodes are
// ignored.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    // Explicit work stack; visits nodes in the same order as a recursive walk
    std::vector<GumboNode *> work;
    work.push_back(node);
    while (!work.empty()) {
        GumboNode *current = work.back();
        work.pop_back();
        if (current->type != GUMBO_NODE_ELEMENT) {
            continue;
        }
        if (current->v.element.tag == searchedtag) {
            elemvect.push_back(current);
        }
        // Reverse push so the first child is handled first
        const GumboVector &kids = current->v.element.children;
        for (unsigned int remaining = kids.length; remaining > 0; --remaining) {
            work.push_back(static_cast<GumboNode*>(kids.data[remaining - 1]));
        }
    }
}
// Returns the visible text below `node`.
// A text node yields its text. An element yields the texts of its children
// joined by single spaces, except that <script> and <style> elements yield
// nothing. Every other node type yields an empty string.
static std::string gumbo_cleantext(GumboNode* node) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }
    if (node->type != GUMBO_NODE_ELEMENT) {
        return "";
    }
    const GumboTag tag = node->v.element.tag;
    if (tag == GUMBO_TAG_SCRIPT || tag == GUMBO_TAG_STYLE) {
        return "";
    }

    std::string joined;
    const GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        const std::string piece = gumbo_cleantext(static_cast<GumboNode*>(kids.data[idx]));
        if (piece.empty()) {
            // Empty children contribute neither text nor a separator
            continue;
        }
        // The separator depends on the child's position, not on whether
        // anything has been collected yet.
        if (idx != 0) {
            joined.append(" ");
        }
        joined.append(piece);
    }
    return joined;
}
// Returns the value of attribute `attrkey` for every `expected_tag` element
// found at or below `node`, in document order.
//
// - If `node` itself is an `expected_tag` element, only its own value is
//   returned; in general, the search does not descend into a matching element.
// - A matching element that lacks the attribute contributes an empty string,
//   so results of two calls for different attributes on the same subtree stay
//   positionally aligned.
// - A non-element `node` yields an empty vector.
//
// The node type is always checked before the element payload is read, and a
// missing attribute no longer dereferences a null pointer.
std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
    std::vector<std::string> attrvals;
    std::vector<GumboNode *> pending;
    pending.push_back(node);
    while (!pending.empty()) {
        GumboNode *current = pending.back();
        pending.pop_back();
        // Only elements carry a tag, attributes and children
        if (current->type != GUMBO_NODE_ELEMENT) {
            continue;
        }
        if (current->v.element.tag == expected_tag) {
            const GumboAttribute *attr = gumbo_get_attribute(&current->v.element.attributes, attrkey.c_str());
            attrvals.push_back(attr != nullptr ? attr->value : "");
            // Do not search inside a matching element
            continue;
        }
        // Push children in reverse so they are popped in document order
        const GumboVector &children = current->v.element.children;
        for (unsigned int i = children.length; i > 0; --i) {
            pending.push_back(static_cast<GumboNode *>(children.data[i - 1]));
        }
    }
    return attrvals;
}
// Returns the trimmed text of the first direct child of `node` that is an
// element with tag `searchtag`.
// Throws parsingNoSuchTagError when `node` is not an element or has no such
// child.
//
// Node types are checked before the element payload is read, so text and
// comment children are skipped instead of being misread as elements.
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector &children = node->v.element.children;
        for (unsigned int it = 0; it < children.length; ++it) {
            GumboNode *childnode = static_cast<GumboNode *>(children.data[it]);
            if (childnode->type == GUMBO_NODE_ELEMENT && childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }
    throw parsingNoSuchTagError();
}
// Returns all <article class="uk-comment"> elements found under `node`,
// in document order. Each one represents a single post of the feed.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> articles;
    gumbo_search_by_class(articles, node, "uk-comment", GUMBO_TAG_ARTICLE);
    return articles;
}
// Returns the trimmed text of the first <h4 class="uk-comment-title"> under
// `node`.
// Throws parsingNoSuchTagError when the post has no such title element
// (previously this indexed into an empty vector).
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        throw parsingNoSuchTagError();
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}
// Returns the "data-item-id" attribute of the first <article> element at or
// below `node`.
// Throws parsingNoSuchTagError when no <article> element is found
// (previously this indexed into an empty vector).
std::string get_post_id(GumboNode *node) {
    const std::vector<std::string> ids = gumbo_get_attr(node, "data-item-id", GUMBO_TAG_ARTICLE);
    if (ids.empty()) {
        throw parsingNoSuchTagError();
    }
    return ids.front();
}
// Returns the cleaned-up text of the second <div class="uk-comment-meta">
// under `node` (trimmed, then passed through clean_spaces).
// Throws parsingNoSuchTagError when the post has fewer than two such
// elements (previously this read past the end of the vector).
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        throw parsingNoSuchTagError();
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}
// Returns the "href" of the first <a> inside the first
// <h4 class="uk-comment-title"> under `node`.
// Throws parsingNoSuchTagError when the title element or the link inside it
// is missing (previously both cases indexed into an empty vector).
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        throw parsingNoSuchTagError();
    }
    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        throw parsingNoSuchTagError();
    }
    return hrefs.front();
}
// Reports whether the post under `node` contains at least one
// <div class="cs-comment-change-info">; its presence is treated as "unread".
bool get_post_unread(GumboNode *node) {
    std::vector<GumboNode *> changeMarkers;
    gumbo_search_by_class(changeMarkers, node, "cs-comment-change-info", GUMBO_TAG_DIV);
    const bool hasMarker = changeMarkers.size() > 0;
    return hasMarker;
}
// Derives the task state of the post under `node` from the "class" attributes
// of its <i> elements. The first attribute containing a known marker decides
// the result; within one attribute the markers are tested in the order
// "todo", "inProgress", "DONE(?)". Returns taskState::none if nothing matches.
taskState::type get_post_taskState(GumboNode *node) {
    const std::vector<std::string> iconClasses = gumbo_get_attr(node, "class", GUMBO_TAG_I);
    for (auto current = iconClasses.begin(); current != iconClasses.end(); ++current) {
        const auto contains = [&current](const char *marker) {
            return current->find(marker) != std::string::npos;
        };
        if (contains("todo")) {
            return taskState::todo;
        }
        if (contains("inProgress")) {
            return taskState::inProgress;
        }
        if (contains("DONE(?)")) { // TODO: placeholder marker, real class name still unknown
            return taskState::done;
        }
    }
    return taskState::none;
}
// Returns the files attached to the post under `node`.
// The links are taken from the <a> elements inside the third
// <div class="uk-comment-meta">: "title" is used as file name, "href" as URL.
// Each result entry is a single-element map {file name -> URL}; entries are
// in document order.
// A post with fewer than three meta elements yields an empty result
// (previously this read past the end of the vector).
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;
    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 3) {
        return filenameurlmap;
    }
    // Get URLs and file names of all links
    const std::vector<std::string> fileurls = gumbo_get_attr(metanodes[2], "href", GUMBO_TAG_A);
    const std::vector<std::string> filenames = gumbo_get_attr(metanodes[2], "title", GUMBO_TAG_A);
    // Pair them up position by position; stop at the shorter list
    const std::vector<std::string>::size_type count =
        fileurls.size() < filenames.size() ? fileurls.size() : filenames.size();
    filenameurlmap.reserve(count);
    for (std::vector<std::string>::size_type i = 0; i < count; ++i) {
        filenameurlmap.emplace_back();
        filenameurlmap.back()[filenames[i]] = fileurls[i];
    }
    return filenameurlmap;
}
std::string get_post_desc(std::string post_url) {
std::string material_id;
std::stringstream httpcontent;
GumboOutput *post_document;
GumboNode *desc_node;
std::vector<GumboNode *> results;
// Get material ID
material_id = get_filename(post_url);
// Download post
long statuscode = curlreq(httpcontent, server_sid, post_url);
// Check statuscode
if (statuscode != 200) {
throw descDownloadError();
}
// Parse post
post_document = gumbo_parse(httpcontent.str().c_str());
// Get description element
desc_node = gumbo_search_by_id(post_document->root, "description" + material_id, GUMBO_TAG_DIV);
// Extract description
gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
// Cencenate occurencies
std::string result_string;
for (auto it = results.begin(); it != results.end(); it++) {
result_string.append(trim(gumbo_cleantext(*it)) + "\n");
}
// Return first occurence
return result_string;
}
// A file attached to a post.
struct commsyFile {
// File name (taken from the link's "title" attribute by get_post_files)
std::string name;
// Link target (taken from the link's "href" attribute by get_post_files)
std::string url;
};
// One post of a room feed, as filled in by the libCommsy constructor.
struct commsyPost {
// Post title
std::string name;
// Item ID of the post; also used as the "lastId" paging cursor
std::string id;
// Description text. The sentinel "\xFF" means "not downloaded yet";
// libCommsy::getDescription replaces it on first access.
std::string description = "\xFF";
// Cleaned-up meta line of the post
std::string meta;
// URL of the post page as found in the feed; getDescription prefixes it
// with the server URL
std::string url;
// Whether the post carries a change marker (no default value; set by the
// libCommsy constructor)
bool unread;
// Task state of the post (no default value; set by the libCommsy constructor)
taskState::type taskState;
// Files attached to the post
std::vector<commsyFile> files;
};
// Library identification strings; not referenced elsewhere in this file.
#define libCommsy_NAME "libcommsy"
#define libCommsy_VERSION "1.3-stable"
class libCommsy {
public:
std::vector<commsyPost> posts;
unsigned long numposts;
std::string lastID;
bool postExists(unsigned long postID) {
return postID < numposts;
}
commsyPost *getPost(unsigned long postID) {
// Check if post exists
if (not postExists(postID)) {
throw invalidPostError();
}
// Return post pointer
return &posts[postID];
}
std::string *getDescription(unsigned long postID) {
// Get post
commsyPost *thispost = getPost(postID);
// Check if post description was downloaded already
if (thispost->description == "\xFF") {
// Download post
thispost->description = get_post_desc(server_url + thispost->url);
}
// Return it
return &thispost->description;
}
libCommsy(const std::string& _server_url, const std::string& _server_sid, const std::string& _room, const std::string start_id = "", const unsigned long max_posts = 0) {
// Define required variables
server_url = _server_url;
server_sid = _server_sid;
room = _room;
lastID = start_id;
std::stringstream httpcontent;
GumboOutput *document;
long statuscode;
numposts = 0;
// Loop until all or max_posts posts are fetched
while (1) {
// Check connection and download feed
try {
statuscode = curlreq(httpcontent, server_sid, server_url + "/room/" + room + "/feed/10/date?lastId=" + lastID);
} catch (std::exception&) {
throw connectionFailError();
}
if (statuscode == 302) {
throw invalidSIDError();
} else if (statuscode == 500) {
throw invalidRoomError();
} else if (statuscode != 200) {
std::cout << "Unhandled status code " << statuscode << std::endl;
throw connectionFailError();
}
// Do some stuff XD
document = gumbo_parse(httpcontent.str().c_str());
httpcontent.str(std::string()); // Clear buffer just in case we need it later
// Get posts
auto gumboPosts = get_posts(document->root);
if (gumboPosts.size() == 0) {
// Stop fetching more data
break;
}
// Map posts and their corresponding URL to a number
for (auto it = gumboPosts.begin(); it != gumboPosts.end(); it++) {
// Create post struct
commsyPost thispost;
{
// Get posts name
thispost.name = get_post_name(*it);
// Get posts ID
thispost.id = get_post_id(*it);
// Get posts meta string
thispost.meta = get_post_meta(*it);
// Get if post is unread
thispost.unread = get_post_unread(*it);
// Get posts task state
thispost.taskState = get_post_taskState(*it);
// Get posts URL
thispost.url = get_post_url(*it);
// Get posts files
auto files = get_post_files(*it);
for (const auto& filemap : files) {
for (const auto& [filename, fileurl] : filemap) {
commsyFile thisfile;
{
thisfile.name = filename;
thisfile.url = fileurl;
}
thispost.files.push_back(thisfile);
}
}
}
// Append to posts vector
posts.push_back(thispost);
// Increment post counter
numposts++;
// Get lastID
lastID = posts.back().id;
// Check if maximum amount of posts to load was exceeded
if (numposts == max_posts) {
// Stop loading more posts
break;
}
}
}
}
};