// Mirror of https://gitlab.com/niansa/qcommsy.git
// Synced 2025-03-06 20:53:33 +01:00
// 426 lines, 14 KiB, C++
#include <iostream>
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <fstream>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <regex>
|
|
#include <exception>
|
|
|
|
#include <csignal>
|
|
#include <cstdio>
|
|
#include <unistd.h>
|
|
|
|
#include <curlpp/cURLpp.hpp>
|
|
#include <curlpp/Easy.hpp>
|
|
#include <curlpp/Infos.hpp>
|
|
#include <curlpp/Options.hpp>
|
|
|
|
#include <nlohmann/json.hpp>
|
|
using json = nlohmann::json;
|
|
|
|
#include <gumbo.h>
|
|
|
|
// Exception tag type: thrown by the libCommsy constructor when the feed
// request answers with HTTP status 302.
class invalidSIDError {};
|
|
// Exception tag type: thrown by the libCommsy constructor when the feed
// request answers with HTTP status 500.
class invalidRoomError {};
|
|
// Exception tag type: thrown by libCommsy::getPost() when the requested post
// index does not exist.
class invalidPostError {};
|
|
// Exception tag type: thrown by the libCommsy constructor when the feed
// request raises a std::exception or returns an unhandled HTTP status code.
class connectionFailError {};
|
|
// Exception tag type: thrown when a lookup by ID finds nothing
// (see gumbo_search_by_id()).
class parsingNoSuchIDError {};
|
|
// Exception tag type: thrown when an expected element is not present in the
// parsed HTML (see gumbo_find_text_by_tag()).
class parsingNoSuchTagError {};
|
|
// Exception tag type: thrown by get_post_desc() when the post page is not
// answered with HTTP status 200.
class descDownloadError {};
|
|
|
|
|
|
// Base URL of the server; assigned by the libCommsy constructor and prefixed
// to the feed path and to post URLs.
static std::string server_url;
|
|
// Session ID; assigned by the libCommsy constructor and passed to curlreq(),
// which sends it as the "SID" cookie.
static std::string server_sid;
|
|
// Room identifier; assigned by the libCommsy constructor and inserted into
// the feed URL ("/room/<room>/feed/...").
static std::string room;
|
|
|
|
|
|
// Returns a copy of `s` without its leading whitespace.
//
// Whitespace means the six ASCII characters space, \t, \n, \v, \f and \r
// (what `[[:space:]]` matches in the default "C" locale). A string made up
// only of whitespace yields an empty string.
std::string ltrim(const std::string& s) {
    // A plain scan does the same job as the former `^[[:space:]]*` regex
    // without the cost of running the regex engine on every call.
    const std::string::size_type first = s.find_first_not_of(" \t\n\v\f\r");
    if (first == std::string::npos) {
        return std::string();
    }
    return s.substr(first);
}
|
|
// Returns a copy of `s` without its trailing whitespace.
//
// Whitespace means the six ASCII characters space, \t, \n, \v, \f and \r
// (what `[[:space:]]` matches in the default "C" locale). A string made up
// only of whitespace yields an empty string.
std::string rtrim(const std::string& s) {
    // A plain scan does the same job as the former `[[:space:]]*$` regex
    // without the cost of running the regex engine on every call.
    const std::string::size_type last = s.find_last_not_of(" \t\n\v\f\r");
    if (last == std::string::npos) {
        return std::string();
    }
    return s.substr(0, last + 1);
}
|
|
// Returns a copy of `s` with leading and trailing whitespace removed.
//
// Whitespace means the six ASCII characters space, \t, \n, \v, \f and \r.
// Interior whitespace is left untouched. Both ends are located first so only
// one substring is created.
std::string trim(const std::string& s) {
    static const char whitespace[] = " \t\n\v\f\r";
    const std::string::size_type first = s.find_first_not_of(whitespace);
    if (first == std::string::npos) {
        // Empty or whitespace-only input.
        return std::string();
    }
    const std::string::size_type last = s.find_last_not_of(whitespace);
    return s.substr(first, last - first + 1);
}
|
|
|
|
// Returns the part of `path` after the last '/' or '\\'.
// A path without any separator is returned unchanged; a path ending in a
// separator yields an empty string.
std::string get_filename(const std::string& path) {
    const std::string::size_type separator = path.find_last_of("/\\");
    if (separator == std::string::npos) {
        return path;
    }
    return path.substr(separator + 1);
}
|
|
|
|
// Normalises a meta string in three steps:
//   1. every run of two or more spaces is removed entirely,
//   2. every newline becomes a single space,
//   3. the first four characters are dropped (fewer if the string is shorter).
std::string clean_spaces(const std::string& s) {
    static const std::regex space_runs{"[ ]{2,}", std::regex_constants::extended};

    std::string cleaned = std::regex_replace(s, space_runs, "");
    for (char& ch : cleaned) {
        if (ch == '\n') {
            ch = ' ';
        }
    }
    // erase() clamps the count, so short strings simply become empty.
    cleaned.erase(0, 4);
    return cleaned;
}
|
|
|
|
// Returns `base` followed by all elements of `addition`, in order.
// `base` is taken by value, so the caller's vector is not modified.
std::vector<std::string> merge_strvects(std::vector<std::string> base, const std::vector<std::string> &addition) {
    base.reserve(base.size() + addition.size());
    for (const std::string& item : addition) {
        base.push_back(item);
    }
    return base;
}
|
|
|
|
// Requests `URL` with the header "Cookie: SID=<SID>", streams the response
// body into `responsebuffer` and returns the HTTP response code.
// Errors raised by curlpp during perform() are not caught here.
static long curlreq(std::stringstream &responsebuffer, std::string SID, std::string URL) {
    // Trace the request on stdout.
    // NOTE(review): this prints the session ID in clear text — confirm that
    // this is acceptable wherever stdout ends up.
    std::cout << "Connection details begin" << std::endl
              << "URL: " << URL << std::endl
              << "SID: " << SID << std::endl
              << "Connection details end" << std::endl;

    curlpp::Cleanup cleaner;
    curlpp::Easy request;

    // Let cURL write the response body into the caller's stream
    request.setOpt(new curlpp::options::WriteStream(&responsebuffer));
    // URL to retrieve
    request.setOpt(new curlpp::options::Url(URL));
    // Session cookie
    const std::list<std::string> cookie_header{"Cookie: SID=" + SID};
    request.setOpt(new curlpp::options::HttpHeader(cookie_header));

    request.perform();
    return curlpp::infos::ResponseCode::get(request);
}
|
|
|
|
void gumbo_search_by_attr(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string attrname, std::string searchword, GumboTag expectedtag) {
|
|
if (node->type != GUMBO_NODE_ELEMENT) {
|
|
return;
|
|
}
|
|
|
|
GumboAttribute* hclass;
|
|
if (node->v.element.tag == expectedtag &&
|
|
(hclass = gumbo_get_attribute(&node->v.element.attributes, attrname.c_str()))) {
|
|
if (hclass->value == searchword) {
|
|
elemvect.push_back(node);
|
|
}
|
|
}
|
|
|
|
GumboVector* children = &node->v.element.children;
|
|
for (unsigned int i = 0; i < children->length; ++i) {
|
|
gumbo_search_by_attr(elemvect, static_cast<GumboNode*>(children->data[i]), attrname, searchword, expectedtag);
|
|
}
|
|
}
|
|
|
|
// Collects into `elemvect` all `expectedtag` elements below (and including)
// `node` whose "class" attribute is exactly `searchword`.
// The comparison covers the whole attribute value, not individual class names.
void gumbo_search_by_class(std::vector<GumboNode *> &elemvect, GumboNode* node, std::string searchword, GumboTag expectedtag) {
    gumbo_search_by_attr(elemvect, node, "class", searchword, expectedtag);
}
|
|
|
|
// Returns the first `expectedtag` element below (and including) `node` whose
// "id" attribute equals `searchword`.
// Throws parsingNoSuchIDError when no element matches.
GumboNode *gumbo_search_by_id(GumboNode* node, std::string searchword, GumboTag expectedtag) {
    std::vector<GumboNode *> matches;
    gumbo_search_by_attr(matches, node, "id", searchword, expectedtag);

    if (matches.empty()) {
        throw parsingNoSuchIDError();
    }
    // Several elements may share the ID; the first one in document order wins
    return matches.front();
}
|
|
|
|
// Appends to `elemvect`, in document (pre-order) order, every element with
// tag `searchedtag` in the subtree rooted at `node`, `node` included.
// Non-element nodes contribute nothing.
void gumbo_search_by_tag(std::vector<GumboNode *> &elemvect, GumboNode* node, GumboTag searchedtag) {
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboElement &element = node->v.element;
        if (element.tag == searchedtag) {
            elemvect.push_back(node);
        }

        const GumboVector &kids = element.children;
        for (unsigned int idx = 0; idx < kids.length; ++idx) {
            gumbo_search_by_tag(elemvect, static_cast<GumboNode *>(kids.data[idx]), searchedtag);
        }
    }
}
|
|
|
|
// Extracts the text below `node`.
//  - A text node yields its text.
//  - An element other than <script>/<style> yields the texts of its children
//    in order; a single space is inserted before every non-empty child text
//    except for the first child.
//  - Any other node yields an empty string.
static std::string gumbo_cleantext(GumboNode* node) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }
    if (node->type != GUMBO_NODE_ELEMENT) {
        return "";
    }

    const GumboTag tag = node->v.element.tag;
    if (tag == GUMBO_TAG_SCRIPT || tag == GUMBO_TAG_STYLE) {
        return "";
    }

    std::string joined;
    const GumboVector &kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        const std::string piece = gumbo_cleantext(static_cast<GumboNode *>(kids.data[idx]));
        if (idx != 0 && !piece.empty()) {
            joined.append(" ");
        }
        joined.append(piece);
    }
    return joined;
}
|
|
|
|
std::vector<std::string> gumbo_get_attr(GumboNode *node, std::string attrkey, GumboTag expected_tag) {
|
|
std::vector<std::string> attrvals;
|
|
GumboNode *childnode;
|
|
GumboVector* children = &node->v.element.children;
|
|
std::vector<std::string> toappend;
|
|
|
|
// Check if current element is already the right one
|
|
if (node->v.element.tag == expected_tag) {
|
|
// Return this elements wanted attribute key
|
|
return {gumbo_get_attribute(&node->v.element.attributes, attrkey.c_str())->value};
|
|
}
|
|
|
|
// Check if This is a node element
|
|
else if (node->type != GUMBO_NODE_ELEMENT) {
|
|
return {};
|
|
}
|
|
|
|
// Iterate through child nodes
|
|
for (unsigned int it = 0; it < children->length; ++it) {
|
|
childnode = reinterpret_cast<GumboNode*> (children->data[it]);
|
|
if (childnode->v.element.tag == expected_tag) { // If node is the expected tag; use it
|
|
attrvals.push_back(gumbo_get_attribute(&childnode->v.element.attributes, attrkey.c_str())->value);
|
|
} else if (childnode->type == GUMBO_NODE_ELEMENT) { // Else; iterate through its child nodes
|
|
toappend = gumbo_get_attr(childnode, attrkey, expected_tag);
|
|
attrvals = merge_strvects(attrvals, toappend);
|
|
}
|
|
}
|
|
|
|
// Return the final result
|
|
return attrvals;
|
|
}
|
|
|
|
// Returns the trimmed text of the first direct child of `node` whose tag is
// `searchtag`.
// Throws parsingNoSuchTagError when `node` is not an element or has no such
// child. Only direct children are examined, not deeper descendants.
std::string gumbo_find_text_by_tag(GumboNode *node, GumboTag searchtag) {
    // Only element nodes have a child list
    if (node->type == GUMBO_NODE_ELEMENT) {
        const GumboVector &children = node->v.element.children;
        for (unsigned int it = 0; it < children.length; ++it) {
            GumboNode *childnode = static_cast<GumboNode *>(children.data[it]);
            // Text and comment children have no tag; skip them before reading it
            if (childnode->type == GUMBO_NODE_ELEMENT && childnode->v.element.tag == searchtag) {
                return trim(gumbo_cleantext(childnode));
            }
        }
    }

    throw parsingNoSuchTagError();
}
|
|
|
|
|
|
// Returns all <article> elements below `node` whose class attribute is
// exactly "uk-comment", in document order.
auto get_posts(GumboNode *node) {
    std::vector<GumboNode *> articles;
    gumbo_search_by_attr(articles, node, "class", "uk-comment", GUMBO_TAG_ARTICLE);
    return articles;
}
|
|
|
|
// Returns the trimmed text of the first <h4 class="uk-comment-title"> found
// in the post element `node`.
// Throws parsingNoSuchTagError when the post has no such heading.
std::string get_post_name(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        // Indexing an empty vector would be undefined behaviour
        throw parsingNoSuchTagError();
    }
    return trim(gumbo_cleantext(titlenodes.front()));
}
|
|
|
|
// Returns the "data-item-id" attribute of the first <article> reachable from
// `node` (normally `node` itself).
// Throws parsingNoSuchIDError when no ID could be extracted.
std::string get_post_id(GumboNode *node) {
    const std::vector<std::string> ids = gumbo_get_attr(node, "data-item-id", GUMBO_TAG_ARTICLE);
    if (ids.empty()) {
        // Indexing an empty vector would be undefined behaviour
        throw parsingNoSuchIDError();
    }
    return ids.front();
}
|
|
|
|
// Returns the meta line of a post: the text of the second
// <div class="uk-comment-meta"> inside `node`, trimmed and then passed
// through clean_spaces().
// Throws parsingNoSuchTagError when the post has fewer than two such divs.
std::string get_post_meta(GumboNode *node) {
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 2) {
        // Indexing past the end would be undefined behaviour
        throw parsingNoSuchTagError();
    }
    return clean_spaces(trim(gumbo_cleantext(metanodes[1])));
}
|
|
|
|
// Returns the "href" of the first <a> inside the post's
// <h4 class="uk-comment-title"> heading.
// Throws parsingNoSuchTagError when the heading is missing or holds no link
// with an href attribute.
std::string get_post_url(GumboNode *node) {
    std::vector<GumboNode *> titlenodes;
    gumbo_search_by_class(titlenodes, node, "uk-comment-title", GUMBO_TAG_H4);
    if (titlenodes.empty()) {
        throw parsingNoSuchTagError();
    }

    const std::vector<std::string> hrefs = gumbo_get_attr(titlenodes.front(), "href", GUMBO_TAG_A);
    if (hrefs.empty()) {
        // Indexing an empty vector would be undefined behaviour
        throw parsingNoSuchTagError();
    }
    return hrefs.front();
}
|
|
|
|
// Lists the files attached to the post element `node`.
//
// The links are taken from the third <div class="uk-comment-meta"> of the
// post. Each result entry is a one-element map {title attribute -> href
// attribute} for one <a> in that div, in document order.
//
// Returns an empty vector when the post has fewer than three meta divs.
// Anchors lacking either attribute are skipped, so a name is never paired
// with the URL of a different link.
std::vector<std::map<std::string, std::string>> get_post_files(GumboNode *node) {
    std::vector<std::map<std::string, std::string>> filenameurlmap;

    // Get meta nodes
    std::vector<GumboNode *> metanodes;
    gumbo_search_by_class(metanodes, node, "uk-comment-meta", GUMBO_TAG_DIV);
    if (metanodes.size() < 3) {
        // No file list present; indexing past the end would be undefined behaviour
        return filenameurlmap;
    }

    // Get the file links
    std::vector<GumboNode *> anchors;
    gumbo_search_by_tag(anchors, metanodes[2], GUMBO_TAG_A);

    // Read name and URL from the same anchor so they always belong together
    for (GumboNode *anchor : anchors) {
        const GumboAttribute *href = gumbo_get_attribute(&anchor->v.element.attributes, "href");
        const GumboAttribute *title = gumbo_get_attribute(&anchor->v.element.attributes, "title");
        if (href == nullptr || title == nullptr) {
            continue;
        }
        std::map<std::string, std::string> entry;
        entry[title->value] = href->value;
        filenameurlmap.push_back(entry);
    }
    return filenameurlmap;
}
|
|
|
|
std::string get_post_desc(std::string post_url) {
|
|
std::string material_id;
|
|
std::stringstream httpcontent;
|
|
GumboOutput *post_document;
|
|
GumboNode *desc_node;
|
|
std::vector<GumboNode *> results;
|
|
|
|
// Get material ID
|
|
material_id = get_filename(post_url);
|
|
// Download post
|
|
long statuscode = curlreq(httpcontent, server_sid, post_url);
|
|
// Check statuscode
|
|
if (statuscode != 200) {
|
|
throw descDownloadError();
|
|
}
|
|
|
|
// Parse post
|
|
post_document = gumbo_parse(httpcontent.str().c_str());
|
|
// Get description element
|
|
desc_node = gumbo_search_by_id(post_document->root, "description" + material_id, GUMBO_TAG_DIV);
|
|
// Extract description
|
|
gumbo_search_by_tag(results, desc_node, GUMBO_TAG_P);
|
|
|
|
// Cencenate occurencies
|
|
std::string result_string;
|
|
for (auto it = results.begin(); it != results.end(); it++) {
|
|
result_string.append(trim(gumbo_cleantext(*it)) + "\n");
|
|
}
|
|
|
|
// Return first occurence
|
|
return result_string;
|
|
}
|
|
|
|
|
|
// Plain data record describing one post of a room feed.
struct commsyPost {
    // Title of the post
    std::string name;
    // Item ID of the post; also used as the paging cursor ("lastId")
    std::string id;
    // Description text; stays empty until libCommsy::getDescription() loads it
    std::string description;
    // Cleaned-up meta line of the post
    std::string meta;
    // Link target of the post title; combined with the server URL to fetch the post
    std::string url;
    // Attached files: file name -> file URL
    std::map<std::string, std::string> files;
};
|
|
|
|
|
|
#define libCommsy_NAME "libcommsy"
|
|
#define libCommsy_VERSION "1.1-stable"
|
|
class libCommsy {
|
|
public:
|
|
std::vector<commsyPost> posts;
|
|
unsigned long numposts;
|
|
std::string lastID;
|
|
|
|
bool postExists(unsigned long postID) {
|
|
return postID < numposts;
|
|
}
|
|
|
|
commsyPost *getPost(unsigned long postID) {
|
|
// Check if post exists
|
|
if (not postExists(postID)) {
|
|
throw invalidPostError();
|
|
}
|
|
// Return post pointer
|
|
return &posts[postID];
|
|
}
|
|
|
|
std::string *getDescription(unsigned long postID) {
|
|
// Get post
|
|
commsyPost *thispost = getPost(postID);
|
|
|
|
// Check if post description was downloaded already
|
|
if (thispost->description.empty()) {
|
|
// Download post
|
|
thispost->description = get_post_desc(server_url + thispost->url);
|
|
}
|
|
// Return it
|
|
return &thispost->description;
|
|
}
|
|
|
|
libCommsy(const std::string& _server_url, const std::string& _server_sid, const std::string& _room, const std::string start_id = "", const unsigned long max_posts = 0) {
|
|
// Define required variables
|
|
server_url = _server_url;
|
|
server_sid = _server_sid;
|
|
room = _room;
|
|
lastID = start_id;
|
|
std::stringstream httpcontent;
|
|
GumboOutput *document;
|
|
long statuscode;
|
|
numposts = 0;
|
|
|
|
// Loop until all or max_posts posts are fetched
|
|
while (1) {
|
|
// Check connection and download feed
|
|
try {
|
|
statuscode = curlreq(httpcontent, server_sid, server_url + "/room/" + room + "/feed/10/date?lastId=" + lastID);
|
|
} catch (std::exception&) {
|
|
throw connectionFailError();
|
|
}
|
|
if (statuscode == 302) {
|
|
throw invalidSIDError();
|
|
} else if (statuscode == 500) {
|
|
throw invalidRoomError();
|
|
} else if (statuscode != 200) {
|
|
std::cout << "Unhandled status code " << statuscode << std::endl;
|
|
throw connectionFailError();
|
|
}
|
|
|
|
// Do some stuff XD
|
|
document = gumbo_parse(httpcontent.str().c_str());
|
|
httpcontent.str(std::string()); // Clear buffer just in case we need it later
|
|
|
|
// Get posts
|
|
auto gumboPosts = get_posts(document->root);
|
|
if (gumboPosts.size() == 0) {
|
|
// Stop fetching more data
|
|
break;
|
|
}
|
|
|
|
// Map posts and their corresponding URL to a number
|
|
for (auto it = gumboPosts.begin(); it != gumboPosts.end(); it++) {
|
|
// Create post struct
|
|
commsyPost thispost;
|
|
{
|
|
// Get posts name
|
|
thispost.name = get_post_name(*it);
|
|
// Get posts ID
|
|
thispost.id = get_post_id(*it);
|
|
// Get posts meta string
|
|
thispost.meta = get_post_meta(*it);
|
|
// Get posts URL
|
|
thispost.url = get_post_url(*it);
|
|
// Get posts files
|
|
auto files = get_post_files(*it);
|
|
for (const auto& filemap : files) {
|
|
thispost.files.insert(filemap.begin(), filemap.end());
|
|
}
|
|
}
|
|
|
|
// Append to posts vector
|
|
posts.push_back(thispost);
|
|
// Increment post counter
|
|
numposts++;
|
|
// Get lastID
|
|
lastID = posts.back().id;
|
|
// Check if maximum amount of posts to load was exceeded
|
|
if (numposts == max_posts) {
|
|
// Stop loading more posts
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
};
|