From b3ab05c40ceac4040bced7aa39a76f4f153d5508 Mon Sep 17 00:00:00 2001
From: zvon
Date: Tue, 15 Sep 2020 00:55:03 +0200
Subject: [PATCH] Initial working version

---
Note: this is a revised copy of the original patch. File contents, hunk
sizes and the summary below were updated together; the blob ids on the
"index" lines are still those of the original file contents.

 .gitignore               |  4 ++
 Makefile                 | 35 +++++++++++++++
 htmlparser.hpp           | 58 ++++++++++++++++++++++++
 main.cpp                 | 53 ++++++++++++++++++++++
 meal.cpp                 | 14 ++++++
 meal.hpp                 | 52 +++++++++++++++++++++
 menu.cpp                 | 57 +++++++++++++++++++++++
 menu.hpp                 | 32 +++++++++++++
 network/network.cpp      | 97 ++++++++++++++++++++++++++++++++++++++++
 network/network.hpp      | 29 ++++++++++++
 parser.hpp               | 28 ++++++++++++
 parsers/lightofindia.cpp | 59 ++++++++++++++++++++++++
 parsers/padagali.cpp     | 39 ++++++++++++++++
 parsers/parsers.hpp      | 35 +++++++++++++++
 parsers/udrevaka.cpp     | 36 +++++++++++++++
 parsers/ukarla.cpp       | 39 ++++++++++++++++
 16 files changed, 667 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 htmlparser.hpp
 create mode 100644 main.cpp
 create mode 100644 meal.cpp
 create mode 100644 meal.hpp
 create mode 100644 menu.cpp
 create mode 100644 menu.hpp
 create mode 100644 network/network.cpp
 create mode 100644 network/network.hpp
 create mode 100644 parser.hpp
 create mode 100644 parsers/lightofindia.cpp
 create mode 100644 parsers/padagali.cpp
 create mode 100644 parsers/parsers.hpp
 create mode 100644 parsers/udrevaka.cpp
 create mode 100644 parsers/ukarla.cpp

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e2f6cea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.o
+*.out
+menuprint
+.lvimrc
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..151c8a9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,35 @@
+CXX ?= g++
+CFLAGS ?= -O2 -Wall -Wextra `pkg-config libxml-2.0 --cflags` `pkg-config libxml++-3.0 --cflags`
+PREFIX ?= /usr/local/bin
+LDFLAGS ?= -lcurl `pkg-config libxml-2.0 --libs` `pkg-config libxml++-3.0 --libs`
+
+PARSERS = udrevaka.o padagali.o lightofindia.o ukarla.o
+# headers every parser translation unit pulls in, directly or transitively
+PARSER_DEPS = parsers/parsers.hpp parser.hpp menu.hpp meal.hpp network/network.hpp htmlparser.hpp
+
+# neither target produces a file of that name
+.PHONY: default clean
+default: menuprint
+
+menuprint: main.o meal.o menu.o network.o $(PARSERS)
+	$(CXX) $(CFLAGS) -o $@ $^ ${LDFLAGS}
+
+main.o: main.cpp parser.hpp menu.hpp meal.hpp parsers/parsers.hpp
+	$(CXX) $(CFLAGS) -c -o $@ $<
+meal.o: meal.cpp meal.hpp
+	$(CXX) $(CFLAGS) -c -o $@ $<
+menu.o: menu.cpp menu.hpp meal.hpp
+	$(CXX) $(CFLAGS) -c -o $@ $<
+network.o: network/network.cpp network/network.hpp
+	$(CXX) $(CFLAGS) -c -o $@ $<
+udrevaka.o: parsers/udrevaka.cpp $(PARSER_DEPS)
+	$(CXX) $(CFLAGS) -c -o $@ $<
+padagali.o: parsers/padagali.cpp $(PARSER_DEPS)
+	$(CXX) $(CFLAGS) -c -o $@ $<
+lightofindia.o: parsers/lightofindia.cpp $(PARSER_DEPS)
+	$(CXX) $(CFLAGS) -c -o $@ $<
+ukarla.o: parsers/ukarla.cpp $(PARSER_DEPS)
+	$(CXX) $(CFLAGS) -c -o $@ $<
+
+clean:
+	rm -Rf *.o menuprint
diff --git a/htmlparser.hpp b/htmlparser.hpp
new file mode 100644
index 0000000..6fec18f
--- /dev/null
+++ b/htmlparser.hpp
@@ -0,0 +1,58 @@
+#ifndef HTML_PARSER_H
+#define HTML_PARSER_H
+
+#include <libxml/HTMLparser.h>
+#include <libxml++/libxml++.h>
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// Parses an HTML string with libxml2 and exposes the root element through
+// the libxml++ wrapper so that callers can run XPath queries on it.
+class HtmlParser {
+public:
+    HtmlParser() = delete;
+    // Throws std::runtime_error when html yields no root element (presumably
+    // the case for the empty string Request::get() returns after a failure).
+    HtmlParser(const std::string &html) {
+        doc = htmlReadDoc(reinterpret_cast<const xmlChar *>(html.c_str()), NULL, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+        auto r = xmlDocGetRootElement(doc);
+        if(r == NULL) {
+            // the destructor does not run when the constructor throws
+            xmlFreeDoc(doc);
+            throw std::runtime_error("HtmlParser: document has no root element");
+        }
+        root.reset(new xmlpp::Element(r));
+    }
+    ~HtmlParser() {
+        xmlFreeDoc(doc);
+    }
+    xmlpp::Element &getRoot() {
+        return *root;
+    }
+private:
+    xmlDoc *doc;
+    std::unique_ptr<xmlpp::Element> root;
+};
+
+// Returns the content of a text node; throws std::runtime_error when node is
+// null or is not a content node.
+inline std::string nodeText(const xmlpp::Node *node) {
+    auto content = dynamic_cast<const xmlpp::ContentNode *>(node);
+    if(content == nullptr)
+        throw std::runtime_error("HtmlParser: expected a text node");
+    std::string text = content->get_content();
+    return text;
+}
+
+// Bounds-checked variant: returns the content of nodes[index] and throws
+// std::runtime_error when the query result has no such entry.
+inline std::string nodeText(const std::vector<xmlpp::Node *> &nodes, std::size_t index) {
+    if(index >= nodes.size())
+        throw std::runtime_error("HtmlParser: expected node is missing");
+    return nodeText(nodes[index]);
+}
+
+#endif
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..35e2895
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,53 @@
+#include "parser.hpp"
+#include "menu.hpp"
+#include "meal.hpp"
+#include "parsers/parsers.hpp"
+
+#include <cstddef>
+#include <exception>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+// Day labels; the menu at index i of a parser belongs to days[i].
+std::vector<std::string> days = {"MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY"};
+
+// Parses every registered restaurant and prints its menus day by day, the
+// soup of each day first.
+int main() {
+    std::vector<std::unique_ptr<LunchRest::Parser>> parsers;
+    parsers.emplace_back(new LunchRest::UDrevakaParser());
+    parsers.emplace_back(new LunchRest::PadagaliParser());
+    parsers.emplace_back(new LunchRest::LightOfIndiaParser());
+    parsers.emplace_back(new LunchRest::UKarlaParser());
+    // add parsers here
+    for(auto &x : parsers) {
+        try {
+            x->parse();
+        } catch(const std::exception &e) {
+            // one unreachable or restructured page must not abort the others
+            std::cerr << "ERROR " << x->getRestaurant() << ": " << e.what() << std::endl;
+            continue;
+        }
+        std::cout << "RESTAURANT " << x->getRestaurant() << std::endl;
+        const auto &menus = x->getMenus();
+        // a parser may produce more menus than there are day labels
+        for(std::size_t day = 0; day < menus.size() && day < days.size(); day++) {
+            const auto &menu = menus[day];
+            if(!menu.isValid())
+                continue;
+            std::cout << days[day] << std::endl;
+            const auto &meals = menu.getMeals();
+            // getSoupIndex() returns the largest unsigned value when the menu
+            // has no soup, which fails this bounds check
+            const auto soupInd = menu.getSoupIndex();
+            if(soupInd < meals.size())
+                std::cout << "\t" << meals[soupInd] << std::endl;
+            for(std::size_t i = 0; i < meals.size(); i++) {
+                if(i != soupInd)
+                    std::cout << "\t" << meals[i] << std::endl;
+            }
+        }
+    }
+}
diff --git a/meal.cpp b/meal.cpp
new file mode 100644
index 0000000..a64b898
--- /dev/null
+++ b/meal.cpp
@@ -0,0 +1,14 @@
+#include "meal.hpp"
+
+// Prints "[SOUP ]name[ - desc][, price czk]"; the description is omitted when
+// empty and the price when it is not positive.
+std::ostream &operator<<(std::ostream &os, const LunchRest::Meal &meal) {
+    if(meal.isSoup())
+        os << "SOUP ";
+    os << meal.getName();
+    if(!meal.getDesc().empty())
+        os << " - " << meal.getDesc();
+    if(meal.getPrice() > 0)
+        os << ", " << meal.getPrice() << " czk";
+    return os;
+}
diff --git a/meal.hpp b/meal.hpp
new file mode 100644
index 0000000..0e1d1e6
--- /dev/null
+++ b/meal.hpp
@@ -0,0 +1,52 @@
+#ifndef LUNCH_REST_MEAL_H
+#define LUNCH_REST_MEAL_H
+
+#include <string>
+#include <ostream>
+
+namespace LunchRest {
+// One item of a daily menu: name, optional description, price and a soup flag.
+class Meal {
+public:
+    Meal() = default;
+    Meal(bool soup, const std::string &name, const std::string &desc, int price) :
+            _isSoup(soup), _name(name), _desc(desc), _price(price) {}
+
+    bool isSoup() const {
+        return _isSoup;
+    }
+    const std::string &getName() const {
+        return _name;
+    }
+    const std::string &getDesc() const {
+        return _desc;
+    }
+    int getPrice() const {
+        return _price;
+    }
+    void setName(const std::string &name) {
+        _name = name;
+    }
+    void setPrice(int price) {
+        _price = price;
+    }
+    void setSoup(bool soup = true) {
+        _isSoup = soup;
+    }
+    void setDesc(const std::string &desc) {
+        _desc = desc;
+    }
+private:
+    bool _isSoup = false;
+    std::string _name;
+    std::string _desc;
+    // initialised so that a default-constructed Meal never exposes an
+    // indeterminate price; operator<< omits prices that are not positive
+    int _price = 0;
+};
+} // end of namespace LunchRest
+
+// Writes a one-line description of meal to os.
+std::ostream &operator<<(std::ostream &os, const LunchRest::Meal &meal);
+
+#endif
diff --git a/menu.cpp b/menu.cpp
new file mode 100644
index 0000000..beac1e3
--- /dev/null
+++ b/menu.cpp
@@ -0,0 +1,57 @@
+#include "menu.hpp"
+
+void LunchRest::Menu::addMeal(bool soup, const std::string &name, const std::string &desc, int price) {
+    _meals.emplace_back(soup, name, desc, price);
+}
+
+void LunchRest::Menu::addMeal(const LunchRest::Meal &meal) {
+    _meals.push_back(meal);
+}
+
+bool LunchRest::Menu::hasSoup() const {
+    bool ret = false;
+    for(auto &x : _meals) {
+        ret |= x.isSoup();
+    }
+    return ret;
+}
+
+// Returns the first soup, or an empty placeholder soup when there is none.
+LunchRest::Meal LunchRest::Menu::getSoup() const {
+    for(auto &x : _meals) {
+        if(x.isSoup())
+            return x;
+    }
+    return Meal(true, "", "", 0);
+}
+
+std::vector<LunchRest::Meal> LunchRest::Menu::getNonSoupMeals() {
+    std::vector<Meal> ret{};
+    for(auto &x : _meals) {
+        if(!x.isSoup())
+            ret.push_back(x);
+    }
+    return ret;
+}
+
+const std::vector<LunchRest::Meal> &LunchRest::Menu::getMeals() const {
+    return _meals;
+}
+
+// Returns the index of the first soup. Without a soup the result is -1
+// converted to unsigned, i.e. a value that is >= getMeals().size().
+unsigned long int LunchRest::Menu::getSoupIndex() const {
+    for(unsigned long int i = 0; i < _meals.size(); i++) {
+        if(_meals[i].isSoup())
+            return i;
+    }
+    return static_cast<unsigned long int>(-1);
+}
+
+void LunchRest::Menu::setInvalidMenu() {
+    _valid = false;
+}
+
+bool LunchRest::Menu::isValid() const {
+    return _valid;
+}
diff --git a/menu.hpp b/menu.hpp
new file mode 100644
index 0000000..de15fa8
--- /dev/null
+++ b/menu.hpp
@@ -0,0 +1,32 @@
+#ifndef LUNCH_REST_MENU_H
+#define LUNCH_REST_MENU_H
+
+#include "meal.hpp"
+#include <vector>
+#include <string>
+
+namespace LunchRest {
+// The meals one restaurant offers on one day.
+class Menu {
+public:
+    Menu() = default;
+    Menu(const std::vector<Meal> &meals) :
+            _meals(meals) {}
+
+    void addMeal(bool soup, const std::string &name, const std::string &desc, int price);
+    void addMeal(const Meal &meal);
+    bool hasSoup() const;
+    Meal getSoup() const;
+    std::vector<Meal> getNonSoupMeals();
+    const std::vector<Meal> &getMeals() const;
+    // index of the first soup, or (unsigned long int)-1 when there is none
+    unsigned long int getSoupIndex() const;
+    void setInvalidMenu();
+    bool isValid() const;
+private:
+    std::vector<Meal> _meals;
+    bool _valid = true;
+};
+} // end of namespace LunchRest
+
+#endif
diff --git a/network/network.cpp b/network/network.cpp
new file mode 100644
index 0000000..ba4e7ac
--- /dev/null
+++ b/network/network.cpp
@@ -0,0 +1,97 @@
+#include "network.hpp"
+#include <iostream>
+#include <string>
+
+// libcurl write callback: appends the received bytes to the std::string
+// passed through CURLOPT_WRITEDATA and reports all of them as consumed.
+size_t writeCallback( void *contents, size_t size, size_t nmemb,
+                      void *target ) {
+    *static_cast< std::string * >( target ) +=
+        std::string( static_cast< char * >( contents ), size * nmemb );
+    return size * nmemb;
+}
+
+Request::Request() {
+    curl_global_init( CURL_GLOBAL_ALL );
+    _curl_handle = curl_easy_init();
+    if ( _curl_handle == NULL ) {
+        std::cerr << "ERROR curl_easy_init" << std::endl;
+    }
+    curl_easy_setopt( _curl_handle, CURLOPT_WRITEFUNCTION, writeCallback );
+    curl_easy_setopt( _curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0" );
+}
+
+Request::~Request() {
+    curl_easy_cleanup( _curl_handle );
+    curl_slist_free_all( _chunk );
+    curl_global_cleanup();
+}
+
+// Takes the handle out of the POST mode a previous post() left it in.
+void cleanUp( CURL *curl_handle ) {
+    // numeric options are read as long, so the argument must be a long
+    curl_easy_setopt( curl_handle, CURLOPT_POST, 0L );
+    curl_easy_setopt( curl_handle, CURLOPT_POSTFIELDS, "" );
+}
+
+// Performs a GET on _server + url; returns the body, or "" on failure.
+std::string Request::get( const std::string &url ) {
+    // get rid of garbage
+    cleanUp( _curl_handle );
+    std::string response;
+    response.reserve( 100000 );
+    curl_easy_setopt( _curl_handle, CURLOPT_WRITEDATA,
+                      static_cast< void * >( &response ) );
+    curl_easy_setopt( _curl_handle, CURLOPT_URL, ( _server + url ).c_str() );
+    curl_easy_setopt( _curl_handle, CURLOPT_HTTPGET, 1L );
+    auto res = curl_easy_perform( _curl_handle );
+    if ( res != CURLE_OK ) {
+        std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror( res )
+                  << std::endl;
+        return "";
+    }
+    return response;
+}
+
+// Performs a POST of data to _server + url; returns the body, or "" on failure.
+std::string Request::post( const std::string &url, const std::string &data ) {
+    std::string response;
+    response.reserve( 100000 );
+    curl_easy_setopt( _curl_handle, CURLOPT_URL, ( _server + url ).c_str() );
+    curl_easy_setopt( _curl_handle, CURLOPT_POST, 1L );
+    curl_easy_setopt( _curl_handle, CURLOPT_POSTFIELDS, data.c_str() );
+    curl_easy_setopt( _curl_handle, CURLOPT_WRITEDATA,
+                      static_cast< void * >( &response ) );
+    auto res = curl_easy_perform( _curl_handle );
+    if ( res != CURLE_OK ) {
+        std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror( res )
+                  << std::endl;
+        return "";
+    }
+    return response;
+}
+
+void Request::addHeader( const std::string &header ) {
+    _chunk = curl_slist_append( _chunk, header.c_str() );
+    curl_easy_setopt( _curl_handle, CURLOPT_HTTPHEADER, _chunk );
+}
+
+void Request::clearHeader() {
+    curl_slist_free_all( _chunk );
+    _chunk = nullptr;
+    curl_easy_setopt( _curl_handle, CURLOPT_HTTPHEADER, _chunk );
+}
+
+bool Request::initSuccessful() {
+    return _curl_handle != nullptr;
+}
+
+void Request::setServer( const std::string &server ) {
+    _server = server;
+}
+
+int Request::lastResponseCode() {
+    long code{};
+    curl_easy_getinfo( _curl_handle, CURLINFO_RESPONSE_CODE, &code );
+    return code;
+}
diff --git a/network/network.hpp b/network/network.hpp
new file mode 100644
index 0000000..8846723
--- /dev/null
+++ b/network/network.hpp
@@ -0,0 +1,29 @@
+#ifndef NETWORK_HPP
+#define NETWORK_HPP
+
+#include <curl/curl.h>
+#include <string>
+
+// Thin wrapper around one libcurl easy handle.
+class Request {
+public:
+    Request();
+    ~Request();
+    // the destructor frees the raw handles, so a copy would free them twice
+    Request( const Request & ) = delete;
+    Request &operator=( const Request & ) = delete;
+    std::string get( const std::string &url );
+    std::string post( const std::string &url, const std::string &data );
+    void addHeader( const std::string &header );
+    void clearHeader();
+    bool initSuccessful();
+    void setServer( const std::string &server );
+    int lastResponseCode();
+
+private:
+    CURL *_curl_handle = nullptr;
+    struct curl_slist *_chunk = nullptr;
+    std::string _server;
+};
+
+#endif
diff --git a/parser.hpp b/parser.hpp
new file mode 100644
index 0000000..e47de3f
--- /dev/null
+++ b/parser.hpp
@@ -0,0 +1,28 @@
+#ifndef LUNCH_REST_PARSER_H
+#define LUNCH_REST_PARSER_H
+
+#include "menu.hpp"
+
+namespace LunchRest {
+// Base class of the restaurant scrapers: parse() fills menus from _url.
+class Parser {
+public:
+    Parser() = delete;
+    Parser(const std::string &url, const std::string &restaurant) :
+            _url(url), _restaurant(restaurant) {}
+    virtual ~Parser() = default;
+    const std::vector<Menu> &getMenus() const {
+        return menus;
+    }
+    const std::string &getRestaurant() const {
+        return _restaurant;
+    }
+    virtual void parse() = 0;
+protected:
+    std::string _url;
+    std::string _restaurant;
+    std::vector<Menu> menus;
+};
+} // end of namespace LunchRest
+
+#endif
diff --git a/parsers/lightofindia.cpp b/parsers/lightofindia.cpp
new file mode 100644
index 0000000..8493057
--- /dev/null
+++ b/parsers/lightofindia.cpp
@@ -0,0 +1,59 @@
+#include "parsers.hpp"
+#include "../network/network.hpp"
+#include "../htmlparser.hpp"
+
+#include <cctype>
+#include <stdexcept>
+#include <string>
+
+// Returns true when text is empty or consists of whitespace only.
+bool isWhiteSpaceOnly(const std::string &text) {
+    for(auto &x : text) {
+        // std::isspace is undefined for negative values, which plain char can
+        // hold for the bytes of non-ASCII characters
+        if(!std::isspace(static_cast<unsigned char>(x)))
+            return false;
+    }
+    return true;
+}
+
+// Fills menus with five entries taken from the table cells of the page.
+// Throws std::runtime_error when the page has no content container.
+void LunchRest::LightOfIndiaParser::parse() {
+    menus.clear();
+    menus.resize(5);
+    Request r;
+    auto html = r.get(_url);
+    HtmlParser hparse(html);
+    auto &root = hparse.getRoot();
+    auto containers = root.find("//div[@id='content_container']");
+    if(containers.empty())
+        throw std::runtime_error("Light of India: content container not found");
+    auto texts = containers[0]->find(".//td/text()");
+    int index = -1;
+    for(auto text : texts) {
+        std::string text_text = nodeText(text);
+        if(isWhiteSpaceOnly(text_text) || text_text.find("Week") != std::string::npos)
+            continue;
+        // a text starting with '1' advances to the next day's menu
+        if(text_text[0] == '1')
+            index++;
+        // skip texts seen before the first day or after the fifth one
+        if(index < 0 || index >= static_cast<int>(menus.size()))
+            continue;
+        auto end = text_text.find(')');
+        if(end == std::string::npos)
+            continue;
+        auto possible_end = text_text.find('g', end);
+        if(possible_end != std::string::npos)
+            end = possible_end;
+        try {
+            std::string name = text_text.substr(4, end - 3);
+            int price = std::stoi(text_text.substr(end+1));
+            bool soup = name.find("soup") != std::string::npos;
+            menus[index].addMeal(soup, name, "", price);
+        } catch(const std::logic_error &) {
+            // substr()/stoi() rejected the text: skip this row, keep the rest
+        }
+    }
+}
diff --git a/parsers/padagali.cpp b/parsers/padagali.cpp
new file mode 100644
index 0000000..b901b73
--- /dev/null
+++ b/parsers/padagali.cpp
@@ -0,0 +1,39 @@
+#include "parsers.hpp"
+#include "../network/network.hpp"
+#include "../htmlparser.hpp"
+
+#include <cstddef>
+#include <string>
+
+// Fills menus with five entries; days missing from the page are marked
+// invalid. nodeText() and std::stoi throw when a meal's texts are unusable.
+void LunchRest::PadagaliParser::parse() {
+    menus.clear();
+    Request r;
+    auto html = r.get(_url);
+    HtmlParser hparse(html);
+    auto &root = hparse.getRoot();
+    auto days = root.find("//div[@class='glf-mor-restaurant-menu-category']");
+    for(std::size_t i = 0; i < 5 && i < days.size(); i++) {
+        auto day = days[i];
+        Menu m{};
+        auto meals = day->find("./div");
+        for(auto &meal : meals) {
+            auto info = meal->find("./div/div/div");
+            // both blocks are read below; skip entries that lack one of them
+            if(info.size() < 2)
+                continue;
+            std::string desc = nodeText(info[1]->find("./text()"), 0);
+            std::string name = nodeText(info[0]->find("./h5/text()"), 0);
+            int price = std::stoi(nodeText(info[0]->find("./div/text()"), 0));
+            bool soup = name.find("Soup") != std::string::npos;
+            m.addMeal(soup, name, desc, price);
+        }
+        menus.push_back(m);
+    }
+    // same convention as UKarlaParser: pad the week with invalid menus
+    while(menus.size() < 5) {
+        menus.push_back(Menu{});
+        menus.back().setInvalidMenu();
+    }
+}
diff --git a/parsers/parsers.hpp b/parsers/parsers.hpp
new file mode 100644
index 0000000..a27fe63
--- /dev/null
+++ b/parsers/parsers.hpp
@@ -0,0 +1,35 @@
+#ifndef LUNCH_REST_PARSERS_H
+#define LUNCH_REST_PARSERS_H
+
+#include "../parser.hpp"
+
+// One Parser subclass per supported restaurant; each parse() is implemented
+// in the .cpp file named after the restaurant.
+namespace LunchRest {
+class UDrevakaParser : public Parser {
+public:
+    UDrevakaParser() : Parser("https://www.udrevaka.cz/denni-menu/", "U Dřeváka") {}
+    virtual ~UDrevakaParser() = default;
+    virtual void parse() override;
+};
+class PadagaliParser : public Parser {
+public:
+    PadagaliParser() : Parser("https://padagali.cz/denni-menu/", "Padagali") {}
+    virtual ~PadagaliParser() = default;
+    virtual void parse() override;
+};
+class LightOfIndiaParser : public Parser {
+public:
+    LightOfIndiaParser() : Parser("http://lightofindia.cz/lang-en/denni-menu", "Light of India") {}
+    virtual ~LightOfIndiaParser() = default;
+    virtual void parse() override;
+};
+class UKarlaParser : public Parser {
+public:
+    UKarlaParser() : Parser("https://www.ukarlabrno.cz/denni-menu/", "U Karla") {}
+    virtual ~UKarlaParser() = default;
+    virtual void parse() override;
+};
+} // end of namespace LunchRest
+
+#endif
diff --git a/parsers/udrevaka.cpp b/parsers/udrevaka.cpp
new file mode 100644
index 0000000..dfe3964
--- /dev/null
+++ b/parsers/udrevaka.cpp
@@ -0,0 +1,36 @@
+#include "parsers.hpp"
+#include "../network/network.hpp"
+#include "../htmlparser.hpp"
+
+// Appends one menu per day entry found on the page. nodeText() throws
+// std::runtime_error when a meal row has no text at all.
+void LunchRest::UDrevakaParser::parse() {
+    menus.clear();
+    Request r;
+    auto html = r.get(_url);
+    HtmlParser hparse(html);
+    auto &root = hparse.getRoot();
+    auto days = root.find("//li[@class='item-day']");
+    for(auto &day : days) {
+        Menu m{};
+        auto meals = day->find("./div[@class='row']");
+        for(auto meal : meals) {
+            auto divs = meal->find(".//div/text()");
+            Meal meal_obj{};
+            std::string name = nodeText(divs, 0);
+            auto soup_pos = name.find("Polévka");
+            if(soup_pos != std::string::npos) {
+                meal_obj.setSoup();
+                meal_obj.setName(name.substr(10, name.find('(') - 11));
+            } else {
+                meal_obj.setName(name.substr(3, name.find('(') - 4));
+            }
+            if(divs.size() > 1) {
+                std::string price = nodeText(divs, 1);
+                meal_obj.setPrice(std::stoi(price));
+            }
+            m.addMeal(meal_obj);
+        }
+        menus.push_back(m);
+    }
+}
diff --git a/parsers/ukarla.cpp b/parsers/ukarla.cpp
new file mode 100644
index 0000000..c921c24
--- /dev/null
+++ b/parsers/ukarla.cpp
@@ -0,0 +1,39 @@
+#include "parsers.hpp"
+#include "../network/network.hpp"
+#include "../htmlparser.hpp"
+
+// Appends one menu per day entry found on the page and pads the week to five
+// menus with invalid ones. nodeText() throws std::runtime_error when a meal
+// row has no text at all.
+void LunchRest::UKarlaParser::parse() {
+    menus.clear();
+    Request r;
+    auto html = r.get(_url);
+    HtmlParser hparse(html);
+    auto &root = hparse.getRoot();
+    auto days = root.find("//li[@class='item-day']");
+    int validdays = 0;
+    for(auto &day : days) {
+        validdays++;
+        Menu m{};
+        auto meals = day->find("./div[@class='row']");
+        for(auto &meal : meals) {
+            auto soup = false;
+            auto texts = meal->find("./div/text()");
+            std::string name = nodeText(texts, 0);
+            if(name[0] == 'P') {
+                soup = true;
+                name = name.substr(10);
+            }
+            int price = -1;
+            if(texts.size() > 1)
+                price = std::stoi(nodeText(texts, 1));
+            m.addMeal(soup, name, "", price);
+        }
+        menus.push_back(m);
+    }
+    for(int i = validdays; i < 5; i++) {
+        menus.push_back(Menu{});
+        menus.back().setInvalidMenu();
+    }
+}