Author: Mason Wright
Email:
[email protected]
Date: Sun, 29 Jun 2025 13:33:00 -0600
src/grim.cc
5e4c38ff3c212cdd9881427ef3f8c2706539a190
Reworked parseSelectorParts and tests
Clone
-
.gitignore
M
Makefile
-
include/catch_amalgamated.cpp
-
include/catch_amalgamated.hpp
M
include/grim.h
-
include/parser.h
-
index.html
-
main.cc
-
src/adapter.cc
-
src/events.cc
M
src/grim.cc
A
src/grim.cc.bak
-
src/parser.cc
-
style.css
A
tests/css_selector.cc
-
tests/html_parser.cc
Commits
b966b2a517365074e5c381dbdea05b3221dc0198
e840f1eeb0ae26af69e1ae146ea9938e28e9f1af
e4e05418a640eaed08cd1ec7cd8644eb1dbcca50
4e01ba8ad2c3361fa4be3d896288020948b58b5e
aae562ac1350480e4889aabb35899f776c5b59e9
6c3ae0e31eb0893f20e3872117f92cc6b9a942af
350e7d88bb2feb9db00c6e032cc6623f215b7adf
95e6c70d23e99ffcf70e5bbe12503496e5d8f232
e188783659b9bc3b9993a647e93ed110e7f41db6
5e4c38ff3c212cdd9881427ef3f8c2706539a190
e50ea9e1356a74af18fdd171337ef9dc931e1f4e
8f2e83556d12aaebe8e8597ea6923804b0eb7a43
1627c585128af263181053ab2cf1a4cdcd14ee21
def3513f75b325464ad88a33c741c4ca80572b77
a21501590980a905fa9b902897d700a42a08b7f0
56074a6bfe4498d092f3a227297c8c20e2bb962c
d9cf1485b7ae0614130494f0e73237921323b9a1
80f04b134ae32ad8a9d526007b33dd02f6600f05
23d6c65f9368d3c622a55a3068a6b2f1efa0c8d4
09c195df02536b6a796bd648fce9669397b96109
f2b5c8202fbc904e2ed78260e3fdbd55164799d2
4bfba076120f389994fc46a98e8b7a2622314400
e36ac5417e10ee9b9f94f340e1ccf28afc5705ea
d00dc89a86dd7e2fcfd4618bc3a1c8cfba9e3c3d
d9eef16adaf292f3748db5fb5aa98463de10d712
18ff2ec1bfc1cf9fcd17c1acb05c3b41f8f0ed83
9e7fd2980d723437ea621b78d395fa72ca3f4922
Diff
diff --git a/src/grim.cc b/src/grim.cc index 25bad64..6600855 100755 --- a/src/grim.cc +++ b/src/grim.cc @@ -183,0 +184,6 @@ std::string Node::print(int indent) { +struct Style { + std::unordered_map
properties; + std::string selector; + // Index of when it was added (for cascading) + size_t index; +}; @@ -185,3 +191,6 @@ std::string Node::print(int indent) { -std::string trimSpace(std::string str) { - int start = 0; - int end = str.length() - 1; +// struct BaseParts { +// std::string tagName; +// std::string id; +// std::vector
classes; +// std::unordered_map
attributes; +// } @@ -189,4 +198,3 @@ std::string trimSpace(std::string str) { - // Handle empty string case - if (str.empty()) { - return ""; - } +std::vector
parseSelectorParts(std::string selector) { + // we want to find the right most (right of a " " > ~ + ) selector + // account for * for commas split the selector then for each parse right @@ -194,6 +202,2 @@ std::string trimSpace(std::string str) { - // Find the first non-space character - for (/* start is already 0 */; start <= end; ++start) { - if (!std::isspace(str[start])) { - break; - } - } + std::vector
parts; + std::string word; @@ -201,4 +205,3 @@ std::string trimSpace(std::string str) { - // If the loop finished, it means the string was all spaces or empty - if (start > end) { - return ""; // Or a string of spaces, depending on desired behavior for " " - } + for (size_t s = 0; s < selector.length(); s++) { + // Start parsing if we see a comma or the end of the selector + word += selector[s]; @@ -206,6 +209,16 @@ std::string trimSpace(std::string str) { - // Find the last non-space character - for (/* end is already str.length() - 1 */; end >= start; --end) { - if (!std::isspace(str[end])) { - break; - } - } + if (selector[s] == ',' || s == selector.length()-1) { + if (selector[s] == ',') { + word.pop_back(); // Remove the trailing comma + } + // Break the tag and add the right most parts to parts + for (int i = word.length()-1; i > -1; i--) { + if ( + word[i] == ' ' || + word[i] == '>' || + word[i] == '~' || + word[i] == '+' + ) { + word = word.substr(i+1); + break; + } + } @@ -213,4 +226,12 @@ std::string trimSpace(std::string str) { - // Calculate the length of the substring - // The length is (end_index - start_index) + 1 - return str.substr(start, end - start + 1); -} + // Now word contains the right most selection + // we will extract the quick to verify parts from it + // so we can build the basemap + std::string part; + bool inbracket = false; + for (auto w : word) { +// Need to add :checked etc parsing + if (w == ':' && !inbracket) { + break; + } + // We don't care if the thing we are splitting is a id or class we just + // need them split up so we can use them as map keys @@ -218,6 +239,5 @@ std::string trimSpace(std::string str) { -struct Style { - std::unordered_map
properties; - std::vector
> selector; - // Index of when it was added (for cascading) - size_t index; -}; + if (w == '#' && !inbracket) { + if (part != "") { + parts.push_back(part); + part = ""; + } @@ -225,15 +245,16 @@ struct Style { -// parseSelectorParts deconstructs a selector into its indiviual parts -// so they can be store and used in things like finding styles for basemap -// and comparing a node to a selector in testSelector. This is a higher level function -// that should be ran once perselector and the results saved somewhere -std::vector
> parseSelectorParts(std::string selector) { - // need to account for selectors with parenthesis like: h1:(+ input:required) - // need to convert all single quotes to double quotes - // need to account for commas, will return - // need to colapse all spaces to a single space - // need to trim spaces - - std::vector
> parts; - std::vector
buffer; - std::string current; - size_t nesting = 0; + } else if (w == '.' && !inbracket) { + if (part != "") { + parts.push_back(part); + part = ""; + } + } else if (w == '[' || w == ']') { + + // We need to check if its in brackets or not because the [ will + // close the last .class, if we don't check then it will be in brackets + if (part != "" && inbracket) { + parts.push_back('['+part+']'); + part = ""; + } else if (!inbracket) { + parts.push_back(part); + part = ""; + } @@ -241 +262,7 @@ std::vector
> parseSelectorParts(std::string selector) { - size_t sl = selector.length(); + if (w=='[') { + inbracket = true; + } else if (w == ']'){ + inbracket = false; + } + continue; + } @@ -243,7 +270,4 @@ std::vector
> parseSelectorParts(std::string selector) { - for (size_t e = 0; e < sl; e++) { - char s = selector[e]; - - if (s == '\'') { - // convert single quotes to double quotes - s = '\"'; - } + // We need to keep a consistant quote mark to make matching reliable + if (inbracket && w == '\'') { + w = '"'; + } @@ -251,6 +275 @@ std::vector
> parseSelectorParts(std::string selector) { - if (s == ' ' && nesting == 0) { - if (e > 0 && selector[e-1] != ':' && selector[e-1] != ',' && selector[e-1] != '#' && selector[e-1] != '.') { - continue; - } else - if (e < sl-1 && selector[e+1] != ':' && selector[e+1] != ',' && selector[e+1] != '#' && selector[e+1] != '.') { - continue; + part += w; @@ -258 +276,0 @@ std::vector
> parseSelectorParts(std::string selector) { - } @@ -260,13 +278,2 @@ std::vector
> parseSelectorParts(std::string selector) { - if (nesting == 0 && !current.empty()) { - // !ISSUE: Missing space - if (s == ':' || s == '[' || s == ',' || s == '#' || s == '.' || s == ' ') { - buffer.push_back(trimSpace(current)); - current = s; - } else - if (s == '>' || s == '+' || s == '~') { - buffer.push_back(trimSpace(current)); - buffer.push_back(""); - buffer.back() += s; - current = ""; - } else { - current += s; + if (part != "") { + parts.push_back(part); @@ -274,2 +280,0 @@ std::vector
> parseSelectorParts(std::string selector) { - } else { - current += s; @@ -276,0 +282 @@ std::vector
> parseSelectorParts(std::string selector) { + } @@ -278,8 +284,7 @@ std::vector
> parseSelectorParts(std::string selector) { - if ((s == ',' && nesting == 0) || e == sl-1) { - //std::cout << selector.substr(start, e-start+1) << std::endl; - if (!current.empty() && current != ",") { - buffer.push_back(trimSpace(current)); - } - parts.push_back(buffer); - buffer = {}; - current = ""; + std::vector
deduplicated; + + for (size_t p1 = 0; p1 < parts.size(); p1++) { + bool matches = false; + + if (parts[p1] == "") { + continue; @@ -287,4 +292,5 @@ std::vector
> parseSelectorParts(std::string selector) { - if (s == '(' || s == '[' || s == '{') { - nesting++; - } else if (s == ')' || s == ']' || s == '}') { - nesting--; + + for (size_t p2 = p1+1; p2 < parts.size(); p2++) { + if (parts[p1] == parts[p2]) { + matches = true; + } @@ -293 +299,4 @@ std::vector
> parseSelectorParts(std::string selector) { - } + if (!matches) { + deduplicated.push_back(parts[p1]); + } + } @@ -295 +304 @@ std::vector
> parseSelectorParts(std::string selector) { - return parts; + return deduplicated; @@ -341,0 +351,18 @@ class StyleHandler { + std::string cleanSelector(std::string selector) { + std::string cleaned; + for (size_t i = 0; i < selector.length(); i++) { + if (selector[i] == ' ' && i > 0 && i < selector.length()-1) { + char prev = selector[i-1]; + char next = selector[i+1]; + + if (prev != '>' && prev != '+' && prev != '~' && next != '>' && next != '+' && next != '~' && !std::isspace(prev) && !std::isspace(next)) { + cleaned += selector[i]; + } + } else if (selector[i] == ' ' && (i == 0 || i == selector.length()-1)) { + continue; + } else { + cleaned += selector[i]; + } + } + return cleaned; + } @@ -343,3 +370,3 @@ class StyleHandler { - void add(std::string selector, std::unordered_map
properties) { - // Type is 2d vector of selector parts - auto parts = parseSelectorParts(selector); + void add(Style style) { + std::string selector = cleanSelector(style.selector); + style.selector = selector; @@ -347 +373,0 @@ class StyleHandler { - Style style = {properties,parts,index}; @@ -348,0 +375 @@ class StyleHandler { + style.index = index; @@ -350,3 +377,4 @@ class StyleHandler { -// !ISSUE: Re do the basemap mapping in the add function -// + will need to go through each 1d then once in start from right and go left until >+~" " or eol -/* for (auto p : parts) { + + std::vector
parts = parseSelectorParts(selector); + + for (auto p : parts) { @@ -360 +387,0 @@ class StyleHandler { -*/ @@ -390,0 +418,28 @@ class StyleHandler { +std::vector
splitSelector(std::string selector, char key) { + size_t nesting = 0; + + std::vector
selectors; + std::string current = ""; + + for (auto s : selector) { + if (s == '(') { + nesting++; + } else if (s == ')') { + nesting--; + } else if (s == '[') { + nesting++; + } else if (s == ']') { + nesting--; + } else if (s == '{') { + nesting++; + } else if (s == '}') { + nesting--; + } else if (s == key && nesting == 0) { + selectors.push_back(current); + current = ""; + continue; + } + current += s; + } + + selectors.push_back(current); @@ -391,0 +447,21 @@ class StyleHandler { + // Trim the space + for (size_t i = 0; i < selectors.size(); i++) { + size_t start = 0; + size_t end = selectors[i].length()-1; + for (size_t s = 0; s < selectors[i].length(); s++) { + if (!std::isspace(selectors[i][s])) { + start = s; + break; + } + } + for (size_t e = selectors[i].length()-1; e > start; e--) { + if (!std::isspace(selectors[i][e])) { + end = e; + break; + } + } + selectors[i] = selectors[i].substr(start,end - start +1); + } + + return selectors; +} @@ -410 +486 @@ void popSelector(std::string selector, std::string& trimmed, std::string& popped -/* + @@ -417,0 +494,11 @@ bool testSelector(Node* node, std::string selector) { + std::vector
selectors = splitSelector(selector, ','); + + if (selectors.size() > 1) { + for (auto s : selectors) { + bool match = testSelector(node, s); + + if (match) { + return true; + } + } + } @@ -426 +513 @@ bool testSelector(Node* node, std::string selector) { - std::vector
> pParts = parseSelectorParts(popped); + std::vector
pParts = parseSelectorParts(popped); @@ -444,4 +531,3 @@ bool testSelector(Node* node, std::string selector) { - - size_t i = 0; - for (auto p : pParts) { - i++; + + for (size_t i = 0; i < pParts.size(); i++) { + std::string p = pParts[i]; @@ -540 +626 @@ bool testSelector(Node* node, std::string selector) { -*/ +
#include "grim.h" #include
#include
#include
#include
#include
std::string ClassList::value() const { if (classes.empty()) { return ""; } std::string collection = classes[0]; for (size_t i = 1; i < classes.size(); ++i) { collection += " " + classes[i]; } return collection; } std::vector
ClassList::values() { return classes; } void ClassList::add(std::string value) { classes.push_back(value); } void ClassList::remove(std::string value) { auto it_prev = std::find(classes.begin(), classes.end(), value); if (it_prev != classes.end()) { *it_prev = classes.back(); classes.pop_back(); } } // Constructor Node::Node() : parent(nullptr) {} std::string Node::getTagName() const { return TagName; } void Node::setTagName(const std::string& name) { TagName = name; } const std::unordered_map
& Node::getAttributes() const { return Attributes; } // Implement the getter/setter methods declared using the macro #define IMPLEMENT_ATTRIBUTE_ACCESSORS(_Type, _FuncNameSuffix, _AttrKeyString) \ _Type Node::get##_FuncNameSuffix() const { \ return getAttribute<_Type>(_AttrKeyString); \ } \ void Node::set##_FuncNameSuffix(_Type value) { \ setAttribute(_AttrKeyString, value); \ } IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Id, "id") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, InnerText, "innerText") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, ContentEditable, "contenteditable") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Href, "href") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Src, "src") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Title, "title") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Value, "value") IMPLEMENT_ATTRIBUTE_ACCESSORS(int, TabIndex, "tabindex") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, Disabled, "disabled") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, Required, "required") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, Checked, "checked") Node* Node::createElement(std::string name) { std::unique_ptr
newNode = std::make_unique
(); newNode->setTagName(name); newNode->parent = this; children.push_back(std::move(newNode)); return children.back().get(); } template
void Node::setAttribute(const std::string& name, const T& value) { if constexpr (std::is_same_v
) { if (value) { Attributes[name] = ""; } else { Attributes.erase(name); } } else if constexpr (std::is_arithmetic_v
) { Attributes[name] = std::to_string(value); } else { static_assert(std::is_convertible_v
, "setAttribute: Type cannot be converted to std::string automatically."); Attributes[name] = static_cast
(value); } } void Node::setAttribute(const std::string& name, const std::string& value) { Attributes[name] = value; } template
T Node::getAttribute(const std::string& name) const { auto it = Attributes.find(name); if (it != Attributes.end()) { const std::string& s = it->second; if constexpr (std::is_same_v
) { try { return std::stoi(s); } catch (const std::invalid_argument& e) { return 0; } catch (const std::out_of_range& e) { return 0; } } else if constexpr (std::is_same_v
) { std::string lower_s = s; std::transform(lower_s.begin(), lower_s.end(), lower_s.begin(), [](unsigned char c){ return std::tolower(c); }); return (!lower_s.empty() && lower_s != "false"); } else if constexpr (std::is_same_v
) { try { return std::stod(s); } catch (const std::invalid_argument& e) { return 0.0; } catch (const std::out_of_range& e) { return 0.0; } } else { static_assert(std::is_convertible_v
, "getAttribute: Type conversion from std::string not implemented for this type."); return static_cast
(s); } } return T(); } std::string Node::getAttribute(const std::string& name) const { auto it = Attributes.find(name); if (it != Attributes.end()) { return it->second; } return ""; } std::vector
Node::getAttributeKeys() { std::vector
keys; for(auto p : Attributes) { keys.push_back(p.first); } return keys; } std::string Node::print(int indent) { std::string out = ""; for (int i = 0; i < indent; ++i) { out += " "; } out += "<" + getTagName(); for (const auto& attr_pair : getAttributes()) { if (attr_pair.first == "innerText" || attr_pair.first == "tagName") { continue; } out += " " + attr_pair.first; if (!attr_pair.second.empty()) { out += "=\"" + attr_pair.second + "\""; } } out += ">"; if (!getAttribute("innerText").empty()) { out += "\n" + getAttribute("innerText"); } out += "\n"; for (const auto& child : children) { out += child->print(indent + 1)+"\n"; } for (int i = 0; i < indent; ++i) { out += " "; } out += "" + getTagName() + ">\n"; return out; } std::string trimSpace(std::string str) { int start = 0; int end = str.length() - 1; // Handle empty string case if (str.empty()) { return ""; } // Find the first non-space character for (/* start is already 0 */; start <= end; ++start) { if (!std::isspace(str[start])) { break; } } // If the loop finished, it means the string was all spaces or empty if (start > end) { return ""; // Or a string of spaces, depending on desired behavior for " " } // Find the last non-space character for (/* end is already str.length() - 1 */; end >= start; --end) { if (!std::isspace(str[end])) { break; } } // Calculate the length of the substring // The length is (end_index - start_index) + 1 return str.substr(start, end - start + 1); } struct Style { std::unordered_map
properties; std::vector
> selector; // Index of when it was added (for cascading) size_t index; }; // parseSelectorParts deconstructs a selector into its individual parts // so they can be stored and used in things like finding styles for basemap // and comparing a node to a selector in testSelector. This is a higher-level function // that should be run once per selector and the results saved somewhere std::vector
> parseSelectorParts(std::string selector) { // need to account for selectors with parentheses like: h1:(+ input:required) // need to convert all single quotes to double quotes // need to account for commas, will return // need to collapse all spaces to a single space // need to trim spaces std::vector
> parts; std::vector
buffer; std::string current; size_t nesting = 0; size_t sl = selector.length(); for (size_t e = 0; e < sl; e++) { char s = selector[e]; if (s == '\'') { // convert single quotes to double quotes s = '\"'; } if (s == ' ' && nesting == 0) { if (e > 0 && selector[e-1] != ':' && selector[e-1] != ',' && selector[e-1] != '#' && selector[e-1] != '.') { continue; } else if (e < sl-1 && selector[e+1] != ':' && selector[e+1] != ',' && selector[e+1] != '#' && selector[e+1] != '.') { continue; } } if (nesting == 0 && !current.empty()) { // !ISSUE: Missing space if (s == ':' || s == '[' || s == ',' || s == '#' || s == '.' || s == ' ') { buffer.push_back(trimSpace(current)); current = s; } else if (s == '>' || s == '+' || s == '~') { buffer.push_back(trimSpace(current)); buffer.push_back(""); buffer.back() += s; current = ""; } else { current += s; } } else { current += s; } if ((s == ',' && nesting == 0) || e == sl-1) { //std::cout << selector.substr(start, e-start+1) << std::endl; if (!current.empty() && current != ",") { buffer.push_back(trimSpace(current)); } parts.push_back(buffer); buffer = {}; current = ""; } if (s == '(' || s == '[' || s == '{') { nesting++; } else if (s == ')' || s == ']' || s == '}') { nesting--; } } return parts; } class StyleHandler { private: // basemap: Maps baseparts to a index of the styles vector pointing to a Style object // because processing a CSS selector isn't trivial we want to make a short list of // the styles that can possibly be a match so we aren't spending a lot of compute // on a selector that applies to the wrong element. Todo this we take the right most part // of a selector (that is the part that targets the current element) and we do a few checks // like is the tagname the same, does it have that class, and do the id's match. If so that // is a candidate and we run the full selector on it. That includes checking parents,children, // or what ever else the selector needs to match. std::unordered_map
> basemap; std::vector