Author: Mason Wright
Email:
[email protected]
Date: Fri, 4 Jul 2025 18:13:36 -0600
src/grim.cc
aae562ac1350480e4889aabb35899f776c5b59e9
Fixed issue with parsing only pseudo-classes
Clone
-
.gitignore
-
Makefile
-
config
-
include/catch_amalgamated.cpp
-
include/catch_amalgamated.hpp
-
include/grim.h
-
include/parser.h
-
index.html
-
main.cc
-
src/adapter.cc
-
src/events.cc
M
src/grim.cc
-
src/parser.cc
-
style.css
-
tests/css_selector.cc
-
tests/html_node.cc
-
tests/html_parser.cc
Commits
fc556c26a73a47fe5a718a1cad4eead8c1873174
6dfb96c72f966734322c80cd77576867f462dc77
3482d6efe8f342715f9a6e190ee7e8b2d59ddf7f
0bff1901284184c86da44aa48971c856d6d4fae3
b966b2a517365074e5c381dbdea05b3221dc0198
e840f1eeb0ae26af69e1ae146ea9938e28e9f1af
e4e05418a640eaed08cd1ec7cd8644eb1dbcca50
4e01ba8ad2c3361fa4be3d896288020948b58b5e
aae562ac1350480e4889aabb35899f776c5b59e9
6c3ae0e31eb0893f20e3872117f92cc6b9a942af
350e7d88bb2feb9db00c6e032cc6623f215b7adf
95e6c70d23e99ffcf70e5bbe12503496e5d8f232
e188783659b9bc3b9993a647e93ed110e7f41db6
5e4c38ff3c212cdd9881427ef3f8c2706539a190
e50ea9e1356a74af18fdd171337ef9dc931e1f4e
8f2e83556d12aaebe8e8597ea6923804b0eb7a43
1627c585128af263181053ab2cf1a4cdcd14ee21
def3513f75b325464ad88a33c741c4ca80572b77
a21501590980a905fa9b902897d700a42a08b7f0
56074a6bfe4498d092f3a227297c8c20e2bb962c
d9cf1485b7ae0614130494f0e73237921323b9a1
80f04b134ae32ad8a9d526007b33dd02f6600f05
23d6c65f9368d3c622a55a3068a6b2f1efa0c8d4
09c195df02536b6a796bd648fce9669397b96109
f2b5c8202fbc904e2ed78260e3fdbd55164799d2
4bfba076120f389994fc46a98e8b7a2622314400
e36ac5417e10ee9b9f94f340e1ccf28afc5705ea
d00dc89a86dd7e2fcfd4618bc3a1c8cfba9e3c3d
d9eef16adaf292f3748db5fb5aa98463de10d712
18ff2ec1bfc1cf9fcd17c1acb05c3b41f8f0ed83
9e7fd2980d723437ea621b78d395fa72ca3f4922
Diff
diff --git a/src/grim.cc b/src/grim.cc
index 1ff8fc0..2911970 100755
--- a/src/grim.cc
+++ b/src/grim.cc
@@ -385,5 +385,2 @@ std::vector<std::vector<std::string>> parseSelectorParts(std::string selector) {
- if (!trimmed.empty()) {
- if (trimmed[0] == ':' && trimmed.back() != ')') {
- trimmed = "[" +trimmed.substr(1)+"]";
- }
- buffer.push_back(trimmed);
+ if (trimmed[0] == ':' && trimmed.back() != ')') {
+ trimmed = "[" +trimmed.substr(1)+"]";
@@ -390,0 +388 @@ std::vector<std::vector<std::string>> parseSelectorParts(std::string selector) {
+ buffer.push_back(trimmed);
@@ -395 +392,0 @@ std::vector<std::vector<std::string>> parseSelectorParts(std::string selector) {
-
@@ -399 +395,0 @@ std::vector<std::vector<std::string>> parseSelectorParts(std::string selector) {
-
#include "grim.h"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
// Constructor void Node::ClassList::createIndex() { int index = 0; // Add a space to the back to help trigger the adding of the // last item std::string classes = self->Attributes["class"]+" "; // Update the stored length len = classes.length(); indexes.clear(); bool waitForFirstChar = true; for (size_t i = 0; i < len; i++) { if (waitForFirstChar && classes[i] != ' ') { index = i; waitForFirstChar = false; } else if (!waitForFirstChar) { if (classes[i] == ' ') { indexes.push_back({index, i-index}); waitForFirstChar = true; } } } } bool Node::ClassList::checkIndex() { std::string classes = self->Attributes["class"]; if (classes.length() != len) { return false; } else { // Skip the first one because there is a space // also we don't care if the values are the same // just that the array indexing is the same. If the // value is the same size and position it doesn't matter for (size_t i = 1; i < indexes.size(); i++) { // See if the index position is a space // if not return false if (classes[indexes[i-1].first] != ' ') { return false; } } return true; } } size_t Node::ClassList::length() { if (!checkIndex()) { createIndex(); } return indexes.size(); } std::string Node::ClassList::item(size_t key) { if (!checkIndex()) { createIndex(); } if (key < indexes.size()) { std::pair
pair = indexes[key]; return self->Attributes["class"].substr(pair.first, pair.second); } else { return ""; } } void Node::ClassList::add(std::string value) { if (!checkIndex()) { createIndex(); } if (self->Attributes["class"].length() == 0) { self->Attributes["class"] += value; } else { self->Attributes["class"] += " "+value; } // len because the index of the added space will be the length of // the prevous string int vl = value.length(); indexes.push_back({len, vl}); len += vl+1; } void Node::ClassList::remove(std::string value) { if (!checkIndex()) { createIndex(); } int newLen = value.length(); size_t cLen = indexes.size(); std::string classes = self->Attributes["class"]; for (size_t i = 0; i < cLen; i++) { std::pair
pair = indexes[i]; if (newLen == pair.second) { if (classes.substr(pair.first, pair.second) == value) { // Splice out the value to be removed // !ISSUE: This will keep the surounding spaces and cause the size to grow when // + classes are added and removed self->Attributes["class"] = classes.substr(0, pair.first)+classes.substr(pair.first+pair.second); } } } } bool Node::ClassList::contains(std::string value) { if (!checkIndex()) { createIndex(); } int newLen = value.length(); size_t cLen = indexes.size(); std::string classes = self->Attributes["class"]; bool found = false; for (size_t i = 0; i < cLen; i++) { std::pair
pair = indexes[i]; if (newLen == pair.second) { if (classes.substr(pair.first, pair.second) == value) { // Same logic as remove but turns found true then breaks found = true; break; } } } return found; } void Node::ClassList::toggle(std::string value) { if (contains(value)) { remove(value); } else { add(value); } } std::string Node::ClassList::value() const { return self->Attributes["class"]; } std::string Node::getTagName() const { return TagName; } void Node::setTagName(const std::string& name) { TagName = name; } const std::unordered_map
& Node::getAttributes() const { return Attributes; } // Implement the getter/setter methods declared using the macro #define IMPLEMENT_ATTRIBUTE_ACCESSORS(_Type, _FuncNameSuffix, _AttrKeyString) \ _Type Node::get##_FuncNameSuffix() const { \ return getAttribute<_Type>(_AttrKeyString); \ } \ void Node::set##_FuncNameSuffix(_Type value) { \ setAttribute(_AttrKeyString, value); \ } IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Id, "id") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, InnerText, "innerText") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, ContentEditable, "contenteditable") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Href, "href") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Src, "src") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Title, "title") IMPLEMENT_ATTRIBUTE_ACCESSORS(std::string, Value, "value") IMPLEMENT_ATTRIBUTE_ACCESSORS(int, TabIndex, "tabindex") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, Disabled, "disabled") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, Required, "required") IMPLEMENT_ATTRIBUTE_ACCESSORS(bool, Checked, "checked") Node* Node::createElement(std::string name) { std::unique_ptr
newNode = std::make_unique
(); newNode->setTagName(name); newNode->parent = this; children.push_back(std::move(newNode)); return children.back().get(); } template
void Node::setAttribute(const std::string& name, const T& value) { if constexpr (std::is_same_v
) { if (value) { Attributes[name] = ""; } else { Attributes.erase(name); } } else if constexpr (std::is_arithmetic_v
) { Attributes[name] = std::to_string(value); } else { static_assert(std::is_convertible_v
, "setAttribute: Type cannot be converted to std::string automatically."); Attributes[name] = static_cast
(value); } } void Node::setAttribute(const std::string& name, const std::string& value) { Attributes[name] = value; } template
// Reads attribute `name` and converts its text to T.
//
// Returns T() when the attribute is absent. Otherwise:
//  - int / double: parsed with std::stoi / std::stod; text that cannot be
//                  parsed or is out of range yields 0 / 0.0.
//  - bool:         true unless the value is "false" (case-insensitive). A
//                  present attribute with an empty value is true, which is
//                  exactly what setAttribute<bool>(name, true) stores.
//  - otherwise:    std::string must be convertible to T (checked at compile
//                  time).
T Node::getAttribute(const std::string& name) const {
    const auto it = Attributes.find(name);
    if (it == Attributes.end()) {
        return T();
    }

    const std::string& s = it->second;
    if constexpr (std::is_same_v<T, int>) {
        try {
            return std::stoi(s);
        } catch (const std::invalid_argument&) {
            return 0;
        } catch (const std::out_of_range&) {
            return 0;
        }
    } else if constexpr (std::is_same_v<T, bool>) {
        std::string lowered = s;
        std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return lowered != "false";
    } else if constexpr (std::is_same_v<T, double>) {
        try {
            return std::stod(s);
        } catch (const std::invalid_argument&) {
            return 0.0;
        } catch (const std::out_of_range&) {
            return 0.0;
        }
    } else {
        static_assert(std::is_convertible_v<std::string, T>,
                      "getAttribute: Type conversion from std::string not implemented for this type.");
        return static_cast<T>(s);
    }
}

// Returns the raw text of attribute `name`, or "" when it is absent.
std::string Node::getAttribute(const std::string& name) const {
    const auto it = Attributes.find(name);
    if (it == Attributes.end()) {
        return "";
    }
    return it->second;
}

std::vector<std::string>
Node::getAttributeKeys() { std::vector
keys; for(auto p : Attributes) { keys.push_back(p.first); } return keys; } std::string Node::print(int indent) { std::string out = ""; for (int i = 0; i < indent; ++i) { out += " "; } out += "<" + getTagName(); for (const auto& attr_pair : getAttributes()) { if (attr_pair.first == "innerText" || attr_pair.first == "tagName") { continue; } out += " " + attr_pair.first; if (!attr_pair.second.empty()) { out += "=\"" + attr_pair.second + "\""; } } out += ">"; if (!getAttribute("innerText").empty()) { out += "\n" + getAttribute("innerText"); } out += "\n"; for (const auto& child : children) { out += child->print(indent + 1)+"\n"; } for (int i = 0; i < indent; ++i) { out += " "; } out += "" + getTagName() + ">\n"; return out; } std::string trimSpace(std::string str) { int start = 0; int end = str.length() - 1; // Handle empty string case if (str.empty()) { return ""; } // Find the first non-space character for (/* start is already 0 */; start <= end; ++start) { if (!std::isspace(str[start])) { break; } } // If the loop finished, it means the string was all spaces or empty if (start > end) { return ""; // Or a string of spaces, depending on desired behavior for " " } // Find the last non-space character for (/* end is already str.length() - 1 */; end >= start; --end) { if (!std::isspace(str[end])) { break; } } // Calculate the length of the substring // The length is (end_index - start_index) + 1 return str.substr(start, end - start + 1); } struct Style { std::unordered_map
properties; std::vector
> selector; // Index of when it was added (for cascading) size_t index; }; // parseSelectorParts deconstructs a selector into its indiviual parts // so they can be store and used in things like finding styles for basemap // and comparing a node to a selector in testSelector. This is a higher level function // that should be ran once perselector and the results saved somewhere std::vector
> parseSelectorParts(std::string selector) { // need to account for selectors with parenthesis like: h1:(+ input:required) // need to convert all single quotes to double quotes // need to account for commas, will return // need to colapse all spaces to a single space // need to trim spaces std::vector
> parts; std::vector
buffer; std::string current; size_t nesting = 0; size_t sl = selector.length(); for (size_t e = 0; e < sl; e++) { char s = selector[e]; if (s == '\'') { // convert single quotes to double quotes s = '\"'; } if (s == ' ' && nesting == 0) { if (e > 0 && e < sl-1) { bool prevMatch = selector[e-1] == '>' || selector[e-1] == '+' || selector[e-1] == '~'; bool nextMatch = selector[e+1] == '>' || selector[e+1] == '+' || selector[e+1] == '~'; if (prevMatch && !nextMatch) { s = '\0'; } else if (!prevMatch && nextMatch) { s = '\0'; } } } if (nesting == 0 && !current.empty()) { // !ISSUE: Missing space if (s == ':' || s == '[' || s == ',' || s == '#' || s == '.') { // We convert any : selectors (like :checked) to [checked] // because we store every thing as attributes on the Node // when test selector is ran it will convert all its attributes // to [checked] or [href="url"] // so we can easily compare the two std::string trimmed = trimSpace(current); if (!trimmed.empty()) { if (trimmed[0] == ':' && trimmed.back() != ')') { trimmed = "[" +trimmed.substr(1)+"]"; } buffer.push_back(trimmed); } current = s; } else if (s == '>' || s == '+' || s == '~' || s == ' ') { std::string trimmed = trimSpace(current); if (trimmed[0] == ':' && trimmed.back() != ')') { trimmed = "[" +trimmed.substr(1)+"]"; } buffer.push_back(trimmed); buffer.push_back(""); buffer.back() += s; current = ""; } else if (s != '\0') { current += s; } } else if (s != '\0') { current += s; } if ((s == ',' && nesting == 0) || e == sl-1) { if (!current.empty() && current != ",") { std::string trimmed = trimSpace(current); if (trimmed[0] == ':' && trimmed.back() != ')') { trimmed = "[" +trimmed.substr(1)+"]"; } buffer.push_back(trimmed); } parts.push_back(buffer); buffer.clear(); current = ""; } if (s == '(' || s == '[' || s == '{') { nesting++; } else if (s == ')' || s == ']' || s == '}') { nesting--; } } return parts; } class StyleHandler { private: // basemap: Maps baseparts to a index of the styles vector pointing to 
a Style object // because processing a CSS selector isn't trivial we want to make a short list of // the styles that can possibly be a match so we aren't spending a lot of compute // on a selector that applies to the wrong element. Todo this we take the right most part // of a selector (that is the part that targets the current element) and we do a few checks // like is the tagname the same, does it have that class, and do the id's match. If so that // is a candidate and we run the full selector on it. That includes checking parents,children, // or what ever else the selector needs to match. std::unordered_map
> basemap; std::vector