Author: Mason Wright
Email:
[email protected]
Date: Mon, 16 Jun 2025 20:44:42 -0600
element.cc
4bfba076120f389994fc46a98e8b7a2622314400
Off to a better start on the parser
Clone
-
adapter.cc
-
build.sh
M
element.cc
-
events.cc
M
index.html
M
main
-
main.cc
Commits
b966b2a517365074e5c381dbdea05b3221dc0198
e840f1eeb0ae26af69e1ae146ea9938e28e9f1af
e4e05418a640eaed08cd1ec7cd8644eb1dbcca50
4e01ba8ad2c3361fa4be3d896288020948b58b5e
aae562ac1350480e4889aabb35899f776c5b59e9
6c3ae0e31eb0893f20e3872117f92cc6b9a942af
350e7d88bb2feb9db00c6e032cc6623f215b7adf
95e6c70d23e99ffcf70e5bbe12503496e5d8f232
e188783659b9bc3b9993a647e93ed110e7f41db6
5e4c38ff3c212cdd9881427ef3f8c2706539a190
e50ea9e1356a74af18fdd171337ef9dc931e1f4e
8f2e83556d12aaebe8e8597ea6923804b0eb7a43
1627c585128af263181053ab2cf1a4cdcd14ee21
def3513f75b325464ad88a33c741c4ca80572b77
a21501590980a905fa9b902897d700a42a08b7f0
56074a6bfe4498d092f3a227297c8c20e2bb962c
d9cf1485b7ae0614130494f0e73237921323b9a1
80f04b134ae32ad8a9d526007b33dd02f6600f05
23d6c65f9368d3c622a55a3068a6b2f1efa0c8d4
09c195df02536b6a796bd648fce9669397b96109
f2b5c8202fbc904e2ed78260e3fdbd55164799d2
4bfba076120f389994fc46a98e8b7a2622314400
e36ac5417e10ee9b9f94f340e1ccf28afc5705ea
d00dc89a86dd7e2fcfd4618bc3a1c8cfba9e3c3d
d9eef16adaf292f3748db5fb5aa98463de10d712
18ff2ec1bfc1cf9fcd17c1acb05c3b41f8f0ed83
9e7fd2980d723437ea621b78d395fa72ca3f4922
Diff
diff --git a/element.cc b/element.cc index 08f9888..059e344 100755 --- a/element.cc +++ b/element.cc @@ -318,10 +317,0 @@ std::unordered_map
parseAttributes(std::vector
parseStream(std::istream& inputStream) { + std::string line; @@ -334 +325,2 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - + // !ISSUE: test + // @@ -338,0 +331 @@ std::unique_ptr
parseStream(std::istream& inputStream) { + bool isClosing = false; @@ -340 +333,2 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - std::vector
tokens; + std::vector
tokens; + std::string word = ""; @@ -342,0 +337,3 @@ std::unique_ptr
parseStream(std::istream& inputStream) { + + // !TODO: self closing tags
+ // + also tags with no content @@ -343,0 +341,11 @@ std::unique_ptr
parseStream(std::istream& inputStream) { + std::cout << "--------" << std::endl; + std::cout << current << std::endl; + if (isClosing && current != '>') { + std::cout << "Continuing no >" << std::endl; + continue; + } else if (isClosing && current == '>') { + std::cout << "Continuing at >" << std::endl; + isClosing = false; + continue; + } + @@ -345 +353,21 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - if ((current == '"' || current == '\'') && !escaped) { + std::cout << "In Tag" << std::endl; + if (current == '/') { + std::cout << "End Tag" << std::endl; + isClosing = true; + } else if (current == '>' && !escaped) { + tokens.push_back(word); + std::cout << "Creating Element: " << tokens[0] << std::endl; + // Add the last word + + word = ""; + inTag = false; + // Build the element + currentNode = currentNode->createElement(tokens[0]); + auto attrs = parseAttributes(tokens); + for (auto const& pair : attrs) { + // All attributes are stored as strings so we can just throw them in + currentNode->setAttribute(pair.first, pair.second); + } + + + } else if ((current == '"' || current == '\'') && !escaped) { @@ -353,9 +381,5 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - if (current == '>' && !inQuote && !escaped) { - inTag = false; - // Even if the next tag is next still add a text tag as we can just check if its empty and remove it - // this check would still have to be done either way so we aren't wasting anything - std::string data = ""; - Token newToken = Token{TEXT, data}; - tokens.push_back(newToken); - // for closing tags, use them as a checksum and to manage the position in the tree - continue; + if (current == ' ' && !inQuote) { + tokens.push_back(word); + word = ""; + } else { + word += current; @@ -363,8 +387,5 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - } else if (!escaped && current == '<') { - inTag = true; - std::string data = ""; - Token newToken = Token{NODE, data}; - tokens.push_back(newToken); - // Heres where you actually make the text node and above the real nodes - // can also prob make the vector of tokens a single variable - continue; + + } else if (current != '<' && current != '>') { + std::cout << "Adding innerText to " << currentNode->getTagName() <<":"<< current << std::endl; + // Outside tag add to innerText + currentNode->setInnerText(currentNode->getInnerText() + current); @@ -372,0 +394,6 @@ std::unique_ptr
parseStream(std::istream& inputStream) { + if (current == '<' && !escaped) { + tokens.clear(); + word = ""; + inTag = true; + } + @@ -377,5 +403,0 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - continue; - } - - if (!tokens.empty()) { - tokens.back().data += current; @@ -385,4 +406,0 @@ std::unique_ptr
parseStream(std::istream& inputStream) { - for (auto t : tokens) { - std::cout << t.type << t.data << std::endl; - } -
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
// --- THE MACRO DEFINITION ---
// Generates a typed getter and setter pair for one attribute of Node.
//   TYPE:        The C++ data type (e.g., std::string, int, bool)
//   FUNC_SUFFIX: The PascalCase suffix for the getter/setter (e.g., Id, TabIndex)
//   ATTR_KEY:    The exact string literal key used in the Attributes map (e.g., "id", "tabindex")
// The parameter names avoid a leading underscore followed by an uppercase
// letter because such identifiers are reserved for the implementation.
#define GENERATE_ATTRIBUTE_ACCESSORS(TYPE, FUNC_SUFFIX, ATTR_KEY) \
    TYPE get##FUNC_SUFFIX() const {                                \
        return getAttribute<TYPE>(ATTR_KEY);                       \
    }                                                              \
    void set##FUNC_SUFFIX(TYPE value) {                            \
        setAttribute(ATTR_KEY, value);                             \
    }

// Style storage for a node. Not referenced by the rest of this file yet.
// NOTE(review): the element types below were reconstructed (property -> value
// string maps); confirm against the intended CSS representation.
struct Styles {
    std::vector<std::unordered_map<std::string, std::string>> stylesheets;
    std::unordered_map<std::string, std::string> inlineStyles;
    std::unordered_map<std::string, std::unordered_map<std::string, std::string>> psuedoStyles;
};

// Four edge values; zero-initialized by default.
struct Bounds {
    int top = 0;
    int right = 0;
    int bottom = 0;
    int left = 0;
};

// Layout/visibility state; every field has a defined default.
struct State {
    // Bounds offset;
    // Border border;
    // background (a std::vector, element type to be decided)
    int width = 0;
    int height = 0;
    int z = 0;
    bool hidden = false;
    int tabIndex = 0;
};

// A list of class names that can be rendered as one space-separated string.
class ClassList {
private:
    std::vector<std::string> values;

public:
    // Returns all stored names joined by single spaces ("" when empty).
    std::string value() const {
        std::string collection;
        for (const std::string& entry : values) {
            if (!collection.empty()) {
                collection += ' ';
            }
            collection += entry;
        }
        return collection;
    }

    // Appends a name; duplicates are not filtered.
    void add(std::string value) {
        values.push_back(std::move(value));
    }

    // Removes the first occurrence of `value`, if present. The last element is
    // moved into the vacated slot, so the relative order of names may change.
    void remove(std::string value) {
        const auto found = std::find(values.begin(), values.end(), value);
        if (found == values.end()) {
            return;
        }
        if (&*found != &values.back()) {
            *found = std::move(values.back());  // Overwrite the removed slot with the last element
        }
        values.pop_back();  // Drop the (now redundant) last element
    }
};

// !TODO: Cascade the styles during the parsing of the document and add the styles during the css parsing
// + all other styles should cascade when added

// A tree node: a tag name, string-valued attributes, a non-owning parent
// pointer and owned children.
class Node {
private:
    std::string TagName;
    // !NOTE: ContentEditable only supports plaintext
    std::unordered_map<std::string, std::string> Attributes;

public:
    Node* parent = nullptr;  // Non-owning; set by createElement on the child
    std::vector<std::unique_ptr<Node>> children;
    ClassList classList;

    Node() = default;

    // Can't use macro on tag name
    std::string getTagName() const { return TagName; }
    void setTagName(const std::string& name) { TagName = name; }

    const std::unordered_map<std::string, std::string>& getAttributes() const {
        return Attributes;
    }

    // --- Define Getters and Setters
    // !TODO: Add all global attributes
    // + Src: https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Global_attributes
    GENERATE_ATTRIBUTE_ACCESSORS(std::string, Id, "id")
    GENERATE_ATTRIBUTE_ACCESSORS(std::string, InnerText, "innerText")  // innerText is stored as an attribute
    GENERATE_ATTRIBUTE_ACCESSORS(bool, ContentEditable, "contenteditable")
    GENERATE_ATTRIBUTE_ACCESSORS(std::string, Href, "href")
    GENERATE_ATTRIBUTE_ACCESSORS(std::string, Src, "src")
    GENERATE_ATTRIBUTE_ACCESSORS(std::string, Title, "title")
    GENERATE_ATTRIBUTE_ACCESSORS(std::string, Value, "value")
    GENERATE_ATTRIBUTE_ACCESSORS(int, TabIndex, "tabindex")
    GENERATE_ATTRIBUTE_ACCESSORS(bool, Disabled, "disabled")
    GENERATE_ATTRIBUTE_ACCESSORS(bool, Required, "required")
    GENERATE_ATTRIBUTE_ACCESSORS(bool, Checked, "checked")

    // Creates a child with the given tag name, appends it to `children`, and
    // returns a non-owning pointer to it (ownership stays with this node).
    Node* createElement(std::string name) {
        auto newNode = std::make_unique<Node>();
        newNode->setTagName(name);
        newNode->parent = this;
        children.push_back(std::move(newNode));
        return children.back().get();
    }

    // --- Generic setAttribute (Type to string conversion) ---
    // bool:       true stores the attribute with an empty value, false erases it.
    // arithmetic: stored via std::to_string.
    // otherwise:  the type must be convertible to std::string.
    template <typename T>
    void setAttribute(const std::string& name, const T& value) {
        if constexpr (std::is_same_v<T, bool>) {
            if (value) {
                Attributes[name] = "";
            } else {
                Attributes.erase(name);
            }
        } else if constexpr (std::is_arithmetic_v<T>) {
            // Handles int, double, etc.
            Attributes[name] = std::to_string(value);
        } else {
            // Fires at compile time for types with no string conversion.
            static_assert(std::is_convertible_v<T, std::string>,
                          "setAttribute: Type cannot be converted to std::string automatically.");
            Attributes[name] = static_cast<std::string>(value);
        }
    }

    // Overload for std::string to avoid unnecessary conversion (and explicit to_string)
    void setAttribute(const std::string& name, const std::string& value) {
        Attributes[name] = value;
    }

    // --- Generic getAttribute (string to Type conversion) ---
    // Returns a value-initialized T when the attribute is absent.
    // int/double: parsed with std::stoi/std::stod; on failure a warning is
    //             written to std::cerr and 0 / 0.0 is returned.
    // bool:       a present attribute is true unless its value is "false"
    //             (case-insensitive). An empty value counts as true, so that
    //             setAttribute(name, true) round-trips.
    template <typename T>
    T getAttribute(const std::string& name) const {
        const auto it = Attributes.find(name);
        if (it == Attributes.end()) {
            // Relies on T being default-constructible (int() is 0, bool() is false, ...)
            return T();
        }
        const std::string& s = it->second;

        if constexpr (std::is_same_v<T, int>) {
            try {
                return std::stoi(s);
            } catch (const std::invalid_argument& e) {
                std::cerr << "Warning: Invalid integer attribute value '" << s << "' for '" << name
                          << "'. Defaulting to 0. " << e.what() << std::endl;
                return 0;
            } catch (const std::out_of_range& e) {
                std::cerr << "Warning: Integer attribute value '" << s << "' for '" << name
                          << "' out of range. Defaulting to 0. " << e.what() << std::endl;
                return 0;
            }
        } else if constexpr (std::is_same_v<T, bool>) {
            std::string lowered = s;
            std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                           [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
            return lowered != "false";
        } else if constexpr (std::is_same_v<T, double>) {
            try {
                return std::stod(s);
            } catch (const std::invalid_argument& e) {
                std::cerr << "Warning: Invalid double attribute value '" << s << "' for '" << name
                          << "'. Defaulting to 0.0. " << e.what() << std::endl;
                return 0.0;
            } catch (const std::out_of_range& e) {
                std::cerr << "Warning: Double attribute value '" << s << "' for '" << name
                          << "' out of range. Defaulting to 0.0. " << e.what() << std::endl;
                return 0.0;
            }
        } else {
            // Any other requested type must be constructible from std::string.
            static_assert(std::is_convertible_v<std::string, T>,
                          "getAttribute: Type conversion from std::string not implemented for this type.");
            return static_cast<T>(s);
        }
    }

    // Non-template overload: returns the raw stored string, or "" when absent.
    std::string getAttribute(const std::string& name) const {
        const auto it = Attributes.find(name);
        return it != Attributes.end() ? it->second : std::string();
    }

    // Writes this node and its subtree to std::cout, one space of indentation
    // per depth level. The "innerText" attribute is printed as content rather
    // than as an attribute. Attribute order follows the unordered_map.
    void print(int indent = 0) const {
        const auto writeIndent = [indent] {
            for (int i = 0; i < indent; ++i) {
                std::cout << " ";
            }
        };
        const std::string innerText = getInnerText();

        writeIndent();
        std::cout << "<" << TagName;
        for (const auto& [key, value] : Attributes) {
            if (key == "innerText") {
                continue;
            }
            std::cout << " " << key;
            if (!value.empty()) {
                // Empty values are printed bare (boolean attributes)
                std::cout << "=\"" << value << "\"";
            }
        }
        std::cout << ">";

        // Note: HTML whitespace rules are complex; this just prints it raw
        if (!innerText.empty()) {
            std::cout << "\n" << innerText;
        }
        std::cout << std::endl;

        for (const auto& child : children) {
            child->print(indent + 1);
        }

        // Nodes with neither children nor text get no closing tag.
        // (Self-closing tag handling is still to be refined.)
        if (!children.empty() || !innerText.empty()) {
            writeIndent();
            std::cout << "</" << TagName << ">" << std::endl;
        }
    }
};

// Builds a name -> value map from the whitespace-split pieces of one tag.
// tokens[0] is skipped (the caller passes the tag name there); each later
// token is expected to look like `name`, `name=value`, `name="value"` or
// `name='value'`.
//  - The token is split at the FIRST '=' only, so '=' inside a value is kept.
//  - Whitespace is removed from the name and trimmed from both ends of the value.
//  - One pair of matching surrounding quotes is stripped; unquoted values are
//    kept verbatim.
//  - Tokens with an empty name are ignored.
//  - A name without '=' maps to "".
std::unordered_map<std::string, std::string>
parseAttributes(std::vector<std::string> tokens) {
    constexpr const char* kWhitespace = " \t\n\r\f\v";
    std::unordered_map<std::string, std::string> attrs;

    for (std::size_t i = 1; i < tokens.size(); ++i) {
        const std::string& token = tokens[i];
        const std::size_t eq = token.find('=');

        std::string name;
        for (const char c : token.substr(0, eq)) {
            // Cast first: std::isspace has undefined behavior for negative char values
            if (!std::isspace(static_cast<unsigned char>(c))) {
                name += c;
            }
        }
        if (name.empty()) {
            continue;
        }

        std::string value;
        if (eq != std::string::npos) {
            value = token.substr(eq + 1);
            const std::size_t first = value.find_first_not_of(kWhitespace);
            if (first == std::string::npos) {
                value.clear();
            } else {
                const std::size_t last = value.find_last_not_of(kWhitespace);
                value = value.substr(first, last - first + 1);
            }
            const bool quoted = value.size() >= 2 &&
                                (value.front() == '"' || value.front() == '\'') &&
                                value.back() == value.front();
            if (quoted) {
                value = value.substr(1, value.size() - 2);
            }
        }
        attrs[name] = std::move(value);
    }
    return attrs;
}

enum Type {
    TEXT,
    NODE
};

// One lexed chunk: either the raw inside of a tag (NODE) or the text after it (TEXT).
struct Token {
    Type type;
    std::string data;
};

// !TODO: Make a html string parser as well

// Reads the stream character by character and splits it into NODE tokens
// (everything between '<' and the matching unquoted '>') and TEXT tokens
// (everything after a '>' up to the next '<'). A backslash escapes the next
// character and is itself dropped. Text before the first '<' is discarded.
// The tokens are printed to std::cout (numeric type followed by data).
// Work in progress: the returned root node is named "root" and currently has
// no children; the tree is not built from the tokens yet.
std::unique_ptr<Node> parseStream(std::istream& inputStream) {
    auto root = std::make_unique<Node>();
    root->setTagName("root");
    // Insertion point for the tree builder; not used by the tokenizer yet.
    [[maybe_unused]] Node* currentNode = root.get();

    bool inTag = false;
    bool escaped = false;
    bool inQuote = false;
    char quoteType = '"';
    std::vector<Token> tokens;

    char current;
    while (inputStream.get(current)) {
        if (inTag) {
            if ((current == '"' || current == '\'') && !escaped) {
                if (inQuote && current == quoteType) {
                    inQuote = false;
                } else if (!inQuote) {
                    inQuote = true;
                    quoteType = current;
                }
            }
            if (current == '>' && !inQuote && !escaped) {
                inTag = false;
                // Always open a text token after a tag; an empty one can be
                // detected and dropped later.
                // For closing tags, use them as a checksum and to manage the position in the tree
                tokens.push_back(Token{TEXT, ""});
                continue;
            }
        } else if (!escaped && current == '<') {
            inTag = true;
            // Here is where the text node is finalized and the real node starts
            tokens.push_back(Token{NODE, ""});
            continue;
        }

        escaped = false;
        if (current == '\\') {
            escaped = true;
            continue;
        }

        if (!tokens.empty()) {
            tokens.back().data += current;
        }
    }

    for (const auto& t : tokens) {
        std::cout << t.type << t.data << std::endl;
    }

    return root;
}

// Parses ./index.html and prints the resulting tree.
// Returns 1 if the file cannot be opened, 0 otherwise.
int main() {
    // std::string html1 = "...";  // inline HTML sample for string-based testing
    // std::stringstream ss1(html1);
    std::ifstream inputFile("./index.html");
    if (!inputFile) {
        std::cerr << "Error: could not open ./index.html" << std::endl;
        return 1;
    }
    auto document = parseStream(inputFile);
    document->print();
    return 0;
}