test.cc

Commits

b966b2a517365074e5c381dbdea05b3221dc0198 e840f1eeb0ae26af69e1ae146ea9938e28e9f1af e4e05418a640eaed08cd1ec7cd8644eb1dbcca50 4e01ba8ad2c3361fa4be3d896288020948b58b5e aae562ac1350480e4889aabb35899f776c5b59e9 6c3ae0e31eb0893f20e3872117f92cc6b9a942af 350e7d88bb2feb9db00c6e032cc6623f215b7adf 95e6c70d23e99ffcf70e5bbe12503496e5d8f232 e188783659b9bc3b9993a647e93ed110e7f41db6 5e4c38ff3c212cdd9881427ef3f8c2706539a190 e50ea9e1356a74af18fdd171337ef9dc931e1f4e 8f2e83556d12aaebe8e8597ea6923804b0eb7a43 1627c585128af263181053ab2cf1a4cdcd14ee21 def3513f75b325464ad88a33c741c4ca80572b77 a21501590980a905fa9b902897d700a42a08b7f0 56074a6bfe4498d092f3a227297c8c20e2bb962c d9cf1485b7ae0614130494f0e73237921323b9a1 80f04b134ae32ad8a9d526007b33dd02f6600f05 23d6c65f9368d3c622a55a3068a6b2f1efa0c8d4 09c195df02536b6a796bd648fce9669397b96109 f2b5c8202fbc904e2ed78260e3fdbd55164799d2 4bfba076120f389994fc46a98e8b7a2622314400 e36ac5417e10ee9b9f94f340e1ccf28afc5705ea d00dc89a86dd7e2fcfd4618bc3a1c8cfba9e3c3d d9eef16adaf292f3748db5fb5aa98463de10d712 18ff2ec1bfc1cf9fcd17c1acb05c3b41f8f0ed83 9e7fd2980d723437ea621b78d395fa72ca3f4922

Diff

diff --git a/test.cc b/test.cc
deleted file mode 100644
index a53d635..0000000
--- a/test.cc
+++ /dev/null
@@ -1,459 +0,0 @@
-#include <iostream>
-#include <string>
-#include <vector>
-#include <memory>
-#include <map>
-#include <sstream> // For stringstream input, useful for testing
-#include <fstream> // For file input
-
-// Forward declarations to define Node and parseAttributes before parseHtmlStream
-class Node;
-std::map<std::string, std::string> parseAttributes(const std::vector<std::string>& tokens);
-
-/**
- * @brief Represents a node in the HTML Document Object Model (DOM) tree.
- * Can be an element node (e.g., <div>), a text node (innerText), or the root.
- */
-class Node {
-public:
-    std::string tagName;                            // The tag name of the element (e.g., "div", "h1")
-    std::string innerText;                          // Text content directly inside this node
-    std::map<std::string, std::string> attributes;  // Map of attribute key-value pairs
-    std::vector<std::unique_ptr<Node>> children;    // Vector of unique pointers to child nodes
-    Node* parent;                                   // Raw pointer to the parent node (ownership managed by unique_ptr in children)
-
-    /**
-     * @brief Constructor for Node. Initializes parent to nullptr.
-     */
-    Node() : parent(nullptr) {}
-
-    // --- Setters ---
-    void setTagName(const std::string& name) { tagName = name; }
-    void setInnerText(const std::string& text) { innerText = text; }
-    void setAttribute(const std::string& key, const std::string& value) { attributes[key] = value; }
-
-    // --- Getters ---
-    const std::string& getTagName() const { return tagName; }
-    const std::string& getInnerText() const { return innerText; }
-    const std::map<std::string, std::string>& getAttributes() const { return attributes; }
-
-    /**
-     * @brief Creates a new child node for the current node.
-     * Sets the new node's tag name and its parent pointer to `this`.
-     * Transfers ownership of the new node to the `children` vector.
-     * @param name The tag name of the new element (e.g., "p", "a").
-     * @return A raw pointer to the newly created child node.
-     */
-    Node* createElement(const std::string& name) {
-        std::unique_ptr<Node> newNode = std::make_unique<Node>();
-        newNode->setTagName(name);
-        newNode->parent = this; // Set parent pointer for the new node
-        Node* rawPtr = newNode.get(); // Get raw pointer before moving ownership
-        children.push_back(std::move(newNode)); // Transfer ownership to the children vector
-        return rawPtr;
-    }
-
-    /**
-     * @brief Prints the HTML tree structure recursively for debugging purposes.
-     * Includes indentation for better readability.
-     * @param indent The current indentation level.
-     */
-    void print(int indent = 0) const {
-        // Print indentation for the current node
-        for (int i = 0; i < indent; ++i) std::cout << "  ";
-
-        // Print the opening tag and its attributes
-        std::cout << "<" << tagName;
-        for (const auto& attr : attributes) {
-            std::cout << " " << attr.first << "=\"" << attr.second << "\"";
-        }
-        std::cout << ">";
-
-        // Print inner text if it exists
-        if (!innerText.empty()) {
-            std::cout << innerText;
-        }
-
-        // Determine if a closing tag is required.
-        // If the node has children or inner text, it definitely needs a closing tag.
-        // Even for empty tags (like <h1></h1> with no content), a closing tag is required.
-        bool requiresClosingTag = true; // Assuming all elements created require a closing tag
-                                        // unless explicitly handled as self-closing in parser.
-
-        // If the node has content (text or children), print a newline for better formatting.
-        // Then recursively print children, followed by the closing tag.
-        if (!innerText.empty() || !children.empty()) {
-            std::cout << std::endl; // Newline after opening tag if content/children follow
-
-            // Recursively print all child nodes
-            for (const auto& child : children) {
-                child->print(indent + 1);
-            }
-
-            // Print indentation for the closing tag
-            for (int i = 0; i < indent; ++i) std::cout << "  ";
-            std::cout << "</" << tagName << ">" << std::endl;
-        } else if (requiresClosingTag) {
-            // For truly empty tags (e.g., <h1></h1>), print newline and then its closing tag
-            std::cout << std::endl;
-            for (int i = 0; i < indent; ++i) std::cout << "  ";
-            std::cout << "</" << tagName << ">" << std::endl;
-        } else {
-            // This case handles conceptual self-closing for elements that don't need a closing tag
-            // (e.g. <br/>). In the parser, `currentNode` immediately moves up for these.
-            // So, for the `print` function, if it reaches this branch, it means the node was effectively "closed".
-            std::cout << std::endl; // Just print a newline after the self-closing tag for formatting.
-        }
-    }
-};
-
-/**
- * @brief Helper function to parse attributes from a vector of tokens.
- * It expects tokens to be either `key=value` (with or without quotes) or just `key` (for boolean attributes).
- * @param tokens A vector of strings, where the first element is the tag name, and subsequent elements are attributes.
- * @return A map of attribute keys to their values.
- */
-std::map<std::string, std::string> parseAttributes(const std::vector<std::string>& tokens) {
-    std::map<std::string, std::string> attrs;
-    if (tokens.empty()) return attrs;
-
-    // Skip the first token which is the tag name itself
-    for (size_t i = 1; i < tokens.size(); ++i) {
-        const std::string& token = tokens[i];
-        size_t eqPos = token.find('='); // Find the position of '='
-
-        if (eqPos != std::string::npos) {
-            // Attribute is in `key=value` format
-            std::string key = token.substr(0, eqPos);
-            std::string value = token.substr(eqPos + 1);
-
-            // Remove quotes if the value is enclosed in them (e.g., value="my value")
-            if (!value.empty() && (value.front() == '"' || value.front() == '\'')) {
-                if (value.length() >= 2 && value.back() == value.front()) { // Ensure matching quotes
-                    value = value.substr(1, value.length() - 2);
-                }
-            }
-            attrs[key] = value;
-        } else {
-            // Attribute is a boolean attribute or a key without an explicit value (e.g., `disabled`)
-            attrs[token] = "";
-        }
-    }
-    return attrs;
-}
-
-/**
- * @brief Core HTML parsing logic that reads character by character from an input stream.
- * @param inputStream The input stream (e.g., `ifstream` for file, `stringstream` for string).
- * @return A unique_ptr to the root Node of the parsed HTML tree.
- */
-std::unique_ptr<Node> parseHtmlStream(std::istream& inputStream) {
-    std::unique_ptr<Node> root = std::make_unique<Node>();
-    root->setTagName("root"); // Create a conceptual root node for the document
-
-    Node* currentNode = root.get(); // Pointer to the currently active node in the tree
-
-    std::string currentTokenPart = "";      // Buffer for collecting tag names, attribute keys, or partial attribute values
-    std::string textBuffer = "";            // Buffer for collecting inner text content
-
-    bool inTag = false;             // True when parser is inside '<' and '>' of a tag
-    bool inClosingTag = false;      // True when parsing a closing tag (e.g., after `</`)
-    bool inAttributeValue = false;  // True when parsing a quoted attribute value (e.g., `value="...")
-    char attributeQuoteType = ' ';  // Stores the type of quote (`'` or `"`) used for the attribute value
-
-    std::vector<std::string> tagTokens; // Stores the tag name and all its attributes as individual tokens
-
-    char current; // Current character being processed
-
-    // Loop through the input stream character by character
-    while (inputStream.get(current)) {
-        if (inTag) {
-            // --- Parser is currently inside an HTML tag (`<...>` or `</...>`) ---
-            if (inAttributeValue) {
-                // Currently parsing content within an attribute's quoted value
-                if (current == attributeQuoteType) {
-                    // End of the attribute value (closing quote found)
-                    inAttributeValue = false;
-                    tagTokens.push_back(currentTokenPart); // Add the complete attribute value to tokens
-                    currentTokenPart = "";                 // Reset buffer for next part
-                } else {
-                    currentTokenPart += current; // Continue accumulating the attribute value
-                }
-            } else if (current == ' ' || current == '\t' || current == '\n' || current == '\r') {
-                // Whitespace: separates tag name, attributes, or self-closing markers
-                if (!currentTokenPart.empty()) {
-                    tagTokens.push_back(currentTokenPart); // Add the accumulated part (tag name or attribute key/value)
-                    currentTokenPart = "";                 // Reset buffer
-                }
-            } else if (current == '=' && !inClosingTag) {
-                // Encountered an equals sign (for `key=value` attributes)
-                if (!currentTokenPart.empty()) {
-                    tagTokens.push_back(currentTokenPart); // Push the attribute key
-                    currentTokenPart = "";
-                }
-                currentTokenPart += current; // Add the '=' to the current part (it will be combined with value later)
-            } else if (current == '"' || current == '\'') {
-                // Start of an attribute value (quoted)
-                // If there's an accumulated part, it should be the key or `key=`
-                if (!currentTokenPart.empty()) {
-                    tagTokens.push_back(currentTokenPart);
-                }
-                currentTokenPart = ""; // Clear for the actual value
-                inAttributeValue = true;
-                attributeQuoteType = current; // Store the quote type
-            } else if (current == '/') {
-                // Encountered a slash (`/`). Could be for:
-                // 1. Self-closing tag (e.g., `<br/>`, `<input />`)
-                // 2. Start of a closing tag (e.g., `</div`) - this is already handled by `inClosingTag` check before `inTag` starts
-                if (!currentTokenPart.empty()) {
-                    tagTokens.push_back(currentTokenPart);
-                }
-                currentTokenPart = ""; // Reset for '/' itself
-                currentTokenPart += current; // Add '/' as a token part
-            } else if (current == '>') {
-                // --- End of a tag (`>` found) ---
-                inTag = false; // Exiting the tag processing state
-
-                // If there's any remaining accumulated part, add it to tokens
-                if (!currentTokenPart.empty()) {
-                    tagTokens.push_back(currentTokenPart);
-                    currentTokenPart = "";
-                }
-
-                if (inClosingTag) {
-                    // This was a closing tag (e.g., `</h1>`)
-                    if (!tagTokens.empty()) {
-                        std::string closingTagName = tagTokens[0];
-                        // Trim trailing '/' if it was parsed as part of the tag name (e.g. `br/` due to tokenization)
-                        if (!closingTagName.empty() && closingTagName.back() == '/') {
-                            closingTagName.pop_back();
-                        }
-
-                        // Check if the closing tag matches the current node's tag name
-                        if (currentNode->getTagName() == closingTagName) {
-                            if (currentNode->parent) {
-                                currentNode = currentNode->parent; // Move up to the parent node
-                            } else {
-                                // Error: Tried to close the root node or unmatched closing tag without a parent
-                                std::cerr << "Warning: Attempted to close root tag or unmatched closing tag: </" << closingTagName << ">" << std::endl;
-                            }
-                        } else {
-                            // Mismatched closing tag (e.g., `<div><span></div>`)
-                            std::cerr << "Error: Mismatched closing tag. Expected </" << currentNode->getTagName()
-                                      << ">, got </" << closingTagName << ">" << std::endl;
-                            // For robustness, you might choose to still move up or try to find a matching ancestor.
-                            // For this simple parser, we'll still attempt to move up, assuming it's a parse error.
-                            if (currentNode->parent) {
-                                currentNode = currentNode->parent;
-                            }
-                        }
-                    } else {
-                        std::cerr << "Warning: Empty closing tag detected (e.g., </>)!" << std::endl;
-                    }
-                    inClosingTag = false; // Reset the closing tag state
-                } else {
-                    // This was an opening tag (e.g., `<h1>`, `<div id="x">`)
-                    // Or a self-closing tag (e.g., `<br/>`, `<img />`)
-                    if (!tagTokens.empty()) {
-                        std::string tagName = tagTokens[0]; // The first token is the tag name
-                        bool isSelfClosing = false;
-
-                        // Check if the tag is self-closing
-                        // 1. If the tag name itself ends with '/' (e.g., `<br/>` where `br/` is the token)
-                        if (!tagName.empty() && tagName.back() == '/') {
-                            isSelfClosing = true;
-                            tagName.pop_back(); // Remove the '/' from the tag name
-                        }
-                        // 2. If the last token is just "/" (e.g., `<input />` where `/` is a separate token)
-                        else if (!tagTokens.empty() && tagTokens.back() == "/") {
-                            isSelfClosing = true;
-                            tagTokens.pop_back(); // Remove the "/" token from the list
-                        }
-
-                        // Create the new node as a child of the current node
-                        Node* newNode = currentNode->createElement(tagName);
-                        // Parse attributes from the collected tokens
-                        auto attrs = parseAttributes(tagTokens);
-                        for (auto const& pair : attrs) {
-                            newNode->setAttribute(pair.first, pair.second);
-                        }
-                        currentNode = newNode; // Move `currentNode` down to the newly created element
-
-                        if (isSelfClosing) {
-                            // For self-closing tags, immediately move back up to the parent
-                            // as they do not contain children or inner text.
-                            if (currentNode->parent) {
-                                currentNode = currentNode->parent;
-                            }
-                        }
-                    } else {
-                        std::cerr << "Warning: Empty or malformed opening tag detected: < >" << std::endl;
-                    }
-                }
-                tagTokens.clear(); // Clear tokens for the next tag
-                textBuffer = "";   // Clear any accumulated text before a new tag or content starts
-            } else {
-                // Accumulate characters for the tag name or an attribute key/value part
-                currentTokenPart += current;
-            }
-        } else {
-            // --- Parser is currently outside a tag, processing text content or looking for new tags ---
-            if (current == '<') {
-                // A new tag is starting.
-                // If there's accumulated text, assign it as innerText to the current node
-                if (!textBuffer.empty()) {
-                    // Trim leading/trailing whitespace/newlines from the text content
-                    size_t first = textBuffer.find_first_not_of(" \t\n\r");
-                    size_t last = textBuffer.find_last_not_of(" \t\n\r");
-                    if (std::string::npos != first) {
-                        currentNode->setInnerText(textBuffer.substr(first, (last - first + 1)));
-                    }
-                    textBuffer = ""; // Reset text buffer for next content
-                }
-
-                // Peek at the next character to determine the type of tag
-                char nextChar;
-                if (inputStream.get(nextChar)) { // Attempt to read the next character without consuming it fully yet
-                    if (nextChar == '/') {
-                        // It's a closing tag (e.g., `</div`)
-                        inClosingTag = true;
-                        inTag = true; // Enter the tag parsing state
-                    } else if (nextChar == '!') {
-                        // It's a comment (``) or DOCTYPE (`<!DOCTYPE ...>`)
-                        // Simple skipping logic: read until the next `>`
-                        while (inputStream.get(current) && current != '>');
-                        continue; // Continue to the next character in the main loop
-                    } else {
-                        // It's a regular opening tag (e.g., `<div>`)
-                        inTag = true;          // Enter the tag parsing state
-                        currentTokenPart += nextChar; // Add the actual tag name character
-                    }
-                } else {
-                    // Unexpected end of file after '<'
-                    std::cerr << "Warning: Unexpected EOF after '<'" << std::endl;
-                    break; // Exit parsing loop
-                }
-            } else {
-                // Accumulate characters as inner text content for the current node
-                textBuffer += current;
-            }
-        }
-    }
-
-    // After the loop, if there's any remaining text in the buffer, assign it
-    if (!textBuffer.empty()) {
-        size_t first = textBuffer.find_first_not_of(" \t\n\r");
-        size_t last = textBuffer.find_last_not_of(" \t\n\r");
-        if (std::string::npos != first) {
-            currentNode->setInnerText(textBuffer.substr(first, (last - first + 1)));
-        }
-    }
-
-    return root; // Return the root of the parsed HTML tree
-}
-
-/**
- * @brief Parses an HTML file and constructs a DOM tree.
- * @param file The path to the HTML file.
- * @return A unique_ptr to the root Node of the parsed HTML tree, or nullptr if the file cannot be opened.
- */
-std::unique_ptr<Node> parserdocument(std::string file) {
-    std::ifstream inputFile(file); // Open the specified file
-
-    if (!inputFile.is_open()) {
-        std::cerr << "Unable to open: " << file << std::endl;
-        return nullptr; // Return nullptr if file opening fails
-    }
-
-    return parseHtmlStream(inputFile); // Use the core parsing logic
-}
-
-
-// --- Main function for demonstration and testing ---
-int main() {
-    // Test Case 1: Empty tag <h1></h1>
-    std::cout << "--- Test Case 1: Empty tag <h1></h1> ---" << std::endl;
-    std::string html1 = "<h1></h1>";
-    std::stringstream ss1(html1);
-    std::unique_ptr<Node> root1 = parseHtmlStream(ss1);
-    if (root1) {
-        root1->print();
-    }
-    std::cout << "\n";
-
-    // Test Case 2: Tag with text content <div>Hello World</div>
-    std::cout << "--- Test Case 2: Tag with text content <div>Hello World</div> ---" << std::endl;
-    std::string html2 = "<div>Hello World</div>";
-    std::stringstream ss2(html2);
-    std::unique_ptr<Node> root2 = parseHtmlStream(ss2);
-    if (root2) {
-        root2->print();
-    }
-    std::cout << "\n";
-
-    // Test Case 3: Nested tags with mixed content
-    std::cout << "--- Test Case 3: Nested tags with mixed content ---" << std::endl;
-    std::string html3 = "<body><p>This is <b>bold</b> text.</p></body>";
-    std::stringstream ss3(html3);
-    std::unique_ptr<Node> root3 = parseHtmlStream(ss3);
-    if (root3) {
-        root3->print();
-    }
-    std::cout << "\n";
-
-    // Test Case 4: Tags with attributes and self-closing tags
-    std::cout << "--- Test Case 4: Tags with attributes and self-closing tags ---" << std::endl;
-    std::string html4 = "<div id=\"main\" class='container'><img src=\"image.jpg\" alt=\"My Image\" /><br></div>";
-    std::stringstream ss4(html4);
-    std::unique_ptr<Node> root4 = parseHtmlStream(ss4);
-    if (root4) {
-        root4->print();
-    }
-    std::cout << "\n";
-
-    // Test Case 5: Empty tag with whitespace between
-    std::cout << "--- Test Case 5: Empty tag with whitespace between <h1>  </h1> ---" << std::endl;
-    std::string html5 = "<h1>  </h1>";
-    std::stringstream ss5(html5);
-    std::unique_ptr<Node> root5 = parseHtmlStream(ss5);
-    if (root5) {
-        root5->print();
-    }
-    std::cout << "\n";
-
-    // Test Case 6: Malformed closing tag
-    std::cout << "--- Test Case 6: Malformed closing tag (mismatch) ---" << std::endl;
-    std::string html6 = "<div><span></div>"; // Mismatched closing tag
-    std::stringstream ss6(html6);
-    std::unique_ptr<Node> root6 = parseHtmlStream(ss6);
-    if (root6) {
-        root6->print();
-    }
-    std::cout << "\n";
-
-    // Test Case 7: Comment and DOCTYPE
-    std::cout << "--- Test Case 7: Comment and DOCTYPE ---" << std::endl;
-    std::string html7 = "<!DOCTYPE html>\n<body>Content</body>";
-    std::stringstream ss7(html7);
-    std::unique_ptr<Node> root7 = parseHtmlStream(ss7);
-    if (root7) {
-        root7->print();
-    }
-    std::cout << "\n";
-
-    // Example of using parserdocument function with a file (requires a test.html file)
-    // std::cout << "--- Test Case 8: Parsing from file (test.html) ---" << std::endl;
-    // // Create a dummy test.html file for this test
-    // std::ofstream outfile("test.html");
-    // outfile << "<html><body><p>Hello from file!</p></body></html>";
-    // outfile.close();
-    //
-    // std::unique_ptr<Node> root8 = parserdocument("test.html");
-    // if (root8) {
-    //     root8->print();
-    // } else {
-    //     std::cout << "Failed to parse test.html" << std::endl;
-    // }
-
-    return 0;
-}

#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <sstream> // For stringstream input, useful for testing
#include <fstream> // For file input

// Forward declarations to define Node and parseAttributes before parseHtmlStream
class Node;
std::map<std::string, std::string> parseAttributes(const std::vector<std::string>& tokens);

/**
 * @brief One node of the parsed HTML tree.
 *
 * A node stores a tag name, a single string of text, an attribute map and an
 * ordered list of children that it owns through `std::unique_ptr`.
 *
 * `parent` is a non-owning back pointer set by createElement(). It is not
 * updated when a Node object itself is moved, so nodes that already have
 * children should stay at a stable address.
 */
class Node {
public:
    std::string tagName;                            // Element name, e.g. "div" or "h1"
    std::string innerText;                          // Text stored directly on this node
    std::map<std::string, std::string> attributes;  // Attribute name -> value
    std::vector<std::unique_ptr<Node>> children;    // Owned child nodes, in insertion order
    Node* parent;                                   // Non-owning; nullptr until set by createElement()

    /**
     * @brief Creates a detached node: empty name, text, attributes and children, no parent.
     */
    Node() : parent(nullptr) {}

    // --- Setters ---
    void setTagName(const std::string& name) { tagName = name; }
    void setInnerText(const std::string& text) { innerText = text; }
    /// Inserts the attribute, or overwrites the value if `key` already exists.
    void setAttribute(const std::string& key, const std::string& value) { attributes[key] = value; }

    // --- Getters ---
    const std::string& getTagName() const { return tagName; }
    const std::string& getInnerText() const { return innerText; }
    const std::map<std::string, std::string>& getAttributes() const { return attributes; }

    /**
     * @brief Appends a new child element to this node.
     *
     * The child gets `name` as its tag name and `this` as its parent, and is
     * owned by this node's `children` vector.
     *
     * @param name Tag name of the new element (e.g., "p", "a").
     * @return Non-owning pointer to the new child; valid for as long as the
     *         child remains in `children`.
     */
    Node* createElement(const std::string& name) {
        auto child = std::make_unique<Node>();
        child->setTagName(name);
        child->parent = this;
        children.push_back(std::move(child));
        // The vector now owns the node; hand back an observer to it.
        return children.back().get();
    }

    /**
     * @brief Writes this subtree to std::cout for debugging.
     *
     * Layout per node: the opening tag with its attributes, immediately
     * followed by the node's text (if any) on the same line; then every child
     * one level deeper; then the closing tag on its own line. Nodes without
     * text or children still get a closing tag on the following line.
     * Attributes appear in the map's (sorted) order and values are written
     * verbatim, without escaping. Each line ends with std::endl.
     *
     * @param indent Nesting depth; two spaces are written per level.
     */
    void print(int indent = 0) const {
        // Two spaces per level; a non-positive depth writes nothing.
        const auto writeIndent = [indent] {
            for (int level = 0; level < indent; ++level) std::cout << "  ";
        };

        // Opening tag, attributes, and the node's own text on one line.
        writeIndent();
        std::cout << "<" << tagName;
        for (const auto& attr : attributes) {
            std::cout << " " << attr.first << "=\"" << attr.second << "\"";
        }
        std::cout << ">";
        if (!innerText.empty()) {
            std::cout << innerText;
        }
        std::cout << std::endl;

        // Children, one level deeper (no-op for leaf nodes).
        for (const auto& child : children) {
            child->print(indent + 1);
        }

        // Every node gets an explicit closing tag; the tree does not record
        // whether an element was written in self-closing form.
        writeIndent();
        std::cout << "</" << tagName << ">" << std::endl;
    }
};

/**
 * @brief Builds an attribute map from the tokens collected for one tag.
 *
 * `tokens[0]` is the tag name and is skipped. The remaining tokens may use
 * either of two layouts:
 *
 *  - Combined: `key=value`, `key="value"`, `key='value'`, or a bare `key`
 *    (boolean attribute, stored with an empty value).
 *  - Split, as emitted by the tokenizer in parseHtmlStream, which pushes the
 *    key before the '=' and the quoted value after it:
 *      `key`, `=`, `value`   (the token after a lone `=` is taken verbatim,
 *                             even if it contains '=' or quote characters)
 *      `key`, `=value`
 *
 * A token that begins with '=' is attached to the bare key immediately before
 * it. If there is no such key, the token is handled like a combined token,
 * i.e. it produces an entry with an empty key, as before.
 *
 * @param tokens Tag name followed by attribute tokens.
 * @return Map of attribute names to values; later duplicates overwrite earlier ones.
 */
std::map<std::string, std::string> parseAttributes(const std::vector<std::string>& tokens) {
    std::map<std::string, std::string> attrs;

    // Removes one pair of matching surrounding quotes, if present.
    const auto stripQuotes = [](std::string value) {
        const bool quoted = value.length() >= 2 &&
                            (value.front() == '"' || value.front() == '\'') &&
                            value.back() == value.front();
        if (quoted) {
            return value.substr(1, value.length() - 2);
        }
        return value;
    };

    const std::string* pendingKey = nullptr;  // Bare key that may still receive a value
    bool valueFollows = false;                // A lone "=" was seen after pendingKey

    // Index 0 is the tag name; an empty vector simply skips the loop.
    for (std::size_t i = 1; i < tokens.size(); ++i) {
        const std::string& token = tokens[i];

        if (valueFollows) {
            // The tokenizer already removed the quotes, so keep the text as is.
            attrs[*pendingKey] = token;
            pendingKey = nullptr;
            valueFollows = false;
            continue;
        }

        const std::size_t eqPos = token.find('=');

        if (eqPos == std::string::npos) {
            // Bare key: boolean attribute unless an "=..." token follows.
            attrs[token] = "";
            pendingKey = &token;
            continue;
        }

        if (eqPos == 0 && pendingKey != nullptr) {
            if (token.length() == 1) {
                valueFollows = true;  // `key`, `=`, `value`
            } else {
                attrs[*pendingKey] = stripQuotes(token.substr(1));  // `key`, `=value`
                pendingKey = nullptr;
            }
            continue;
        }

        // Combined `key=value` token.
        attrs[token.substr(0, eqPos)] = stripQuotes(token.substr(eqPos + 1));
        pendingKey = nullptr;
    }
    return attrs;
}

/**
 * @brief Core HTML parsing logic that reads character by character from an input stream.
 * @param inputStream The input stream (e.g., `ifstream` for file, `stringstream` for string).
 * @return A unique_ptr to the root Node of the parsed HTML tree.
 */
std::unique_ptr<Node> parseHtmlStream(std::istream& inputStream) {
    std::unique_ptr<Node> root = std::make_unique<Node>();
    root->setTagName("root"); // Create a conceptual root node for the document

Node* currentNode = root.get(); // Pointer to the currently active node in the tree

std::string currentTokenPart = "";      // Buffer for collecting tag names, attribute keys, or partial attribute values
    std::string textBuffer = "";            // Buffer for collecting inner text content

bool inTag = false;             // True when parser is inside '<' and '>' of a tag
    bool inClosingTag = false;      // True when parsing a closing tag (e.g., after `</`)
    bool inAttributeValue = false;  // True when parsing a quoted attribute value (e.g., `value="...")
    char attributeQuoteType = ' ';  // Stores the type of quote (`'` or `"`) used for the attribute value

std::vector<std::string> tagTokens; // Stores the tag name and all its attributes as individual tokens

char current; // Current character being processed

// Loop through the input stream character by character
    while (inputStream.get(current)) {
        if (inTag) {
            // --- Parser is currently inside an HTML tag (`<...>` or `</...>`) ---
            if (inAttributeValue) {
                // Currently parsing content within an attribute's quoted value
                if (current == attributeQuoteType) {
                    // End of the attribute value (closing quote found)
                    inAttributeValue = false;
                    tagTokens.push_back(currentTokenPart); // Add the complete attribute value to tokens
                    currentTokenPart = "";                 // Reset buffer for next part
                } else {
                    currentTokenPart += current; // Continue accumulating the attribute value
                }
            } else if (current == ' ' || current == '\t' || current == '\n' || current == '\r') {
                // Whitespace: separates tag name, attributes, or self-closing markers
                if (!currentTokenPart.empty()) {
                    tagTokens.push_back(currentTokenPart); // Add the accumulated part (tag name or attribute key/value)
                    currentTokenPart = "";                 // Reset buffer
                }
            } else if (current == '=' && !inClosingTag) {
                // Encountered an equals sign (for `key=value` attributes)
                if (!currentTokenPart.empty()) {
                    tagTokens.push_back(currentTokenPart); // Push the attribute key
                    currentTokenPart = "";
                }
                currentTokenPart += current; // Add the '=' to the current part (it will be combined with value later)
            } else if (current == '"' || current == '\'') {
                // Start of an attribute value (quoted)
                // If there's an accumulated part, it should be the key or `key=`
                if (!currentTokenPart.empty()) {
                    tagTokens.push_back(currentTokenPart);
                }
                currentTokenPart = ""; // Clear for the actual value
                inAttributeValue = true;
                attributeQuoteType = current; // Store the quote type
            } else if (current == '/') {
                // Encountered a slash (`/`). Could be for:
                // 1. Self-closing tag (e.g., `<br/>`, `<input />`)
                // 2. Start of a closing tag (e.g., `</div`) - this is already handled by `inClosingTag` check before `inTag` starts
                if (!currentTokenPart.empty()) {
                    tagTokens.push_back(currentTokenPart);
                }
                currentTokenPart = ""; // Reset for '/' itself
                currentTokenPart += current; // Add '/' as a token part
            } else if (current == '>') {
                // --- End of a tag (`>` found) ---
                inTag = false; // Exiting the tag processing state

// If there's any remaining accumulated part, add it to tokens
                if (!currentTokenPart.empty()) {
                    tagTokens.push_back(currentTokenPart);
                    currentTokenPart = "";
                }

if (inClosingTag) {
                    // This was a closing tag (e.g., `</h1>`)
                    if (!tagTokens.empty()) {
                        std::string closingTagName = tagTokens[0];
                        // Trim trailing '/' if it was parsed as part of the tag name (e.g. `br/` due to tokenization)
                        if (!closingTagName.empty() && closingTagName.back() == '/') {
                            closingTagName.pop_back();
                        }

// Check if the closing tag matches the current node's tag name
                        if (currentNode->getTagName() == closingTagName) {
                            if (currentNode->parent) {
                                currentNode = currentNode->parent; // Move up to the parent node
                            } else {
                                // Error: Tried to close the root node or unmatched closing tag without a parent
                                std::cerr << "Warning: Attempted to close root tag or unmatched closing tag: </" << closingTagName << ">" << std::endl;
                            }
                        } else {
                            // Mismatched closing tag (e.g., `<div><span></div>`)
                            std::cerr << "Error: Mismatched closing tag. Expected </" << currentNode->getTagName()
                                      << ">, got </" << closingTagName << ">" << std::endl;
                            // For robustness, you might choose to still move up or try to find a matching ancestor.
                            // For this simple parser, we'll still attempt to move up, assuming it's a parse error.
                            if (currentNode->parent) {
                                currentNode = currentNode->parent;
                            }
                        }
                    } else {
                        std::cerr << "Warning: Empty closing tag detected (e.g., </>)!" << std::endl;
                    }
                    inClosingTag = false; // Reset the closing tag state
                } else {
                    // This was an opening tag (e.g., `<h1>`, `<div id="x">`)
                    // Or a self-closing tag (e.g., `<br/>`, `<img />`)
                    if (!tagTokens.empty()) {
                        std::string tagName = tagTokens[0]; // The first token is the tag name
                        bool isSelfClosing = false;

// Check if the tag is self-closing
                        // 1. If the tag name itself ends with '/' (e.g., `<br/>` where `br/` is the token)
                        if (!tagName.empty() && tagName.back() == '/') {
                            isSelfClosing = true;
                            tagName.pop_back(); // Remove the '/' from the tag name
                        }
                        // 2. If the last token is just "/" (e.g., `<input />` where `/` is a separate token)
                        else if (!tagTokens.empty() && tagTokens.back() == "/") {
                            isSelfClosing = true;
                            tagTokens.pop_back(); // Remove the "/" token from the list
                        }

// Create the new node as a child of the current node
                        Node* newNode = currentNode->createElement(tagName);
                        // Parse attributes from the collected tokens
                        auto attrs = parseAttributes(tagTokens);
                        for (auto const& pair : attrs) {
                            newNode->setAttribute(pair.first, pair.second);
                        }
                        currentNode = newNode; // Move `currentNode` down to the newly created element

if (isSelfClosing) {
                            // For self-closing tags, immediately move back up to the parent
                            // as they do not contain children or inner text.
                            if (currentNode->parent) {
                                currentNode = currentNode->parent;
                            }
                        }
                    } else {
                        std::cerr << "Warning: Empty or malformed opening tag detected: < >" << std::endl;
                    }
                }
                tagTokens.clear(); // Clear tokens for the next tag
                textBuffer = "";   // Clear any accumulated text before a new tag or content starts
            } else {
                // Accumulate characters for the tag name or an attribute key/value part
                currentTokenPart += current;
            }
        } else {
            // --- Parser is currently outside a tag, processing text content or looking for new tags ---
            if (current == '<') {
                // A new tag is starting.
                // If there's accumulated text, assign it as innerText to the current node
                if (!textBuffer.empty()) {
                    // Trim leading/trailing whitespace/newlines from the text content
                    size_t first = textBuffer.find_first_not_of(" \t\n\r");
                    size_t last = textBuffer.find_last_not_of(" \t\n\r");
                    if (std::string::npos != first) {
                        currentNode->setInnerText(textBuffer.substr(first, (last - first + 1)));
                    }
                    textBuffer = ""; // Reset text buffer for next content
                }

// Peek at the next character to determine the type of tag
                char nextChar;
                if (inputStream.get(nextChar)) { // Attempt to read the next character without consuming it fully yet
                    if (nextChar == '/') {
                        // It's a closing tag (e.g., `</div`)
                        inClosingTag = true;
                        inTag = true; // Enter the tag parsing state
                    } else if (nextChar == '!') {
                        // It's a comment (``) or DOCTYPE (`<!DOCTYPE ...>`)
                        // Simple skipping logic: read until the next `>`
                        while (inputStream.get(current) && current != '>');
                        continue; // Continue to the next character in the main loop
                    } else {
                        // It's a regular opening tag (e.g., `<div>`)
                        inTag = true;          // Enter the tag parsing state
                        currentTokenPart += nextChar; // Add the actual tag name character
                    }
                } else {
                    // Unexpected end of file after '<'
                    std::cerr << "Warning: Unexpected EOF after '<'" << std::endl;
                    break; // Exit parsing loop
                }
            } else {
                // Accumulate characters as inner text content for the current node
                textBuffer += current;
            }
        }
    }

// After the loop, if there's any remaining text in the buffer, assign it
    if (!textBuffer.empty()) {
        size_t first = textBuffer.find_first_not_of(" \t\n\r");
        size_t last = textBuffer.find_last_not_of(" \t\n\r");
        if (std::string::npos != first) {
            currentNode->setInnerText(textBuffer.substr(first, (last - first + 1)));
        }
    }

return root; // Return the root of the parsed HTML tree
}

/**
 * @brief Opens an HTML file and builds its DOM tree with parseHtmlStream.
 * @param file Path of the HTML file to read.
 * @return The root Node of the parsed tree; nullptr (after reporting on stderr) when the file
 *         cannot be opened.
 */
std::unique_ptr<Node> parserdocument(std::string file) {
    std::ifstream inputFile(file);

    // Success path first: hand the open stream to the shared parsing routine.
    if (inputFile.is_open()) {
        return parseHtmlStream(inputFile);
    }

    std::cerr << "Unable to open: " << file << std::endl;
    return nullptr;
}

// --- Demonstration driver: parses a fixed set of HTML snippets and prints each resulting tree ---
int main() {
    // One entry per demo: the banner printed before parsing and the markup to parse.
    struct DemoCase {
        const char* banner;
        const char* html;
    };

    const DemoCase demoCases[] = {
        {"--- Test Case 1: Empty tag <h1></h1> ---",
         "<h1></h1>"},
        {"--- Test Case 2: Tag with text content <div>Hello World</div> ---",
         "<div>Hello World</div>"},
        {"--- Test Case 3: Nested tags with mixed content ---",
         "<body><p>This is <b>bold</b> text.</p></body>"},
        {"--- Test Case 4: Tags with attributes and self-closing tags ---",
         "<div id=\"main\" class='container'><img src=\"image.jpg\" alt=\"My Image\" /><br></div>"},
        {"--- Test Case 5: Empty tag with whitespace between <h1>  </h1> ---",
         "<h1>  </h1>"},
        {"--- Test Case 6: Malformed closing tag (mismatch) ---",
         "<div><span></div>"}, // closing tag does not match the open <span>
        {"--- Test Case 7: Comment and DOCTYPE ---",
         "<!DOCTYPE html>\n<body>Content</body>"},
    };

    for (const DemoCase& demo : demoCases) {
        std::cout << demo.banner << std::endl;

        const std::string markup = demo.html;
        std::stringstream markupStream(markup);
        const std::unique_ptr<Node> tree = parseHtmlStream(markupStream);
        if (tree) {
            tree->print();
        }
        std::cout << "\n";
    }

    // Parsing from a file with parserdocument (kept disabled; it writes test.html first):
    // std::cout << "--- Test Case 8: Parsing from file (test.html) ---" << std::endl;
    // std::ofstream outfile("test.html");
    // outfile << "<html><body><p>Hello from file!</p></body></html>";
    // outfile.close();
    //
    // std::unique_ptr<Node> root8 = parserdocument("test.html");
    // if (root8) {
    //     root8->print();
    // } else {
    //     std::cout << "Failed to parse test.html" << std::endl;
    // }

    return 0;
}