Skip to main content

stylex_css_parser/
token_types.rs

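/*!
Token types and tokenization utilities for CSS parsing.

Wraps `cssparser` tokens in the flat [`SimpleToken`] enum and exposes them
through [`TokenList`], a cursor with peek, consume and rewind support.
*/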
4
5use crate::CssResult;
6use cssparser::{Parser, ParserInput, Token as CssToken};
7use log::error;
8use stylex_macros::stylex_panic;
9
/// Simple token representation
///
/// A flat, owned token type: every variant that carries text owns its `String`,
/// so tokens can be cloned and compared freely.
#[derive(Debug, Clone, PartialEq)]
pub enum SimpleToken {
  /// An identifier.
  Ident(String),
  /// An at-keyword; the payload is the keyword text.
  AtKeyword(String),
  /// A hash token; the payload is the hash text.
  Hash(String),
  /// A string value.
  String(String),
  /// A unitless number.
  Number(f64),
  /// A number with a unit attached.
  Dimension { value: f64, unit: String },
  /// A percentage; the payload is the numeric part only.
  Percentage(f64),
  /// A URL; the payload is the URL text.
  Url(String),
  /// The name of a function; its arguments follow as separate tokens.
  Function(String),
  /// A single delimiter character.
  Delim(char),
  /// `(`
  LeftParen,
  /// `)`
  RightParen,
  /// `[`
  LeftBracket,
  /// `]`
  RightBracket,
  /// `{`
  LeftBrace,
  /// `}`
  RightBrace,
  /// `,`
  Comma,
  /// `;`
  Semicolon,
  /// `:`
  Colon,
  /// Any run of whitespace.
  Whitespace,
  /// A comment; the payload is the comment text.
  Comment(String),
  /// Anything that has no dedicated variant; the payload is a textual description.
  Unknown(String),
}

impl SimpleToken {
  /// Extract the textual value of the token.
  ///
  /// Returns the payload for every variant that carries one (numbers are
  /// rendered with `to_string`, dimensions as `<value><unit>`) and `None` for
  /// purely structural tokens such as parentheses, commas and whitespace.
  pub fn extract_value(&self) -> Option<String> {
    match self {
      // Text-carrying variants: hand back a copy of the payload.
      // `Url` belongs here too: it carries text just like `String` does.
      SimpleToken::Function(text)
      | SimpleToken::Ident(text)
      | SimpleToken::String(text)
      | SimpleToken::Hash(text)
      | SimpleToken::AtKeyword(text)
      | SimpleToken::Comment(text)
      | SimpleToken::Url(text)
      | SimpleToken::Unknown(text) => Some(text.clone()),
      SimpleToken::Number(value) | SimpleToken::Percentage(value) => Some(value.to_string()),
      SimpleToken::Dimension { value, unit } => Some(format!("{}{}", value, unit)),
      SimpleToken::Delim(ch) => Some(ch.to_string()),
      // Structural tokens are listed explicitly (no catch-all) so that adding a
      // new variant forces a decision here instead of silently yielding `None`.
      SimpleToken::LeftParen
      | SimpleToken::RightParen
      | SimpleToken::LeftBracket
      | SimpleToken::RightBracket
      | SimpleToken::LeftBrace
      | SimpleToken::RightBrace
      | SimpleToken::Comma
      | SimpleToken::Semicolon
      | SimpleToken::Colon
      | SimpleToken::Whitespace => None,
    }
  }

  /// Extract the numeric value of `Number`, `Percentage` and `Dimension` tokens.
  ///
  /// For `Dimension` the unit is discarded. Every other token yields `None`.
  pub fn extract_number(&self) -> Option<f64> {
    match self {
      SimpleToken::Number(value)
      | SimpleToken::Percentage(value)
      | SimpleToken::Dimension { value, .. } => Some(*value),
      _ => None,
    }
  }
}
66
67fn map_css_token(token: &CssToken) -> Option<SimpleToken> {
68  use SimpleToken as T;
69  match token {
70    CssToken::Ident(v) => Some(T::Ident(v.as_ref().to_string())),
71    CssToken::AtKeyword(v) => Some(T::AtKeyword(v.as_ref().to_string())),
72    CssToken::IDHash(v) | CssToken::Hash(v) => Some(T::Hash(v.as_ref().to_string())),
73    CssToken::QuotedString(v) => Some(T::String(v.as_ref().to_string())),
74    CssToken::Number { value, .. } => Some(T::Number(*value as f64)),
75    CssToken::Percentage { unit_value, .. } => Some(T::Percentage(*unit_value as f64)),
76    CssToken::Dimension { value, unit, .. } => Some(T::Dimension {
77      value: *value as f64,
78      unit: unit.as_ref().to_string(),
79    }),
80    CssToken::Function(v) => Some(T::Function(v.as_ref().to_string())),
81    // Map parenthesis via Delim tokens if present
82    CssToken::Delim('(') => Some(T::LeftParen),
83    CssToken::Delim(')') => Some(T::RightParen),
84    CssToken::Delim(c) => Some(T::Delim(*c)),
85    CssToken::WhiteSpace(_) => Some(T::Whitespace),
86    CssToken::Comma => Some(T::Comma),
87    CssToken::Colon => Some(T::Colon),
88    CssToken::Semicolon => Some(T::Semicolon),
89    CssToken::BadUrl(_) | CssToken::BadString(_) => Some(T::Unknown(format!("{:?}", token))),
90    CssToken::UnquotedUrl(url) => Some(T::String(url.as_ref().to_string())),
91    CssToken::CloseParenthesis => Some(T::RightParen),
92    CssToken::SquareBracketBlock => Some(T::Delim('[')),
93    CssToken::CloseSquareBracket => Some(T::Delim(']')),
94    CssToken::CurlyBracketBlock => Some(T::Delim('{')),
95    CssToken::CloseCurlyBracket => Some(T::Delim('}')),
96    CssToken::CDC => Some(T::Delim('>')), // --> CSS comment close
97    CssToken::CDO => Some(T::Delim('<')), // <!-- CSS comment open
98
99    // Remaining tokens mapped to Unknown (e.g., future cssparser additions)
100    _ => Some(T::Unknown(format!("{:?}", token))),
101  }
102}
103
104/// Recursively tokenize nested content, handling ParenthesisBlock and other nested structures
105fn tokenize_nested_content(parser: &mut Parser, tokens: &mut Vec<SimpleToken>) {
106  while let Ok(inner_token) = parser.next_including_whitespace_and_comments() {
107    match &inner_token {
108      // Handle nested ParenthesisBlock recursively
109      CssToken::ParenthesisBlock => {
110        // Add opening parenthesis
111        tokens.push(SimpleToken::LeftParen);
112
113        // Parse the nested parenthesis content recursively
114        if let Err(e) = parser.parse_nested_block(|nested_parser| {
115          tokenize_nested_content(nested_parser, tokens);
116          Ok::<(), cssparser::ParseError<()>>(())
117        }) {
118          error!("Error parsing nested content: {:?}", e);
119          stylex_panic!("Error parsing nested content: {:?}", e); // Exit on error
120        }
121
122        // Add closing parenthesis
123        tokens.push(SimpleToken::RightParen);
124      },
125      // Handle nested Function tokens
126      CssToken::Function(func_name) => {
127        // Add the function name token
128        tokens.push(SimpleToken::Function(func_name.as_ref().to_string()));
129
130        // Parse the function content recursively
131        if let Err(e) = parser.parse_nested_block(|nested_parser| {
132          tokenize_nested_content(nested_parser, tokens);
133          Ok::<(), cssparser::ParseError<()>>(())
134        }) {
135          error!("Error parsing nested content: {:?}", e);
136          stylex_panic!("Error parsing nested content: {:?}", e); // Exit on error
137        }
138
139        // Add closing paren token
140        tokens.push(SimpleToken::RightParen);
141      },
142      // Handle all other tokens normally
143      _ => {
144        if let Some(mapped_inner) = map_css_token(inner_token) {
145          tokens.push(mapped_inner);
146        }
147      },
148    }
149  }
150}
151
152fn tokenize_all(input: &str) -> Vec<SimpleToken> {
153  let mut input_buf = ParserInput::new(input);
154  let mut parser = Parser::new(&mut input_buf);
155
156  let mut tokens = Vec::new();
157  while let Ok(t) = parser.next_including_whitespace_and_comments() {
158    match &t {
159      // ENHANCED: Handle Function tokens by expanding their content
160      CssToken::Function(func_name) => {
161        // Add the function name token first
162        tokens.push(SimpleToken::Function(func_name.as_ref().to_string()));
163
164        // Parse the function content to get individual argument tokens
165        if let Err(e) = parser.parse_nested_block(|nested_parser| {
166          // Recursively tokenize everything inside the function parentheses
167          tokenize_nested_content(nested_parser, &mut tokens);
168          Ok::<(), cssparser::ParseError<()>>(())
169        }) {
170          error!("Error parsing nested content: {:?}", e);
171          stylex_panic!("Error parsing nested content: {:?}", e); // Exit on error
172        }
173
174        // Add closing paren token (cssparser consumes it automatically)
175        tokens.push(SimpleToken::RightParen);
176      },
177      // ENHANCED: Handle ParenthesisBlock tokens by expanding their content
178      CssToken::ParenthesisBlock => {
179        // Add opening parenthesis
180        tokens.push(SimpleToken::LeftParen);
181
182        // Parse the parenthesis content to get individual tokens
183        if let Err(e) = parser.parse_nested_block(|nested_parser| {
184          // Recursively tokenize everything inside the parentheses, handling nested structures
185          tokenize_nested_content(nested_parser, &mut tokens);
186          Ok::<(), cssparser::ParseError<()>>(())
187        }) {
188          error!("Error parsing nested content: {:?}", e);
189          stylex_panic!("Error parsing nested content: {:?}", e); // Exit on error
190        }
191
192        // Add closing parenthesis (cssparser consumes it automatically)
193        tokens.push(SimpleToken::RightParen);
194      },
195      // Handle all other tokens normally
196      _ => {
197        if let Some(mapped) = map_css_token(t) {
198          tokens.push(mapped);
199        }
200      },
201    }
202  }
203  tokens
204}
205
206/// A list of CSS tokens with parsing state
207pub struct TokenList {
208  pub tokens: Vec<SimpleToken>, // Made public for debugging
209  pub current_index: usize,
210}
211
212impl TokenList {
213  /// Create a new TokenList from a CSS string
214  pub fn new(input: &str) -> Self {
215    Self {
216      tokens: tokenize_all(input),
217      current_index: 0,
218    }
219  }
220
221  /// Consume the next token
222  pub fn consume_next_token(&mut self) -> CssResult<Option<SimpleToken>> {
223    if self.current_index < self.tokens.len() {
224      let token = self.tokens[self.current_index].clone();
225      self.current_index += 1;
226      Ok(Some(token))
227    } else {
228      Ok(None)
229    }
230  }
231
232  /// Peek at the next token without consuming it
233  pub fn peek(&mut self) -> CssResult<Option<SimpleToken>> {
234    if self.current_index < self.tokens.len() {
235      Ok(Some(self.tokens[self.current_index].clone()))
236    } else {
237      Ok(None)
238    }
239  }
240
241  /// Save the current position for potential rollback
242  pub fn save_position(&self) -> usize {
243    self.current_index
244  }
245
246  /// Restore to a previously saved position
247  pub fn restore_position(&mut self, position: usize) -> CssResult<()> {
248    if position <= self.tokens.len() {
249      self.current_index = position;
250      Ok(())
251    } else {
252      Err(crate::CssParseError::ParseError {
253        message: "Invalid position for restore".to_string(),
254      })
255    }
256  }
257
258  /// Get the first token (alias for peek)
259  pub fn first(&mut self) -> CssResult<Option<SimpleToken>> {
260    self.peek()
261  }
262
263  /// Set the current parsing index
264  pub fn set_current_index(&mut self, new_index: usize) {
265    self.current_index = new_index.min(self.tokens.len());
266  }
267
268  /// Rewind the parser by a number of positions
269  pub fn rewind(&mut self, positions: usize) {
270    self.current_index = self.current_index.saturating_sub(positions);
271  }
272
273  /// Check if the token list is empty
274  pub fn is_empty(&self) -> bool {
275    self.current_index >= self.tokens.len()
276  }
277
278  /// Get all tokens
279  pub fn get_all_tokens(&mut self) -> Vec<SimpleToken> {
280    self.tokens.clone()
281  }
282
283  /// Get a slice of tokens from start to end index
284  pub fn slice(&mut self, start: usize, end: Option<usize>) -> Vec<SimpleToken> {
285    let end = end.unwrap_or(self.current_index);
286    if start >= end || start >= self.tokens.len() {
287      return Vec::new();
288    }
289    self.tokens[start..end.min(self.tokens.len())].to_vec()
290  }
291}
292
#[cfg(test)]
mod tests {
  use super::*;

  /// Tokenizing a small declaration list yields tokens, and function calls are
  /// expanded into a name token followed by their arguments and a closing paren.
  #[test]
  fn test_basic_tokenization() {
    let mut list = TokenList::new("color: red;\nbackground: rgb(1, 2, 3)");
    assert!(list.peek().unwrap().is_some());

    let tokens = list.get_all_tokens();
    assert!(!tokens.is_empty());
    assert!(tokens.contains(&SimpleToken::Function("rgb".to_string())));
    // The input ends with the function call, so its synthetic closer is last.
    assert_eq!(tokens.last(), Some(&SimpleToken::RightParen));
  }

  /// `peek` does not advance the cursor; `consume_next_token` does.
  #[test]
  fn test_token_list_basic_peek_consume() {
    let mut list = TokenList::new("color: red;");
    let first = list.peek().unwrap();
    assert_eq!(first, Some(SimpleToken::Ident("color".to_string())));
    let consumed = list.consume_next_token().unwrap();
    assert_eq!(consumed, Some(SimpleToken::Ident("color".to_string())));
    let second = list.peek().unwrap();
    assert_eq!(second, Some(SimpleToken::Colon));
  }

  /// `rewind` steps the cursor back and `slice` copies an index window.
  #[test]
  fn test_rewind_and_slice() {
    let mut list = TokenList::new("a : b ; c");
    list.consume_next_token().unwrap(); // a
    list.consume_next_token().unwrap(); // whitespace after `a` (not the colon)
    list.rewind(1);
    // With cssparser-backed tokenizer, whitespace tokens are preserved, so the
    // cursor is back on the whitespace at index 1.
    assert_eq!(list.peek().unwrap(), Some(SimpleToken::Whitespace));

    let slice = list.slice(1, Some(4));
    assert_eq!(slice.len(), 3);
    // slice should include whitespace, then colon, then whitespace
    assert_eq!(slice[0], SimpleToken::Whitespace);
    assert_eq!(slice[1], SimpleToken::Colon);
    assert_eq!(slice[2], SimpleToken::Whitespace);
  }
}
330}