use miette::{Diagnostic, Error, IntoDiagnostic, Report, Result, SourceSpan, bail, miette};
use std::{
    fs::File,
    io::{BufReader, Bytes, Read},
    iter::Peekable,
    path::{Path, PathBuf},
};
use thiserror::Error;

use crate::{BytesToChar, SourceFile};

/// Streaming tokenizer for pbrt-style scene files.
///
/// Tokens are either a single `[` or `]`, a quoted string (quotes included),
/// or a bare word delimited by whitespace, brackets, or a `#` comment.
pub struct Tokenizer {
    inner: InnerTokenizer,
    path: PathBuf,
    base_path: PathBuf,
    /// At most one token pushed back by `next_if`.
    peeked: Option<Option<Result<(String, SourceSpan)>>>,
    last_span: SourceSpan,
}

impl Tokenizer {
    pub fn new(path: PathBuf, base_path: PathBuf) -> Result<Self> {
        Ok(Self {
            peeked: None,
            inner: InnerTokenizer::new(&path)?,
            path,
            base_path,
            last_span: SourceSpan::new(0.into(), 0),
        })
    }

    pub fn get_path(&self) -> &Path {
        &self.path
    }

    pub fn get_base_path(&self) -> &Path {
        &self.base_path
    }
}

struct InnerTokenizer {
    input_iterator: Peekable<BytesToChar<Bytes<BufReader<File>>>>,
}

impl InnerTokenizer {
    fn new(path: impl AsRef<Path>) -> Result<Self> {
        Ok(Self {
            input_iterator: BytesToChar::new(
                BufReader::new(File::open(path).into_diagnostic()?).bytes(),
            )
            .peekable(),
        })
    }
}

impl InnerTokenizer {
    fn next(&mut self) -> Option<Result<(String, SourceSpan)>> {
        // Skip whitespace and `#` line comments before the next token.
        while self.input_iterator.peek().is_some_and(|c| {
            c.as_ref()
                .is_ok_and(|&(_, c)| c.is_whitespace() || c == '#')
        }) {
            if self
                .input_iterator
                .peek()
                .is_some_and(|c| c.as_ref().is_ok_and(|&(_, c)| c == '#'))
            {
                // Consume the comment up to (and including) the end of line.
                while self
                    .input_iterator
                    .next()
                    .is_some_and(|c| c.is_ok_and(|(_, c)| c != '\n'))
                {}
            } else {
                match self.input_iterator.next()? {
                    Ok(_) => (),
                    Err(e) => return Some(Err(e)),
                }
            }
        }
        match self.input_iterator.next() {
            Some(Ok((i, '['))) => Some(Ok((String::from('['), SourceSpan::new(i.into(), 1)))),
            Some(Ok((i, ']'))) => Some(Ok((String::from(']'), SourceSpan::new(i.into(), 1)))),
            Some(Ok((i, '"'))) => {
                // Quoted string: read until the closing quote, keeping both quotes.
                let mut r = String::from('"');
                while let Some(p) = self
                    .input_iterator
                    .next_if(|c| c.as_ref().is_ok_and(|&(_, c)| c != '"'))
                {
                    match p {
                        Ok((_, c)) => r.push(c),
                        Err(e) => return Some(Err(e)),
                    }
                }
                if self
                    .input_iterator
                    .next()
                    .is_none_or(|c| !c.is_ok_and(|(_, c)| c == '"'))
                {
                    return Some(Err(From::from(UnfinishedString {
                        src: SourceFile {
                            // NOTE: `InnerTokenizer` does not track the real
                            // source path, so a placeholder is used here.
                            path: PathBuf::from("ray-tracing-pbrt-scene/example.pbrt"),
                        },
                        bad_bit: SourceSpan::new(i.into(), r.len()),
                    })));
                }
                r.push('"');
                let len = r.len();
                Some(Ok((r, SourceSpan::new(i.into(), len))))
            }
            Some(Ok((i, c))) => {
                // Bare word: read until whitespace, a bracket, or a comment.
                let mut r = String::new();
                r.push(c);
                while let Some(p) = self.input_iterator.next_if(|c| {
                    c.as_ref()
                        .is_ok_and(|&(_, c)| c != '#' && c != '[' && c != ']' && !c.is_whitespace())
                }) {
                    match p {
                        Ok((_, c)) => r.push(c),
                        Err(e) => return Some(Err(e)),
                    }
                }
                let len = r.len();
                Some(Ok((r, SourceSpan::new(i.into(), len))))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}

impl Tokenizer {
    pub fn next_inner(&mut self) -> Option<Result<(String, SourceSpan)>> {
        match self.peeked.take() {
            Some(v) => v,
            None => self.inner.next(),
        }
    }

    pub fn next(&mut self) -> Option<Result<String>> {
        let v = self.next_inner();
        if let Some(Ok((_, s))) = v {
            self.last_span = s;
        }
        v.map(|o| o.map(|i| i.0))
    }

    /// Returns the next token only if `func` accepts it; otherwise the token
    /// is pushed back and `None` is returned.
    pub fn next_if(&mut self, func: impl FnOnce(&String) -> bool) -> Option<Result<String>> {
        match self.next_inner() {
            Some(Ok((matched, s))) if func(&matched) => {
                self.last_span = s;
                Some(Ok(matched))
            }
            other => {
                assert!(self.peeked.is_none());
                self.peeked = Some(other);
                None
            }
        }
    }

    pub fn parse_next<T>(&mut self) -> Result<T>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::error::Error + std::marker::Send + std::marker::Sync + 'static,
    {
        let s = self.next().ok_or_else(|| MissingValueError {
            src: SourceFile {
                path: self.path.to_path_buf(),
            },
            bad_bit: self.last_span(),
        })??;
        s.parse::<T>().into_diagnostic().map_err(|e| {
            ParsingError {
                src: SourceFile {
                    path: self.path.clone(),
                },
                bad_bit: self.last_span,
                error: Some(e),
            }
            .into()
        })
    }
    pub fn parse_next_if<T>(&mut self, func: impl FnOnce(&String) -> bool) -> Option<Result<T>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::error::Error + std::marker::Send + std::marker::Sync + 'static,
    {
        let s = self.next_if(func)?;
        Some(match s {
            Ok(s) => s.parse::<T>().into_diagnostic().map_err(|e| {
                ParsingError {
                    src: SourceFile {
                        path: self.path.clone(),
                    },
                    bad_bit: self.last_span,
                    error: Some(e),
                }
                .into()
            }),
            Err(e) => Err(e),
        })
    }

    pub fn last_span(&self) -> SourceSpan {
        self.last_span
    }

    /// Parses a single value, optionally wrapped in `[` `]`.
    pub fn parse_parameter<T>(&mut self) -> Result<T>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
    {
        let p = self.next().ok_or_else(|| ListBeginError {
            src: SourceFile {
                path: self.path.to_path_buf(),
            },
            bad_bit: self.last_span(),
        })??;
        match p.as_str() {
            "[" => {
                let d = self.parse_next()?;
                if !self
                    .next()
                    .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
                {
                    return Err(ListEndError {
                        src: SourceFile {
                            path: self.path.to_path_buf(),
                        },
                        bad_bit: self.last_span(),
                    }
                    .into());
                }
                Ok(d)
            }
            s => Ok(s.parse::<T>().into_diagnostic()?),
        }
    }

    /// Parses a bracketed list of values: `[ v1 v2 ... ]`.
    pub fn parse_list<T>(&mut self) -> Result<Vec<T>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
    {
        let mut data = Vec::new();
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "["))
        {
            return Err(ListBeginError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        while let Some(p) = self.parse_next_if(|p| p != "]").transpose()? {
            data.push(p);
        }
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
        {
            return Err(ListEndError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        Ok(data)
    }

    /// Parses a bracketed list whose elements are consumed two at a time and
    /// combined with `f`.
    pub fn parse_list_2<T, P, F>(&mut self, f: F) -> Result<Vec<P>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
        F: Fn(T, T) -> P,
    {
        let mut data = Vec::new();
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "["))
        {
            return Err(ListBeginError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        while let Some(pa) = self.parse_next_if(|p| p != "]").transpose()? {
            if let Some(pb) = self.parse_next_if(|p| p != "]").transpose()? {
                data.push(f(pa, pb));
            } else {
                return Err(UncompleteError {
                    src: SourceFile {
                        path: self.path.to_path_buf(),
                    },
                    bad_bit: self.last_span(),
                }
                .into());
            }
        }
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
        {
            return Err(ListEndError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        Ok(data)
    }
    /// Parses a bracketed list whose elements are consumed three at a time and
    /// combined with `f`.
    pub fn parse_list_3<T, P, F>(&mut self, f: F) -> Result<Vec<P>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
        F: Fn(T, T, T) -> P,
    {
        let mut data = Vec::new();
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "["))
        {
            return Err(ListBeginError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        while let Some(pa) = self.parse_next_if(|p| p != "]").transpose()? {
            if let Some(pb) = self.parse_next_if(|p| p != "]").transpose()? {
                if let Some(pc) = self.parse_next_if(|p| p != "]").transpose()? {
                    data.push(f(pa, pb, pc));
                } else {
                    return Err(UncompleteError {
                        src: SourceFile {
                            path: self.path.to_path_buf(),
                        },
                        bad_bit: self.last_span(),
                    }
                    .into());
                }
            } else {
                return Err(UncompleteError {
                    src: SourceFile {
                        path: self.path.to_path_buf(),
                    },
                    bad_bit: self.last_span(),
                }
                .into());
            }
        }
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
        {
            return Err(ListEndError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        Ok(data)
    }

    pub fn get_src(&self) -> SourceFile {
        SourceFile {
            path: self.path.clone(),
        }
    }
}

/// Parses a `"key" value ...` dictionary into an ad-hoc `Dict` struct,
/// rejecting duplicate and unknown keys and filling in defaults for keys
/// that were not supplied.
macro_rules! parse_dict {
    ($tokenizer:expr => $($name_decl:ident, $type:ty, $default:expr);+ => $($name_parsing:ident, $expr:expr, $parsing:expr);+ ) => {
        {
            $(
                let mut $name_decl = None;
            )+
            while let Some(p) = $tokenizer.next_if(|p| p.starts_with('"')).transpose()? {
                match p.as_str() {
                    $(
                        $expr => {
                            if $name_parsing.is_none() {
                                $name_parsing = Some($parsing);
                            } else {
                                return Err($crate::tokenizer::DuplicateDictEntryError {
                                    bad_bit: $tokenizer.last_span(),
                                    src: $tokenizer.get_src(),
                                }
                                .into())
                            }
                        }
                    )+
                    _ => {
                        return Err($crate::tokenizer::UnknownDictEntryError {
                            bad_bit: $tokenizer.last_span(),
                            src: $tokenizer.get_src(),
                        }
                        .into())
                    }
                }
            }
            #[derive(Debug)]
            struct Dict {
                $(
                    $name_decl: $type,
                )+
            }
            $(
                let $name_decl = $name_decl.unwrap_or_else(|| $default);
            )*
            Dict {
                $($name_decl,)*
            }
        }
    };
}

#[derive(Error, Debug, Diagnostic)]
#[error("Duplicate dict entry error")]
#[diagnostic(help("multiple dict entries with the same key"))]
pub struct DuplicateDictEntryError {
    #[source_code]
    pub src: SourceFile,
    #[label("Here")]
    pub bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Unknown dict entry error")]
pub struct UnknownDictEntryError {
    #[source_code]
    pub src: SourceFile,
    #[label("Here")]
    pub bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Parsing error")]
#[diagnostic(help("could not parse value"))]
struct ParsingError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
    #[related]
    error: Option<Error>,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Unfinished group")]
#[diagnostic(help("the group was not completed"))]
struct UncompleteError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Unfinished string")]
struct UnfinishedString {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("list error")]
#[diagnostic(help("expected a `[` to start a list"))]
struct ListBeginError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("list error")]
#[diagnostic(help("expected a `]` to close the list"))]
struct ListEndError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("value expected")]
#[diagnostic(help("expected a value"))]
struct MissingValueError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}
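
#[cfg(test)]
mod tests {
    // A minimal usage sketch rather than a full test suite: it shows the
    // tokenizer skipping a `#` comment, keeping the quotes on string tokens,
    // and parsing a bracketed list with `parse_list`. It assumes that
    // `BytesToChar` yields `(byte_offset, char)` pairs as the code above
    // suggests, and that writing a scratch file to the system temp directory
    // is acceptable in this environment. The file and test names are made up
    // for illustration.
    use miette::{IntoDiagnostic, Result};

    use super::Tokenizer;

    #[test]
    fn tokenizes_a_simple_statement() -> Result<()> {
        let path = std::env::temp_dir().join("tokenizer_smoke_test.pbrt");
        std::fs::write(&path, "# a comment\nShape \"sphere\" [ 1 2 3 ]\n").into_diagnostic()?;

        let base = path.parent().unwrap().to_path_buf();
        let mut tok = Tokenizer::new(path, base)?;

        // Comments and whitespace are skipped; quotes stay on string tokens.
        assert_eq!(tok.next().transpose()?.as_deref(), Some("Shape"));
        assert_eq!(tok.next().transpose()?.as_deref(), Some("\"sphere\""));
        // `parse_list` consumes the surrounding brackets and parses each element.
        assert_eq!(tok.parse_list::<i32>()?, vec![1, 2, 3]);
        assert!(tok.next().is_none());
        Ok(())
    }
}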