//! Tokenizer for PBRT scene files (ray-tracing2/ray-tracing-pbrt-scene/src/tokenizer.rs).

use crate::{BytesToChar, SourceFile};
use miette::{Diagnostic, IntoDiagnostic, Report, Result, SourceSpan, bail, miette};
use std::{
    fs::File,
    io::{BufReader, Bytes, Read},
    iter::Peekable,
    path::{Path, PathBuf},
};
use thiserror::Error;

/// Streaming tokenizer over a PBRT scene file, with one-token pushback.
pub struct Tokenizer {
    inner: InnerTokenizer,
    path: PathBuf,
    base_path: PathBuf,
    /// One-slot pushback buffer, mirroring `Peekable`'s `Option<Option<_>>` scheme.
    peeked: Option<Option<Result<(String, SourceSpan)>>>,
    /// Span of the most recently yielded token, used for error labels.
    last_span: SourceSpan,
}

impl Tokenizer {
    pub fn new(path: PathBuf, base_path: PathBuf) -> Result<Self> {
        Ok(Self {
            peeked: None,
            inner: InnerTokenizer::new(&path)?,
            path,
            base_path,
            last_span: SourceSpan::new(0.into(), 0),
        })
    }

    pub fn get_path(&self) -> &Path {
        &self.path
    }

    pub fn get_base_path(&self) -> &Path {
        &self.base_path
    }
}

/// Low-level tokenizer: yields `(token, span)` pairs from a character stream.
struct InnerTokenizer {
    input_iterator: Peekable<BytesToChar<Bytes<BufReader<File>>>>,
    /// Path of the file being tokenized, kept for error reporting.
    path: PathBuf,
}

impl InnerTokenizer {
    fn new(path: impl AsRef<Path>) -> Result<Self> {
        Ok(Self {
            input_iterator: BytesToChar::new(
                BufReader::new(File::open(&path).into_diagnostic()?).bytes(),
            )
            .peekable(),
            path: path.as_ref().to_path_buf(),
        })
    }

    /// Returns the next token and its byte span, skipping whitespace and
    /// `#` line comments. `None` signals end of input.
    fn next(&mut self) -> Option<Result<(String, SourceSpan)>> {
        // Skip runs of whitespace and comments.
        while self.input_iterator.peek().is_some_and(|c| {
            c.as_ref()
                .is_ok_and(|&(_, c)| c.is_whitespace() || c == '#')
        }) {
            if self
                .input_iterator
                .peek()
                .is_some_and(|c| c.as_ref().is_ok_and(|&(_, c)| c == '#'))
            {
                // A `#` comment runs to the end of the line.
                while self
                    .input_iterator
                    .next()
                    .is_some_and(|c| c.is_ok_and(|(_, c)| c != '\n'))
                {}
            } else {
                match self.input_iterator.next()? {
                    Ok(_) => (),
                    Err(e) => return Some(Err(e)),
                }
            }
        }
        match self.input_iterator.next() {
            // `[` and `]` are always single-character tokens.
            Some(Ok((i, '['))) => Some(Ok((String::from('['), SourceSpan::new(i.into(), 1)))),
            Some(Ok((i, ']'))) => Some(Ok((String::from(']'), SourceSpan::new(i.into(), 1)))),
            // Quoted strings are returned with their surrounding quotes.
            Some(Ok((i, '"'))) => {
                let mut r = String::from('"');
                while let Some(p) = self
                    .input_iterator
                    .next_if(|c| c.as_ref().is_ok_and(|&(_, c)| c != '"'))
                {
                    match p {
                        Ok((_, c)) => r.push(c),
                        Err(e) => return Some(Err(e)),
                    }
                }
                if self
                    .input_iterator
                    .next()
                    .is_none_or(|c| !c.is_ok_and(|(_, c)| c == '"'))
                {
                    return Some(Err(From::from(UnfinishedString {
                        src: SourceFile {
                            path: self.path.clone(),
                        },
                        bad_bit: SourceSpan::new(i.into(), r.len()),
                    })));
                }
                r.push('"');
                let len = r.len();
                Some(Ok((r, SourceSpan::new(i.into(), len))))
            }
            // Anything else is a bare token, delimited by whitespace, brackets,
            // or the start of a comment.
            Some(Ok((i, c))) => {
                let mut r = String::new();
                r.push(c);
                while let Some(p) = self.input_iterator.next_if(|c| {
                    c.as_ref()
                        .is_ok_and(|&(_, c)| c != '#' && c != '[' && c != ']' && !c.is_whitespace())
                }) {
                    match p {
                        Ok((_, c)) => r.push(c),
                        Err(e) => return Some(Err(e)),
                    }
                }
                let len = r.len();
                Some(Ok((r, SourceSpan::new(i.into(), len))))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}
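
// For example, the line `Shape "sphere" [ 1 2 ] # note` tokenizes to
// `Shape`, `"sphere"` (quotes preserved), `[`, `1`, `2`, `]`; the trailing
// comment is skipped.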

impl Tokenizer {
    /// Like `next`, but also returns the token's span; honors the pushback slot.
    pub fn next_inner(&mut self) -> Option<Result<(String, SourceSpan)>> {
        match self.peeked.take() {
            Some(v) => v,
            None => self.inner.next(),
        }
    }

    /// Returns the next token, recording its span in `last_span`.
    pub fn next(&mut self) -> Option<Result<String>> {
        let v = self.next_inner();
        if let Some(Ok((_, s))) = v {
            self.last_span = s;
        }
        v.map(|o| o.map(|i| i.0))
    }

    /// Returns the next token only if `func` accepts it; otherwise pushes it back.
    pub fn next_if(&mut self, func: impl FnOnce(&String) -> bool) -> Option<Result<String>> {
        match self.next_inner() {
            Some(Ok((matched, s))) if func(&matched) => {
                self.last_span = s;
                Some(Ok(matched))
            }
            other => {
                assert!(self.peeked.is_none());
                self.peeked = Some(other);
                None
            }
        }
    }

    /// Parses the next token as `T`, producing a labeled diagnostic on failure.
    pub fn parse_next<T>(&mut self) -> Result<T>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::error::Error + std::marker::Send + std::marker::Sync + 'static,
    {
        let s = self.next().ok_or_else(|| MissingValueError {
            src: SourceFile {
                path: self.path.to_path_buf(),
            },
            bad_bit: self.last_span(),
        })??;
        s.parse::<T>().into_diagnostic().map_err(|e| {
            ParsingError {
                src: SourceFile {
                    path: self.path.clone(),
                },
                bad_bit: self.last_span,
                error: Some(e),
            }
            .into()
        })
    }

    /// Parses the next token as `T` only if `func` accepts it; otherwise pushes it back.
    pub fn parse_next_if<T>(&mut self, func: impl FnOnce(&String) -> bool) -> Option<Result<T>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::error::Error + std::marker::Send + std::marker::Sync + 'static,
    {
        let s = self.next_if(func)?;
        Some(match s {
            Ok(s) => s.parse::<T>().into_diagnostic().map_err(|e| {
                ParsingError {
                    src: SourceFile {
                        path: self.path.clone(),
                    },
                    bad_bit: self.last_span,
                    error: Some(e),
                }
                .into()
            }),
            Err(e) => Err(e),
        })
    }

    /// Span of the most recently consumed token.
    pub fn last_span(&self) -> SourceSpan {
        self.last_span
    }

    /// Parses one parameter value: either a bare token or a single value
    /// wrapped in `[` `]`.
    pub fn parse_parameter<T>(&mut self) -> Result<T>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
    {
        let p = self.next().ok_or_else(|| ListBeginError {
            src: SourceFile {
                path: self.path.to_path_buf(),
            },
            bad_bit: self.last_span(),
        })??;
        match p.as_str() {
            "[" => {
                let d = self.parse_next()?;
                if !self
                    .next()
                    .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
                {
                    return Err(ListEndError {
                        src: SourceFile {
                            path: self.path.to_path_buf(),
                        },
                        bad_bit: self.last_span(),
                    }
                    .into());
                }
                Ok(d)
            }
            s => Ok(s.parse::<T>().into_diagnostic()?),
        }
    }
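
    // For example, with input `2.5` or `[ 2.5 ]`, `parse_parameter::<f64>()`
    // yields 2.5, while `[ 2.5 3.5 ]` fails with a list-end diagnostic.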

    /// Parses a `[`-delimited list of values of type `T`.
    pub fn parse_list<T>(&mut self) -> Result<Vec<T>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
    {
        let mut data = Vec::new();
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "["))
        {
            return Err(ListBeginError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        // Consume values until the closing bracket.
        while let Some(p) = self.parse_next_if(|p| p != "]").transpose()? {
            data.push(p);
        }
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
        {
            return Err(ListEndError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        Ok(data)
    }
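
    // For example, `[ 1 2 3 ]` parsed with `parse_list::<i64>()` yields
    // `vec![1, 2, 3]`.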

    /// Parses a `[`-delimited list of value pairs, combining each pair with `f`.
    pub fn parse_list_2<T, P, F>(&mut self, f: F) -> Result<Vec<P>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
        F: Fn(T, T) -> P,
    {
        let mut data = Vec::new();
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "["))
        {
            return Err(ListBeginError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        while let Some(pa) = self.parse_next_if(|p| p != "]").transpose()? {
            if let Some(pb) = self.parse_next_if(|p| p != "]").transpose()? {
                data.push(f(pa, pb));
            } else {
                // The list ended partway through a pair.
                return Err(IncompleteGroupError {
                    src: SourceFile {
                        path: self.path.to_path_buf(),
                    },
                    bad_bit: self.last_span(),
                }
                .into());
            }
        }
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
        {
            return Err(ListEndError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        Ok(data)
    }

    /// Parses a `[`-delimited list of value triples, combining each triple with `f`.
    pub fn parse_list_3<T, P, F>(&mut self, f: F) -> Result<Vec<P>>
    where
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err:
            std::marker::Send + std::marker::Sync + std::error::Error + 'static,
        F: Fn(T, T, T) -> P,
    {
        let mut data = Vec::new();
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "["))
        {
            return Err(ListBeginError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        while let Some(pa) = self.parse_next_if(|p| p != "]").transpose()? {
            if let Some(pb) = self.parse_next_if(|p| p != "]").transpose()? {
                if let Some(pc) = self.parse_next_if(|p| p != "]").transpose()? {
                    data.push(f(pa, pb, pc));
                } else {
                    // The list ended partway through a triple.
                    return Err(IncompleteGroupError {
                        src: SourceFile {
                            path: self.path.to_path_buf(),
                        },
                        bad_bit: self.last_span(),
                    }
                    .into());
                }
            } else {
                return Err(IncompleteGroupError {
                    src: SourceFile {
                        path: self.path.to_path_buf(),
                    },
                    bad_bit: self.last_span(),
                }
                .into());
            }
        }
        if !self
            .next()
            .is_none_or(|p| p.is_ok_and(|p| p.as_str() == "]"))
        {
            return Err(ListEndError {
                src: SourceFile {
                    path: self.path.to_path_buf(),
                },
                bad_bit: self.last_span(),
            }
            .into());
        }
        Ok(data)
    }
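
    // For example, `[ 1 2 3 4 5 6 ]` parsed with
    // `parse_list_3(|x, y, z| [x, y, z])` yields two three-element groups;
    // a trailing partial group raises `IncompleteGroupError`.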

    /// Returns a `SourceFile` handle for building diagnostics.
    pub fn get_src(&self) -> SourceFile {
        SourceFile {
            path: self.path.clone(),
        }
    }
}

/// Parses a sequence of `"key" value` dictionary entries. The first group
/// declares each entry (`name, type, default`); the second maps the quoted
/// key token (`$expr`) to the expression that parses its value (`$parsing`).
/// Both groups must list the same names in the same order. Expands to a
/// `Dict` value with one field per entry, defaults filled in, and `return`s
/// a diagnostic on duplicate or unknown keys.
macro_rules! parse_dict {
    ($tokenizer:expr
        => $($name_decl:ident, $type:ty, $default:expr);+
        => $($name_parsing:ident, $expr:expr, $parsing:expr);+
    ) => {{
        $(
            let mut $name_decl = None;
        )+
        while let Some(p) = $tokenizer.next_if(|p| p.starts_with('"')).transpose()? {
            match p.as_str() {
                $(
                    $expr => {
                        if $name_parsing.is_none() {
                            $name_parsing = Some($parsing);
                        } else {
                            return Err($crate::tokenizer::DuplicateDictEntryError {
                                bad_bit: $tokenizer.last_span(),
                                src: $tokenizer.get_src(),
                            }
                            .into());
                        }
                    }
                )+
                _ => {
                    return Err($crate::tokenizer::UnknownDictEntryError {
                        bad_bit: $tokenizer.last_span(),
                        src: $tokenizer.get_src(),
                    }
                    .into());
                }
            }
        }
        #[derive(Debug)]
        struct Dict {
            $(
                $name_decl: $type,
            )+
        }
        $(
            let $name_decl = $name_decl.unwrap_or_else(|| $default);
        )+
        Dict {
            $($name_decl,)+
        }
    }};
}
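
// A hedged usage sketch for `parse_dict!` (the key names, types, and defaults
// below are hypothetical, not taken from a real call site):
//
//     let dict = parse_dict!(tokenizer
//         => radius, f64, 1.0; subdivisions, i64, 3
//         => radius, "\"float radius\"", tokenizer.parse_parameter::<f64>()?;
//            subdivisions, "\"integer subdivisions\"", tokenizer.parse_parameter::<i64>()?
//     );
//     // dict.radius and dict.subdivisions hold the parsed or default values.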

#[derive(Error, Debug, Diagnostic)]
#[error("Duplicate dict entry")]
#[diagnostic(help("multiple dict entries with the same key"))]
pub struct DuplicateDictEntryError {
    #[source_code]
    pub src: SourceFile,
    #[label("Here")]
    pub bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Unknown dict entry")]
pub struct UnknownDictEntryError {
    #[source_code]
    pub src: SourceFile,
    #[label("Here")]
    pub bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Parsing error")]
#[diagnostic(help("could not parse value"))]
struct ParsingError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
    #[related]
    error: Option<Report>,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Incomplete group")]
#[diagnostic(help("the list ended partway through a group of values"))]
struct IncompleteGroupError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Unfinished string")]
struct UnfinishedString {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Expected a list")]
#[diagnostic(help("expected `[` to begin a list"))]
struct ListBeginError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Unterminated list")]
#[diagnostic(help("expected `]` to end the list"))]
struct ListEndError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}

#[derive(Error, Debug, Diagnostic)]
#[error("Value expected")]
#[diagnostic(help("expected a value"))]
struct MissingValueError {
    #[source_code]
    src: SourceFile,
    #[label("Here")]
    bad_bit: SourceSpan,
}
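
// A minimal smoke-test sketch (an assumption, not part of the original suite):
// it writes a tiny scene fragment to a temp file and checks the token stream,
// exercising comment skipping, quoted strings, and bracket tokens.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenizes_comments_strings_and_brackets() -> Result<()> {
        // Hypothetical fixture path; any writable location would do.
        let path = std::env::temp_dir().join("tokenizer_smoke_test.pbrt");
        std::fs::write(&path, "# a comment\nShape \"sphere\" [ 1 2 3 ]\n").into_diagnostic()?;
        let mut tokenizer = Tokenizer::new(path.clone(), std::env::temp_dir())?;
        let mut tokens = Vec::new();
        while let Some(token) = tokenizer.next().transpose()? {
            tokens.push(token);
        }
        // Quotes are preserved on string tokens; the comment is skipped.
        assert_eq!(tokens, ["Shape", "\"sphere\"", "[", "1", "2", "3", "]"]);
        Ok(())
    }
}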