feat: guess plain text encoding then set content-type charset (#186)
This commit is contained in:
parent
6dcb4dcd76
commit
45f4f5fc58
5 changed files with 111 additions and 18 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
@ -204,6 +204,17 @@ version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chardetng"
|
||||||
|
version = "0.1.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"encoding_rs",
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "chrono"
|
name = "chrono"
|
||||||
version = "0.4.23"
|
version = "0.4.23"
|
||||||
|
@ -425,6 +436,7 @@ dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async_zip",
|
"async_zip",
|
||||||
"base64 0.21.0",
|
"base64 0.21.0",
|
||||||
|
"chardetng",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"clap_complete",
|
"clap_complete",
|
||||||
|
|
|
@ -42,6 +42,7 @@ form_urlencoded = "1.0"
|
||||||
alphanumeric-sort = "1.4"
|
alphanumeric-sort = "1.4"
|
||||||
content_inspector = "0.2"
|
content_inspector = "0.2"
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
|
chardetng = "0.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["tls"]
|
default = ["tls"]
|
||||||
|
|
|
@ -638,14 +638,10 @@ impl Server {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(mime) = mime_guess::from_path(path).first() {
|
res.headers_mut().insert(
|
||||||
res.headers_mut().typed_insert(ContentType::from(mime));
|
CONTENT_TYPE,
|
||||||
} else {
|
HeaderValue::from_str(&get_content_type(path).await?)?,
|
||||||
res.headers_mut().insert(
|
);
|
||||||
CONTENT_TYPE,
|
|
||||||
HeaderValue::from_static("application/octet-stream"),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
let filename = try_get_file_name(path)?;
|
let filename = try_get_file_name(path)?;
|
||||||
res.headers_mut().insert(
|
res.headers_mut().insert(
|
||||||
|
@ -1382,3 +1378,34 @@ fn set_webdav_headers(res: &mut Response) {
|
||||||
res.headers_mut()
|
res.headers_mut()
|
||||||
.insert("DAV", HeaderValue::from_static("1,2"));
|
.insert("DAV", HeaderValue::from_static("1,2"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_content_type(path: &Path) -> Result<String> {
|
||||||
|
let mut buffer: Vec<u8> = vec![];
|
||||||
|
fs::File::open(path)
|
||||||
|
.await?
|
||||||
|
.take(1024)
|
||||||
|
.read_to_end(&mut buffer)
|
||||||
|
.await?;
|
||||||
|
let mime = mime_guess::from_path(path).first();
|
||||||
|
let is_text = content_inspector::inspect(&buffer).is_text();
|
||||||
|
let content_type = if is_text {
|
||||||
|
let mut detector = chardetng::EncodingDetector::new();
|
||||||
|
detector.feed(&buffer, buffer.len() < 1024);
|
||||||
|
let (enc, confident) = detector.guess_assess(None, true);
|
||||||
|
let charset = if confident {
|
||||||
|
format!("; charset={}", enc.name())
|
||||||
|
} else {
|
||||||
|
"".into()
|
||||||
|
};
|
||||||
|
match mime {
|
||||||
|
Some(m) => format!("{m}{charset}"),
|
||||||
|
None => format!("text/plain{charset}"),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match mime {
|
||||||
|
Some(m) => m.to_string(),
|
||||||
|
None => "application/octet-stream".into(),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(content_type)
|
||||||
|
}
|
||||||
|
|
|
@ -46,15 +46,12 @@ pub fn tmpdir() -> TempDir {
|
||||||
let tmpdir = assert_fs::TempDir::new().expect("Couldn't create a temp dir for tests");
|
let tmpdir = assert_fs::TempDir::new().expect("Couldn't create a temp dir for tests");
|
||||||
for file in FILES {
|
for file in FILES {
|
||||||
if *file == BIN_FILE {
|
if *file == BIN_FILE {
|
||||||
tmpdir
|
tmpdir.child(file).write_binary(b"bin\0\0123").unwrap();
|
||||||
.child(file)
|
|
||||||
.write_binary(b"bin\0\0123")
|
|
||||||
.expect("Couldn't write to file");
|
|
||||||
} else {
|
} else {
|
||||||
tmpdir
|
tmpdir
|
||||||
.child(file)
|
.child(file)
|
||||||
.write_str(&format!("This is {file}"))
|
.write_str(&format!("This is {file}"))
|
||||||
.expect("Couldn't write to file");
|
.unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for directory in DIRECTORIES {
|
for directory in DIRECTORIES {
|
||||||
|
@ -62,7 +59,7 @@ pub fn tmpdir() -> TempDir {
|
||||||
tmpdir
|
tmpdir
|
||||||
.child(format!("{}{}", directory, "index.html"))
|
.child(format!("{}{}", directory, "index.html"))
|
||||||
.write_str("__ASSERTS_PREFIX__index.js;DATA = __INDEX_DATA__")
|
.write_str("__ASSERTS_PREFIX__index.js;DATA = __INDEX_DATA__")
|
||||||
.expect("Couldn't write to file");
|
.unwrap();
|
||||||
} else {
|
} else {
|
||||||
for file in FILES {
|
for file in FILES {
|
||||||
if *directory == DIR_NO_INDEX && *file == "index.html" {
|
if *directory == DIR_NO_INDEX && *file == "index.html" {
|
||||||
|
@ -72,17 +69,37 @@ pub fn tmpdir() -> TempDir {
|
||||||
tmpdir
|
tmpdir
|
||||||
.child(format!("{directory}{file}"))
|
.child(format!("{directory}{file}"))
|
||||||
.write_binary(b"bin\0\0123")
|
.write_binary(b"bin\0\0123")
|
||||||
.expect("Couldn't write to file");
|
.unwrap();
|
||||||
} else {
|
} else {
|
||||||
tmpdir
|
tmpdir
|
||||||
.child(format!("{directory}{file}"))
|
.child(format!("{directory}{file}"))
|
||||||
.write_str(&format!("This is {directory}{file}"))
|
.write_str(&format!("This is {directory}{file}"))
|
||||||
.expect("Couldn't write to file");
|
.unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tmpdir.child("dir4/hidden").touch().unwrap();
|
tmpdir.child("dir4/hidden").touch().unwrap();
|
||||||
|
tmpdir
|
||||||
|
.child("content-types/bin.tar")
|
||||||
|
.write_binary(b"\x7f\x45\x4c\x46\x02\x01\x00\x00")
|
||||||
|
.unwrap();
|
||||||
|
tmpdir
|
||||||
|
.child("content-types/bin")
|
||||||
|
.write_binary(b"\x7f\x45\x4c\x46\x02\x01\x00\x00")
|
||||||
|
.unwrap();
|
||||||
|
tmpdir
|
||||||
|
.child("content-types/file-utf8.txt")
|
||||||
|
.write_str("世界")
|
||||||
|
.unwrap();
|
||||||
|
tmpdir
|
||||||
|
.child("content-types/file-gbk.txt")
|
||||||
|
.write_binary(b"\xca\xc0\xbd\xe7")
|
||||||
|
.unwrap();
|
||||||
|
tmpdir
|
||||||
|
.child("content-types/file")
|
||||||
|
.write_str("世界")
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
tmpdir
|
tmpdir
|
||||||
}
|
}
|
||||||
|
|
|
@ -148,7 +148,10 @@ fn empty_search(#[with(&["-A"])] server: TestServer) -> Result<(), Error> {
|
||||||
fn get_file(server: TestServer) -> Result<(), Error> {
|
fn get_file(server: TestServer) -> Result<(), Error> {
|
||||||
let resp = reqwest::blocking::get(format!("{}index.html", server.url()))?;
|
let resp = reqwest::blocking::get(format!("{}index.html", server.url()))?;
|
||||||
assert_eq!(resp.status(), 200);
|
assert_eq!(resp.status(), 200);
|
||||||
assert_eq!(resp.headers().get("content-type").unwrap(), "text/html");
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"text/html; charset=UTF-8"
|
||||||
|
);
|
||||||
assert_eq!(resp.headers().get("accept-ranges").unwrap(), "bytes");
|
assert_eq!(resp.headers().get("accept-ranges").unwrap(), "bytes");
|
||||||
assert!(resp.headers().contains_key("etag"));
|
assert!(resp.headers().contains_key("etag"));
|
||||||
assert!(resp.headers().contains_key("last-modified"));
|
assert!(resp.headers().contains_key("last-modified"));
|
||||||
|
@ -161,7 +164,10 @@ fn get_file(server: TestServer) -> Result<(), Error> {
|
||||||
fn head_file(server: TestServer) -> Result<(), Error> {
|
fn head_file(server: TestServer) -> Result<(), Error> {
|
||||||
let resp = fetch!(b"HEAD", format!("{}index.html", server.url())).send()?;
|
let resp = fetch!(b"HEAD", format!("{}index.html", server.url())).send()?;
|
||||||
assert_eq!(resp.status(), 200);
|
assert_eq!(resp.status(), 200);
|
||||||
assert_eq!(resp.headers().get("content-type").unwrap(), "text/html");
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"text/html; charset=UTF-8"
|
||||||
|
);
|
||||||
assert_eq!(resp.headers().get("accept-ranges").unwrap(), "bytes");
|
assert_eq!(resp.headers().get("accept-ranges").unwrap(), "bytes");
|
||||||
assert!(resp.headers().contains_key("content-disposition"));
|
assert!(resp.headers().contains_key("content-disposition"));
|
||||||
assert!(resp.headers().contains_key("etag"));
|
assert!(resp.headers().contains_key("etag"));
|
||||||
|
@ -259,3 +265,33 @@ fn delete_file_404(#[with(&["-A"])] server: TestServer) -> Result<(), Error> {
|
||||||
assert_eq!(resp.status(), 404);
|
assert_eq!(resp.status(), 404);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[rstest]
|
||||||
|
fn get_file_content_type(server: TestServer) -> Result<(), Error> {
|
||||||
|
let resp = reqwest::blocking::get(format!("{}content-types/bin.tar", server.url()))?;
|
||||||
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"application/x-tar"
|
||||||
|
);
|
||||||
|
let resp = reqwest::blocking::get(format!("{}content-types/bin", server.url()))?;
|
||||||
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"application/octet-stream"
|
||||||
|
);
|
||||||
|
let resp = reqwest::blocking::get(format!("{}content-types/file-utf8.txt", server.url()))?;
|
||||||
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"text/plain; charset=UTF-8"
|
||||||
|
);
|
||||||
|
let resp = reqwest::blocking::get(format!("{}content-types/file-gbk.txt", server.url()))?;
|
||||||
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"text/plain; charset=GBK"
|
||||||
|
);
|
||||||
|
let resp = reqwest::blocking::get(format!("{}content-types/file", server.url()))?;
|
||||||
|
assert_eq!(
|
||||||
|
resp.headers().get("content-type").unwrap(),
|
||||||
|
"text/plain; charset=UTF-8"
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue