feat: guess plain text encoding then set content-type charset (#186)
This commit is contained in:
parent
6dcb4dcd76
commit
45f4f5fc58
5 changed files with 111 additions and 18 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
@ -204,6 +204,17 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chardetng"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"encoding_rs",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.23"
|
||||
|
@ -425,6 +436,7 @@ dependencies = [
|
|||
"async-stream",
|
||||
"async_zip",
|
||||
"base64 0.21.0",
|
||||
"chardetng",
|
||||
"chrono",
|
||||
"clap",
|
||||
"clap_complete",
|
||||
|
|
|
@ -42,6 +42,7 @@ form_urlencoded = "1.0"
|
|||
alphanumeric-sort = "1.4"
|
||||
content_inspector = "0.2"
|
||||
anyhow = "1.0"
|
||||
chardetng = "0.1"
|
||||
|
||||
[features]
|
||||
default = ["tls"]
|
||||
|
|
|
@ -638,14 +638,10 @@ impl Server {
|
|||
None
|
||||
};
|
||||
|
||||
if let Some(mime) = mime_guess::from_path(path).first() {
|
||||
res.headers_mut().typed_insert(ContentType::from(mime));
|
||||
} else {
|
||||
res.headers_mut().insert(
|
||||
CONTENT_TYPE,
|
||||
HeaderValue::from_static("application/octet-stream"),
|
||||
HeaderValue::from_str(&get_content_type(path).await?)?,
|
||||
);
|
||||
}
|
||||
|
||||
let filename = try_get_file_name(path)?;
|
||||
res.headers_mut().insert(
|
||||
|
@ -1382,3 +1378,34 @@ fn set_webdav_headers(res: &mut Response) {
|
|||
res.headers_mut()
|
||||
.insert("DAV", HeaderValue::from_static("1,2"));
|
||||
}
|
||||
|
||||
async fn get_content_type(path: &Path) -> Result<String> {
|
||||
let mut buffer: Vec<u8> = vec![];
|
||||
fs::File::open(path)
|
||||
.await?
|
||||
.take(1024)
|
||||
.read_to_end(&mut buffer)
|
||||
.await?;
|
||||
let mime = mime_guess::from_path(path).first();
|
||||
let is_text = content_inspector::inspect(&buffer).is_text();
|
||||
let content_type = if is_text {
|
||||
let mut detector = chardetng::EncodingDetector::new();
|
||||
detector.feed(&buffer, buffer.len() < 1024);
|
||||
let (enc, confident) = detector.guess_assess(None, true);
|
||||
let charset = if confident {
|
||||
format!("; charset={}", enc.name())
|
||||
} else {
|
||||
"".into()
|
||||
};
|
||||
match mime {
|
||||
Some(m) => format!("{m}{charset}"),
|
||||
None => format!("text/plain{charset}"),
|
||||
}
|
||||
} else {
|
||||
match mime {
|
||||
Some(m) => m.to_string(),
|
||||
None => "application/octet-stream".into(),
|
||||
}
|
||||
};
|
||||
Ok(content_type)
|
||||
}
|
||||
|
|
|
@ -46,15 +46,12 @@ pub fn tmpdir() -> TempDir {
|
|||
let tmpdir = assert_fs::TempDir::new().expect("Couldn't create a temp dir for tests");
|
||||
for file in FILES {
|
||||
if *file == BIN_FILE {
|
||||
tmpdir
|
||||
.child(file)
|
||||
.write_binary(b"bin\0\0123")
|
||||
.expect("Couldn't write to file");
|
||||
tmpdir.child(file).write_binary(b"bin\0\0123").unwrap();
|
||||
} else {
|
||||
tmpdir
|
||||
.child(file)
|
||||
.write_str(&format!("This is {file}"))
|
||||
.expect("Couldn't write to file");
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
for directory in DIRECTORIES {
|
||||
|
@ -62,7 +59,7 @@ pub fn tmpdir() -> TempDir {
|
|||
tmpdir
|
||||
.child(format!("{}{}", directory, "index.html"))
|
||||
.write_str("__ASSERTS_PREFIX__index.js;DATA = __INDEX_DATA__")
|
||||
.expect("Couldn't write to file");
|
||||
.unwrap();
|
||||
} else {
|
||||
for file in FILES {
|
||||
if *directory == DIR_NO_INDEX && *file == "index.html" {
|
||||
|
@ -72,17 +69,37 @@ pub fn tmpdir() -> TempDir {
|
|||
tmpdir
|
||||
.child(format!("{directory}{file}"))
|
||||
.write_binary(b"bin\0\0123")
|
||||
.expect("Couldn't write to file");
|
||||
.unwrap();
|
||||
} else {
|
||||
tmpdir
|
||||
.child(format!("{directory}{file}"))
|
||||
.write_str(&format!("This is {directory}{file}"))
|
||||
.expect("Couldn't write to file");
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
tmpdir.child("dir4/hidden").touch().unwrap();
|
||||
tmpdir
|
||||
.child("content-types/bin.tar")
|
||||
.write_binary(b"\x7f\x45\x4c\x46\x02\x01\x00\x00")
|
||||
.unwrap();
|
||||
tmpdir
|
||||
.child("content-types/bin")
|
||||
.write_binary(b"\x7f\x45\x4c\x46\x02\x01\x00\x00")
|
||||
.unwrap();
|
||||
tmpdir
|
||||
.child("content-types/file-utf8.txt")
|
||||
.write_str("世界")
|
||||
.unwrap();
|
||||
tmpdir
|
||||
.child("content-types/file-gbk.txt")
|
||||
.write_binary(b"\xca\xc0\xbd\xe7")
|
||||
.unwrap();
|
||||
tmpdir
|
||||
.child("content-types/file")
|
||||
.write_str("世界")
|
||||
.unwrap();
|
||||
|
||||
tmpdir
|
||||
}
|
||||
|
|
|
@ -148,7 +148,10 @@ fn empty_search(#[with(&["-A"])] server: TestServer) -> Result<(), Error> {
|
|||
fn get_file(server: TestServer) -> Result<(), Error> {
|
||||
let resp = reqwest::blocking::get(format!("{}index.html", server.url()))?;
|
||||
assert_eq!(resp.status(), 200);
|
||||
assert_eq!(resp.headers().get("content-type").unwrap(), "text/html");
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/html; charset=UTF-8"
|
||||
);
|
||||
assert_eq!(resp.headers().get("accept-ranges").unwrap(), "bytes");
|
||||
assert!(resp.headers().contains_key("etag"));
|
||||
assert!(resp.headers().contains_key("last-modified"));
|
||||
|
@ -161,7 +164,10 @@ fn get_file(server: TestServer) -> Result<(), Error> {
|
|||
fn head_file(server: TestServer) -> Result<(), Error> {
|
||||
let resp = fetch!(b"HEAD", format!("{}index.html", server.url())).send()?;
|
||||
assert_eq!(resp.status(), 200);
|
||||
assert_eq!(resp.headers().get("content-type").unwrap(), "text/html");
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/html; charset=UTF-8"
|
||||
);
|
||||
assert_eq!(resp.headers().get("accept-ranges").unwrap(), "bytes");
|
||||
assert!(resp.headers().contains_key("content-disposition"));
|
||||
assert!(resp.headers().contains_key("etag"));
|
||||
|
@ -259,3 +265,33 @@ fn delete_file_404(#[with(&["-A"])] server: TestServer) -> Result<(), Error> {
|
|||
assert_eq!(resp.status(), 404);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
fn get_file_content_type(server: TestServer) -> Result<(), Error> {
|
||||
let resp = reqwest::blocking::get(format!("{}content-types/bin.tar", server.url()))?;
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"application/x-tar"
|
||||
);
|
||||
let resp = reqwest::blocking::get(format!("{}content-types/bin", server.url()))?;
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"application/octet-stream"
|
||||
);
|
||||
let resp = reqwest::blocking::get(format!("{}content-types/file-utf8.txt", server.url()))?;
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/plain; charset=UTF-8"
|
||||
);
|
||||
let resp = reqwest::blocking::get(format!("{}content-types/file-gbk.txt", server.url()))?;
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/plain; charset=GBK"
|
||||
);
|
||||
let resp = reqwest::blocking::get(format!("{}content-types/file", server.url()))?;
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/plain; charset=UTF-8"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue