From 110f771cc53cc838fde51613f2248b1497816ccb Mon Sep 17 00:00:00 2001 From: Chris Cameron Date: Wed, 9 May 2018 11:35:53 -0400 Subject: [PATCH] feat: Added util method for removing BOM characters from UTF8 data --- CHANGELOG.md | 1 + ICD.Common.Utils.Tests/EncodingUtilsTest.cs | 23 ++++++++++++++ ICD.Common.Utils/EncodingUtils.cs | 30 +++++++++++++++++++ .../ICD.Common.Utils_SimplSharp.csproj | 1 + 4 files changed, 55 insertions(+) create mode 100644 ICD.Common.Utils.Tests/EncodingUtilsTest.cs create mode 100644 ICD.Common.Utils/EncodingUtils.cs diff --git a/CHANGELOG.md b/CHANGELOG.md index 165e567..0aa9c06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] - Added Yield extension to return a single-item enumerable for an object. + - Added util method for removing BOM characters from UTF8 data ## [3.0.0] - 2018-04-23 ### Added diff --git a/ICD.Common.Utils.Tests/EncodingUtilsTest.cs b/ICD.Common.Utils.Tests/EncodingUtilsTest.cs new file mode 100644 index 0000000..33bc129 --- /dev/null +++ b/ICD.Common.Utils.Tests/EncodingUtilsTest.cs @@ -0,0 +1,23 @@ +using System; +using System.Text; +using NUnit.Framework; + +namespace ICD.Common.Utils.Tests +{ + [TestFixture] + public sealed class EncodingUtilsTest + { + [Test] + public void StripUtf8BomTest() + { + Assert.Throws<ArgumentNullException>(() => EncodingUtils.StripUtf8Bom(null)); + + byte[] preamble = Encoding.UTF8.GetPreamble(); + string preambleString = Encoding.UTF8.GetString(preamble, 0, preamble.Length); + + Assert.AreEqual(string.Empty, EncodingUtils.StripUtf8Bom(string.Empty)); + Assert.AreEqual("test", EncodingUtils.StripUtf8Bom("test")); + Assert.AreEqual("test", EncodingUtils.StripUtf8Bom(preambleString + "test")); + } + } +} diff --git a/ICD.Common.Utils/EncodingUtils.cs b/ICD.Common.Utils/EncodingUtils.cs new file mode 100644 index 0000000..e1f45cc --- /dev/null +++ b/ICD.Common.Utils/EncodingUtils.cs @@ -0,0 +1,30 @@ 
+using System; +using System.Text; +using ICD.Common.Properties; + +namespace ICD.Common.Utils +{ + public static class EncodingUtils + { + /// <summary> + /// Strips leading Byte Order Mark characters from the given UTF8 data. + /// </summary> + /// <param name="data">Input string to remove leading BOM chars from.</param> + /// <returns>Input string with leading BOM chars removed.</returns> + /// <exception cref="ArgumentNullException">Data is null.</exception> + [PublicAPI] + public static string StripUtf8Bom(string data) + { + if (data == null) + throw new ArgumentNullException("data"); + + byte[] preamble = Encoding.UTF8.GetPreamble(); + string preambleString = Encoding.UTF8.GetString(preamble, 0, preamble.Length); + + if (data.StartsWith(preambleString, StringComparison.Ordinal)) + data = data.Remove(0, preambleString.Length); + + return data; + } + } +} diff --git a/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj b/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj index 231f2aa..ece69a3 100644 --- a/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj +++ b/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj @@ -77,6 +77,7 @@ +