代码来源:.NET(C#):关于正确读取中文编码文件 - 雪域月光 - 博客园 (cnblogs.com)
// Sample entry point: prints one GB2312-encoded file and one Unicode (BOM-prefixed) file.
static void Main()
{
    PrintText("gb2312.txt");
    PrintText("unicode.txt");
}

// Detects the encoding of the file at the given path and writes its entire content to the console.
static void PrintText(string path)
{
    // NOTE(review): on .NET Core / .NET 5+ the "GB2312" code page may require registering
    // CodePagesEncodingProvider first - confirm against the target runtime.
    var fallback = Encoding.GetEncoding("GB2312");

    // GetEncoding can return null; StreamReader rejects a null encoding, so use the fallback then.
    var enc = GetEncoding(path, fallback) ?? fallback;

    using (var sr = new StreamReader(path, enc))
    {
        Console.WriteLine(sr.ReadToEnd());
    }
}

/// <summary>
/// Tries to determine the character encoding of a file from its byte order mark (BOM).
/// </summary>
/// <param name="file">Path of the file to inspect.</param>
/// <param name="defEnc">Encoding returned when the file does not start with a recognised BOM.</param>
/// <returns>
/// null if the opened stream reports that it cannot be read; otherwise the UTF-32, UTF-8 or UTF-16
/// encoding indicated by the BOM, or <paramref name="defEnc"/> when no BOM matches.
/// Exceptions thrown while opening or reading the file are not caught here.
/// </returns>
static Encoding GetEncoding(string file, Encoding defEnc)
{
    using (var stream = File.OpenRead(file))
    {
        // Defensive check kept from the original contract.
        if (!stream.CanRead)
        {
            return null;
        }

        // Buffer for the longest BOM we recognise (UTF-32 uses 4 bytes).
        var bom = new byte[4];

        // Number of bytes actually read. Stream.Read may return fewer bytes than requested,
        // so keep reading until the buffer is full or the end of the file is reached.
        int readc = 0;
        while (readc < bom.Length)
        {
            int n = stream.Read(bom, readc, bom.Length - readc);
            if (n <= 0)
            {
                break;
            }
            readc += n;
        }

        // UTF-32 must be tested before UTF-16: the UTF-32 LE BOM (FF FE 00 00)
        // begins with the UTF-16 LE BOM (FF FE).
        if (readc >= 4)
        {
            // UTF-32, big-endian
            if (CheckBytes(bom, 4, 0x00, 0x00, 0xFE, 0xFF))
            {
                return new UTF32Encoding(true, true);
            }
            // UTF-32, little-endian
            if (CheckBytes(bom, 4, 0xFF, 0xFE, 0x00, 0x00))
            {
                return new UTF32Encoding(false, true);
            }
        }

        // UTF-8
        if (readc >= 3 && CheckBytes(bom, 3, 0xEF, 0xBB, 0xBF))
        {
            return new UTF8Encoding(true);
        }

        if (readc >= 2)
        {
            // UTF-16, big-endian
            if (CheckBytes(bom, 2, 0xFE, 0xFF))
            {
                return new UnicodeEncoding(true, true);
            }
            // UTF-16, little-endian
            if (CheckBytes(bom, 2, 0xFF, 0xFE))
            {
                return new UnicodeEncoding(false, true);
            }
        }

        // No recognised BOM (or the file is shorter than any BOM).
        return defEnc;
    }
}

// Helper: returns true when the first `count` bytes of `bytes` equal the first `count` entries of `values`.
// Returns false (instead of indexing out of range) when either array holds fewer than `count` items.
static bool CheckBytes(byte[] bytes, int count, params int[] values)
{
    if (count > bytes.Length || count > values.Length)
    {
        return false;
    }

    for (int i = 0; i < count; i++)
    {
        if (bytes[i] != values[i])
        {
            return false;
        }
    }

    return true;
}
上面的代码中,对于带BOM的Unicode文本,GetEncoding方法会返回UTF-16编码(更具体地说,会根据BOM返回Big-Endian或Little-Endian的UTF-16编码);而没有BOM的文件则会返回缺省的GB2312编码。