// Copyright (c) 2014 AlphaSierraPapa for the SharpDevelop Team
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

using System;
using System.Globalization;
using System.Windows.Documents;
#if NREFACTORY
using ICSharpCode.NRefactory.Editor;
#endif

namespace ICSharpCode.AvalonEdit.Document
{
	/// <summary>
	/// Specifies the mode for getting the next caret position.
	/// </summary>
	public enum CaretPositioningMode
	{
		/// <summary>
		/// Normal positioning (stop after every grapheme)
		/// </summary>
		Normal,
		/// <summary>
		/// Stop only on word borders.
		/// </summary>
		WordBorder,
		/// <summary>
		/// Stop only at the beginning of words. This is used for Ctrl+Left/Ctrl+Right.
		/// </summary>
		WordStart,
		/// <summary>
		/// Stop only at the beginning of words, and anywhere in the middle of symbols.
		/// </summary>
		WordStartOrSymbol,
		/// <summary>
		/// Stop only on word borders, and anywhere in the middle of symbols.
		/// </summary>
		WordBorderOrSymbol,
		/// <summary>
		/// Stop between every Unicode codepoint, even within the same grapheme.
		/// This is used to implement deleting the previous grapheme when Backspace is pressed.
		/// </summary>
		EveryCodepoint
	}

	/// <summary>
	/// Static helper methods for working with text.
	/// </summary>
	public static partial class TextUtilities
	{
		#region GetControlCharacterName
		// the names of the first 32 ASCII characters = Unicode C0 block
		static readonly string[] c0Table = {
			"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", "BS", "HT",
			"LF", "VT", "FF", "CR", "SO", "SI", "DLE", "DC1", "DC2", "DC3",
			"DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "FS", "GS",
			"RS", "US"
		};

		// DEL (ASCII 127) and
		// the names of the control characters in the C1 block (Unicode 128 to 159)
		static readonly string[] delAndC1Table = {
			"DEL",
			"PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA", "HTS", "HTJ",
			"VTS", "PLD", "PLU", "RI", "SS2", "SS3", "DCS", "PU1", "PU2", "STS",
			"CCH", "MW", "SPA", "EPA", "SOS", "SGCI", "SCI", "CSI", "ST", "OSC",
			"PM", "APC"
		};

		/// <summary>
		/// Gets the name of the control character.
		/// For unknown characters, the unicode codepoint is returned as 4-digit hexadecimal value.
		/// </summary>
		/// <param name="controlCharacter">The character to look up.</param>
		/// <returns>The abbreviated name for characters 0-31 and 127-159;
		/// otherwise the lower-case, 4-digit hexadecimal code of the character.</returns>
		public static string GetControlCharacterName(char controlCharacter)
		{
			int num = (int)controlCharacter;
			if (num < c0Table.Length)
				return c0Table[num];
			else if (num >= 127 && num <= 159)
				return delAndC1Table[num - 127]; // table starts at DEL (127)
			else
				return num.ToString("x4", CultureInfo.InvariantCulture);
		}
		#endregion

		#region GetWhitespace
		/// <summary>
		/// Gets all whitespace (' ' and '\t', but no newlines) after offset.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the whitespace starts.</param>
		/// <returns>The segment containing the whitespace.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			int pos;
			// advance until the first character that is neither space nor tab (or the end of the text)
			for (pos = offset; pos < textSource.TextLength; pos++) {
				char c = textSource.GetCharAt(pos);
				if (c != ' ' && c != '\t')
					break;
			}
			return new SimpleSegment(offset, pos - offset);
		}

		/// <summary>
		/// Gets all whitespace (' ' and '\t', but no newlines) before offset.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the whitespace ends.</param>
		/// <returns>The segment containing the whitespace.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			int pos;
			// walk backwards until the first character that is neither space nor tab (or the start of the text)
			for (pos = offset - 1; pos >= 0; pos--) {
				char c = textSource.GetCharAt(pos);
				if (c != ' ' && c != '\t')
					break;
			}
			pos++; // go back the one character that isn't whitespace
			return new SimpleSegment(pos, offset - pos);
		}

		/// <summary>
		/// Gets the leading whitespace segment on the document line.
		/// </summary>
		/// <param name="document">The document that contains the line.</param>
		/// <param name="documentLine">The line whose leading whitespace is requested.</param>
		/// <returns>The segment of spaces and tabs starting at the line's offset.</returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="documentLine"/> or <paramref name="document"/> is null.
		/// </exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
		                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
		public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
		{
			if (documentLine == null)
				throw new ArgumentNullException("documentLine");
			// Report a null document under its own parameter name instead of the
			// helper's "textSource"; the exception type is unchanged.
			if (document == null)
				throw new ArgumentNullException("document");
			return GetWhitespaceAfter(document, documentLine.Offset);
		}

		/// <summary>
		/// Gets the trailing whitespace segment on the document line.
		/// </summary>
		/// <param name="document">The document that contains the line.</param>
		/// <param name="documentLine">The line whose trailing whitespace is requested.</param>
		/// <returns>The segment of spaces and tabs ending at the line's end offset;
		/// an empty segment at the line end if the whole line is whitespace.</returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="documentLine"/> or <paramref name="document"/> is null.
		/// </exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
		                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
		public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
		{
			if (documentLine == null)
				throw new ArgumentNullException("documentLine");
			// Report a null document under its own parameter name instead of the
			// helper's "textSource"; the exception type is unchanged.
			if (document == null)
				throw new ArgumentNullException("document");
			ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
			// If the whole line consists of whitespace, we consider all of it as leading whitespace,
			// so return an empty segment as trailing whitespace.
			if (segment.Offset == documentLine.Offset)
				return new SimpleSegment(documentLine.EndOffset, 0);
			else
				return segment;
		}
		#endregion

		#region GetSingleIndentationSegment
		/// <summary>
		/// Gets a single indentation segment starting at <paramref name="offset"/> - at most one tab
		/// or <paramref name="indentationSize"/> spaces.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the indentation segment starts.</param>
		/// <param name="indentationSize">The size of an indentation unit
		/// (the maximum number of spaces returned as one segment).</param>
		/// <returns>The indentation segment.
		/// If there is no indentation character at the specified <paramref name="offset"/>,
		/// an empty segment is returned.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			int pos = offset;
			while (pos < textSource.TextLength) {
				char c = textSource.GetCharAt(pos);
				if (c == '\t') {
					// a tab counts as a whole indentation unit, but only when it is the first character;
					// a tab following spaces terminates the run of spaces
					if (pos == offset)
						return new SimpleSegment(offset, 1);
					else
						break;
				} else if (c == ' ') {
					if (pos - offset >= indentationSize)
						break;
				} else {
					break;
				}
				// continue only if c==' ' and (pos-offset) < indentationSize
				pos++;
			}
			return new SimpleSegment(offset, pos - offset);
		}
		#endregion

		#region GetCharacterClass
		/// <summary>
		/// Gets whether the character is whitespace, part of an identifier, or line terminator.
		/// </summary>
		/// <param name="c">The character to classify.</param>
		/// <returns>The class of the character.</returns>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
		public static CharacterClass GetCharacterClass(char c)
		{
			// '\r' and '\n' are checked first: their Unicode category (Control)
			// would otherwise classify them as plain whitespace.
			if (c == '\r' || c == '\n')
				return CharacterClass.LineTerminator;
			// '_' is punctuation by category but is treated as part of an identifier.
			if (c == '_')
				return CharacterClass.IdentifierPart;
			return GetCharacterClass(char.GetUnicodeCategory(c));
		}

		/// <summary>
		/// Gets the character class of a codepoint that is encoded as a surrogate pair.
		/// Returns <see cref="CharacterClass.Other"/> if the two chars do not form a valid pair.
		/// </summary>
		static CharacterClass GetCharacterClass(char highSurrogate, char lowSurrogate)
		{
			if (char.IsSurrogatePair(highSurrogate, lowSurrogate)) {
				return GetCharacterClass(char.GetUnicodeCategory(highSurrogate.ToString() + lowSurrogate.ToString(), 0));
			} else {
				// malformed surrogate pair
				return CharacterClass.Other;
			}
		}

		/// <summary>
		/// Maps a Unicode category to the coarser <see cref="CharacterClass"/>.
		/// </summary>
		static CharacterClass GetCharacterClass(UnicodeCategory c)
		{
			switch (c) {
				case UnicodeCategory.SpaceSeparator:
				case UnicodeCategory.LineSeparator:
				case UnicodeCategory.ParagraphSeparator:
				case UnicodeCategory.Control:
					return CharacterClass.Whitespace;
				case UnicodeCategory.UppercaseLetter:
				case UnicodeCategory.LowercaseLetter:
				case UnicodeCategory.TitlecaseLetter:
				case UnicodeCategory.ModifierLetter:
				case UnicodeCategory.OtherLetter:
				case UnicodeCategory.DecimalDigitNumber:
					return CharacterClass.IdentifierPart;
				case UnicodeCategory.NonSpacingMark:
				case UnicodeCategory.SpacingCombiningMark:
				case UnicodeCategory.EnclosingMark:
					return CharacterClass.CombiningMark;
				default:
					return CharacterClass.Other;
			}
		}
		#endregion

		#region GetNextCaretPosition
		/// <summary>
		/// Gets the next caret position.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The start offset inside the text source.</param>
		/// <param name="direction">The search direction (forwards or backwards).</param>
		/// <param name="mode">The mode for caret positioning.</param>
		/// <returns>The offset of the next caret position, or -1 if there is no further caret position
		/// in the text source.</returns>
		/// <remarks>
		/// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
		/// In real caret movement, there are additional caret stops at line starts and ends. This method
		/// treats linefeeds as simple whitespace.
		/// </remarks>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		/// <exception cref="ArgumentException">
		/// <paramref name="mode"/> or <paramref name="direction"/> is not a supported value.
		/// </exception>
		public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			switch (mode) {
				case CaretPositioningMode.Normal:
				case CaretPositioningMode.EveryCodepoint:
				case CaretPositioningMode.WordBorder:
				case CaretPositioningMode.WordBorderOrSymbol:
				case CaretPositioningMode.WordStart:
				case CaretPositioningMode.WordStartOrSymbol:
					break; // OK
				default:
					throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");
			}
			if (direction != LogicalDirection.Backward
			    && direction != LogicalDirection.Forward)
			{
				throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");
			}
			int textLength = textSource.TextLength;
			if (textLength <= 0) {
				// empty document? has a normal caret position at 0, though no word borders
				if (IsNormal(mode)) {
					if (offset > 0 && direction == LogicalDirection.Backward) return 0;
					if (offset < 0 && direction == LogicalDirection.Forward) return 0;
				}
				return -1;
			}
			while (true) {
				int nextPos = (direction == LogicalDirection.Backward) ? offset - 1 : offset + 1;

				// return -1 if there is no further caret position in the text source
				// we also need this to handle offset values outside the valid range
				if (nextPos < 0 || nextPos > textLength)
					return -1;

				// check if we've run against the textSource borders.
				// a 'textSource' usually isn't the whole document, but a single VisualLineElement.
				if (nextPos == 0) {
					// at the document start, there's only a word border
					// if the first character is not whitespace
					if (IsNormal(mode) || !char.IsWhiteSpace(textSource.GetCharAt(0)))
						return nextPos;
				} else if (nextPos == textLength) {
					// at the document end, there's never a word start
					if (mode != CaretPositioningMode.WordStart && mode != CaretPositioningMode.WordStartOrSymbol) {
						// at the document end, there's only a word border
						// if the last character is not whitespace
						if (IsNormal(mode) || !char.IsWhiteSpace(textSource.GetCharAt(textLength - 1)))
							return nextPos;
					}
				} else {
					char charBefore = textSource.GetCharAt(nextPos - 1);
					char charAfter = textSource.GetCharAt(nextPos);
					// Don't stop in the middle of a surrogate pair
					if (!char.IsSurrogatePair(charBefore, charAfter)) {
						CharacterClass classBefore = GetCharacterClass(charBefore);
						CharacterClass classAfter = GetCharacterClass(charAfter);
						// get correct class for characters outside BMP:
						if (char.IsLowSurrogate(charBefore) && nextPos >= 2) {
							classBefore = GetCharacterClass(textSource.GetCharAt(nextPos - 2), charBefore);
						}
						if (char.IsHighSurrogate(charAfter) && nextPos + 1 < textLength) {
							classAfter = GetCharacterClass(charAfter, textSource.GetCharAt(nextPos + 1));
						}
						if (StopBetweenCharacters(mode, classBefore, classAfter)) {
							return nextPos;
						}
					}
				}
				// we'll have to continue searching...
				offset = nextPos;
			}
		}

		/// <summary>
		/// Gets whether the mode stops at every position (Normal or EveryCodepoint)
		/// rather than only at word borders or word starts.
		/// </summary>
		static bool IsNormal(CaretPositioningMode mode)
		{
			return mode == CaretPositioningMode.Normal || mode == CaretPositioningMode.EveryCodepoint;
		}

		/// <summary>
		/// Decides whether the position between two characters of the given classes
		/// is a caret stop in the specified mode.
		/// </summary>
		static bool StopBetweenCharacters(CaretPositioningMode mode, CharacterClass charBefore, CharacterClass charAfter)
		{
			if (mode == CaretPositioningMode.EveryCodepoint)
				return true;
			// Don't stop in the middle of a grapheme
			if (charAfter == CharacterClass.CombiningMark)
				return false;
			// Stop after every grapheme in normal mode
			if (mode == CaretPositioningMode.Normal)
				return true;
			if (charBefore == charAfter) {
				if (charBefore == CharacterClass.Other &&
				    (mode == CaretPositioningMode.WordBorderOrSymbol || mode == CaretPositioningMode.WordStartOrSymbol))
				{
					// With the "OrSymbol" modes, there's a word border and start between any two unknown characters
					return true;
				}
			} else {
				// this looks like a possible border

				// if we're looking for word starts, check that this is a word start (and not a word end)
				// if we're just checking for word borders, accept unconditionally
				if (!((mode == CaretPositioningMode.WordStart || mode == CaretPositioningMode.WordStartOrSymbol)
				      && (charAfter == CharacterClass.Whitespace || charAfter == CharacterClass.LineTerminator)))
				{
					return true;
				}
			}
			return false;
		}
		#endregion
	}

	/// <summary>
	/// Classifies a character as whitespace, line terminator, part of an identifier, or other.
	/// </summary>
	public enum CharacterClass
	{
		/// <summary>
		/// The character is not whitespace, line terminator or part of an identifier.
		/// </summary>
		Other,
		/// <summary>
		/// The character is whitespace (but not line terminator).
		/// </summary>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		Whitespace,
		/// <summary>
		/// The character can be part of an identifier (Letter, digit or underscore).
		/// </summary>
		IdentifierPart,
		/// <summary>
		/// The character is line terminator (\r or \n).
		/// </summary>
		LineTerminator,
		/// <summary>
		/// The character is a unicode combining mark that modifies the previous character.
		/// Corresponds to the Unicode designations "Mn", "Mc" and "Me".
		/// </summary>
		CombiningMark
	}
}