/*******************************************************************************
 * You may amend and distribute as you like, but don't remove this header!
 *
 * EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
 * See http://www.codeplex.com/EPPlus for details.
 *
 * Copyright (C) 2011 Jan Källman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
 * If you unfamiliar with this license or have questions about it, here is an http://www.gnu.org/licenses/gpl-faq.html
 *
 * All code and executables are provided "as is" with no warranty either express or implied.
 * The author accepts no liability for any damage or loss of business that this product may cause.
 *
 * Code change notes:
 *
 * Author Change Date
 * ******************************************************************************
 * Mats Alm Added 2013-03-01 (Prior file history on https://github.com/swmal/ExcelFormulaParser)
 *******************************************************************************/
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using OfficeOpenXml.FormulaParsing.Excel.Functions;

namespace OfficeOpenXml.FormulaParsing.LexicalAnalysis
{
    /// <summary>
    /// Splits a formula string into a list of <see cref="Token"/> instances by scanning it
    /// character by character and breaking it up at the separators exposed by an
    /// <see cref="ITokenSeparatorProvider"/>.
    /// </summary>
    public class SourceCodeTokenizer : ISourceCodeTokenizer
    {
        /// <summary>
        /// Matches a token that is empty or consists solely of double-quote characters.
        /// Kept as a shared instance so the pattern is not re-specified for every character.
        /// </summary>
        private static readonly Regex QuotesOnlyPattern = new Regex("^\"*$");

        private readonly ITokenSeparatorProvider _tokenProvider;
        private readonly ITokenFactory _tokenFactory;

        /// <summary>
        /// Gets a new tokenizer that is wired up with the empty function-name and
        /// name-value providers. A fresh instance is created on every access.
        /// </summary>
        public static ISourceCodeTokenizer Default
        {
            get { return new SourceCodeTokenizer(FunctionNameProvider.Empty, NameValueProvider.Empty); }
        }

        /// <summary>
        /// Creates a tokenizer that builds its own <see cref="TokenFactory"/> from the
        /// supplied providers and uses a new <see cref="TokenSeparatorProvider"/>.
        /// </summary>
        /// <param name="functionRepository">Passed on to the <see cref="TokenFactory"/>.</param>
        /// <param name="nameValueProvider">Passed on to the <see cref="TokenFactory"/>.</param>
        public SourceCodeTokenizer(IFunctionNameProvider functionRepository, INameValueProvider nameValueProvider)
            : this(new TokenFactory(functionRepository, nameValueProvider), new TokenSeparatorProvider())
        {
        }

        /// <summary>
        /// Creates a tokenizer from an explicit token factory and separator provider.
        /// </summary>
        /// <param name="tokenFactory">Factory used to create tokens for non-separator text.</param>
        /// <param name="tokenProvider">Provider of the separator/operator tokens.</param>
        public SourceCodeTokenizer(ITokenFactory tokenFactory, ITokenSeparatorProvider tokenProvider)
        {
            _tokenFactory = tokenFactory;
            _tokenProvider = tokenProvider;
        }

        /// <summary>
        /// Tokenizes <paramref name="input"/> without a worksheet name.
        /// </summary>
        /// <param name="input">The formula text.</param>
        /// <returns>The tokens found; an empty sequence when the input is null or empty.</returns>
        public IEnumerable<Token> Tokenize(string input)
        {
            return Tokenize(input, null);
        }

        /// <summary>
        /// Tokenizes <paramref name="input"/>. Leading '+' characters are dropped before scanning,
        /// and the resulting token list is post-processed by <c>CleanupTokens</c>.
        /// </summary>
        /// <param name="input">The formula text.</param>
        /// <param name="worksheet">Worksheet name handed to the token factory; may be null.</param>
        /// <returns>The tokens found; an empty sequence when the input is null or empty.</returns>
        public IEnumerable<Token> Tokenize(string input, string worksheet)
        {
            if (string.IsNullOrEmpty(input))
            {
                return Enumerable.Empty<Token>();
            }
            // MA 1401: Ignore leading plus in formula.
            input = input.TrimStart('+');
            var context = new TokenizerContext(input);

            // NOTE(review): the statements from the loop header down to the bracket-depth guard
            // were unreadable in the reviewed copy of this file and have been reconstructed.
            // IsInString, BracketCount and the OpeningBracket/ClosingBracket token types are
            // assumed members of TokenizerContext/TokenType - confirm against version control
            // before merging.
            for (int i = 0; i < context.FormulaChars.Length; i++)
            {
                var c = context.FormulaChars[i];
                Token tokenSeparator;
                if (CharIsTokenSeparator(c, out tokenSeparator))
                {
                    if (context.IsInString)
                    {
                        if (IsDoubleQuote(tokenSeparator, i, context))
                        {
                            // An escaped quote ("") inside a string: consume both characters.
                            i++;
                            context.AppendToCurrentToken(c);
                            continue;
                        }
                        if (tokenSeparator.TokenType != TokenType.String)
                        {
                            // Separators inside a string literal are ordinary content.
                            context.AppendToCurrentToken(c);
                            continue;
                        }
                    }
                    if (tokenSeparator.TokenType == TokenType.OpeningBracket)
                    {
                        context.AppendToCurrentToken(c);
                        context.BracketCount++;
                        continue;
                    }
                    if (tokenSeparator.TokenType == TokenType.ClosingBracket)
                    {
                        context.AppendToCurrentToken(c);
                        context.BracketCount--;
                        continue;
                    }
                    if (context.BracketCount > 0)
                    {
                        context.AppendToCurrentToken(c);
                        continue;
                    }
                    // End of reconstructed section.

                    // two operators in sequence could be "<=" or ">="
                    if (IsPartOfMultipleCharSeparator(context, c))
                    {
                        var sOp = context.LastToken.Value + c.ToString(CultureInfo.InvariantCulture);
                        var op = _tokenProvider.Tokens[sOp];
                        context.ReplaceLastToken(op);
                        context.NewToken();
                        continue;
                    }
                    if (tokenSeparator.TokenType == TokenType.String)
                    {
                        if (context.LastToken != null && context.LastToken.TokenType == TokenType.OpeningEnumerable)
                        {
                            context.AppendToCurrentToken(c);
                            context.ToggleIsInString();
                            continue;
                        }
                        if (context.LastToken != null && context.LastToken.TokenType == TokenType.String)
                        {
                            // Closing quote: emit what was collected (possibly nothing) as string content.
                            context.AddToken(!context.CurrentTokenHasValue
                                ? new Token(string.Empty, TokenType.StringContent)
                                : new Token(context.CurrentToken, TokenType.StringContent));
                        }
                        context.AddToken(new Token("\"", TokenType.String));
                        context.ToggleIsInString();
                        context.NewToken();
                        continue;
                    }
                    if (context.CurrentTokenHasValue)
                    {
                        if (QuotesOnlyPattern.IsMatch(context.CurrentToken))
                        {
                            context.AddToken(_tokenFactory.Create(context.CurrentToken, TokenType.StringContent));
                        }
                        else
                        {
                            context.AddToken(CreateToken(context, worksheet));
                        }
                        // If the next token is an opening parenthesis and the previous token was interpreted
                        // as an address or a name, then the previous token is really a function.
                        if (tokenSeparator.TokenType == TokenType.OpeningParenthesis
                            && (context.LastToken.TokenType == TokenType.ExcelAddress
                                || context.LastToken.TokenType == TokenType.NameValue))
                        {
                            context.LastToken.TokenType = TokenType.Function;
                        }
                    }
                    if (tokenSeparator.Value == "-")
                    {
                        if (TokenIsNegator(context))
                        {
                            context.AddToken(new Token("-", TokenType.Negator));
                            continue;
                        }
                    }
                    context.AddToken(tokenSeparator);
                    context.NewToken();
                    continue;
                }
                context.AppendToCurrentToken(c);
            }
            if (context.CurrentTokenHasValue)
            {
                context.AddToken(CreateToken(context, worksheet));
            }

            CleanupTokens(context, _tokenProvider.Tokens);

            return context.Result;
        }

        /// <summary>
        /// Returns true when <paramref name="tokenSeparator"/> is a string delimiter and the
        /// character following <paramref name="formulaCharIndex"/> is also a double quote.
        /// </summary>
        private static bool IsDoubleQuote(Token tokenSeparator, int formulaCharIndex, TokenizerContext context)
        {
            return tokenSeparator.TokenType == TokenType.String
                && formulaCharIndex + 1 < context.FormulaChars.Length
                && context.FormulaChars[formulaCharIndex + 1] == '\"';
        }

        /// <summary>
        /// Post-processes the token list in place: classifies tokens that are still
        /// <see cref="TokenType.Unrecognized"/> and collapses runs of consecutive +/- signs.
        /// </summary>
        /// <param name="context">The context whose <c>Result</c> list is rewritten.</param>
        /// <param name="tokens">Separator tokens keyed by their text; used to fetch "+" and "-".</param>
        private static void CleanupTokens(TokenizerContext context, IDictionary<string, Token> tokens)
        {
            for (int i = 0; i < context.Result.Count; i++)
            {
                var token = context.Result[i];
                if (token.TokenType == TokenType.Unrecognized)
                {
                    // An unrecognized token directly followed by "(" is a function call, otherwise a name.
                    var followedByParenthesis = i < context.Result.Count - 1
                        && context.Result[i + 1].TokenType == TokenType.OpeningParenthesis;
                    token.TokenType = followedByParenthesis ? TokenType.Function : TokenType.NameValue;
                }
                else if ((token.TokenType == TokenType.Operator || token.TokenType == TokenType.Negator)
                         && i < context.Result.Count - 1
                         && (token.Value == "+" || token.Value == "-"))
                {
                    // Remove any + with an opening parenthesis before.
                    if (i > 0 && token.Value == "+"
                        && context.Result[i - 1].TokenType == TokenType.OpeningParenthesis)
                    {
                        context.Result.RemoveAt(i);
                        SetNegatorOperator(context, i, tokens);
                        i--;
                        continue;
                    }

                    var nextToken = context.Result[i + 1];
                    if (nextToken.TokenType == TokenType.Operator || nextToken.TokenType == TokenType.Negator)
                    {
                        if (token.Value == "+" && (nextToken.Value == "+" || nextToken.Value == "-"))
                        {
                            // "+" followed by a sign: the first one is redundant.
                            context.Result.RemoveAt(i);
                            SetNegatorOperator(context, i, tokens);
                            i--;
                        }
                        else if (token.Value == "-" && nextToken.Value == "+")
                        {
                            // "-+": the second one is redundant.
                            context.Result.RemoveAt(i + 1);
                            SetNegatorOperator(context, i, tokens);
                            i--;
                        }
                        else if (token.Value == "-" && nextToken.Value == "-")
                        {
                            // "--": remove the first minus...
                            context.Result.RemoveAt(i);
                            if (i == 0)
                            {
                                // ...and, at the very start, the second one too: there is no left operand
                                // for a "+". The second minus has shifted down to index i. (Removing at
                                // i + 1 here would delete the operand that follows the signs, or throw
                                // when nothing follows them.)
                                context.Result.RemoveAt(i);
                                i--; // re-examine whatever is now first
                            }
                            else
                            {
                                // ...and turn the second one into a "+".
                                context.Result[i] = tokens["+"];
                                SetNegatorOperator(context, i, tokens);
                                i--;
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Re-types the "-" at index <paramref name="i"/> (if there is one and it is not the first
        /// token) as a negator or as the binary minus operator, depending on the token before it.
        /// </summary>
        private static void SetNegatorOperator(TokenizerContext context, int i, IDictionary<string, Token> tokens)
        {
            if (context.Result[i].Value == "-" && i > 0
                && (context.Result[i].TokenType == TokenType.Operator
                    || context.Result[i].TokenType == TokenType.Negator))
            {
                if (TokenIsNegator(context.Result[i - 1]))
                {
                    context.Result[i] = new Token("-", TokenType.Negator);
                }
                else
                {
                    context.Result[i] = tokens["-"];
                }
            }
        }

        /// <summary>
        /// Returns true when a "-" following the context's last token should be read as a negator.
        /// </summary>
        private static bool TokenIsNegator(TokenizerContext context)
        {
            return TokenIsNegator(context.LastToken);
        }

        /// <summary>
        /// Returns true when a "-" following <paramref name="t"/> should be read as a negator:
        /// there is no preceding token, or it is an operator, opening parenthesis, comma,
        /// semicolon or opening enumerable.
        /// </summary>
        private static bool TokenIsNegator(Token t)
        {
            return t == null
                || t.TokenType == TokenType.Operator
                || t.TokenType == TokenType.OpeningParenthesis
                || t.TokenType == TokenType.Comma
                || t.TokenType == TokenType.SemiColon
                || t.TokenType == TokenType.OpeningEnumerable;
        }

        /// <summary>
        /// Returns true when the last token is an operator, <paramref name="c"/> can be the last
        /// part of a multi-character operator and no token text has been collected since.
        /// </summary>
        private bool IsPartOfMultipleCharSeparator(TokenizerContext context, char c)
        {
            var lastToken = context.LastToken != null ? context.LastToken.Value : string.Empty;
            return _tokenProvider.IsOperator(lastToken)
                && _tokenProvider.IsPossibleLastPartOfMultipleCharOperator(c.ToString(CultureInfo.InvariantCulture))
                && !context.CurrentTokenHasValue;
        }

        /// <summary>
        /// Creates a token for the text collected in <paramref name="context"/>. A lone "-" with no
        /// preceding token, or preceded by an operator, becomes a negator; everything else is
        /// delegated to the token factory.
        /// </summary>
        private Token CreateToken(TokenizerContext context, string worksheet)
        {
            if (context.CurrentToken == "-")
            {
                // Must be "||": with "&&" the right-hand side could only ever run on a null
                // LastToken, i.e. the test either threw a NullReferenceException or was false.
                if (context.LastToken == null || context.LastToken.TokenType == TokenType.Operator)
                {
                    return new Token("-", TokenType.Negator);
                }
            }
            return _tokenFactory.Create(context.Result, context.CurrentToken, worksheet);
        }

        /// <summary>
        /// Looks <paramref name="c"/> up among the separator tokens.
        /// </summary>
        /// <param name="c">The character to test.</param>
        /// <param name="token">The matching separator token, or null when there is none.</param>
        /// <returns>True when <paramref name="c"/> is a separator.</returns>
        private bool CharIsTokenSeparator(char c, out Token token)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            return _tokenProvider.Tokens.TryGetValue(c.ToString(CultureInfo.InvariantCulture), out token);
        }
    }
}