/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: GPL 2.0 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License. You should have * received a copy of the GPL license along with this program; if you * did not, you can find it at http://www.gnu.org/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Coreseek.com code. * * Copyright (C) 2007-2008. All Rights Reserved. * * Author: * Li monan * * ***** END LICENSE BLOCK ***** */ #ifndef css_UnigramDict_h #define css_UnigramDict_h #include #include "darts.h" namespace css { class UnigramCorpusReader; } /* End of namespace css */ namespace css { /** * How to find item fast is a real problem here. * @return the string(utf-8,encoded) of the id. */ class UnigramDict { public: typedef Darts::DoubleArray::result_pair_type result_pair_type; UnigramDict(){}; virtual ~UnigramDict(){}; public: virtual int load(const char* filename); virtual int isLoad(); /** * This function should be used only, in Debug mode. */ virtual std::string getString(int id); /** * Find all word item in UnigramDict, which buf as a prefix * @return total items found */ virtual int findHits(const char* buf, result_pair_type* result = NULL, size_t result_len = 0, int keylen = 0); virtual int import(UnigramCorpusReader& ur); virtual int save(const char* filename); virtual int exactMatch(const char* key, int* id = NULL); protected: Darts::DoubleArray m_da; }; } /* End of namespace css */ #endif