tokenizer_impl.h 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #pragma once
  19. #include "tokenizer.h"
  20. namespace wwsearch {
  21. class TokenizerSpaceImpl : public Tokenizer {
  22. private:
  23. public:
  24. TokenizerSpaceImpl() {}
  25. virtual ~TokenizerSpaceImpl() {}
  26. virtual bool Do(wwsearch::Document &document) override;
  27. virtual bool BuildTerms(const char *buffer, size_t buffer_size,
  28. std::set<std::string> &terms,
  29. bool no_covert_to_lower_case = false) override;
  30. inline bool IsSkipChar(char c) { return c == ' ' || c == '\t' || c == '\r'; }
  31. virtual void ToLowerCase(std::string &old) override {}
  32. private:
  33. };
  34. } // namespace wwsearch