以下是基于 KWIC 的关键字匹配算法(在管道+过滤器模式下实现)
本文给出其关键部分在管道+过滤器软件体系结构下的实现。许多关键字搜索平台都使用了这种“循环移位+排序输出”的关键字匹配算法。具体需求如下:
1、使用管道-过滤器风格:每一个过滤器处理数据,然后将结果送至下一个过滤器。
只要有数据传入,过滤器即开始工作。
过滤器之间的数据共享被严格限制在管道传输
四个过滤器:输入(Input filter):从数据源读取输入文件,解析格式,将行写入输出管道;移位(CircularShifter filter):循环移位;排序(Alphabetizer filter):排序;输出(Output filter):输出。管道:in_cs pipe、cs_al pipe、al_ou pipe。例如:
代码如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;

namespace KWIC
{
    /// <summary>
    /// Pipe: holds the list of lines most recently written by one filter so that
    /// the next filter can read it.
    /// </summary>
    public class Pipe
    {
        // Starts out empty so that read() never returns null, even before the first write().
        List<string> word = new List<string>();

        /// <summary>
        /// Returns the list most recently handed to <see cref="write"/>
        /// (an empty list if nothing has been written yet).
        /// </summary>
        public List<string> read()
        {
            return word;
        }

        /// <summary>
        /// Stores <paramref name="word"/> as the pipe content; null is stored as an empty list.
        /// </summary>
        public void write(List<string> word)
        {
            this.word = word ?? new List<string>();
        }
    }

    /// <summary>
    /// Base class of every filter placed between pipes.
    /// </summary>
    public abstract class Filter
    {
        /// <summary>
        /// Performs the filter's work. The base implementation does nothing.
        /// </summary>
        public virtual void Transform() { }
    }

    /// <summary>
    /// Input filter: feeds the initial lines into the first pipe.
    /// </summary>
    public class InputFilter : Filter
    {
        public Pipe outPipe;
        public List<string> word;

        public InputFilter(List<string> word, Pipe outPipe)
        {
            this.word = word;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Writes a copy of the input lines to the output pipe, so downstream
        /// filters cannot modify this filter's own list.
        /// </summary>
        public override void Transform()
        {
            List<string> copy = word == null ? new List<string>() : new List<string>(word);
            outPipe.write(copy);
        }
    }

    /// <summary>
    /// Circular-shift filter: for every input line, emits the line itself plus
    /// all of its other word rotations.
    /// </summary>
    public class CircleShiftFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public CircleShiftFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Reads the lines from the input pipe and writes a new list to the output
        /// pipe: first the original lines in their original order, then the
        /// rotations of each line.
        /// </summary>
        public override void Transform()
        {
            List<string> word = inputPipe.read();

            // Build a fresh list instead of appending to the one read from the pipe:
            // that list is owned by the upstream filter, and growing it in place made
            // every further run shift lines that had already been shifted.
            List<string> shifted = new List<string>(word);
            foreach (string line in word)
            {
                AddRotations(line, shifted);
            }

            outPipe.write(shifted);
        }

        /// <summary>
        /// Appends to <paramref name="target"/> every rotation of
        /// <paramref name="line"/> except the line itself. Each rotation moves the
        /// last word of the previous rotation to the front
        /// ("I love you" gives "you I love", then "love you I").
        /// </summary>
        private static void AddRotations(string line, List<string> target)
        {
            if (line == null)
            {
                return;
            }

            // Empty entries are dropped so repeated spaces cannot produce empty "words".
            string[] words = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // A line with n words has n - 1 rotations besides itself; none for 0 or 1 words.
            for (int shift = 1; shift < words.Length; shift++)
            {
                int start = words.Length - shift;
                string tail = string.Join(" ", words, start, shift);
                string head = string.Join(" ", words, 0, start);
                target.Add(tail + " " + head);
            }
        }
    }

    /// <summary>
    /// Alphabetizer filter: sorts the lines.
    /// </summary>
    public class AlphaFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public AlphaFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Writes a sorted copy of the input lines to the output pipe. The sort uses
        /// the default string comparer, so the order follows the current culture.
        /// </summary>
        public override void Transform()
        {
            // Sort a copy so the list held by the upstream pipe is left untouched.
            List<string> sorted = new List<string>(inputPipe.read());
            sorted.Sort();
            outPipe.write(sorted);
        }
    }

    /// <summary>
    /// Output filter: passes the lines through to the final pipe unchanged.
    /// </summary>
    public class OutputFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public OutputFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Forwards the list read from the input pipe to the output pipe.
        /// </summary>
        public override void Transform()
        {
            List<string> word = inputPipe.read();
            outPipe.write(word);
        }
    }

    /// <summary>
    /// Wires the four filters together with pipes and runs them in order.
    /// </summary>
    public class KWIC_System
    {
        Pipe in_cs;   // input filter   -> circular shifter
        Pipe cs_al;   // circular shifter -> alphabetizer
        Pipe al_ou;   // alphabetizer   -> output filter
        Pipe ou_ui;   // output filter  -> caller
        InputFilter inputFilter;
        CircleShiftFilter shifter;
        AlphaFilter alpha;
        OutputFilter output;

        /// <summary>
        /// Creates the pipeline over the built-in sample lines.
        /// </summary>
        public KWIC_System()
            : this(new[] { "I love you", "me too", "do you know" })
        {
        }

        /// <summary>
        /// Creates the pipeline over the given lines. Each line is trimmed and every
        /// run of whitespace (tabs, newlines, ...) is collapsed to a single space;
        /// null lines and lines that are blank after trimming are skipped.
        /// </summary>
        /// <param name="lines">The input lines.</param>
        /// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
        public KWIC_System(IEnumerable<string> lines)
        {
            if (lines == null)
            {
                throw new ArgumentNullException("lines");
            }

            in_cs = new Pipe();
            cs_al = new Pipe();
            al_ou = new Pipe();
            ou_ui = new Pipe();

            List<string> word = new List<string>();
            foreach (string line in lines)
            {
                if (line == null)
                {
                    continue;
                }

                // The regex matches every kind of whitespace and replaces each run with one space.
                string normalized = Regex.Replace(line.Trim(), @"\s+", " ");
                if (normalized.Length > 0)
                {
                    word.Add(normalized);
                }
            }

            inputFilter = new InputFilter(word, in_cs);
            shifter = new CircleShiftFilter(in_cs, cs_al);
            alpha = new AlphaFilter(cs_al, al_ou);
            output = new OutputFilter(al_ou, ou_ui);
        }

        /// <summary>
        /// Runs input, shift, sort and output in sequence and returns the content of
        /// the final pipe. Calling it again re-runs the pipeline from the input lines.
        /// </summary>
        public List<string> GetResult()
        {
            inputFilter.Transform();
            shifter.Transform();
            alpha.Transform();
            output.Transform();
            return ou_ui.read();
        }
    }
}
(备注:如果想让结果换行输出,需要在输出的每一行结尾加上 '\r\n')
事实上,这个关键字匹配算法在搜索技术中的应用范围非常广,比如我们常见的 Baidu 和 Google 的搜索关键字提示功能。
个人论坛:http://itpark.sinaapp.com/