THIS IS A TEST INSTANCE ONLY! REPOSITORIES CAN BE DELETED AT ANY TIME!

Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775
  1. /*
  2. * This file has been copied from commit e7ac713d^ in the GNU grep git
  3. * repository. A few small changes have been made to adapt the code to
  4. * Git.
  5. */
  6. /* kwset.c - search for any of a set of keywords.
  7. Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc.
  8. This program is free software; you can redistribute it and/or modify
  9. it under the terms of the GNU General Public License as published by
  10. the Free Software Foundation; either version 2, or (at your option)
  11. any later version.
  12. This program is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. GNU General Public License for more details.
  16. You should have received a copy of the GNU General Public License
  17. along with this program; if not, see <http://www.gnu.org/licenses/>. */
  18. /* Written August 1989 by Mike Haertel.
  19. The author may be reached (Email) at the address mike@ai.mit.edu,
  20. or (US mail) as Mike Haertel c/o Free Software Foundation. */
  21. /* The algorithm implemented by these routines bears a startling resemblance
  22. to one discovered by Beate Commentz-Walter, although it is not identical.
  23. See "A String Matching Algorithm Fast on the Average," Technical Report,
  24. IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
  25. Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
  26. String Matching: An Aid to Bibliographic Search," CACM June 1975,
  27. Vol. 18, No. 6, which describes the failure function used below. */
  28. #include "cache.h"
  29. #include "kwset.h"
  30. #include "compat/obstack.h"
  31. #define NCHAR (UCHAR_MAX + 1)
  32. /* adapter for `xmalloc()`, which takes `size_t`, not `long` */
  33. static void *obstack_chunk_alloc(long size)
  34. {
  35. if (size < 0)
  36. BUG("Cannot allocate a negative amount: %ld", size);
  37. return xmalloc(size);
  38. }
  39. #define obstack_chunk_free free
  40. #define U(c) ((unsigned char) (c))
  41. /* Balanced tree of edges and labels leaving a given trie node. */
  42. struct tree
  43. {
  44. struct tree *llink; /* Left link; MUST be first field. */
  45. struct tree *rlink; /* Right link (to larger labels). */
  46. struct trie *trie; /* Trie node pointed to by this edge. */
  47. unsigned char label; /* Label on this edge. */
  48. char balance; /* Difference in depths of subtrees. */
  49. };
  50. /* Node of a trie representing a set of reversed keywords. */
  51. struct trie
  52. {
  53. unsigned int accepting; /* Word index of accepted word, or zero. */
  54. struct tree *links; /* Tree of edges leaving this node. */
  55. struct trie *parent; /* Parent of this node. */
  56. struct trie *next; /* List of all trie nodes in level order. */
  57. struct trie *fail; /* Aho-Corasick failure function. */
  58. int depth; /* Depth of this node from the root. */
  59. int shift; /* Shift function for search failures. */
  60. int maxshift; /* Max shift of self and descendants. */
  61. };
  62. /* Structure returned opaquely to the caller, containing everything. */
  63. struct kwset
  64. {
  65. struct obstack obstack; /* Obstack for node allocation. */
  66. int words; /* Number of words in the trie. */
  67. struct trie *trie; /* The trie itself. */
  68. int mind; /* Minimum depth of an accepting node. */
  69. int maxd; /* Maximum depth of any node. */
  70. unsigned char delta[NCHAR]; /* Delta table for rapid search. */
  71. struct trie *next[NCHAR]; /* Table of children of the root. */
  72. char *target; /* Target string if there's only one. */
  73. int mind2; /* Used in Boyer-Moore search for one string. */
  74. unsigned char const *trans; /* Character translation table. */
  75. };
  76. /* Allocate and initialize a keyword set object, returning an opaque
  77. pointer to it. Return NULL if memory is not available. */
  78. kwset_t
  79. kwsalloc (unsigned char const *trans)
  80. {
  81. struct kwset *kwset;
  82. kwset = (struct kwset *) xmalloc(sizeof (struct kwset));
  83. obstack_init(&kwset->obstack);
  84. kwset->words = 0;
  85. kwset->trie
  86. = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
  87. if (!kwset->trie)
  88. {
  89. kwsfree((kwset_t) kwset);
  90. return NULL;
  91. }
  92. kwset->trie->accepting = 0;
  93. kwset->trie->links = NULL;
  94. kwset->trie->parent = NULL;
  95. kwset->trie->next = NULL;
  96. kwset->trie->fail = NULL;
  97. kwset->trie->depth = 0;
  98. kwset->trie->shift = 0;
  99. kwset->mind = INT_MAX;
  100. kwset->maxd = -1;
  101. kwset->target = NULL;
  102. kwset->trans = trans;
  103. return (kwset_t) kwset;
  104. }
  105. /* This upper bound is valid for CHAR_BIT >= 4 and
  106. exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
  107. #define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
  108. /* Add the given string to the contents of the keyword set. Return NULL
  109. for success, an error message otherwise. */
  110. const char *
  111. kwsincr (kwset_t kws, char const *text, size_t len)
  112. {
  113. struct kwset *kwset;
  114. register struct trie *trie;
  115. register unsigned char label;
  116. register struct tree *link;
  117. register int depth;
  118. struct tree *links[DEPTH_SIZE];
  119. enum { L, R } dirs[DEPTH_SIZE];
  120. struct tree *t, *r, *l, *rl, *lr;
  121. kwset = (struct kwset *) kws;
  122. trie = kwset->trie;
  123. text += len;
  124. /* Descend the trie (built of reversed keywords) character-by-character,
  125. installing new nodes when necessary. */
  126. while (len--)
  127. {
  128. label = kwset->trans ? kwset->trans[U(*--text)] : *--text;
  129. /* Descend the tree of outgoing links for this trie node,
  130. looking for the current character and keeping track
  131. of the path followed. */
  132. link = trie->links;
  133. links[0] = (struct tree *) &trie->links;
  134. dirs[0] = L;
  135. depth = 1;
  136. while (link && label != link->label)
  137. {
  138. links[depth] = link;
  139. if (label < link->label)
  140. dirs[depth++] = L, link = link->llink;
  141. else
  142. dirs[depth++] = R, link = link->rlink;
  143. }
  144. /* The current character doesn't have an outgoing link at
  145. this trie node, so build a new trie node and install
  146. a link in the current trie node's tree. */
  147. if (!link)
  148. {
  149. link = (struct tree *) obstack_alloc(&kwset->obstack,
  150. sizeof (struct tree));
  151. if (!link)
  152. return "memory exhausted";
  153. link->llink = NULL;
  154. link->rlink = NULL;
  155. link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
  156. sizeof (struct trie));
  157. if (!link->trie)
  158. {
  159. obstack_free(&kwset->obstack, link);
  160. return "memory exhausted";
  161. }
  162. link->trie->accepting = 0;
  163. link->trie->links = NULL;
  164. link->trie->parent = trie;
  165. link->trie->next = NULL;
  166. link->trie->fail = NULL;
  167. link->trie->depth = trie->depth + 1;
  168. link->trie->shift = 0;
  169. link->label = label;
  170. link->balance = 0;
  171. /* Install the new tree node in its parent. */
  172. if (dirs[--depth] == L)
  173. links[depth]->llink = link;
  174. else
  175. links[depth]->rlink = link;
  176. /* Back up the tree fixing the balance flags. */
  177. while (depth && !links[depth]->balance)
  178. {
  179. if (dirs[depth] == L)
  180. --links[depth]->balance;
  181. else
  182. ++links[depth]->balance;
  183. --depth;
  184. }
  185. /* Rebalance the tree by pointer rotations if necessary. */
  186. if (depth && ((dirs[depth] == L && --links[depth]->balance)
  187. || (dirs[depth] == R && ++links[depth]->balance)))
  188. {
  189. switch (links[depth]->balance)
  190. {
  191. case (char) -2:
  192. switch (dirs[depth + 1])
  193. {
  194. case L:
  195. r = links[depth], t = r->llink, rl = t->rlink;
  196. t->rlink = r, r->llink = rl;
  197. t->balance = r->balance = 0;
  198. break;
  199. case R:
  200. r = links[depth], l = r->llink, t = l->rlink;
  201. rl = t->rlink, lr = t->llink;
  202. t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
  203. l->balance = t->balance != 1 ? 0 : -1;
  204. r->balance = t->balance != (char) -1 ? 0 : 1;
  205. t->balance = 0;
  206. break;
  207. default:
  208. abort ();
  209. }
  210. break;
  211. case 2:
  212. switch (dirs[depth + 1])
  213. {
  214. case R:
  215. l = links[depth], t = l->rlink, lr = t->llink;
  216. t->llink = l, l->rlink = lr;
  217. t->balance = l->balance = 0;
  218. break;
  219. case L:
  220. l = links[depth], r = l->rlink, t = r->llink;
  221. lr = t->llink, rl = t->rlink;
  222. t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
  223. l->balance = t->balance != 1 ? 0 : -1;
  224. r->balance = t->balance != (char) -1 ? 0 : 1;
  225. t->balance = 0;
  226. break;
  227. default:
  228. abort ();
  229. }
  230. break;
  231. default:
  232. abort ();
  233. }
  234. if (dirs[depth - 1] == L)
  235. links[depth - 1]->llink = t;
  236. else
  237. links[depth - 1]->rlink = t;
  238. }
  239. }
  240. trie = link->trie;
  241. }
  242. /* Mark the node we finally reached as accepting, encoding the
  243. index number of this word in the keyword set so far. */
  244. if (!trie->accepting)
  245. trie->accepting = 1 + 2 * kwset->words;
  246. ++kwset->words;
  247. /* Keep track of the longest and shortest string of the keyword set. */
  248. if (trie->depth < kwset->mind)
  249. kwset->mind = trie->depth;
  250. if (trie->depth > kwset->maxd)
  251. kwset->maxd = trie->depth;
  252. return NULL;
  253. }
  254. /* Enqueue the trie nodes referenced from the given tree in the
  255. given queue. */
  256. static void
  257. enqueue (struct tree *tree, struct trie **last)
  258. {
  259. if (!tree)
  260. return;
  261. enqueue(tree->llink, last);
  262. enqueue(tree->rlink, last);
  263. (*last) = (*last)->next = tree->trie;
  264. }
  265. /* Compute the Aho-Corasick failure function for the trie nodes referenced
  266. from the given tree, given the failure function for their parent as
  267. well as a last resort failure node. */
  268. static void
  269. treefails (register struct tree const *tree, struct trie const *fail,
  270. struct trie *recourse)
  271. {
  272. register struct tree *link;
  273. if (!tree)
  274. return;
  275. treefails(tree->llink, fail, recourse);
  276. treefails(tree->rlink, fail, recourse);
  277. /* Find, in the chain of fails going back to the root, the first
  278. node that has a descendant on the current label. */
  279. while (fail)
  280. {
  281. link = fail->links;
  282. while (link && tree->label != link->label)
  283. if (tree->label < link->label)
  284. link = link->llink;
  285. else
  286. link = link->rlink;
  287. if (link)
  288. {
  289. tree->trie->fail = link->trie;
  290. return;
  291. }
  292. fail = fail->fail;
  293. }
  294. tree->trie->fail = recourse;
  295. }
  296. /* Set delta entries for the links of the given tree such that
  297. the preexisting delta value is larger than the current depth. */
  298. static void
  299. treedelta (register struct tree const *tree,
  300. register unsigned int depth,
  301. unsigned char delta[])
  302. {
  303. if (!tree)
  304. return;
  305. treedelta(tree->llink, depth, delta);
  306. treedelta(tree->rlink, depth, delta);
  307. if (depth < delta[tree->label])
  308. delta[tree->label] = depth;
  309. }
  310. /* Return true if A has every label in B. */
  311. static int
  312. hasevery (register struct tree const *a, register struct tree const *b)
  313. {
  314. if (!b)
  315. return 1;
  316. if (!hasevery(a, b->llink))
  317. return 0;
  318. if (!hasevery(a, b->rlink))
  319. return 0;
  320. while (a && b->label != a->label)
  321. if (b->label < a->label)
  322. a = a->llink;
  323. else
  324. a = a->rlink;
  325. return !!a;
  326. }
  327. /* Compute a vector, indexed by character code, of the trie nodes
  328. referenced from the given tree. */
  329. static void
  330. treenext (struct tree const *tree, struct trie *next[])
  331. {
  332. if (!tree)
  333. return;
  334. treenext(tree->llink, next);
  335. treenext(tree->rlink, next);
  336. next[tree->label] = tree->trie;
  337. }
  338. /* Compute the shift for each trie node, as well as the delta
  339. table and next cache for the given keyword set. */
  340. const char *
  341. kwsprep (kwset_t kws)
  342. {
  343. register struct kwset *kwset;
  344. register int i;
  345. register struct trie *curr;
  346. register unsigned char const *trans;
  347. unsigned char delta[NCHAR];
  348. kwset = (struct kwset *) kws;
  349. /* Initial values for the delta table; will be changed later. The
  350. delta entry for a given character is the smallest depth of any
  351. node at which an outgoing edge is labeled by that character. */
  352. memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR);
  353. /* Check if we can use the simple boyer-moore algorithm, instead
  354. of the hairy commentz-walter algorithm. */
  355. if (kwset->words == 1 && kwset->trans == NULL)
  356. {
  357. char c;
  358. /* Looking for just one string. Extract it from the trie. */
  359. kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
  360. if (!kwset->target)
  361. return "memory exhausted";
  362. for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
  363. {
  364. kwset->target[i] = curr->links->label;
  365. curr = curr->links->trie;
  366. }
  367. /* Build the Boyer Moore delta. Boy that's easy compared to CW. */
  368. for (i = 0; i < kwset->mind; ++i)
  369. delta[U(kwset->target[i])] = kwset->mind - (i + 1);
  370. /* Find the minimal delta2 shift that we might make after
  371. a backwards match has failed. */
  372. c = kwset->target[kwset->mind - 1];
  373. for (i = kwset->mind - 2; i >= 0; --i)
  374. if (kwset->target[i] == c)
  375. break;
  376. kwset->mind2 = kwset->mind - (i + 1);
  377. }
  378. else
  379. {
  380. register struct trie *fail;
  381. struct trie *last, *next[NCHAR];
  382. /* Traverse the nodes of the trie in level order, simultaneously
  383. computing the delta table, failure function, and shift function. */
  384. for (curr = last = kwset->trie; curr; curr = curr->next)
  385. {
  386. /* Enqueue the immediate descendants in the level order queue. */
  387. enqueue(curr->links, &last);
  388. curr->shift = kwset->mind;
  389. curr->maxshift = kwset->mind;
  390. /* Update the delta table for the descendants of this node. */
  391. treedelta(curr->links, curr->depth, delta);
  392. /* Compute the failure function for the descendants of this node. */
  393. treefails(curr->links, curr->fail, kwset->trie);
  394. /* Update the shifts at each node in the current node's chain
  395. of fails back to the root. */
  396. for (fail = curr->fail; fail; fail = fail->fail)
  397. {
  398. /* If the current node has some outgoing edge that the fail
  399. doesn't, then the shift at the fail should be no larger
  400. than the difference of their depths. */
  401. if (!hasevery(fail->links, curr->links))
  402. if (curr->depth - fail->depth < fail->shift)
  403. fail->shift = curr->depth - fail->depth;
  404. /* If the current node is accepting then the shift at the
  405. fail and its descendants should be no larger than the
  406. difference of their depths. */
  407. if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
  408. fail->maxshift = curr->depth - fail->depth;
  409. }
  410. }
  411. /* Traverse the trie in level order again, fixing up all nodes whose
  412. shift exceeds their inherited maxshift. */
  413. for (curr = kwset->trie->next; curr; curr = curr->next)
  414. {
  415. if (curr->maxshift > curr->parent->maxshift)
  416. curr->maxshift = curr->parent->maxshift;
  417. if (curr->shift > curr->maxshift)
  418. curr->shift = curr->maxshift;
  419. }
  420. /* Create a vector, indexed by character code, of the outgoing links
  421. from the root node. */
  422. for (i = 0; i < NCHAR; ++i)
  423. next[i] = NULL;
  424. treenext(kwset->trie->links, next);
  425. if ((trans = kwset->trans) != NULL)
  426. for (i = 0; i < NCHAR; ++i)
  427. kwset->next[i] = next[U(trans[i])];
  428. else
  429. COPY_ARRAY(kwset->next, next, NCHAR);
  430. }
  431. /* Fix things up for any translation table. */
  432. if ((trans = kwset->trans) != NULL)
  433. for (i = 0; i < NCHAR; ++i)
  434. kwset->delta[i] = delta[U(trans[i])];
  435. else
  436. memcpy(kwset->delta, delta, NCHAR);
  437. return NULL;
  438. }
  439. /* Fast boyer-moore search. */
  440. static size_t
  441. bmexec (kwset_t kws, char const *text, size_t size)
  442. {
  443. struct kwset const *kwset;
  444. register unsigned char const *d1;
  445. register char const *ep, *sp, *tp;
  446. register int d, gc, i, len, md2;
  447. kwset = (struct kwset const *) kws;
  448. len = kwset->mind;
  449. if (len == 0)
  450. return 0;
  451. if (len > size)
  452. return -1;
  453. if (len == 1)
  454. {
  455. tp = memchr (text, kwset->target[0], size);
  456. return tp ? tp - text : -1;
  457. }
  458. d1 = kwset->delta;
  459. sp = kwset->target + len;
  460. gc = U(sp[-2]);
  461. md2 = kwset->mind2;
  462. tp = text + len;
  463. /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
  464. if (size > 12 * len)
  465. /* 11 is not a bug, the initial offset happens only once. */
  466. for (ep = text + size - 11 * len;;)
  467. {
  468. while (tp <= ep)
  469. {
  470. d = d1[U(tp[-1])], tp += d;
  471. d = d1[U(tp[-1])], tp += d;
  472. if (d == 0)
  473. goto found;
  474. d = d1[U(tp[-1])], tp += d;
  475. d = d1[U(tp[-1])], tp += d;
  476. d = d1[U(tp[-1])], tp += d;
  477. if (d == 0)
  478. goto found;
  479. d = d1[U(tp[-1])], tp += d;
  480. d = d1[U(tp[-1])], tp += d;
  481. d = d1[U(tp[-1])], tp += d;
  482. if (d == 0)
  483. goto found;
  484. d = d1[U(tp[-1])], tp += d;
  485. d = d1[U(tp[-1])], tp += d;
  486. }
  487. break;
  488. found:
  489. if (U(tp[-2]) == gc)
  490. {
  491. for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
  492. ;
  493. if (i > len)
  494. return tp - len - text;
  495. }
  496. tp += md2;
  497. }
  498. /* Now we have only a few characters left to search. We
  499. carefully avoid ever producing an out-of-bounds pointer. */
  500. ep = text + size;
  501. d = d1[U(tp[-1])];
  502. while (d <= ep - tp)
  503. {
  504. d = d1[U((tp += d)[-1])];
  505. if (d != 0)
  506. continue;
  507. if (U(tp[-2]) == gc)
  508. {
  509. for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
  510. ;
  511. if (i > len)
  512. return tp - len - text;
  513. }
  514. d = md2;
  515. }
  516. return -1;
  517. }
  518. /* Hairy multiple string search. */
  519. static size_t
  520. cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
  521. {
  522. struct kwset const *kwset;
  523. struct trie * const *next;
  524. struct trie const *trie;
  525. struct trie const *accept;
  526. char const *beg, *lim, *mch, *lmch;
  527. register unsigned char c;
  528. register unsigned char const *delta;
  529. register int d;
  530. register char const *end, *qlim;
  531. register struct tree const *tree;
  532. register unsigned char const *trans;
  533. accept = NULL;
  534. /* Initialize register copies and look for easy ways out. */
  535. kwset = (struct kwset *) kws;
  536. if (len < kwset->mind)
  537. return -1;
  538. next = kwset->next;
  539. delta = kwset->delta;
  540. trans = kwset->trans;
  541. lim = text + len;
  542. end = text;
  543. if ((d = kwset->mind) != 0)
  544. mch = NULL;
  545. else
  546. {
  547. mch = text, accept = kwset->trie;
  548. goto match;
  549. }
  550. if (len >= 4 * kwset->mind)
  551. qlim = lim - 4 * kwset->mind;
  552. else
  553. qlim = NULL;
  554. while (lim - end >= d)
  555. {
  556. if (qlim && end <= qlim)
  557. {
  558. end += d - 1;
  559. while ((d = delta[c = *end]) && end < qlim)
  560. {
  561. end += d;
  562. end += delta[U(*end)];
  563. end += delta[U(*end)];
  564. }
  565. ++end;
  566. }
  567. else
  568. d = delta[c = (end += d)[-1]];
  569. if (d)
  570. continue;
  571. beg = end - 1;
  572. trie = next[c];
  573. if (trie->accepting)
  574. {
  575. mch = beg;
  576. accept = trie;
  577. }
  578. d = trie->shift;
  579. while (beg > text)
  580. {
  581. c = trans ? trans[U(*--beg)] : *--beg;
  582. tree = trie->links;
  583. while (tree && c != tree->label)
  584. if (c < tree->label)
  585. tree = tree->llink;
  586. else
  587. tree = tree->rlink;
  588. if (tree)
  589. {
  590. trie = tree->trie;
  591. if (trie->accepting)
  592. {
  593. mch = beg;
  594. accept = trie;
  595. }
  596. }
  597. else
  598. break;
  599. d = trie->shift;
  600. }
  601. if (mch)
  602. goto match;
  603. }
  604. return -1;
  605. match:
  606. /* Given a known match, find the longest possible match anchored
  607. at or before its starting point. This is nearly a verbatim
  608. copy of the preceding main search loops. */
  609. if (lim - mch > kwset->maxd)
  610. lim = mch + kwset->maxd;
  611. lmch = NULL;
  612. d = 1;
  613. while (lim - end >= d)
  614. {
  615. if ((d = delta[c = (end += d)[-1]]) != 0)
  616. continue;
  617. beg = end - 1;
  618. if (!(trie = next[c]))
  619. {
  620. d = 1;
  621. continue;
  622. }
  623. if (trie->accepting && beg <= mch)
  624. {
  625. lmch = beg;
  626. accept = trie;
  627. }
  628. d = trie->shift;
  629. while (beg > text)
  630. {
  631. c = trans ? trans[U(*--beg)] : *--beg;
  632. tree = trie->links;
  633. while (tree && c != tree->label)
  634. if (c < tree->label)
  635. tree = tree->llink;
  636. else
  637. tree = tree->rlink;
  638. if (tree)
  639. {
  640. trie = tree->trie;
  641. if (trie->accepting && beg <= mch)
  642. {
  643. lmch = beg;
  644. accept = trie;
  645. }
  646. }
  647. else
  648. break;
  649. d = trie->shift;
  650. }
  651. if (lmch)
  652. {
  653. mch = lmch;
  654. goto match;
  655. }
  656. if (!d)
  657. d = 1;
  658. }
  659. if (kwsmatch)
  660. {
  661. kwsmatch->index = accept->accepting / 2;
  662. kwsmatch->offset[0] = mch - text;
  663. kwsmatch->size[0] = accept->depth;
  664. }
  665. return mch - text;
  666. }
  667. /* Search through the given text for a match of any member of the
  668. given keyword set. Return a pointer to the first character of
  669. the matching substring, or NULL if no match is found. If FOUNDLEN
  670. is non-NULL store in the referenced location the length of the
  671. matching substring. Similarly, if FOUNDIDX is non-NULL, store
  672. in the referenced location the index number of the particular
  673. keyword matched. */
  674. size_t
  675. kwsexec (kwset_t kws, char const *text, size_t size,
  676. struct kwsmatch *kwsmatch)
  677. {
  678. struct kwset const *kwset = (struct kwset *) kws;
  679. if (kwset->words == 1 && kwset->trans == NULL)
  680. {
  681. size_t ret = bmexec (kws, text, size);
  682. if (kwsmatch != NULL && ret != (size_t) -1)
  683. {
  684. kwsmatch->index = 0;
  685. kwsmatch->offset[0] = ret;
  686. kwsmatch->size[0] = kwset->mind;
  687. }
  688. return ret;
  689. }
  690. else
  691. return cwexec(kws, text, size, kwsmatch);
  692. }
  693. /* Free the components of the given keyword set. */
  694. void
  695. kwsfree (kwset_t kws)
  696. {
  697. struct kwset *kwset;
  698. kwset = (struct kwset *) kws;
  699. obstack_free(&kwset->obstack, NULL);
  700. free(kws);
  701. }