create or replace package body gestalt_pkg    
IS   
   
l_min_threshold SIMPLE_FLOAT := 0;   
   
FUNCTION lcs(         
  in_str1 IN VARCHAR2,         
  in_str2 IN VARCHAR2         
) RETURN VARCHAR2         
IS   
    PRAGMA UDF;   
    TYPE numbers_array IS TABLE OF PLS_INTEGER INDEX BY PLS_INTEGER;         
    TYPE Array2D IS TABLE OF numbers_array INDEX BY PLS_INTEGER;         
    c_l1                  SIMPLE_INTEGER := 0;         
    c_l2                  SIMPLE_INTEGER := 0;         
    p_matrix              Array2D;         
    p_c1                  NCHAR(1);         
    p_c2                  NCHAR(1);         
    endingIndex           SIMPLE_INTEGER := 0;         
    maxlen                SIMPLE_INTEGER := 0; --lcs       
    l_check               SIMPLE_INTEGER := 0; --check if there are any matching characters between the 2 strings       
BEGIN         
             
    IF in_str1 IS NULL OR in_str2 IS NULL THEN         
        RETURN NULL;         
    ELSIF c_l1 > c_l2 THEN         
        RETURN lcs( in_str2, in_str1 );         
    END IF;       
           
    --check if there are any matching characters; if none found, RETURN NULL       
    --@todo: might consider removing the whitespaces before making the comparison     
    SELECT       
        COUNT(1)       
    INTO       
        l_check       
    FROM       
    (       
        SELECT in_str1 initial_string, TRANSLATE ( in_str1, in_str2, '#' ) output_string FROM dual       
    )       
    WHERE       
        initial_String = output_string       
    ;       
    IF l_check = 1       
    THEN       
        RETURN '';       
    END IF;   
           
    c_l1 := LENGTH(in_str1);         
    c_l2 := LENGTH(in_str2);         
             
    --initialize all cells of lookup table to 0       
    FOR i IN 1..c_l1       
    LOOP       
        FOR j IN 1..c_l2       
        LOOP       
            p_matrix(i)(j) := 0;       
        END LOOP;       
    END LOOP;       
       
    FOR i IN 1..c_l1         
    LOOP         
        --Check if the length of current position + length of remaining string <= max length found, exit       
        p_c1 := SUBSTR( in_str1, i, 1 );       
        FOR j IN 1..c_l2         
        LOOP         
            p_c2 := SUBSTR( in_str2, j, 1 );         
            --if current character of X and Y matches         
            IF p_c1 = p_c2         
            THEN         
                if p_matrix.exists ( i-1 ) and p_matrix ( i-1 ).exists ( j-1 )        
                then       
                    p_matrix ( i )( j ) := p_matrix ( i-1 )( j-1 ) + 1;        
                else        
                    p_matrix ( i )( j ) := 1;       
                end if;       
                       
                IF p_matrix(i)(j) > maxlen         
                THEN         
                    maxlen := p_matrix(i)(j);         
                    endingIndex := i;       
                END IF;   
                --if there are fewer chars remaining in string1 than maxlen already found, exit to save some cpu cycles   
                IF (c_l1 - i + 1) < maxlen       
                THEN       
                    RETURN SUBSTR(in_str1, endingIndex - maxlen + 1, maxlen);         
                END IF;      
        
            END IF;         
        END LOOP;   
      
    END LOOP;         
       
    RETURN SUBSTR(in_str1, endingIndex - maxlen + 1, maxlen);         
         
END lcs;   
   
FUNCTION cs(     
  in_str1 IN VARCHAR2,     
  in_str2 IN VARCHAR2         
) RETURN FLOAT     
AS     
    PRAGMA UDF;   
    v_lcs   VARCHAR2(123);   
    ll      VARCHAR2(123);   
    rl      VARCHAR2(123);   
    lr      VARCHAR2(123);   
    rr      VARCHAR2(123);    
     
BEGIN     
    --compute longest common substring     
    v_lcs := lcs(in_str1, in_str2);   
    IF v_lcs IS NULL     
    THEN     
        RETURN 0;     
    END IF;   
   
    LOOP   
        ll := SUBSTR( in_str1, 1, INSTR ( in_str1, v_lcs ) - 1 );   
        lr := SUBSTR( in_str1, INSTR ( in_str1, v_lcs ) + LENGTH ( v_lcs ) );   
   
        rl := SUBSTR( in_str2, 1, INSTR ( in_str2, v_lcs ) - 1 );   
        rr := SUBSTR( in_str2, INSTR ( in_str2, v_lcs ) + LENGTH ( v_lcs ) );   
           
        RETURN 2 * (COALESCE(length(v_lcs), 0) + COALESCE(length(lcs(ll, rl)), 0) + COALESCE(length(lcs(lr, rr)), 0)) / ( LENGTH(in_str1) + LENGTH(in_str2) );   
    END LOOP;   
    RETURN 0;   
         
END cs;   
   
FUNCTION lcs_sql   
(   
      l IN VARCHAR2   
    , r IN VARCHAR2   
)    
RETURN SIMPLE_FLOAT   
IS   
    PRAGMA UDF;   
    ll VARCHAR2(123);   
    rl VARCHAR2(123);   
    lr VARCHAR2(123);   
    rr VARCHAR2(123);   
BEGIN   
    FOR rec IN   
    (   
        SELECT   
            INSTR(l, SUBSTR(r,r_strt,len)) l_strt   
            , r_strt   
            , len   
        FROM   
            dual   
            , LATERAL(SELECT LENGTH(r) + 1 - LEVEL len from dual CONNECT BY LENGTH(r) + 1 - LEVEL > 0) s   
            , LATERAL(SELECT LEVEL r_strt FROM dual CONNECT BY LEVEL + s.len - 1 <= LENGTH(r)) e   
        WHERE   
            INSTR(l, SUBSTR(r,e.r_strt,len)) > 0   
            AND ROWNUM <= 1   
    )   
    LOOP   
        ll := SUBSTR(l, 1, rec.l_strt - 1);   
        rl := SUBSTR(r, 1, rec.r_strt - 1);   
        lr := SUBSTR(l, rec.l_strt + rec.len);   
        rr := SUBSTR(r, rec.r_strt + rec.len);   
        RETURN rec.len + lcs_sql(ll, rl) + lcs_sql(lr, rr);   
    END LOOP;   
    RETURN 0;   
END lcs_sql;   
   
FUNCTION ratcliff_obershelp_alg(   
  in_str1 in varchar2,   
  in_str2 in varchar2   
) RETURN SIMPLE_FLOAT   
   
IS   
    PRAGMA UDF;   
BEGIN   
   
    RETURN 2 * lcs_sql(in_str1, in_str2) / ( LENGTH(in_str1) + LENGTH(in_str2) );   
   
END ratcliff_obershelp_alg;   
   
PROCEDURE test_similarity   
(   
    p_string IN VARCHAR2   
)   
IS   
    TYPE t_results is RECORD (obj_id INTEGER, object_name VARCHAR2(123));   
    TYPE results_tab is TABLE OF t_results INDEX BY PLS_INTEGER;   
    l_results   results_tab;   
       
    TYPE t_results2 is RECORD (obj_id INTEGER, object_name VARCHAR2(123), score FLOAT);   
    TYPE results2_tab is TABLE OF t_results2 INDEX BY PLS_INTEGER;   
       
    l_results2  results2_tab := results2_tab();   
       
    CURSOR cref   
    IS   
    SELECT obj_id, object_name   
    FROM   
        (   
            select object_id+rownum AS obj_id, object_name from all_objects   
            , ( select 1 from dual connect by level <= 2 ) t   
        )   
    ;   
    i       SIMPLE_INTEGER  := 1;   
    l_score SIMPLE_FLOAT    := 0;  
    l_time  NUMBER;  
   
BEGIN   
   
    l_time := dbms_utility.get_time;  
    OPEN cref;   
    FETCH cref BULK COLLECT INTO l_results;   
    CLOSE cref;   
       
    FOR indx in 1..l_results.COUNT   
    LOOP   
        --SQL version   
        l_score := ratcliff_obershelp_alg(p_string, l_results(indx).object_name);   
        --PL/SQL version   
        --l_score := cs(p_string, l_results(indx).object_name);   
           
        IF l_score >= l_min_threshold   
        THEN   
            l_results2(i).obj_id := l_results(indx).obj_id;   
            l_results2(i).object_name := l_results(indx).object_name;   
            l_results2(i).score := l_score;   
            i := i + 1;   
        END IF;   
   
    END LOOP;   
   
    dbms_output.put_line('[' || l_results2.COUNT || '] strings selected out of pool of [' || l_results.COUNT || '] in ' || (dbms_utility.get_time - l_time)/100 || ' seconds.');   
   
END test_similarity;   
   
   
BEGIN   
   
    l_min_threshold:= 0.65;   
   
END gestalt_pkg;