RandomTextGenerator.cs :  » Development » DotNetZip » Ionic » Zip » Tests » Utilities » C# / CSharp Open Source

Home
C# / CSharp Open Source
1.2.6.4 mono .net core
2.2.6.4 mono core
3.Aspect Oriented Frameworks
4.Bloggers
5.Build Systems
6.Business Application
7.Charting Reporting Tools
8.Chat Servers
9.Code Coverage Tools
10.Content Management Systems CMS
11.CRM ERP
12.Database
13.Development
14.Email
15.Forum
16.Game
17.GIS
18.GUI
19.IDEs
20.Installers Generators
21.Inversion of Control Dependency Injection
22.Issue Tracking
23.Logging Tools
24.Message
25.Mobile
26.Network Clients
27.Network Servers
28.Office
29.PDF
30.Persistence Frameworks
31.Portals
32.Profilers
33.Project Management
34.RSS RDF
35.Rule Engines
36.Script
37.Search Engines
38.Sound Audio
39.Source Control
40.SQL Clients
41.Template Engines
42.Testing
43.UML
44.Web Frameworks
45.Web Service
46.Web Testing
47.Wiki Engines
48.Windows Presentation Foundation
49.Workflows
50.XML Parsers
C# / C Sharp
C# / C Sharp by API
C# / CSharp Tutorial
C# / CSharp Open Source » Development » DotNetZip 
DotNetZip » Ionic » Zip » Tests » Utilities » RandomTextGenerator.cs
// RandomTextGenerator.cs
// ------------------------------------------------------------------
//
// Copyright (c) 2009 Dino Chiesa
// All rights reserved.
//
// This code module is part of DotNetZip, a zipfile class library.
//
// ------------------------------------------------------------------
//
// This code is licensed under the Microsoft Public License.
// See the file License.txt for the license details.
// More info on: http://dotnetzip.codeplex.com
//
// ------------------------------------------------------------------
//
// last saved (in emacs):
// Time-stamp: <2010-February-15 10:55:49>
//
// ------------------------------------------------------------------
//
// This module defines a class that generates random text sequences
// using a Markov chain.
//
// ------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using Ionic.Zip;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Threading;

namespace Ionic.Zip.Tests.Utilities{

    public class RandomTextGenerator
    {
        static string[] uris = new string[]
            {
                // "Through the Looking Glass", by Lewis Carroll (~181k)
                "http://www.gutenberg.org/files/12/12.txt",

                // Decl of Independence (~16k)
                "http://www.gutenberg.org/files/16780/16780.txt",

                // The Naval War of 1812, by Theodore Roosevelt (968k)
                "http://www.gutenberg.org/dirs/etext05/7trnv10.txt",

                // On Prayer and the Contemplative Life, by Thomas Aquinas (440k)
                "http://www.gutenberg.org/files/22295/22295.txt",
            };

        SimpleMarkovChain markov;

        public RandomTextGenerator()
        {
            System.Random rnd = new System.Random();
            string uri = uris[rnd.Next(uris.Length)];
            string seedText = GetPageMarkup(uri);
            markov = new SimpleMarkovChain(seedText);
        }


        public string Generate(int length)
        {
            return markov.GenerateText(length);
        }


        private static string GetPageMarkup(string uri)
        {
            string pageData = null;
            using (WebClient client = new WebClient())
            {
                pageData = client.DownloadString(uri);
            }
            return pageData;
        }
    }


    /// <summary>
    /// Implements a simple Markov chain for text.
    /// </summary>
    ///
    /// <remarks>
    /// Uses a Markov chain starting with some base texts to produce
    /// random natural-ish text. This implementation is based on Pike's
    /// perl implementation, see
    /// http://cm.bell-labs.com/cm/cs/tpop/markov.pl
    /// </remarks>
    public class SimpleMarkovChain
    {
        Dictionary<String, List<String>> table = new Dictionary<String, List<String>>();
        System.Random rnd = new System.Random();

        public SimpleMarkovChain(string seed)
        {
            string NEWLINE = "\n";
            string key = NEWLINE;
            var sr = new StringReader(seed);
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                foreach (var word in line.SplitByWords())
                {
                    var w = (word == "") ? NEWLINE : word; // newline
                    if (word == "\r") w = NEWLINE;

                    if (!table.ContainsKey(key)) table.Add(key, new List<string>());
                    table[key].Add(w);
                    key = w.ToLower().TrimPunctuation();
                }
            }
            if (!table.ContainsKey(key)) table.Add(key, new List<string>());
            table[key].Add(NEWLINE);
            key = NEWLINE;
        }


        internal void Diag()
        {
            Console.WriteLine("There are {0} keys in the table", table.Keys.Count);
            foreach (string s in table.Keys)
            {
                string x = s.Replace("\n", "");
                var y = table[s].ToArray();
                Console.WriteLine("  {0}: {1}", x, String.Join(", ", y));
            }
        }

        internal void ShowList(string word)
        {
            string x = word.Replace("\n", "");
            if (table.ContainsKey(word))
            {
                var y = table[word].ToArray();
                var z = Array.ConvertAll(y, x1 => x1.Replace("\n", ""));
                Console.WriteLine("  {0}: {1}", x, String.Join(", ", z));
            }
            else
                Console.WriteLine("  {0}: -key not found-", x);
        }

        private List<string> _keywords;
        private List<string> keywords
        {
            get
            {
                if (_keywords == null)
                    _keywords = new List<String>(table.Keys);
                return _keywords;
            }
        }

        /// <summary>
        /// Generates random text with a minimum character length.
        /// </summary>
        ///
        /// <param name="minimumLength">
        /// The minimum length of text, in characters, to produce.
        /// </param>
        public string GenerateText(int minimumLength)
        {
            var chosenStartWord = keywords[rnd.Next(keywords.Count)];
            return _InternalGenerate(chosenStartWord, StopCriterion.NumberOfChars, minimumLength);
        }

        /// <summary>
        /// Generates random text with a minimum character length.
        /// </summary>
        ///
        /// <remarks>
        /// The first sentence will start with the given start word.
        /// </remarks>
        ///
        /// <param name="minimumLength">
        /// The minimum length of text, in characters, to produce.
        /// </param>
        /// <param name="start">
        /// The word to start with. If this word does not exist in the
        /// seed text, the generation will fail.
        /// </param>
        /// <seealso cref="GenerateText(int)"/>
        /// <seealso cref="GenerateWords(int)"/>
        /// <seealso cref="GenerateWords(string, int)"/>
        public string GenerateText(string start, int minimumLength)
        {
            return _InternalGenerate(start, StopCriterion.NumberOfChars, minimumLength);
        }

        /// <summary>
        /// Generate random text with a minimum number of words.
        /// </summary>
        ///
        /// <remarks>
        /// The first sentence will start with the given start word.
        /// </remarks>
        ///
        /// <param name="minimumWords">
        /// The minimum number of words of text to produce.
        /// </param>
        /// <param name="start">
        /// The word to start with. If this word does not exist in the
        /// seed text, the generation will fail.
        /// </param>
        /// <seealso cref="GenerateText(int)"/>
        /// <seealso cref="GenerateText(string, int)"/>
        /// <seealso cref="GenerateWords(int)"/>
        public string GenerateWords(string start, int minimumWords)
        {
            return _InternalGenerate(start, StopCriterion.NumberOfWords, minimumWords);
        }


        /// <summary>
        /// Generate random text with a minimum number of words.
        /// </summary>
        ///
        /// <param name="minimumWords">
        /// The minimum number of words of text to produce.
        /// </param>
        /// <seealso cref="GenerateText(int)"/>
        /// <seealso cref="GenerateWords(string, int)"/>
        public string GenerateWords(int minimumWords)
        {
            var chosenStartWord = keywords[rnd.Next(keywords.Count)];
            return _InternalGenerate(chosenStartWord, StopCriterion.NumberOfWords, minimumWords);
        }


        private string _InternalGenerate(string start, StopCriterion crit, int limit)
        {
            string w1 = start.ToLower();
            StringBuilder sb = new StringBuilder();
            sb.Append(start.Capitalize());

            int consecutiveNewLines = 0;
            string word = null;
            string priorWord = null;

            // About the stop criteria:
            // we keep going til we reach the specified number of words or chars, with the added
            // proviso that we have to complete the in-flight sentence when the limit is reached.

            for (int i = 0;
                 (crit == StopCriterion.NumberOfWords && i < limit) ||
                     (crit == StopCriterion.NumberOfChars && sb.Length < limit) ||
                     consecutiveNewLines == 0;
                 i++)
            {
                if (table.ContainsKey(w1))
                {
                    var list = table[w1];
                    int ix = rnd.Next(list.Count);
                    priorWord = word;
                    word = list[ix];
                    if (word != "\n")
                    {
                        // capitalize
                        if (consecutiveNewLines > 0)
                            sb.Append(word.Capitalize());
                        else
                            sb.Append(" ").Append(word);

                        // words that end sentences get a newline
                        if (word.EndsWith("."))
                        {
                            if (consecutiveNewLines == 0 || consecutiveNewLines == 1)
                                sb.Append("\n");
                            consecutiveNewLines++;
                        }
                        else consecutiveNewLines = 0;
                    }
                    w1 = word.ToLower().TrimPunctuation();
                }
            }
            return sb.ToString();
        }



        private enum StopCriterion
        {
            NumberOfWords,
            NumberOfChars
        }

    }



    public class RandomTextInputStream : Stream
    {
        RandomTextGenerator _rtg;
        Int64 _desiredLength;
        Int64 _bytesRead;
        bool _isDisposed;
        System.Text.Encoding _encoding;
        string[] _randomText;
        byte[] _src;
        int _nblocks, _p, _readyIndex, _slurps, _gnt;
        static readonly int _maxChunkSize = 1024 * 32;
        System.Threading.ManualResetEvent _producerDone;
        System.Threading.AutoResetEvent _needData, _haveData;

        public RandomTextInputStream(Int64 length)
        : this(length, System.Text.Encoding.GetEncoding("ascii"))
        {
        }

        public RandomTextInputStream(Int64 length, System.Text.Encoding encoding)
        : base()
        {
            _desiredLength = length;
            _rtg = new RandomTextGenerator();
            _encoding = encoding;
            _nblocks = 0;
            _p = 0;
            _isDisposed = false;
            _randomText = new string[2];
            _needData = new AutoResetEvent(false);
            _haveData = new AutoResetEvent(false);
            _producerDone = new ManualResetEvent(false);

            ThreadPool.QueueUserWorkItem(new WaitCallback(_Producer));
        }


        /// <summary>
        ///   Method for the background thread that generates random text strings
        /// </summary>
        /// <remarks>
        ///   <para>
        ///     The RandomTextInputStream is often used to generate
        ///     large texts or strings.  There's an advantage to using a
        ///     background thread to generating those texts while the
        ///     foreground is reading from the text
        ///   </para>
        /// </remarks>
        private void _Producer(object state)
        {
            int nowServing = 0;
            try
            {
                do
                {
                    _randomText[nowServing]= _rtg.Generate(_maxChunkSize);
                    _slurps++;
                    _needData.WaitOne();
                    if (_isDisposed) break;
                    _readyIndex = nowServing;
                    nowServing = 1-nowServing;
                    _haveData.Set();
                } while (true);
            }
            catch (System.Exception)
            {
            }

            _producerDone.Set();
        }

        /// <summary>
        ///   for diagnostic purposes only
        /// </summary>
        public int SlurpsCount
        {
            get
            {
                return _slurps;
            }
        }

        /// <summary>
        ///   for diagnostic purposes only
        /// </summary>
        public int GetNewTextCount
        {
            get
            {
                return _gnt;
            }
        }

        new public void  Dispose()
        {
             Dispose(true);
        }

        private void StopProducer()
        {
            _isDisposed = true;
            _needData.Set();
            _producerDone.WaitOne();        
        }

        /// <summary>The Dispose method</summary>
        protected override void Dispose(bool disposeManagedResources)
        {
            if (disposeManagedResources)
            {
                StopProducer();
                // dispose managed resources
                _producerDone.Close();
                _haveData.Close();
                _needData.Close();
            }
        }

        private string GetNewText()
        {
            _gnt++;
            _needData.Set();
            _haveData.WaitOne();
            return _randomText[_readyIndex];
        }

        public Int64 BytesRead
        {
            get { return _bytesRead; }
        }

        public override int Read(byte[] buffer, int offset, int count)
        {
            int remainingBytesToRead = count;
            if (_desiredLength - _bytesRead < remainingBytesToRead)
                remainingBytesToRead = unchecked((int)(_desiredLength - _bytesRead));

            int totalBytesToRead = remainingBytesToRead;

            while (remainingBytesToRead > 0)
            {
                if (_nblocks % 32 == 0)
                {
                    // after 32 cycles through the buffer, get a new one.
                    _src = _encoding.GetBytes(GetNewText());
                    _p = 0;
                }

                int bytesAvailable = _src.Length - _p;
                int chunksize = (remainingBytesToRead > bytesAvailable) ? bytesAvailable : remainingBytesToRead;

                Array.Copy(_src, _p, buffer, offset, chunksize);
                _p += chunksize;
                remainingBytesToRead -= chunksize;
                offset += chunksize;
                if (_p >= _src.Length)
                {
                    // reset pointer to beginning of buffer
                    _p = 0;
                    _nblocks++;
                }
            }
            _bytesRead += totalBytesToRead;

            return totalBytesToRead;
        }

        public override void Write(byte[] buffer, int offset, int count)
        {
            throw new NotImplementedException();
        }

        public override bool CanRead
        {
            get { return true; }
        }
        public override bool CanSeek
        {
            get { return false; }
        }

        public override bool CanWrite
        {
            get { return false; }
        }

        public override long Length
        {
            get { return _desiredLength; }
        }

        public override long Position
        {
            get { return _desiredLength - _bytesRead; }
            set
            {
                Console.WriteLine("setting position to {0}", value);
                throw new NotImplementedException();
            }
        }

        public override long Seek(long offset, System.IO.SeekOrigin origin)
        {
            throw new NotImplementedException();
        }

        public override void SetLength(long value)
        {
            if (value < _bytesRead)
                throw new NotImplementedException();
            _desiredLength = value;
        }

        public override void Flush()
        {
        }
    }



}
www.java2v.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.